import os
import logging
from flask import Flask, render_template_string, send_file, abort
from huggingface_hub import hf_hub_download, login as hf_login
from dotenv import load_dotenv
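# Load environment variables (e.g. HF_TOKEN) from a local .env file, if present.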
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
hf_token = os.getenv("HF_TOKEN")
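# Gemma checkpoints on Hugging Face are gated, so a token for an account that
# has accepted the license is typically required for the download below.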
if hf_token:
    try:
        hf_login(token=hf_token)
        logger.info("Hugging Face login succeeded with the provided token.")
    except Exception as e:
        logger.error(f"Error during Hugging Face login: {e}")
else:
    # login() with token=None prompts interactively, which would block a server.
    logger.warning("HF_TOKEN is not set; skipping Hugging Face login.")
app = Flask(__name__)
MODEL_FILENAME = 'gemma3-1b-it-int4.task'
HUGGINGFACE_REPO = 'litert-community/Gemma3-1B-IT'
MODEL_LOCAL_PATH = os.path.join(os.getcwd(), MODEL_FILENAME)
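# The .task file is a LiteRT/MediaPipe bundle that the browser-side
# LLM Inference task consumes; it is served to clients via /download below.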
def download_model_file():
    if not os.path.exists(MODEL_LOCAL_PATH):
        logger.info("Model file not found locally. Starting download from Hugging Face into the local folder...")
        try:
            hf_hub_download(
                repo_id=HUGGINGFACE_REPO,
                filename=MODEL_FILENAME,
                local_dir=".",
                # Deprecated (and ignored) in recent huggingface_hub releases;
                # kept for compatibility with older versions.
                local_dir_use_symlinks=False,
            )
            logger.info(f"Download complete: {MODEL_LOCAL_PATH}")
        except Exception as e:
            logger.error(f"Error downloading the model file: {e}")
            raise
    else:
        logger.info("Model file already exists locally.")
    return MODEL_LOCAL_PATH
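# The download runs at import time, so the first startup can take a while.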
model_file_path = download_model_file()
logger.info(f"Ruta del archivo de modelo: {model_file_path}")
HTML_CONTENT = """
LLM Inference Web Demo
Demo de Inferencia LLM
Ingresa el texto de entrada y presiona "Get Response".
` +
`**Troubleshooting Steps:** ` +
`1. **Verify WebGPU Support:** ` +
` - **Browser:** Use **Google Chrome** or **Microsoft Edge (latest versions).** ` +
` - **Check chrome://gpu (or edge://gpu):** In your browser address bar, type \`chrome://gpu\` (or \`edge://gpu\`) and press Enter. Look for "WebGPU" section. ` +
` - **Status:** Should say "Hardware accelerated". If disabled or "Software only", WebGPU is not working correctly. ` +
` - **maxStorageBufferBindingSize:** Verify the reported value. If it's very low, your GPU/browser might be too limited. ` +
` - **Enable WebGPU Flags (if needed):** In chrome://flags (or edge://flags), search for "WebGPU" and try enabling flags like \`#enable-unsafe-webgpu\` and restart browser.
` +
`2. **Update Browser and GPU Drivers:** ` +
` - **Browser:** Update Chrome/Edge to the latest version. ` +
` - **GPU Drivers:** Download and install the latest drivers from NVIDIA, AMD, or Intel websites for your specific GPU and operating system. **Restart your computer after driver install.**
` +
`3. **Restart Your Computer:** A simple restart can resolve temporary issues.
` +
`4. **Try a Different Browser/Computer:** Test with a different WebGPU-compatible browser (Chrome/Edge) or on a different computer with a more capable GPU if possible.
` +
`5. **Check GPU Compatibility:** Older or very low-end GPUs might have limited WebGPU support.
` +
`If the issue persists after these steps, your GPU or browser may have inherent limitations for running this LLM demo in WebGPU.`;
          submit.disabled = true;
          submit.value = 'Failed to load model';
        });
  } catch (e) {
    console.error("Error during LlmInference setup:", e);
    errorMessageDiv.textContent = `Failed to set up LlmInference. Details: ${e}`;
    submit.disabled = true;
    submit.value = 'Failed to load model';
  }
}

runDemo();
"""
@app.route('/')
def index():
    return render_template_string(HTML_CONTENT)
@app.route('/index.js')
def serve_js():
    return JS_CONTENT, 200, {'Content-Type': 'application/javascript'}
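# The browser-side task loads the model from this route (it is the
# modelAssetPath configured in index.js).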
@app.route('/download')
def download_file():
    logger.info(f"Request to download the model from: {model_file_path}")
    if os.path.exists(model_file_path):
        return send_file(model_file_path)
    else:
        logger.error(f"Model file not found at path: {model_file_path}")
        abort(404, description="Model file not found.")
if __name__ == '__main__':
    logger.info("Starting the Flask app on port 7860")
    app.run(debug=True, host="0.0.0.0", port=7860)