import os
import logging
from flask import Flask, render_template_string, send_file, abort
from huggingface_hub import hf_hub_download, login as hf_login
from dotenv import load_dotenv
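# Load environment variables (e.g. HF_TOKEN) from a local .env file, if present.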
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
hf_token = os.getenv("HF_TOKEN")
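# Gemma checkpoints on Hugging Face are gated, so a token for an account that
# has accepted the license is typically required for the download below.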
if hf_token:
    try:
        hf_login(token=hf_token)
        logger.info("Hugging Face login succeeded with the provided token.")
    except Exception as e:
        logger.error(f"Error during Hugging Face login: {e}")
else:
    # login() with token=None prompts interactively, which would block a server.
    logger.warning("HF_TOKEN is not set; skipping Hugging Face login.")
app = Flask(__name__)
MODEL_FILENAME = 'gemma3-1b-it-int4.task'
HUGGINGFACE_REPO = 'litert-community/Gemma3-1B-IT'
MODEL_LOCAL_PATH = os.path.join(os.getcwd(), MODEL_FILENAME)
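# The .task file is a LiteRT/MediaPipe bundle that the browser-side
# LLM Inference task consumes; it is served to clients via /download below.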
def download_model_file():
    if not os.path.exists(MODEL_LOCAL_PATH):
        logger.info("Model file not found locally. Starting download from Hugging Face into the local folder...")
        try:
            hf_hub_download(
                repo_id=HUGGINGFACE_REPO,
                filename=MODEL_FILENAME,
                local_dir=".",
                # Deprecated (and ignored) in recent huggingface_hub releases;
                # kept for compatibility with older versions.
                local_dir_use_symlinks=False,
            )
            logger.info(f"Download complete: {MODEL_LOCAL_PATH}")
        except Exception as e:
            logger.error(f"Error downloading the model file: {e}")
            raise
    else:
        logger.info("Model file already exists locally.")
    return MODEL_LOCAL_PATH
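# The download runs at import time, so the first startup can take a while.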
model_file_path = download_model_file()
logger.info(f"Ruta del archivo de modelo: {model_file_path}")
HTML_CONTENT = """
LLM Inference Web Demo
Demo de Inferencia LLM
Ingresa el texto de entrada y presiona "Get Response".
` +
`**Troubleshooting Steps:** ` +
`1. **Verify WebGPU Support:** ` +
` - **Browser:** Use **Google Chrome** or **Microsoft Edge (latest versions).** ` +
` - **Check chrome://gpu (or edge://gpu):** In your browser address bar, type \`chrome://gpu\` (or \`edge://gpu\`) and press Enter. Look for "WebGPU" section. ` +
` - **Status:** Should say "Hardware accelerated". If disabled or "Software only", WebGPU is not working correctly. ` +
` - **maxStorageBufferBindingSize:** Verify the reported value. If it's very low, your GPU/browser might be too limited. ` +
` - **Enable WebGPU Flags (if needed):** In chrome://flags (or edge://flags), search for "WebGPU" and try enabling flags like \`#enable-unsafe-webgpu\` and restart browser.
` +
`2. **Update Browser and GPU Drivers:** ` +
` - **Browser:** Update Chrome/Edge to the latest version. ` +
` - **GPU Drivers:** Download and install the latest drivers from NVIDIA, AMD, or Intel websites for your specific GPU and operating system. **Restart your computer after driver install.**
` +
`3. **Restart Your Computer:** A simple restart can resolve temporary issues.
` +
`4. **Try a Different Browser/Computer:** Test with a different WebGPU-compatible browser (Chrome/Edge) or on a different computer with a more capable GPU if possible.
` +
`5. **Check GPU Compatibility:** Older or very low-end GPUs might have limited WebGPU support.
` +
`If the issue persists after these steps, your GPU or browser may have inherent limitations for running this LLM demo in WebGPU.`;
          submit.disabled = true;
          submit.value = 'Failed to load model';
        });
  } catch (e) {
    console.error("Error during LlmInference setup:", e);
    errorMessageDiv.textContent = `Failed to set up LlmInference. Details: ${e}`;
    submit.disabled = true;
    submit.value = 'Failed to load model';
  }
}

runDemo();
"""
@app.route('/')
def index():
    return render_template_string(HTML_CONTENT)
@app.route('/index.js')
def serve_js():
    return JS_CONTENT, 200, {'Content-Type': 'application/javascript'}
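# The browser-side task loads the model from this route (it is the
# modelAssetPath configured in index.js).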
@app.route('/download')
def download_file():
    logger.info(f"Request to download the model from: {model_file_path}")
    if os.path.exists(model_file_path):
        return send_file(model_file_path)
    else:
        logger.error(f"Model file not found at path: {model_file_path}")
        abort(404, description="Model file not found.")
if __name__ == '__main__':
    logger.info("Starting the Flask app on port 7860")
    app.run(debug=True, host="0.0.0.0", port=7860)