feat(optim): load the model and tokenizer outside of the spaces wrapped method

#3
opened by raphael-gl (HF Staff)
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -54,13 +54,14 @@ def _history_to_messages(history: List[Tuple[str, str]]) -> List[Dict[str, str]]
54
  msgs.append({"role": "assistant", "content": bot_msg})
55
  return msgs
56
 
 
 
57
  @spaces.GPU(duration=120)
58
  def generate_stream(message: str, history: List[Tuple[str, str]]):
59
  """
60
  Minimal streaming chat function for gr.ChatInterface.
61
  Uses instruct chat template. No token UI. No extra controls.
62
  """
63
- _ensure_loaded()
64
 
65
  messages = _history_to_messages(history) + [{"role": "user", "content": message}]
66
  inputs = _tokenizer.apply_chat_template(
 
54
  msgs.append({"role": "assistant", "content": bot_msg})
55
  return msgs
56
 
57
+ _ensure_loaded()
58
+
59
  @spaces.GPU(duration=120)
60
  def generate_stream(message: str, history: List[Tuple[str, str]]):
61
  """
62
  Minimal streaming chat function for gr.ChatInterface.
63
  Uses instruct chat template. No token UI. No extra controls.
64
  """
 
65
 
66
  messages = _history_to_messages(history) + [{"role": "user", "content": message}]
67
  inputs = _tokenizer.apply_chat_template(