feat(optim): load the model and tokenizer outside of the spaces wrapped method

#3
opened by raphael-gl (HF Staff)
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -54,13 +54,14 @@ def _history_to_messages(history: List[Tuple[str, str]]) -> List[Dict[str, str]]
54
  msgs.append({"role": "assistant", "content": bot_msg})
55
  return msgs
56
 
 
 
57
  @spaces.GPU(duration=120)
58
  def generate_stream(message: str, history: List[Tuple[str, str]]):
59
  """
60
  Minimal streaming chat function for gr.ChatInterface.
61
  Uses instruct chat template. No token UI. No extra controls.
62
  """
63
- _ensure_loaded()
64
 
65
  messages = _history_to_messages(history) + [{"role": "user", "content": message}]
66
  inputs = _tokenizer.apply_chat_template(
 
54
  msgs.append({"role": "assistant", "content": bot_msg})
55
  return msgs
56
 
57
+ _ensure_loaded()
58
+
59
  @spaces.GPU(duration=120)
60
  def generate_stream(message: str, history: List[Tuple[str, str]]):
61
  """
62
  Minimal streaming chat function for gr.ChatInterface.
63
  Uses instruct chat template. No token UI. No extra controls.
64
  """
 
65
 
66
  messages = _history_to_messages(history) + [{"role": "user", "content": message}]
67
  inputs = _tokenizer.apply_chat_template(