Alysha Creelman
committed on
changing stream in API from True to False app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ print(token)
|
|
| 14 |
|
| 15 |
# Inference client setup with token from environment
|
| 16 |
# token = os.getenv('HF_TOKEN')
|
| 17 |
-
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token
|
| 18 |
# pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
|
| 19 |
pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
|
| 20 |
|
|
@@ -54,7 +54,6 @@ def respond(
|
|
| 54 |
temperature=temperature,
|
| 55 |
do_sample=True,
|
| 56 |
top_p=top_p,
|
| 57 |
-
stream=False,
|
| 58 |
):
|
| 59 |
if stop_inference:
|
| 60 |
response = "Inference cancelled."
|
|
@@ -78,7 +77,7 @@ def respond(
|
|
| 78 |
for message_chunk in client.chat_completion(
|
| 79 |
messages,
|
| 80 |
max_tokens=max_tokens,
|
| 81 |
-
stream=True,
|
| 82 |
temperature=temperature,
|
| 83 |
top_p=top_p,
|
| 84 |
):
|
|
|
|
| 14 |
|
| 15 |
# Inference client setup with token from environment
|
| 16 |
# token = os.getenv('HF_TOKEN')
|
| 17 |
+
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token)
|
| 18 |
# pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
|
| 19 |
pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
|
| 20 |
|
|
|
|
| 54 |
temperature=temperature,
|
| 55 |
do_sample=True,
|
| 56 |
top_p=top_p,
|
|
|
|
| 57 |
):
|
| 58 |
if stop_inference:
|
| 59 |
response = "Inference cancelled."
|
|
|
|
| 77 |
for message_chunk in client.chat_completion(
|
| 78 |
messages,
|
| 79 |
max_tokens=max_tokens,
|
| 80 |
+
stream=False,
|
| 81 |
temperature=temperature,
|
| 82 |
top_p=top_p,
|
| 83 |
):
|