Update app.py
app.py CHANGED
@@ -29,7 +29,7 @@ def download_model():
 def start_tunnel():
     # Start nport tunnel
     tunnel_process = subprocess.Popen(
-        ["npx", "nport", "-s", "ai-service", "-p", "5000"],
+        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
     )
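Note on the hunk above: Popen only launches the tunnel; the public URL still has to be read back from the process before it can be pushed anywhere. A minimal sketch of that step, assuming nport prints the assigned URL to stdout (the output format is an assumption, not something this diff confirms):

import re
import subprocess

def start_tunnel():
    # Launch the nport tunnel exactly as in the diff above.
    tunnel_process = subprocess.Popen(
        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    # Scan stdout until something URL-shaped appears; adjust the pattern
    # if nport formats its output differently.
    for line in tunnel_process.stdout:
        match = re.search(r"https?://\S+", line)
        if match:
            return tunnel_process, match.group(0)
    raise RuntimeError("nport exited without printing a tunnel URL")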
@@ -56,8 +56,9 @@ def push_tunnel_url_to_repo(tunnel_url):
 
     # Clone the repository
     repo_dir = "/tmp/repo"
+    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
     subprocess.run(
-        ["git", "clone",
+        ["git", "clone", repo_url, repo_dir],
         check=True,
     )
     os.chdir(repo_dir)
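The rest of push_tunnel_url_to_repo falls outside this diff. For context, a plausible continuation under the same approach, committing the URL and pushing with the PAT already embedded in the clone URL; the instance.json file name and the commit message are illustrative assumptions:

import json
import subprocess

def write_and_push(tunnel_url, repo_dir="/tmp/repo"):
    # Record the current tunnel URL in a file tracked by the repo
    # (the name instance.json is hypothetical, not taken from the diff).
    with open(f"{repo_dir}/instance.json", "w") as f:
        json.dump({"tunnel_url": tunnel_url}, f)
    subprocess.run(["git", "-C", repo_dir, "add", "instance.json"], check=True)
    # --allow-empty keeps the commit from failing when the URL is unchanged.
    subprocess.run(
        ["git", "-C", repo_dir, "commit", "--allow-empty", "-m", "Update tunnel URL"],
        check=True,
    )
    subprocess.run(["git", "-C", repo_dir, "push"], check=True)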
@@ -77,8 +78,21 @@ def push_tunnel_url_to_repo(tunnel_url):
 @app.route("/chat", methods=["POST"])
 def chat():
     data = request.json
-    prompt
-
+    # Construct the prompt without duplicate special tokens
+    prompt = (
+        f"<|begin_of_text|>"
+        f"<|start_header_id|>user<|end_header_id|>\n"
+        f"{data.get('message', '')}"
+        f"<|eot_id|>\n"
+        f"<|start_header_id|>assistant<|end_header_id|>\n"
+    )
+    output = llm(
+        prompt,
+        max_tokens=2048,
+        stop=["<|eot_id|>"],
+        temperature=0.8,
+        top_p=0.9,
+    )
     return jsonify({"response": output["choices"][0]["text"].strip()})
 
 if __name__ == "__main__":
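Worth noting about the new prompt construction: llama-cpp-python can also apply the model's bundled chat template through create_chat_completion, which avoids hand-assembling Llama 3 special tokens such as <|begin_of_text|> and <|eot_id|>. A sketch of the same endpoint on that API, reusing the diff's sampling parameters:

@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    # Let the library format the conversation with the model's own
    # chat template instead of concatenating special tokens by hand.
    output = llm.create_chat_completion(
        messages=[{"role": "user", "content": data.get("message", "")}],
        max_tokens=2048,
        temperature=0.8,
        top_p=0.9,
    )
    return jsonify({"response": output["choices"][0]["message"]["content"].strip()})

This only behaves as intended if the GGUF file ships a chat template (recent Llama 3 conversions generally do); otherwise the manual prompt in the diff is the safer route.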
@@ -88,7 +102,7 @@ if __name__ == "__main__":
     # Initialize the LLM
     llm = Llama(
         model_path=MODEL_PATH,
-        n_ctx=
+        n_ctx=131072, # Set to match the training context length
         n_threads=2,
         n_gpu_layers=0,
         verbose=False,
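One caveat on the n_ctx change: the KV cache scales linearly with context length, so a 131072-token window can consume multiple gigabytes of RAM on a CPU-only host, regardless of how short the actual prompts are. If memory is tight, a smaller window is a common trade-off; a sketch with a reduced value (8192 here is an illustrative choice, not from the diff):

llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=8192,      # well under the model's 131072 maximum, to cap KV-cache memory
    n_threads=2,
    n_gpu_layers=0,  # CPU-only inference
    verbose=False,
)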