Update app.py
app.py CHANGED
@@ -29,7 +29,7 @@ def download_model():
 def start_tunnel():
     # Start nport tunnel
     tunnel_process = subprocess.Popen(
-        ["npx", "nport", "-s", "ai-service", "-p", "5000"],
+        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
     )
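Note on the hunk above: Popen only launches the tunnel; the public URL still has to be read back from the process before it can be pushed anywhere. A minimal sketch of that step, assuming nport prints the assigned URL to stdout (the output format is an assumption, not something this diff confirms):

import re
import subprocess

def start_tunnel():
    # Launch the nport tunnel exactly as in the diff above.
    tunnel_process = subprocess.Popen(
        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    # Scan stdout until something URL-shaped appears; adjust the pattern
    # if nport formats its output differently.
    for line in tunnel_process.stdout:
        match = re.search(r"https?://\S+", line)
        if match:
            return tunnel_process, match.group(0)
    raise RuntimeError("nport exited without printing a tunnel URL")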
@@ -56,8 +56,9 @@ def push_tunnel_url_to_repo(tunnel_url):
 
     # Clone the repository
     repo_dir = "/tmp/repo"
+    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
     subprocess.run(
-        ["git", "clone",
+        ["git", "clone", repo_url, repo_dir],
         check=True,
     )
     os.chdir(repo_dir)
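The rest of push_tunnel_url_to_repo falls outside this diff. For context, a plausible continuation under the same approach, committing the URL and pushing with the PAT already embedded in the clone URL; the instance.json file name and the commit message are illustrative assumptions:

import json
import subprocess

def write_and_push(tunnel_url, repo_dir="/tmp/repo"):
    # Record the current tunnel URL in a file tracked by the repo
    # (the name instance.json is hypothetical, not taken from the diff).
    with open(f"{repo_dir}/instance.json", "w") as f:
        json.dump({"tunnel_url": tunnel_url}, f)
    subprocess.run(["git", "-C", repo_dir, "add", "instance.json"], check=True)
    # --allow-empty keeps the commit from failing when the URL is unchanged.
    subprocess.run(
        ["git", "-C", repo_dir, "commit", "--allow-empty", "-m", "Update tunnel URL"],
        check=True,
    )
    subprocess.run(["git", "-C", repo_dir, "push"], check=True)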
@@ -77,8 +78,21 @@ def push_tunnel_url_to_repo(tunnel_url):
 @app.route("/chat", methods=["POST"])
 def chat():
     data = request.json
-    prompt
-
+    # Construct the prompt without duplicate special tokens
+    prompt = (
+        f"<|begin_of_text|>"
+        f"<|start_header_id|>user<|end_header_id|>\n"
+        f"{data.get('message', '')}"
+        f"<|eot_id|>\n"
+        f"<|start_header_id|>assistant<|end_header_id|>\n"
+    )
+    output = llm(
+        prompt,
+        max_tokens=2048,
+        stop=["<|eot_id|>"],
+        temperature=0.8,
+        top_p=0.9,
+    )
     return jsonify({"response": output["choices"][0]["text"].strip()})
 
 if __name__ == "__main__":
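Worth noting about the new prompt construction: llama-cpp-python can also apply the model's bundled chat template through create_chat_completion, which avoids hand-assembling Llama 3 special tokens such as <|begin_of_text|> and <|eot_id|>. A sketch of the same endpoint on that API, reusing the diff's sampling parameters:

@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    # Let the library format the conversation with the model's own
    # chat template instead of concatenating special tokens by hand.
    output = llm.create_chat_completion(
        messages=[{"role": "user", "content": data.get("message", "")}],
        max_tokens=2048,
        temperature=0.8,
        top_p=0.9,
    )
    return jsonify({"response": output["choices"][0]["message"]["content"].strip()})

This only behaves as intended if the GGUF file ships a chat template (recent Llama 3 conversions generally do); otherwise the manual prompt in the diff is the safer route.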
@@ -88,7 +102,7 @@ if __name__ == "__main__":
     # Initialize the LLM
     llm = Llama(
         model_path=MODEL_PATH,
-        n_ctx=
+        n_ctx=131072, # Set to match the training context length
         n_threads=2,
         n_gpu_layers=0,
         verbose=False,
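One caveat on the n_ctx change: the KV cache scales linearly with context length, so a 131072-token window can consume multiple gigabytes of RAM on a CPU-only host, regardless of how short the actual prompts are. If memory is tight, a smaller window is a common trade-off; a sketch with a reduced value (8192 here is an illustrative choice, not from the diff):

llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=8192,      # well under the model's 131072 maximum, to cap KV-cache memory
    n_threads=2,
    n_gpu_layers=0,  # CPU-only inference
    verbose=False,
)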