app.py
CHANGED
@@ -185,7 +185,6 @@ def preprocess_audio_ffmpeg(audio_data: bytes, target_sr: int = 16000) -> np.nda
         raise HTTPException(status_code=400, detail="Audio preprocessing failed. Ensure ffmpeg is installed.")
 
 def _validate_and_normalize_audio(audio_array: np.ndarray) -> np.ndarray:
-    # Check for silence or very low amplitude
     rms = np.sqrt(np.mean(audio_array**2))
     if rms < 0.001:
         logger.warning("Audio appears to be very quiet or silent")
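The deleted comment described the RMS silence check that follows it. For reference, a minimal standalone sketch of that check, using the 0.001 threshold from the diff (the test signals below are illustrative, not from app.py):

    import numpy as np

    def is_near_silent(audio_array: np.ndarray, threshold: float = 0.001) -> bool:
        # Root-mean-square amplitude: near zero for silent or very quiet audio.
        rms = np.sqrt(np.mean(audio_array**2))
        return rms < threshold

    silence = np.zeros(16000, dtype=np.float32)                      # 1 s of silence at 16 kHz
    tone = 0.1 * np.sin(2 * np.pi * 440 * np.arange(16000) / 16000)  # 440 Hz test tone
    print(is_near_silent(silence))  # True
    print(is_near_silent(tone))     # False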
@@ -560,6 +559,43 @@ async def status():
         "message": "Using lightweight keyword-based language detection - no heavy models required"
     }
 
+@app.get("/languages")
+async def get_languages():
+    return {
+        "supported_languages": {
+            "yo": {
+                "name": "Yoruba",
+                "code": "yo",
+                "tts_available": True,
+                "asr_model": "NCAIR1/Yoruba-ASR"
+            },
+            "ha": {
+                "name": "Hausa",
+                "code": "ha",
+                "tts_available": True,
+                "asr_model": "NCAIR1/Hausa-ASR"
+            },
+            "ig": {
+                "name": "Igbo",
+                "code": "ig",
+                "tts_available": False,
+                "asr_model": "NCAIR1/Igbo-ASR",
+                "note": "Text response only - TTS not available"
+            },
+            "en": {
+                "name": "English",
+                "code": "en",
+                "tts_available": True,
+                "asr_model": "NCAIR1/NigerianAccentedEnglish"
+            }
+        },
+        "usage": {
+            "specify_language": "Use /speak endpoint with language parameter for best accuracy",
+            "auto_detection": "Use /speak-auto endpoint when language is unknown",
+            "igbo_response": "Igbo responses return text only (no audio TTS available)"
+        }
+    }
+
 @app.post("/chat")
 async def chat(text: str = Form(...), speak: bool = False, raw: bool = False):
     if not text.strip():
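The new /languages endpoint is a plain GET returning JSON, so clients can discover TTS availability before choosing an endpoint. A minimal client sketch, assuming the app is served locally on port 7860 (the base URL is a placeholder, not from the diff):

    import requests

    BASE = "http://localhost:7860"  # placeholder; substitute the actual Space URL

    resp = requests.get(f"{BASE}/languages")
    resp.raise_for_status()
    info = resp.json()

    # Report which languages can return synthesized audio.
    for code, lang in info["supported_languages"].items():
        mode = "audio" if lang["tts_available"] else "text only"
        print(f'{code}: {lang["name"]} ({mode})')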
@@ -587,11 +623,10 @@ async def speak_to_ai(audio_file: UploadFile = File(...), language: str = Form(.
     ai_response = get_ai_response(transcription)
 
     if speak:
-
-
-        if lang == "ig":
-            return FileResponse(output_path, media_type="text/plain", filename="response.txt")
+        if language == "ig":
+            return {"transcription": transcription, "ai_response": ai_response, "language": language, "response_type": "text", "note": "Igbo TTS not available - returning text response"}
         else:
+            output_path = text_to_speech_file(ai_response)
             return FileResponse(output_path, media_type="audio/wav", filename="response.wav")
 
     return {"transcription": transcription, "ai_response": ai_response, "language": language}
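With this change /speak can return either JSON (Igbo, no TTS) or a WAV file, so a caller should branch on the response Content-Type. A hedged client sketch; the audio_file and language form fields match the endpoint signature above, but the speak field and the file names are assumptions:

    import requests

    BASE = "http://localhost:7860"  # placeholder base URL

    with open("question.wav", "rb") as f:
        resp = requests.post(
            f"{BASE}/speak",
            files={"audio_file": f},
            data={"language": "ig", "speak": "true"},  # 'speak' field assumed from the handler body
        )
    resp.raise_for_status()

    # Igbo requests come back as JSON; other languages stream audio/wav.
    if resp.headers.get("content-type", "").startswith("application/json"):
        print(resp.json()["ai_response"])
    else:
        with open("response.wav", "wb") as out:
            out.write(resp.content)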
@@ -604,11 +639,11 @@ async def speak_to_ai_auto(audio_file: UploadFile = File(...), speak: bool = Tru
     transcription = speech_to_text(audio_data)
     ai_response = get_ai_response(transcription)
     if speak:
-
-
-
-            return FileResponse(output_path, media_type="text/plain", filename="response.txt")
+        detected_lang = detect_language(ai_response)
+        if detected_lang == "ig":
+            return {"transcription": transcription, "ai_response": ai_response, "detected_language": detected_lang, "response_type": "text", "note": "Igbo TTS not available - returning text response"}
         else:
+            output_path = text_to_speech_file(ai_response)
             return FileResponse(output_path, media_type="audio/wav", filename="response.wav")
     return {"transcription": transcription, "ai_response": ai_response}
 
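The auto endpoint now routes on detect_language, which the status message describes as lightweight and keyword-based. Its implementation is not part of this diff; purely as an illustration of that approach, a keyword counter might look like this (the marker words are examples, not the app's actual lists):

    # Illustrative only: app.py's real detect_language is not shown in this diff.
    KEYWORDS = {
        "yo": {"bawo", "jowo", "pele"},     # example Yoruba markers
        "ha": {"sannu", "nagode", "yaya"},  # example Hausa markers
        "ig": {"kedu", "biko", "daalu"},    # example Igbo markers
    }

    def detect_language(text: str, default: str = "en") -> str:
        words = set(text.lower().split())
        scores = {lang: len(words & markers) for lang, markers in KEYWORDS.items()}
        best = max(scores, key=scores.get)
        return best if scores[best] > 0 else default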