# Spaces: Sleeping
import os
import tempfile
import traceback

import torch
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
# -----------------------------------
# Runtime environment and model bootstrap
# -----------------------------------
# Point the Hugging Face cache locations at /tmp. This happens before
# faster_whisper is imported further down.
os.environ.update({"HF_HOME": "/tmp", "TRANSFORMERS_CACHE": "/tmp"})

# Use CUDA with float16 when a GPU is available, otherwise CPU with int8.
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"

print(f"π Loading Faster-Whisper model on {device}...")

# Imported here, after the cache variables above have been set.
from faster_whisper import WhisperModel

# Available model sizes: tiny, base, small, medium, large-v2, large-v3.
# "base" can be swapped for "small" or "medium" for better Quran accuracy.
model = WhisperModel(
    "base",
    device=device,
    compute_type=compute_type,
    download_root="/tmp",
)
print("β Faster-Whisper model loaded with word-level timestamp support")
# -----------------------------------
# FastAPI app setup
# -----------------------------------
app = FastAPI(title="Qur'an Whisper Arabic Transcriber with Word-Level Timestamps")

# CORS: the API is open to every origin. Wildcard origins/methods/headers must
# not be combined with allow_credentials=True (the FastAPI CORS documentation
# requires explicit values in that case), and nothing in this service relies
# on cookies or authentication, so credentialed cross-origin requests are
# disabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    """
    Transcribe Arabic audio with word-level timestamps.

    The upload is written to a uniquely named temporary file under /tmp,
    transcribed in a single pass, and the temporary file is removed before
    the response is returned.

    Args:
        audio: The uploaded audio file.

    Returns:
        A JSONResponse containing ``text``, ``words`` (one entry per word with
        ``word``, ``start``, ``end`` and ``probability``), ``language``,
        ``language_probability``, ``duration`` and ``model``. On any failure a
        JSONResponse ``{"error": ...}`` with status code 500 is returned.
    """
    temp_path = None
    try:
        # The client-supplied filename is untrusted: using it as a path allows
        # traversal ("../../x"), collisions between concurrent uploads, and it
        # may be missing altogether. Keep only a plain alphanumeric extension
        # and let tempfile pick a unique name.
        extension = os.path.splitext(audio.filename or "")[1]
        suffix = extension if extension[1:].isalnum() else ""
        with tempfile.NamedTemporaryFile(
            dir="/tmp", suffix=suffix, delete=False
        ) as f:
            temp_path = f.name
            f.write(await audio.read())

        print(f"π§ Transcribing: {temp_path}")

        # NOTE(review): model.transcribe is a synchronous call inside an async
        # handler; consider offloading it if concurrent requests matter.
        segments, info = model.transcribe(
            temp_path,
            language="ar",  # Arabic
            task="transcribe",
            word_timestamps=True,  # Enable word-level timestamps
            vad_filter=True,  # Voice activity detection
        )

        # Collect the full text and the per-word timing data.
        full_text = []
        words = []
        for segment in segments:
            full_text.append(segment.text)
            # Guard against a segment that carries no word list.
            for word in segment.words or []:
                words.append({
                    "word": word.word.strip(),
                    "start": round(word.start, 2),
                    "end": round(word.end, 2),
                    "probability": round(word.probability, 3),
                })

        output = {
            "text": " ".join(full_text).strip(),
            "words": words,
            "language": info.language,
            "language_probability": round(info.language_probability, 3),
            "duration": round(info.duration, 2),
            "model": "faster-whisper-base",
        }
        print(f"β Transcription complete: {len(words)} words, duration: {info.duration}s")
        return JSONResponse(output)
    except Exception as e:
        # Top-level boundary of the request: log the failure and report it to
        # the client instead of letting it propagate.
        print("β Error:", e)
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # Best-effort removal of the temporary file; a failure here must not
        # mask the response, but it is logged rather than silently dropped.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f"Warning: could not remove {temp_path}: {cleanup_error}")
@app.get("/health")
def health():
    """Return a static health report plus the device the model was loaded on."""
    return {
        "status": "healthy",
        "model": "faster-whisper-base",
        "device": device,
        "timestamp_support": "word-level",
        "language": "arabic",
    }
@app.get("/")
def index():
    """Return a short message confirming that the API is up."""
    return {"message": "π Qur'an Whisper Transcription API with Word-Level Timestamps is running!"}
if __name__ == "__main__":
    # Start the server only when this file is executed directly.
    import uvicorn

    # Listen on all interfaces, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)