# app/main.py — Qur'an Whisper Arabic transcriber (commit 839adbb, by mahmoudsaber0)
import os
import tempfile

import torch
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
# -----------------------------------
# Environment setup
# -----------------------------------
# Redirect all Hugging Face caches to /tmp — the only writable path on
# HF Spaces / read-only containers. Must be set BEFORE faster_whisper import.
os.environ["HF_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp"
# Pick hardware and a matching precision: fp16 on GPU, int8 quantized on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"πŸš€ Loading Faster-Whisper model on {device}...")
# Import and load faster-whisper
from faster_whisper import WhisperModel
# Load model - options: tiny, base, small, medium, large-v2, large-v3
# NOTE: loaded once at module import; all requests share this instance.
model = WhisperModel(
    "base",  # Change to "small" or "medium" for better Quran accuracy
    device=device,
    compute_type=compute_type,
    download_root="/tmp"
)
print("βœ… Faster-Whisper model loaded with word-level timestamp support")
# -----------------------------------
# FastAPI app setup
# -----------------------------------
# FastAPI application instance; routes are registered on it below.
app = FastAPI(title="Qur'an Whisper Arabic Transcriber with Word-Level Timestamps")
# NOTE(review): wildcard origins combined with allow_credentials=True is
# permissive — browsers reject "*" with credentials, and Starlette echoes the
# request origin instead. Confirm this openness is intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    """
    Transcribe Arabic audio with word-level timestamps.

    Transcribes the entire audio at once and returns a timestamp for each word.

    Parameters:
        audio: uploaded audio file (any format ffmpeg/faster-whisper accepts).

    Returns:
        JSONResponse with full text, per-word start/end/probability, detected
        language info, duration, and model name; or {"error": ...} with
        status 500 on failure.
    """
    temp_path = None
    try:
        # SECURITY: never build a path from the client-supplied filename —
        # it may be None or contain traversal components ("../..").
        # Keep only the extension (needed for format detection) and let
        # tempfile pick a unique, safe name.
        suffix = os.path.splitext(audio.filename or "")[1]
        fd, temp_path = tempfile.mkstemp(suffix=suffix, dir="/tmp")
        with os.fdopen(fd, "wb") as f:
            f.write(await audio.read())
        print(f"🎧 Transcribing: {temp_path}")
        # Transcribe with word-level timestamps
        segments, info = model.transcribe(
            temp_path,
            language="ar",         # Arabic
            task="transcribe",
            word_timestamps=True,  # Enable word-level timestamps
            vad_filter=True,       # Voice activity detection
        )
        # Convert the lazy generator to a list so decoding happens here,
        # inside the try block, rather than during response serialization.
        segments_list = list(segments)
        # Extract full text and word-level timestamps
        full_text = []
        words = []
        for segment in segments_list:
            full_text.append(segment.text)
            # `words` can be None for a segment if alignment failed; guard it.
            for word in segment.words or []:
                words.append({
                    "word": word.word.strip(),
                    "start": round(word.start, 2),
                    "end": round(word.end, 2),
                    "probability": round(word.probability, 3)
                })
        output = {
            "text": " ".join(full_text).strip(),
            "words": words,
            "language": info.language,
            "language_probability": round(info.language_probability, 3),
            "duration": round(info.duration, 2),
            "model": "faster-whisper-base",
        }
        print(f"βœ… Transcription complete: {len(words)} words, duration: {info.duration}s")
        return JSONResponse(output)
    except Exception as e:
        # Top-level request boundary: log with traceback, report as 500.
        print("❌ Error:", e)
        import traceback
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # Clean up temp file; only swallow filesystem errors, nothing else.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
@app.get("/health")
def health():
    """Health probe: report service status and runtime configuration."""
    # `device` is the module-level "cuda"/"cpu" string chosen at startup.
    status_report = {
        "status": "healthy",
        "model": "faster-whisper-base",
        "device": device,
        "timestamp_support": "word-level",
        "language": "arabic",
    }
    return status_report
@app.get("/")
def index():
    """Root endpoint: banner message confirming the service is up."""
    banner = "πŸ•Œ Qur'an Whisper Transcription API with Word-Level Timestamps is running!"
    return {"message": banner}
if __name__ == "__main__":
    # Direct-run entry point; port 7860 is the Hugging Face Spaces default.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)