# app/main.py — Qur'an Whisper Arabic transcriber (commit 839adbb, by mahmoudsaber0)
import os
import tempfile

import torch
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
# -----------------------------------
# Environment setup
# -----------------------------------
# Redirect all Hugging Face caches to /tmp — the only writable path on
# HF Spaces / read-only containers. Must be set BEFORE faster_whisper import.
os.environ["HF_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp"
# Pick hardware and a matching precision: fp16 on GPU, int8 quantized on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"πŸš€ Loading Faster-Whisper model on {device}...")
# Import and load faster-whisper
from faster_whisper import WhisperModel
# Load model - options: tiny, base, small, medium, large-v2, large-v3
# NOTE: loaded once at module import; all requests share this instance.
model = WhisperModel(
    "base",  # Change to "small" or "medium" for better Quran accuracy
    device=device,
    compute_type=compute_type,
    download_root="/tmp"
)
print("βœ… Faster-Whisper model loaded with word-level timestamp support")
# -----------------------------------
# FastAPI app setup
# -----------------------------------
# FastAPI application instance; routes are registered on it below.
app = FastAPI(title="Qur'an Whisper Arabic Transcriber with Word-Level Timestamps")
# NOTE(review): wildcard origins combined with allow_credentials=True is
# permissive — browsers reject "*" with credentials, and Starlette echoes the
# request origin instead. Confirm this openness is intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    """
    Transcribe Arabic audio with word-level timestamps.

    Transcribes the entire audio at once and returns a timestamp for each word.

    Parameters:
        audio: uploaded audio file (any format ffmpeg/faster-whisper accepts).

    Returns:
        JSONResponse with full text, per-word start/end/probability, detected
        language info, duration, and model name; or {"error": ...} with
        status 500 on failure.
    """
    temp_path = None
    try:
        # SECURITY: never build a path from the client-supplied filename —
        # it may be None or contain traversal components ("../..").
        # Keep only the extension (needed for format detection) and let
        # tempfile pick a unique, safe name.
        suffix = os.path.splitext(audio.filename or "")[1]
        fd, temp_path = tempfile.mkstemp(suffix=suffix, dir="/tmp")
        with os.fdopen(fd, "wb") as f:
            f.write(await audio.read())
        print(f"🎧 Transcribing: {temp_path}")
        # Transcribe with word-level timestamps
        segments, info = model.transcribe(
            temp_path,
            language="ar",         # Arabic
            task="transcribe",
            word_timestamps=True,  # Enable word-level timestamps
            vad_filter=True,       # Voice activity detection
        )
        # Convert the lazy generator to a list so decoding happens here,
        # inside the try block, rather than during response serialization.
        segments_list = list(segments)
        # Extract full text and word-level timestamps
        full_text = []
        words = []
        for segment in segments_list:
            full_text.append(segment.text)
            # `words` can be None for a segment if alignment failed; guard it.
            for word in segment.words or []:
                words.append({
                    "word": word.word.strip(),
                    "start": round(word.start, 2),
                    "end": round(word.end, 2),
                    "probability": round(word.probability, 3)
                })
        output = {
            "text": " ".join(full_text).strip(),
            "words": words,
            "language": info.language,
            "language_probability": round(info.language_probability, 3),
            "duration": round(info.duration, 2),
            "model": "faster-whisper-base",
        }
        print(f"βœ… Transcription complete: {len(words)} words, duration: {info.duration}s")
        return JSONResponse(output)
    except Exception as e:
        # Top-level request boundary: log with traceback, report as 500.
        print("❌ Error:", e)
        import traceback
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # Clean up temp file; only swallow filesystem errors, nothing else.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
@app.get("/health")
def health():
    """Health probe: report service status and runtime configuration."""
    # `device` is the module-level "cuda"/"cpu" string chosen at startup.
    status_report = {
        "status": "healthy",
        "model": "faster-whisper-base",
        "device": device,
        "timestamp_support": "word-level",
        "language": "arabic",
    }
    return status_report
@app.get("/")
def index():
    """Root endpoint: banner message confirming the service is up."""
    banner = "πŸ•Œ Qur'an Whisper Transcription API with Word-Level Timestamps is running!"
    return {"message": banner}
if __name__ == "__main__":
    # Direct-run entry point; port 7860 is the Hugging Face Spaces default.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)