import os
import torch
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware

# -----------------------------------
# Environment setup
# -----------------------------------
# Redirect the Hugging Face / Transformers caches to /tmp. This is done before
# the faster_whisper import below — presumably so the library picks these
# locations up; keep the ordering.
os.environ.update(HF_HOME="/tmp", TRANSFORMERS_CACHE="/tmp")

# Use CUDA with float16 when a GPU is visible to torch, otherwise fall back to
# the CPU with int8 compute.
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"

print(f"🚀 Loading Faster-Whisper model on {device}...")

# Import and load faster-whisper
from faster_whisper import WhisperModel

# Model size options: tiny, base, small, medium, large-v2, large-v3.
# Switch "base" to "small" or "medium" for better Quran accuracy.
# Model files are downloaded under /tmp.
model = WhisperModel("base", device=device, compute_type=compute_type, download_root="/tmp")

print("✅ Faster-Whisper model loaded with word-level timestamp support")

# -----------------------------------
# FastAPI app setup
# -----------------------------------
app = FastAPI(title="Qur'an Whisper Arabic Transcriber with Word-Level Timestamps")

# Open CORS policy: any origin, method and header may call this API.
# Credentials are disabled because a wildcard origin must not be combined with
# credentialed requests; FastAPI's CORS documentation requires origins,
# methods and headers to be listed explicitly once allow_credentials=True.
# If cross-origin cookies are ever needed, replace the "*" entries with
# explicit values and turn allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Uploads are copied to disk in pieces of this many bytes instead of being
# read into memory in one go.
_UPLOAD_CHUNK_SIZE = 1024 * 1024


def _safe_suffix(filename):
    """Return the extension of *filename* (e.g. ".mp3") if it is short and alphanumeric, else ""."""
    extension = os.path.splitext(os.path.basename(filename or ""))[1]
    if len(extension) <= 16 and extension[1:].isalnum():
        return extension
    return ""


def _collect_words(segments):
    """Flatten the word entries of *segments* into JSON-serialisable dicts."""
    collected = []
    for segment in segments:
        # Tolerate a segment that carries no word list.
        for word in segment.words or []:
            collected.append({
                "word": word.word.strip(),
                "start": round(word.start, 2),
                "end": round(word.end, 2),
                "probability": round(word.probability, 3),
            })
    return collected


@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    """
    Transcribe Arabic audio with word-level timestamps.

    The uploaded file is transcribed in a single pass. The JSON response
    contains the full text (`text`), one entry per word (`words`, each with
    `word`, `start`, `end` and `probability`), the reported `language` and
    `language_probability`, the audio `duration` and the `model` name.

    Any failure is reported as a 500 response of the form
    `{"error": "<message>"}`.
    """
    import tempfile
    import traceback

    temp_path = None
    try:
        # The client-supplied filename is never used as a path: it could hold
        # "../" components, be missing altogether, or collide with another
        # in-flight upload. Only a sanitised extension is kept; the name itself
        # is generated. delete=False because the file has to outlive this
        # `with` block so the model can open it by path.
        with tempfile.NamedTemporaryFile(
            delete=False, dir="/tmp", suffix=_safe_suffix(audio.filename)
        ) as temp_file:
            # Record the path immediately so `finally` cleans up even if the
            # copy below fails part-way.
            temp_path = temp_file.name
            while True:
                chunk = await audio.read(_UPLOAD_CHUNK_SIZE)
                if not chunk:
                    break
                temp_file.write(chunk)

        print(f"🎧 Transcribing: {temp_path}")

        # NOTE: this call and the list() below run synchronously inside the
        # coroutine, so the event loop is busy until they return.
        segments, info = model.transcribe(
            temp_path,
            language="ar",  # Arabic
            task="transcribe",
            word_timestamps=True,  # Enable word-level timestamps
            vad_filter=True,  # Voice activity detection
        )

        # Consume the segments once into a list so they can be walked twice.
        segments_list = list(segments)

        words = _collect_words(segments_list)

        # Strip each segment before joining so the text is separated by single
        # spaces whether or not segment.text carries padding whitespace.
        segment_texts = (segment.text.strip() for segment in segments_list)
        full_text = " ".join(text for text in segment_texts if text)

        output = {
            "text": full_text,
            "words": words,
            "language": info.language,
            "language_probability": round(info.language_probability, 3),
            "duration": round(info.duration, 2),
            "model": "faster-whisper-base",
        }

        print(f"✅ Transcription complete: {len(words)} words, duration: {info.duration}s")

        return JSONResponse(output)

    except Exception as e:
        # Top-level boundary for this endpoint: log the traceback and turn the
        # failure into a JSON 500 instead of letting it propagate.
        print("❌ Error:", e)
        traceback.print_exc()
        return JSONResponse({"error": str(e)}, status_code=500)

    finally:
        # Best-effort removal of the temp file; a clean-up problem must never
        # replace the response that is already on its way out.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f"⚠️ Could not remove temp file {temp_path}: {cleanup_error}")

@app.get("/health")
def health():
    # Health endpoint: a fixed description of the service plus the
    # module-level `device` value.
    report = dict(
        status="healthy",
        model="faster-whisper-base",
        device=device,
        timestamp_support="word-level",
        language="arabic",
    )
    return report

@app.get("/")
def index():
    # Root endpoint: returns a fixed banner confirming the API is running.
    banner = "🕌 Qur'an Whisper Transcription API with Word-Level Timestamps is running!"
    return {"message": banner}

if __name__ == "__main__":
    # Direct execution: serve `app` with uvicorn on all interfaces, port 7860.
    # uvicorn is only imported on this path.
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")