"""
MediaTek Breeze-ASR-25 台灣國語識別測試 Space
適用於 HuggingFace Zero GPU Spaces 部署
修復版：解決 ZeroGPU 會話間模型載入問題
"""

import gradio as gr
import spaces
from transformers import pipeline
import torch
import time
import torchaudio

@spaces.GPU(duration=60)
def transcribe_audio(audio_file):
    """Transcribe an audio file with Breeze-ASR-25 and report timing metrics.

    The ASR pipeline is constructed inside this function on every call, so
    the reported total time (and therefore the RTF) includes model loading.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            nothing was uploaded.

    Returns:
        A 3-tuple of strings ``(transcript, performance, status)``. When no
        file is given, or when processing fails, the first element carries
        the error message and ``performance`` is empty. Exceptions are
        caught, printed and reported through the returned strings rather
        than propagated.
    """
    if audio_file is None:
        return "❌ 請上傳音訊檔案", "", ""

    start_total = time.time()

    try:
        # Measure the clip before loading the model: an unreadable or empty
        # file then fails fast, without paying for a full model load.
        waveform, sample_rate = torchaudio.load(audio_file)
        audio_duration = waveform.shape[1] / sample_rate

        # A clip with zero frames would make the RTF division below raise
        # ZeroDivisionError; report it explicitly instead.
        if audio_duration <= 0:
            error_msg = "❌ 處理失敗: 音訊長度為 0，無法識別"
            print(error_msg)
            return error_msg, "", "❌ 處理失敗"

        # The model is loaded on every inference call (the original author
        # notes this as a ZeroGPU limitation).
        print("🔄 載入 MediaTek Breeze-ASR-25 模型...")
        model_load_start = time.time()

        asr_model = pipeline(
            "automatic-speech-recognition",
            model="MediaTek-Research/Breeze-ASR-25",
            torch_dtype=torch.float16,
            device="cuda",
            return_timestamps=True
        )

        model_load_time = time.time() - model_load_start
        print(f"✅ 模型載入完成 ({model_load_time:.2f}s)")

        # Run ASR inference and time it separately from model loading.
        inference_start = time.time()
        result = asr_model(audio_file)
        inference_time = time.time() - inference_start

        # Total wall time covers audio loading, model loading and inference;
        # the RTF is computed from this total, not from inference time alone.
        total_time = time.time() - start_total
        rtf = total_time / audio_duration

        # Fall back to the string form if the pipeline did not return a dict.
        transcript = result["text"] if isinstance(result, dict) else str(result)

        # Report currently allocated GPU memory when CUDA is available.
        gpu_info = ""
        if torch.cuda.is_available():
            gpu_memory = torch.cuda.memory_allocated() / 1024**3
            gpu_info = f"💾 GPU 記憶體: {gpu_memory:.2f}GB"

        # Format the performance report shown in the UI.
        performance = f"""⏱️ 總處理時間: {total_time:.2f}s
🔄 模型載入時間: {model_load_time:.2f}s  
🎯 推論時間: {inference_time:.2f}s
🎵 音訊長度: {audio_duration:.2f}s
📈 RTF: {rtf:.3f} ({'實時' if rtf < 1.0 else '非實時'})
💾 模型: MediaTek Breeze-ASR-25
{gpu_info}"""

        return transcript, performance, "✅ 識別成功"

    except Exception as e:
        # Top-level boundary for the UI callback: log the error and surface
        # it in the output fields instead of crashing the request.
        error_msg = f"❌ 處理失敗: {str(e)}"
        print(error_msg)
        return error_msg, "", "❌ 處理失敗"

def get_model_info():
    """Return the static, multi-line model description shown in the UI.

    This is a plain function (no GPU decorator); it only assembles a
    constant string.
    """
    info_lines = (
        "🤖 MediaTek Breeze-ASR-25 模型資訊:",
        "- 基於 Whisper 架構，專為台灣國語優化",
        "- 支援繁體中文語音識別",
        "- ZeroGPU 動態載入模式",
        "- 每次推論重新載入以確保穩定性",
    )
    return "\n".join(info_lines)

# Gradio interface: audio upload on the left, transcription results on the
# right, plus collapsible model-info and API-usage panels.
with gr.Blocks(title="MediaTek ASR 台灣國語測試") as demo:
    gr.Markdown("# 🎤 MediaTek Breeze-ASR-25 台灣國語識別測試")
    gr.Markdown("**專為台灣國語優化的語音識別測試平台**")

    # Model information panel (collapsed by default).
    with gr.Accordion("🤖 模型資訊", open=False):
        model_info = gr.Textbox(
            value=get_model_info(),
            label="模型詳細資訊",
            lines=6,
            interactive=False
        )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎙️ 音訊輸入")
            # type="filepath" hands the callback a path string.
            audio_input = gr.Audio(
                type="filepath",
                label="上傳音訊檔案 (wav, mp3, m4a)",
                format="wav"
            )

            gr.Markdown("### 📋 測試說明")
            gr.Markdown("""
            - 🎯 上傳 5-60 秒的台灣國語音訊
            - 🔊 建議使用清晰、低噪音的錄音
            - ⚡ 每次識別會重新載入模型 (ZeroGPU 限制)
            - 📊 系統會顯示詳細的性能指標
            """)

            submit_btn = gr.Button("🚀 開始識別", variant="primary", size="lg")

        with gr.Column():
            gr.Markdown("### 📄 識別結果")
            transcript_output = gr.Textbox(
                label="✨ 識別文字",
                lines=5,
                placeholder="識別結果將顯示在這裡..."
            )

            performance_output = gr.Textbox(
                label="⚡ 性能指標",
                lines=8,
                placeholder="性能數據將顯示在這裡..."
            )

            status_output = gr.Textbox(
                label="📊 處理狀態",
                lines=2
            )

    # Usage examples and client API notes.
    with gr.Accordion("📖 使用範例與 API", open=False):
        gr.Markdown("""
        ## 🔗 Gradio Client API 使用
        
        ```python
        from gradio_client import Client
        
        client = Client("sheep52031/mediatek-asr-test")
        result = client.predict("audio_file.wav", api_name="/predict")
        
        transcript = result[0]    # 識別文字
        performance = result[1]   # 性能指標  
        status = result[2]        # 處理狀態
        ```
        
        ## 📊 評估指標
        - **RTF < 1.0**: 實時處理能力
        - **準確度**: 台灣國語識別正確率
        - **處理時間**: 總耗時包含模型載入
        """)

    # Event binding. The endpoint name is pinned to "predict" so that the
    # client example above (api_name="/predict") resolves; without an
    # explicit api_name, recent Gradio versions name the endpoint after the
    # callback function instead.
    submit_btn.click(
        transcribe_audio,
        inputs=[audio_input],
        outputs=[transcript_output, performance_output, status_output],
        api_name="predict"
    )

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()