# app.py (为 Hugging Face ZeroGPU 修改)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import spaces
# --------------------------------------------------------------------------
# 1. 配置 (在应用启动时执行)
# --------------------------------------------------------------------------

# Hugging Face Hub ID of the translation model.
model_id = "AIDC-AI/Marco-MT-Algharb"

# ZeroGPU: the large model is NOT loaded at import time. These globals start
# as None; `model` is filled in lazily by translate() on its first request.
model = None
tokenizer = None
generation_config = None

print("ZeroGPU 启动脚本开始...")
print(f"准备从 {model_id} 加载 Tokenizer...")

# The tokenizer is small, so it is loaded eagerly at startup.
# NOTE: if the repo is gated, the HF_TOKEN secret must be set in the Space
# settings for this download to succeed.
try:
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        trust_remote_code=True,
    )
    print("Tokenizer 加载成功!")
except Exception as e:
    # Startup boundary: log and keep the module importable. translate()
    # checks `tokenizer is None` and reports the problem to the user.
    print(f"Tokenizer 加载失败: {e}")
    print("!! 严重错误: 如果这是 Gated Repo 问题, 请确保 HF_TOKEN 密钥已设置并重启 Space。")

# Build the GenerationConfig in its own try block so that a failure here is
# not misreported as a tokenizer-loading failure.
if tokenizer is not None:
    try:
        # Stop generation on either the chat end-of-turn token or the
        # tokenizer's regular EOS token.
        im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
        eot_id = tokenizer.eos_token_id

        print(f"设置停止 IDs: <|im_end|_id={im_end_id}, <|endoftext|_id={eot_id}")

        # Either lookup may come back as None (e.g. no EOS token configured);
        # drop None and duplicates so only real token ids reach generate().
        stop_ids = [
            token_id
            for token_id in dict.fromkeys((im_end_id, eot_id))
            if token_id is not None
        ]

        generation_config = GenerationConfig(
            do_sample=False,        # greedy decoding
            max_new_tokens=512,
            eos_token_id=stop_ids or None,
            pad_token_id=eot_id,
        )
        print("GenerationConfig 配置成功。")
    except Exception as e:
        # Leave generation_config as None; the tokenizer itself is usable.
        print(f"GenerationConfig 配置失败: {e}")

# Language code -> lower-case language name.
# The target names are interpolated into the translation prompt, and the
# capitalized names of both tables are used as dropdown labels in the UI.
source_lang_name_map = dict(
    en="english",
    ja="japanese",
    cs="czech",
    de="german",
)

# NOTE(review): "ukraine" reads like it should be "ukrainian", but this string
# is sent to the model as part of the prompt -- confirm against the prompt
# format the model expects before changing it.
target_lang_name_map = dict(
    zh="chinese",
    ko="korean",
    ja="japanese",
    ar="arabic",
    cs="czech",
    ru="russian",
    uk="ukraine",
    et="estonian",
    bho="bhojpuri",
    sr_latin="serbian",
    de="german",
)

# --------------------------------------------------------------------------
# 2. 定义核心翻译函数 (修改版)
# --------------------------------------------------------------------------
@spaces.GPU
def translate(source_text, source_lang_code, target_lang_code):
    """Translate ``source_text`` into the language given by ``target_lang_code``.

    The model is loaded lazily: the first call that actually has text to
    translate loads it and stores it in the module-level ``model``.

    Args:
        source_text: Text to translate. ``None``, empty, or whitespace-only
            input returns ``""`` without loading the model.
        source_lang_code: Source language code. Kept for interface
            compatibility; the prompt only names the target language.
        target_lang_code: Key into ``target_lang_name_map``. Unknown codes
            fall back to the phrase "the target language" in the prompt.

    Returns:
        The translated text, or a human-readable error message if the
        tokenizer is missing, the model fails to load, or generation fails.
        Errors are returned as strings rather than raised so they are shown
        in the output textbox.
    """
    global model  # assigned below on first successful load

    # Validate before touching the model so a blank request never triggers
    # the expensive first-time model load.
    if not source_text or not source_text.strip():
        return ""

    # Lazy model load on the first real request.
    if model is None:
        if tokenizer is None:
            return "错误：Tokenizer 未能成功加载，无法继续。请检查启动日志。"

        print("--- 首次请求 ---")
        print("检测到模型未加载。正在加载模型到 ZeroGPU (Nvidia H200)...")
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype="auto",
                device_map="auto",
                trust_remote_code=True,
            )
            model.eval()
            print("模型已成功加载到 GPU!")
        except Exception as e:
            print(f"在首次加载时模型失败: {e}")
            return f"错误：模型在加载到 GPU 时失败: {e}"

    target_language_name = target_lang_name_map.get(target_lang_code, "the target language")

    # Single-turn prompt: the human turn is closed with <|im_end|> and the
    # model continues after "Assistant:".
    prompt = (
        f"Human: Please translate the following text into {target_language_name}: \n"
        f"{source_text}<|im_end|>\n"
        f"Assistant:"
    )

    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                generation_config=generation_config,
            )

        # generate() returns prompt + continuation; keep only the new tokens.
        input_length = inputs.input_ids.shape[1]
        generated_ids = outputs[0][input_length:]
        generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        return generated_text

    except Exception as e:
        print(f"翻译过程中出错: {e}")
        return f"翻译时发生错误: {e}"

# --------------------------------------------------------------------------
# 3. 创建并配置 Gradio 界面 (这部分保持不变)
# --------------------------------------------------------------------------

# Custom CSS handed to gr.Blocks. The .gradio-textbox class is attached to
# the source and output textboxes through their elem_classes.
css = (
    "\n"
    "/* ... 你的所有 CSS 样式 ... */\n"
    ".gradio-textbox {\n"
    "    min-height: 300px !important;\n"
    "}\n"
)

# Dropdown choices as (display label, language code) pairs.
def _dropdown_choices(lang_name_map):
    """Return ``(Capitalized name, code)`` pairs in the map's own order."""
    return [
        (full_name.capitalize(), lang_code)
        for lang_code, full_name in lang_name_map.items()
    ]


source_lang_choices = _dropdown_choices(source_lang_name_map)
target_lang_choices = _dropdown_choices(target_lang_name_map)


# Build the Blocks UI with the Soft theme (amber hues) and the custom CSS.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="amber"),
    css=css,
) as demo:

    # --- Page header: title, organisation link and badge links ---
    gr.HTML(
        """
        <div align="center" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif, 'Microsoft YaHei', sans-serif; padding: 20px 0;">
            <h1 style="font-weight: 700; color: #2C3E50; margin-bottom: 0.25rem; font-size: 2.5rem;">
                Marco-MT-Algharb
            </h1>
            <p style="margin-top: 0; margin-bottom: 1.5rem;">
                <a href="https://www.aidc-ai.com/marcomt" style="font-size: 1.25rem; color: #E67E22; text-decoration: none; font-weight: 500;">
                    Alibaba International Digital Commerce
                </a>
            </p>
    
            <div style="display: flex; justify-content: center; gap: 8px;">
                <a href="https://github.com/AIDC-AI/Marco-MT">
                    <img src="https://img.shields.io/badge/GitHub-Repository-181717?logo=github&style=for-the-badge" alt="GitHub">
                </a>
                <a href="https://huggingface.co/AIDC-AI/Marco-MT-Algharb">
                    <img src="https://img.shields.io/badge/Hugging%20Face-Model-FFC107?logo=huggingface&style=for-the-badge" alt="Hugging Face Model">
                </a>
                <a href="https://www2.statmt.org/wmt25/pdf/2025.wmt-1.33.pdf">
                    <img src="https://img.shields.io/badge/Paper-WMT%202025-C0392B?logo=arxiv&style=for-the-badge" alt="Paper WMT 2025">
                </a>
                <a href="https://huggingface.co/spaces/AIDC-AI/Marco-MT-Algharb">
                    <img src="https://img.shields.io/badge/Demo-HF%20Space-E67E22?logo=huggingface&style=for-the-badge" alt="Demo HF Space">
                </a>
            </div>
        </div>
        """
    )

    # --- Translator main area (two-column layout) ---
    with gr.Row(variant="panel", equal_height=True):

        # --- Left card: source language + input text ---
        with gr.Group():
            source_lang_dd = gr.Dropdown(
                choices=source_lang_choices,
                value="en",
                label="源语言 (Source Language)",
            )
            source_text_tb = gr.Textbox(
                lines=10,
                label="源文本 (Source Text)",
                placeholder="Enter text to translate here...",
                elem_classes=["gradio-textbox"],
            )

        # --- Right card: target language + read-only translation ---
        with gr.Group():
            target_lang_dd = gr.Dropdown(
                choices=target_lang_choices,
                value="zh",
                label="目标语言 (Target Language)",
            )
            output_text_tb = gr.Textbox(
                lines=10,
                label="翻译结果 (Translation)",
                interactive=False,
                elem_classes=["gradio-textbox"],
            )

    # --- Button row ---
    with gr.Row():
        # Clear only the text boxes. Clearing the dropdowns as well would
        # leave them empty, so the next submit would pass None as the
        # language code and the prompt would fall back to
        # "the target language".
        clear_btn = gr.ClearButton(
            value="清除 (Clear)",
            components=[source_text_tb, output_text_tb],
        )
        submit_btn = gr.Button("翻译 (Submit)", variant="primary", scale=1)

    # --- Examples: each row is [source text, source code, target code] ---
    example_list = [
        ["The quick brown fox jumps over the lazy dog.", "en", "zh"],
        ["The sunset painted the sky with brilliant shades of orange and purple.", "en", "ko"],
        ["The ancient ruins stand as a silent testament to the rise and fall of a great civilization.", "en", "ja"],
    ]
    gr.Examples(
        examples=example_list,
        inputs=[source_text_tb, source_lang_dd, target_lang_dd],
    )

    # --- Static card listing the supported language pairs ---
    # Plain (non-f) string: there is nothing to interpolate.
    gr.HTML("""
    <div style="color: #444; font-size: 16px; margin-top: 30px; padding: 20px 25px; background-color: #FFFFFF; border-radius: 15px; max-width: 900px; margin-left: auto; margin-right: auto; box-shadow: 0 4px 20px rgba(0,0,0,0.05);">
        
        <h3 style="text-align: center; margin-top: 5px; margin-bottom: 20px; color: #444444; font-weight: 600;">Supported Language Pairs</h3>
        
        <div style="display: flex; justify-content: space-around; text-align: left; line-height: 1.8;">
            
            <div>
                <strong>From English (en):</strong>
                <ul style="list-style-type: '» '; margin: 5px 0 0 20px; padding: 0;">
                    <li>en2zh</li>
                    <li>en2ja</li>
                    <li>en2ko</li>
                    <li>en2ar</li>
                    <li>en2et</li>
                    <li>en2sr_latin</li>
                    <li>en2ru</li>
                    <li>en2uk</li>
                    <li>en2cs</li>
                    <li>en2bho</li>
                </ul>
            </div>
            
            <div style="margin-left: 20px;">
                <strong>From Czech (cs):</strong>
                <ul style="list-style-type: '» '; margin: 5px 0 15px 20px; padding: 0;">
                    <li>cs2uk</li>
                    <li>cs2de</li>
                </ul>
                
                <strong>From Japanese (ja):</strong>
                <ul style="list-style-type: '» '; margin: 5px 0 0 20px; padding: 0;">
                    <li>ja2zh</li>
                </ul>
            </div>
        </div>
    </div>
    """)

    # --- Wire the submit button to translate(); exposed as API "translate" ---
    submit_btn.click(
        fn=translate,
        inputs=[source_text_tb, source_lang_dd, target_lang_dd],
        outputs=[output_text_tb],
        api_name="translate",
    )

# Launch the Gradio app only when this file is executed as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    demo.launch()