import base64
import html
import mimetypes
import os
from pathlib import Path
from typing import Any, Dict, List

import gradio as gr
from openai import OpenAI

DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
BASE_URL = os.getenv("BASE_URL", "")
api_key = os.getenv("ERNIE_API_KEY", "")

CUSTOM_CSS = """
body { background: radial-gradient(circle at top, #fdfbff 0%, #e7ecf7 45%, #dfe6f5 100%); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif; color: #0f172a; }
.gradio-container { max-width: 1200px !important; margin: 0 auto; }
#ernie-hero { padding: 12px 0 4px; }
#ernie-hero h1 { font-size: 1.85rem; margin-bottom: 0; font-weight: 500; }
#model-link { margin-top: 6px; font-size: 0.95rem; }
#model-link a { color: #4c1d95; text-decoration: none; font-weight: 500; }
#model-link a:hover { text-decoration: underline; }
#examples-panel { margin-top: 20px; padding: 18px 22px; border-radius: 18px; border: 1px solid rgba(15, 23, 42, 0.12); background: rgba(255, 255, 255, 0.92); box-shadow: 0 15px 35px rgba(15, 23, 42, 0.08); gap: 18px; }
#examples-panel h4 { margin: 0 0 8px; font-size: 1.1rem; font-weight: 500; }
#examples-panel p { margin: 0; color: rgba(15, 23, 42, 0.7); font-size: 0.95rem; }
#examples-grid table { width: 100%; }
#examples-grid table tbody { display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: 12px; }
#examples-grid table tr { display: block; background: #f7f9ff; border-radius: 14px; border: 1px solid rgba(15, 23, 42, 0.08); padding: 14px; box-shadow: 0 10px 28px rgba(15, 23, 42, 0.08); }
#examples-grid table td { display: block; padding: 0; }
#chat-wrapper { margin-top: 32px; border-radius: 24px; padding: 18px; background: rgba(255, 255, 255, 0.95); border: 1px solid rgba(15, 23, 42, 0.1); box-shadow: 0 25px 60px rgba(15, 23, 42, 0.12); }
.ernie-section { border-radius: 18px; margin-bottom: 14px; padding: 16px 18px; border: 1px solid rgba(15, 23, 42, 0.1); background: rgba(255, 255, 255, 0.95); box-shadow: 0 10px 24px rgba(15, 23, 42, 0.08); }
.ernie-section-header { font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.08em; font-weight: 600; color: rgba(15, 23, 42, 0.65); display: flex; align-items: center; gap: 6px; }
.ernie-section-body { margin-top: 10px; font-size: 1rem; color: rgba(15, 23, 42, 0.92); white-space: pre-wrap; line-height: 1.55; }
.ernie-thinking { border-color: rgba(79, 70, 229, 0.35); background: rgba(129, 140, 248, 0.08); }
.ernie-answer { border-color: rgba(16, 185, 129, 0.35); background: rgba(110, 231, 183, 0.08); }
@media (prefers-color-scheme: dark) {
  body { background: radial-gradient(circle at top, #1f264b 0%, #0f172a 45%, #040713 100%); color: #ecf2ff; }
  #model-link a { color: #a5b4fc; }
  #examples-panel { border: 1px solid rgba(255, 255, 255, 0.05); background: rgba(8, 13, 30, 0.85); box-shadow: 0 15px 45px rgba(3, 7, 18, 0.55); }
  #examples-panel p { color: rgba(236, 242, 255, 0.75); }
  #examples-grid table tr { background: rgba(15, 23, 42, 0.7); border: 1px solid rgba(255, 255, 255, 0.04); box-shadow: 0 10px 30px rgba(4, 6, 15, 0.45); }
  #chat-wrapper { background: rgba(2, 6, 23, 0.78); border: 1px solid rgba(99, 102, 241, 0.25); box-shadow: 0 25px 70px rgba(2, 6, 23, 0.7); }
  .ernie-section { border: 1px solid rgba(255, 255, 255, 0.08); background: rgba(15, 23, 42, 0.85); box-shadow: 0 10px 30px rgba(2, 6, 23, 0.55); }
  .ernie-section-header { color: rgba(236, 242, 255, 0.75); }
  .ernie-section-body { color: rgba(248, 250, 255, 0.95); }
  .ernie-answer { border-color: rgba(45, 212, 191, 0.45); background: rgba(8, 47, 56, 0.65); }
  .ernie-thinking { border-color: rgba(165, 180, 252, 0.4); background: rgba(30, 27, 75, 0.65); }
}
"""
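
# BASE_URL and ERNIE_API_KEY are read from the environment above; any OpenAI-compatible
# endpoint works (e.g. a self-hosted server at http://localhost:8000/v1, which is only a
# placeholder example, not a required value).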

_client = OpenAI(
    base_url=BASE_URL,
    api_key=api_key,
)


def _data_url(path: str) -> str:
    """Read a local file and encode it as a base64 data URL."""
    mime, _ = mimetypes.guess_type(path)
    mime = mime or "application/octet-stream"
    data = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
    return f"data:{mime};base64,{data}"
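
# Attachments are sent inline as base64 data URLs rather than uploaded to external storage,
# so no file hosting is needed; large videos will inflate the request payload accordingly.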


def _media_content(path: str) -> Dict[str, Any]:
    """Build an image or video content part for the chat API."""
    mime, _ = mimetypes.guess_type(path)
    if mime and mime.startswith("video"):
        # Video attachment
        return {"type": "video_url", "video_url": {"url": _data_url(path)}}
    # Image attachment (the default)
    return {"type": "image_url", "image_url": {"url": _data_url(path)}}


def _text_content(text: str) -> Dict[str, Any]:
    return {"type": "text", "text": text}


def _message(role: str, content: Any) -> Dict[str, Any]:
    return {"role": role, "content": content}
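
# The Chatbot below is created with render_markdown=True, so assistant turns can carry raw
# HTML; _format_sections produces the styled Thinking/Answer blocks that CUSTOM_CSS targets.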


def _format_sections(thinking: str, answer: str | None = None) -> str:
    """Render Thinking/Answer blocks with HTML so the chatbot can style them."""

    def _build_block(kind: str, label: str, text: str, icon: str) -> str:
        text = (text or "").strip()
        if not text:
            return ""
        escaped = html.escape(text)
        return (
            f'<div class="ernie-section ernie-{kind}">'
            f'<div class="ernie-section-header">{icon} {label}</div>'
            f'<div class="ernie-section-body">{escaped}</div>'
            "</div>"
        )

    # Thinking is always rendered; the Answer block is added only once the model has started
    # emitting final-response tokens.
    sections = [
        _build_block("thinking", "Thinking", thinking, "🧠"),
        _build_block("answer", "Answer", answer, "✨") if answer is not None else "",
    ]
    rendered = "".join(section for section in sections if section)
    return rendered


def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
    files = message.get("files") or []
    text = (message.get("text") or "").strip()
    content: List[Dict[str, Any]] = [_media_content(p) for p in files]
    if text:
        content.append(_text_content(text))
    return _message("user", content)


def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert the Gradio message history into OpenAI-style chat messages."""
    msgs: List[Dict[str, Any]] = []
    user_content: List[Dict[str, Any]] = []
    for turn in history or []:
        role, content = turn.get("role"), turn.get("content")
        if role == "user":
            if isinstance(content, str):
                user_content.append(_text_content(content))
            elif isinstance(content, tuple):
                user_content.extend(_media_content(path) for path in content if path)
        elif role == "assistant":
            if "Answer:\n" in content:
                # Split and keep only the Answer part.
                answer_only = content.split("Answer:\n", 1)[1].strip()
            else:
                # Fall back gracefully when there was no Thinking section.
                answer_only = content.strip()
            if user_content:
                msgs.append(_message("user", user_content.copy()))
                user_content.clear()
            msgs.append(_message("assistant", [{"type": "text", "text": answer_only}]))
    return msgs
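
# Reasoning tokens are expected on delta.reasoning_content (the field used by reasoning-capable
# OpenAI-compatible servers), while the final answer arrives on delta.content.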


def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], model_name: str = DEFAULT_MODEL):
    messages = _convert_history(history)
    messages.append(_build_user_message(message))
    try:
        stream = _client.chat.completions.create(
            model=model_name,  # honor DEFAULT_MODEL / the caller-supplied model name
            messages=messages,
            stream=True,
        )
        thinking_parts: List[str] = []
        answer_parts: List[str] = []
        answer_started = False
        for chunk in stream:
            delta = chunk.choices[0].delta
            if getattr(delta, "reasoning_content", None):
                thinking_parts.append(delta.reasoning_content)
            if getattr(delta, "content", None):
                answer_started = True
                answer_parts.append(delta.content)
            thinking_text = "".join(thinking_parts)
            answer_text = "".join(answer_parts) if answer_parts else None
            if answer_started:
                rendered = _format_sections(thinking_text, answer_text)
            else:
                rendered = _format_sections(thinking_text)
            if rendered:
                yield rendered
        if not answer_started and thinking_parts:
            # The stream ended without an Answer; make sure the full Thinking text is shown.
            rendered = _format_sections("".join(thinking_parts))
            if rendered:
                yield rendered
    except Exception as e:
        yield f"Failed to get response: {e}"


def run_example(message: Dict[str, Any], history: List[Dict[str, Any]] | None = None):
    """
    Run an Examples click straight through the model.

    - The input is the same message dict a ChatInterface submit produces: {"text": ..., "files": [...]}
    - `history` is the Chatbot's current message list (type="messages")
    - The output is the full message list the Chatbot expects: [{role, content}, ...]
    """
    history = history or []
    # Reuse the existing streaming function and wrap its HTML output in messages.
    for rendered in stream_response(message, history):
        # Only the user text is shown here; attached images are treated as already being part
        # of the context and are not rendered separately.
        user_text = (message.get("text") or "").strip() or "[Example]"
        display_history = history + [
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": rendered},
        ]
        # Key point: for a Chatbot output, the yielded value must be the complete message list.
        yield display_history


def build_demo() -> gr.Blocks:
    theme = gr.themes.Soft(primary_hue="violet", secondary_hue="cyan", neutral_hue="slate")
    with gr.Blocks(
        title="ERNIE-4.5-VL-28B-A3B-Thinking",
        theme=theme,
        css=CUSTOM_CSS,
    ) as demo:
        with gr.Column(elem_id="ernie-hero"):
            gr.Markdown(
                "<h1>Chat with ERNIE-4.5-VL-28B-A3B-Thinking</h1>",
                elem_id="hero-text",
            )
            gr.Markdown(
                """
                """
            )

        textbox = gr.MultimodalTextbox(
            show_label=False,
            placeholder="Enter text, or upload one or more images...",
            file_types=["image", "video"],
            file_count="multiple",
        )
        chatbot = gr.Chatbot(
            type="messages",
            allow_tags=["think"],
            height=560,
            render_markdown=True,
            show_copy_button=True,
        )

        examples = [
            {
                "text": "这道题怎么解",  # "How do I solve this problem?"
                "files": ["examples/case1.png"],
            },
            {
                "text": "How many real people are actually in the picture?",
                "files": ["examples/case2.png"],
            },
        ]
        with gr.Column(elem_id="examples-panel"):
            gr.Examples(
                examples=examples,
                inputs=textbox,
                label=None,
                examples_per_page=4,
                elem_id="examples-grid",
                fn=run_example,  # Clicking an example sends it straight to the model.
                outputs=chatbot,
                run_on_click=True,
            )

        with gr.Column(elem_id="chat-wrapper"):
            chat_interface = gr.ChatInterface(
                fn=stream_response,
                type="messages",
                multimodal=True,
                chatbot=chatbot,
                textbox=textbox,
            )

    return demo.queue(default_concurrency_limit=8)


if __name__ == "__main__":
    build_demo().launch()