LokeZhou committed on
Commit 0bec3bf · 1 Parent(s): dc2f4d5

add examples and support video

Files changed (3)
  1. app.py +50 -19
  2. examples/case1.png +3 -0
  3. examples/case2.png +3 -0
app.py CHANGED
@@ -21,8 +21,15 @@ def _data_url(path: str) -> str:
     data = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
     return f"data:{mime};base64,{data}"
 
-def _image_content(path: str) -> Dict[str, Any]:
-    return {"type": "image_url", "image_url": {"url": _data_url(path)}}
+def _media_content(path: str) -> Dict[str, Any]:
+    """Supports both images and videos."""
+    mime, _ = mimetypes.guess_type(path)
+    if mime and mime.startswith("video"):
+        # Video format
+        return {"type": "video_url", "video_url": {"url": _data_url(path)}}
+    else:
+        # Image format (default)
+        return {"type": "image_url", "image_url": {"url": _data_url(path)}}
 
 def _text_content(text: str) -> Dict[str, Any]:
     return {"type": "text", "text": text}
@@ -33,7 +40,7 @@ def _message(role: str, content: Any) -> Dict[str, Any]:
 def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
     files = message.get("files") or []
     text = (message.get("text") or "").strip()
-    content: List[Dict[str, Any]] = [_image_content(p) for p in files]
+    content: List[Dict[str, Any]] = [_media_content(p) for p in files]
     if text:
         content.append(_text_content(text))
     return _message("user", content)
@@ -48,7 +55,7 @@ def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         if isinstance(content, str):
             user_content.append(_text_content(content))
         elif isinstance(content, tuple):
-            user_content.extend(_image_content(path) for path in content if path)
+            user_content.extend(_media_content(path) for path in content if path)
     elif role == "assistant":
         if "Answer:\n" in content:
             # Split and keep only the Answer part
@@ -108,22 +115,46 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
     except Exception as e:
         yield f"Failed to get response: {e}"
 
+
 def build_demo() -> gr.Blocks:
-    chatbot = gr.Chatbot(type="messages", allow_tags=["think"], height=600)
-    textbox = gr.MultimodalTextbox(
-        show_label=False,
-        placeholder="Enter text, or upload one or more images...",
-        file_types=["image"],
-        file_count="multiple"
-    )
-    return gr.ChatInterface(
-        fn=stream_response,
-        type="messages",
-        multimodal=True,
-        chatbot=chatbot,
-        textbox=textbox,
-        title="ERNIE-4.5-VL-28B-A3B-Thinking",
-    ).queue(default_concurrency_limit=8)
+    with gr.Blocks(title="ERNIE-4.5-VL-28B-A3B-Thinking") as demo:
+        chatbot = gr.Chatbot(type="messages", allow_tags=["think"], height=600)
+        textbox = gr.MultimodalTextbox(
+            show_label=False,
+            placeholder="Enter text, or upload one or more images...",
+            file_types=["image", "video"],
+            file_count="multiple"
+        )
+
+        examples = [
+            {
+                "text": "这道题怎么解",  # "How do I solve this problem?"
+                "files": ["examples/case1.png"]
+            },
+            {
+                "text": "图中实际上有几个真人",  # "How many real people are actually in the picture?"
+                "files": ["examples/case2.png"]
+            },
+        ]
+
+        chat_interface = gr.ChatInterface(
+            fn=stream_response,
+            type="messages",
+            multimodal=True,
+            chatbot=chatbot,
+            textbox=textbox,
+        )
+
+        with gr.Row():
+            gr.Examples(
+                examples=examples,
+                inputs=textbox,
+                label="示例 Examples",
+                examples_per_page=2,
+            )
+
+    return demo.queue(default_concurrency_limit=8)
 
 
 
examples/case1.png ADDED

Git LFS Details

  • SHA256: f5c3bb6f9fa82a8bc11ff986741c1cc9fda2ec18f89d714fe4e3e9139becf53a
  • Pointer size: 131 Bytes
  • Size of remote file: 445 kB
examples/case2.png ADDED

Git LFS Details

  • SHA256: b07559227c68fa2ec088c8f7d641ec1d039913a2c02b55a1106dbfd4d0ef2090
  • Pointer size: 130 Bytes
  • Size of remote file: 14.6 kB
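
Note on the change above: the image-vs-video routing in the new _media_content helper is driven entirely by the standard library's MIME guess. A minimal sketch of that decision, assuming only mimetypes from the standard library (clip.mp4 is a hypothetical upload, not a file in this commit):

import mimetypes

# mimetypes.guess_type() drives the image/video branch in _media_content.
for path in ("examples/case1.png", "clip.mp4"):  # clip.mp4 is hypothetical
    mime, _ = mimetypes.guess_type(path)
    kind = "video_url" if mime and mime.startswith("video") else "image_url"
    print(path, mime, "->", kind)
# examples/case1.png image/png -> image_url
# clip.mp4 video/mp4 -> video_url

Any file whose MIME type cannot be guessed falls back to the image_url branch, matching the "default" case in the helper.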