LokeZhou committed on
Commit 0bec3bf · 1 Parent(s): dc2f4d5

add examples and support video

Files changed (3)
  1. app.py +50 -19
  2. examples/case1.png +3 -0
  3. examples/case2.png +3 -0
app.py CHANGED
@@ -21,8 +21,15 @@ def _data_url(path: str) -> str:
     data = base64.b64encode(Path(path).read_bytes()).decode("utf-8")
     return f"data:{mime};base64,{data}"
 
-def _image_content(path: str) -> Dict[str, Any]:
-    return {"type": "image_url", "image_url": {"url": _data_url(path)}}
+def _media_content(path: str) -> Dict[str, Any]:
+    """Supports both images and videos."""
+    mime, _ = mimetypes.guess_type(path)
+    if mime and mime.startswith("video"):
+        # Video format
+        return {"type": "video_url", "video_url": {"url": _data_url(path)}}
+    else:
+        # Image format (default)
+        return {"type": "image_url", "image_url": {"url": _data_url(path)}}
 
 def _text_content(text: str) -> Dict[str, Any]:
     return {"type": "text", "text": text}
@@ -33,7 +40,7 @@ def _message(role: str, content: Any) -> Dict[str, Any]:
 def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
     files = message.get("files") or []
     text = (message.get("text") or "").strip()
-    content: List[Dict[str, Any]] = [_image_content(p) for p in files]
+    content: List[Dict[str, Any]] = [_media_content(p) for p in files]
     if text:
         content.append(_text_content(text))
     return _message("user", content)
@@ -48,7 +55,7 @@ def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         if isinstance(content, str):
             user_content.append(_text_content(content))
         elif isinstance(content, tuple):
-            user_content.extend(_image_content(path) for path in content if path)
+            user_content.extend(_media_content(path) for path in content if path)
     elif role == "assistant":
         if "Answer:\n" in content:
             # Split and keep only the Answer part
@@ -108,22 +115,46 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
     except Exception as e:
         yield f"Failed to get response: {e}"
 
+
 def build_demo() -> gr.Blocks:
-    chatbot = gr.Chatbot(type="messages", allow_tags=["think"], height=600)
-    textbox = gr.MultimodalTextbox(
-        show_label=False,
-        placeholder="Enter text, or upload one or more images...",
-        file_types=["image"],
-        file_count="multiple"
-    )
-    return gr.ChatInterface(
-        fn=stream_response,
-        type="messages",
-        multimodal=True,
-        chatbot=chatbot,
-        textbox=textbox,
-        title="ERNIE-4.5-VL-28B-A3B-Thinking",
-    ).queue(default_concurrency_limit=8)
+    with gr.Blocks(title="ERNIE-4.5-VL-28B-A3B-Thinking") as demo:
+        chatbot = gr.Chatbot(type="messages", allow_tags=["think"], height=600)
+        textbox = gr.MultimodalTextbox(
+            show_label=False,
+            placeholder="Enter text, or upload one or more images...",
+            file_types=["image", "video"],
+            file_count="multiple"
+        )
+
+        examples = [
+            {
+                "text": "这道题怎么解",  # "How do I solve this problem?"
+                "files": ["examples/case1.png"]
+            },
+            {
+                "text": "图中实际上有几个真人",  # "How many real people are actually in the picture?"
+                "files": ["examples/case2.png"]
+            },
+        ]
+
+        chat_interface = gr.ChatInterface(
+            fn=stream_response,
+            type="messages",
+            multimodal=True,
+            chatbot=chatbot,
+            textbox=textbox,
+        )
+
+        with gr.Row():
+            gr.Examples(
+                examples=examples,
+                inputs=textbox,
+                label="示例 Examples",
+                examples_per_page=2,
+            )
+
+    return demo.queue(default_concurrency_limit=8)
 
 
 
examples/case1.png ADDED

Git LFS Details

  • SHA256: f5c3bb6f9fa82a8bc11ff986741c1cc9fda2ec18f89d714fe4e3e9139becf53a
  • Pointer size: 131 Bytes
  • Size of remote file: 445 kB
examples/case2.png ADDED

Git LFS Details

  • SHA256: b07559227c68fa2ec088c8f7d641ec1d039913a2c02b55a1106dbfd4d0ef2090
  • Pointer size: 130 Bytes
  • Size of remote file: 14.6 kB
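
Note on the change above: the image-vs-video routing in the new _media_content helper is driven entirely by the standard library's MIME guess. A minimal sketch of that decision, assuming only mimetypes from the standard library (clip.mp4 is a hypothetical upload, not a file in this commit):

import mimetypes

# mimetypes.guess_type() drives the image/video branch in _media_content.
for path in ("examples/case1.png", "clip.mp4"):  # clip.mp4 is hypothetical
    mime, _ = mimetypes.guess_type(path)
    kind = "video_url" if mime and mime.startswith("video") else "image_url"
    print(path, mime, "->", kind)
# examples/case1.png image/png -> image_url
# clip.mp4 video/mp4 -> video_url

Any file whose MIME type cannot be guessed falls back to the image_url branch, matching the "default" case in the helper.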