TerminalCalm commited on
Commit
7370b88
·
verified ·
1 Parent(s): 0c5c48f

Initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environments
2
+ .venv/
3
+ .env
4
+
5
+ # Python cache
6
+ __pycache__/
7
+ *.pyc
8
+
9
+ # Gradio cache
10
+ .gradio_cache/
11
+
12
+ # Uploaded files
13
+ /tmp/
14
+
15
+ # VSCode
16
+ .vscode/
17
+
18
+ save.json
.gradio/flagged/dataset1.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ word,letter,output,timestamp
2
+ TEstasdsdasd,,13,2025-06-02 16:48:30.771994
app.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import shutil
3
+ import os
4
+ import subprocess
5
+ from src.mcp.tools import letter_counter
6
+ from src.mcp import video_tools
7
+ from src.llm import llm
8
+
def is_ffmpeg_installed():
    """Return True when an 'ffmpeg' executable can be resolved on the PATH."""
    return bool(shutil.which("ffmpeg"))
def update_ffmpeg_status():
    """Build a colored HTML status line plus a boolean flag for FFmpeg availability."""
    installed = is_ffmpeg_installed()
    if installed:
        message = "<p style='color:green; font-weight:bold;'>ffmpeg is installed</p>"
    else:
        message = "<p style='color:red; font-weight:bold;'>ffmpeg is not installed</p>"
    return message, installed
def handle_video_upload(file_obj, ffmpeg_installed):
    """
    Validate an uploaded MP4, copy it into the local 'tmp' directory, and
    toggle the visibility of the video-tools panel.

    Returns a 4-tuple matching the wired outputs: (preview path, status
    message, stored path for state, visibility update for the tools group).
    """
    if not ffmpeg_installed:
        return None, "Cannot process video: FFmpeg is not installed.", None, gr.update(visible=False)
    if not file_obj:
        return None, "Please upload a file.", None, gr.update(visible=False)

    tmp_dir = "tmp"
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    # Cheap extension check before the heavier ffprobe validation.
    if not file_obj.name.lower().endswith('.mp4'):
        return None, "File must be an .mp4 file.", None, gr.update(visible=False)

    # ffprobe exits non-zero for unreadable/invalid containers; treat a
    # missing ffprobe binary the same as a failed validation.
    try:
        subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", file_obj.name],
            capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None, "Could not validate file. Ensure it's a valid MP4.", None, gr.update(visible=False)

    destination = os.path.join(tmp_dir, os.path.basename(file_obj.name))
    shutil.copy(file_obj.name, destination)

    return destination, "File uploaded successfully!", destination, gr.update(visible=True)
def clear_previous_outputs():
    """Reset every video-related output so a new upload starts from a clean state.

    The tuple order must match the outputs list wired to file_input.upload:
    video preview, upload status, stored path state, tools-group visibility,
    manual first/last frame images, LLM first/last frame images, LLM gif,
    LLM result text, raw prompt text, raw response text.
    """
    return (
        None,
        "",
        None,
        gr.update(visible=False),
        None,
        None,
        None,
        None,
        None,
        "",
        "",
        ""
    )
def on_load_tasks():
    """Startup hook: restore saved LLM settings and probe for FFmpeg.

    The return order must match the outputs list of demo.load; the saved URL
    feeds both the URL state object and the endpoint textbox.
    """
    saved_url, saved_model, _ = llm.load_settings()
    ffmpeg_html, ffmpeg_ok = update_ffmpeg_status()
    return saved_url, saved_model, ffmpeg_html, ffmpeg_ok, saved_url
# --- Gradio UI ---
with gr.Blocks() as demo:
    # --- State Variables ---
    # Session-scoped values shared between event handlers.
    ffmpeg_installed_state = gr.State(False)
    preferred_llm_state = gr.State("")
    ollama_url_state = gr.State("")
    uploaded_video_path_state = gr.State("")

    with gr.Tabs():
        # --- Setup & Video Upload Tab ---
        with gr.Tab("Setup & Video Upload"):
            gr.Markdown("## System Status")
            with gr.Row():
                check_ffmpeg_btn = gr.Button("Check FFmpeg Status")
                status_text = gr.Markdown("Status will be checked on load.")

            gr.Markdown("---")
            gr.Markdown("## Video Tools")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Upload Video")
                    file_input = gr.File(label="Upload MP4", file_types=[".mp4"])
                    video_output = gr.Video(label="Preview", interactive=False)
                    upload_status_text = gr.Textbox(label="Upload Status", interactive=False)

                # Hidden until a video passes validation in handle_video_upload.
                with gr.Column(scale=2, visible=False) as video_tools_group:
                    gr.Markdown("### Manual Frame Extraction")
                    with gr.Row():
                        get_first_frame_btn = gr.Button("Get First Frame")
                        get_last_frame_btn = gr.Button("Get Last Frame")
                    with gr.Row():
                        first_frame_img = gr.Image(label="First Frame", type="filepath", interactive=False)
                        last_frame_img = gr.Image(label="Last Frame", type="filepath", interactive=False)

        # --- LLM Configuration Tab ---
        with gr.Tab("LLM Configuration"):
            gr.Markdown("# Ollama Endpoint Configuration")
            with gr.Row():
                llm_endpoint_input = gr.Textbox(
                    label="Ollama Endpoint URL",
                    placeholder="e.g., http://localhost:11434"
                )
                check_llm_button = gr.Button("Check Endpoint")
            llm_status_text = gr.Textbox(label="Endpoint Status", interactive=False)

            # Revealed only after a successful endpoint check.
            with gr.Column(visible=False) as llm_selection_group:
                with gr.Row():
                    llm_model_select = gr.Dropdown(label="Available Models")
                    # NOTE(review): set_llm_button has no .click handler wired
                    # below; persistence happens via llm_model_select.change.
                    # Confirm the button is intentionally decorative or wire it.
                    set_llm_button = gr.Button("Set as Preferred")
                preferred_llm_display = gr.Textbox(label="Current Preferred Model", interactive=False)

        # --- MCP Integration Tab ---
        with gr.Tab("MCP Integration"):
            gr.Markdown("## Test MCP Tool Calls with a Local LLM")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Video Command Center")
                    video_prompt_input = gr.Textbox(label="Video Command Prompt", placeholder="e.g., 'Get me the first frame of the video'")
                    run_video_prompt_btn = gr.Button("Run Command")

                    llm_result_text = gr.Textbox(label="LLM Result", interactive=False)

                    with gr.Accordion("LLM Debug Info", open=False):
                        raw_prompt_text = gr.Textbox(label="Raw Prompt to LLM", interactive=False, lines=15)
                        raw_response_text = gr.Textbox(label="Raw Response from LLM", interactive=False, lines=15)

                with gr.Column(scale=1):
                    gr.Markdown("### Frame Results (from LLM)")
                    with gr.Row():
                        llm_first_frame_img = gr.Image(label="First Frame", type="filepath", interactive=False)
                        llm_last_frame_img = gr.Image(label="Last Frame", type="filepath", interactive=False)
                    llm_gif_output = gr.Image(label="Generated GIF", type="filepath", interactive=False)

    # --- Event Handlers ---

    # --- System Events ---
    check_ffmpeg_btn.click(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    )

    # A new upload first clears every stale output, then validates/copies the
    # file; this outputs order must match clear_previous_outputs' return tuple.
    file_input.upload(
        fn=clear_previous_outputs,
        outputs=[
            video_output,
            upload_status_text,
            uploaded_video_path_state,
            video_tools_group,
            first_frame_img,
            last_frame_img,
            llm_first_frame_img,
            llm_last_frame_img,
            llm_gif_output,
            llm_result_text,
            raw_prompt_text,
            raw_response_text
        ]
    ).then(
        fn=handle_video_upload,
        inputs=[file_input, ffmpeg_installed_state],
        outputs=[video_output, upload_status_text, uploaded_video_path_state, video_tools_group]
    )

    get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=uploaded_video_path_state,
        outputs=first_frame_img
    )

    get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=uploaded_video_path_state,
        outputs=last_frame_img
    )

    # --- LLM Events ---
    check_llm_button.click(
        fn=llm.check_ollama_endpoint,
        inputs=[llm_endpoint_input, preferred_llm_state],
        outputs=[llm_status_text, llm_model_select, llm_selection_group, ollama_url_state]
    )

    llm_model_select.change(
        fn=llm.set_preferred_model,
        inputs=[llm_model_select, ollama_url_state],
        outputs=[preferred_llm_state, preferred_llm_display]
    )

    # NOTE(review): llm.process_video_prompt must return exactly 6 values to
    # match this outputs list.
    run_video_prompt_btn.click(
        fn=llm.process_video_prompt,
        inputs=[
            video_prompt_input,
            uploaded_video_path_state,
            ollama_url_state,
            preferred_llm_state,
            llm_first_frame_img,  # Pass the component itself
            llm_last_frame_img,  # Pass the component itself
            llm_gif_output
        ],
        outputs=[
            llm_first_frame_img,
            llm_last_frame_img,
            llm_gif_output,
            llm_result_text,
            raw_prompt_text,
            raw_response_text
        ]
    )

    # --- App Load Event ---
    demo.load(
        fn=on_load_tasks,
        outputs=[
            ollama_url_state,
            preferred_llm_state,
            status_text,
            ffmpeg_installed_state,
            llm_endpoint_input
        ]
    ).then(
        fn=llm.check_on_load,
        inputs=[ollama_url_state, preferred_llm_state],
        outputs=[llm_status_text, llm_model_select, llm_selection_group, ollama_url_state, preferred_llm_display]
    )
# --- MCP Server Launch ---
if __name__ == "__main__":
    # Expose the functions we want to be available as tools
    # NOTE(review): `add_tools` is not part of the documented gr.Blocks API —
    # confirm it exists on the pinned Gradio version; MCP tool exposure is
    # normally driven by the functions used in the app plus
    # launch(mcp_server=True).
    demo.add_tools(
        {
            "letter_counter": letter_counter,
            "getFirstFrame": video_tools.getFirstFrame,
            "getLastFrame": video_tools.getLastFrame,
            "convert_mp4_to_gif": video_tools.convert_mp4_to_gif,
        }
    )
    # Launch the Gradio app with MCP server enabled
    demo.launch(mcp_server=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ requests
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'src' directory a Python package.
src/llm/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'llm' directory a Python package.
src/llm/llm.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ import gradio as gr
5
+ from ..mcp import video_tools
6
+
SAVE_FILE = "save.json"

def save_settings(url=None, model_list=None, preferred_model=None):
    """Persist endpoint-specific settings to SAVE_FILE.

    Args:
        url: Ollama endpoint URL; "http://" is prepended when no scheme is
            present. When given, it also becomes the last active URL.
        model_list: Optional list of model-detail dicts to store for the URL.
        preferred_model: Optional preferred model name to store for the URL.
    """
    settings = {}
    if os.path.exists(SAVE_FILE):
        try:
            with open(SAVE_FILE, 'r') as f:
                settings = json.load(f)
        except (json.JSONDecodeError, IOError):
            settings = {}  # Start with fresh settings if file is corrupt

    # Ensure the settings object has the correct structure
    if 'endpoints' not in settings or not isinstance(settings['endpoints'], list):
        settings['endpoints'] = []
    if 'last_active_url' not in settings:
        settings['last_active_url'] = ""

    if url:
        # Normalize the URL before saving or looking up
        if not (url.startswith("http://") or url.startswith("https://")):
            url = "http://" + url

        settings['last_active_url'] = url
        # Use .get() so a hand-edited endpoint entry missing 'url' cannot raise.
        endpoint_data = next((e for e in settings['endpoints'] if e.get('url') == url), None)
        if not endpoint_data:
            endpoint_data = {'url': url, 'models': [], 'preferred_model': ''}
            settings['endpoints'].append(endpoint_data)

        if model_list is not None:
            endpoint_data['models'] = model_list
        if preferred_model is not None:
            endpoint_data['preferred_model'] = preferred_model

    with open(SAVE_FILE, 'w') as f:
        json.dump(settings, f, indent=4)
def load_settings():
    """Load settings for the last active endpoint.

    Returns:
        (last URL, preferred model name, display text for the model textbox);
        all empty strings when nothing usable has been saved.
    """
    if not os.path.exists(SAVE_FILE):
        return "", "", ""
    try:
        with open(SAVE_FILE, "r") as f:
            data = json.load(f)
        last_url = data.get("last_active_url", "")
        if not last_url:
            return "", "", ""

        # BUG FIX: the except clause below does not catch KeyError, so a
        # hand-edited file lacking 'endpoints' or 'url' keys used to raise;
        # use .get() lookups instead.
        endpoint_data = next((e for e in data.get('endpoints', []) if e.get('url') == last_url), None)
        if not endpoint_data:
            return last_url, "", ""

        model = endpoint_data.get("preferred_model", "")
        model_display_text = f"Preferred model set to: {model}" if model else ""
        return last_url, model, model_display_text
    except (json.JSONDecodeError, IOError):
        return "", "", ""
def parse_param_size(size_str: str) -> float:
    """Convert a model parameter-size string to billions of parameters.

    Handles Ollama-style suffixes: "7B" -> 7.0 and, generalized here,
    "270M" -> 0.27. Any unrecognized or non-string input yields 0.0.

    Args:
        size_str: Size string such as "7B", "7.6B" or "270M".

    Returns:
        Size in billions of parameters, or 0.0 when it cannot be parsed.
    """
    if not isinstance(size_str, str):
        return 0.0
    size_str = size_str.upper().strip()
    try:
        if size_str.endswith('B'):
            return float(size_str[:-1])
        if size_str.endswith('M'):
            # Million-parameter models expressed in billions for sorting.
            return float(size_str[:-1]) / 1000.0
    except (ValueError, TypeError):
        return 0.0
    return 0.0
def get_model_details(url: str, model_name: str) -> dict:
    """Inspect a model's modelfile for tool-calling hints and parameter size.

    Queries the Ollama /api/show endpoint; on any request or JSON failure the
    safe defaults (no tool support, 0.0 / "N/A" size) are returned unchanged.
    """
    info = {
        'name': model_name,
        'supports_tool_calling': False,
        'parameter_size': 0.0,
        'parameter_size_str': "N/A"
    }
    try:
        response = requests.post(
            f"{url.rstrip('/')}/api/show",
            json={"name": model_name},
            timeout=10
        )
        response.raise_for_status()
        payload = response.json()

        # Heuristic: tool-capable models mention tool/function plumbing in
        # their modelfile template.
        modelfile_text = payload.get("modelfile", "").lower()
        info['supports_tool_calling'] = any(
            keyword in modelfile_text
            for keyword in ("tool", "function", "available_tools", "function_call")
        )

        size_text = payload.get("details", {}).get("parameter_size", "0B")
        info['parameter_size_str'] = size_text
        info['parameter_size'] = parse_param_size(size_text)

        return info
    except (requests.exceptions.RequestException, json.JSONDecodeError):
        return info
def check_ollama_endpoint(url, preferred_model=None):
    """
    Checks an Ollama endpoint, gets a list of available models with their capabilities,
    sorts them, and updates the UI accordingly.

    Args:
        url: Endpoint URL entered by the user; "http://" is prepended when no
            scheme is present.
        preferred_model: Optional saved model name used to pre-select the
            dropdown when it is still offered by the endpoint.

    Returns:
        A 4-tuple (status message, dropdown update, selection-group
        visibility update, normalized URL) matching the outputs wired to
        check_llm_button.click.
    """
    if not url or not url.strip():
        return "Please enter a URL.", gr.update(visible=False), gr.update(visible=False), url

    # Normalize: default to http:// when no scheme was typed.
    if not (url.startswith("http://") or url.startswith("https://")):
        url = "http://" + url

    api_url = f"{url.rstrip('/')}/api/tags"

    try:
        response = requests.get(api_url, timeout=5)
        response.raise_for_status()

        models_data = response.json().get("models", [])
        if not models_data:
            return "Connected, but no models found.", gr.update(visible=False), gr.update(visible=False), url

        # One /api/show round-trip per model to probe tool support and size.
        detailed_models = [get_model_details(url, m['name']) for m in models_data]

        # Sort models: 1. Tool support (desc), 2. Param size (desc), 3. Name (asc)
        detailed_models.sort(key=lambda m: (not m['supports_tool_calling'], -m['parameter_size'], m['name']))

        # Persist the refreshed model list for this endpoint.
        save_settings(url=url, model_list=detailed_models)

        dropdown_choices = []
        for m in detailed_models:
            tool_text = "Tools: Yes" if m['supports_tool_calling'] else "Tools: No"
            name_display = f"🛠️ {m['name']}" if m['supports_tool_calling'] else m['name']
            dropdown_choices.append(f"{name_display} ({tool_text}, {m['parameter_size_str']})")

        status_message = f"Success! Found and sorted {len(detailed_models)} models."

        # Pre-select the saved preferred model when it is still available;
        # otherwise fall back to the top-ranked entry.
        default_choice = dropdown_choices[0] if dropdown_choices else None
        if preferred_model:
            matching_choice = next((choice for choice in dropdown_choices if preferred_model in choice), None)
            if matching_choice:
                default_choice = matching_choice

        return status_message, gr.update(choices=dropdown_choices, value=default_choice, visible=True), gr.update(visible=True), url

    except requests.exceptions.RequestException:
        error_message = "Connection Error: Is the address correct and Ollama running?"
        return error_message, gr.update(visible=False), gr.update(visible=False), url
def set_preferred_model(model_selection, current_url):
    """Persist the chosen dropdown entry as the endpoint's preferred model.

    Dropdown entries look like "🛠️ model-name (Tools: Yes, 7B)"; the bare
    model name is recovered before saving.
    """
    if not model_selection:
        return "", "No model selected."
    bare_name = model_selection.partition(" (")[0].strip().replace("🛠️ ", "")
    save_settings(url=current_url, preferred_model=bare_name)
    return bare_name, f"Preferred model for {current_url} set to: {bare_name}"
def check_on_load(url, preferred_model):
    """
    Startup wrapper around check_ollama_endpoint: probes the saved endpoint
    when one exists, otherwise shows a neutral prompt. Also restores the
    preferred-model display text.
    """
    if not url or not url.strip():
        # No saved endpoint: hide the selection UI and clear the display box.
        return (
            "Enter an endpoint URL and click 'Check' to begin.",
            gr.update(visible=False),
            gr.update(visible=False),
            None,
            ""
        )

    # Delegate to the main checking function, forwarding the saved model so
    # the dropdown can be pre-selected.
    status, dropdown_update, group_update, current_url = check_ollama_endpoint(url, preferred_model)

    display_text = f"Preferred model set to: {preferred_model}" if preferred_model else ""
    return status, dropdown_update, group_update, current_url, display_text
def process_video_prompt(
    prompt: str,
    video_path: str,
    ollama_url: str,
    model_name: str,
    first_frame_component: "gr.Image",
    last_frame_component: "gr.Image",
    gif_component: "gr.Image"
):
    """
    Send a prompt (plus the uploaded video's path) to an Ollama chat model
    that has been offered video tools, then execute any tool calls it makes.

    Args:
        prompt: The user's natural-language command.
        video_path: Path of the uploaded video to operate on.
        ollama_url: Base URL of the Ollama server.
        model_name: Name of the model to query.
        first_frame_component / last_frame_component / gif_component: Current
            values of the corresponding Gradio components, passed through
            unchanged when their tool is not invoked.

    Returns:
        A 6-tuple matching the click handler's outputs: (first-frame update,
        last-frame update, gif update, status text, raw prompt sent,
        raw JSON response).
    """
    if not all([prompt, video_path, ollama_url, model_name]):
        error_msg = "Error: Missing prompt, video path, or LLM configuration."
        # BUG FIX: this branch used to return 7 values (a stray None before
        # the message) while the Gradio handler wires exactly 6 outputs.
        return first_frame_component, last_frame_component, gif_component, error_msg, "N/A", "N/A"

    api_url = f"{ollama_url.rstrip('/')}/api/chat"  # /api/chat supports tool calling

    # JSON-schema descriptions of the tools offered to the LLM.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "getFirstFrame",
                "description": "Extracts the very first frame from a video file.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_path": {
                            "type": "string",
                            "description": "The path to the video file to process."
                        }
                    },
                    "required": ["video_path"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "getLastFrame",
                "description": "Extracts the very last frame from a video file.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_path": {
                            "type": "string",
                            "description": "The path to the video file to process."
                        }
                    },
                    "required": ["video_path"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "convert_mp4_to_gif",
                "description": "Converts a full MP4 video into a high-quality animated GIF.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_path": {
                            "type": "string",
                            "description": "The path to the video file to convert."
                        },
                        "maxResolution": {
                            "type": "integer",
                            "description": "Optional. The maximum dimension (width or height) for the GIF. Must be between 16 and 500. Defaults to 500.",
                            "default": 500
                        },
                        "fps": {
                            "type": "integer",
                            "description": "Optional. Frames per second for the GIF. Must be between 14 and 50 due to browser limitations. Defaults to 15.",
                            "default": 15
                        },
                        "pingpong": {
                            "type": "boolean",
                            "description": "Optional. If true, the GIF will play forwards then backwards. Defaults to false.",
                            "default": False
                        }
                    },
                    "required": ["video_path"]
                }
            }
        }
    ]

    # Map tool names to actual Python functions
    available_tools = {
        "getFirstFrame": video_tools.getFirstFrame,
        "getLastFrame": video_tools.getLastFrame,
        "convert_mp4_to_gif": video_tools.convert_mp4_to_gif,
    }
    # Case-insensitive lookup table, built once (previously rebuilt for every
    # tool call): lowercase name -> (canonical name, function).
    tools_by_lower_name = {k.lower(): (k, v) for k, v in available_tools.items()}

    # For /api/chat, we provide a user message. The model should infer the context.
    prompt_with_context = f"{prompt}\n\nThe video for this prompt is located at the following path: '{video_path}'"

    raw_response_str = ""
    try:
        # Request to the LLM with the messages and tools
        response = requests.post(
            api_url,
            json={
                "model": model_name,
                "messages": [{"role": "user", "content": prompt_with_context}],
                "stream": False,
                "tools": tools,
            },
            timeout=60,
        )
        response.raise_for_status()
        response_data = response.json()
        raw_response_str = json.dumps(response_data, indent=2)

        # Check if the model decided to use a tool (/api/chat response shape).
        message = response_data.get("message", {})
        tool_calls = message.get("tool_calls")

        if not tool_calls:
            status = message.get("content", "The model did not call a tool.")
            return first_frame_component, last_frame_component, gif_component, status, prompt_with_context, raw_response_str

        # --- Loop through all tool calls from the LLM ---
        first_frame_update = first_frame_component
        last_frame_update = last_frame_component
        gif_update = gif_component  # Start with the original component state
        execution_statuses = []

        for tool_call in tool_calls:
            function_call = tool_call.get("function", {})
            tool_name_from_llm = function_call.get("name")

            if not tool_name_from_llm:
                execution_statuses.append("Error: Could not parse tool name from a tool call.")
                continue

            lookup = tools_by_lower_name.get(tool_name_from_llm.lower())
            if lookup is None:
                execution_statuses.append(f"Error: The model tried to call an unknown tool: {tool_name_from_llm}")
                continue
            original_tool_name, tool_function = lookup

            arguments = function_call.get("arguments", {})

            # Always supply a video path; fall back to the uploaded file when
            # the model omitted it (path is required in the schema).
            tool_args = {"video_path": arguments.get("video_path", video_path)}

            if original_tool_name == "convert_mp4_to_gif":
                for option in ("maxResolution", "fps", "pingpong"):
                    if option in arguments:
                        tool_args[option] = arguments[option]

            result_path = tool_function(**tool_args)

            if "Error:" in result_path:
                execution_statuses.append(f"Tool '{original_tool_name}' failed: {result_path}")
            else:
                execution_statuses.append(f"Successfully executed tool: {original_tool_name}")
                if original_tool_name == "getFirstFrame":
                    first_frame_update = gr.update(value=result_path)
                elif original_tool_name == "getLastFrame":
                    last_frame_update = gr.update(value=result_path)
                elif original_tool_name == "convert_mp4_to_gif":
                    gif_update = gr.update(value=result_path)

        final_status = "\n".join(execution_statuses)
        return first_frame_update, last_frame_update, gif_update, final_status, prompt_with_context, raw_response_str

    except requests.exceptions.RequestException as e:
        error_msg = f"API Error: Could not connect to Ollama. {e}"
        return first_frame_component, last_frame_component, gif_component, error_msg, prompt_with_context, getattr(e.response, 'text', 'No response text')
    except (KeyError, IndexError, json.JSONDecodeError) as e:
        error_msg = f"Error processing LLM response: {e}"
        return first_frame_component, last_frame_component, gif_component, error_msg, prompt_with_context, raw_response_str
src/mcp/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'mcp' directory a Python package.
src/mcp/tools.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
def letter_counter(word, letter):
    """Count how many times a given letter occurs in a word.

    Args:
        word: The word or phrase to analyze
        letter: The letter to count occurrences of

    Returns:
        The number of times the letter appears in the word
    """
    normalized_word = word.lower()
    normalized_letter = letter.lower()
    return normalized_word.count(normalized_letter)
src/mcp/video_tools.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+
def getFirstFrame(video_path: str) -> str:
    """
    Extracts the first frame from an MP4 video file using FFmpeg.

    Args:
        video_path: The relative path to the MP4 file (e.g., 'tmp/my_video.mp4').

    Returns:
        The path to the extracted JPEG image frame, or an error message string.
    """
    if not video_path or not os.path.exists(video_path):
        return "Error: Video file not found at the specified path."

    # BUG FIX: os.path.dirname() returns "" for a bare filename and
    # os.makedirs("") raises an uncaught FileNotFoundError; fall back to the
    # current directory.
    output_dir = os.path.dirname(video_path) or "."
    os.makedirs(output_dir, exist_ok=True)

    # Define the output path for the frame
    base_name = os.path.basename(video_path)
    file_name_without_ext = os.path.splitext(base_name)[0]
    output_frame_path = os.path.join(output_dir, f"{file_name_without_ext}_frame.jpg")

    # Construct and run the ffmpeg command
    command = [
        "ffmpeg",
        "-i", video_path,       # Input file
        "-vframes", "1",        # Extract only one frame
        "-q:v", "2",            # Output quality (2 is high)
        "-y",                   # Overwrite output file if it exists
        output_frame_path
    ]

    try:
        # check=True raises CalledProcessError on a non-zero ffmpeg exit code.
        subprocess.run(command, capture_output=True, text=True, check=True)
        return output_frame_path
    except FileNotFoundError:
        return "Error: ffmpeg is not installed or not found in the system's PATH."
    except subprocess.CalledProcessError as e:
        # Provide the stderr from ffmpeg for easier debugging
        return f"Error during frame extraction: {e.stderr}"
def getLastFrame(video_path: str) -> str:
    """
    Extracts the last frame from an MP4 video file using FFmpeg.

    Args:
        video_path: The relative path to the MP4 file (e.g., 'tmp/my_video.mp4').

    Returns:
        The path to the extracted JPEG image frame, or an error message string.
    """
    if not video_path or not os.path.exists(video_path):
        return "Error: Video file not found at the specified path."

    # BUG FIX: os.path.dirname() returns "" for a bare filename and
    # os.makedirs("") raises an uncaught FileNotFoundError; fall back to the
    # current directory.
    output_dir = os.path.dirname(video_path) or "."
    os.makedirs(output_dir, exist_ok=True)

    # Define the output path for the frame
    base_name = os.path.basename(video_path)
    file_name_without_ext = os.path.splitext(base_name)[0]
    output_frame_path = os.path.join(output_dir, f"{file_name_without_ext}_last_frame.jpg")

    # Construct and run the ffmpeg command
    # -sseof -1 seeks to 1 second before the end of the file to grab the last frame.
    command = [
        "ffmpeg",
        "-sseof", "-1",         # Seek to 1s before the end.
        "-i", video_path,       # Input file
        "-vframes", "1",        # Extract only one frame
        "-q:v", "2",            # Output quality (2 is high)
        "-y",                   # Overwrite output file if it exists
        output_frame_path
    ]

    try:
        subprocess.run(command, capture_output=True, text=True, check=True)
        return output_frame_path
    except FileNotFoundError:
        return "Error: ffmpeg is not installed or not found in the system's PATH."
    except subprocess.CalledProcessError as e:
        # CONSISTENCY FIX: report ffmpeg's stderr (as getFirstFrame does)
        # instead of the bare exception object.
        return f"Error: Could not extract last frame. {e.stderr}"
def convert_mp4_to_gif(video_path: str, maxResolution: int = 500, fps: int = 15, pingpong: bool = False) -> str:
    """
    Converts a video file to a high-quality animated GIF using a two-pass ffmpeg method.

    Args:
        video_path: The path to the input video file.
        maxResolution: The maximum dimension (width or height) of the output GIF.
                       Value must be between 16 and 500. Defaults to 500.
        fps: Frames per second for the output GIF. Must be between 14 and 50.
        pingpong: If True, the GIF will play forwards and then reverse.

    Returns:
        The path to the generated GIF file, or an error string.
    """
    try:
        maxResolution = int(maxResolution)
        fps = int(fps)
        # Handle boolean from LLM which might be a string
        if isinstance(pingpong, str):
            pingpong = pingpong.lower() in ['true', '1', 't', 'y', 'yes']
    except (ValueError, TypeError):
        return "Error: maxResolution and fps must be valid integers."

    if not 16 <= maxResolution <= 500:
        return f"Error: maxResolution must be between 16 and 500, but was {maxResolution}."
    if not 14 <= fps <= 50:
        return f"Error: fps must be between 14 and 50, but was {fps}."

    if not os.path.exists(video_path):
        return f"Error: Video file not found at {video_path}"

    base_path, _ = os.path.splitext(video_path)
    output_gif_path = f"{base_path}.gif"
    palette_path = os.path.join(os.path.dirname(video_path), "palette.png")

    try:
        scaling_filter = f"scale={maxResolution}:{maxResolution}:force_original_aspect_ratio=decrease:flags=lanczos"

        if pingpong:
            # Concatenate the video with its reversed copy for a back-and-forth loop.
            vf_options = f"[0:v]split[original][copy];[copy]reverse[reversed];[original][reversed]concat=n=2:v=1:a=0,fps={fps},{scaling_filter}"
        else:
            vf_options = f"fps={fps},{scaling_filter}"

        # Pass 1: Generate the color palette
        palettegen_cmd = [
            "ffmpeg",
            "-i", video_path,
            "-vf", f"{vf_options},palettegen",
            "-y",
            palette_path
        ]
        subprocess.run(palettegen_cmd, check=True, capture_output=True, text=True)

        # Pass 2: Use the palette to create the GIF
        gif_conversion_cmd = [
            "ffmpeg",
            "-i", video_path,
            "-i", palette_path,
            "-lavfi", f"{vf_options} [x]; [x][1:v] paletteuse",
            "-y",
            output_gif_path
        ]
        subprocess.run(gif_conversion_cmd, check=True, capture_output=True, text=True)

        return output_gif_path

    except FileNotFoundError:
        # BUG FIX: a missing ffmpeg binary previously propagated an uncaught
        # exception; return an error string like the sibling frame functions.
        return "Error: ffmpeg is not installed or not found in the system's PATH."
    except subprocess.CalledProcessError as e:
        # Provide more specific error from ffmpeg's stderr
        return f"Error during GIF conversion: {e.stderr}"
    finally:
        # Clean up the temporary palette file
        if os.path.exists(palette_path):
            os.remove(palette_path)