# Qwen3-4b-toolcall-gguf-llamacpp-codex / qwen3_toolcalling_example.py
# Uploaded by Manojb ("Update qwen3_toolcalling_example.py", commit cd0eaba, verified)
#!/usr/bin/env python3
"""
Complete example of using Qwen3-4B-toolcalling model for function calling
"""
import json
import re
from llama_cpp import Llama
class Qwen3ToolCalling:
    """Wrapper around a GGUF Qwen3 checkpoint for function/tool calling.

    Formats chat prompts with Qwen's ChatML markers, generates a completion
    via llama-cpp-python, and parses tool-call JSON out of the raw text.
    """

    def __init__(self, model_path):
        """Load the GGUF model at *model_path* via llama-cpp-python.

        Args:
            model_path: Filesystem path to the .gguf model file.
        """
        self.llm = Llama(
            model_path=model_path,
            n_ctx=2048,     # context window size in tokens
            n_threads=8,
            n_batch=512,
            # NOTE(review): temperature/top_p/repeat_penalty are sampling-time
            # parameters in llama-cpp-python; passing them to the constructor
            # may have no effect — confirm against the installed version.
            temperature=0.7,
            top_p=0.8,
            repeat_penalty=1.1,
            verbose=False,
        )

    def extract_tool_calls(self, text):
        """Return every tool-call dict found in *text*.

        The previous implementation matched candidates with the non-greedy
        pattern "[.*?]", which stops at the first closing bracket and so
        breaks on any tool call whose arguments contain a nested list.
        Scanning with ``json.JSONDecoder.raw_decode`` parses balanced JSON
        correctly, and additionally accepts a bare object with a "name" key
        (some completions omit the enclosing list).

        Args:
            text: Raw completion text from the model.

        Returns:
            List of dicts, each containing at least a 'name' key. Empty
            list when no valid tool-call JSON is present.
        """
        tool_calls = []
        decoder = json.JSONDecoder()
        pos = 0
        while pos < len(text):
            # Only attempt a parse at plausible JSON start characters.
            if text[pos] not in '[{':
                pos += 1
                continue
            try:
                parsed, end = decoder.raw_decode(text, pos)
            except json.JSONDecodeError:
                pos += 1
                continue
            items = parsed if isinstance(parsed, list) else [parsed]
            for item in items:
                if isinstance(item, dict) and 'name' in item:
                    tool_calls.append(item)
            pos = end  # skip past the value we just consumed
        return tool_calls

    def chat(self, message, system_message=None):
        """Send one user turn to the model and extract any tool calls.

        Args:
            message: User message text.
            system_message: Optional system prompt prepended to the chat.

        Returns:
            Dict with 'response' (raw completion text) and 'tool_calls'
            (list of parsed tool-call dicts, possibly empty).
        """
        # Build a ChatML-style prompt (Qwen's <|im_start|>/<|im_end|> markers).
        prompt_parts = []
        if system_message:
            prompt_parts.append(f"<|im_start|>system\n{system_message}<|im_end|>")
        prompt_parts.append(f"<|im_start|>user\n{message}<|im_end|>")
        prompt_parts.append("<|im_start|>assistant\n")
        formatted_prompt = "\n".join(prompt_parts)

        # Generate; stop before the model starts a new chat turn.
        response = self.llm(
            formatted_prompt,
            max_tokens=512,
            stop=["<|im_end|>", "<|im_start|>"],
            temperature=0.7,
        )
        response_text = response['choices'][0]['text']
        return {
            'response': response_text,
            'tool_calls': self.extract_tool_calls(response_text),
        }
def main():
    """Demo: run a few sample prompts through the model and report tool calls."""
    # Path to the local GGUF checkpoint (adjust for your machine).
    model_path = "/home/user/work/Qwen3-4B-toolcalling-gguf-codex/Qwen3-4B-Function-Calling-Pro.gguf"
    qwen = Qwen3ToolCalling(model_path)

    # Fix: the banner/marker emoji below were mojibake in the source
    # (UTF-8 bytes decoded with the wrong codec, e.g. "πŸš€");
    # restored to the intended characters.
    print("🚀 Qwen3-4B Tool Calling Demo")
    print("=" * 50)

    # Prompts chosen so the model should emit a tool call for each.
    test_cases = [
        "What's the weather like in London?",
        "Find me a hotel in Paris for next week",
        "Calculate 25 + 17",
        "Book a flight from New York to Tokyo",
        "Get the latest news about AI",
    ]

    for i, message in enumerate(test_cases, 1):
        print(f"\n📝 Test {i}: {message}")
        print("-" * 40)
        result = qwen.chat(message)
        print(f"Response: {result['response']}")

        if result['tool_calls']:
            print(f"\n🔧 Tool Calls ({len(result['tool_calls'])}):")
            for j, tool_call in enumerate(result['tool_calls'], 1):
                print(f" {j}. {tool_call['name']}")
                # 'arguments' may be absent on malformed calls; default to {}.
                print(f" Arguments: {tool_call.get('arguments', {})}")
        else:
            print("\n❌ No tool calls detected")


if __name__ == "__main__":
    main()