# Qwen3-4b-toolcall-gguf-llamacpp-codex / qwen3_toolcalling_example.py
# Uploaded by Manojb ("Update qwen3_toolcalling_example.py", commit cd0eaba, verified)
#!/usr/bin/env python3
"""
Complete example of using Qwen3-4B-toolcalling model for function calling
"""
import json
import re
from llama_cpp import Llama
class Qwen3ToolCalling:
    """Wrapper around a GGUF Qwen3 checkpoint for function/tool calling.

    Formats chat prompts with Qwen's ChatML markers, generates a completion
    via llama-cpp-python, and parses tool-call JSON out of the raw text.
    """

    def __init__(self, model_path):
        """Load the GGUF model at *model_path* via llama-cpp-python.

        Args:
            model_path: Filesystem path to the .gguf model file.
        """
        self.llm = Llama(
            model_path=model_path,
            n_ctx=2048,     # context window size in tokens
            n_threads=8,
            n_batch=512,
            # NOTE(review): temperature/top_p/repeat_penalty are sampling-time
            # parameters in llama-cpp-python; passing them to the constructor
            # may have no effect — confirm against the installed version.
            temperature=0.7,
            top_p=0.8,
            repeat_penalty=1.1,
            verbose=False,
        )

    def extract_tool_calls(self, text):
        """Return every tool-call dict found in *text*.

        The previous implementation matched candidates with the non-greedy
        pattern "[.*?]", which stops at the first closing bracket and so
        breaks on any tool call whose arguments contain a nested list.
        Scanning with ``json.JSONDecoder.raw_decode`` parses balanced JSON
        correctly, and additionally accepts a bare object with a "name" key
        (some completions omit the enclosing list).

        Args:
            text: Raw completion text from the model.

        Returns:
            List of dicts, each containing at least a 'name' key. Empty
            list when no valid tool-call JSON is present.
        """
        tool_calls = []
        decoder = json.JSONDecoder()
        pos = 0
        while pos < len(text):
            # Only attempt a parse at plausible JSON start characters.
            if text[pos] not in '[{':
                pos += 1
                continue
            try:
                parsed, end = decoder.raw_decode(text, pos)
            except json.JSONDecodeError:
                pos += 1
                continue
            items = parsed if isinstance(parsed, list) else [parsed]
            for item in items:
                if isinstance(item, dict) and 'name' in item:
                    tool_calls.append(item)
            pos = end  # skip past the value we just consumed
        return tool_calls

    def chat(self, message, system_message=None):
        """Send one user turn to the model and extract any tool calls.

        Args:
            message: User message text.
            system_message: Optional system prompt prepended to the chat.

        Returns:
            Dict with 'response' (raw completion text) and 'tool_calls'
            (list of parsed tool-call dicts, possibly empty).
        """
        # Build a ChatML-style prompt (Qwen's <|im_start|>/<|im_end|> markers).
        prompt_parts = []
        if system_message:
            prompt_parts.append(f"<|im_start|>system\n{system_message}<|im_end|>")
        prompt_parts.append(f"<|im_start|>user\n{message}<|im_end|>")
        prompt_parts.append("<|im_start|>assistant\n")
        formatted_prompt = "\n".join(prompt_parts)

        # Generate; stop before the model starts a new chat turn.
        response = self.llm(
            formatted_prompt,
            max_tokens=512,
            stop=["<|im_end|>", "<|im_start|>"],
            temperature=0.7,
        )
        response_text = response['choices'][0]['text']
        return {
            'response': response_text,
            'tool_calls': self.extract_tool_calls(response_text),
        }
def main():
    """Demo: run a few sample prompts through the model and report tool calls."""
    # Path to the local GGUF checkpoint (adjust for your machine).
    model_path = "/home/user/work/Qwen3-4B-toolcalling-gguf-codex/Qwen3-4B-Function-Calling-Pro.gguf"
    qwen = Qwen3ToolCalling(model_path)

    # Fix: the banner/marker emoji below were mojibake in the source
    # (UTF-8 bytes decoded with the wrong codec, e.g. "πŸš€");
    # restored to the intended characters.
    print("🚀 Qwen3-4B Tool Calling Demo")
    print("=" * 50)

    # Prompts chosen so the model should emit a tool call for each.
    test_cases = [
        "What's the weather like in London?",
        "Find me a hotel in Paris for next week",
        "Calculate 25 + 17",
        "Book a flight from New York to Tokyo",
        "Get the latest news about AI",
    ]

    for i, message in enumerate(test_cases, 1):
        print(f"\n📝 Test {i}: {message}")
        print("-" * 40)
        result = qwen.chat(message)
        print(f"Response: {result['response']}")

        if result['tool_calls']:
            print(f"\n🔧 Tool Calls ({len(result['tool_calls'])}):")
            for j, tool_call in enumerate(result['tool_calls'], 1):
                print(f" {j}. {tool_call['name']}")
                # 'arguments' may be absent on malformed calls; default to {}.
                print(f" Arguments: {tool_call.get('arguments', {})}")
        else:
            print("\n❌ No tool calls detected")


if __name__ == "__main__":
    main()