#!/bin/bash
# Qwen3-4B Tool Calling with llama-cpp-python
# This script sets up and runs the model for local inference
#
# Usage:
#   ./run_model.sh        (requires chmod +x)
#   bash ./run_model.sh   (no chmod needed; avoid 'source', since the script's exit calls would close your shell)
echo "🚀 Qwen3-4B Tool Calling Setup"
echo "================================"
# Check if model file exists
if [ ! -f "Qwen3-4B-Function-Calling-Pro.gguf" ]; then
    echo "❌ Model file not found: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "📥 Please download the model file first."
    echo "   You can download it from: https://huggingface.co/Manojb/qwen3-4b-toolcall-gguf-llamacpp-codex"
    exit 1
fi
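# Hedged example: the file can also be fetched with the Hugging Face CLI,
# assuming the GGUF filename inside the repo matches the one checked above:
#   pip3 install -U "huggingface_hub[cli]"
#   huggingface-cli download Manojb/qwen3-4b-toolcall-gguf-llamacpp-codex \
#       Qwen3-4B-Function-Calling-Pro.gguf --local-dir .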
# Check if Python is available
if ! command -v python3 &> /dev/null; then
    echo "❌ Python3 not found. Please install Python 3.8+ first."
    exit 1
fi
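# Minimal sketch of an explicit version gate, if the 3.8+ floor stated above
# should be enforced rather than just mentioned (assumption: strict checking
# is wanted; uncomment to enable):
#   if ! python3 -c 'import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)'; then
#       echo "❌ Python 3.8+ required"
#       exit 1
#   fi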
# Check if llama-cpp-python is installed
if ! python3 -c "import llama_cpp" 2>/dev/null; then
    echo "📦 Installing llama-cpp-python..."
    if ! pip3 install llama-cpp-python; then
        echo "❌ Failed to install llama-cpp-python"
        exit 1
    fi
    echo "✅ llama-cpp-python installed successfully"
fi
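# Hedged note: the default pip wheel is CPU-only. llama-cpp-python's docs
# describe rebuilding with GPU offload enabled, e.g. for CUDA (the CMake flag
# name has varied across releases; older versions used -DLLAMA_CUBLAS=on):
#   CMAKE_ARGS="-DGGML_CUDA=on" pip3 install --force-reinstall --no-cache-dir llama-cpp-python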
# Function to run the model
run_model() {
    echo "🔄 Starting Qwen3-4B Tool Calling model..."
    echo "   Model: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "   Context: 2048 tokens"
    echo "   Threads: 8"
    echo ""
    echo "💡 Usage examples:"
    echo "   - 'What's the weather in London?'"
    echo "   - 'Find me a hotel in Paris'"
    echo "   - 'Calculate 25 + 17'"
    echo "   - 'Book a flight from New York to Tokyo'"
    echo ""
    echo "Press Ctrl+C to exit"
    echo "================================"
    python3 quick_start.py
}
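# quick_start.py is expected to sit next to this script and is not shown here;
# a minimal sketch of what it is assumed to do with llama-cpp-python's chat
# API (not necessarily the author's actual implementation):
#   from llama_cpp import Llama
#   llm = Llama(model_path="Qwen3-4B-Function-Calling-Pro.gguf", n_ctx=2048, n_threads=8)
#   out = llm.create_chat_completion(messages=[{"role": "user", "content": "Calculate 25 + 17"}])
#   print(out["choices"][0]["message"]["content"])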
# Function to run the server
run_server() {
    echo "🌐 Starting Codex-compatible server..."
    echo "   Server: http://localhost:8000"
    echo "   Model: Qwen3-4B-Function-Calling-Pro"
    echo ""
    echo "💡 Configure Codex with:"
    echo "   - Server URL: http://localhost:8000"
    echo "   - Model: Qwen3-4B-Function-Calling-Pro"
    echo "   - API Key: (not required)"
    echo ""
    echo "Press Ctrl+C to stop server"
    echo "================================"
    # Sampling temperature is a per-request parameter of the OpenAI-compatible
    # API rather than a llama_cpp.server flag, so clients set it per call.
    python3 -m llama_cpp.server \
        --model Qwen3-4B-Function-Calling-Pro.gguf \
        --host 0.0.0.0 \
        --port 8000 \
        --n_ctx 2048 \
        --n_threads 8
}
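# llama_cpp.server exposes an OpenAI-compatible HTTP API, so once the server
# is up it can be smoke-tested with curl. The "model" field can typically be
# omitted when a single model is loaded (an assumption about this setup);
# temperature goes in the request body, matching the note above:
#   curl http://localhost:8000/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"messages": [{"role": "user", "content": "What is the weather in London?"}], "temperature": 0.7}'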
# Function to show help
show_help() {
    echo "Usage: $0 [OPTION]"
    echo ""
    echo "Options:"
    echo "  run, r       Run the model interactively (default)"
    echo "  server, s    Start Codex-compatible server"
    echo "  help, h      Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0           # Run interactively"
    echo "  $0 run       # Run interactively"
    echo "  $0 server    # Start server for Codex"
    echo "  $0 help      # Show this help"
}
# Main script logic
case "${1:-run}" in
    "run"|"r")
        run_model
        ;;
    "server"|"s")
        run_server
        ;;
    "help"|"h"|"-h"|"--help")
        show_help
        ;;
    *)
        echo "❌ Unknown option: $1"
        echo ""
        show_help
        exit 1
        ;;
esac