#!/bin/bash
# Qwen3-4B Tool Calling with llama-cpp-python
# This script sets up and runs the model for local inference
#
# Usage:
#   ./run_model.sh        (requires chmod +x)
#   bash ./run_model.sh   (no chmod needed; avoid 'source', since the script's exit calls would close your shell)
echo "🚀 Qwen3-4B Tool Calling Setup"
echo "================================"
# Check if model file exists
if [ ! -f "Qwen3-4B-Function-Calling-Pro.gguf" ]; then
    echo "❌ Model file not found: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "📥 Please download the model file first."
    echo "   You can download it from: https://huggingface.co/Manojb/qwen3-4b-toolcall-gguf-llamacpp-codex"
    exit 1
fi
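# Hedged example: the file can also be fetched with the Hugging Face CLI,
# assuming the GGUF filename inside the repo matches the one checked above:
#   pip3 install -U "huggingface_hub[cli]"
#   huggingface-cli download Manojb/qwen3-4b-toolcall-gguf-llamacpp-codex \
#       Qwen3-4B-Function-Calling-Pro.gguf --local-dir .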
# Check if Python is available
if ! command -v python3 &> /dev/null; then
    echo "❌ Python3 not found. Please install Python 3.8+ first."
    exit 1
fi
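# Minimal sketch of an explicit version gate, if the 3.8+ floor stated above
# should be enforced rather than just mentioned (assumption: strict checking
# is wanted; uncomment to enable):
#   if ! python3 -c 'import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)'; then
#       echo "❌ Python 3.8+ required"
#       exit 1
#   fi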
# Check if llama-cpp-python is installed
if ! python3 -c "import llama_cpp" 2>/dev/null; then
    echo "📦 Installing llama-cpp-python..."
    if ! pip3 install llama-cpp-python; then
        echo "❌ Failed to install llama-cpp-python"
        exit 1
    fi
    echo "✅ llama-cpp-python installed successfully"
fi
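# Hedged note: the default pip wheel is CPU-only. llama-cpp-python's docs
# describe rebuilding with GPU offload enabled, e.g. for CUDA (the CMake flag
# name has varied across releases; older versions used -DLLAMA_CUBLAS=on):
#   CMAKE_ARGS="-DGGML_CUDA=on" pip3 install --force-reinstall --no-cache-dir llama-cpp-python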
# Function to run the model
run_model() {
    echo "🔄 Starting Qwen3-4B Tool Calling model..."
    echo "   Model: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "   Context: 2048 tokens"
    echo "   Threads: 8"
    echo ""
    echo "💡 Usage examples:"
    echo "   - 'What's the weather in London?'"
    echo "   - 'Find me a hotel in Paris'"
    echo "   - 'Calculate 25 + 17'"
    echo "   - 'Book a flight from New York to Tokyo'"
    echo ""
    echo "Press Ctrl+C to exit"
    echo "================================"
    python3 quick_start.py
}
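# quick_start.py is expected to sit next to this script and is not shown here;
# a minimal sketch of what it is assumed to do with llama-cpp-python's chat
# API (not necessarily the author's actual implementation):
#   from llama_cpp import Llama
#   llm = Llama(model_path="Qwen3-4B-Function-Calling-Pro.gguf", n_ctx=2048, n_threads=8)
#   out = llm.create_chat_completion(messages=[{"role": "user", "content": "Calculate 25 + 17"}])
#   print(out["choices"][0]["message"]["content"])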
# Function to run the server
run_server() {
    echo "🌐 Starting Codex-compatible server..."
    echo "   Server: http://localhost:8000"
    echo "   Model: Qwen3-4B-Function-Calling-Pro"
    echo ""
    echo "💡 Configure Codex with:"
    echo "   - Server URL: http://localhost:8000"
    echo "   - Model: Qwen3-4B-Function-Calling-Pro"
    echo "   - API Key: (not required)"
    echo ""
    echo "Press Ctrl+C to stop server"
    echo "================================"
    # Sampling temperature is a per-request parameter of the OpenAI-compatible
    # API rather than a llama_cpp.server flag, so clients set it per call.
    python3 -m llama_cpp.server \
        --model Qwen3-4B-Function-Calling-Pro.gguf \
        --host 0.0.0.0 \
        --port 8000 \
        --n_ctx 2048 \
        --n_threads 8
}
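# llama_cpp.server exposes an OpenAI-compatible HTTP API, so once the server
# is up it can be smoke-tested with curl. The "model" field can typically be
# omitted when a single model is loaded (an assumption about this setup);
# temperature goes in the request body, matching the note above:
#   curl http://localhost:8000/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"messages": [{"role": "user", "content": "What is the weather in London?"}], "temperature": 0.7}'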
# Function to show help
show_help() {
    echo "Usage: $0 [OPTION]"
    echo ""
    echo "Options:"
    echo "  run, r       Run the model interactively (default)"
    echo "  server, s    Start Codex-compatible server"
    echo "  help, h      Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0           # Run interactively"
    echo "  $0 run       # Run interactively"
    echo "  $0 server    # Start server for Codex"
    echo "  $0 help      # Show this help"
}
# Main script logic
case "${1:-run}" in
    "run"|"r")
        run_model
        ;;
    "server"|"s")
        run_server
        ;;
    "help"|"h"|"-h"|"--help")
        show_help
        ;;
    *)
        echo "❌ Unknown option: $1"
        echo ""
        show_help
        exit 1
        ;;
esac