version: '3.8'

services:
  # llama.cpp server exposing an OpenAI-compatible API on port 8001
  llama:
    image: ghcr.io/ggerganov/llama.cpp:server
    ports:
      - "8001:8001"
    volumes:
      # Mount the GGUF weights read-only; the file must sit next to this compose file
      - ./kai-model-7.2B-Q4_0.gguf:/models/kai-model-7.2B-Q4_0.gguf:ro
    command: >
      --model /models/kai-model-7.2B-Q4_0.gguf
      --alias kai-model:latest
      --host 0.0.0.0
      --port 8001
      --ctx-size 4096
      --threads 4

  # FastAPI app that reaches the llama service over the compose network
  fastapi:
    build: .
    container_name: kai_fastapi
    ports:
      - "8000:8000"
    environment:
      OPENAI_API_BASE: "http://llama:8001/v1"
      OPENAI_API_KEY: "sk-no-key-needed"  # server runs without --api-key, so any non-empty value works
    depends_on:
      - llama
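Once both containers are up, the fastapi service reaches the model through the two environment variables wired in above. Below is a minimal sketch of such a call, assuming the `openai` Python package (1.x) is available inside the FastAPI image; the base URL and key are read explicitly because the 1.x client does not pick up the legacy `OPENAI_API_BASE` variable on its own, and the model name must match the `--alias` passed to the llama.cpp server:

```python
import os

from openai import OpenAI  # assumes openai>=1.0 is installed in the image

# Read the endpoint and key injected by docker-compose. The key is a
# placeholder: the llama service runs without --api-key, so any value works.
client = OpenAI(
    base_url=os.environ["OPENAI_API_BASE"],
    api_key=os.environ["OPENAI_API_KEY"],
)

# The model name matches the --alias given to the llama.cpp server.
response = client.chat.completions.create(
    model="kai-model:latest",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```

Since the llama port is also published to the host, the same request works from outside the compose network against http://localhost:8001/v1, which is handy for smoke-testing the model server on its own.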