---
# Compose stack with two services: a llama.cpp server container ("llama")
# and a locally built application container ("fastapi") that is configured
# to reach it through an OpenAI-style base URL.
version: '3.8'

services:
  # llama.cpp server image, started with a single GGUF model file.
  llama:
    image: ghcr.io/ggerganov/llama.cpp:server
    ports:
      - "8001:8001"
    volumes:
      # The model file is bind-mounted read-only (":ro") from the project
      # directory to the path passed to --model below.
      - ./kai-model-7.2B-Q4_0.gguf:/models/kai-model-7.2B-Q4_0.gguf:ro
    # Folded scalar: the lines below are joined with spaces into a single
    # argument string; ">-" strips the trailing newline.
    # --port must stay in sync with the container side of the "ports"
    # mapping above and with OPENAI_API_BASE in the fastapi service.
    command: >-
      --model /models/kai-model-7.2B-Q4_0.gguf
      --alias kai-model:latest
      --host 0.0.0.0
      --port 8001
      --ctx-size 4096
      --threads 4

  # Application image built from the Dockerfile in this directory.
  fastapi:
    build: .
    container_name: kai_fastapi
    ports:
      - "8000:8000"
    environment:
      # "llama" is the service name defined above; 8001 is the port that
      # service is started with.
      OPENAI_API_BASE: "http://llama:8001/v1"
      # NOTE(review): looks like a placeholder value rather than a real
      # secret — confirm the application does not need a real key.
      OPENAI_API_KEY: "sk-no-key-needed"
    # Short-form depends_on only orders container start-up; it does not
    # wait for the llama service to be ready to serve requests.
    depends_on:
      - llama