Spaces: Build error

cygon committed · d61feef
Parent(s): 05538b8

Initial deployment with Ollama support
Browse files

- .dockerignore +18 -0
- .env.example +43 -0
- .gitignore +6 -0
- CHANGELOG.md +152 -0
- COMPLETE_DEPLOYMENT_GUIDE.md +1529 -0
- DEPLOYMENT.md +435 -0
- DEVELOPMENT.md +106 -0
- Dockerfile +74 -0
- HUGGINGFACE_OLLAMA_DEPLOY.md +423 -0
- QUICKSTART.md +319 -0
- README.md +9 -8
- backend/adapters/anthropic_adapter.ts +76 -0
- backend/adapters/huggingface_adapter.ts +164 -0
- backend/adapters/ollama_adapter.ts +153 -0
- backend/adapters/openai_adapter.ts +193 -0
- backend/adapters/vector_db_adapter.ts +146 -0
- backend/api/auth.ts +26 -0
- backend/api/chat.ts +99 -0
- backend/api/documents.ts +119 -0
- backend/api/encore.service.ts +3 -0
- backend/api/health.ts +55 -0
- backend/api/image.ts +44 -0
- backend/api/rag.ts +72 -0
- backend/api/voice.ts +101 -0
- backend/encore.app +1 -0
- backend/package.json +16 -0
- backend/services/ai_service.ts +193 -0
- backend/services/document_service.ts +238 -0
- backend/services/image_service.ts +122 -0
- backend/services/rag_service.ts +182 -0
- backend/services/voice_service.ts +149 -0
- backend/tsconfig.json +31 -0
- backend/types/config.ts +93 -0
- backend/types/models.ts +256 -0
- backend/utils/auth.ts +69 -0
- backend/utils/logger.ts +48 -0
- backend/utils/metrics.ts +90 -0
- backend/utils/rate_limit.ts +114 -0
- backend/vite-env.d.ts +1 -0
- backend/workers/ingestion_worker.ts +128 -0
- docker-compose.yml +51 -0
- examples/curl.sh +116 -0
- examples/js_client.js +203 -0
- package.json +9 -0
- strcture.md +493 -0
- tests/api.test.ts +233 -0
.dockerignore
ADDED
@@ -0,0 +1,18 @@
node_modules
npm-debug.log
.env
.env.local
.git
.gitignore
README.md
.vscode
.idea
*.md
!README.md
.DS_Store
dist
build
coverage
.encore
data
*.log
.env.example
ADDED
@@ -0,0 +1,43 @@
# API Keys and Credentials (at least one required, or use Ollama)
OPENAI_API_KEY=your_openai_api_key_here
HUGGINGFACE_API_KEY=your_huggingface_api_key_here
ANTHROPIC_API_KEY=your_anthropic_api_key_here

# Ollama Configuration (local LLM - no API key needed)
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=llama2
OLLAMA_EMBEDDING_MODEL=nomic-embed-text

# Vector Database Configuration
PINECONE_API_KEY=your_pinecone_api_key_here
PINECONE_ENVIRONMENT=us-west1-gcp
PINECONE_INDEX_NAME=ai-api-vectors

# Authentication
API_KEYS=demo-key-1,demo-key-2,admin-key-3
ADMIN_API_KEYS=admin-key-3

# Rate Limiting (requests per minute)
RATE_LIMIT_DEFAULT=60
RATE_LIMIT_PREMIUM=300
RATE_LIMIT_ADMIN=1000

# Model Configuration
DEFAULT_CHAT_MODEL=llama2
DEFAULT_EMBEDDING_MODEL=nomic-embed-text
DEFAULT_IMAGE_MODEL=dall-e-3
DEFAULT_VOICE_MODEL=tts-1

# Service Configuration
PORT=8000
LOG_LEVEL=info
CORS_ORIGINS=http://localhost:3000,http://localhost:5173

# Document Processing
MAX_FILE_SIZE_MB=10
CHUNK_SIZE=1000
CHUNK_OVERLAP=200

# Background Workers
ENABLE_BACKGROUND_WORKERS=true
WORKER_CONCURRENCY=5
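To make the contract of the file above concrete, here is a minimal, hypothetical TypeScript sketch of how a service might load and validate these variables at startup. The variable names come from `.env.example` itself; the `loadConfig` helper, its structure, and its defaults are illustrative assumptions, not the repo's actual `backend/types/config.ts`.

```typescript
// Hypothetical config loader -- illustrates the .env contract above.
// Variable names match .env.example; the structure is an assumption.

interface ServiceConfig {
  ollamaBaseUrl: string;
  ollamaModel: string;
  apiKeys: string[];      // comma-separated in API_KEYS
  rateLimitDefault: number;
  maxFileSizeMb: number;
}

function loadConfig(env: NodeJS.ProcessEnv = process.env): ServiceConfig {
  const apiKeys = (env.API_KEYS ?? "").split(",").filter(Boolean);
  if (apiKeys.length === 0) {
    throw new Error("API_KEYS must contain at least one key");
  }
  return {
    ollamaBaseUrl: env.OLLAMA_BASE_URL ?? "http://localhost:11434",
    ollamaModel: env.OLLAMA_MODEL ?? "llama2",
    apiKeys,
    rateLimitDefault: Number(env.RATE_LIMIT_DEFAULT ?? 60),
    maxFileSizeMb: Number(env.MAX_FILE_SIZE_MB ?? 10),
  };
}
```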
.gitignore
ADDED
@@ -0,0 +1,6 @@
.encore
encore.gen.go
encore.gen.cue
/.encore
node_modules
/encore.gen
CHANGELOG.md
ADDED
@@ -0,0 +1,152 @@
# Changelog

All notable changes to the AI API Service will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.0.0] - 2025-10-01

### Added

#### Core Features
- **Multi-turn Chat API** - Conversational AI with context management supporting multiple LLM providers
- **RAG (Retrieval-Augmented Generation)** - Query documents with AI-powered vector retrieval
- **Image Generation** - Text-to-image using DALL-E or Stable Diffusion
- **Voice Synthesis** - Text-to-speech with multiple voice options via OpenAI TTS
- **Speech Recognition** - Audio transcription using Whisper
- **Document Ingestion** - Upload and process PDF, DOCX, TXT files with automatic chunking

#### Model Support
- OpenAI integration (GPT-4, GPT-3.5-turbo, DALL-E, TTS, Whisper)
- HuggingFace Inference API support (Mistral, Stable Diffusion, embeddings)
- Anthropic Claude models (Claude 3 Sonnet, Opus)
- Local model support (optional, via transformers)

#### Vector Database
- Pinecone adapter for production vector storage
- In-memory vector DB fallback for development
- Cosine similarity search
- Metadata filtering support

#### Authentication & Security
- API Key authentication with Bearer token support
- Role-based access control (default, premium, admin tiers)
- Token bucket rate limiting (configurable per tier)
- Input validation with TypeScript type safety

#### Observability
- Structured JSON logging with configurable log levels
- Prometheus-style metrics endpoint
- Health check endpoint with service status
- Request/response time tracking
- Model usage statistics

#### Background Processing
- Async document ingestion workers
- Configurable worker concurrency
- Webhook notifications for completion events
- Automatic text chunking with overlap

#### Developer Experience
- Comprehensive TypeScript types
- Auto-generated API clients
- Example curl scripts
- JavaScript/Node.js client library
- Full test suite with vitest
- Detailed API documentation

#### Deployment
- Docker support with multi-stage builds
- Docker Compose for local development
- Environment-based configuration
- Health checks and graceful shutdown
- Production-ready error handling

### API Endpoints

#### Health & Monitoring
- `GET /health` - Service health check with component status
- `GET /metrics` - Request metrics and usage statistics

#### Authentication
- `POST /auth/verify` - Validate API key and check rate limits

#### AI Chat
- `POST /ai/chat` - Multi-turn conversation with context
- `GET /ai/query` - Simple question answering

#### RAG
- `POST /rag/query` - Query with document retrieval
- `GET /rag/models` - List available LLM models

#### Image Generation
- `POST /image/generate` - Generate images from text prompts

#### Voice
- `POST /voice/synthesize` - Text to speech synthesis
- `POST /voice/transcribe` - Speech to text transcription

#### Documents
- `POST /upload` - Upload and ingest documents
- `GET /docs/:id/sources` - Retrieve document chunks
- `POST /webhook/events` - Ingestion completion webhooks

### Configuration

Environment variables for all services:
- LLM provider API keys (OpenAI, HuggingFace, Anthropic)
- Vector DB configuration (Pinecone)
- Rate limiting settings per tier
- Document processing parameters
- Worker configuration
- CORS and security settings

### Testing

- Unit tests for all core services
- Integration tests for API endpoints
- Mock implementations for external services
- Rate limiting validation
- Authentication flow tests
- Vector DB operations tests

### Documentation

- Comprehensive README with architecture diagram
- API reference with curl examples
- Environment variable guide
- Deployment instructions (Docker, Hugging Face Spaces, cloud providers)
- Scaling considerations and best practices
- Cost optimization guidelines
- Troubleshooting guide

### Known Limitations

- Maximum file upload size: 10MB (configurable)
- In-memory vector DB not suitable for production
- No built-in caching layer (add Redis for production)
- Synchronous API calls (streaming support coming soon)

### Future Roadmap

- Server-Sent Events (SSE) for streaming responses
- Redis caching layer for frequent queries
- Multi-language support for responses
- Fine-tuning pipeline integration
- Analytics dashboard
- Webhook integrations for third-party services
- GraphQL API support
- gRPC endpoints for high-performance use cases
- Kubernetes deployment manifests
- Auto-scaling configuration

---

## Release Notes

This is the initial release of the AI API Service, a production-ready TypeScript API for integrating multiple AI capabilities into chatbots, LLM applications, and intelligent systems.

The service is built on Encore.ts for type-safe backend development and includes comprehensive documentation, tests, and deployment configurations.

For questions, issues, or contributions, please visit the GitHub repository.
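The changelog entry "Cosine similarity search" (used by the in-memory vector DB fallback above) is a small, well-defined computation; the sketch below shows the textbook formula in TypeScript. This is an illustration of the standard algorithm, not the repo's actual `backend/adapters/vector_db_adapter.ts`.

```typescript
// Standard cosine similarity: dot(a, b) / (|a| * |b|).
// Used to rank stored embeddings against a query embedding.
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) throw new Error("dimension mismatch");
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Top-k search over an in-memory store, as a fallback adapter might do.
function topK(query: number[], store: { id: string; vec: number[] }[], k: number) {
  return store
    .map((e) => ({ id: e.id, score: cosineSimilarity(query, e.vec) }))
    .sort((x, y) => y.score - x.score)
    .slice(0, k);
}
```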
COMPLETE_DEPLOYMENT_GUIDE.md
ADDED
@@ -0,0 +1,1529 @@
# Complete Step-by-Step Guide: Deploy AI API with Ollama to Hugging Face Spaces
## (Absolute Beginner-Friendly Guide)

**What you'll build**: A fully working AI API running on Hugging Face Spaces that anyone can access via the internet, powered by Ollama (no OpenAI key needed).

**Time needed**: 30-45 minutes
**Cost**: FREE (or $0.60/hour for faster GPU)
**No prior experience needed!**

---

## 📋 **What You Need Before Starting**

1. ✅ A Hugging Face account (we'll create this if you don't have one)
2. ✅ Git installed on your computer
3. ✅ Basic ability to copy/paste and follow instructions
4. ✅ This project's code files (you already have these)

---

## 🎯 **PART 1: Create Hugging Face Account & Space**

### **Step 1.1: Create a Hugging Face Account** (Skip if you have one)

1. Open your web browser
2. Go to: https://huggingface.co/join
3. Fill in:
   - **Email**: Your email address
   - **Username**: Pick a username (you'll need this later - write it down!)
   - **Password**: Choose a strong password
4. Click **"Sign Up"**
5. Check your email and click the verification link
6. You're now logged into Hugging Face!

### **Step 1.2: Create a New Space**

1. **Go to**: https://huggingface.co/new-space

2. **Fill in the form**:

| Field | What to Enter | Example |
|-------|---------------|---------|
| **Owner** | Your username | `yourname` |
| **Space name** | `ai-api-ollama` | (or anything you like) |
| **License** | Select "MIT" | |
| **Select the Space SDK** | Click on **"Docker"** | ⚠️ IMPORTANT: Must be Docker! |
| **Space hardware** | Select **"CPU basic - Free"** for now | (We'll upgrade later if needed) |
| **Repo type** | Leave as **"Public"** | (or Private if you prefer) |

3. Click the **"Create Space"** button at the bottom

4. **IMPORTANT - Write down your Space URL**:
   ```
   https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
   ```
   Replace `YOUR_USERNAME` with your actual username.

5. You'll see a page with instructions - **ignore them for now**; we'll do it differently.

---
## 🔧 **PART 2: Install Git and Set Up Authentication**

### **Step 2.1: Check if Git is Installed**

**On Windows**:
1. Press `Windows Key + R`
2. Type `cmd` and press Enter
3. Type: `git --version`
4. If you see a version number (like `git version 2.40.0`), you have Git ✅
5. If you see an error, download Git from: https://git-scm.com/download/win

**On Mac**:
1. Press `Command + Space`
2. Type `terminal` and press Enter
3. Type: `git --version`
4. If you see a version number, you have Git ✅
5. If not, it will prompt you to install Xcode Command Line Tools - click Install

**On Linux**:
```bash
git --version
```
If not installed:
```bash
sudo apt-get update
sudo apt-get install git
```

### **Step 2.2: Create a Hugging Face Access Token**

1. Go to: https://huggingface.co/settings/tokens
2. Click the **"New token"** button
3. Fill in:
   - **Name**: `git-access` (or anything you like)
   - **Role**: Select **"Write"**
4. Click **"Generate token"**
5. **CRITICAL**: Copy the token and save it somewhere safe (Notepad, password manager)
   - It looks like: `hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
   - ⚠️ **You won't be able to see this again!**

---
## 💻 **PART 3: Clone Your Space to Your Computer**

### **Step 3.1: Open Terminal/Command Prompt**

**Windows**:
1. Press `Windows Key + R`
2. Type `cmd` and press Enter
3. Navigate to where you want to work (e.g., Desktop):
   ```
   cd Desktop
   ```

**Mac/Linux**:
1. Open Terminal
2. Navigate to where you want to work:
   ```bash
   cd ~/Desktop
   ```

### **Step 3.2: Clone the Space Repository**

1. **Copy this command** (replace YOUR_USERNAME with your actual Hugging Face username):
   ```bash
   git clone https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
   ```

2. **Example**:
   ```bash
   git clone https://huggingface.co/spaces/johndoe/ai-api-ollama
   ```

3. **Press Enter**

4. When prompted for username and password:
   - **Username**: Your Hugging Face username
   - **Password**: **Paste your token** (NOT your password!) - the one that starts with `hf_`

5. You should see:
   ```
   Cloning into 'ai-api-ollama'...
   ```

6. **Verify the folder was created**:
   ```bash
   cd ai-api-ollama
   ls
   ```
   (On Windows use `dir` instead of `ls`)

---
## 📂 **PART 4: Copy Project Files to Space**

### **Step 4.1: Locate Your AI API Service Files**

You should have the project files in a folder. Let's say they're in:
- Windows: `C:\Users\YourName\Downloads\ai-api-service\`
- Mac/Linux: `~/Downloads/ai-api-service/`

### **Step 4.2: Copy ALL Files to the Space Folder**

**Option A: Using File Explorer (Easiest)**

**Windows**:
1. Open File Explorer
2. Navigate to your original `ai-api-service` folder
3. Press `Ctrl + A` to select all files
4. Press `Ctrl + C` to copy
5. Navigate to `Desktop\ai-api-ollama` (your Space folder)
6. Press `Ctrl + V` to paste
7. When asked about replacing files, click **"Replace"**

**Mac**:
1. Open Finder
2. Navigate to your original `ai-api-service` folder
3. Press `Cmd + A` to select all files
4. Press `Cmd + C` to copy
5. Navigate to `Desktop/ai-api-ollama` (your Space folder)
6. Press `Cmd + V` to paste

**Option B: Using the Command Line**

From the terminal, in your Space folder:

**Windows**:
```bash
xcopy /E /I "C:\Users\YourName\Downloads\ai-api-service\*" .
```

**Mac/Linux**:
```bash
cp -r ~/Downloads/ai-api-service/* .
```

### **Step 4.3: Verify Files Were Copied**

In your terminal (inside the `ai-api-ollama` folder):

```bash
ls
```

You should see these folders/files:
- `backend/`
- `examples/`
- `tests/`
- `package.json`
- `README.md`
- `.env.example`
- `Dockerfile.huggingface`
- And many more files...

✅ If you see these, you're good to proceed!

---
## 🐳 **PART 5: Prepare the Dockerfile for Hugging Face**

### **Step 5.1: Rename the Dockerfile**

Hugging Face expects a file named exactly `Dockerfile` (no extension).

**Windows Command Prompt**:
```bash
ren Dockerfile.huggingface Dockerfile
```

**Mac/Linux Terminal**:
```bash
mv Dockerfile.huggingface Dockerfile
```

### **Step 5.2: Verify the Dockerfile**

```bash
cat Dockerfile
```

You should see content starting with `FROM node:18-alpine AS builder`.

✅ Good to go!

---
## 📝 **PART 6: Create Space Configuration Files**

### **Step 6.1: Create a README.md for Your Space**

This file tells Hugging Face how to run your Space.

**Create a new file called `README.md`** in your `ai-api-ollama` folder:

**Windows**:
```bash
notepad README.md
```

**Mac/Linux**:
```bash
nano README.md
```

**Copy and paste this EXACT content** (replace YOUR_USERNAME):

````markdown
---
title: AI API Service with Ollama
emoji: 🤖
colorFrom: blue
colorTo: purple
sdk: docker
app_port: 7860
pinned: false
---

# AI API Service with Ollama

A production-ready AI API service powered by Ollama. No OpenAI API key needed!

## 🚀 Features

- 💬 **Multi-turn Chat** - Conversational AI with Llama2/Llama3
- 📚 **RAG** - Retrieval-Augmented Generation with vector search
- 🖼️ **Image Generation** - Text-to-image (requires additional API key)
- 🎙️ **Voice Synthesis** - Text-to-speech (requires additional API key)
- 📄 **Document Processing** - Upload and query PDFs, DOCX, TXT
- 🔒 **Authentication** - Secure API key-based access
- ⚡ **Rate Limiting** - Prevent abuse

## 📡 API Endpoint

```
https://YOUR_USERNAME-ai-api-ollama.hf.space
```

## 🔑 Quick Start

### Health Check

```bash
curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
```

### Chat Example

```bash
curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {"role": "user", "content": "Explain machine learning in simple terms"}
    ]
  }'
```

### RAG Example

```bash
curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/rag/query \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "What are transformers in AI?",
    "top_k": 5
  }'
```

## 🔐 Authentication

Default API key: `demo-key-1`

**⚠️ IMPORTANT**: Change this in Space settings for production use!

## 📚 Available Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/health` | GET | Service health check |
| `/metrics` | GET | Usage metrics |
| `/ai/chat` | POST | Multi-turn conversation |
| `/ai/query` | GET | Simple question answering |
| `/rag/query` | POST | Query with document retrieval |
| `/image/generate` | POST | Generate images (needs API key) |
| `/voice/synthesize` | POST | Text to speech (needs API key) |
| `/upload` | POST | Upload documents |

## ⚙️ Configuration

Configured with Ollama running **inside the Space** for true serverless deployment.

**Current Settings**:
- Model: Llama 2 (7B)
- Embedding Model: nomic-embed-text
- Hardware: See Space settings

## 🎯 Use Cases

- Chatbot backend for web/mobile apps
- Document Q&A system
- AI-powered search
- Content generation API
- Educational AI assistant

## 📖 Documentation

Full API documentation: [See repository](https://github.com/your-username/ai-api-service)

## 💡 Tips

1. **First request is slow** - Ollama loads the model on first use (~30 seconds)
2. **Subsequent requests are fast** - Model stays in memory
3. **Use persistent hardware** - Upgrade from CPU to GPU for better performance
4. **Monitor costs** - Free tier works great for testing, upgrade for production

## 🆘 Support

Having issues? Check the logs or open an issue on GitHub.

---

Built with [Encore.ts](https://encore.dev) and [Ollama](https://ollama.ai)
````

**Save the file**:
- Notepad: File → Save
- Nano: Press `Ctrl + O`, then `Enter`, then `Ctrl + X`

---
## 🔐 **PART 7: Configure Environment Variables in Space Settings**

### **Step 7.1: Go to Your Space Settings**

1. Open your browser
2. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
3. Scroll down to the **"Variables and secrets"** section

### **Step 7.2: Add Environment Variables**

Click **"New variable"** for each of these:

#### **Variable 1: API_KEYS**
- **Name**: `API_KEYS`
- **Value**: `my-secret-key-12345,another-key-67890`
- ⚠️ **IMPORTANT**: Replace with your own random keys!
- Use strong, random strings (20+ characters)
- Separate multiple keys with commas (no spaces)
- Click **"Save"**

#### **Variable 2: ADMIN_API_KEYS** (Optional but recommended)
- **Name**: `ADMIN_API_KEYS`
- **Value**: `admin-super-secret-key-99999`
- ⚠️ Make this DIFFERENT from regular API keys
- This bypasses rate limits
- Click **"Save"**

#### **Variable 3: OLLAMA_MODEL**
- **Name**: `OLLAMA_MODEL`
- **Value**: Choose one:
  - `phi:latest` (Fastest, smallest - 1.3GB - **RECOMMENDED FOR FREE CPU**)
  - `llama2:latest` (Good quality - 4GB)
  - `llama3:latest` (Best quality - 4.7GB - needs GPU)
  - `mistral:latest` (Very good - 4GB)
- Click **"Save"**

**Recommendation for the FREE tier**: Use `phi:latest`

#### **Variable 4: OLLAMA_EMBEDDING_MODEL**
- **Name**: `OLLAMA_EMBEDDING_MODEL`
- **Value**: `nomic-embed-text`
- Leave as is; this works great for RAG
- Click **"Save"**

#### **Variable 5: RATE_LIMIT_DEFAULT**
- **Name**: `RATE_LIMIT_DEFAULT`
- **Value**: `100`
- This means 100 requests per minute for regular API keys (see the token-bucket sketch at the end of this part)
- Click **"Save"**

#### **Variable 6: LOG_LEVEL** (Optional)
- **Name**: `LOG_LEVEL`
- **Value**: `info`
- Click **"Save"**

### **Step 7.3: Verify Your Variables**

You should now see these variables listed:
- ✅ `API_KEYS`
- ✅ `ADMIN_API_KEYS` (if you added it)
- ✅ `OLLAMA_MODEL`
- ✅ `OLLAMA_EMBEDDING_MODEL`
- ✅ `RATE_LIMIT_DEFAULT`

---
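To see what `RATE_LIMIT_DEFAULT` controls, here is a minimal TypeScript sketch of the token bucket scheme the project's changelog describes. The class and names below are illustrative assumptions, not the repo's actual `backend/utils/rate_limit.ts`.

```typescript
// Minimal per-key token bucket: RATE_LIMIT_DEFAULT tokens refill each minute.
// Illustrative only; names and structure are assumptions.
class TokenBucket {
  private tokens: number;
  private lastRefill = Date.now();

  constructor(private capacity: number) {
    this.tokens = capacity;
  }

  tryConsume(): boolean {
    const elapsedMin = (Date.now() - this.lastRefill) / 60_000;
    // Refill proportionally to elapsed time, capped at capacity.
    this.tokens = Math.min(this.capacity, this.tokens + elapsedMin * this.capacity);
    this.lastRefill = Date.now();
    if (this.tokens >= 1) {
      this.tokens -= 1;
      return true;   // request allowed
    }
    return false;    // reject with 429 Too Many Requests
  }
}

const limit = Number(process.env.RATE_LIMIT_DEFAULT ?? 100);
const buckets = new Map<string, TokenBucket>();

function allowRequest(apiKey: string): boolean {
  if (!buckets.has(apiKey)) buckets.set(apiKey, new TokenBucket(limit));
  return buckets.get(apiKey)!.tryConsume();
}
```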
## 📤 **PART 8: Push Code to Hugging Face**

Now we'll upload all the files to Hugging Face.

### **Step 8.1: Configure Git (First Time Only)**

In your terminal (inside the `ai-api-ollama` folder):

```bash
git config user.email "you@example.com"
git config user.name "Your Name"
```

Replace with your actual email and name.

### **Step 8.2: Add All Files to Git**

```bash
git add .
```

The `.` means "add all files in this folder".

### **Step 8.3: Commit the Files**

```bash
git commit -m "Initial deployment with Ollama support"
```

You should see output like:
```
[main abc1234] Initial deployment with Ollama support
 XX files changed, XXX insertions(+)
```

### **Step 8.4: Push to Hugging Face**

```bash
git push
```

When prompted for credentials:
- **Username**: Your Hugging Face username
- **Password**: Your Hugging Face token (starts with `hf_`)

You'll see:
```
Enumerating objects: XX, done.
Counting objects: 100% (XX/XX), done.
Writing objects: 100% (XX/XX), XX.XX MiB | XX.XX MiB/s, done.
```

✅ **Success!** Your code is now on Hugging Face.

---
## ⏳ **PART 9: Wait for Build & Monitor Progress**

### **Step 9.1: Go to Your Space**

1. Open your browser: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama`
2. You'll see a yellow "Building" status at the top

### **Step 9.2: Watch the Build Logs**

1. Click on the **"Logs"** tab (near the top)
2. You'll see real-time output like:
   ```
   Building Docker image...
   Step 1/15 : FROM node:18-alpine AS builder
   ...
   ```

### **Step 9.3: What to Expect (Timeline)**

| Time | What's Happening | What You'll See |
|------|------------------|-----------------|
| 0-2 min | Docker image building | `Building Docker image...` |
| 2-5 min | Installing Node dependencies | `npm install...` |
| 5-8 min | Installing Ollama | `Installing Ollama...` |
| 8-10 min | Starting services | `Starting Ollama...` |
| 10-15 min | **Downloading Ollama model** | `Pulling model: phi:latest` ⏳ **LONGEST STEP** |
| 15+ min | Warming up model | `Warming up model...` |
| Final | **Space is RUNNING** | 🟢 Green "Running" status |

**Total time**: 15-20 minutes for the first deployment

### **Step 9.4: Troubleshooting Build Errors**

If you see **red error messages**:

**Common Error 1**: `npm install failed`
- **Fix**: Check that `package.json` was copied correctly
- Re-run: `git add package.json && git commit -m "fix package.json" && git push`

**Common Error 2**: `Port 7860 already in use`
- **Fix**: This shouldn't happen, but if it does, check that README.md has `app_port: 7860`

**Common Error 3**: `Model download timeout`
- **Fix**: Use a smaller model like `phi:latest` in the environment variables
- Or upgrade to GPU hardware (see Part 11)

**Common Error 4**: `Out of memory`
- **Fix**: The model is too big for the free CPU. Use `phi:latest` or upgrade to a paid tier

### **Step 9.5: Verify the Space is Running**

When the build completes:
1. The status changes to 🟢 **"Running"**
2. You'll see in the logs: `Starting AI API Service on port 7860...`
3. **Your API is now LIVE!**

---
## 🎉 **PART 10: Test Your Live API**

### **Step 10.1: Get Your Space URL**

Your API is available at:
```
https://YOUR_USERNAME-ai-api-ollama.hf.space
```

**Example**:
```
https://johndoe-ai-api-ollama.hf.space
```

### **Step 10.2: Test the Health Endpoint**

**Option A: Use a Browser**
1. Open your browser
2. Go to: `https://YOUR_USERNAME-ai-api-ollama.hf.space/health`
3. You should see JSON like:
   ```json
   {
     "status": "healthy",
     "version": "1.0.0",
     "services": [...]
   }
   ```

✅ If you see this, your API is working!

**Option B: Use the Command Line**

```bash
curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
```

### **Step 10.3: Test the Chat Endpoint**

**Copy this command** (replace YOUR_USERNAME and use one of your API keys):

```bash
curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
  -H "Authorization: Bearer my-secret-key-12345" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {
        "role": "user",
        "content": "Hello! Can you explain what you are in one sentence?"
      }
    ]
  }'
```

**Expected response** (takes 5-30 seconds for the first request):
```json
{
  "reply": "I am an AI assistant powered by Llama, designed to help answer questions...",
  "model": "llama2",
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 50,
    "total_tokens": 75
  },
  "sources": null
}
```

✅ **Success!** Your AI API is working!

### **Step 10.4: Test the RAG Endpoint (Optional)**

First, upload a document:

```bash
# Create a test document
echo "The AI API Service is a production-ready API for chatbots. It supports Ollama, OpenAI, and HuggingFace." > test.txt

# Convert to base64
base64 test.txt > test.txt.b64

# Upload (Mac/Linux)
curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/upload \
  -H "Authorization: Bearer my-secret-key-12345" \
  -H "Content-Type: application/json" \
  -d "{
    \"filename\": \"test.txt\",
    \"content_base64\": \"$(cat test.txt.b64)\",
    \"metadata\": {\"title\": \"Test Document\"}
  }"
```

Then query it:

```bash
curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/rag/query \
  -H "Authorization: Bearer my-secret-key-12345" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "What does the API support?",
    "top_k": 3
  }'
```

A TypeScript version of this upload flow appears after this part.

---
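The same upload can be done from Node without shelling out to `base64`. Here is a sketch using only built-ins (Node 18+ for global `fetch`); the endpoint and JSON field names mirror the curl example above, while everything else is illustrative.

```typescript
// Upload a local file to the /upload endpoint shown above.
// Endpoint and JSON fields mirror the curl example; sketch only.
import { readFile } from "node:fs/promises";

async function uploadDocument(path: string, apiUrl: string, apiKey: string) {
  const contentBase64 = (await readFile(path)).toString("base64");
  const res = await fetch(`${apiUrl}/upload`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      filename: path.split("/").pop(),
      content_base64: contentBase64,
      metadata: { title: "Test Document" },
    }),
  });
  if (!res.ok) throw new Error(`Upload failed: ${res.status}`);
  return res.json();
}

// Usage:
// uploadDocument("test.txt", "https://YOUR_USERNAME-ai-api-ollama.hf.space", "my-secret-key-12345")
//   .then(console.log);
```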
## 📊 **PART 11: Monitor and Optimize (Optional)**

### **Step 11.1: Check Metrics**

```bash
curl https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
  -H "Authorization: Bearer my-secret-key-12345"
```

You'll see:
- Total requests
- Errors
- Response times
- Model usage

(A small polling script that acts on these fields follows at the end of this part.)

### **Step 11.2: Upgrade Hardware (If Needed)**

If your Space is slow or timing out:

1. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
2. Scroll to **"Space hardware"**
3. Click **"Change hardware"**
4. Select:
   - **CPU upgrade** ($0.60/hr) - 2x faster than free
   - **GPU T4** ($0.60/hr) - 10x faster, supports bigger models
   - **GPU A10G** ($3.15/hr) - Best performance
5. Click **"Update Space"**
6. The Space will restart with the new hardware (~5 minutes)

### **Step 11.3: Use Bigger Models**

Once you have a GPU:

1. Go to Settings → Variables and secrets
2. Edit `OLLAMA_MODEL`
3. Change it to: `llama3:latest` or `mistral:latest`
4. Save
5. The Space will restart and download the new model

---
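If you want to watch these numbers continuously rather than curl by hand, a small script can poll `/metrics` and flag a rising error rate. The sketch below assumes the response shape shown in the metrics example in Part 15; the 5% threshold and one-minute interval are arbitrary choices.

```typescript
// Poll /metrics and warn when the error rate crosses a threshold.
// Response fields match the metrics example in Part 15; sketch only.
const API_URL = "https://YOUR_USERNAME-ai-api-ollama.hf.space";
const API_KEY = "my-secret-key-12345";

async function checkMetrics(): Promise<void> {
  const res = await fetch(`${API_URL}/metrics`, {
    headers: { Authorization: `Bearer ${API_KEY}` },
  });
  const m = await res.json();
  const errorRate = m.requests_total > 0 ? m.errors_total / m.requests_total : 0;
  console.log(`requests=${m.requests_total} errors=${m.errors_total} avg=${m.average_response_time_ms}ms`);
  if (errorRate > 0.05) {
    console.warn(`⚠️ error rate ${(errorRate * 100).toFixed(1)}% - check the Space logs`);
  }
}

setInterval(checkMetrics, 60_000); // poll once a minute
```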
## 🔒 **PART 12: Security Best Practices**

### **Step 12.1: Change the Default API Keys**

**⚠️ CRITICAL FOR PRODUCTION**

1. Go to Space Settings → Variables
2. Edit `API_KEYS`
3. Replace `demo-key-1` with strong random keys (see the generation sketch at the end of this part):
   ```
   ak_live_a8f7d9e2c1b4f5a7d8e9c2b1a5f7,ak_live_b9c2d1e3f4a5b7c8d9e1f2a3b5
   ```
4. **Never share these keys publicly!**

### **Step 12.2: Make the Space Private (Optional)**

1. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
2. Scroll to **"Rename or change repo visibility"**
3. Click **"Make private"**
4. Confirm

Now only you can see the Space, but the API still works for anyone with the URL and an API key.

### **Step 12.3: Monitor Usage**

Check the logs regularly:
1. Go to Space → Logs tab
2. Look for suspicious activity:
   - Many failed authentication attempts
   - Unusually high request volume
   - Error patterns

---
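For generating keys like the ones above, Node's built-in `crypto` module is enough; this sketch uses only standard APIs, and the `ak_live_` prefix just mirrors the example keys in Step 12.1.

```typescript
// Generate strong random API keys with Node's standard crypto module.
import { randomBytes } from "node:crypto";

function generateApiKey(prefix = "ak_live_"): string {
  // 24 random bytes -> 48 hex characters, well past the 20+ char guidance.
  return prefix + randomBytes(24).toString("hex");
}

console.log([generateApiKey(), generateApiKey()].join(","));
// Paste the comma-separated output straight into the API_KEYS variable.
```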
## 🎯 **PART 13: Using Your API in Applications**

### **Example: JavaScript/TypeScript Web App**

```javascript
// Save as: app.js

const API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space';
const API_KEY = 'my-secret-key-12345'; // Your actual key

async function chat(message) {
  const response = await fetch(`${API_URL}/ai/chat`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      conversation: [
        { role: 'user', content: message }
      ]
    })
  });

  const data = await response.json();
  return data.reply;
}

// Usage
chat('Hello!').then(reply => {
  console.log('AI:', reply);
});
```

### **Example: Python Application**

```python
# Save as: app.py

import requests

API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space'
API_KEY = 'my-secret-key-12345'

def chat(message):
    response = requests.post(
        f'{API_URL}/ai/chat',
        headers={
            'Authorization': f'Bearer {API_KEY}',
            'Content-Type': 'application/json'
        },
        json={
            'conversation': [
                {'role': 'user', 'content': message}
            ]
        }
    )
    return response.json()['reply']

# Usage
reply = chat('Hello!')
print(f'AI: {reply}')
```

### **Example: Mobile App (React Native)**

```javascript
// Save as: ChatService.js

const API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space';
const API_KEY = 'my-secret-key-12345';

export async function sendMessage(message) {
  try {
    const response = await fetch(`${API_URL}/ai/chat`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        conversation: [
          { role: 'user', content: message }
        ]
      })
    });

    if (!response.ok) {
      throw new Error('API request failed');
    }

    const data = await response.json();
    return data.reply;
  } catch (error) {
    console.error('Chat error:', error);
    throw error;
  }
}
```

A variant of these clients with a timeout and one retry, for slow cold starts, follows after this part.

---
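Because the first request after a cold start can take ~30 seconds (see the tips in Part 6's README and Issue 4 below), client code benefits from a timeout plus a single retry. A sketch, assuming the same endpoint shape as the examples above; the 60-second timeout and one-retry policy are arbitrary choices.

```typescript
// Fetch wrapper with a timeout and a single retry, for slow cold starts.
// Endpoint shape matches the examples above; the retry policy is an assumption.
async function chatWithRetry(message: string, apiUrl: string, apiKey: string): Promise<string> {
  for (let attempt = 1; attempt <= 2; attempt++) {
    try {
      const response = await fetch(`${apiUrl}/ai/chat`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ conversation: [{ role: "user", content: message }] }),
        signal: AbortSignal.timeout(60_000), // allow time for a full model cold start
      });
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      const data = await response.json();
      return data.reply;
    } catch (err) {
      if (attempt === 2) throw err;
      console.warn("First attempt failed (likely a cold start), retrying...", err);
    }
  }
  throw new Error("unreachable");
}
```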
## 🆘 **PART 14: Troubleshooting Common Issues**

### **Issue 1: "Space is building for too long"**

**Symptoms**: Build takes 30+ minutes

**Causes**:
- Large model download (llama3 is 4.7GB)
- Slow internet on Hugging Face servers
- Free tier resource limits

**Solutions**:
1. Use a smaller model: `phi:latest` (1.3GB)
2. Upgrade to GPU hardware for faster downloads
3. Wait patiently - the first build is always slow

---

### **Issue 2: "Space crashed / Runtime error"**

**Symptoms**: Red "Runtime error" status

**Check the logs for**:

**Error**: `Out of memory`
- **Fix**: The model is too big for the hardware
- **Solution**: Use `phi:latest` or upgrade to GPU T4

**Error**: `Port 7860 already in use`
- **Fix**: Check that README.md has the correct `app_port: 7860`
- **Solution**: Edit README.md and push again

**Error**: `Ollama failed to start`
- **Fix**: Dockerfile issue
- **Solution**: Verify the Dockerfile was renamed correctly

---

### **Issue 3: "API returns 401 Unauthorized"**

**Symptoms**:
```json
{"error": "Invalid API key"}
```

**Solutions**:
1. **Check your Authorization header**:
   ```bash
   # Correct format:
   -H "Authorization: Bearer my-secret-key-12345"

   # NOT:
   -H "Authorization: my-secret-key-12345"  # Missing "Bearer"
   ```

2. **Verify the API key is in the Space settings**:
   - Go to Settings → Variables
   - Check that `API_KEYS` contains your key
   - Keys are case-sensitive!

3. **Try the default key**:
   ```bash
   -H "Authorization: Bearer demo-key-1"
   ```

---

### **Issue 4: "API is very slow (30+ seconds)"**

**Causes**:
- The first request loads the model into memory (normal)
- The free CPU tier is slow
- The model is too large for the hardware

**Solutions**:
1. **The first request is always slow** - subsequent requests are fast
2. **Upgrade to GPU T4**:
   - Settings → Space hardware → GPU T4
   - 10x faster inference
3. **Use a smaller model**: `phi:latest`
4. **Add model warmup** (already in the Dockerfile):
   - Keeps the model loaded
   - Reduces cold-start time

---

### **Issue 5: "Cannot upload documents"**

**Error**: `File too large`

**Fix**:
- The default max size is 10MB
- To increase it, add an environment variable:
  ```
  MAX_FILE_SIZE_MB=50
  ```

**Error**: `Invalid file format`

**Fix**:
- Only PDF, DOCX, and TXT are supported
- Ensure the file extension is correct
- Check that the file is not corrupted

---

### **Issue 6: "RAG returns no results"**

**Symptoms**: Empty `sources` array in the response

**Causes**:
1. No documents uploaded yet
2. The query doesn't match the document content
3. The embedding model is not loaded

**Solutions**:
1. **Upload a document first**:
   ```bash
   curl -X POST https://YOUR_API/upload \
     -H "Authorization: Bearer YOUR_KEY" \
     -F "file=@document.pdf"
   ```

2. **Wait for processing** (check the logs):
   ```
   Document processed successfully: doc_abc123
   ```

3. **Try a broader query**:
   - Instead of: "What is the exact price?"
   - Try: "pricing information"

---

### **Issue 7: "How do I see errors?"**

**Steps**:
1. Go to your Space
2. Click the **"Logs"** tab
3. Look for lines with:
   ```
   "level": "error"
   ```
4. Read the `"message"` field

**Common errors and fixes**:

```json
{"level":"error","message":"Invalid API key"}
```
→ Fix: Check the Authorization header

```json
{"level":"error","message":"Rate limit exceeded"}
```
→ Fix: Wait 60 seconds or use an admin key

```json
{"level":"error","message":"Ollama API error"}
```
→ Fix: The model is not loaded; wait for startup to complete

---

### **Issue 8: "Space keeps restarting"**

**Symptoms**: Status alternates between Building and Running

**Causes**:
- The application crashes on startup
- Out of memory
- Port configuration issue

**Debug steps**:
1. Check the logs for the crash reason
2. Verify the environment variables are set
3. Try a smaller model
4. Contact Hugging Face support if it persists

---
## 📖 **PART 15: Complete API Reference**
|
| 1039 |
+
|
| 1040 |
+
### **Base URL**
|
| 1041 |
+
```
|
| 1042 |
+
https://YOUR_USERNAME-ai-api-ollama.hf.space
|
| 1043 |
+
```
|
| 1044 |
+
|
| 1045 |
+
### **Authentication**
|
| 1046 |
+
All endpoints (except `/health`) require:
|
| 1047 |
+
```
|
| 1048 |
+
Authorization: Bearer YOUR_API_KEY
|
| 1049 |
+
```
|
| 1050 |
+
|
| 1051 |
+
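In application code it helps to attach that header in one place. A minimal TypeScript sketch, assuming Node 18+ `fetch` and placeholder values for the URL and key:

```typescript
// Minimal sketch: one place to build the Authorization header.
// BASE_URL and API_KEY are placeholders for your Space URL and key.
const BASE_URL = "https://YOUR_USERNAME-ai-api-ollama.hf.space";
const API_KEY = process.env.API_KEY ?? "demo-key-1";

function authHeaders(): Record<string, string> {
  return {
    "Authorization": `Bearer ${API_KEY}`,
    "Content-Type": "application/json",
  };
}
```
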
---

### **1. Health Check**

**Endpoint**: `GET /health`

**No authentication required**

**Example**:
```bash
curl https://YOUR_API/health
```

**Response**:
```json
{
  "status": "healthy",
  "version": "1.0.0",
  "services": [
    {"name": "llm", "status": "up"},
    {"name": "vector_db", "status": "up"}
  ],
  "uptime_seconds": 3600
}
```

---

### **2. Metrics**

**Endpoint**: `GET /metrics`

**Requires authentication**

**Example**:
```bash
curl https://YOUR_API/metrics \
  -H "Authorization: Bearer YOUR_KEY"
```

**Response**:
```json
{
  "timestamp": 1698765432000,
  "requests_total": 150,
  "requests_by_endpoint": {
    "/ai/chat": 100,
    "/rag/query": 50
  },
  "errors_total": 5,
  "rate_limit_hits": 2,
  "average_response_time_ms": 1250
}
```

---

### **3. Simple Chat**

**Endpoint**: `POST /ai/chat`

**Request**:
```json
{
  "conversation": [
    {"role": "user", "content": "Hello!"}
  ],
  "model": "llama2",
  "options": {
    "temperature": 0.7,
    "max_tokens": 500
  }
}
```

**Response**:
```json
{
  "reply": "Hello! How can I help you today?",
  "model": "llama2",
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 20,
    "total_tokens": 30
  },
  "sources": null
}
```

**Example**:
```bash
curl -X POST https://YOUR_API/ai/chat \
  -H "Authorization: Bearer YOUR_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {"role": "user", "content": "Explain AI in one sentence"}
    ]
  }'
```

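The same call from TypeScript, reusing `BASE_URL` and `authHeaders()` from the Authentication section above (a sketch; the field names mirror the request/response shapes shown here):

```typescript
// Minimal sketch: call POST /ai/chat with a conversation array.
// Uses BASE_URL/authHeaders() placeholders from the Authentication section.
interface ChatMessage { role: "system" | "user" | "assistant"; content: string; }
interface ChatResponse { reply: string; model: string; usage?: unknown; }

async function chat(conversation: ChatMessage[]): Promise<ChatResponse> {
  const res = await fetch(`${BASE_URL}/ai/chat`, {
    method: "POST",
    headers: authHeaders(),
    body: JSON.stringify({ conversation }),
  });
  if (!res.ok) throw new Error(`Chat failed: ${res.status}`);
  return res.json() as Promise<ChatResponse>;
}

// Usage:
// const { reply } = await chat([{ role: "user", content: "Explain AI in one sentence" }]);
```
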
---

### **4. Multi-turn Conversation**

**Endpoint**: `POST /ai/chat`

**Request** (with context):
```json
{
  "conversation": [
    {"role": "user", "content": "What is 2+2?"},
    {"role": "assistant", "content": "2+2 equals 4."},
    {"role": "user", "content": "What about 2+3?"}
  ]
}
```

**Response**:
```json
{
  "reply": "2+3 equals 5.",
  "model": "llama2",
  "usage": {...}
}
```

---

### **5. RAG Query**

**Endpoint**: `POST /rag/query`

**Request**:
```json
{
  "query": "What are the main features?",
  "top_k": 5,
  "model": "llama2",
  "use_retrieval": true
}
```

**Response**:
```json
{
  "answer": "The main features include...",
  "sources": [
    {
      "doc_id": "doc_123",
      "chunk_id": "chunk_5",
      "content": "Feature description...",
      "score": 0.92,
      "metadata": {"title": "Documentation"}
    }
  ],
  "model": "llama2",
  "usage": {...},
  "retrieval_time_ms": 250
}
```

**Example**:
```bash
curl -X POST https://YOUR_API/rag/query \
  -H "Authorization: Bearer YOUR_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "What is machine learning?",
    "top_k": 3
  }'
```

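And the equivalent TypeScript sketch, again reusing the helpers from the Authentication section (field names follow the shapes above):

```typescript
// Minimal sketch: call POST /rag/query and return the answer with its sources.
// Uses BASE_URL/authHeaders() placeholders from the Authentication section.
interface RagSource { doc_id: string; chunk_id: string; content: string; score: number; }
interface RagResponse { answer: string; sources: RagSource[]; }

async function ragQuery(query: string, topK = 5): Promise<RagResponse> {
  const res = await fetch(`${BASE_URL}/rag/query`, {
    method: "POST",
    headers: authHeaders(),
    body: JSON.stringify({ query, top_k: topK }),
  });
  if (!res.ok) throw new Error(`RAG query failed: ${res.status}`);
  return res.json();
}

// Usage:
// const { answer, sources } = await ragQuery("What is machine learning?", 3);
```
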
---

### **6. Upload Document**

**Endpoint**: `POST /upload`

**Request**:
```json
{
  "filename": "document.txt",
  "content_base64": "VGhpcyBpcyBhIHRlc3Q=",
  "metadata": {
    "title": "Test Document",
    "category": "docs"
  }
}
```

**Response**:
```json
{
  "doc_id": "doc_abc123",
  "filename": "document.txt",
  "size_bytes": 1024,
  "status": "processing",
  "estimated_chunks": 5
}
```

**Example (Linux/Mac)**:
```bash
# Encode file to base64 (strip newlines: Linux base64 wraps lines,
# which would break the JSON below)
base64 document.txt | tr -d '\n' > document.b64

# Upload
curl -X POST https://YOUR_API/upload \
  -H "Authorization: Bearer YOUR_KEY" \
  -H "Content-Type: application/json" \
  -d "{
    \"filename\": \"document.txt\",
    \"content_base64\": \"$(cat document.b64)\",
    \"metadata\": {\"title\": \"My Document\"}
  }"
```

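Doing the encode-and-upload from TypeScript sidesteps the shell quoting and line-wrapping pitfalls above. A sketch, assuming Node 18+ and the helpers from the Authentication section (Node's `Buffer` base64 output has no line wrapping):

```typescript
// Minimal sketch: read a local file, base64-encode it, POST to /upload.
// Uses BASE_URL/authHeaders() placeholders from the Authentication section.
import { readFile } from "node:fs/promises";
import { basename } from "node:path";

async function uploadDocument(path: string, title: string): Promise<{ doc_id: string }> {
  // Buffer.toString("base64") emits one unwrapped line, safe to embed in JSON.
  const content_base64 = (await readFile(path)).toString("base64");
  const res = await fetch(`${BASE_URL}/upload`, {
    method: "POST",
    headers: authHeaders(),
    body: JSON.stringify({
      filename: basename(path),
      content_base64,
      metadata: { title },
    }),
  });
  if (!res.ok) throw new Error(`Upload failed: ${res.status}`);
  return res.json();
}
```
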
---

### **7. Get Document Sources**

**Endpoint**: `GET /docs/:id/sources`

**Example**:
```bash
curl https://YOUR_API/docs/doc_abc123/sources \
  -H "Authorization: Bearer YOUR_KEY"
```

**Response**:
```json
{
  "sources": [
    {
      "doc_id": "doc_abc123",
      "chunk_id": "chunk_0",
      "content": "This is the first chunk...",
      "score": 1.0,
      "metadata": {...}
    }
  ]
}
```

---

### **8. Simple Query**

**Endpoint**: `GET /ai/query?q=QUESTION`

**Example**:
```bash
curl "https://YOUR_API/ai/query?q=What+is+AI" \
  -H "Authorization: Bearer YOUR_KEY"
```

**Response**:
```json
{
  "answer": "AI stands for Artificial Intelligence...",
  "model": "llama2"
}
```

---

### **9. Get Available Models**

**Endpoint**: `GET /rag/models`

**Example**:
```bash
curl https://YOUR_API/rag/models \
  -H "Authorization: Bearer YOUR_KEY"
```

**Response**:
```json
{
  "models": ["ollama", "llama", "llama2", "llama3", "mistral"],
  "default_model": "llama2"
}
```

---

## 🎓 **PART 16: Advanced Tips & Tricks**

### **Tip 1: Optimize Response Time**

**Add warmup requests** to keep the model in memory:

Create a simple cron job or scheduled task:
```bash
# Every 5 minutes, make a request to keep the model loaded
# (a crontab entry must be a single line; backslash continuations don't work)
*/5 * * * * curl -X POST https://YOUR_API/ai/chat -H "Authorization: Bearer YOUR_KEY" -H "Content-Type: application/json" -d '{"conversation":[{"role":"user","content":"ping"}]}'
```

---

### **Tip 2: Use System Prompts for Consistency**

```bash
curl -X POST https://YOUR_API/ai/chat \
  -H "Authorization: Bearer YOUR_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {
        "role": "system",
        "content": "You are a friendly customer support agent. Be helpful and concise."
      },
      {
        "role": "user",
        "content": "How do I reset my password?"
      }
    ]
  }'
```

---

### **Tip 3: Batch Document Upload**

Upload multiple documents efficiently:

```bash
# Create script: batch_upload.sh

for file in docs/*.txt; do
  echo "Uploading $file..."
  # Strip newlines so the base64 payload stays on one line
  base64 "$file" | tr -d '\n' > temp.b64
  curl -X POST https://YOUR_API/upload \
    -H "Authorization: Bearer YOUR_KEY" \
    -H "Content-Type: application/json" \
    -d "{
      \"filename\": \"$(basename "$file")\",
      \"content_base64\": \"$(cat temp.b64)\"
    }"
  sleep 2 # Rate limiting
done

rm temp.b64
```

---

### **Tip 4: Monitor Costs**

If using paid hardware:

1. Check Hugging Face billing: https://huggingface.co/settings/billing
2. Set up budget alerts
3. Monitor Space uptime
4. Pause the Space when not in use:
   - Settings → "Pause Space"
   - Saves money, stops billing
   - Resume anytime

---

### **Tip 5: Create API Key Tiers**

**In Space Settings**, set up different keys for different users:

```
# Free tier - limited rate
API_KEYS=free_user_key_1,free_user_key_2

# Premium tier - higher rate
PREMIUM_API_KEYS=premium_user_key_1

# Admin tier - unlimited
ADMIN_API_KEYS=admin_key_1
```

Then adjust rate limits:
```
RATE_LIMIT_DEFAULT=60
RATE_LIMIT_PREMIUM=300
RATE_LIMIT_ADMIN=10000
```

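For reference, here is an illustrative TypeScript sketch of how such a tier lookup could work. The real logic lives in `backend/utils/auth.ts` and `backend/utils/rate_limit.ts`; the helper names below are assumptions for illustration, not the service's actual API:

```typescript
// Illustrative sketch only: resolve a key's tier and per-minute limit from env vars.
// These helper names are assumptions, not the backend's actual functions.
type Tier = "default" | "premium" | "admin";

function parseKeys(envVar: string | undefined): Set<string> {
  return new Set((envVar ?? "").split(",").map((k) => k.trim()).filter(Boolean));
}

function tierFor(apiKey: string): Tier {
  if (parseKeys(process.env.ADMIN_API_KEYS).has(apiKey)) return "admin";
  if (parseKeys(process.env.PREMIUM_API_KEYS).has(apiKey)) return "premium";
  return "default";
}

function rateLimitFor(tier: Tier): number {
  const limits: Record<Tier, number> = {
    default: Number(process.env.RATE_LIMIT_DEFAULT ?? 60),
    premium: Number(process.env.RATE_LIMIT_PREMIUM ?? 300),
    admin: Number(process.env.RATE_LIMIT_ADMIN ?? 10000),
  };
  return limits[tier];
}
```
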
---

## ✅ **Final Checklist**

Before going live, verify:

- [ ] Space is running (green status)
- [ ] Health check returns `"status": "healthy"`
- [ ] Chat endpoint responds correctly
- [ ] Changed default API keys to strong random strings
- [ ] Tested with your own API key
- [ ] Documented your API keys securely (password manager)
- [ ] Set appropriate rate limits
- [ ] Chose the right model for your hardware
- [ ] Tested all endpoints you plan to use
- [ ] Reviewed logs for errors
- [ ] (Optional) Upgraded hardware if needed
- [ ] (Optional) Made the Space private if needed

---

## 🎉 **Congratulations!**

You now have:
✅ A fully functional AI API running on Hugging Face Spaces
✅ Powered by Ollama (no OpenAI costs!)
✅ Accessible from anywhere via HTTPS
✅ Secure with API key authentication
✅ Ready to integrate into your apps

**Your API URL**:
```
https://YOUR_USERNAME-ai-api-ollama.hf.space
```

**Share your API** (securely):
- Give the URL + an API key to developers
- Use it in web apps, mobile apps, and scripts
- Process millions of requests
- Scale as needed

---

## 📞 **Need Help?**

**If you're stuck**:
1. ✅ Re-read the relevant section
2. ✅ Check Space logs for errors
3. ✅ Try the troubleshooting section
4. ✅ Open an issue on GitHub
5. ✅ Ask on the Hugging Face forums

**Common beginner mistakes**:
- Forgot to rename `Dockerfile.huggingface` to `Dockerfile`
- Used the wrong API key format (missing "Bearer")
- Chose a model too large for the hardware
- Didn't wait for the initial model download

---

## 📚 **What's Next?**

Now that your API is live:

1. **Build a chat interface**:
   - React app
   - Vue app
   - Mobile app
   - WordPress plugin

2. **Add more features**:
   - User accounts
   - Usage analytics
   - Custom models
   - Advanced RAG

3. **Scale up**:
   - Upgrade hardware
   - Add caching
   - Load balancing
   - CDN

4. **Monetize** (optional):
   - Charge for API access
   - Offer different tiers
   - White-label for clients

---

**You did it! 🎉🚀**

Your AI-powered API is now live and ready to change the world!
DEPLOYMENT.md
ADDED
@@ -0,0 +1,435 @@
# Deployment Guide

This guide covers deploying the AI API Service to various platforms.

## Table of Contents
- [Local Development](#local-development)
- [Docker Deployment](#docker-deployment)
- [Encore Cloud](#encore-cloud)
- [Hugging Face Spaces](#hugging-face-spaces)
- [AWS Deployment](#aws-deployment)
- [Google Cloud Platform](#google-cloud-platform)
- [Azure Deployment](#azure-deployment)
- [Environment Variables](#environment-variables)

## Local Development

### Prerequisites
- Node.js 18+
- npm or yarn
- Encore CLI

### Steps

1. **Install Encore CLI** (the CLI ships via Encore's install script, not npm)
   ```bash
   curl -L https://encore.dev/install.sh | bash   # macOS: brew install encoredev/tap/encore
   ```

2. **Install dependencies**
   ```bash
   npm install
   ```

3. **Configure environment**
   ```bash
   cp .env.example .env
   # Edit .env with your API keys
   ```

4. **Run development server**
   ```bash
   encore run
   ```

The API will be available at `http://localhost:8000`

## Docker Deployment

### Build and Run Locally

```bash
docker-compose up -d
```

This starts:
- API service on port 8000
- Redis for caching (optional)

### Build Production Image

```bash
docker build -t ai-api-service:latest .
```

### Run Production Container

```bash
docker run -d \
  -p 8000:8000 \
  -e OPENAI_API_KEY=your_key \
  -e API_KEYS=your_api_keys \
  --name ai-api \
  ai-api-service:latest
```

## Encore Cloud

Encore Cloud provides the easiest deployment experience with automatic infrastructure provisioning.

### Steps

1. **Install Encore CLI**
   ```bash
   curl -L https://encore.dev/install.sh | bash
   ```

2. **Login to Encore**
   ```bash
   encore auth login
   ```

3. **Create app (first time)**
   ```bash
   encore app create ai-api-service
   ```

4. **Set secrets**
   ```bash
   encore secret set OPENAI_API_KEY
   encore secret set HUGGINGFACE_API_KEY
   encore secret set PINECONE_API_KEY
   ```

5. **Deploy**
   ```bash
   encore deploy
   ```

Your API will be deployed with:
- Auto-scaling
- Load balancing
- SSL/TLS certificates
- Monitoring and logs
- Database backups

## Hugging Face Spaces

Deploy as a Docker Space on Hugging Face for easy sharing.

### Steps

1. **Create new Space**
   - Go to https://huggingface.co/new-space
   - Select "Docker" as SDK
   - Choose hardware tier (CPU or GPU)

2. **Clone Space repository**
   ```bash
   git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
   cd YOUR_SPACE
   ```

3. **Copy project files**
   ```bash
   cp -r /path/to/ai-api-service/* .
   ```

4. **Create Dockerfile for HF Spaces**
   ```dockerfile
   FROM node:18-alpine

   WORKDIR /app

   COPY package*.json ./
   RUN npm ci --only=production

   COPY . .

   ENV PORT=7860
   EXPOSE 7860

   CMD ["npm", "start"]
   ```

5. **Configure secrets in Space settings**
   - `OPENAI_API_KEY`
   - `HUGGINGFACE_API_KEY`
   - `API_KEYS`

6. **Push to Space**
   ```bash
   git add .
   git commit -m "Initial deployment"
   git push
   ```

## AWS Deployment

### Using AWS ECS (Elastic Container Service)

1. **Push image to ECR**
   ```bash
   aws ecr create-repository --repository-name ai-api-service

   docker build -t ai-api-service .

   aws ecr get-login-password --region us-east-1 | \
     docker login --username AWS --password-stdin \
     YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com

   docker tag ai-api-service:latest \
     YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest

   docker push YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest
   ```

2. **Create ECS Task Definition**
   ```json
   {
     "family": "ai-api-service",
     "networkMode": "awsvpc",
     "requiresCompatibilities": ["FARGATE"],
     "cpu": "1024",
     "memory": "2048",
     "containerDefinitions": [{
       "name": "ai-api",
       "image": "YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest",
       "portMappings": [{
         "containerPort": 8000,
         "protocol": "tcp"
       }],
       "environment": [],
       "secrets": [{
         "name": "OPENAI_API_KEY",
         "valueFrom": "arn:aws:secretsmanager:us-east-1:ACCOUNT:secret:openai-api-key"
       }]
     }]
   }
   ```

3. **Create ECS Service with ALB**
   - Configure Application Load Balancer
   - Set up target group (port 8000)
   - Configure auto-scaling
   - Add health checks

### Using AWS Lambda (API Gateway)

For serverless deployment, wrap endpoints with AWS Lambda handlers.

## Google Cloud Platform

### Using Cloud Run

1. **Build and push to GCR**
   ```bash
   gcloud builds submit --tag gcr.io/PROJECT_ID/ai-api-service

   gcloud run deploy ai-api-service \
     --image gcr.io/PROJECT_ID/ai-api-service \
     --platform managed \
     --region us-central1 \
     --allow-unauthenticated \
     --set-env-vars OPENAI_API_KEY=your_key
   ```

2. **Configure secrets**
   ```bash
   echo -n "your_openai_key" | \
     gcloud secrets create openai-api-key --data-file=-

   gcloud run services update ai-api-service \
     --update-secrets OPENAI_API_KEY=openai-api-key:latest
   ```

### Using GKE (Kubernetes)

1. **Create cluster**
   ```bash
   gcloud container clusters create ai-api-cluster \
     --num-nodes=3 \
     --machine-type=n1-standard-2
   ```

2. **Deploy application**
   ```bash
   kubectl apply -f k8s/deployment.yaml
   kubectl apply -f k8s/service.yaml
   kubectl apply -f k8s/ingress.yaml
   ```

## Azure Deployment

### Using Azure Container Instances

```bash
az container create \
  --resource-group ai-api-rg \
  --name ai-api-service \
  --image your-registry.azurecr.io/ai-api-service:latest \
  --cpu 2 \
  --memory 4 \
  --ports 8000 \
  --environment-variables \
    PORT=8000 \
  --secure-environment-variables \
    OPENAI_API_KEY=your_key \
    API_KEYS=demo-key-1
```

### Using Azure App Service

1. **Create App Service Plan**
   ```bash
   az appservice plan create \
     --name ai-api-plan \
     --resource-group ai-api-rg \
     --is-linux \
     --sku B1
   ```

2. **Create Web App**
   ```bash
   az webapp create \
     --resource-group ai-api-rg \
     --plan ai-api-plan \
     --name ai-api-service \
     --deployment-container-image-name your-registry.azurecr.io/ai-api-service:latest
   ```

3. **Configure settings** (using an Azure Key Vault reference)
   ```bash
   az webapp config appsettings set \
     --resource-group ai-api-rg \
     --name ai-api-service \
     --settings \
       "OPENAI_API_KEY=@Microsoft.KeyVault(SecretUri=...)"
   ```

## Environment Variables

### Required Variables

| Variable | Description | Example |
|----------|-------------|---------|
| `API_KEYS` | Comma-separated API keys | `key1,key2,key3` |
| `OPENAI_API_KEY` | OpenAI API key (or alternative) | `sk-...` |

### Optional Variables

| Variable | Description | Default |
|----------|-------------|---------|
| `HUGGINGFACE_API_KEY` | HuggingFace API key | - |
| `ANTHROPIC_API_KEY` | Anthropic API key | - |
| `PINECONE_API_KEY` | Pinecone vector DB key | - |
| `RATE_LIMIT_DEFAULT` | Requests/min for default tier | `60` |
| `RATE_LIMIT_ADMIN` | Requests/min for admin tier | `1000` |
| `LOG_LEVEL` | Logging level | `info` |
| `MAX_FILE_SIZE_MB` | Max upload size in MB | `10` |

### Setting Secrets

**Encore Cloud:**
```bash
encore secret set OPENAI_API_KEY
```

**Docker:**
```bash
docker run -e OPENAI_API_KEY=your_key ...
```

**Kubernetes:**
```bash
kubectl create secret generic api-secrets \
  --from-literal=OPENAI_API_KEY=your_key
```

**AWS Secrets Manager:**
```bash
aws secretsmanager create-secret \
  --name openai-api-key \
  --secret-string your_key
```

## Monitoring

### Health Checks

Configure health check endpoint:
```
GET /health
```

Expected response:
```json
{
  "status": "healthy",
  "version": "1.0.0",
  "services": [...]
}
```

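For external uptime monitoring, a tiny poller against `/health` is often enough. A minimal TypeScript sketch (Node 18+; the base URL is a placeholder):

```typescript
// Minimal sketch: poll GET /health and log when the service looks degraded.
// BASE_URL is a placeholder; /health requires no authentication.
const BASE_URL = "https://your-deployment.example.com";

async function checkHealth(): Promise<void> {
  try {
    const res = await fetch(`${BASE_URL}/health`);
    const body = await res.json();
    if (!res.ok || body.status !== "healthy") {
      console.error("Service degraded:", body);
    }
  } catch (err) {
    console.error("Health check failed:", err);
  }
}

setInterval(checkHealth, 60_000); // once a minute
```
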
### Metrics

Access metrics at:
```
GET /metrics
```

### Logging

Logs are output as structured JSON:
```json
{
  "timestamp": "2025-10-01T12:00:00Z",
  "level": "info",
  "message": "Request processed",
  "duration_ms": 245
}
```

## Scaling Recommendations

### Horizontal Scaling
- Start with 2-3 replicas
- Auto-scale based on CPU (70% threshold)
- Use a load balancer for distribution

### Vertical Scaling
- Minimum: 1 CPU, 2GB RAM
- Recommended: 2 CPU, 4GB RAM
- High traffic: 4 CPU, 8GB RAM

### Database Scaling
- Use Pinecone for production vector storage
- Implement Redis for caching
- Consider read replicas for high traffic

## Troubleshooting

### Common Issues

**"No LLM adapter available"**
- Check that at least one API key is set (OpenAI, HuggingFace, or Anthropic)

**"Rate limit exceeded"**
- Increase rate limits in environment variables
- Use an admin API key for testing

**"Vector DB connection failed"**
- Service falls back to in-memory storage
- Check Pinecone credentials

**High latency**
- Enable caching (Redis)
- Use a closer region for APIs
- Optimize model selection

## Support

For deployment assistance:
- GitHub Issues
- Documentation at docs/
- Community Discord
DEVELOPMENT.md
ADDED
@@ -0,0 +1,106 @@
# Getting Started

This project consists of an Encore application. Follow the steps below to get the app running locally.

## Prerequisites

If this is your first time using Encore, you need to install the CLI that runs the local development environment. Use the appropriate command for your system:

- **macOS:** `brew install encoredev/tap/encore`
- **Linux:** `curl -L https://encore.dev/install.sh | bash`
- **Windows:** `iwr https://encore.dev/install.ps1 | iex`

You also need to have bun installed for package management. If you don't have bun installed, you can install it by running:

```bash
npm install -g bun
```

## Running the Application

### Backend Setup

1. Navigate to the backend directory:
   ```bash
   cd backend
   ```

2. Start the Encore development server:
   ```bash
   encore run
   ```

The backend will be available at the URL shown in your terminal (typically `http://localhost:4000`).

## Deployment

### Self-hosting
See the [self-hosting instructions](https://encore.dev/docs/self-host/docker-build) for how to use `encore build docker` to create a Docker image and configure it.

### Encore Cloud Platform

#### Step 1: Login to your Encore Cloud Account

Before deploying, ensure you have authenticated the Encore CLI with your Encore account (same as your Leap account):

```bash
encore auth login
```

#### Step 2: Set Up Git Remote

Add Encore's git remote to enable direct deployment:

```bash
git remote add encore encore://scalable-ai-api-service-ysyi
```

#### Step 3: Deploy Your Application

Deploy by pushing your code:

```bash
git add -A .
git commit -m "Deploy to Encore Cloud"
git push encore
```

Monitor your deployment progress in the [Encore Cloud dashboard](https://app.encore.dev/scalable-ai-api-service-ysyi/deploys).

## GitHub Integration (Recommended for Production)

For production applications, we recommend integrating with GitHub instead of using Encore's managed git:

### Connecting Your GitHub Account

1. Open your app in the **Encore Cloud dashboard**
2. Navigate to Encore Cloud [GitHub Integration settings](https://app.encore.cloud/scalable-ai-api-service-ysyi/settings/integrations/github)
3. Click **Connect Account to GitHub**
4. Grant access to your repository

Once connected, pushing to your GitHub repository will automatically trigger deployments. Encore Cloud Pro users also get Preview Environments for each pull request.

### Deploy via GitHub

After connecting GitHub, deploy by pushing to your repository:

```bash
git add -A .
git commit -m "Deploy via GitHub"
git push origin main
```

## Additional Resources

- [Encore Documentation](https://encore.dev/docs)
- [Deployment Guide](https://encore.dev/docs/platform/deploy/deploying)
- [GitHub Integration](https://encore.dev/docs/platform/integrations/github)
- [Encore Cloud Dashboard](https://app.encore.dev)
Dockerfile
ADDED
@@ -0,0 +1,74 @@
FROM node:18-alpine AS builder

WORKDIR /app

COPY package*.json ./
RUN npm ci

COPY . .
RUN npm run build || echo "Build will happen on startup"

FROM node:18

WORKDIR /app

RUN apt-get update && apt-get install -y curl && \
    curl -fsSL https://ollama.com/install.sh | sh && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

COPY --from=builder /app ./
RUN npm ci --only=production

ENV PORT=7860
ENV NODE_ENV=production
ENV OLLAMA_BASE_URL=http://localhost:11434
ENV OLLAMA_MODEL=llama2
ENV OLLAMA_EMBEDDING_MODEL=nomic-embed-text
ENV API_KEYS=demo-key-1,demo-key-2
ENV RATE_LIMIT_DEFAULT=100
ENV RATE_LIMIT_ADMIN=1000
ENV LOG_LEVEL=info
ENV ENABLE_BACKGROUND_WORKERS=true
ENV OLLAMA_MODELS=/data/ollama-models

EXPOSE 7860

RUN echo '#!/bin/bash\n\
set -e\n\
\n\
echo "=== Starting AI API Service with Ollama ==="\n\
\n\
ollama serve &\n\
OLLAMA_PID=$!\n\
echo "Ollama started with PID $OLLAMA_PID"\n\
\n\
echo "Waiting for Ollama to be ready..."\n\
for i in {1..30}; do\n\
  if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then\n\
    echo "Ollama is ready!"\n\
    break\n\
  fi\n\
  echo "Waiting... ($i/30)"\n\
  sleep 2\n\
done\n\
\n\
echo "Pulling Ollama model: $OLLAMA_MODEL"\n\
ollama pull $OLLAMA_MODEL || echo "Warning: Model pull failed, will retry on first request"\n\
\n\
if [ "$OLLAMA_EMBEDDING_MODEL" != "$OLLAMA_MODEL" ]; then\n\
  echo "Pulling embedding model: $OLLAMA_EMBEDDING_MODEL"\n\
  ollama pull $OLLAMA_EMBEDDING_MODEL || echo "Warning: Embedding model pull failed"\n\
fi\n\
\n\
echo "Warming up model..."\n\
timeout 30s ollama run $OLLAMA_MODEL "Hi" > /dev/null 2>&1 || echo "Warmup completed"\n\
\n\
echo "Starting AI API Service on port $PORT..."\n\
echo "Available models: $(ollama list)"\n\
\n\
# With set -e, a failed exec aborts the script, so pick the entrypoint explicitly\n\
if [ -f .encore/build/backend/main.js ]; then\n\
  exec node .encore/build/backend/main.js\n\
else\n\
  exec npm start\n\
fi\n\
' > /app/start.sh && chmod +x /app/start.sh

VOLUME /data

CMD ["/app/start.sh"]
HUGGINGFACE_OLLAMA_DEPLOY.md
ADDED
@@ -0,0 +1,423 @@
# Deploying AI API Service to Hugging Face Spaces with Ollama

This guide shows you how to deploy the AI API service to Hugging Face Spaces using Ollama as your LLM backend (no API keys needed!).

## Why Ollama on Hugging Face Spaces?

✅ **No API costs** - Run models locally in your Space
✅ **Privacy** - Data stays within your Space
✅ **Model choice** - Use Llama 2, Llama 3, Mistral, Phi, Gemma, etc.
✅ **No rate limits** - Only limited by Space hardware
✅ **Full control** - Customize models and parameters

## Prerequisites

- Hugging Face account (free)
- Basic knowledge of Git

## Step-by-Step Deployment

### 1. Create a New Space

1. Go to https://huggingface.co/new-space
2. Choose:
   - **Name**: `ai-api-ollama` (or your preferred name)
   - **License**: MIT
   - **SDK**: Docker
   - **Hardware**:
     - **CPU Basic (free)**: Works for small models (phi, gemma:2b)
     - **CPU Upgrade ($0.60/hr)**: Better for medium models (llama2, mistral)
     - **GPU T4 ($0.60/hr)**: Recommended for fast inference
     - **GPU A10G ($3.15/hr)**: For large models (llama3:70b)
3. Click **Create Space**

### 2. Clone Your Space Repository

```bash
git clone https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
cd ai-api-ollama
```

### 3. Copy Project Files

Copy all files from this project to your Space directory:

```bash
# From the ai-api-service directory
cp -r backend examples tests *.md *.json *.yml .dockerignore .env.example ../ai-api-ollama/
```

### 4. Create Hugging Face Space Dockerfile

Create a new `Dockerfile` optimized for Hugging Face Spaces with Ollama:

```dockerfile
FROM node:18-alpine AS builder

WORKDIR /app

# Copy package files
COPY package*.json ./
RUN npm ci

# Copy source code
COPY . .

# Build the application
RUN npm run build || echo "Build step skipped - Encore will build on startup"

# Production stage with Ollama
FROM node:18

WORKDIR /app

# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh

# Copy built application
COPY --from=builder /app ./

# Install production dependencies
RUN npm ci --only=production

# Set environment variables for Hugging Face Spaces
ENV PORT=7860
ENV OLLAMA_BASE_URL=http://localhost:11434
ENV OLLAMA_MODEL=llama2
ENV OLLAMA_EMBEDDING_MODEL=nomic-embed-text
ENV API_KEYS=demo-key-1,demo-key-2
ENV RATE_LIMIT_DEFAULT=60
ENV RATE_LIMIT_ADMIN=1000
ENV LOG_LEVEL=info
ENV ENABLE_BACKGROUND_WORKERS=true

EXPOSE 7860

# Create startup script
RUN echo '#!/bin/bash\n\
# Start Ollama in background\n\
ollama serve &\n\
OLLAMA_PID=$!\n\
\n\
# Wait for Ollama to start\n\
echo "Waiting for Ollama to start..."\n\
sleep 5\n\
\n\
# Pull the model\n\
echo "Pulling Ollama model: $OLLAMA_MODEL"\n\
ollama pull $OLLAMA_MODEL || echo "Model pull failed, will try on first request"\n\
\n\
# Pull embedding model if different\n\
if [ "$OLLAMA_EMBEDDING_MODEL" != "$OLLAMA_MODEL" ]; then\n\
  echo "Pulling embedding model: $OLLAMA_EMBEDDING_MODEL"\n\
  ollama pull $OLLAMA_EMBEDDING_MODEL || echo "Embedding model pull failed"\n\
fi\n\
\n\
# Start the API service\n\
echo "Starting AI API Service on port $PORT..."\n\
node .encore/build/backend/main.js || npm start\n\
' > /app/start.sh && chmod +x /app/start.sh

CMD ["/app/start.sh"]
```

### 5. Configure Environment Variables in Space Settings

In your Space settings on Hugging Face:

1. Go to **Settings** → **Variables and secrets**
2. Add these environment variables:

| Variable | Value | Description |
|----------|-------|-------------|
| `API_KEYS` | `your-secret-key-here` | Comma-separated API keys for authentication |
| `ADMIN_API_KEYS` | `admin-key-here` | Admin-level API keys (optional) |
| `OLLAMA_MODEL` | `llama2` | Default: llama2, or use llama3, mistral, phi, gemma |
| `OLLAMA_EMBEDDING_MODEL` | `nomic-embed-text` | Embedding model for RAG |
| `RATE_LIMIT_DEFAULT` | `100` | Requests per minute for default users |

**Recommended Models by Hardware:**

| Hardware | Recommended Model | Speed | Quality |
|----------|------------------|-------|---------|
| CPU Basic | `phi:latest` or `gemma:2b` | Fast | Good |
| CPU Upgrade | `llama2:latest` or `mistral:latest` | Medium | Better |
| GPU T4 | `llama3:latest` | Fast | Excellent |
| GPU A10G | `llama3:70b` | Medium | Best |

### 6. Create README.md for Your Space

Create a `README.md` in your Space root:

```markdown
---
title: AI API Service with Ollama
emoji: 🤖
colorFrom: blue
colorTo: purple
sdk: docker
pinned: false
---

# AI API Service with Ollama

Production-ready AI API with chat, RAG, image generation, and voice synthesis.

## Features

- 💬 Multi-turn chat conversations
- 📚 RAG (Retrieval-Augmented Generation)
- 🖼️ Image generation
- 🎙️ Voice synthesis
- 📄 Document ingestion
- 🔒 API key authentication
- ⚡ Rate limiting

## Quick Start

### API Documentation

Base URL: `https://YOUR_USERNAME-ai-api-ollama.hf.space`

### Example Request

```bash
curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {"role": "user", "content": "Hello! How are you?"}
    ]
  }'
```

### Available Endpoints

- `GET /health` - Health check
- `POST /ai/chat` - Chat conversation
- `POST /rag/query` - Query with retrieval
- `POST /image/generate` - Generate images
- `POST /voice/synthesize` - Text to speech
- `POST /upload` - Upload documents

See full API documentation in the repository.

## Using Your Own API Key

Replace `demo-key-1` with your configured API key from Space settings.

## Local Development

See [QUICKSTART.md](QUICKSTART.md) for local setup instructions.
```

### 7. Push to Hugging Face

```bash
git add .
git commit -m "Initial deployment with Ollama"
git push
```

### 8. Wait for Build

- Hugging Face will automatically build your Docker image
- This takes 5-10 minutes for the first build
- Watch the **Logs** tab for progress
- Initial startup will download the Ollama model (2-5 minutes depending on model size)

### 9. Test Your Deployment

Once the Space is running:

```bash
# Replace YOUR_USERNAME with your Hugging Face username
SPACE_URL="https://YOUR_USERNAME-ai-api-ollama.hf.space"

# Health check
curl $SPACE_URL/health

# Chat request
curl -X POST $SPACE_URL/ai/chat \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {"role": "user", "content": "Tell me a joke about AI"}
    ]
  }'
```

## Optimizations for Hugging Face Spaces

### 1. Reduce Model Download Time

Pre-download models in the Dockerfile. Note that the Ollama server must be running in the same RUN step for `pull` to work:

```dockerfile
RUN ollama serve & sleep 5 && \
    ollama pull llama2 && \
    ollama pull nomic-embed-text
```

### 2. Use Smaller Models for Free Tier

```env
OLLAMA_MODEL=phi:latest
```

Phi is only 1.3GB vs Llama2's 4GB.

### 3. Enable Persistent Storage

Hugging Face Spaces have persistent storage in `/data`:

```dockerfile
# Add to Dockerfile
VOLUME /data
ENV OLLAMA_MODELS=/data/ollama-models
```

This prevents re-downloading models on restart.

### 4. Optimize for Cold Starts

Add model warmup in the startup script:

```bash
# Add to start.sh (ollama run has no --timeout flag; use the timeout utility)
echo "Warming up model..."
timeout 10s ollama run $OLLAMA_MODEL "Hello"
```

## Cost Comparison

| Option | Cost | Pros | Cons |
|--------|------|------|------|
| **Free CPU** | $0 | Free! | Slow inference, small models only |
| **CPU Upgrade** | $0.60/hr (~$432/mo) | Better performance | Still slower than GPU |
| **GPU T4** | $0.60/hr (~$432/mo) | Fast inference | Limited for huge models |
| **OpenAI API** | Pay per token | No hosting, fast | Ongoing costs, data sent to OpenAI |
| **Self-hosted** | VPS costs | Full control | Maintenance required |

**Recommendation**: Start with **Free CPU + Phi** for testing, upgrade to **GPU T4 + Llama3** for production.

## Troubleshooting

### Space won't start

**Check logs for**:
- Ollama installation errors → Use the official Ollama install script
- Model download timeout → Use a smaller model or upgrade hardware
- Port conflicts → Ensure PORT=7860

### "No LLM adapter available"

**Solution**: The Ollama adapter is now always initialized. Check that Ollama is running:
```bash
# In Space terminal
curl http://localhost:11434/api/tags
```

### Slow responses

**Solutions**:
- Use a smaller model (phi instead of llama2)
- Upgrade to GPU hardware
- Reduce max_tokens in requests

### Model not found

**Solution**: Pull the model manually:
```bash
# In Space terminal or startup script
ollama pull llama2
```

## Advanced Configuration

### Use Multiple Models

```env
# In Space settings
OLLAMA_MODEL=llama3:latest
```

Then specify the model in API requests:
```json
{
  "conversation": [...],
  "model": "llama3"
}
```

### Custom System Prompts

```bash
curl -X POST $SPACE_URL/ai/chat \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {"role": "system", "content": "You are a helpful coding assistant."},
      {"role": "user", "content": "Explain Python decorators"}
    ]
  }'
```

### Enable RAG with Documents

```bash
# Upload a document
curl -X POST $SPACE_URL/upload \
  -H "Authorization: Bearer your-key" \
  -F "file=@document.pdf"

# Query with RAG
curl -X POST $SPACE_URL/rag/query \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{"query": "What does the document say about X?"}'
```

## Monitoring

### Check Space Health

```bash
curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
```

### View Metrics

```bash
curl https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
  -H "Authorization: Bearer your-key"
```

## Scaling

### Horizontal Scaling

Hugging Face Spaces don't support horizontal scaling. For high traffic:

1. **Use multiple Spaces** with a load balancer
2. **Deploy to cloud** (AWS ECS, GCP Cloud Run) with auto-scaling
3. **Use a managed API** (OpenAI, Anthropic) for high volume

### Vertical Scaling

Upgrade hardware in Space settings:
- Free CPU → CPU Upgrade (2x faster)
- CPU → GPU T4 (10x faster)
- GPU T4 → GPU A10G (2x faster, larger models)

## Support

- [GitHub Issues](https://github.com/your-org/ai-api-service/issues)
- [Hugging Face Discussions](https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/discussions)
- [Documentation](https://github.com/your-org/ai-api-service)

## License

MIT License - see LICENSE file
QUICKSTART.md
ADDED
@@ -0,0 +1,319 @@
# Quick Start Guide

Get your AI API Service up and running in 5 minutes!

## Prerequisites

- Node.js 18+
- npm or yarn
- At least one LLM API key (OpenAI, HuggingFace, or Anthropic)

## 5-Minute Setup

### 1. Install Dependencies

```bash
npm install
```

### 2. Configure Environment

```bash
cp .env.example .env
```

Edit `.env` and add your API keys:

```env
OPENAI_API_KEY=sk-your-openai-key
API_KEYS=demo-key-1,my-secret-key
```

### 3. Start the Server

```bash
npm run dev
```

The API will be available at `http://localhost:8000`

### 4. Test the API

```bash
curl http://localhost:8000/health
```

Expected response:

```json
{
  "status": "healthy",
  "version": "1.0.0",
  "services": [...],
  "uptime_seconds": 5
}
```

### 5. Make Your First Request

```bash
curl -X POST http://localhost:8000/ai/chat \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {"role": "user", "content": "Hello!"}
    ]
  }'
```

## Example Requests

### Chat

```bash
curl -X POST http://localhost:8000/ai/chat \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{"conversation": [{"role": "user", "content": "What is AI?"}]}'
```

### RAG Query

```bash
curl -X POST http://localhost:8000/rag/query \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{"query": "What are the key features?", "top_k": 5}'
```

### Image Generation

```bash
curl -X POST http://localhost:8000/image/generate \
  -H "Authorization: Bearer demo-key-1" \
  -H "Content-Type: application/json" \
  -d '{"prompt": "A sunset over mountains", "size": "1024x1024"}'
```
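
The same chat call from code: a minimal TypeScript sketch using the global `fetch` available in Node 18+ (the URL, key, and response fields mirror the examples above; see `examples/js_client.js` for a fuller client):

```typescript
// Minimal client sketch for POST /ai/chat; assumes the dev server from step 3.
async function chat(content: string): Promise<string> {
  const res = await fetch("http://localhost:8000/ai/chat", {
    method: "POST",
    headers: {
      "Authorization": "Bearer demo-key-1",
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ conversation: [{ role: "user", content }] }),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const data = await res.json() as { reply: string; model: string };
  return data.reply;
}

chat("What is AI?").then(console.log);
```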

## What Each Component Does

### 🔐 **Authentication (`/backend/utils/auth.ts`)**
- Validates API keys from the Authorization header
- Implements role-based access (default, premium, admin)
- Used by all protected endpoints
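
Every protected endpoint runs the same three-step guard before doing any work; the pattern below is taken from the handlers in `backend/api/` (e.g. `chat.ts`):

```typescript
// Shared guard at the top of each protected endpoint handler:
const authHeader = auth();                      // read the Authorization header
const authData = validateApiKey(authHeader);    // resolve the key to { apiKey, tier }
checkRateLimit(authData.apiKey, authData.tier); // throws a 429-style error when exhausted
```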

### ⚡ **Rate Limiting (`/backend/utils/rate_limit.ts`)**
- Token bucket algorithm (sketched below)
- Configurable limits per tier (60/300/1000 requests/min)
- Automatic reset after 1 minute
- Prevents abuse and cost overruns
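
To make the token-bucket bullet concrete, here is a minimal fixed-window sketch of the idea — illustrative only, not the repo's exact `rate_limit.ts`:

```typescript
// Illustrative token bucket: `limit` requests per one-minute window per key.
const buckets = new Map<string, { tokens: number; resetAt: number }>();

function takeToken(apiKey: string, limit: number): void {
  const now = Date.now();
  let bucket = buckets.get(apiKey);
  if (!bucket || now >= bucket.resetAt) {
    bucket = { tokens: limit, resetAt: now + 60_000 }; // automatic reset after 1 minute
    buckets.set(apiKey, bucket);
  }
  if (bucket.tokens === 0) {
    throw new Error(`Rate limit exceeded; retry after ${new Date(bucket.resetAt).toISOString()}`);
  }
  bucket.tokens -= 1; // consume one token per request
}
```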

### 🤖 **AI Service (`/backend/services/ai_service.ts`)**
- Multi-provider LLM routing (OpenAI, HuggingFace, Anthropic)
- Automatic model selection and fallback
- Chat completions with context management
- Embedding generation for RAG

### 📚 **RAG Service (`/backend/services/rag_service.ts`)**
- Vector-based document retrieval
- Automatic context injection into prompts
- Supports Pinecone or in-memory vector DB
- Returns sources with similarity scores

### 🖼️ **Image Service (`/backend/services/image_service.ts`)**
- Text-to-image generation
- Supports DALL-E and Stable Diffusion
- Configurable sizes and quality
- Returns base64 or URLs

### 🎙️ **Voice Service (`/backend/services/voice_service.ts`)**
- Text-to-speech synthesis (TTS)
- Speech-to-text transcription (STT)
- Multiple voice options
- Various audio formats (mp3, opus, etc.)

### 📄 **Document Service (`/backend/services/document_service.ts`)**
- Upload PDF, DOCX, TXT files
- Automatic text extraction
- Chunking with overlap for better retrieval (sketched below)
- Background processing with workers
- Stores chunks in vector DB
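
"Chunking with overlap" means consecutive chunks share a margin of text, so a sentence straddling a boundary still appears whole in at least one chunk. A minimal sketch (`chunkSize` mirrors the `CHUNK_SIZE` default from the configuration table below; the overlap value is an assumed example):

```typescript
// Illustrative sliding-window chunking; not the repo's exact document_service.ts.
function chunkText(text: string, chunkSize = 1000, overlap = 200): string[] {
  const chunks: string[] = [];
  const step = chunkSize - overlap; // advance less than a full chunk each time
  for (let start = 0; start < text.length; start += step) {
    chunks.push(text.slice(start, start + chunkSize));
  }
  return chunks;
}
```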

### 🔌 **Adapters**

#### **OpenAI Adapter (`/backend/adapters/openai_adapter.ts`)**
- Chat completions (GPT-4, GPT-3.5)
- Embeddings (text-embedding-ada-002)
- Image generation (DALL-E)
- Voice synthesis and transcription
- Implements LLMAdapter, ImageAdapter, VoiceAdapter interfaces

#### **HuggingFace Adapter (`/backend/adapters/huggingface_adapter.ts`)**
- Open-source models (Mistral, Llama, etc.)
- Stable Diffusion for images
- Sentence transformers for embeddings
- Free tier available

#### **Anthropic Adapter (`/backend/adapters/anthropic_adapter.ts`)**
- Claude models (Sonnet, Opus)
- Advanced reasoning capabilities
- Long context windows

#### **Vector DB Adapters (`/backend/adapters/vector_db_adapter.ts`)**
- **PineconeAdapter**: Production vector storage with managed scaling
- **InMemoryVectorDB**: Development fallback with cosine similarity
- Supports metadata filtering and batch operations

### 📊 **Observability**

#### **Logger (`/backend/utils/logger.ts`)**
- Structured JSON logging
- Configurable log levels (debug, info, warn, error)
- Automatic timestamping
- Production-ready format
#### **Metrics (`/backend/utils/metrics.ts`)**
- Request counting by endpoint
- Error tracking
- Response time measurement
- Model usage statistics
- Vector DB query counts
- Document processing stats
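
The handlers in `backend/api/` all feed these counters the same way (this is the pattern from `chat.ts`):

```typescript
// Instrumentation pattern used by each endpoint handler:
const startTime = Date.now();
try {
  metrics.incrementRequests("/ai/chat");              // per-endpoint request count
  // ... handle the request ...
  metrics.recordResponseTime(Date.now() - startTime); // response-time measurement
} catch (error) {
  metrics.incrementErrors();                          // error tally, surfaced via GET /metrics
  throw error;
}
```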
### 🔄 **Background Workers (`/backend/workers/ingestion_worker.ts`)**
- Async document processing
- Configurable concurrency
- Job status tracking
- Webhook notifications on completion
- Automatic retries on failure
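
A generic sketch of the concurrency-limited loop those bullets describe (illustrative only; it is not the repo's `ingestion_worker.ts`):

```typescript
// Illustrative worker pool: N concurrent consumers with a capped retry count.
type Job = { run: () => Promise<void>; attempts: number };

async function runWorkers(jobs: Job[], concurrency = 2, maxAttempts = 3): Promise<void> {
  const queue = [...jobs];
  const worker = async () => {
    for (let job = queue.shift(); job; job = queue.shift()) {
      try {
        await job.run();
      } catch {
        if (++job.attempts < maxAttempts) queue.push(job); // automatic retry, capped
      }
    }
  };
  await Promise.all(Array.from({ length: concurrency }, () => worker()));
}
```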
### 🌐 **API Endpoints**

All endpoints are in `/backend/api/`:

#### **Health & Metrics (`health.ts`)**
- `GET /health` - Service health with component status
- `GET /metrics` - Usage metrics and statistics

#### **Authentication (`auth.ts`)**
- `POST /auth/verify` - Validate API key

#### **Chat (`chat.ts`)**
- `POST /ai/chat` - Multi-turn conversation
- `GET /ai/query` - Simple Q&A

#### **RAG (`rag.ts`)**
- `POST /rag/query` - Query with retrieval
- `GET /rag/models` - List available models

#### **Images (`image.ts`)**
- `POST /image/generate` - Generate images

#### **Voice (`voice.ts`)**
- `POST /voice/synthesize` - Text to speech
- `POST /voice/transcribe` - Speech to text

#### **Documents (`documents.ts`)**
- `POST /upload` - Upload document
- `GET /docs/:id/sources` - Get document chunks
- `POST /webhook/events` - Processing webhooks
## Architecture Flow

```
┌─────────┐
│ Client  │
└────┬────┘
     │
     ├─ Authorization Header (Bearer token)
     ↓
┌─────────────────┐
│ Auth Middleware │ ← Validates API key
└────┬────────────┘
     ├─ Checks rate limit
     ↓
┌──────────────┐
│ API Endpoint │ ← Routes request
└────┬─────────┘
     ├─ POST /ai/chat → AI Service
     ├─ POST /rag/query → RAG Service → Vector DB → AI Service
     ├─ POST /image/generate → Image Service
     ├─ POST /voice/synthesize → Voice Service
     ├─ POST /upload → Document Service → Worker → Vector DB
     ↓
┌───────────┐
│ Response  │ ← JSON with data + metadata
└───────────┘
```
## Configuration

### Environment Variables

| Variable | What It Does | Example |
|----------|-------------|---------|
| `OPENAI_API_KEY` | OpenAI access for GPT models | `sk-...` |
| `HUGGINGFACE_API_KEY` | HuggingFace models access | `hf_...` |
| `API_KEYS` | Valid API keys (comma-separated) | `key1,key2` |
| `RATE_LIMIT_DEFAULT` | Requests/min for basic users | `60` |
| `RATE_LIMIT_ADMIN` | Requests/min for admins | `1000` |
| `MAX_FILE_SIZE_MB` | Max document upload size | `10` |
| `CHUNK_SIZE` | Text chunk size for RAG | `1000` |
| `LOG_LEVEL` | Logging verbosity | `info` |

### Tier System

- **Default**: 60 requests/min
- **Premium**: 300 requests/min (add to config)
- **Admin**: 1000 requests/min (via `ADMIN_API_KEYS`)
## Testing

Run tests:

```bash
npm test
```

Run with coverage:

```bash
npm run test:coverage
```

## Production Checklist

- [ ] Set strong `API_KEYS`
- [ ] Configure `ADMIN_API_KEYS` separately
- [ ] Set up Pinecone for vector storage
- [ ] Increase rate limits based on needs
- [ ] Enable background workers
- [ ] Set `LOG_LEVEL=info` or `warn`
- [ ] Configure CORS origins
- [ ] Set up monitoring/alerting
- [ ] Review cost limits on LLM providers

## Troubleshooting

**"No LLM adapter available"**
→ Add at least one API key (OPENAI_API_KEY, HUGGINGFACE_API_KEY, or ANTHROPIC_API_KEY)

**"Invalid API key"**
→ Check the Authorization header: `Bearer your-key-here`

**"Rate limit exceeded"**
→ Wait 60 seconds or use an admin key

**Vector DB queries fail**
→ The service falls back to in-memory storage automatically

## Next Steps

1. **Read the full README**: `README.md`
2. **Check the deployment guide**: `DEPLOYMENT.md`
3. **Review examples**: `examples/js_client.js` and `examples/curl.sh`
4. **Run tests**: `npm test`
5. **Deploy to production**: See DEPLOYMENT.md

## Support

- GitHub Issues
- Documentation in `/docs`
- Example code in `/examples`

Enjoy building with the AI API Service! 🚀
README.md
CHANGED

````diff
@@ -1,11 +1,12 @@
+cygon24:
+
+```markdown
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: AI API Service with Ollama
+emoji: 🤖
+colorFrom: blue
+colorTo: purple
 sdk: docker
+app_port: 7860
 pinned: false
-
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+---
````
backend/adapters/anthropic_adapter.ts
ADDED

```typescript
import Anthropic from '@anthropic-ai/sdk';
import type {
  LLMAdapter,
  Message,
  ChatOptions,
  ChatResponse,
  EmbeddingResponse,
} from '../types/models';

export class AnthropicAdapter implements LLMAdapter {
  private client: Anthropic | null = null;
  private apiKey: string;
  private defaultModel: string;

  constructor(apiKey: string, defaultModel = 'claude-3-sonnet-20240229') {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;

    if (apiKey) {
      this.client = new Anthropic({ apiKey });
    }
  }

  async isAvailable(): Promise<boolean> {
    if (!this.client) return false;
    try {
      await this.client.messages.create({
        model: this.defaultModel,
        max_tokens: 1,
        messages: [{ role: 'user', content: 'test' }],
      });
      return true;
    } catch {
      return false;
    }
  }

  async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
    if (!this.client) {
      throw new Error('Anthropic client not initialized. Please provide ANTHROPIC_API_KEY.');
    }

    const systemMessage = messages.find(m => m.role === 'system');
    const conversationMessages = messages.filter(m => m.role !== 'system');

    const response = await this.client.messages.create({
      model: this.defaultModel,
      max_tokens: options?.max_tokens || 1000,
      temperature: options?.temperature ?? 0.7,
      top_p: options?.top_p,
      system: systemMessage?.content,
      messages: conversationMessages.map(m => ({
        role: m.role === 'assistant' ? 'assistant' : 'user',
        content: m.content,
      })),
      stop_sequences: options?.stop,
    });

    const textContent = response.content.find(c => c.type === 'text');

    return {
      reply: textContent?.type === 'text' ? textContent.text : '',
      model: response.model,
      usage: {
        prompt_tokens: response.usage.input_tokens,
        completion_tokens: response.usage.output_tokens,
        total_tokens: response.usage.input_tokens + response.usage.output_tokens,
      },
      sources: null,
    };
  }

  async generateEmbedding(_text: string | string[]): Promise<EmbeddingResponse> {
    throw new Error('Anthropic does not support embeddings. Use OpenAI or HuggingFace adapter.');
  }
}
```
backend/adapters/huggingface_adapter.ts
ADDED

```typescript
import { HfInference } from '@huggingface/inference';
import type {
  LLMAdapter,
  ImageAdapter,
  Message,
  ChatOptions,
  ChatResponse,
  EmbeddingResponse,
  ImageGenerationRequest,
  ImageGenerationResponse,
} from '../types/models';

export class HuggingFaceAdapter implements LLMAdapter, ImageAdapter {
  private client: HfInference | null = null;
  private apiKey: string;
  private defaultModel: string;
  private defaultEmbeddingModel: string;
  private defaultImageModel: string;

  constructor(
    apiKey: string,
    defaultModel = 'mistralai/Mistral-7B-Instruct-v0.1',
    defaultEmbeddingModel = 'sentence-transformers/all-MiniLM-L6-v2',
    defaultImageModel = 'stabilityai/stable-diffusion-xl-base-1.0'
  ) {
    this.apiKey = apiKey;
    this.defaultModel = defaultModel;
    this.defaultEmbeddingModel = defaultEmbeddingModel;
    this.defaultImageModel = defaultImageModel;

    if (apiKey) {
      this.client = new HfInference(apiKey);
    }
  }

  async isAvailable(): Promise<boolean> {
    if (!this.client) return false;
    try {
      await this.client.textGeneration({
        model: this.defaultModel,
        inputs: 'test',
        parameters: { max_new_tokens: 1 },
      });
      return true;
    } catch {
      return false;
    }
  }

  async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
    if (!this.client) {
      throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
    }

    const prompt = this.formatMessagesAsPrompt(messages);

    const response = await this.client.textGeneration({
      model: this.defaultModel,
      inputs: prompt,
      parameters: {
        max_new_tokens: options?.max_tokens || 1000,
        temperature: options?.temperature ?? 0.7,
        top_p: options?.top_p ?? 0.95,
        repetition_penalty: 1.1,
        return_full_text: false,
      },
    });

    const estimatedTokens = Math.ceil(prompt.length / 4);
    const completionTokens = Math.ceil((response.generated_text?.length || 0) / 4);

    return {
      reply: response.generated_text || '',
      model: this.defaultModel,
      usage: {
        prompt_tokens: estimatedTokens,
        completion_tokens: completionTokens,
        total_tokens: estimatedTokens + completionTokens,
      },
      sources: null,
    };
  }

  async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
    if (!this.client) {
      throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
    }

    const inputs = Array.isArray(text) ? text : [text];
    const embeddings: number[][] = [];

    for (const input of inputs) {
      const response = await this.client.featureExtraction({
        model: this.defaultEmbeddingModel,
        inputs: input,
      });

      if (Array.isArray(response) && Array.isArray(response[0])) {
        embeddings.push(response[0] as number[]);
      } else if (Array.isArray(response)) {
        embeddings.push(response as number[]);
      }
    }

    const totalTokens = inputs.reduce((sum, input) => sum + Math.ceil(input.length / 4), 0);

    return {
      embeddings,
      model: this.defaultEmbeddingModel,
      usage: {
        prompt_tokens: totalTokens,
        completion_tokens: 0,
        total_tokens: totalTokens,
      },
    };
  }

  async generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse> {
    if (!this.client) {
      throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
    }

    const model = options?.model || this.defaultImageModel;

    const response = await this.client.textToImage({
      model,
      inputs: prompt,
    });

    let buffer: Buffer;
    if (typeof response === 'object' && 'arrayBuffer' in response) {
      const arrayBuffer = await (response as any).arrayBuffer();
      buffer = Buffer.from(arrayBuffer);
    } else {
      buffer = Buffer.from(response as any);
    }
    const base64Image = buffer.toString('base64');

    return {
      images: [{
        url: `data:image/png;base64,${base64Image}`,
      }],
      model,
      created: Date.now(),
    };
  }

  private formatMessagesAsPrompt(messages: Message[]): string {
    let prompt = '';

    for (const message of messages) {
      if (message.role === 'system') {
        prompt += `System: ${message.content}\n\n`;
      } else if (message.role === 'user') {
        prompt += `User: ${message.content}\n\n`;
      } else if (message.role === 'assistant') {
        prompt += `Assistant: ${message.content}\n\n`;
      }
    }

    prompt += 'Assistant: ';
    return prompt;
  }
}
```
backend/adapters/ollama_adapter.ts
ADDED

```typescript
import type {
  LLMAdapter,
  Message,
  ChatOptions,
  ChatResponse,
  EmbeddingResponse,
} from '../types/models';
import { logger } from '../utils/logger';

export class OllamaAdapter implements LLMAdapter {
  private baseUrl: string;
  private defaultModel: string;
  private defaultEmbeddingModel: string;

  constructor(
    baseUrl = 'http://localhost:11434',
    defaultModel = 'llama2',
    defaultEmbeddingModel = 'nomic-embed-text'
  ) {
    this.baseUrl = baseUrl;
    this.defaultModel = defaultModel;
    this.defaultEmbeddingModel = defaultEmbeddingModel;
  }

  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`);
      return response.ok;
    } catch {
      return false;
    }
  }

  async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
    try {
      const prompt = this.formatMessagesAsPrompt(messages);

      const response = await fetch(`${this.baseUrl}/api/generate`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: this.defaultModel,
          prompt,
          stream: false,
          options: {
            temperature: options?.temperature ?? 0.7,
            num_predict: options?.max_tokens ?? 1000,
            top_p: options?.top_p ?? 0.9,
            stop: options?.stop,
          },
        }),
      });

      if (!response.ok) {
        throw new Error(`Ollama API error: ${response.statusText}`);
      }

      const data = await response.json() as any;

      const estimatedPromptTokens = Math.ceil(prompt.length / 4);
      const estimatedCompletionTokens = Math.ceil((data.response?.length || 0) / 4);

      return {
        reply: data.response || '',
        model: this.defaultModel,
        usage: {
          prompt_tokens: estimatedPromptTokens,
          completion_tokens: estimatedCompletionTokens,
          total_tokens: estimatedPromptTokens + estimatedCompletionTokens,
        },
        sources: null,
      };
    } catch (error) {
      logger.error('Ollama completion error', {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
    try {
      const inputs = Array.isArray(text) ? text : [text];
      const embeddings: number[][] = [];

      for (const input of inputs) {
        const response = await fetch(`${this.baseUrl}/api/embeddings`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            model: this.defaultEmbeddingModel,
            prompt: input,
          }),
        });

        if (!response.ok) {
          throw new Error(`Ollama embeddings error: ${response.statusText}`);
        }

        const data = await response.json() as any;
        embeddings.push(data.embedding);
      }

      const totalTokens = inputs.reduce((sum, input) => sum + Math.ceil(input.length / 4), 0);

      return {
        embeddings,
        model: this.defaultEmbeddingModel,
        usage: {
          prompt_tokens: totalTokens,
          completion_tokens: 0,
          total_tokens: totalTokens,
        },
      };
    } catch (error) {
      logger.error('Ollama embedding error', {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  private formatMessagesAsPrompt(messages: Message[]): string {
    let prompt = '';

    for (const message of messages) {
      if (message.role === 'system') {
        prompt += `System: ${message.content}\n\n`;
      } else if (message.role === 'user') {
        prompt += `User: ${message.content}\n\n`;
      } else if (message.role === 'assistant') {
        prompt += `Assistant: ${message.content}\n\n`;
      }
    }

    prompt += 'Assistant: ';
    return prompt;
  }

  setModel(modelName: string): void {
    this.defaultModel = modelName;
    logger.info('Ollama model changed', { model: modelName });
  }

  setEmbeddingModel(modelName: string): void {
    this.defaultEmbeddingModel = modelName;
    logger.info('Ollama embedding model changed', { model: modelName });
  }
}
```
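
Since this adapter only needs a reachable Ollama daemon, it can be exercised standalone. A minimal usage sketch (assumes Ollama on `localhost:11434` with the `llama2` and `nomic-embed-text` models already pulled; the import path is illustrative):

```typescript
// Illustrative usage of the OllamaAdapter above; import path is hypothetical.
import { OllamaAdapter } from './backend/adapters/ollama_adapter';

const ollama = new OllamaAdapter(); // defaults: localhost:11434, llama2

if (await ollama.isAvailable()) {
  const chat = await ollama.generateCompletion([
    { role: 'system', content: 'You are concise.' },
    { role: 'user', content: 'Name three uses of embeddings.' },
  ]);
  console.log(chat.reply, chat.usage.total_tokens);

  const emb = await ollama.generateEmbedding('hello world');
  console.log(emb.embeddings[0].length); // embedding dimensionality
} else {
  console.error('Ollama is not reachable - is the daemon running?');
}
```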
backend/adapters/openai_adapter.ts
ADDED

```typescript
import OpenAI from 'openai';
import type {
  LLMAdapter,
  ImageAdapter,
  VoiceAdapter,
  Message,
  ChatOptions,
  ChatResponse,
  EmbeddingResponse,
  ImageGenerationRequest,
  ImageGenerationResponse,
  VoiceSynthesisRequest,
  VoiceSynthesisResponse,
  TranscriptionRequest,
  TranscriptionResponse,
} from '../types/models';

export class OpenAIAdapter implements LLMAdapter, ImageAdapter, VoiceAdapter {
  private client: OpenAI | null = null;
  private apiKey: string;
  private defaultChatModel: string;
  private defaultEmbeddingModel: string;
  private defaultImageModel: string;
  private defaultVoiceModel: string;

  constructor(
    apiKey: string,
    defaultChatModel = 'gpt-3.5-turbo',
    defaultEmbeddingModel = 'text-embedding-ada-002',
    defaultImageModel = 'dall-e-3',
    defaultVoiceModel = 'tts-1'
  ) {
    this.apiKey = apiKey;
    this.defaultChatModel = defaultChatModel;
    this.defaultEmbeddingModel = defaultEmbeddingModel;
    this.defaultImageModel = defaultImageModel;
    this.defaultVoiceModel = defaultVoiceModel;

    if (apiKey) {
      this.client = new OpenAI({ apiKey });
    }
  }

  async isAvailable(): Promise<boolean> {
    if (!this.client) return false;
    try {
      await this.client.models.list();
      return true;
    } catch {
      return false;
    }
  }

  async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
    if (!this.client) {
      throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
    }

    const completion = await this.client.chat.completions.create({
      model: this.defaultChatModel,
      messages: messages.map(m => ({
        role: m.role,
        content: m.content,
      })),
      temperature: options?.temperature ?? 0.7,
      max_tokens: options?.max_tokens ?? 1000,
      top_p: options?.top_p,
      frequency_penalty: options?.frequency_penalty,
      presence_penalty: options?.presence_penalty,
      stop: options?.stop,
    });

    return {
      reply: completion.choices[0]?.message?.content || '',
      model: completion.model,
      usage: {
        prompt_tokens: completion.usage?.prompt_tokens || 0,
        completion_tokens: completion.usage?.completion_tokens || 0,
        total_tokens: completion.usage?.total_tokens || 0,
      },
      sources: null,
    };
  }

  async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
    if (!this.client) {
      throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
    }

    const input = Array.isArray(text) ? text : [text];

    const response = await this.client.embeddings.create({
      model: this.defaultEmbeddingModel,
      input,
    });

    if (!response.data) {
      throw new Error('No embedding data returned from OpenAI');
    }

    return {
      embeddings: response.data.map(d => d.embedding),
      model: response.model,
      usage: {
        prompt_tokens: response.usage.prompt_tokens,
        completion_tokens: 0,
        total_tokens: response.usage.total_tokens,
      },
    };
  }

  async generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse> {
    if (!this.client) {
      throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
    }

    const model = options?.model || this.defaultImageModel;
    const isDallE3 = model.includes('dall-e-3');

    const response = await this.client.images.generate({
      model,
      prompt,
      n: isDallE3 ? 1 : (options?.n || 1),
      size: options?.size || '1024x1024',
      quality: options?.quality,
      style: options?.style,
    });

    if (!response.data) {
      throw new Error('No image data returned from OpenAI');
    }

    return {
      images: response.data.map(img => ({
        url: img.url || '',
        revised_prompt: img.revised_prompt,
        b64_json: img.b64_json,
      })),
      model,
      created: response.created,
    };
  }

  async synthesize(text: string, options?: Partial<VoiceSynthesisRequest>): Promise<VoiceSynthesisResponse> {
    if (!this.client) {
      throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
    }

    const voice = options?.voice || 'alloy';
    const model = options?.model || this.defaultVoiceModel;
    const format = options?.format || 'mp3';

    const response = await this.client.audio.speech.create({
      model,
      voice,
      input: text,
      response_format: format as any,
      speed: options?.speed,
    });

    const buffer = Buffer.from(await response.arrayBuffer());
    const base64Audio = buffer.toString('base64');

    return {
      audio_url: `data:audio/${format};base64,${base64Audio}`,
      voice,
      format,
      size_bytes: buffer.length,
    };
  }

  async transcribe(audio: Buffer, options?: Partial<TranscriptionRequest>): Promise<TranscriptionResponse> {
    if (!this.client) {
      throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
    }

    // NOTE: openai-node expects a File/Uploadable here; casting a raw Buffer to
    // `any` compiles but can fail at runtime (e.g. wrap it with OpenAI's
    // `toFile(audio, 'audio.mp3')` helper instead).
    const file = audio as any;

    const response = await this.client.audio.transcriptions.create({
      file,
      model: options?.model || 'whisper-1',
      language: options?.language,
      prompt: options?.prompt,
    });

    return {
      text: response.text,
      language: options?.language || 'en',
      duration: 0,
      model: 'whisper-1',
    };
  }
}
```
backend/adapters/vector_db_adapter.ts
ADDED

```typescript
import { Pinecone } from '@pinecone-database/pinecone';
import type { VectorDBAdapter, VectorSearchResult } from '../types/models';

export class PineconeAdapter implements VectorDBAdapter {
  private client: Pinecone | null = null;
  private indexName: string;
  private namespace: string;
  private initialized = false;

  constructor(apiKey: string, indexName: string, namespace = 'default') {
    this.indexName = indexName;
    this.namespace = namespace;

    if (apiKey) {
      this.client = new Pinecone({ apiKey });
    }
  }

  async isAvailable(): Promise<boolean> {
    if (!this.client) return false;
    try {
      await this.client.listIndexes();
      return true;
    } catch {
      return false;
    }
  }

  async upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void> {
    if (!this.client) {
      throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
    }

    const index = this.client.index(this.indexName);

    await index.namespace(this.namespace).upsert(vectors);
  }

  async query(
    queryVector: number[],
    topK: number,
    filter?: Record<string, any>
  ): Promise<VectorSearchResult[]> {
    if (!this.client) {
      throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
    }

    const index = this.client.index(this.indexName);

    const results = await index.namespace(this.namespace).query({
      vector: queryVector,
      topK,
      filter,
      includeMetadata: true,
    });

    return results.matches.map(match => ({
      id: match.id,
      score: match.score || 0,
      metadata: (match.metadata || {}) as Record<string, any>,
    }));
  }

  async delete(ids: string[]): Promise<void> {
    if (!this.client) {
      throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
    }

    const index = this.client.index(this.indexName);

    await index.namespace(this.namespace).deleteMany(ids);
  }
}

export class InMemoryVectorDB implements VectorDBAdapter {
  private vectors: Map<string, { values: number[]; metadata: Record<string, any> }> = new Map();

  async isAvailable(): Promise<boolean> {
    return true;
  }

  async upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void> {
    for (const vector of vectors) {
      this.vectors.set(vector.id, {
        values: vector.values,
        metadata: vector.metadata,
      });
    }
  }

  async query(
    queryVector: number[],
    topK: number,
    filter?: Record<string, any>
  ): Promise<VectorSearchResult[]> {
    const results: Array<{ id: string; score: number; metadata: Record<string, any> }> = [];

    for (const [id, vector] of this.vectors.entries()) {
      if (filter && !this.matchesFilter(vector.metadata, filter)) {
        continue;
      }

      const score = this.cosineSimilarity(queryVector, vector.values);
      results.push({
        id,
        score,
        metadata: vector.metadata,
      });
    }

    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }

  async delete(ids: string[]): Promise<void> {
    for (const id of ids) {
      this.vectors.delete(id);
    }
  }

  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) return 0;

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  private matchesFilter(metadata: Record<string, any>, filter: Record<string, any>): boolean {
    for (const [key, value] of Object.entries(filter)) {
      if (metadata[key] !== value) {
        return false;
      }
    }
    return true;
  }
}
```
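
For local development, the in-memory fallback behaves like a tiny vector store. A minimal usage sketch (hypothetical IDs and 2-dimensional vectors, purely for illustration):

```typescript
// Illustrative only: exercises the InMemoryVectorDB defined above.
const db = new InMemoryVectorDB();

await db.upsert([
  { id: 'a', values: [1, 0], metadata: { doc: 'x' } },
  { id: 'b', values: [0, 1], metadata: { doc: 'y' } },
]);

// Cosine similarity ranks 'a' first for a query pointing mostly along [1, 0].
const hits = await db.query([0.9, 0.1], 1);
console.log(hits[0].id, hits[0].score.toFixed(2)); // "a 0.99"
```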
backend/api/auth.ts
ADDED

```typescript
import { api } from "encore.dev/api";
import { auth, validateApiKey, getApiKeyInfo } from "../utils/auth";
import { getRateLimitInfo } from "../utils/rate_limit";
import type { ApiKeyInfo, RateLimitInfo } from "../types/models";

interface VerifyResponse {
  valid: boolean;
  key_info: ApiKeyInfo;
  rate_limit: RateLimitInfo;
}

export const verify = api<void, VerifyResponse>(
  { expose: true, method: "POST", path: "/auth/verify", auth: false },
  async () => {
    const authHeader = auth();
    const authData = validateApiKey(authHeader);
    const keyInfo = getApiKeyInfo(authData.apiKey);
    const rateLimitInfo = getRateLimitInfo(authData.apiKey, authData.tier);

    return {
      valid: true,
      key_info: keyInfo,
      rate_limit: rateLimitInfo,
    };
  }
);
```
backend/api/chat.ts
ADDED

```typescript
import { api, APIError, Query } from "encore.dev/api";
import { auth, validateApiKey } from "../utils/auth";
import { checkRateLimit } from "../utils/rate_limit";
import { metrics } from "../utils/metrics";
import { aiService } from "../services/ai_service";
import type { ChatRequest, ChatResponse } from "../types/models";

export const chat = api<ChatRequest, ChatResponse>(
  { expose: true, method: "POST", path: "/ai/chat", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/ai/chat");

      if (!req.conversation || req.conversation.length === 0) {
        throw APIError.invalidArgument("conversation must contain at least one message");
      }

      const response = await aiService.chat(
        req.conversation,
        req.model,
        req.options
      );

      metrics.recordResponseTime(Date.now() - startTime);

      return response;
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);

interface SimpleQueryRequest {
  q: Query<string>;
  model?: Query<string>;
}

interface SimpleQueryResponse {
  answer: string;
  model: string;
}

export const query = api<SimpleQueryRequest, SimpleQueryResponse>(
  { expose: true, method: "GET", path: "/ai/query", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/ai/query");

      if (!req.q) {
        throw APIError.invalidArgument("query parameter 'q' is required");
      }

      const answer = await aiService.simpleQuery(req.q, req.model);

      metrics.recordResponseTime(Date.now() - startTime);

      return {
        answer,
        model: req.model || 'default',
      };
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);
```
backend/api/documents.ts
ADDED

```typescript
import { api, APIError } from "encore.dev/api";
import { auth, validateApiKey } from "../utils/auth";
import { checkRateLimit } from "../utils/rate_limit";
import { metrics } from "../utils/metrics";
import { documentService } from "../services/document_service";
import type {
  DocumentUploadResponse,
  DocumentSource,
  WebhookEvent
} from "../types/models";

interface UploadRequest {
  filename: string;
  content_base64: string;
  metadata?: {
    title?: string;
    author?: string;
    category?: string;
    tags?: string[];
  };
}

export const upload = api<UploadRequest, DocumentUploadResponse>(
  { expose: true, method: "POST", path: "/upload", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/upload");

      if (!req.filename) {
        throw APIError.invalidArgument("filename is required");
      }

      if (!req.content_base64) {
        throw APIError.invalidArgument("content_base64 is required");
      }

      const content = Buffer.from(req.content_base64, 'base64');

      const response = await documentService.uploadDocument(
        req.filename,
        content,
        req.metadata
      );

      metrics.recordResponseTime(Date.now() - startTime);

      return response;
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);

interface GetSourcesRequest {
  id: string;
}

interface GetSourcesResponse {
  sources: DocumentSource[];
}

export const getSources = api<GetSourcesRequest, GetSourcesResponse>(
  { expose: true, method: "GET", path: "/docs/:id/sources", auth: false },
  async (req) => {
    try {
      const authHeader = auth();
      validateApiKey(authHeader);

      metrics.incrementRequests("/docs/:id/sources");

      const sources = await documentService.getDocumentSources(req.id);

      return { sources };
    } catch (error) {
      metrics.incrementErrors();

      if (error instanceof Error && error.message === 'Document not found') {
        throw APIError.notFound("document not found");
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);

interface WebhookResponse {
  received: boolean;
}

export const webhook = api<WebhookEvent, WebhookResponse>(
  { expose: true, method: "POST", path: "/webhook/events", auth: false },
  async () => {
    try {
      metrics.incrementRequests("/webhook/events");

      return { received: true };
    } catch (error) {
      metrics.incrementErrors();
      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);
```
backend/api/encore.service.ts
ADDED

```typescript
import { Service } from "encore.dev/service";

export default new Service("api");
```
backend/api/health.ts
ADDED
@@ -0,0 +1,55 @@
import { api } from "encore.dev/api";
import { aiService } from "../services/ai_service";
import { ragService } from "../services/rag_service";
import { imageService } from "../services/image_service";
import { voiceService } from "../services/voice_service";
import { metrics } from "../utils/metrics";
import type { HealthCheckResponse, MetricsResponse } from "../types/models";

const startTime = Date.now();
const version = "1.0.0";

export const health = api<void, HealthCheckResponse>(
  { expose: true, method: "GET", path: "/health" },
  async () => {
    const services = [];

    try {
      const llmHealth = await aiService.healthCheck();
      services.push({
        name: "llm",
        status: llmHealth.some(h => h.available) ? ("up" as const) : ("down" as const),
      });
    } catch {
      services.push({ name: "llm", status: "down" as const });
    }

    try {
      const vectorDbAvailable = await ragService.healthCheck();
      services.push({
        name: "vector_db",
        status: vectorDbAvailable ? ("up" as const) : ("down" as const),
      });
    } catch {
      services.push({ name: "vector_db", status: "down" as const });
    }

    const allUp = services.every(s => s.status === "up");
    const status = allUp ? "healthy" : "degraded";

    return {
      status,
      timestamp: Date.now(),
      version,
      services,
      uptime_seconds: Math.floor((Date.now() - startTime) / 1000),
    };
  }
);

export const getMetrics = api<void, MetricsResponse>(
  { expose: true, method: "GET", path: "/metrics" },
  async () => {
    return metrics.getMetrics();
  }
);
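
The health and metrics endpoints above are unauthenticated, so they can be smoke-tested with a plain fetch call. A minimal sketch (the base URL assumes Encore's default local dev address and is not part of this commit):

// Probe /health and /metrics; BASE_URL is an assumption, adjust to your deployment.
const BASE_URL = "http://localhost:4000";

async function smokeTest(): Promise<void> {
  const health = (await (await fetch(`${BASE_URL}/health`)).json()) as {
    status: string;
    uptime_seconds: number;
  };
  console.log(`status=${health.status}, up ${health.uptime_seconds}s`);

  const metricsSnapshot = await (await fetch(`${BASE_URL}/metrics`)).json();
  console.log(metricsSnapshot);
}

smokeTest().catch(console.error);
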
backend/api/image.ts
ADDED
@@ -0,0 +1,44 @@
import { api, APIError } from "encore.dev/api";
import { auth, validateApiKey } from "../utils/auth";
import { checkRateLimit } from "../utils/rate_limit";
import { metrics } from "../utils/metrics";
import { imageService } from "../services/image_service";
import type { ImageGenerationRequest, ImageGenerationResponse } from "../types/models";

export const generate = api<ImageGenerationRequest, ImageGenerationResponse>(
  { expose: true, method: "POST", path: "/image/generate", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/image/generate");

      if (!req.prompt) {
        throw APIError.invalidArgument("prompt is required");
      }

      const response = await imageService.generate(req);

      metrics.recordResponseTime(Date.now() - startTime);

      return response;
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);
backend/api/rag.ts
ADDED
@@ -0,0 +1,72 @@
import { api, APIError } from "encore.dev/api";
import { auth, validateApiKey } from "../utils/auth";
import { checkRateLimit } from "../utils/rate_limit";
import { metrics } from "../utils/metrics";
import { ragService } from "../services/rag_service";
import { aiService } from "../services/ai_service";
import type { RAGQueryRequest, RAGQueryResponse } from "../types/models";

export const ragQuery = api<RAGQueryRequest, RAGQueryResponse>(
  { expose: true, method: "POST", path: "/rag/query", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/rag/query");

      if (!req.query) {
        throw APIError.invalidArgument("query is required");
      }

      const response = await ragService.query(req);

      metrics.recordResponseTime(Date.now() - startTime);

      return response;
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);

interface ModelsResponse {
  models: string[];
  default_model: string;
}

export const getModels = api<void, ModelsResponse>(
  { expose: true, method: "GET", path: "/rag/models", auth: false },
  async () => {
    try {
      const authHeader = auth();
      validateApiKey(authHeader);

      metrics.incrementRequests("/rag/models");

      const models = aiService.getAvailableModels();

      return {
        models,
        default_model: models[0] || 'gpt-3.5-turbo',
      };
    } catch (error) {
      metrics.incrementErrors();
      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);
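
The authenticated handlers read an API key via auth()/validateApiKey, defined in backend/utils/auth.ts (not shown in this excerpt). A hedged client sketch for /rag/query, assuming a bearer-style Authorization header and the demo key from the example environment file:

interface RAGAnswer {
  answer: string;
  sources: Array<{ doc_id: string; chunk_id: string; score: number }>;
  model: string;
}

// POST a question; top_k and model are optional fields of RAGQueryRequest.
async function askDocs(question: string): Promise<RAGAnswer> {
  const res = await fetch("http://localhost:4000/rag/query", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: "Bearer demo-key-1", // assumption: the real scheme lives in utils/auth.ts
    },
    body: JSON.stringify({ query: question, top_k: 3 }),
  });
  if (!res.ok) throw new Error(`RAG query failed: ${res.status}`);
  return (await res.json()) as RAGAnswer;
}
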
backend/api/voice.ts
ADDED
@@ -0,0 +1,101 @@
import { api, APIError } from "encore.dev/api";
import { auth, validateApiKey } from "../utils/auth";
import { checkRateLimit } from "../utils/rate_limit";
import { metrics } from "../utils/metrics";
import { voiceService } from "../services/voice_service";
import type {
  VoiceSynthesisRequest,
  VoiceSynthesisResponse,
  TranscriptionRequest,
  TranscriptionResponse
} from "../types/models";

export const synthesize = api<VoiceSynthesisRequest, VoiceSynthesisResponse>(
  { expose: true, method: "POST", path: "/voice/synthesize", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/voice/synthesize");

      if (!req.text) {
        throw APIError.invalidArgument("text is required");
      }

      const response = await voiceService.synthesize(req);

      metrics.recordResponseTime(Date.now() - startTime);

      return response;
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);

interface TranscribeRequestBody {
  audio_base64: string;
  model?: string;
  language?: string;
  prompt?: string;
}

export const transcribe = api<TranscribeRequestBody, TranscriptionResponse>(
  { expose: true, method: "POST", path: "/voice/transcribe", auth: false },
  async (req) => {
    const startTime = Date.now();

    try {
      const authHeader = auth();
      const authData = validateApiKey(authHeader);
      checkRateLimit(authData.apiKey, authData.tier);

      metrics.incrementRequests("/voice/transcribe");

      if (!req.audio_base64) {
        throw APIError.invalidArgument("audio_base64 is required");
      }

      const audioBuffer = Buffer.from(req.audio_base64, 'base64');

      const response = await voiceService.transcribe(audioBuffer, {
        audio_url: '',
        model: req.model,
        language: req.language,
        prompt: req.prompt,
      });

      metrics.recordResponseTime(Date.now() - startTime);

      return response;
    } catch (error) {
      metrics.incrementErrors();

      if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
        const err = error as any;
        throw APIError.resourceExhausted(err.message).withDetails({
          limit: err.limit,
          remaining: err.remaining,
          reset_at: err.resetAt,
        });
      }

      throw error instanceof Error ? error : APIError.internal(String(error));
    }
  }
);
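
Since /voice/transcribe accepts base64 audio in the JSON body rather than a multipart upload, a client only needs to encode the file first. A sketch under the same auth assumptions as the RAG example above:

import { readFile } from "node:fs/promises";

// Read a local audio file, base64-encode it, and request a transcription.
async function transcribeFile(path: string): Promise<string> {
  const audio = await readFile(path);
  const res = await fetch("http://localhost:4000/voice/transcribe", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: "Bearer demo-key-1", // assumption, see utils/auth.ts
    },
    body: JSON.stringify({ audio_base64: audio.toString("base64"), language: "en" }),
  });
  if (!res.ok) throw new Error(`transcription failed: ${res.status}`);
  const data = (await res.json()) as { text: string };
  return data.text;
}
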
backend/encore.app
ADDED
@@ -0,0 +1 @@
{"id": "scalable-ai-api-service-ysyi", "lang": "typescript"}
backend/package.json
ADDED
@@ -0,0 +1,16 @@
{
  "name": "backend",
  "version": "1.0.0",
  "type": "module",
  "packageManager": "bun",
  "dependencies": {
    "@anthropic-ai/sdk": "^0.24.1",
    "@huggingface/inference": "^3.10.0",
    "@pinecone-database/pinecone": "^6.1.1",
    "encore.dev": "^1.50.4",
    "openai": "^4.90.0"
  },
  "devDependencies": {
    "typescript": "^5.8.3"
  }
}
backend/services/ai_service.ts
ADDED
@@ -0,0 +1,193 @@
import { OpenAIAdapter } from '../adapters/openai_adapter';
import { HuggingFaceAdapter } from '../adapters/huggingface_adapter';
import { AnthropicAdapter } from '../adapters/anthropic_adapter';
import { OllamaAdapter } from '../adapters/ollama_adapter';
import { loadConfig } from '../types/config';
import { logger } from '../utils/logger';
import { metrics } from '../utils/metrics';
import type { Message, ChatOptions, ChatResponse, LLMAdapter } from '../types/models';

const config = loadConfig();

class AIService {
  private adapters: Map<string, LLMAdapter> = new Map();
  private defaultAdapter: LLMAdapter | null = null;

  constructor() {
    this.initializeAdapters();
  }

  private initializeAdapters(): void {
    if (config.openai.apiKey) {
      const openaiAdapter = new OpenAIAdapter(
        config.openai.apiKey,
        config.openai.defaultChatModel,
        config.openai.defaultEmbeddingModel,
        config.openai.defaultImageModel,
        config.openai.defaultVoiceModel
      );
      this.adapters.set('openai', openaiAdapter);
      this.adapters.set('gpt-4', openaiAdapter);
      this.adapters.set('gpt-3.5-turbo', openaiAdapter);
      this.adapters.set('gpt-4-turbo', openaiAdapter);

      if (!this.defaultAdapter) {
        this.defaultAdapter = openaiAdapter;
      }
    }

    if (config.huggingface.apiKey) {
      const hfAdapter = new HuggingFaceAdapter(config.huggingface.apiKey);
      this.adapters.set('huggingface', hfAdapter);
      this.adapters.set('mistral', hfAdapter);

      if (!this.defaultAdapter) {
        this.defaultAdapter = hfAdapter;
      }
    }

    if (config.anthropic.apiKey) {
      const anthropicAdapter = new AnthropicAdapter(config.anthropic.apiKey);
      this.adapters.set('anthropic', anthropicAdapter);
      this.adapters.set('claude', anthropicAdapter);
      this.adapters.set('claude-3-sonnet', anthropicAdapter);
      this.adapters.set('claude-3-opus', anthropicAdapter);

      if (!this.defaultAdapter) {
        this.defaultAdapter = anthropicAdapter;
      }
    }

    // Ollama needs no API key, so it is always registered as a fallback provider.
    const ollamaBaseUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
    const ollamaModel = process.env.OLLAMA_MODEL || 'llama2';
    const ollamaEmbeddingModel = process.env.OLLAMA_EMBEDDING_MODEL || 'nomic-embed-text';

    const ollamaAdapter = new OllamaAdapter(ollamaBaseUrl, ollamaModel, ollamaEmbeddingModel);
    this.adapters.set('ollama', ollamaAdapter);
    this.adapters.set('llama', ollamaAdapter);
    this.adapters.set('llama2', ollamaAdapter);
    this.adapters.set('llama3', ollamaAdapter);
    // Note: this overwrites the 'mistral' mapping registered for Hugging Face above.
    this.adapters.set('mistral', ollamaAdapter);
    this.adapters.set('phi', ollamaAdapter);
    this.adapters.set('gemma', ollamaAdapter);

    if (!this.defaultAdapter) {
      this.defaultAdapter = ollamaAdapter;
      logger.info('Using Ollama as default LLM provider');
    }

    // Unreachable in practice, since the Ollama adapter is always registered above.
    if (!this.defaultAdapter) {
      logger.warn('No LLM adapters initialized. Please configure at least one API key.');
    }
  }

  private getAdapter(model?: string): LLMAdapter {
    if (!model) {
      if (!this.defaultAdapter) {
        throw new Error('No LLM adapter available. Please configure API keys.');
      }
      return this.defaultAdapter;
    }

    const lowerModel = model.toLowerCase();

    for (const [key, adapter] of this.adapters.entries()) {
      if (lowerModel.includes(key) || key.includes(lowerModel)) {
        return adapter;
      }
    }

    if (!this.defaultAdapter) {
      throw new Error('No LLM adapter available. Please configure API keys.');
    }

    logger.warn(`Model ${model} not found, using default adapter`);
    return this.defaultAdapter;
  }

  async chat(messages: Message[], model?: string, options?: ChatOptions): Promise<ChatResponse> {
    try {
      const adapter = this.getAdapter(model);

      logger.info('Generating chat completion', {
        model: model || 'default',
        messageCount: messages.length
      });

      const response = await adapter.generateCompletion(messages, options);

      metrics.incrementModelUsage(response.model);

      logger.info('Chat completion generated', {
        model: response.model,
        tokensUsed: response.usage.total_tokens,
      });

      return response;
    } catch (error) {
      logger.error('Error generating chat completion', {
        error: error instanceof Error ? error.message : String(error),
        model: model || 'default'
      });
      throw error;
    }
  }

  async simpleQuery(query: string, model?: string, options?: ChatOptions): Promise<string> {
    const messages: Message[] = [
      {
        role: 'system',
        content: 'You are a helpful assistant. Provide clear, concise answers.',
      },
      {
        role: 'user',
        content: query,
      },
    ];

    const response = await this.chat(messages, model, options);
    return response.reply;
  }

  async generateEmbedding(text: string | string[], model?: string) {
    try {
      const adapter = this.getAdapter(model);

      logger.info('Generating embeddings', {
        model: model || 'default',
        textCount: Array.isArray(text) ? text.length : 1
      });

      const response = await adapter.generateEmbedding(text);

      logger.info('Embeddings generated', {
        model: response.model,
        count: response.embeddings.length,
      });

      return response;
    } catch (error) {
      logger.error('Error generating embeddings', {
        error: error instanceof Error ? error.message : String(error)
      });
      throw error;
    }
  }

  getAvailableModels(): string[] {
    return Array.from(this.adapters.keys());
  }

  async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
    const results: { provider: string; available: boolean }[] = [];

    for (const [provider, adapter] of this.adapters.entries()) {
      const available = await adapter.isAvailable();
      results.push({ provider, available });
    }

    return results;
  }
}

export const aiService = new AIService();
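
The getAdapter lookup resolves a requested model name by a bidirectional substring match against the registered keys, falling back to the default adapter when nothing matches. A standalone illustration of that matching rule (reduced key set, not part of the commit):

// Same matching rule as AIService.getAdapter, over a small key list.
function resolveAdapterKey(model: string, keys: string[]): string {
  const lower = model.toLowerCase();
  for (const key of keys) {
    if (lower.includes(key) || key.includes(lower)) return key;
  }
  return "default";
}

const keys = ["openai", "gpt-4", "ollama", "llama", "llama3", "mistral"];
console.log(resolveAdapterKey("llama3:8b", keys));     // "llama" (first substring hit wins)
console.log(resolveAdapterKey("GPT-4-Turbo", keys));   // "gpt-4"
console.log(resolveAdapterKey("unknown-model", keys)); // "default" -> default adapter
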
backend/services/document_service.ts
ADDED
@@ -0,0 +1,238 @@
import { ragService } from './rag_service';
import { loadConfig } from '../types/config';
import { logger } from '../utils/logger';
import { metrics } from '../utils/metrics';
import crypto from 'crypto';
import type {
  DocumentUploadResponse,
  DocumentMetadata,
  DocumentChunk,
  DocumentSource
} from '../types/models';

function uuidv4(): string {
  return crypto.randomUUID();
}

const config = loadConfig();

interface StoredDocument {
  doc_id: string;
  filename: string;
  content: string;
  metadata: DocumentMetadata;
  chunks: DocumentChunk[];
  status: 'processing' | 'completed' | 'failed';
  error?: string;
  created_at: number;
}

class DocumentService {
  private documents = new Map<string, StoredDocument>();
  private processingQueue: string[] = [];

  async uploadDocument(
    filename: string,
    content: Buffer,
    metadata?: DocumentMetadata
  ): Promise<DocumentUploadResponse> {
    try {
      const doc_id = uuidv4();
      const size_bytes = content.length;

      const maxSize = config.documents.maxFileSizeMB * 1024 * 1024;
      if (size_bytes > maxSize) {
        throw new Error(`File size exceeds maximum of ${config.documents.maxFileSizeMB}MB`);
      }

      logger.info('Uploading document', { doc_id, filename, size_bytes });

      const textContent = await this.extractText(filename, content);

      const estimatedChunks = Math.ceil(textContent.length / config.documents.chunkSize);

      const document: StoredDocument = {
        doc_id,
        filename,
        content: textContent,
        metadata: metadata || {},
        chunks: [],
        status: 'processing',
        created_at: Date.now(),
      };

      this.documents.set(doc_id, document);
      this.processingQueue.push(doc_id);

      if (config.workers.enabled) {
        this.processDocumentAsync(doc_id);
      } else {
        await this.processDocument(doc_id);
      }

      return {
        doc_id,
        filename,
        size_bytes,
        status: document.status,
        estimated_chunks: estimatedChunks,
        webhook_url: '/webhook/events',
      };
    } catch (error) {
      logger.error('Error uploading document', {
        error: error instanceof Error ? error.message : String(error),
        filename,
      });
      throw error;
    }
  }

  private async extractText(filename: string, content: Buffer): Promise<string> {
    const extension = filename.split('.').pop()?.toLowerCase();

    if (extension === 'txt') {
      return content.toString('utf-8');
    }

    if (extension === 'pdf' || extension === 'docx') {
      logger.warn(`${extension} parsing not implemented, treating as text`, { filename });
      return content.toString('utf-8');
    }

    return content.toString('utf-8');
  }

  private async processDocument(doc_id: string): Promise<void> {
    const document = this.documents.get(doc_id);
    if (!document) {
      logger.error('Document not found', { doc_id });
      return;
    }

    try {
      logger.info('Processing document', { doc_id, filename: document.filename });

      const chunks = this.chunkText(document.content, doc_id, document.metadata);
      document.chunks = chunks;

      const chunkData = chunks.map(chunk => ({
        id: chunk.chunk_id,
        content: chunk.content,
        metadata: {
          doc_id: chunk.doc_id,
          chunk_index: chunk.chunk_index,
          total_chunks: chunk.total_chunks,
          ...chunk.metadata,
        },
      }));

      await ragService.addDocumentChunks(chunkData);

      document.status = 'completed';
      metrics.incrementDocumentsProcessed();

      logger.info('Document processed successfully', {
        doc_id,
        chunksCreated: chunks.length,
      });
    } catch (error) {
      document.status = 'failed';
      document.error = error instanceof Error ? error.message : String(error);

      logger.error('Error processing document', {
        error: document.error,
        doc_id,
      });
    }
  }

  private async processDocumentAsync(doc_id: string): Promise<void> {
    // Fire-and-forget hand-off so the upload request returns immediately.
    setTimeout(async () => {
      await this.processDocument(doc_id);
    }, 100);
  }

  private chunkText(
    text: string,
    doc_id: string,
    metadata: DocumentMetadata
  ): DocumentChunk[] {
    const chunkSize = config.documents.chunkSize;
    const overlap = config.documents.chunkOverlap;
    const chunks: DocumentChunk[] = [];

    let start = 0;
    let chunkIndex = 0;

    while (start < text.length) {
      const end = Math.min(start + chunkSize, text.length);
      const content = text.slice(start, end);

      const chunk_id = `${doc_id}_chunk_${chunkIndex}`;

      chunks.push({
        chunk_id,
        doc_id,
        content,
        metadata,
        chunk_index: chunkIndex,
        total_chunks: 0,
      });

      start += chunkSize - overlap;
      chunkIndex++;
    }

    const totalChunks = chunks.length;
    chunks.forEach(chunk => {
      chunk.total_chunks = totalChunks;
    });

    return chunks;
  }

  async getDocumentSources(doc_id: string): Promise<DocumentSource[]> {
    const document = this.documents.get(doc_id);
    if (!document) {
      throw new Error('Document not found');
    }

    return document.chunks.map(chunk => ({
      doc_id: chunk.doc_id,
      chunk_id: chunk.chunk_id,
      content: chunk.content,
      score: 1.0,
      metadata: chunk.metadata,
    }));
  }

  async getDocumentStatus(doc_id: string): Promise<DocumentUploadResponse> {
    const document = this.documents.get(doc_id);
    if (!document) {
      throw new Error('Document not found');
    }

    return {
      doc_id: document.doc_id,
      filename: document.filename,
      size_bytes: document.content.length,
      status: document.status,
      estimated_chunks: document.chunks.length,
      error: document.error,
    };
  }

  async deleteDocument(doc_id: string): Promise<void> {
    const document = this.documents.get(doc_id);
    if (!document) {
      throw new Error('Document not found');
    }

    await ragService.deleteDocument(doc_id);
    this.documents.delete(doc_id);

    logger.info('Document deleted', { doc_id });
  }
}

export const documentService = new DocumentService();
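
chunkText advances the window by chunkSize minus chunkOverlap on each step, so neighbouring chunks share an overlap region. A worked example of that stride using the defaults from config.ts (chunkSize = 1000, chunkOverlap = 200), as a standalone sketch:

// Offsets where each chunk starts; the stride must stay positive or the loop never ends.
function chunkOffsets(textLength: number, chunkSize = 1000, overlap = 200): number[] {
  const stride = chunkSize - overlap; // 800 with the defaults
  const offsets: number[] = [];
  for (let start = 0; start < textLength; start += stride) {
    offsets.push(start);
  }
  return offsets;
}

console.log(chunkOffsets(2500)); // [0, 800, 1600, 2400] -> 4 chunks, the last one short
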
backend/services/image_service.ts
ADDED
@@ -0,0 +1,122 @@
import { OpenAIAdapter } from '../adapters/openai_adapter';
import { HuggingFaceAdapter } from '../adapters/huggingface_adapter';
import { loadConfig } from '../types/config';
import { logger } from '../utils/logger';
import { metrics } from '../utils/metrics';
import type { ImageAdapter, ImageGenerationRequest, ImageGenerationResponse } from '../types/models';

const config = loadConfig();

class ImageService {
  private adapters: Map<string, ImageAdapter> = new Map();
  private defaultAdapter: ImageAdapter | null = null;

  constructor() {
    this.initializeAdapters();
  }

  private initializeAdapters(): void {
    if (config.openai.apiKey) {
      const openaiAdapter = new OpenAIAdapter(
        config.openai.apiKey,
        config.openai.defaultChatModel,
        config.openai.defaultEmbeddingModel,
        config.openai.defaultImageModel
      );
      this.adapters.set('openai', openaiAdapter);
      this.adapters.set('dall-e', openaiAdapter);
      this.adapters.set('dall-e-2', openaiAdapter);
      this.adapters.set('dall-e-3', openaiAdapter);

      if (!this.defaultAdapter) {
        this.defaultAdapter = openaiAdapter;
      }
    }

    if (config.huggingface.apiKey) {
      const hfAdapter = new HuggingFaceAdapter(config.huggingface.apiKey);
      this.adapters.set('huggingface', hfAdapter);
      this.adapters.set('stable-diffusion', hfAdapter);
      this.adapters.set('sdxl', hfAdapter);

      if (!this.defaultAdapter) {
        this.defaultAdapter = hfAdapter;
      }
    }

    if (!this.defaultAdapter) {
      logger.warn('No image generation adapters initialized. Please configure API keys.');
    }
  }

  private getAdapter(model?: string): ImageAdapter {
    if (!model) {
      if (!this.defaultAdapter) {
        throw new Error('No image adapter available. Please configure API keys.');
      }
      return this.defaultAdapter;
    }

    const lowerModel = model.toLowerCase();

    for (const [key, adapter] of this.adapters.entries()) {
      if (lowerModel.includes(key) || key.includes(lowerModel)) {
        return adapter;
      }
    }

    if (!this.defaultAdapter) {
      throw new Error('No image adapter available. Please configure API keys.');
    }

    logger.warn(`Model ${model} not found, using default adapter`);
    return this.defaultAdapter;
  }

  async generate(request: ImageGenerationRequest): Promise<ImageGenerationResponse> {
    try {
      const adapter = this.getAdapter(request.model);

      logger.info('Generating image', {
        prompt: request.prompt.substring(0, 100),
        model: request.model || 'default',
        size: request.size,
        n: request.n,
      });

      const response = await adapter.generateImage(request.prompt, request);

      metrics.incrementModelUsage(response.model);

      logger.info('Image generated successfully', {
        model: response.model,
        imageCount: response.images.length,
      });

      return response;
    } catch (error) {
      logger.error('Error generating image', {
        error: error instanceof Error ? error.message : String(error),
        model: request.model || 'default',
      });
      throw error;
    }
  }

  getAvailableModels(): string[] {
    return Array.from(this.adapters.keys());
  }

  async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
    const results: { provider: string; available: boolean }[] = [];

    for (const [provider, adapter] of this.adapters.entries()) {
      const available = await adapter.isAvailable();
      results.push({ provider, available });
    }

    return results;
  }
}

export const imageService = new ImageService();
backend/services/rag_service.ts
ADDED
@@ -0,0 +1,182 @@
import { aiService } from './ai_service';
import { PineconeAdapter, InMemoryVectorDB } from '../adapters/vector_db_adapter';
import { loadConfig } from '../types/config';
import { logger } from '../utils/logger';
import { metrics } from '../utils/metrics';
import type {
  RAGQueryRequest,
  RAGQueryResponse,
  DocumentSource,
  VectorDBAdapter
} from '../types/models';

const config = loadConfig();

class RAGService {
  private vectorDB: VectorDBAdapter;

  constructor() {
    this.vectorDB = this.initializeVectorDB();
  }

  private initializeVectorDB(): VectorDBAdapter {
    if (config.pinecone.apiKey) {
      logger.info('Initializing Pinecone vector DB');
      return new PineconeAdapter(
        config.pinecone.apiKey,
        config.pinecone.indexName
      );
    }

    logger.warn('Pinecone not configured, using in-memory vector DB');
    return new InMemoryVectorDB();
  }

  async query(request: RAGQueryRequest): Promise<RAGQueryResponse> {
    const startTime = Date.now();

    try {
      logger.info('Processing RAG query', {
        query: request.query.substring(0, 100),
        topK: request.top_k || 5
      });

      let sources: DocumentSource[] = [];
      let contextPrompt = request.query;

      if (request.use_retrieval !== false) {
        const embeddingResponse = await aiService.generateEmbedding(request.query);
        const queryVector = embeddingResponse.embeddings[0];

        metrics.incrementVectorDbQueries();

        const results = await this.vectorDB.query(
          queryVector,
          request.top_k || 5,
          request.filters
        );

        sources = results.map(result => ({
          doc_id: result.metadata.doc_id || result.id,
          chunk_id: result.id,
          content: result.metadata.content || '',
          score: result.score,
          metadata: result.metadata,
        }));

        if (sources.length > 0) {
          const context = sources
            .map(s => `[Source: ${s.doc_id}]\n${s.content}`)
            .join('\n\n');

          contextPrompt = this.buildRAGPrompt(request.query, context);
        }
      }

      const messages = [
        {
          role: 'system' as const,
          content: 'You are a helpful assistant. Answer questions based on the provided context. If the context doesn\'t contain relevant information, say so.',
        },
        {
          role: 'user' as const,
          content: contextPrompt,
        },
      ];

      const chatResponse = await aiService.chat(messages, request.model);

      const retrievalTimeMs = Date.now() - startTime;

      logger.info('RAG query completed', {
        sourcesFound: sources.length,
        retrievalTimeMs,
        model: chatResponse.model,
      });

      return {
        answer: chatResponse.reply,
        sources,
        model: chatResponse.model,
        usage: chatResponse.usage,
        retrieval_time_ms: retrievalTimeMs,
      };
    } catch (error) {
      logger.error('Error processing RAG query', {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  async addDocumentChunks(chunks: Array<{
    id: string;
    content: string;
    metadata: Record<string, any>;
  }>): Promise<void> {
    try {
      logger.info('Adding document chunks to vector DB', { count: chunks.length });

      const texts = chunks.map(c => c.content);
      const embeddingResponse = await aiService.generateEmbedding(texts);

      const vectors = chunks.map((chunk, index) => ({
        id: chunk.id,
        values: embeddingResponse.embeddings[index],
        metadata: {
          ...chunk.metadata,
          content: chunk.content,
        },
      }));

      await this.vectorDB.upsert(vectors);

      logger.info('Document chunks added successfully', { count: chunks.length });
    } catch (error) {
      logger.error('Error adding document chunks', {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  async deleteDocument(docId: string): Promise<void> {
    try {
      logger.info('Deleting document from vector DB', { docId });

      // Enumerates the document's chunks via a metadata filter. Passing an
      // empty query vector is adapter-dependent: the in-memory DB tolerates
      // it, but a real Pinecone index expects a non-empty vector or an id.
      const results = await this.vectorDB.query([], 10000, { doc_id: docId });
      const chunkIds = results.map(r => r.id);

      if (chunkIds.length > 0) {
        await this.vectorDB.delete(chunkIds);
      }

      logger.info('Document deleted successfully', { docId, chunksDeleted: chunkIds.length });
    } catch (error) {
      logger.error('Error deleting document', {
        error: error instanceof Error ? error.message : String(error),
        docId,
      });
      throw error;
    }
  }

  private buildRAGPrompt(query: string, context: string): string {
    return `Context information is below:
---
${context}
---

Based on the context above, please answer the following question. If the context doesn't contain enough information to answer the question, please say so.

Question: ${query}

Answer:`;
  }

  async healthCheck(): Promise<boolean> {
    return await this.vectorDB.isAvailable();
  }
}

export const ragService = new RAGService();
backend/services/voice_service.ts
ADDED
@@ -0,0 +1,149 @@
import { OpenAIAdapter } from '../adapters/openai_adapter';
import { loadConfig } from '../types/config';
import { logger } from '../utils/logger';
import { metrics } from '../utils/metrics';
import type {
  VoiceAdapter,
  VoiceSynthesisRequest,
  VoiceSynthesisResponse,
  TranscriptionRequest,
  TranscriptionResponse
} from '../types/models';

const config = loadConfig();

class VoiceService {
  private adapters: Map<string, VoiceAdapter> = new Map();
  private defaultAdapter: VoiceAdapter | null = null;

  constructor() {
    this.initializeAdapters();
  }

  private initializeAdapters(): void {
    if (config.openai.apiKey) {
      const openaiAdapter = new OpenAIAdapter(
        config.openai.apiKey,
        config.openai.defaultChatModel,
        config.openai.defaultEmbeddingModel,
        config.openai.defaultImageModel,
        config.openai.defaultVoiceModel
      );
      this.adapters.set('openai', openaiAdapter);
      this.adapters.set('tts-1', openaiAdapter);
      this.adapters.set('tts-1-hd', openaiAdapter);
      this.adapters.set('whisper', openaiAdapter);

      if (!this.defaultAdapter) {
        this.defaultAdapter = openaiAdapter;
      }
    }

    if (!this.defaultAdapter) {
      logger.warn('No voice synthesis adapters initialized. Please configure API keys.');
    }
  }

  private getAdapter(model?: string): VoiceAdapter {
    if (!model) {
      if (!this.defaultAdapter) {
        throw new Error('No voice adapter available. Please configure API keys.');
      }
      return this.defaultAdapter;
    }

    const lowerModel = model.toLowerCase();

    for (const [key, adapter] of this.adapters.entries()) {
      if (lowerModel.includes(key) || key.includes(lowerModel)) {
        return adapter;
      }
    }

    if (!this.defaultAdapter) {
      throw new Error('No voice adapter available. Please configure API keys.');
    }

    logger.warn(`Model ${model} not found, using default adapter`);
    return this.defaultAdapter;
  }

  async synthesize(request: VoiceSynthesisRequest): Promise<VoiceSynthesisResponse> {
    try {
      const adapter = this.getAdapter(request.model);

      logger.info('Synthesizing speech', {
        textLength: request.text.length,
        voice: request.voice || 'default',
        model: request.model || 'default',
      });

      const response = await adapter.synthesize(request.text, request);

      metrics.incrementModelUsage(request.model || 'tts-1');

      logger.info('Speech synthesized successfully', {
        voice: response.voice,
        format: response.format,
        sizeBytes: response.size_bytes,
      });

      return response;
    } catch (error) {
      logger.error('Error synthesizing speech', {
        error: error instanceof Error ? error.message : String(error),
        model: request.model || 'default',
      });
      throw error;
    }
  }

  async transcribe(audio: Buffer, request: TranscriptionRequest): Promise<TranscriptionResponse> {
    try {
      const adapter = this.getAdapter(request.model);

      if (!adapter.transcribe) {
        throw new Error('Transcription not supported by this adapter');
      }

      logger.info('Transcribing audio', {
        model: request.model || 'default',
        language: request.language,
      });

      const response = await adapter.transcribe(audio, request);

      metrics.incrementModelUsage(request.model || 'whisper-1');

      logger.info('Audio transcribed successfully', {
        textLength: response.text.length,
        language: response.language,
      });

      return response;
    } catch (error) {
      logger.error('Error transcribing audio', {
        error: error instanceof Error ? error.message : String(error),
        model: request.model || 'default',
      });
      throw error;
    }
  }

  getAvailableModels(): string[] {
    return Array.from(this.adapters.keys());
  }

  async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
    const results: { provider: string; available: boolean }[] = [];

    for (const [provider, adapter] of this.adapters.entries()) {
      const available = await adapter.isAvailable();
      results.push({ provider, available });
    }

    return results;
  }
}

export const voiceService = new VoiceService();
backend/tsconfig.json
ADDED
@@ -0,0 +1,31 @@
{
  "$schema": "https://json.schemastore.org/tsconfig",
  "compilerOptions": {
    /* Basic Options */
    "lib": ["ES2022"],
    "target": "ES2022",
    "module": "ES2022",
    "types": ["node"],
    "paths": {
      "~encore/*": ["./encore.gen/*"]
    },

    /* Workspace Settings */
    "composite": true,

    /* Strict Type-Checking Options */
    "strict": true,

    /* Module Resolution Options */
    "moduleResolution": "bundler",
    "allowSyntheticDefaultImports": true,
    "isolatedModules": true,
    "sourceMap": true,

    "declaration": true,

    /* Advanced Options */
    "forceConsistentCasingInFileNames": true,
    "skipLibCheck": true
  }
}
backend/types/config.ts
ADDED
@@ -0,0 +1,93 @@
export interface AppConfig {
  openai: {
    apiKey: string;
    defaultChatModel: string;
    defaultEmbeddingModel: string;
    defaultImageModel: string;
    defaultVoiceModel: string;
  };
  huggingface: {
    apiKey: string;
    defaultModel: string;
  };
  anthropic: {
    apiKey: string;
    defaultModel: string;
  };
  pinecone: {
    apiKey: string;
    environment: string;
    indexName: string;
  };
  auth: {
    apiKeys: string[];
    adminApiKeys: string[];
  };
  rateLimit: {
    default: number;
    premium: number;
    admin: number;
  };
  documents: {
    maxFileSizeMB: number;
    chunkSize: number;
    chunkOverlap: number;
  };
  workers: {
    enabled: boolean;
    concurrency: number;
  };
  server: {
    port: number;
    logLevel: string;
    corsOrigins: string[];
  };
}

export function loadConfig(): AppConfig {
  return {
    openai: {
      apiKey: process.env.OPENAI_API_KEY || '',
      defaultChatModel: process.env.DEFAULT_CHAT_MODEL || 'gpt-3.5-turbo',
      defaultEmbeddingModel: process.env.DEFAULT_EMBEDDING_MODEL || 'text-embedding-ada-002',
      defaultImageModel: process.env.DEFAULT_IMAGE_MODEL || 'dall-e-3',
      defaultVoiceModel: process.env.DEFAULT_VOICE_MODEL || 'tts-1',
    },
    huggingface: {
      apiKey: process.env.HUGGINGFACE_API_KEY || '',
      defaultModel: process.env.HF_DEFAULT_MODEL || 'mistralai/Mistral-7B-Instruct-v0.1',
    },
    anthropic: {
      apiKey: process.env.ANTHROPIC_API_KEY || '',
      defaultModel: process.env.ANTHROPIC_DEFAULT_MODEL || 'claude-3-sonnet-20240229',
    },
    pinecone: {
      apiKey: process.env.PINECONE_API_KEY || '',
      environment: process.env.PINECONE_ENVIRONMENT || 'us-west1-gcp',
      indexName: process.env.PINECONE_INDEX_NAME || 'ai-api-vectors',
    },
    auth: {
      apiKeys: (process.env.API_KEYS || 'demo-key-1,demo-key-2').split(',').map(k => k.trim()),
      adminApiKeys: (process.env.ADMIN_API_KEYS || '').split(',').map(k => k.trim()).filter(Boolean),
    },
    rateLimit: {
      default: parseInt(process.env.RATE_LIMIT_DEFAULT || '60', 10),
      premium: parseInt(process.env.RATE_LIMIT_PREMIUM || '300', 10),
      admin: parseInt(process.env.RATE_LIMIT_ADMIN || '1000', 10),
    },
    documents: {
      maxFileSizeMB: parseInt(process.env.MAX_FILE_SIZE_MB || '10', 10),
      chunkSize: parseInt(process.env.CHUNK_SIZE || '1000', 10),
      chunkOverlap: parseInt(process.env.CHUNK_OVERLAP || '200', 10),
    },
    workers: {
      enabled: process.env.ENABLE_BACKGROUND_WORKERS === 'true',
      concurrency: parseInt(process.env.WORKER_CONCURRENCY || '5', 10),
    },
    server: {
      port: parseInt(process.env.PORT || '8000', 10),
      logLevel: process.env.LOG_LEVEL || 'info',
      corsOrigins: (process.env.CORS_ORIGINS || 'http://localhost:3000').split(',').map(o => o.trim()),
    },
  };
}
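
loadConfig reads the environment at call time, so any variable set before the call overrides the hard-coded fallback. A small sketch of the defaults and the comma-splitting of API_KEYS (the import path is an assumption for a sibling module):

import { loadConfig } from "./config";

process.env.API_KEYS = "key-a, key-b";

const cfg = loadConfig();
console.log(cfg.auth.apiKeys);        // ["key-a", "key-b"] -- entries are trimmed
console.log(cfg.rateLimit.default);   // 60 unless RATE_LIMIT_DEFAULT is set
console.log(cfg.documents.chunkSize); // 1000 unless CHUNK_SIZE is set
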
backend/types/models.ts
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export interface Message {
  role: 'system' | 'user' | 'assistant';
  content: string;
  timestamp?: number;
}

export interface ChatRequest {
  conversation: Message[];
  model?: string;
  options?: ChatOptions;
}

export interface ChatOptions {
  temperature?: number;
  max_tokens?: number;
  top_p?: number;
  frequency_penalty?: number;
  presence_penalty?: number;
  stop?: string[];
}

export interface ChatResponse {
  reply: string;
  model: string;
  usage: TokenUsage;
  sources?: DocumentSource[] | null;
  conversation_id?: string;
}

export interface TokenUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

export interface RAGQueryRequest {
  query: string;
  top_k?: number;
  model?: string;
  use_retrieval?: boolean;
  filters?: Record<string, any>;
}

export interface RAGQueryResponse {
  answer: string;
  sources: DocumentSource[];
  model: string;
  usage: TokenUsage;
  retrieval_time_ms?: number;
}

export interface DocumentSource {
  doc_id: string;
  chunk_id: string;
  content: string;
  score: number;
  metadata?: Record<string, any>;
}

export interface ImageGenerationRequest {
  prompt: string;
  model?: string;
  size?: '256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792';
  n?: number;
  quality?: 'standard' | 'hd';
  style?: 'vivid' | 'natural';
}

export interface ImageGenerationResponse {
  images: GeneratedImage[];
  model: string;
  created: number;
}

export interface GeneratedImage {
  url: string;
  revised_prompt?: string;
  b64_json?: string;
}

export interface VoiceSynthesisRequest {
  text: string;
  voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
  model?: string;
  format?: 'mp3' | 'opus' | 'aac' | 'flac';
  speed?: number;
}

export interface VoiceSynthesisResponse {
  audio_url: string;
  voice: string;
  format: string;
  duration_ms?: number;
  size_bytes?: number;
}

export interface TranscriptionRequest {
  audio_url: string;
  model?: string;
  language?: string;
  prompt?: string;
}

export interface TranscriptionResponse {
  text: string;
  language: string;
  duration: number;
  model: string;
}

export interface DocumentUploadRequest {
  filename: string;
  content: Buffer;
  metadata?: DocumentMetadata;
}

export interface DocumentMetadata {
  title?: string;
  author?: string;
  category?: string;
  tags?: string[];
  [key: string]: any;
}

export interface DocumentUploadResponse {
  doc_id: string;
  filename: string;
  size_bytes: number;
  status: 'processing' | 'completed' | 'failed';
  estimated_chunks?: number;
  webhook_url?: string;
  error?: string;
}

export interface DocumentChunk {
  chunk_id: string;
  doc_id: string;
  content: string;
  embedding?: number[];
  metadata: DocumentMetadata;
  chunk_index: number;
  total_chunks: number;
}

export interface HealthCheckResponse {
  status: 'healthy' | 'degraded' | 'unhealthy';
  timestamp: number;
  version: string;
  services: ServiceHealth[];
  uptime_seconds: number;
}

export interface ServiceHealth {
  name: string;
  status: 'up' | 'down' | 'degraded';
  latency_ms?: number;
  error?: string;
}

export interface MetricsResponse {
  timestamp: number;
  requests_total: number;
  requests_by_endpoint: Record<string, number>;
  errors_total: number;
  rate_limit_hits: number;
  active_connections: number;
  average_response_time_ms: number;
  model_usage: Record<string, number>;
  vector_db_queries: number;
  documents_processed: number;
}

export interface ApiKeyInfo {
  key_hash: string;
  tier: 'default' | 'premium' | 'admin';
  rate_limit: number;
  created_at: number;
  last_used?: number;
}

export interface RateLimitInfo {
  limit: number;
  remaining: number;
  reset_at: number;
  tier: string;
}

export interface WebhookEvent {
  event_type: 'document.ingestion.completed' | 'document.ingestion.failed';
  doc_id: string;
  timestamp: number;
  data: {
    chunks_created?: number;
    error?: string;
    status: string;
  };
}

export interface EmbeddingRequest {
  text: string | string[];
  model?: string;
}

export interface EmbeddingResponse {
  embeddings: number[][];
  model: string;
  usage: TokenUsage;
}

export interface VectorSearchRequest {
  query_vector: number[];
  top_k: number;
  filter?: Record<string, any>;
  namespace?: string;
}

export interface VectorSearchResult {
  id: string;
  score: number;
  metadata: Record<string, any>;
}

export type ModelProvider = 'openai' | 'huggingface' | 'anthropic' | 'local';

export interface ModelConfig {
  provider: ModelProvider;
  model_name: string;
  api_key?: string;
  max_tokens?: number;
  temperature?: number;
  endpoint?: string;
}

export interface LLMAdapter {
  generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse>;
  generateEmbedding(text: string | string[]): Promise<EmbeddingResponse>;
  isAvailable(): Promise<boolean>;
}

export interface VectorDBAdapter {
  upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void>;
  query(queryVector: number[], topK: number, filter?: Record<string, any>): Promise<VectorSearchResult[]>;
  delete(ids: string[]): Promise<void>;
  isAvailable(): Promise<boolean>;
}

export interface ImageAdapter {
  generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse>;
  isAvailable(): Promise<boolean>;
}

export interface VoiceAdapter {
  synthesize(text: string, options?: Partial<VoiceSynthesisRequest>): Promise<VoiceSynthesisResponse>;
  transcribe?(audio: Buffer, options?: Partial<TranscriptionRequest>): Promise<TranscriptionResponse>;
  isAvailable(): Promise<boolean>;
}
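The interfaces above form the provider contract the rest of the backend codes against. As a rough sketch (hypothetical, not part of this commit), a minimal offline adapter satisfying `LLMAdapter` could look like the following; the echo reply, zeroed token counts, and 8-dimensional embeddings are placeholders:

import type { LLMAdapter, Message, ChatOptions, ChatResponse, EmbeddingResponse } from './models';

// Hypothetical stub adapter for tests or offline development; not part of the commit.
class EchoAdapter implements LLMAdapter {
  async generateCompletion(messages: Message[], _options?: ChatOptions): Promise<ChatResponse> {
    const last = messages[messages.length - 1];
    return {
      reply: `echo: ${last?.content ?? ''}`,
      model: 'echo',
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    };
  }

  async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
    const inputs = Array.isArray(text) ? text : [text];
    return {
      // Fixed-size zero vectors stand in for real embeddings.
      embeddings: inputs.map(() => new Array(8).fill(0)),
      model: 'echo-embed',
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    };
  }

  async isAvailable(): Promise<boolean> {
    return true;
  }
}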
backend/utils/auth.ts
ADDED
@@ -0,0 +1,69 @@
import { Header as HeaderType } from "encore.dev/api";
import { loadConfig } from "../types/config";
import type { ApiKeyInfo } from "../types/models";

const config = loadConfig();

export interface AuthData {
  apiKey: string;
  tier: 'default' | 'premium' | 'admin';
}

export function auth(): HeaderType<"authorization"> {
  return "" as HeaderType<"authorization">;
}

export function validateApiKey(authHeader?: string): AuthData {
  if (!authHeader) {
    throw new Error('Missing Authorization header');
  }

  const apiKey = authHeader.replace(/^Bearer\s+/i, '').trim();

  if (!apiKey) {
    throw new Error('Invalid Authorization header format');
  }

  if (config.auth.adminApiKeys.includes(apiKey)) {
    return { apiKey, tier: 'admin' };
  }

  if (config.auth.apiKeys.includes(apiKey)) {
    return { apiKey, tier: 'default' };
  }

  throw new Error('Invalid API key');
}

export function getApiKeyInfo(apiKey: string): ApiKeyInfo {
  let tier: 'default' | 'premium' | 'admin' = 'default';
  if (config.auth.adminApiKeys.includes(apiKey)) {
    tier = 'admin';
  }

  let rateLimit = config.rateLimit.default;
  if (tier === 'admin') {
    rateLimit = config.rateLimit.admin;
  }

  return {
    key_hash: hashApiKey(apiKey),
    tier,
    rate_limit: rateLimit,
    created_at: Date.now(),
  };
}

// Non-cryptographic 32-bit string hash, suitable only for display and metrics keys,
// not for security purposes.
function hashApiKey(apiKey: string): string {
  let hash = 0;
  for (let i = 0; i < apiKey.length; i++) {
    const char = apiKey.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(16);
}

export function requireAuth(authHeader?: string): AuthData {
  return validateApiKey(authHeader);
}
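Handlers are expected to pass the raw Authorization header through `requireAuth` and branch on the returned tier. A hedged usage sketch (the handler shape is illustrative, not this repo's actual endpoint code):

import { requireAuth } from './auth';

// Illustrative only: gate an operation behind the admin tier.
function handleAdminOnly(authHeader?: string): string {
  const { tier } = requireAuth(authHeader); // throws on a missing or invalid key
  if (tier !== 'admin') {
    throw new Error('Admin API key required');
  }
  return 'ok';
}

handleAdminOnly('Bearer your-admin-key'); // placeholder key value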
backend/utils/logger.ts
ADDED
@@ -0,0 +1,48 @@
type LogLevel = 'debug' | 'info' | 'warn' | 'error';

class Logger {
  private level: LogLevel;

  constructor(level: LogLevel = 'info') {
    this.level = level;
  }

  private shouldLog(level: LogLevel): boolean {
    const levels: LogLevel[] = ['debug', 'info', 'warn', 'error'];
    return levels.indexOf(level) >= levels.indexOf(this.level);
  }

  private log(level: LogLevel, message: string, meta?: Record<string, any>): void {
    if (!this.shouldLog(level)) return;

    const timestamp = new Date().toISOString();
    const logEntry = {
      timestamp,
      level,
      message,
      ...meta,
    };

    console.log(JSON.stringify(logEntry));
  }

  debug(message: string, meta?: Record<string, any>): void {
    this.log('debug', message, meta);
  }

  info(message: string, meta?: Record<string, any>): void {
    this.log('info', message, meta);
  }

  warn(message: string, meta?: Record<string, any>): void {
    this.log('warn', message, meta);
  }

  error(message: string, meta?: Record<string, any>): void {
    this.log('error', message, meta);
  }
}

export const logger = new Logger(
  (process.env.LOG_LEVEL as LogLevel) || 'info'
);
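Call sites pass structured metadata rather than concatenating strings, so each event serializes to a single JSON line. For example:

import { logger } from './logger';

// Emits: {"timestamp":"...","level":"info","message":"chat request","model":"llama2"}
logger.info('chat request', { model: 'llama2' });
logger.error('upstream failure', { status: 502 });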
backend/utils/metrics.ts
ADDED
@@ -0,0 +1,90 @@
import type { MetricsResponse } from '../types/models';

class MetricsCollector {
  private startTime = Date.now();
  private requestsTotal = 0;
  private requestsByEndpoint = new Map<string, number>();
  private errorsTotal = 0;
  private rateLimitHits = 0;
  private responseTimes: number[] = [];
  private modelUsage = new Map<string, number>();
  private vectorDbQueries = 0;
  private documentsProcessed = 0;

  incrementRequests(endpoint: string): void {
    this.requestsTotal++;
    const count = this.requestsByEndpoint.get(endpoint) || 0;
    this.requestsByEndpoint.set(endpoint, count + 1);
  }

  incrementErrors(): void {
    this.errorsTotal++;
  }

  incrementRateLimitHits(): void {
    this.rateLimitHits++;
  }

  recordResponseTime(timeMs: number): void {
    this.responseTimes.push(timeMs);
    if (this.responseTimes.length > 1000) {
      this.responseTimes.shift();
    }
  }

  incrementModelUsage(model: string): void {
    const count = this.modelUsage.get(model) || 0;
    this.modelUsage.set(model, count + 1);
  }

  incrementVectorDbQueries(): void {
    this.vectorDbQueries++;
  }

  incrementDocumentsProcessed(): void {
    this.documentsProcessed++;
  }

  getMetrics(): MetricsResponse {
    const avgResponseTime = this.responseTimes.length > 0
      ? this.responseTimes.reduce((a, b) => a + b, 0) / this.responseTimes.length
      : 0;

    const requestsByEndpoint: Record<string, number> = {};
    for (const [endpoint, count] of this.requestsByEndpoint.entries()) {
      requestsByEndpoint[endpoint] = count;
    }

    const modelUsageObj: Record<string, number> = {};
    for (const [model, count] of this.modelUsage.entries()) {
      modelUsageObj[model] = count;
    }

    return {
      timestamp: Date.now(),
      requests_total: this.requestsTotal,
      requests_by_endpoint: requestsByEndpoint,
      errors_total: this.errorsTotal,
      rate_limit_hits: this.rateLimitHits,
      active_connections: 0,
      average_response_time_ms: Math.round(avgResponseTime),
      model_usage: modelUsageObj,
      vector_db_queries: this.vectorDbQueries,
      documents_processed: this.documentsProcessed,
    };
  }

  reset(): void {
    this.startTime = Date.now();
    this.requestsTotal = 0;
    this.requestsByEndpoint.clear();
    this.errorsTotal = 0;
    this.rateLimitHits = 0;
    this.responseTimes = [];
    this.modelUsage.clear();
    this.vectorDbQueries = 0;
    this.documentsProcessed = 0;
  }
}

export const metrics = new MetricsCollector();
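A typical way to feed this collector is to wrap each handler so requests, errors, and latency are recorded in one place. The helper below is a sketch of that pattern, not code from the commit:

import { metrics } from './metrics';

// Illustrative pattern: record per-request metrics around any async handler.
async function withMetrics<T>(endpoint: string, fn: () => Promise<T>): Promise<T> {
  metrics.incrementRequests(endpoint);
  const start = Date.now();
  try {
    return await fn();
  } catch (err) {
    metrics.incrementErrors();
    throw err;
  } finally {
    metrics.recordResponseTime(Date.now() - start);
  }
}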
backend/utils/rate_limit.ts
ADDED
@@ -0,0 +1,114 @@
import { loadConfig } from "../types/config";
import type { RateLimitInfo } from "../types/models";

const config = loadConfig();

interface RateLimitBucket {
  tokens: number;
  lastRefill: number;
}

class RateLimiter {
  private buckets = new Map<string, RateLimitBucket>();
  // Fixed-window refill: each key's allowance resets in full once per interval.
  private readonly refillInterval = 60000;

  checkRateLimit(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
    const limit = this.getLimitForTier(tier);
    const now = Date.now();

    let bucket = this.buckets.get(apiKey);

    if (!bucket) {
      bucket = {
        tokens: limit,
        lastRefill: now,
      };
      this.buckets.set(apiKey, bucket);
    }

    const timeSinceRefill = now - bucket.lastRefill;
    if (timeSinceRefill >= this.refillInterval) {
      bucket.tokens = limit;
      bucket.lastRefill = now;
    }

    if (bucket.tokens <= 0) {
      const resetAt = bucket.lastRefill + this.refillInterval;
      throw {
        statusCode: 429,
        message: 'Rate limit exceeded',
        limit,
        remaining: 0,
        resetAt,
      };
    }

    bucket.tokens -= 1;

    const resetAt = bucket.lastRefill + this.refillInterval;

    return {
      limit,
      remaining: bucket.tokens,
      reset_at: resetAt,
      tier,
    };
  }

  private getLimitForTier(tier: 'default' | 'premium' | 'admin'): number {
    switch (tier) {
      case 'admin':
        return config.rateLimit.admin;
      case 'premium':
        return config.rateLimit.premium;
      default:
        return config.rateLimit.default;
    }
  }

  getRateLimitInfo(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
    const limit = this.getLimitForTier(tier);
    const bucket = this.buckets.get(apiKey);

    if (!bucket) {
      return {
        limit,
        remaining: limit,
        reset_at: Date.now() + this.refillInterval,
        tier,
      };
    }

    return {
      limit,
      remaining: bucket.tokens,
      reset_at: bucket.lastRefill + this.refillInterval,
      tier,
    };
  }

  cleanup(): void {
    const now = Date.now();
    const maxAge = this.refillInterval * 2;

    for (const [key, bucket] of this.buckets.entries()) {
      if (now - bucket.lastRefill > maxAge) {
        this.buckets.delete(key);
      }
    }
  }
}

export const rateLimiter = new RateLimiter();

setInterval(() => {
  rateLimiter.cleanup();
}, 300000);

export function checkRateLimit(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
  return rateLimiter.checkRateLimit(apiKey, tier);
}

export function getRateLimitInfo(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
  return rateLimiter.getRateLimitInfo(apiKey, tier);
}
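Because `checkRateLimit` throws a plain `{ statusCode: 429, ... }` object when a key is exhausted, callers typically surface the returned `RateLimitInfo` as response headers. An illustrative sketch (the `X-RateLimit-*` header names are a common convention, not defined by this repo):

import { checkRateLimit } from './rate_limit';

// Illustrative: consume one token and translate the result into headers.
function rateLimitHeaders(apiKey: string, tier: 'default' | 'premium' | 'admin'): Record<string, string> {
  const info = checkRateLimit(apiKey, tier); // throws { statusCode: 429, ... } when exhausted
  return {
    'X-RateLimit-Limit': String(info.limit),
    'X-RateLimit-Remaining': String(info.remaining),
    'X-RateLimit-Reset': String(info.reset_at),
  };
}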
backend/vite-env.d.ts
ADDED
@@ -0,0 +1 @@
/// <reference types="vite/client" />
backend/workers/ingestion_worker.ts
ADDED
@@ -0,0 +1,128 @@
import { logger } from '../utils/logger';
import type { WebhookEvent } from '../types/models';

interface IngestionJob {
  doc_id: string;
  filename: string;
  status: 'pending' | 'processing' | 'completed' | 'failed';
  created_at: number;
  completed_at?: number;
  error?: string;
}

class IngestionWorker {
  private jobs = new Map<string, IngestionJob>();
  private isRunning = false;
  private concurrency: number;

  constructor(concurrency = 5) {
    this.concurrency = concurrency;
  }

  async start(): Promise<void> {
    if (this.isRunning) {
      logger.warn('Ingestion worker already running');
      return;
    }

    this.isRunning = true;
    logger.info('Ingestion worker started', { concurrency: this.concurrency });

    this.processQueue();
  }

  async stop(): Promise<void> {
    this.isRunning = false;
    logger.info('Ingestion worker stopped');
  }

  async addJob(doc_id: string, filename: string): Promise<void> {
    const job: IngestionJob = {
      doc_id,
      filename,
      status: 'pending',
      created_at: Date.now(),
    };

    this.jobs.set(doc_id, job);
    logger.info('Job added to ingestion queue', { doc_id, filename });
  }

  private async processQueue(): Promise<void> {
    while (this.isRunning) {
      const pendingJobs = Array.from(this.jobs.values())
        .filter(job => job.status === 'pending')
        .slice(0, this.concurrency);

      if (pendingJobs.length === 0) {
        await this.sleep(1000);
        continue;
      }

      await Promise.all(
        pendingJobs.map(job => this.processJob(job))
      );
    }
  }

  private async processJob(job: IngestionJob): Promise<void> {
    try {
      job.status = 'processing';
      logger.info('Processing ingestion job', { doc_id: job.doc_id });

      // Placeholder work: a random 1-3s sleep stands in for real chunking/embedding.
      await this.sleep(Math.random() * 2000 + 1000);

      job.status = 'completed';
      job.completed_at = Date.now();

      logger.info('Ingestion job completed', { doc_id: job.doc_id });

      await this.sendWebhook({
        event_type: 'document.ingestion.completed',
        doc_id: job.doc_id,
        timestamp: Date.now(),
        data: {
          chunks_created: Math.floor(Math.random() * 20) + 5,
          status: 'completed',
        },
      });
    } catch (error) {
      job.status = 'failed';
      job.error = error instanceof Error ? error.message : String(error);
      job.completed_at = Date.now();

      logger.error('Ingestion job failed', {
        doc_id: job.doc_id,
        error: job.error,
      });

      await this.sendWebhook({
        event_type: 'document.ingestion.failed',
        doc_id: job.doc_id,
        timestamp: Date.now(),
        data: {
          error: job.error,
          status: 'failed',
        },
      });
    }
  }

  private async sendWebhook(event: WebhookEvent): Promise<void> {
    logger.info('Webhook event', event);
  }

  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  getJobStatus(doc_id: string): IngestionJob | undefined {
    return this.jobs.get(doc_id);
  }

  getAllJobs(): IngestionJob[] {
    return Array.from(this.jobs.values());
  }
}

export const ingestionWorker = new IngestionWorker();
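Wiring is minimal: start the polling loop once at boot, enqueue a job per upload, and read `getJobStatus` from a status endpoint. A hypothetical sketch (the `doc_abc123` id is a placeholder):

import { ingestionWorker } from './ingestion_worker';

// Illustrative startup wiring; start() returns immediately while the loop runs in the background.
await ingestionWorker.start();
await ingestionWorker.addJob('doc_abc123', 'document.pdf');

// Later, e.g. from a status endpoint:
const job = ingestionWorker.getJobStatus('doc_abc123');
console.log(job?.status); // 'pending' | 'processing' | 'completed' | 'failed'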
docker-compose.yml
ADDED
@@ -0,0 +1,51 @@
version: '3.8'

services:
  api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - PINECONE_API_KEY=${PINECONE_API_KEY}
      - PINECONE_ENVIRONMENT=${PINECONE_ENVIRONMENT:-us-west1-gcp}
      - PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME:-ai-api-vectors}
      - API_KEYS=${API_KEYS:-demo-key-1,demo-key-2}
      - ADMIN_API_KEYS=${ADMIN_API_KEYS}
      - RATE_LIMIT_DEFAULT=${RATE_LIMIT_DEFAULT:-60}
      - RATE_LIMIT_PREMIUM=${RATE_LIMIT_PREMIUM:-300}
      - RATE_LIMIT_ADMIN=${RATE_LIMIT_ADMIN:-1000}
      - DEFAULT_CHAT_MODEL=${DEFAULT_CHAT_MODEL:-gpt-3.5-turbo}
      - DEFAULT_EMBEDDING_MODEL=${DEFAULT_EMBEDDING_MODEL:-text-embedding-ada-002}
      - DEFAULT_IMAGE_MODEL=${DEFAULT_IMAGE_MODEL:-dall-e-3}
      - DEFAULT_VOICE_MODEL=${DEFAULT_VOICE_MODEL:-tts-1}
      - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-10}
      - CHUNK_SIZE=${CHUNK_SIZE:-1000}
      - CHUNK_OVERLAP=${CHUNK_OVERLAP:-200}
      - ENABLE_BACKGROUND_WORKERS=${ENABLE_BACKGROUND_WORKERS:-true}
      - WORKER_CONCURRENCY=${WORKER_CONCURRENCY:-5}
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:3000}
    volumes:
      - ./data:/app/data
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    restart: unless-stopped
    command: redis-server --appendonly yes

volumes:
  redis_data:
examples/curl.sh
ADDED
@@ -0,0 +1,116 @@
#!/bin/bash

API_URL="http://localhost:8000"
API_KEY="demo-key-1"

echo "=== AI API Service - Example Requests ==="
echo ""

echo "1. Health Check"
echo "==============="
curl -s "${API_URL}/health" | jq .
echo ""
echo ""

echo "2. Verify API Key"
echo "================="
curl -s -X POST "${API_URL}/auth/verify" \
  -H "Authorization: Bearer ${API_KEY}" | jq .
echo ""
echo ""

echo "3. Simple Query"
echo "==============="
curl -s "${API_URL}/ai/query?q=What%20is%20machine%20learning%3F" \
  -H "Authorization: Bearer ${API_KEY}" | jq .
echo ""
echo ""

echo "4. Chat Conversation"
echo "===================="
curl -s -X POST "${API_URL}/ai/chat" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "conversation": [
      {
        "role": "user",
        "content": "Explain quantum computing in simple terms"
      }
    ],
    "options": {
      "temperature": 0.7,
      "max_tokens": 200
    }
  }' | jq .
echo ""
echo ""

echo "5. RAG Query (with retrieval)"
echo "============================="
curl -s -X POST "${API_URL}/rag/query" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "What are the key features?",
    "top_k": 5,
    "use_retrieval": true
  }' | jq .
echo ""
echo ""

echo "6. Image Generation"
echo "==================="
curl -s -X POST "${API_URL}/image/generate" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "A serene mountain landscape at sunset",
    "size": "1024x1024",
    "n": 1
  }' | jq .
echo ""
echo ""

echo "7. Voice Synthesis"
echo "=================="
curl -s -X POST "${API_URL}/voice/synthesize" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "text": "Hello, this is a test of the voice synthesis system.",
    "voice": "alloy",
    "format": "mp3"
  }' | jq .
echo ""
echo ""

echo "8. Document Upload"
echo "=================="
CONTENT=$(echo "This is a sample document for testing." | base64)
curl -s -X POST "${API_URL}/upload" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d "{
    \"filename\": \"sample.txt\",
    \"content_base64\": \"${CONTENT}\",
    \"metadata\": {
      \"title\": \"Sample Document\",
      \"category\": \"test\"
    }
  }" | jq .
echo ""
echo ""

echo "9. Get Metrics"
echo "=============="
curl -s "${API_URL}/metrics" \
  -H "Authorization: Bearer ${API_KEY}" | jq .
echo ""
echo ""

echo "10. Get Available Models"
echo "========================"
curl -s "${API_URL}/rag/models" \
  -H "Authorization: Bearer ${API_KEY}" | jq .
echo ""
examples/js_client.js
ADDED
@@ -0,0 +1,203 @@
const API_URL = 'http://localhost:8000';
const API_KEY = 'demo-key-1';

class AIAPIClient {
  constructor(apiUrl, apiKey) {
    this.apiUrl = apiUrl;
    this.apiKey = apiKey;
  }

  async request(endpoint, options = {}) {
    const url = `${this.apiUrl}${endpoint}`;
    const headers = {
      'Authorization': `Bearer ${this.apiKey}`,
      'Content-Type': 'application/json',
      ...options.headers,
    };

    const response = await fetch(url, {
      ...options,
      headers,
    });

    if (!response.ok) {
      const error = await response.json();
      throw new Error(`API Error: ${error.message || response.statusText}`);
    }

    return response.json();
  }

  async healthCheck() {
    return this.request('/health', { method: 'GET' });
  }

  async verifyApiKey() {
    return this.request('/auth/verify', { method: 'POST' });
  }

  async chat(conversation, model = null, options = {}) {
    return this.request('/ai/chat', {
      method: 'POST',
      body: JSON.stringify({
        conversation,
        model,
        options,
      }),
    });
  }

  async simpleQuery(query, model = null) {
    const params = new URLSearchParams({ q: query });
    if (model) params.append('model', model);

    return this.request(`/ai/query?${params}`, { method: 'GET' });
  }

  async ragQuery(query, topK = 5, model = null, useRetrieval = true, filters = null) {
    return this.request('/rag/query', {
      method: 'POST',
      body: JSON.stringify({
        query,
        top_k: topK,
        model,
        use_retrieval: useRetrieval,
        filters,
      }),
    });
  }

  async generateImage(prompt, options = {}) {
    return this.request('/image/generate', {
      method: 'POST',
      body: JSON.stringify({
        prompt,
        ...options,
      }),
    });
  }

  async synthesizeVoice(text, voice = 'alloy', format = 'mp3', speed = 1.0) {
    return this.request('/voice/synthesize', {
      method: 'POST',
      body: JSON.stringify({
        text,
        voice,
        format,
        speed,
      }),
    });
  }

  async transcribeAudio(audioBase64, model = null, language = null) {
    return this.request('/voice/transcribe', {
      method: 'POST',
      body: JSON.stringify({
        audio_base64: audioBase64,
        model,
        language,
      }),
    });
  }

  async uploadDocument(filename, contentBase64, metadata = {}) {
    return this.request('/upload', {
      method: 'POST',
      body: JSON.stringify({
        filename,
        content_base64: contentBase64,
        metadata,
      }),
    });
  }

  async getDocumentSources(docId) {
    return this.request(`/docs/${docId}/sources`, { method: 'GET' });
  }

  async getMetrics() {
    return this.request('/metrics', { method: 'GET' });
  }

  async getAvailableModels() {
    return this.request('/rag/models', { method: 'GET' });
  }
}

async function main() {
  const client = new AIAPIClient(API_URL, API_KEY);

  try {
    console.log('=== AI API Client Examples ===\n');

    console.log('1. Health Check');
    const health = await client.healthCheck();
    console.log(JSON.stringify(health, null, 2));
    console.log('\n');

    console.log('2. Simple Query');
    const queryResult = await client.simpleQuery('What is artificial intelligence?');
    console.log(JSON.stringify(queryResult, null, 2));
    console.log('\n');

    console.log('3. Chat Conversation');
    const chatResult = await client.chat([
      { role: 'user', content: 'Tell me a fun fact about space' }
    ], null, { temperature: 0.8, max_tokens: 150 });
    console.log(JSON.stringify(chatResult, null, 2));
    console.log('\n');

    console.log('4. RAG Query');
    const ragResult = await client.ragQuery(
      'What are the main features?',
      5,
      null,
      true
    );
    console.log(JSON.stringify(ragResult, null, 2));
    console.log('\n');

    console.log('5. Image Generation');
    const imageResult = await client.generateImage(
      'A futuristic cityscape at night',
      { size: '1024x1024', n: 1 }
    );
    console.log('Image generated:', imageResult.images[0].url.substring(0, 100) + '...');
    console.log('\n');

    console.log('6. Voice Synthesis');
    const voiceResult = await client.synthesizeVoice(
      'Welcome to the AI API service.',
      'alloy',
      'mp3'
    );
    console.log('Audio generated:', voiceResult.audio_url.substring(0, 100) + '...');
    console.log('\n');

    console.log('7. Document Upload');
    const docContent = Buffer.from('This is a sample document.').toString('base64');
    const uploadResult = await client.uploadDocument(
      'sample.txt',
      docContent,
      { title: 'Sample', category: 'test' }
    );
    console.log(JSON.stringify(uploadResult, null, 2));
    console.log('\n');

    console.log('8. Get Metrics');
    const metrics = await client.getMetrics();
    console.log(JSON.stringify(metrics, null, 2));
    console.log('\n');

  } catch (error) {
    console.error('Error:', error.message);
  }
}

if (typeof window === 'undefined') {
  main();
}

if (typeof module !== 'undefined' && module.exports) {
  module.exports = AIAPIClient;
}
package.json
ADDED
@@ -0,0 +1,9 @@
{
  "name": "leap-app",
  "version": "1.0.0",
  "type": "module",
  "packageManager": "bun",
  "workspaces": [
    "backend"
  ]
}
strcture.md
ADDED
@@ -0,0 +1,493 @@
| 1 |
+
# AI API Service
|
| 2 |
+
|
| 3 |
+
A production-ready, scalable AI API service built with TypeScript and Encore.ts. Supports conversational chat, RAG (Retrieval-Augmented Generation), image generation, voice synthesis, and document ingestion.
|
| 4 |
+
|
| 5 |
+
## 🏗️ Architecture
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 9 |
+
│ API Gateway Layer │
|
| 10 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 11 |
+
│ │ Auth Filter │→ │ Rate Limiter │→ │ Routes │ │
|
| 12 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 13 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 14 |
+
↓
|
| 15 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 16 |
+
│ Service Layer │
|
| 17 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 18 |
+
│ │ AI Service │ │ RAG Service │ │Image Service │ │
|
| 19 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 20 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 21 |
+
│ │Voice Service │ │ Doc Service │ │Worker Service│ │
|
| 22 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 23 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 24 |
+
↓
|
| 25 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 26 |
+
│ Adapter Layer │
|
| 27 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 28 |
+
│ │OpenAI Adapter│ │ HF Adapter │ │Anthropic Adp │ │
|
| 29 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 30 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 31 |
+
│ │Vector DB Adp │ │Embedding Adp │ │ Local Models │ │
|
| 32 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 33 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 34 |
+
↓
|
| 35 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 36 |
+
│ Storage Layer │
|
| 37 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
| 38 |
+
│ │ Pinecone │ │ In-Memory │ │ File Storage │ │
|
| 39 |
+
│ │ (Vector DB) │ │ (Fallback) │ │ (Documents) │ │
|
| 40 |
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
| 41 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## ✨ Features
|
| 45 |
+
|
| 46 |
+
### Core Capabilities
|
| 47 |
+
- **Multi-turn Chat** - Conversational AI with context management
|
| 48 |
+
- **RAG (Retrieval-Augmented Generation)** - Query documents with AI-powered retrieval
|
| 49 |
+
- **Image Generation** - Text-to-image using DALL-E or Stable Diffusion
|
| 50 |
+
- **Voice Synthesis** - Text-to-speech with multiple voice options
|
| 51 |
+
- **Document Ingestion** - Upload PDFs, DOCX, TXT with automatic chunking & embedding
|
| 52 |
+
|
| 53 |
+
### Model Support
|
| 54 |
+
- **OpenAI** - GPT-4, GPT-3.5-turbo, DALL-E, Whisper, TTS
|
| 55 |
+
- **HuggingFace** - Open-source models via Inference API
|
| 56 |
+
- **Anthropic** - Claude models
|
| 57 |
+
- **Local Models** - Run transformers locally (optional)
|
| 58 |
+
|
| 59 |
+
### Enterprise Features
|
| 60 |
+
- **API Key Authentication** - Secure access control
|
| 61 |
+
- **Role-based Rate Limiting** - Default, Premium, Admin tiers
|
| 62 |
+
- **Multi-model Routing** - Select models via API or policy
|
| 63 |
+
- **Background Workers** - Async document processing
|
| 64 |
+
- **Observability** - Health checks, metrics, structured logging
|
| 65 |
+
- **CORS Support** - Cross-origin requests
|
| 66 |
+
|
| 67 |
+
## 📋 API Endpoints
|
| 68 |
+
|
| 69 |
+
### Health & Metrics
|
| 70 |
+
```bash
|
| 71 |
+
GET /health # Service health check
|
| 72 |
+
GET /metrics # Prometheus-style metrics
|
| 73 |
+
POST /auth/verify # Verify API key validity
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### AI Chat
|
| 77 |
+
```bash
|
| 78 |
+
POST /ai/chat # Multi-turn conversation
|
| 79 |
+
GET /ai/query # Simple question answering
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
### RAG (Retrieval-Augmented Generation)
|
| 83 |
+
```bash
|
| 84 |
+
POST /rag/query # Query with document retrieval
|
| 85 |
+
GET /rag/models # List available models
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### Image Generation
|
| 89 |
+
```bash
|
| 90 |
+
POST /image/generate # Generate images from text
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
### Voice Synthesis
|
| 94 |
+
```bash
|
| 95 |
+
POST /voice/synthesize # Text to speech
|
| 96 |
+
POST /voice/transcribe # Speech to text (optional)
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### Document Management
|
| 100 |
+
```bash
|
| 101 |
+
POST /upload # Upload document for ingestion
|
| 102 |
+
GET /docs/:id/sources # Get document chunks
|
| 103 |
+
POST /webhook/events # Ingestion completion webhook
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
## 🚀 Quick Start
|
| 107 |
+
|
| 108 |
+
### Prerequisites
|
| 109 |
+
- Node.js 18+ and npm
|
| 110 |
+
- Encore CLI: `npm install -g encore`
|
| 111 |
+
- API keys (OpenAI, HuggingFace, etc.)
|
| 112 |
+
|
| 113 |
+
### Local Development
|
| 114 |
+
|
| 115 |
+
1. **Clone and install dependencies**
|
| 116 |
+
```bash
|
| 117 |
+
npm install
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
2. **Configure environment variables**
|
| 121 |
+
```bash
|
| 122 |
+
cp .env.example .env
|
| 123 |
+
# Edit .env with your API keys
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
3. **Run the development server**
|
| 127 |
+
```bash
|
| 128 |
+
encore run
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
The API will be available at `http://localhost:8000`
|
| 132 |
+
|
| 133 |
+
4. **Run tests**
|
| 134 |
+
```bash
|
| 135 |
+
npm test
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
## 🔑 Environment Variables
|
| 139 |
+
|
| 140 |
+
| Variable | Description | Required | Default |
|
| 141 |
+
|----------|-------------|----------|---------|
|
| 142 |
+
| `OPENAI_API_KEY` | OpenAI API key for GPT models | No* | - |
|
| 143 |
+
| `HUGGINGFACE_API_KEY` | HuggingFace API key | No* | - |
|
| 144 |
+
| `ANTHROPIC_API_KEY` | Anthropic API key for Claude | No* | - |
|
| 145 |
+
| `PINECONE_API_KEY` | Pinecone vector DB key | No | In-memory fallback |
|
| 146 |
+
| `API_KEYS` | Comma-separated valid API keys | Yes | `demo-key-1` |
|
| 147 |
+
| `ADMIN_API_KEYS` | Admin-level API keys | No | - |
|
| 148 |
+
| `RATE_LIMIT_DEFAULT` | Requests/min for default tier | No | 60 |
|
| 149 |
+
| `RATE_LIMIT_PREMIUM` | Requests/min for premium tier | No | 300 |
|
| 150 |
+
| `DEFAULT_CHAT_MODEL` | Default LLM model | No | `gpt-3.5-turbo` |
|
| 151 |
+
|
| 152 |
+
*At least one LLM provider key is required
|
| 153 |
+
|
| 154 |
+
## 📖 API Usage Examples
|
| 155 |
+
|
| 156 |
+
### 1. Chat Endpoint
|
| 157 |
+
|
| 158 |
+
**Request:**
|
| 159 |
+
```bash
|
| 160 |
+
curl -X POST http://localhost:8000/ai/chat \
|
| 161 |
+
-H "Authorization: Bearer demo-key-1" \
|
| 162 |
+
-H "Content-Type: application/json" \
|
| 163 |
+
-d '{
|
| 164 |
+
"conversation": [
|
| 165 |
+
{"role": "user", "content": "What is machine learning?"}
|
| 166 |
+
],
|
| 167 |
+
"model": "gpt-3.5-turbo",
|
| 168 |
+
"options": {
|
| 169 |
+
"temperature": 0.7,
|
| 170 |
+
"max_tokens": 500
|
| 171 |
+
}
|
| 172 |
+
}'
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
**Response:**
|
| 176 |
+
```json
|
| 177 |
+
{
|
| 178 |
+
"reply": "Machine learning is a subset of artificial intelligence...",
|
| 179 |
+
"model": "gpt-3.5-turbo",
|
| 180 |
+
"usage": {
|
| 181 |
+
"prompt_tokens": 15,
|
| 182 |
+
"completion_tokens": 120,
|
| 183 |
+
"total_tokens": 135
|
| 184 |
+
},
|
| 185 |
+
"sources": null
|
| 186 |
+
}
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
### 2. RAG Query Endpoint
|
| 190 |
+
|
| 191 |
+
**Request:**
|
| 192 |
+
```bash
|
| 193 |
+
curl -X POST http://localhost:8000/rag/query \
|
| 194 |
+
-H "Authorization: Bearer demo-key-1" \
|
| 195 |
+
-H "Content-Type: application/json" \
|
| 196 |
+
-d '{
|
| 197 |
+
"query": "What are the key features of our product?",
|
| 198 |
+
"top_k": 5,
|
| 199 |
+
"model": "gpt-4",
|
| 200 |
+
"use_retrieval": true
|
| 201 |
+
}'
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
**Response:**
|
| 205 |
+
```json
|
| 206 |
+
{
|
| 207 |
+
"answer": "Based on the documentation, the key features include...",
|
| 208 |
+
"sources": [
|
| 209 |
+
{
|
| 210 |
+
"doc_id": "doc_123",
|
| 211 |
+
"chunk_id": "chunk_5",
|
| 212 |
+
"content": "Our product features...",
|
| 213 |
+
"score": 0.92
|
| 214 |
+
}
|
| 215 |
+
],
|
| 216 |
+
"model": "gpt-4",
|
| 217 |
+
"usage": {
|
| 218 |
+
"prompt_tokens": 450,
|
| 219 |
+
"completion_tokens": 180,
|
| 220 |
+
"total_tokens": 630
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
### 3. Image Generation
|
| 226 |
+
|
| 227 |
+
**Request:**
|
| 228 |
+
```bash
|
| 229 |
+
curl -X POST http://localhost:8000/image/generate \
|
| 230 |
+
-H "Authorization: Bearer demo-key-1" \
|
| 231 |
+
-H "Content-Type: application/json" \
|
| 232 |
+
-d '{
|
| 233 |
+
"prompt": "A futuristic city with flying cars at sunset",
|
| 234 |
+
"model": "dall-e-3",
|
| 235 |
+
"size": "1024x1024",
|
| 236 |
+
"n": 1
|
| 237 |
+
}'
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
**Response:**
|
| 241 |
+
```json
|
| 242 |
+
{
|
| 243 |
+
"images": [
|
| 244 |
+
{
|
| 245 |
+
"url": "https://...",
|
| 246 |
+
"revised_prompt": "A futuristic city with flying cars..."
|
| 247 |
+
}
|
| 248 |
+
],
|
| 249 |
+
"model": "dall-e-3",
|
| 250 |
+
"created": 1698765432
|
| 251 |
+
}
|
| 252 |
+
```
|
| 253 |
+
|
| 254 |
+
### 4. Voice Synthesis
|
| 255 |
+
|
| 256 |
+
**Request:**
|
| 257 |
+
```bash
|
| 258 |
+
curl -X POST http://localhost:8000/voice/synthesize \
|
| 259 |
+
-H "Authorization: Bearer demo-key-1" \
|
| 260 |
+
-H "Content-Type: application/json" \
|
| 261 |
+
-d '{
|
| 262 |
+
"text": "Hello, this is a test of the voice synthesis system.",
|
| 263 |
+
"voice": "alloy",
|
| 264 |
+
"format": "mp3"
|
| 265 |
+
}'
|
| 266 |
+
```
|
| 267 |
+
|
| 268 |
+
**Response:**
|
| 269 |
+
```json
|
| 270 |
+
{
|
| 271 |
+
"audio_url": "data:audio/mp3;base64,//uQx...",
|
| 272 |
+
"voice": "alloy",
|
| 273 |
+
"format": "mp3",
|
| 274 |
+
"duration_ms": 3200
|
| 275 |
+
}
|
| 276 |
+
```
|
| 277 |
+
|
| 278 |
+
### 5. Document Upload
|
| 279 |
+
|
| 280 |
+
**Request:**
|
| 281 |
+
```bash
|
| 282 |
+
curl -X POST http://localhost:8000/upload \
|
| 283 |
+
-H "Authorization: Bearer demo-key-1" \
|
| 284 |
+
-F "[email protected]" \
|
| 285 |
+
-F "metadata={\"title\":\"Product Guide\",\"category\":\"documentation\"}"
|
| 286 |
+
```
|
| 287 |
+
|
| 288 |
+
**Response:**
|
| 289 |
+
```json
|
| 290 |
+
{
|
| 291 |
+
"doc_id": "doc_abc123",
|
| 292 |
+
"filename": "document.pdf",
|
| 293 |
+
"size_bytes": 245760,
|
| 294 |
+
"status": "processing",
|
| 295 |
+
"estimated_chunks": 15,
|
| 296 |
+
"webhook_url": "/webhook/events"
|
| 297 |
+
}
|
| 298 |
+
```
|
| 299 |
+
|
| 300 |
+
## 🧪 Testing
|
| 301 |
+
|
| 302 |
+
Run the test suite:
|
| 303 |
+
```bash
|
| 304 |
+
npm test
|
| 305 |
+
```
|
| 306 |
+
|
| 307 |
+
Run with coverage:
|
| 308 |
+
```bash
|
| 309 |
+
npm run test:coverage
|
| 310 |
+
```
|
| 311 |
+
|
| 312 |
+
Tests include:
|
| 313 |
+
- Unit tests for all adapters
|
| 314 |
+
- Integration tests for API endpoints
|
| 315 |
+
- Mock implementations for external services
|
| 316 |
+
- Rate limiting validation
|
| 317 |
+
- Authentication checks
|
| 318 |
+
|
| 319 |
+
## 🐳 Docker Deployment
|
| 320 |
+
|
| 321 |
+
### Build Docker Image
|
| 322 |
+
```bash
|
| 323 |
+
docker build -t ai-api-service .
|
| 324 |
+
```
|
| 325 |
+
|
| 326 |
+
### Run with Docker Compose
|
| 327 |
+
```bash
|
| 328 |
+
docker-compose up
|
| 329 |
+
```
|
| 330 |
+
|
| 331 |
+
This starts:
|
| 332 |
+
- API service on port 8000
|
| 333 |
+
- Redis for rate limiting (optional)
|
| 334 |
+
- Background workers
|
| 335 |
+
|
| 336 |
+
## ☁️ Cloud Deployment
|
| 337 |
+
|
| 338 |
+
### Deploy to Encore Cloud (Recommended)
|
| 339 |
+
|
| 340 |
+
1. **Install Encore CLI**
|
| 341 |
+
```bash
|
| 342 |
+
npm install -g encore
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
2. **Login to Encore**
|
| 346 |
+
```bash
|
| 347 |
+
encore auth login
|
| 348 |
+
```
|
| 349 |
+
|
| 350 |
+
3. **Deploy**
|
| 351 |
+
```bash
|
| 352 |
+
encore deploy
|
| 353 |
+
```
|
| 354 |
+
|
| 355 |
+
### Deploy to Hugging Face Spaces
|
| 356 |
+
|
| 357 |
+
1. **Create a new Space** at https://huggingface.co/spaces
|
| 358 |
+
|
| 359 |
+
2. **Add Dockerfile**
|
| 360 |
+
```dockerfile
|
| 361 |
+
FROM node:18-alpine
|
| 362 |
+
WORKDIR /app
|
| 363 |
+
COPY package*.json ./
|
| 364 |
+
RUN npm ci --only=production
|
| 365 |
+
COPY . .
|
| 366 |
+
RUN npm run build
|
| 367 |
+
EXPOSE 7860
|
| 368 |
+
ENV PORT=7860
|
| 369 |
+
CMD ["npm", "start"]
|
| 370 |
+
```
|
| 371 |
+
|
| 372 |
+
3. **Configure secrets** in Space settings:
|
| 373 |
+
- `OPENAI_API_KEY`
|
| 374 |
+
- `HUGGINGFACE_API_KEY`
|
| 375 |
+
- `API_KEYS`
|
| 376 |
+
|
| 377 |
+
4. **Push to Space**
|
| 378 |
+
```bash
|
| 379 |
+
git remote add space https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
|
| 380 |
+
git push space main
|
| 381 |
+
```
|
| 382 |
+
|
| 383 |
+
### Deploy to Generic Cloud (AWS, GCP, Azure)
|
| 384 |
+
|
| 385 |
+
1. **Build production image**
|
| 386 |
+
```bash
|
| 387 |
+
docker build -t ai-api-service:latest .
|
| 388 |
+
```
|
| 389 |
+
|
| 390 |
+
2. **Push to container registry**
|
| 391 |
+
```bash
|
| 392 |
+
docker tag ai-api-service:latest your-registry/ai-api-service:latest
|
| 393 |
+
docker push your-registry/ai-api-service:latest
|
| 394 |
+
```
|
| 395 |
+
|
| 396 |
+
3. **Deploy to container service**
|
| 397 |
+
- AWS ECS/Fargate
|
| 398 |
+
- GCP Cloud Run
|
| 399 |
+
- Azure Container Instances
|
| 400 |
+
|
| 401 |
+
4. **Set environment variables** in cloud console
|
| 402 |
+
|
| 403 |
+
## 📊 Scaling Considerations
|
| 404 |
+
|
| 405 |
+
### Horizontal Scaling
|
| 406 |
+
- **Stateless design** - All state in external services (Pinecone, Redis)
|
| 407 |
+
- **Load balancing** - Use ALB/NLB in front of multiple instances
|
| 408 |
+
- **Auto-scaling** - Scale based on CPU/memory or request rate
|
| 409 |
+
|
| 410 |
+
### Vector Database
|
| 411 |
+
- **Pinecone** - Managed, scales automatically
|
| 412 |
+
- **Milvus** - Self-hosted, requires cluster setup
|
| 413 |
+
- **In-memory** - Development only, not for production
|
| 414 |
+
|
| 415 |
+
### Background Workers
|
| 416 |
+
- **Concurrent processing** - Adjust `WORKER_CONCURRENCY`
|
| 417 |
+
- **Queue depth** - Monitor pending ingestion jobs
|
| 418 |
+
- **Retry logic** - Failed jobs auto-retry with backoff
|
| 419 |
+
|
| 420 |
+
### Cost Optimization
- **Model selection** - Prefer cheaper models where quality allows (GPT-3.5 vs GPT-4)
- **Caching** - Cache frequent queries (not implemented; a Redis sketch follows below)
- **Batch processing** - Group document ingestions
- **Rate limiting** - Prevent abuse and cost overruns

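As a starting point for the caching item above, here is a minimal sketch of a read-through cache keyed on the prompt. It assumes the `redis` npm client; the key scheme and TTL are illustrative, and this is not yet part of the service:

```ts
// Hypothetical read-through cache for chat completions (see the Roadmap).
import { createClient } from "redis";
import { createHash } from "node:crypto";

const redis = createClient({ url: process.env.REDIS_URL });

export async function cachedCompletion(
  prompt: string,
  complete: (p: string) => Promise<string>,
  ttlSeconds = 300
): Promise<string> {
  if (!redis.isOpen) await redis.connect();

  // Hash the prompt so arbitrary-length input maps to a fixed-size key.
  const key = "chat:" + createHash("sha256").update(prompt).digest("hex");

  const hit = await redis.get(key);
  if (hit !== null) return hit; // cache hit: no model call, no token cost

  const answer = await complete(prompt); // cache miss: pay for one model call
  await redis.set(key, answer, { EX: ttlSeconds }); // expire stale answers
  return answer;
}
```
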
## 🔒 Security Best Practices

1. **API Keys** - Rotate regularly; load them from environment variables
2. **Rate Limiting** - Prevent abuse and DDoS
3. **Input Validation** - All requests are validated with Zod schemas (see the sketch below)
4. **CORS** - Configure allowed origins
5. **File Upload** - Enforce size limits and type validation
6. **Secrets Management** - Use Encore secrets or cloud secret managers

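For the input-validation point, a request schema looks roughly like the following. This is a hedged sketch: the service's actual schemas live in the backend and may differ in field names and limits:

```ts
// Illustrative Zod schema; the service's real schemas may differ.
import { z } from "zod";

const ChatRequestSchema = z.object({
  message: z.string().min(1).max(8_000),        // reject empty / oversized input
  model: z.string().optional(),                 // falls back to DEFAULT_CHAT_MODEL
  temperature: z.number().min(0).max(2).optional(),
});

type ChatRequest = z.infer<typeof ChatRequestSchema>;

function parseChatRequest(body: unknown): ChatRequest {
  // Throws a descriptive ZodError on invalid input.
  return ChatRequestSchema.parse(body);
}
```
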
## 🛠️ Troubleshooting

### Common Issues

**"Invalid API key" errors**
- Check that the `.env` file has the correct keys
- Verify the API key has credits/quota
- Ensure there are no extra spaces in the keys

**Rate limit exceeded**
- Increase the `RATE_LIMIT_*` values
- Use an admin API key for testing
- Check Prometheus metrics for usage
- Have clients back off and retry (see the sketch below)

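If clients need to ride through limits rather than raise them, a simple exponential backoff works. A minimal sketch follows; it assumes the service signals limit breaches with HTTP 429, the conventional status, and assumes no custom headers:

```ts
// Hypothetical retry helper for rate-limited requests.
async function fetchWithBackoff(
  url: string,
  init: RequestInit,
  maxRetries = 3
): Promise<Response> {
  for (let attempt = 0; ; attempt++) {
    const res = await fetch(url, init);
    // Return on success, non-rate-limit errors, or exhausted retries.
    if (res.status !== 429 || attempt >= maxRetries) return res;
    // Exponential backoff: 1s, 2s, 4s, ...
    await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt));
  }
}
```
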
**Vector DB connection fails**
- Check the Pinecone API key and environment
- Note that the service falls back to in-memory storage when Pinecone is unreachable
- Verify network connectivity

**Document upload fails**
- Check that the file size is under `MAX_FILE_SIZE_MB`
- Verify the file format (PDF, DOCX, TXT)
- Check disk space for temp files

## 📚 Client Libraries

See the `examples/` directory for:
- `js_client.js` - JavaScript/Node.js client
- `curl.sh` - curl command examples
- `python_client.py` - Python client (coming soon)

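For a quick inline start, a minimal TypeScript call is sketched below. The `/chat` path and payload shape are assumptions, so treat `examples/js_client.js` and `examples/curl.sh` as the canonical request format; the Bearer-token header matches the service's API-key scheme:

```ts
// Hypothetical minimal client; the /chat path and payload shape are
// assumptions - see examples/js_client.js for the canonical format.
const API_URL = process.env.API_URL ?? "http://localhost:8000";
const API_KEY = process.env.API_KEY ?? "demo-key-1";

async function chat(message: string): Promise<unknown> {
  const res = await fetch(`${API_URL}/chat`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${API_KEY}`, // API keys are sent as Bearer tokens
    },
    body: JSON.stringify({ message }),
  });
  if (!res.ok) throw new Error(`request failed: HTTP ${res.status}`);
  return res.json();
}

chat("Hello!").then(console.log).catch(console.error);
```
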
## 🤝 Contributing

1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Add tests
5. Submit a pull request

## 📝 License

MIT License - see the LICENSE file for details

## 🆘 Support

- GitHub Issues: [Report bugs](https://github.com/your-org/ai-api-service/issues)
- Documentation: [Full API reference](https://docs.your-service.com)
- Email: support@your-service.com

## 🗺️ Roadmap

- [ ] Caching layer (Redis)
- [ ] Streaming responses (SSE)
- [ ] Multi-language support
- [ ] Fine-tuning pipeline
- [ ] Analytics dashboard
- [ ] Webhook integrations
- [ ] GraphQL API
- [ ] gRPC support

tests/api.test.ts
ADDED
@@ -0,0 +1,233 @@
import { describe, it, expect, beforeAll, vi } from 'vitest';

describe('AI API Service Tests', () => {
  beforeAll(() => {
    process.env.API_KEYS = 'test-key-1,test-key-2';
    process.env.ADMIN_API_KEYS = 'admin-key-1';
    process.env.OPENAI_API_KEY = 'sk-test-mock-key';
  });
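
  // Note: modules are loaded with require() inside each test body rather than
  // imported at the top, so the env vars set in beforeAll above are already
  // in place when each module initializes.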

  describe('Authentication', () => {
    it('should validate correct API key', () => {
      const { validateApiKey } = require('../backend/utils/auth');
      const result = validateApiKey('Bearer test-key-1');
      expect(result.apiKey).toBe('test-key-1');
      expect(result.tier).toBe('default');
    });

    it('should validate admin API key', () => {
      const { validateApiKey } = require('../backend/utils/auth');
      const result = validateApiKey('Bearer admin-key-1');
      expect(result.apiKey).toBe('admin-key-1');
      expect(result.tier).toBe('admin');
    });

    it('should reject invalid API key', () => {
      const { validateApiKey } = require('../backend/utils/auth');
      expect(() => validateApiKey('Bearer invalid-key')).toThrow('Invalid API key');
    });

    it('should reject missing API key', () => {
      const { validateApiKey } = require('../backend/utils/auth');
      expect(() => validateApiKey('')).toThrow('Missing Authorization header');
    });
  });

  describe('Rate Limiting', () => {
    it('should allow requests within rate limit', () => {
      const { rateLimiter } = require('../backend/utils/rate_limit');
      const info = rateLimiter.checkRateLimit('test-key-1', 'default');
      expect(info.remaining).toBeGreaterThanOrEqual(0);
      expect(info.limit).toBeGreaterThan(0);
    });

    it('should have higher limit for admin tier', () => {
      const { rateLimiter } = require('../backend/utils/rate_limit');
      const defaultInfo = rateLimiter.getRateLimitInfo('test-key-1', 'default');
      const adminInfo = rateLimiter.getRateLimitInfo('admin-key-1', 'admin');
      expect(adminInfo.limit).toBeGreaterThan(defaultInfo.limit);
    });
  });

  describe('Vector DB', () => {
    it('should store and retrieve vectors from in-memory DB', async () => {
      const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
      const db = new InMemoryVectorDB();

      await db.upsert([
        {
          id: 'test-1',
          values: [1, 0, 0],
          metadata: { content: 'Test document 1' },
        },
        {
          id: 'test-2',
          values: [0, 1, 0],
          metadata: { content: 'Test document 2' },
        },
      ]);

      const results = await db.query([1, 0, 0], 2);
      expect(results.length).toBe(2);
      expect(results[0].id).toBe('test-1');
      expect(results[0].score).toBeGreaterThan(results[1].score);
    });

    it('should filter results based on metadata', async () => {
      const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
      const db = new InMemoryVectorDB();

      await db.upsert([
        {
          id: 'doc-1',
          values: [1, 0, 0],
          metadata: { category: 'tech', content: 'Tech document' },
        },
        {
          id: 'doc-2',
          values: [0.9, 0, 0],
          metadata: { category: 'science', content: 'Science document' },
        },
      ]);

      const results = await db.query([1, 0, 0], 5, { category: 'tech' });
      expect(results.length).toBe(1);
      expect(results[0].id).toBe('doc-1');
    });

    it('should delete vectors', async () => {
      const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
      const db = new InMemoryVectorDB();

      await db.upsert([
        { id: 'delete-1', values: [1, 0, 0], metadata: {} },
      ]);

      let results = await db.query([1, 0, 0], 5);
      expect(results.length).toBe(1);

      await db.delete(['delete-1']);

      results = await db.query([1, 0, 0], 5);
      expect(results.length).toBe(0);
    });
  });

  describe('Document Service', () => {
    it('should chunk text correctly', () => {
      const { documentService } = require('../backend/services/document_service');

      const text = 'a'.repeat(2500);
      const chunks = documentService['chunkText'](text, 'doc-1', {});

      expect(chunks.length).toBeGreaterThan(1);
      expect(chunks[0].chunk_index).toBe(0);
      expect(chunks[0].doc_id).toBe('doc-1');
    });

    it('should extract text from txt file', async () => {
      const { documentService } = require('../backend/services/document_service');

      const content = Buffer.from('This is a test document', 'utf-8');
      const text = await documentService['extractText']('test.txt', content);

      expect(text).toBe('This is a test document');
    });
  });

  describe('Metrics', () => {
    it('should track requests', () => {
      const { metrics } = require('../backend/utils/metrics');

      const initialMetrics = metrics.getMetrics();
      metrics.incrementRequests('/test');
      const updatedMetrics = metrics.getMetrics();

      expect(updatedMetrics.requests_total).toBeGreaterThan(initialMetrics.requests_total);
    });

    it('should track errors', () => {
      const { metrics } = require('../backend/utils/metrics');

      const initialMetrics = metrics.getMetrics();
      metrics.incrementErrors();
      const updatedMetrics = metrics.getMetrics();

      expect(updatedMetrics.errors_total).toBeGreaterThan(initialMetrics.errors_total);
    });

    it('should track response times', () => {
      const { metrics } = require('../backend/utils/metrics');

      metrics.recordResponseTime(100);
      metrics.recordResponseTime(200);
      const metricsData = metrics.getMetrics();

      expect(metricsData.average_response_time_ms).toBeGreaterThan(0);
    });
  });

  describe('Logger', () => {
    it('should log messages at appropriate levels', () => {
      const { logger } = require('../backend/utils/logger');
      const consoleSpy = vi.spyOn(console, 'log');

      logger.info('Test message');
      expect(consoleSpy).toHaveBeenCalled();

      consoleSpy.mockRestore();
    });
  });

  describe('Configuration', () => {
    it('should load default configuration', () => {
      const { loadConfig } = require('../backend/types/config');
      const config = loadConfig();

      expect(config.auth.apiKeys).toContain('test-key-1');
      expect(config.rateLimit.default).toBeGreaterThan(0);
      expect(config.documents.maxFileSizeMB).toBeGreaterThan(0);
    });

    it('should parse comma-separated API keys', () => {
      const { loadConfig } = require('../backend/types/config');
      const config = loadConfig();

      expect(Array.isArray(config.auth.apiKeys)).toBe(true);
      expect(config.auth.apiKeys.length).toBeGreaterThan(0);
    });
  });

  describe('AI Service', () => {
    it('should initialize with available adapters', () => {
      const { aiService } = require('../backend/services/ai_service');
      const models = aiService.getAvailableModels();
      expect(Array.isArray(models)).toBe(true);
    });
  });

  describe('RAG Service', () => {
    it('should build RAG prompt correctly', () => {
      const { ragService } = require('../backend/services/rag_service');
      const prompt = ragService['buildRAGPrompt'](
        'What is AI?',
        'AI stands for Artificial Intelligence'
      );
      expect(prompt).toContain('What is AI?');
      expect(prompt).toContain('AI stands for Artificial Intelligence');
    });
  });

  describe('Ingestion Worker', () => {
    it('should track job status', async () => {
      const { ingestionWorker } = require('../backend/workers/ingestion_worker');

      await ingestionWorker.addJob('job-1', 'test.pdf');
      const job = ingestionWorker.getJobStatus('job-1');

      expect(job).toBeDefined();
      expect(job?.doc_id).toBe('job-1');
      expect(job?.status).toBe('pending');
    });
  });
});