cygon committed
Commit d61feef · 1 Parent(s): 05538b8

Initial deployment with Ollama support

.dockerignore ADDED
@@ -0,0 +1,18 @@
+ node_modules
+ npm-debug.log
+ .env
+ .env.local
+ .git
+ .gitignore
+ README.md
+ .vscode
+ .idea
+ *.md
+ !README.md
+ .DS_Store
+ dist
+ build
+ coverage
+ .encore
+ data
+ *.log
.env.example ADDED
@@ -0,0 +1,43 @@
+ # API Keys and Credentials (at least one required, or use Ollama)
+ OPENAI_API_KEY=your_openai_api_key_here
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
+
+ # Ollama Configuration (local LLM - no API key needed)
+ OLLAMA_BASE_URL=http://localhost:11434
+ OLLAMA_MODEL=llama2
+ OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+ # Vector Database Configuration
+ PINECONE_API_KEY=your_pinecone_api_key_here
+ PINECONE_ENVIRONMENT=us-west1-gcp
+ PINECONE_INDEX_NAME=ai-api-vectors
+
+ # Authentication
+ API_KEYS=demo-key-1,demo-key-2,admin-key-3
+ ADMIN_API_KEYS=admin-key-3
+
+ # Rate Limiting (requests per minute)
+ RATE_LIMIT_DEFAULT=60
+ RATE_LIMIT_PREMIUM=300
+ RATE_LIMIT_ADMIN=1000
+
+ # Model Configuration
+ DEFAULT_CHAT_MODEL=llama2
+ DEFAULT_EMBEDDING_MODEL=nomic-embed-text
+ DEFAULT_IMAGE_MODEL=dall-e-3
+ DEFAULT_VOICE_MODEL=tts-1
+
+ # Service Configuration
+ PORT=8000
+ LOG_LEVEL=info
+ CORS_ORIGINS=http://localhost:3000,http://localhost:5173
+
+ # Document Processing
+ MAX_FILE_SIZE_MB=10
+ CHUNK_SIZE=1000
+ CHUNK_OVERLAP=200
+
+ # Background Workers
+ ENABLE_BACKGROUND_WORKERS=true
+ WORKER_CONCURRENCY=5
.gitignore ADDED
@@ -0,0 +1,6 @@
+ .encore
+ encore.gen.go
+ encore.gen.cue
+ /.encore
+ node_modules
+ /encore.gen
CHANGELOG.md ADDED
@@ -0,0 +1,152 @@
+ # Changelog
+
+ All notable changes to the AI API Service will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [1.0.0] - 2025-10-01
+
+ ### Added
+
+ #### Core Features
+ - **Multi-turn Chat API** - Conversational AI with context management supporting multiple LLM providers
+ - **RAG (Retrieval-Augmented Generation)** - Query documents with AI-powered vector retrieval
+ - **Image Generation** - Text-to-image using DALL-E or Stable Diffusion
+ - **Voice Synthesis** - Text-to-speech with multiple voice options via OpenAI TTS
+ - **Speech Recognition** - Audio transcription using Whisper
+ - **Document Ingestion** - Upload and process PDF, DOCX, TXT files with automatic chunking
+
+ #### Model Support
+ - OpenAI integration (GPT-4, GPT-3.5-turbo, DALL-E, TTS, Whisper)
+ - HuggingFace Inference API support (Mistral, Stable Diffusion, embeddings)
+ - Anthropic Claude models (Claude 3 Sonnet, Opus)
+ - Local model support (optional, via transformers)
+
+ #### Vector Database
+ - Pinecone adapter for production vector storage
+ - In-memory vector DB fallback for development
+ - Cosine similarity search
+ - Metadata filtering support
+
+ #### Authentication & Security
+ - API Key authentication with Bearer token support
+ - Role-based access control (default, premium, admin tiers)
+ - Token bucket rate limiting (configurable per tier)
+ - Input validation with TypeScript type safety
+
+ #### Observability
+ - Structured JSON logging with configurable log levels
+ - Prometheus-style metrics endpoint
+ - Health check endpoint with service status
+ - Request/response time tracking
+ - Model usage statistics
+
+ #### Background Processing
+ - Async document ingestion workers
+ - Configurable worker concurrency
+ - Webhook notifications for completion events
+ - Automatic text chunking with overlap
+
+ #### Developer Experience
+ - Comprehensive TypeScript types
+ - Auto-generated API clients
+ - Example curl scripts
+ - JavaScript/Node.js client library
+ - Full test suite with vitest
+ - Detailed API documentation
+
+ #### Deployment
+ - Docker support with multi-stage builds
+ - Docker Compose for local development
+ - Environment-based configuration
+ - Health checks and graceful shutdown
+ - Production-ready error handling
+
+ ### API Endpoints
+
+ #### Health & Monitoring
+ - `GET /health` - Service health check with component status
+ - `GET /metrics` - Request metrics and usage statistics
+
+ #### Authentication
+ - `POST /auth/verify` - Validate API key and check rate limits
+
+ #### AI Chat
+ - `POST /ai/chat` - Multi-turn conversation with context
+ - `GET /ai/query` - Simple question answering
+
+ #### RAG
+ - `POST /rag/query` - Query with document retrieval
+ - `GET /rag/models` - List available LLM models
+
+ #### Image Generation
+ - `POST /image/generate` - Generate images from text prompts
+
+ #### Voice
+ - `POST /voice/synthesize` - Text to speech synthesis
+ - `POST /voice/transcribe` - Speech to text transcription
+
+ #### Documents
+ - `POST /upload` - Upload and ingest documents
+ - `GET /docs/:id/sources` - Retrieve document chunks
+ - `POST /webhook/events` - Ingestion completion webhooks
+
+ ### Configuration
+
+ Environment variables for all services:
+ - LLM provider API keys (OpenAI, HuggingFace, Anthropic)
+ - Vector DB configuration (Pinecone)
+ - Rate limiting settings per tier
+ - Document processing parameters
+ - Worker configuration
+ - CORS and security settings
+
+ ### Testing
+
+ - Unit tests for all core services
+ - Integration tests for API endpoints
+ - Mock implementations for external services
+ - Rate limiting validation
+ - Authentication flow tests
+ - Vector DB operations tests
+
+ ### Documentation
+
+ - Comprehensive README with architecture diagram
+ - API reference with curl examples
+ - Environment variable guide
+ - Deployment instructions (Docker, Hugging Face Spaces, cloud providers)
+ - Scaling considerations and best practices
+ - Cost optimization guidelines
+ - Troubleshooting guide
+
+ ### Known Limitations
+
+ - Maximum file upload size: 10MB (configurable)
+ - In-memory vector DB not suitable for production
+ - No built-in caching layer (add Redis for production)
+ - Synchronous API calls (streaming support coming soon)
+
+ ### Future Roadmap
+
+ - Server-Sent Events (SSE) for streaming responses
+ - Redis caching layer for frequent queries
+ - Multi-language support for responses
+ - Fine-tuning pipeline integration
+ - Analytics dashboard
+ - Webhook integrations for third-party services
+ - GraphQL API support
+ - gRPC endpoints for high-performance use cases
+ - Kubernetes deployment manifests
+ - Auto-scaling configuration
+
+ ---
+
+ ## Release Notes
+
+ This is the initial release of the AI API Service, a production-ready TypeScript API for integrating multiple AI capabilities into chatbots, LLM applications, and intelligent systems.
+
+ The service is built on Encore.ts for type-safe backend development and includes comprehensive documentation, tests, and deployment configurations.
+
+ For questions, issues, or contributions, please visit the GitHub repository.
COMPLETE_DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,1529 @@
+ # Complete Step-by-Step Guide: Deploy AI API with Ollama to Hugging Face Spaces
+ ## (Absolute Beginner-Friendly Guide)
+
+ **What you'll build**: A fully working AI API running on Hugging Face Spaces that anyone can access via the internet, powered by Ollama (no OpenAI key needed).
+
+ **Time needed**: 30-45 minutes
+ **Cost**: FREE (or $0.60/hour for faster GPU)
+ **No prior experience needed!**
+
+ ---
+
+ ## 📋 **What You Need Before Starting**
+
+ 1. ✅ A Hugging Face account (we'll create this if you don't have one)
+ 2. ✅ Git installed on your computer
+ 3. ✅ Basic ability to copy/paste and follow instructions
+ 4. ✅ This project's code files (you already have these)
+
+ ---
+
+ ## 🎯 **PART 1: Create Hugging Face Account & Space**
+
+ ### **Step 1.1: Create Hugging Face Account** (Skip if you have one)
+
+ 1. Open your web browser
+ 2. Go to: https://huggingface.co/join
+ 3. Fill in:
+    - **Email**: Your email address
+    - **Username**: Pick a username (you'll need this later - write it down!)
+    - **Password**: Choose a strong password
+ 4. Click **"Sign Up"**
+ 5. Check your email and click the verification link
+ 6. You're now logged into Hugging Face!
+
+ ### **Step 1.2: Create a New Space**
+
+ 1. **Go to**: https://huggingface.co/new-space
+
+ 2. **Fill in the form**:
+
+    | Field | What to Enter | Example |
+    |-------|---------------|---------|
+    | **Owner** | Your username | `yourname` |
+    | **Space name** | `ai-api-ollama` | (or anything you like) |
+    | **License** | Select "MIT" | |
+    | **Select the Space SDK** | Click on **"Docker"** | ⚠️ IMPORTANT: Must be Docker! |
+    | **Space hardware** | Select **"CPU basic - Free"** for now | (We'll upgrade later if needed) |
+    | **Repo type** | Leave as **"Public"** | (or Private if you prefer) |
+
+ 3. **Click "Create Space"** button at the bottom
+
+ 4. **IMPORTANT - Write down your Space URL**:
+    ```
+    https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
+    ```
+    Replace `YOUR_USERNAME` with your actual username.
+
+ 5. You'll see a page with instructions - **ignore them for now**, we'll do it differently.
+
+ ---
+
+ ## 🔧 **PART 2: Install Git and Set Up Authentication**
+
+ ### **Step 2.1: Check if Git is Installed**
+
+ **On Windows**:
+ 1. Press `Windows Key + R`
+ 2. Type `cmd` and press Enter
+ 3. Type: `git --version`
+ 4. If you see a version number (like `git version 2.40.0`), you have Git ✅
+ 5. If you see an error, download Git from: https://git-scm.com/download/win
+
+ **On Mac**:
+ 1. Press `Command + Space`
+ 2. Type `terminal` and press Enter
+ 3. Type: `git --version`
+ 4. If you see a version number, you have Git ✅
+ 5. If not, it will prompt you to install Xcode Command Line Tools - click Install
+
+ **On Linux**:
+ ```bash
+ git --version
+ ```
+ If not installed:
+ ```bash
+ sudo apt-get update
+ sudo apt-get install git
+ ```
+
+ ### **Step 2.2: Create Hugging Face Access Token**
+
+ 1. Go to: https://huggingface.co/settings/tokens
+ 2. Click **"New token"** button
+ 3. Fill in:
+    - **Name**: `git-access` (or anything you like)
+    - **Role**: Select **"Write"**
+ 4. Click **"Generate token"**
+ 5. **CRITICAL**: Copy the token and save it somewhere safe (Notepad, password manager)
+    - It looks like: `hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
+    - ⚠️ **You won't be able to see this again!**
+
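+ If you'd rather not paste the token at every Git prompt, one convenience is embedding it in the clone URL (a sketch, using the placeholders from this guide; note the token then sits in plain text in `.git/config`, so only do this on a machine you trust):
+
+ ```bash
+ # Sketch: clone with the token embedded in the URL (replace both placeholders).
+ git clone https://YOUR_USERNAME:hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx@huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
+ ```
+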
+ ---
+
+ ## 💻 **PART 3: Clone Your Space to Your Computer**
+
+ ### **Step 3.1: Open Terminal/Command Prompt**
+
+ **Windows**:
+ 1. Press `Windows Key + R`
+ 2. Type `cmd` and press Enter
+ 3. Navigate to where you want to work (e.g., Desktop):
+    ```
+    cd Desktop
+    ```
+
+ **Mac/Linux**:
+ 1. Open Terminal
+ 2. Navigate to where you want to work:
+    ```bash
+    cd ~/Desktop
+    ```
+
+ ### **Step 3.2: Clone the Space Repository**
+
+ 1. **Copy this command** (replace YOUR_USERNAME with your actual Hugging Face username):
+    ```bash
+    git clone https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
+    ```
+
+ 2. **Example**:
+    ```bash
+    git clone https://huggingface.co/spaces/johndoe/ai-api-ollama
+    ```
+
+ 3. **Press Enter**
+
+ 4. When prompted for username and password:
+    - **Username**: Your Hugging Face username
+    - **Password**: **Paste your token** (NOT your password!) - the one that starts with `hf_`
+
+ 5. You should see:
+    ```
+    Cloning into 'ai-api-ollama'...
+    ```
+
+ 6. **Verify the folder was created**:
+    ```bash
+    cd ai-api-ollama
+    ls
+    ```
+    (On Windows use `dir` instead of `ls`)
+
+ ---
+
+ ## 📂 **PART 4: Copy Project Files to Space**
+
+ ### **Step 4.1: Locate Your AI API Service Files**
+
+ You should have the project files in a folder. Let's say they're in:
+ - Windows: `C:\Users\YourName\Downloads\ai-api-service\`
+ - Mac/Linux: `~/Downloads/ai-api-service/`
+
+ ### **Step 4.2: Copy ALL Files to Space Folder**
+
+ **Option A: Using File Explorer (Easiest)**
+
+ **Windows**:
+ 1. Open File Explorer
+ 2. Navigate to your original `ai-api-service` folder
+ 3. Press `Ctrl + A` to select all files
+ 4. Press `Ctrl + C` to copy
+ 5. Navigate to `Desktop\ai-api-ollama` (your Space folder)
+ 6. Press `Ctrl + V` to paste
+ 7. When asked about replacing files, click **"Replace"**
+
+ **Mac**:
+ 1. Open Finder
+ 2. Navigate to your original `ai-api-service` folder
+ 3. Press `Cmd + A` to select all files
+ 4. Press `Cmd + C` to copy
+ 5. Navigate to `Desktop/ai-api-ollama` (your Space folder)
+ 6. Press `Cmd + V` to paste
+
+ **Option B: Using Command Line**
+
+ From the terminal, in your Space folder:
+
+ **Windows**:
+ ```bash
+ xcopy /E /I "C:\Users\YourName\Downloads\ai-api-service\*" .
+ ```
+
+ **Mac/Linux**:
+ ```bash
+ cp -r ~/Downloads/ai-api-service/* .
+ ```
+
+ ### **Step 4.3: Verify Files Were Copied**
+
+ In your terminal (inside the `ai-api-ollama` folder):
+
+ ```bash
+ ls
+ ```
+
+ You should see these folders/files:
+ - `backend/`
+ - `examples/`
+ - `tests/`
+ - `package.json`
+ - `README.md`
+ - `.env.example`
+ - `Dockerfile.huggingface`
+ - And many more files...
+
+ ✅ If you see these, you're good to proceed!
+
+ ---
+
+ ## 🐳 **PART 5: Prepare the Dockerfile for Hugging Face**
+
+ ### **Step 5.1: Rename the Dockerfile**
+
+ Hugging Face expects a file named exactly `Dockerfile` (no extension).
+
+ **Windows Command Prompt**:
+ ```bash
+ ren Dockerfile.huggingface Dockerfile
+ ```
+
+ **Mac/Linux Terminal**:
+ ```bash
+ mv Dockerfile.huggingface Dockerfile
+ ```
+
+ ### **Step 5.2: Verify the Dockerfile**
+
+ ```bash
+ cat Dockerfile
+ ```
+
+ You should see content starting with `FROM node:18-alpine AS builder`
+
+ ✅ Good to go!
+
+ ---
+
+ ## 📝 **PART 6: Create Space Configuration Files**
+
+ ### **Step 6.1: Create README.md for Your Space**
+
+ This file tells Hugging Face how to run your Space.
+
+ **Create a new file called `README.md`** in your `ai-api-ollama` folder:
+
+ **Windows**:
+ ```bash
+ notepad README.md
+ ```
+
+ **Mac/Linux**:
+ ```bash
+ nano README.md
+ ```
+
+ **Copy and paste this EXACT content** (replace YOUR_USERNAME):
+
+ ````markdown
+ ---
+ title: AI API Service with Ollama
+ emoji: 🤖
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ ---
+
+ # AI API Service with Ollama
+
+ A production-ready AI API service powered by Ollama. No OpenAI API key needed!
+
+ ## 🚀 Features
+
+ - 💬 **Multi-turn Chat** - Conversational AI with Llama2/Llama3
+ - 📚 **RAG** - Retrieval-Augmented Generation with vector search
+ - 🖼️ **Image Generation** - Text-to-image (requires additional API key)
+ - 🎙️ **Voice Synthesis** - Text-to-speech (requires additional API key)
+ - 📄 **Document Processing** - Upload and query PDFs, DOCX, TXT
+ - 🔒 **Authentication** - Secure API key-based access
+ - ⚡ **Rate Limiting** - Prevent abuse
+
+ ## 📡 API Endpoint
+
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ ## 🔑 Quick Start
+
+ ### Health Check
+
+ ```bash
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
+ ```
+
+ ### Chat Example
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {"role": "user", "content": "Explain machine learning in simple terms"}
+     ]
+   }'
+ ```
+
+ ### RAG Example
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/rag/query \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What are transformers in AI?",
+     "top_k": 5
+   }'
+ ```
+
+ ## 🔐 Authentication
+
+ Default API key: `demo-key-1`
+
+ **⚠️ IMPORTANT**: Change this in Space settings for production use!
+
+ ## 📚 Available Endpoints
+
+ | Endpoint | Method | Description |
+ |----------|--------|-------------|
+ | `/health` | GET | Service health check |
+ | `/metrics` | GET | Usage metrics |
+ | `/ai/chat` | POST | Multi-turn conversation |
+ | `/ai/query` | GET | Simple question answering |
+ | `/rag/query` | POST | Query with document retrieval |
+ | `/image/generate` | POST | Generate images (needs API key) |
+ | `/voice/synthesize` | POST | Text to speech (needs API key) |
+ | `/upload` | POST | Upload documents |
+
+ ## ⚙️ Configuration
+
+ Configured with Ollama running **inside the Space** for true serverless deployment.
+
+ **Current Settings**:
+ - Model: Llama 2 (7B)
+ - Embedding Model: nomic-embed-text
+ - Hardware: See Space settings
+
+ ## 🎯 Use Cases
+
+ - Chatbot backend for web/mobile apps
+ - Document Q&A system
+ - AI-powered search
+ - Content generation API
+ - Educational AI assistant
+
+ ## 📖 Documentation
+
+ Full API documentation: [See repository](https://github.com/your-username/ai-api-service)
+
+ ## 💡 Tips
+
+ 1. **First request is slow** - Ollama loads the model on first use (~30 seconds)
+ 2. **Subsequent requests are fast** - Model stays in memory
+ 3. **Use persistent hardware** - Upgrade from CPU to GPU for better performance
+ 4. **Monitor costs** - Free tier works great for testing, upgrade for production
+
+ ## 🆘 Support
+
+ Having issues? Check the logs or open an issue on GitHub.
+
+ ---
+
+ Built with [Encore.ts](https://encore.dev) and [Ollama](https://ollama.ai)
+ ````
+
+ **Save the file**:
+ - Notepad: File → Save
+ - Nano: Press `Ctrl + O`, then `Enter`, then `Ctrl + X`
+
+ ---
+
+ ## 🔐 **PART 7: Configure Environment Variables in Space Settings**
+
+ ### **Step 7.1: Go to Your Space Settings**
+
+ 1. Open your browser
+ 2. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
+ 3. Scroll down to **"Variables and secrets"** section
+
+ ### **Step 7.2: Add Environment Variables**
+
+ Click **"New variable"** for each of these:
+
+ #### **Variable 1: API_KEYS**
+ - **Name**: `API_KEYS`
+ - **Value**: `my-secret-key-12345,another-key-67890`
+ - ⚠️ **IMPORTANT**: Replace with your own random keys!
+ - Use strong, random strings (20+ characters)
+ - Separate multiple keys with commas (no spaces)
+ - Click **"Save"**
+
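+ One quick way to produce strong keys is `openssl rand`, which is available by default on most Mac/Linux systems (a minimal sketch; any source of 20+ random characters works just as well):
+
+ ```bash
+ # Generate two random 40-character hex keys and print them in API_KEYS format.
+ KEY1=$(openssl rand -hex 20)
+ KEY2=$(openssl rand -hex 20)
+ echo "API_KEYS=$KEY1,$KEY2"
+ ```
+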
+ #### **Variable 2: ADMIN_API_KEYS** (Optional but recommended)
+ - **Name**: `ADMIN_API_KEYS`
+ - **Value**: `admin-super-secret-key-99999`
+ - ⚠️ Make this DIFFERENT from regular API keys
+ - This bypasses rate limits
+ - Click **"Save"**
+
+ #### **Variable 3: OLLAMA_MODEL**
+ - **Name**: `OLLAMA_MODEL`
+ - **Value**: Choose one:
+   - `phi:latest` (Fastest, smallest - 1.3GB - **RECOMMENDED FOR FREE CPU**)
+   - `llama2:latest` (Good quality - 4GB)
+   - `llama3:latest` (Best quality - 4.7GB - needs GPU)
+   - `mistral:latest` (Very good - 4GB)
+ - Click **"Save"**
+
+ **Recommendation for FREE tier**: Use `phi:latest`
+
+ #### **Variable 4: OLLAMA_EMBEDDING_MODEL**
+ - **Name**: `OLLAMA_EMBEDDING_MODEL`
+ - **Value**: `nomic-embed-text`
+ - Leave as is, this works great for RAG
+ - Click **"Save"**
+
+ #### **Variable 5: RATE_LIMIT_DEFAULT**
+ - **Name**: `RATE_LIMIT_DEFAULT`
+ - **Value**: `100`
+ - This means 100 requests per minute for regular API keys
+ - Click **"Save"**
+
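+ To see the limiter in action once the Space is running, you can fire off more requests than the limit allows; with `RATE_LIMIT_DEFAULT=100`, the last few status codes printed by this sketch should be `429` (it assumes the URL and key used throughout this guide):
+
+ ```bash
+ # Send 105 rapid requests and print only the HTTP status codes.
+ for i in $(seq 1 105); do
+   curl -s -o /dev/null -w "%{http_code}\n" \
+     "https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/query?q=ping" \
+     -H "Authorization: Bearer my-secret-key-12345"
+ done
+ ```
+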
+ #### **Variable 6: LOG_LEVEL** (Optional)
+ - **Name**: `LOG_LEVEL`
+ - **Value**: `info`
+ - Click **"Save"**
+
+ ### **Step 7.3: Verify Your Variables**
+
+ You should now see these variables listed:
+ - ✅ `API_KEYS`
+ - ✅ `ADMIN_API_KEYS` (if you added it)
+ - ✅ `OLLAMA_MODEL`
+ - ✅ `OLLAMA_EMBEDDING_MODEL`
+ - ✅ `RATE_LIMIT_DEFAULT`
+
+ ---
+
+ ## 📤 **PART 8: Push Code to Hugging Face**
+
+ Now we'll upload all the files to Hugging Face.
+
+ ### **Step 8.1: Configure Git (First Time Only)**
+
+ In your terminal (inside the `ai-api-ollama` folder):
+
+ ```bash
+ git config user.email "you@example.com"
+ git config user.name "Your Name"
+ ```
+
+ Replace with your actual email and name.
+
+ ### **Step 8.2: Add All Files to Git**
+
+ ```bash
+ git add .
+ ```
+
+ The `.` means "add all files in this folder"
+
+ ### **Step 8.3: Commit the Files**
+
+ ```bash
+ git commit -m "Initial deployment with Ollama support"
+ ```
+
+ You should see output like:
+ ```
+ [main abc1234] Initial deployment with Ollama support
+ XX files changed, XXX insertions(+)
+ ```
+
+ ### **Step 8.4: Push to Hugging Face**
+
+ ```bash
+ git push
+ ```
+
+ When prompted for credentials:
+ - **Username**: Your Hugging Face username
+ - **Password**: Your Hugging Face token (starts with `hf_`)
+
+ You'll see:
+ ```
+ Enumerating objects: XX, done.
+ Counting objects: 100% (XX/XX), done.
+ Writing objects: 100% (XX/XX), XX.XX MiB | XX.XX MiB/s, done.
+ ```
+
+ ✅ **Success!** Your code is now on Hugging Face.
+
+ ---
+
+ ## ⏳ **PART 9: Wait for Build & Monitor Progress**
+
+ ### **Step 9.1: Go to Your Space**
+
+ 1. Open browser: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama`
+ 2. You'll see a yellow "Building" status at the top
+
+ ### **Step 9.2: Watch the Build Logs**
+
+ 1. Click on the **"Logs"** tab (near the top)
+ 2. You'll see real-time output like:
+    ```
+    Building Docker image...
+    Step 1/15 : FROM node:18-alpine AS builder
+    ...
+    ```
+
+ ### **Step 9.3: What to Expect (Timeline)**
+
+ | Time | What's Happening | What You'll See |
+ |------|------------------|-----------------|
+ | 0-2 min | Docker image building | `Building Docker image...` |
+ | 2-5 min | Installing Node dependencies | `npm install...` |
+ | 5-8 min | Installing Ollama | `Installing Ollama...` |
+ | 8-10 min | Starting services | `Starting Ollama...` |
+ | 10-15 min | **Downloading Ollama model** | `Pulling model: phi:latest` ⏳ **LONGEST STEP** |
+ | 15+ min | Warming up model | `Warming up model...` |
+ | Final | **Space is RUNNING** | 🟢 Green "Running" status |
+
+ **Total time**: 15-20 minutes for first deployment
+
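+ Rather than watching the logs the whole time, you can poll the health endpoint from your terminal until the Space comes up (a sketch; `curl -sf` exits non-zero until the endpoint responds successfully):
+
+ ```bash
+ # Poll every 60 seconds until /health answers successfully.
+ until curl -sf https://YOUR_USERNAME-ai-api-ollama.hf.space/health > /dev/null; do
+   echo "Still building..."
+   sleep 60
+ done
+ echo "Space is up!"
+ ```
+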
+ ### **Step 9.4: Troubleshooting Build Errors**
+
+ If you see **red error messages**:
+
+ **Common Error 1**: `npm install failed`
+ - **Fix**: Check that `package.json` was copied correctly
+ - Re-run: `git add package.json && git commit -m "fix package.json" && git push`
+
+ **Common Error 2**: `Port 7860 already in use`
+ - **Fix**: This shouldn't happen, but if it does, check README.md has `app_port: 7860`
+
+ **Common Error 3**: `Model download timeout`
+ - **Fix**: Use a smaller model like `phi:latest` in environment variables
+ - Or upgrade to GPU hardware (see Part 11)
+
+ **Common Error 4**: `Out of memory`
+ - **Fix**: Model too big for free CPU. Use `phi:latest` or upgrade to paid tier
+
+ ### **Step 9.5: Verify Space is Running**
+
+ When build completes:
+ 1. Status changes to 🟢 **"Running"**
+ 2. You'll see in logs: `Starting AI API Service on port 7860...`
+ 3. **Your API is now LIVE!**
+
+ ---
+
+ ## 🎉 **PART 10: Test Your Live API**
+
+ ### **Step 10.1: Get Your Space URL**
+
+ Your API is available at:
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ **Example**:
+ ```
+ https://johndoe-ai-api-ollama.hf.space
+ ```
+
+ ### **Step 10.2: Test Health Endpoint**
+
+ **Option A: Use Browser**
+ 1. Open your browser
+ 2. Go to: `https://YOUR_USERNAME-ai-api-ollama.hf.space/health`
+ 3. You should see JSON like:
+    ```json
+    {
+      "status": "healthy",
+      "version": "1.0.0",
+      "services": [...]
+    }
+    ```
+
+ ✅ If you see this, your API is working!
+
+ **Option B: Use Command Line**
+
+ ```bash
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
+ ```
+
+ ### **Step 10.3: Test Chat Endpoint**
+
+ **Copy this command** (replace YOUR_USERNAME and use one of your API keys):
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {
+         "role": "user",
+         "content": "Hello! Can you explain what you are in one sentence?"
+       }
+     ]
+   }'
+ ```
+
+ **Expected response** (takes 5-30 seconds for first request):
+ ```json
+ {
+   "reply": "I am an AI assistant powered by Llama, designed to help answer questions...",
+   "model": "llama2",
+   "usage": {
+     "prompt_tokens": 25,
+     "completion_tokens": 50,
+     "total_tokens": 75
+   },
+   "sources": null
+ }
+ ```
+
+ ✅ **Success!** Your AI API is working!
+
+ ### **Step 10.4: Test RAG Endpoint (Optional)**
+
+ First, upload a document:
+
+ ```bash
+ # Create a test document
+ echo "The AI API Service is a production-ready API for chatbots. It supports Ollama, OpenAI, and HuggingFace." > test.txt
+
+ # Convert to base64 (strip newlines so the JSON stays valid)
+ base64 test.txt | tr -d '\n' > test.txt.b64
+
+ # Upload (Mac/Linux)
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/upload \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d "{
+     \"filename\": \"test.txt\",
+     \"content_base64\": \"$(cat test.txt.b64)\",
+     \"metadata\": {\"title\": \"Test Document\"}
+   }"
+ ```
+
+ Then query it:
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/rag/query \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What does the API support?",
+     "top_k": 3
+   }'
+ ```
+
+ ---
+
+ ## 📊 **PART 11: Monitor and Optimize (Optional)**
+
+ ### **Step 11.1: Check Metrics**
+
+ ```bash
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
+   -H "Authorization: Bearer my-secret-key-12345"
+ ```
+
+ You'll see:
+ - Total requests
+ - Errors
+ - Response times
+ - Model usage
+
+ ### **Step 11.2: Upgrade Hardware (If Needed)**
+
+ If your Space is slow or timing out:
+
+ 1. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
+ 2. Scroll to **"Space hardware"**
+ 3. Click **"Change hardware"**
+ 4. Select:
+    - **CPU upgrade** ($0.60/hr) - 2x faster than free
+    - **GPU T4** ($0.60/hr) - 10x faster, supports bigger models
+    - **GPU A10G** ($3.15/hr) - Best performance
+ 5. Click **"Update Space"**
+ 6. Space will restart with new hardware (~5 minutes)
+
+ ### **Step 11.3: Use Bigger Models**
+
+ Once you have GPU:
+
+ 1. Go to Settings → Variables and secrets
+ 2. Edit `OLLAMA_MODEL`
+ 3. Change to: `llama3:latest` or `mistral:latest`
+ 4. Save
+ 5. Space will restart and download new model
+
+ ---
+
+ ## 🔒 **PART 12: Security Best Practices**
+
+ ### **Step 12.1: Change Default API Keys**
+
+ **⚠️ CRITICAL FOR PRODUCTION**
+
+ 1. Go to Space Settings → Variables
+ 2. Edit `API_KEYS`
+ 3. Replace `demo-key-1` with strong random keys:
+    ```
+    ak_live_a8f7d9e2c1b4f5a7d8e9c2b1a5f7,ak_live_b9c2d1e3f4a5b7c8d9e1f2a3b5
+    ```
+ 4. **Never share these keys publicly!**
+
+ ### **Step 12.2: Make Space Private (Optional)**
+
+ 1. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
+ 2. Scroll to **"Rename or change repo visibility"**
+ 3. Click **"Make private"**
+ 4. Confirm
+
+ Now only you can see the Space, but the API still works for anyone with the URL and API key.
+
+ ### **Step 12.3: Monitor Usage**
+
+ Check logs regularly:
+ 1. Go to Space → Logs tab
+ 2. Look for suspicious activity:
+    - Many failed authentication attempts
+    - Unusually high request volume
+    - Error patterns
+
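+ The metrics endpoint exposes the same signals programmatically; a sketch that uses `jq` (assumed to be installed) to pull out the counters worth alerting on:
+
+ ```bash
+ # Fetch metrics and extract the error and rate-limit counters.
+ curl -s https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
+   -H "Authorization: Bearer admin-super-secret-key-99999" \
+   | jq '{errors_total, rate_limit_hits}'
+ ```
+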
+ ---
+
+ ## 🎯 **PART 13: Using Your API in Applications**
+
+ ### **Example: JavaScript/TypeScript Web App**
+
+ ```javascript
+ // Save as: app.js
+
+ const API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space';
+ const API_KEY = 'my-secret-key-12345'; // Your actual key
+
+ async function chat(message) {
+   const response = await fetch(`${API_URL}/ai/chat`, {
+     method: 'POST',
+     headers: {
+       'Authorization': `Bearer ${API_KEY}`,
+       'Content-Type': 'application/json',
+     },
+     body: JSON.stringify({
+       conversation: [
+         { role: 'user', content: message }
+       ]
+     })
+   });
+
+   const data = await response.json();
+   return data.reply;
+ }
+
+ // Usage
+ chat('Hello!').then(reply => {
+   console.log('AI:', reply);
+ });
+ ```
+
+ ### **Example: Python Application**
+
+ ```python
+ # Save as: app.py
+
+ import requests
+
+ API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space'
+ API_KEY = 'my-secret-key-12345'
+
+ def chat(message):
+     response = requests.post(
+         f'{API_URL}/ai/chat',
+         headers={
+             'Authorization': f'Bearer {API_KEY}',
+             'Content-Type': 'application/json'
+         },
+         json={
+             'conversation': [
+                 {'role': 'user', 'content': message}
+             ]
+         }
+     )
+     return response.json()['reply']
+
+ # Usage
+ reply = chat('Hello!')
+ print(f'AI: {reply}')
+ ```
+
+ ### **Example: Mobile App (React Native)**
+
+ ```javascript
+ // Save as: ChatService.js
+
+ const API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space';
+ const API_KEY = 'my-secret-key-12345';
+
+ export async function sendMessage(message) {
+   try {
+     const response = await fetch(`${API_URL}/ai/chat`, {
+       method: 'POST',
+       headers: {
+         'Authorization': `Bearer ${API_KEY}`,
+         'Content-Type': 'application/json',
+       },
+       body: JSON.stringify({
+         conversation: [
+           { role: 'user', content: message }
+         ]
+       })
+     });
+
+     if (!response.ok) {
+       throw new Error('API request failed');
+     }
+
+     const data = await response.json();
+     return data.reply;
+   } catch (error) {
+     console.error('Chat error:', error);
+     throw error;
+   }
+ }
+ ```
+
+ ---
+
+ ## 🆘 **PART 14: Troubleshooting Common Issues**
+
+ ### **Issue 1: "Space is building for too long"**
+
+ **Symptoms**: Build takes 30+ minutes
+
+ **Causes**:
+ - Large model download (llama3 is 4.7GB)
+ - Slow internet on Hugging Face servers
+ - Free tier resource limits
+
+ **Solutions**:
+ 1. Use smaller model: `phi:latest` (1.3GB)
+ 2. Upgrade to GPU hardware for faster downloads
+ 3. Wait patiently - first build is always slow
+
+ ---
+
+ ### **Issue 2: "Space crashed / Runtime error"**
+
+ **Symptoms**: Red "Runtime error" status
+
+ **Check logs for**:
+
+ **Error**: `Out of memory`
+ - **Fix**: Model too big for hardware
+ - **Solution**: Use `phi:latest` or upgrade to GPU T4
+
+ **Error**: `Port 7860 already in use`
+ - **Fix**: Check README.md has correct `app_port: 7860`
+ - **Solution**: Edit README.md and push again
+
+ **Error**: `Ollama failed to start`
+ - **Fix**: Dockerfile issue
+ - **Solution**: Verify Dockerfile was renamed correctly
+
+ ---
+
+ ### **Issue 3: "API returns 401 Unauthorized"**
+
+ **Symptoms**:
+ ```json
+ {"error": "Invalid API key"}
+ ```
+
+ **Solutions**:
+ 1. **Check your Authorization header**:
+    ```bash
+    # Correct format:
+    -H "Authorization: Bearer my-secret-key-12345"
+
+    # NOT:
+    -H "Authorization: my-secret-key-12345"  # Missing "Bearer"
+    ```
+
+ 2. **Verify API key is in Space settings**:
+    - Go to Settings → Variables
+    - Check `API_KEYS` contains your key
+    - Keys are case-sensitive!
+
+ 3. **Try the default key**:
+    ```bash
+    -H "Authorization: Bearer demo-key-1"
+    ```
+
+ ---
+
+ ### **Issue 4: "API is very slow (30+ seconds)"**
+
+ **Causes**:
+ - First request loads model into memory (normal)
+ - Free CPU tier is slow
+ - Model is too large for hardware
+
+ **Solutions**:
+ 1. **First request is always slow** - subsequent requests are fast
+ 2. **Upgrade to GPU T4**:
+    - Settings → Space hardware → GPU T4
+    - 10x faster inference
+ 3. **Use smaller model**: `phi:latest`
+ 4. **Add model warmup** (already in Dockerfile):
+    - Keeps model loaded
+    - Reduces cold start time
+
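+ To put numbers on it, `curl` can report the total request time directly; a sketch for comparing cold and warm latency (run it twice and compare):
+
+ ```bash
+ # Measure end-to-end latency for a single chat request.
+ curl -s -o /dev/null -w "total: %{time_total}s\n" \
+   -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d '{"conversation":[{"role":"user","content":"ping"}]}'
+ ```
+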
+ ---
+
+ ### **Issue 5: "Cannot upload documents"**
+
+ **Error**: `File too large`
+
+ **Fix**:
+ - Default max size is 10MB
+ - To increase, add environment variable:
+   ```
+   MAX_FILE_SIZE_MB=50
+   ```
+
+ **Error**: `Invalid file format`
+
+ **Fix**:
+ - Only supports: PDF, DOCX, TXT
+ - Ensure file extension is correct
+ - Check file is not corrupted
+
+ ---
+
+ ### **Issue 6: "RAG returns no results"**
+
+ **Symptoms**: Empty `sources` array in response
+
+ **Causes**:
+ 1. No documents uploaded yet
+ 2. Query doesn't match document content
+ 3. Embedding model not loaded
+
+ **Solutions**:
+ 1. **Upload a document first**:
+    ```bash
+    curl -X POST https://YOUR_API/upload \
+      -H "Authorization: Bearer YOUR_KEY" \
+      -d '{"filename": "doc.txt", "content_base64": "..."}'
+    ```
+
+ 2. **Wait for processing** (check logs):
+    ```
+    Document processed successfully: doc_abc123
+    ```
+
+ 3. **Try broader query**:
+    - Instead of: "What is the exact price?"
+    - Try: "pricing information"
+
+ ---
+
+ ### **Issue 7: "How do I see errors?"**
+
+ **Steps**:
+ 1. Go to your Space
+ 2. Click **"Logs"** tab
+ 3. Look for lines with:
+    ```
+    "level": "error"
+    ```
+ 4. Read the `"message"` field
+
+ **Common errors and fixes**:
+
+ ```json
+ {"level":"error","message":"Invalid API key"}
+ ```
+ → Fix: Check Authorization header
+
+ ```json
+ {"level":"error","message":"Rate limit exceeded"}
+ ```
+ → Fix: Wait 60 seconds or use admin key
+
+ ```json
+ {"level":"error","message":"Ollama API error"}
+ ```
+ → Fix: Model not loaded, wait for startup to complete
+
+ ---
+
+ ### **Issue 8: "Space keeps restarting"**
+
+ **Symptoms**: Status alternates between Building and Running
+
+ **Causes**:
+ - Application crashes on startup
+ - Out of memory
+ - Port configuration issue
+
+ **Debug steps**:
+ 1. Check logs for crash reason
+ 2. Verify environment variables are set
+ 3. Try smaller model
+ 4. Contact Hugging Face support if persistent
+
+ ---
+
+ ## 📖 **PART 15: Complete API Reference**
+
+ ### **Base URL**
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ ### **Authentication**
+ All endpoints (except `/health`) require:
+ ```
+ Authorization: Bearer YOUR_API_KEY
+ ```
+
+ ---
+
+ ### **1. Health Check**
+
+ **Endpoint**: `GET /health`
+
+ **No authentication required**
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/health
+ ```
+
+ **Response**:
+ ```json
+ {
+   "status": "healthy",
+   "version": "1.0.0",
+   "services": [
+     {"name": "llm", "status": "up"},
+     {"name": "vector_db", "status": "up"}
+   ],
+   "uptime_seconds": 3600
+ }
+ ```
+
+ ---
+
+ ### **2. Metrics**
+
+ **Endpoint**: `GET /metrics`
+
+ **Requires authentication**
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/metrics \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "timestamp": 1698765432000,
+   "requests_total": 150,
+   "requests_by_endpoint": {
+     "/ai/chat": 100,
+     "/rag/query": 50
+   },
+   "errors_total": 5,
+   "rate_limit_hits": 2,
+   "average_response_time_ms": 1250
+ }
+ ```
+
+ ---
+
+ ### **3. Simple Chat**
+
+ **Endpoint**: `POST /ai/chat`
+
+ **Request**:
+ ```json
+ {
+   "conversation": [
+     {"role": "user", "content": "Hello!"}
+   ],
+   "model": "llama2",
+   "options": {
+     "temperature": 0.7,
+     "max_tokens": 500
+   }
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "reply": "Hello! How can I help you today?",
+   "model": "llama2",
+   "usage": {
+     "prompt_tokens": 10,
+     "completion_tokens": 20,
+     "total_tokens": 30
+   },
+   "sources": null
+ }
+ ```
+
+ **Example**:
+ ```bash
+ curl -X POST https://YOUR_API/ai/chat \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {"role": "user", "content": "Explain AI in one sentence"}
+     ]
+   }'
+ ```
+
+ ---
+
+ ### **4. Multi-turn Conversation**
+
+ **Endpoint**: `POST /ai/chat`
+
+ **Request** (with context):
+ ```json
+ {
+   "conversation": [
+     {"role": "user", "content": "What is 2+2?"},
+     {"role": "assistant", "content": "2+2 equals 4."},
+     {"role": "user", "content": "What about 2+3?"}
+   ]
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "reply": "2+3 equals 5.",
+   "model": "llama2",
+   "usage": {...}
+ }
+ ```
+
+ ---
+
+ ### **5. RAG Query**
+
+ **Endpoint**: `POST /rag/query`
+
+ **Request**:
+ ```json
+ {
+   "query": "What are the main features?",
+   "top_k": 5,
+   "model": "llama2",
+   "use_retrieval": true
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "answer": "The main features include...",
+   "sources": [
+     {
+       "doc_id": "doc_123",
+       "chunk_id": "chunk_5",
+       "content": "Feature description...",
+       "score": 0.92,
+       "metadata": {"title": "Documentation"}
+     }
+   ],
+   "model": "llama2",
+   "usage": {...},
+   "retrieval_time_ms": 250
+ }
+ ```
+
+ **Example**:
+ ```bash
+ curl -X POST https://YOUR_API/rag/query \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What is machine learning?",
+     "top_k": 3
+   }'
+ ```
+
+ ---
+
+ ### **6. Upload Document**
+
+ **Endpoint**: `POST /upload`
+
+ **Request**:
+ ```json
+ {
+   "filename": "document.txt",
+   "content_base64": "VGhpcyBpcyBhIHRlc3Q=",
+   "metadata": {
+     "title": "Test Document",
+     "category": "docs"
+   }
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "doc_id": "doc_abc123",
+   "filename": "document.txt",
+   "size_bytes": 1024,
+   "status": "processing",
+   "estimated_chunks": 5
+ }
+ ```
+
+ **Example (Linux/Mac)**:
+ ```bash
+ # Encode file to base64 (strip newlines so the JSON stays valid)
+ base64 document.txt | tr -d '\n' > document.b64
+
+ # Upload
+ curl -X POST https://YOUR_API/upload \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d "{
+     \"filename\": \"document.txt\",
+     \"content_base64\": \"$(cat document.b64)\",
+     \"metadata\": {\"title\": \"My Document\"}
+   }"
+ ```
+
+ ---
+
+ ### **7. Get Document Sources**
+
+ **Endpoint**: `GET /docs/:id/sources`
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/docs/doc_abc123/sources \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "sources": [
+     {
+       "doc_id": "doc_abc123",
+       "chunk_id": "chunk_0",
+       "content": "This is the first chunk...",
+       "score": 1.0,
+       "metadata": {...}
+     }
+   ]
+ }
+ ```
+
+ ---
+
+ ### **8. Simple Query**
+
+ **Endpoint**: `GET /ai/query?q=QUESTION`
+
+ **Example**:
+ ```bash
+ curl "https://YOUR_API/ai/query?q=What+is+AI" \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "answer": "AI stands for Artificial Intelligence...",
+   "model": "llama2"
+ }
+ ```
+
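+ For questions with spaces or punctuation, letting `curl` do the URL-encoding is less error-prone than writing `+` by hand (a sketch; `-G` turns the data into query parameters):
+
+ ```bash
+ # Let curl URL-encode the question for you.
+ curl -G "https://YOUR_API/ai/query" \
+   --data-urlencode "q=What is AI?" \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+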
+ ---
+
+ ### **9. Get Available Models**
+
+ **Endpoint**: `GET /rag/models`
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/rag/models \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "models": ["ollama", "llama", "llama2", "llama3", "mistral"],
+   "default_model": "llama2"
+ }
+ ```
+
+ ---
+
+ ## 🎓 **PART 16: Advanced Tips & Tricks**
+
+ ### **Tip 1: Optimize Response Time**
+
+ **Add warmup requests** to keep model in memory:
+
+ Create a simple cron job or scheduled task (cron entries must stay on a single line):
+ ```bash
+ # Every 5 minutes, make a request to keep model loaded
+ */5 * * * * curl -s -X POST https://YOUR_API/ai/chat -H "Authorization: Bearer YOUR_KEY" -H "Content-Type: application/json" -d '{"conversation":[{"role":"user","content":"ping"}]}'
+ ```
+
+ ---
+
+ ### **Tip 2: Use System Prompts for Consistency**
+
+ ```bash
+ curl -X POST https://YOUR_API/ai/chat \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {
+         "role": "system",
+         "content": "You are a friendly customer support agent. Be helpful and concise."
+       },
+       {
+         "role": "user",
+         "content": "How do I reset my password?"
+       }
+     ]
+   }'
+ ```
+
+ ---
+
+ ### **Tip 3: Batch Document Upload**
+
+ Upload multiple documents efficiently:
+
+ ```bash
+ # Create script: batch_upload.sh
+
+ for file in docs/*.txt; do
+   echo "Uploading $file..."
+   base64 "$file" | tr -d '\n' > temp.b64
+   curl -X POST https://YOUR_API/upload \
+     -H "Authorization: Bearer YOUR_KEY" \
+     -H "Content-Type: application/json" \
+     -d "{
+       \"filename\": \"$(basename "$file")\",
+       \"content_base64\": \"$(cat temp.b64)\"
+     }"
+   sleep 2 # Rate limiting
+ done
+
+ rm temp.b64
+ ```
+
+ ---
+
+ ### **Tip 4: Monitor Costs**
+
+ If using paid hardware:
+
+ 1. Check Hugging Face billing: https://huggingface.co/settings/billing
+ 2. Set up budget alerts
+ 3. Monitor Space uptime
+ 4. Pause Space when not in use:
+    - Settings → "Pause Space"
+    - Saves money, stops billing
+    - Resume anytime
+
+ ---
+
+ ### **Tip 5: Create API Key Tiers**
+
+ **In Space Settings**, set up different keys for different users:
+
+ ```
+ # Free tier - limited rate
+ API_KEYS=free_user_key_1,free_user_key_2
+
+ # Premium tier - higher rate
+ PREMIUM_API_KEYS=premium_user_key_1
+
+ # Admin tier - unlimited
+ ADMIN_API_KEYS=admin_key_1
+ ```
+
+ Then adjust rate limits:
+ ```
+ RATE_LIMIT_DEFAULT=60
+ RATE_LIMIT_PREMIUM=300
+ RATE_LIMIT_ADMIN=10000
+ ```
+
+ ---
+
+ ## ✅ **Final Checklist**
+
+ Before going live, verify:
+
+ - [ ] Space is running (green status)
+ - [ ] Health check returns `"status": "healthy"`
+ - [ ] Chat endpoint responds correctly
+ - [ ] Changed default API keys to strong random strings
+ - [ ] Tested with your own API key
+ - [ ] Documented your API keys securely (password manager)
+ - [ ] Set appropriate rate limits
+ - [ ] Chose right model for your hardware
+ - [ ] Tested all endpoints you plan to use
+ - [ ] Reviewed logs for errors
+ - [ ] (Optional) Upgraded hardware if needed
+ - [ ] (Optional) Made Space private if needed
+
+ ---
+
+ ## 🎉 **Congratulations!**
+
+ You now have:
+ ✅ A fully functional AI API running on Hugging Face Spaces
+ ✅ Powered by Ollama (no OpenAI costs!)
+ ✅ Accessible from anywhere via HTTPS
+ ✅ Secure with API key authentication
+ ✅ Ready to integrate into your apps
+
+ **Your API URL**:
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ **Share your API** (securely):
+ - Give URL + API key to developers
+ - Use in web apps, mobile apps, scripts
+ - Process millions of requests
+ - Scale as needed
+
+ ---
+
+ ## 📞 **Need Help?**
+
+ **If you're stuck**:
+ 1. ✅ Re-read the relevant section
+ 2. ✅ Check Space logs for errors
+ 3. ✅ Try the troubleshooting section
+ 4. ✅ Open an issue on GitHub
+ 5. ✅ Ask on Hugging Face forums
+
+ **Common beginner mistakes**:
+ - Forgot to rename `Dockerfile.huggingface` to `Dockerfile`
+ - Used wrong API key format (missing "Bearer")
+ - Chose model too large for hardware
+ - Didn't wait for initial model download
+
+ ---
+
+ ## 📚 **What's Next?**
+
+ Now that your API is live:
+
+ 1. **Build a chat interface**:
+    - React app
+    - Vue app
+    - Mobile app
+    - WordPress plugin
+
+ 2. **Add more features**:
+    - User accounts
+    - Usage analytics
+    - Custom models
+    - Advanced RAG
+
+ 3. **Scale up**:
+    - Upgrade hardware
+    - Add caching
+    - Load balancing
+    - CDN
+
+ 4. **Monetize** (optional):
+    - Charge for API access
+    - Offer different tiers
+    - White-label for clients
+
+ ---
+
+ **You did it! 🎉🚀**
+
+ Your AI-powered API is now live and ready to change the world!
DEPLOYMENT.md ADDED
@@ -0,0 +1,435 @@
+ # Deployment Guide
+
+ This guide covers deploying the AI API Service to various platforms.
+
+ ## Table of Contents
+ - [Local Development](#local-development)
+ - [Docker Deployment](#docker-deployment)
+ - [Encore Cloud](#encore-cloud)
+ - [Hugging Face Spaces](#hugging-face-spaces)
+ - [AWS Deployment](#aws-deployment)
+ - [Google Cloud Platform](#google-cloud-platform)
+ - [Azure Deployment](#azure-deployment)
+ - [Environment Variables](#environment-variables)
+
+ ## Local Development
+
+ ### Prerequisites
+ - Node.js 18+
+ - npm or yarn
+ - Encore CLI
+
+ ### Steps
+
+ 1. **Install Encore CLI**
+    ```bash
+    npm install -g encore
+    ```
+
+ 2. **Install dependencies**
+    ```bash
+    npm install
+    ```
+
+ 3. **Configure environment**
+    ```bash
+    cp .env.example .env
+    # Edit .env with your API keys
+    ```
+
+ 4. **Run development server**
+    ```bash
+    encore run
+    ```
+
+ The API will be available at `http://localhost:8000`
+
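+ A quick smoke test once the server is up (assuming the default port):
+
+ ```bash
+ # Verify the service responds before moving on.
+ curl http://localhost:8000/health
+ ```
+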
47
+ ## Docker Deployment
48
+
49
+ ### Build and Run Locally
50
+
51
+ ```bash
52
+ docker-compose up -d
53
+ ```
54
+
55
+ This starts:
56
+ - API service on port 8000
57
+ - Redis for caching (optional)
58
+
59
+ ### Build Production Image
60
+
61
+ ```bash
62
+ docker build -t ai-api-service:latest .
63
+ ```
64
+
65
+ ### Run Production Container
66
+
67
+ ```bash
68
+ docker run -d \
69
+ -p 8000:8000 \
70
+ -e OPENAI_API_KEY=your_key \
71
+ -e API_KEYS=your_api_keys \
72
+ --name ai-api \
73
+ ai-api-service:latest
74
+ ```
75
+
76
+ ## Encore Cloud
77
+
78
+ Encore Cloud provides the easiest deployment experience with automatic infrastructure provisioning.
79
+
80
+ ### Steps
81
+
82
+ 1. **Install Encore CLI**
83
+ ```bash
84
+ npm install -g encore
85
+ ```
86
+
87
+ 2. **Login to Encore**
88
+ ```bash
89
+ encore auth login
90
+ ```
91
+
92
+ 3. **Create app (first time)**
93
+ ```bash
94
+ encore app create ai-api-service
95
+ ```
96
+
97
+ 4. **Set secrets**
98
+ ```bash
99
+ encore secret set OPENAI_API_KEY
100
+ encore secret set HUGGINGFACE_API_KEY
101
+ encore secret set PINECONE_API_KEY
102
+ ```
103
+
104
+ 5. **Deploy**
105
+ ```bash
106
+ encore deploy
107
+ ```
108
+
109
+ Your API will be deployed with:
110
+ - Auto-scaling
111
+ - Load balancing
112
+ - SSL/TLS certificates
113
+ - Monitoring and logs
114
+ - Database backups
115
+
116
+ ## Hugging Face Spaces
117
+
118
+ Deploy as a Docker Space on Hugging Face for easy sharing.
119
+
120
+ ### Steps
121
+
122
+ 1. **Create new Space**
123
+ - Go to https://huggingface.co/new-space
124
+ - Select "Docker" as SDK
125
+ - Choose hardware tier (CPU or GPU)
126
+
127
+ 2. **Clone Space repository**
128
+ ```bash
129
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
130
+ cd YOUR_SPACE
131
+ ```
132
+
133
+ 3. **Copy project files**
134
+ ```bash
135
+ cp -r /path/to/ai-api-service/* .
136
+ ```
137
+
138
+ 4. **Create Dockerfile for HF Spaces**
139
+ ```dockerfile
140
+ FROM node:18-alpine
141
+
142
+ WORKDIR /app
143
+
144
+ COPY package*.json ./
145
+ RUN npm ci --only=production
146
+
147
+ COPY . .
148
+
149
+ ENV PORT=7860
150
+ EXPOSE 7860
151
+
152
+ CMD ["npm", "start"]
153
+ ```
154
+
155
+ 5. **Configure secrets in Space settings**
156
+ - `OPENAI_API_KEY`
157
+ - `HUGGINGFACE_API_KEY`
158
+ - `API_KEYS`
159
+
160
+ 6. **Push to Space**
161
+ ```bash
162
+ git add .
163
+ git commit -m "Initial deployment"
164
+ git push
165
+ ```
166
+
167
+ ## AWS Deployment
168
+
169
+ ### Using AWS ECS (Elastic Container Service)
170
+
171
+ 1. **Push image to ECR**
172
+ ```bash
173
+ aws ecr create-repository --repository-name ai-api-service
174
+
175
+ docker build -t ai-api-service .
176
+
177
+ aws ecr get-login-password --region us-east-1 | \
178
+ docker login --username AWS --password-stdin \
179
+ YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com
180
+
181
+ docker tag ai-api-service:latest \
182
+ YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest
183
+
184
+ docker push YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest
185
+ ```
186
+
187
+ 2. **Create ECS Task Definition**
188
+ ```json
189
+ {
190
+ "family": "ai-api-service",
191
+ "networkMode": "awsvpc",
192
+ "requiresCompatibilities": ["FARGATE"],
193
+ "cpu": "1024",
194
+ "memory": "2048",
195
+ "containerDefinitions": [{
196
+ "name": "ai-api",
197
+ "image": "YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest",
198
+ "portMappings": [{
199
+ "containerPort": 8000,
200
+ "protocol": "tcp"
201
+ }],
202
+ "environment": [],
203
+ "secrets": [{
204
+ "name": "OPENAI_API_KEY",
205
+ "valueFrom": "arn:aws:secretsmanager:us-east-1:ACCOUNT:secret:openai-api-key"
206
+ }]
207
+ }]
208
+ }
209
+ ```
210
+
211
+ 3. **Create ECS Service with ALB**
212
+ - Configure Application Load Balancer
213
+ - Set up target group (port 8000)
214
+ - Configure auto-scaling
215
+ - Add health checks
216
+
217
+ ### Using AWS Lambda (API Gateway)
218
+
219
+ For serverless deployment, wrap endpoints with AWS Lambda handlers.
220
+
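+ A sketch of that pattern using the `serverless-http` package (illustrative only: it assumes an Express-style `app` export, which this Encore service does not provide out of the box):
+
+ ```typescript
+ // Hypothetical Lambda wrapper via serverless-http; './app' is an assumed
+ // Express-compatible export, not something this repo currently ships.
+ import serverless from 'serverless-http';
+ import type { APIGatewayProxyEvent, APIGatewayProxyResult, Context } from 'aws-lambda';
+ import { app } from './app';
+
+ const proxy = serverless(app);
+
+ export const handler = (
+   event: APIGatewayProxyEvent,
+   context: Context
+ ): Promise<APIGatewayProxyResult> =>
+   // serverless-http translates the API Gateway event into an ordinary HTTP request
+   proxy(event, context) as Promise<APIGatewayProxyResult>;
+ ```
+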
221
+ ## Google Cloud Platform
222
+
223
+ ### Using Cloud Run
224
+
225
+ 1. **Build and push to GCR**
226
+ ```bash
227
+ gcloud builds submit --tag gcr.io/PROJECT_ID/ai-api-service
228
+
229
+ gcloud run deploy ai-api-service \
230
+ --image gcr.io/PROJECT_ID/ai-api-service \
231
+ --platform managed \
232
+ --region us-central1 \
233
+ --allow-unauthenticated \
234
+ --set-env-vars OPENAI_API_KEY=your_key
235
+ ```
236
+
237
+ 2. **Configure secrets**
238
+ ```bash
239
+ echo -n "your_openai_key" | \
240
+ gcloud secrets create openai-api-key --data-file=-
241
+
242
+ gcloud run services update ai-api-service \
243
+ --update-secrets OPENAI_API_KEY=openai-api-key:latest
244
+ ```
245
+
246
+ ### Using GKE (Kubernetes)
247
+
248
+ 1. **Create cluster**
249
+ ```bash
250
+ gcloud container clusters create ai-api-cluster \
251
+ --num-nodes=3 \
252
+ --machine-type=n1-standard-2
253
+ ```
254
+
255
+ 2. **Deploy application**
256
+ ```bash
257
+ kubectl apply -f k8s/deployment.yaml
258
+ kubectl apply -f k8s/service.yaml
259
+ kubectl apply -f k8s/ingress.yaml
260
+ ```
261
+
262
+ ## Azure Deployment
263
+
264
+ ### Using Azure Container Instances
265
+
266
+ ```bash
267
+ az container create \
268
+ --resource-group ai-api-rg \
269
+ --name ai-api-service \
270
+ --image your-registry.azurecr.io/ai-api-service:latest \
271
+ --cpu 2 \
272
+ --memory 4 \
273
+ --ports 8000 \
274
+ --environment-variables \
275
+ PORT=8000 \
276
+ --secure-environment-variables \
277
+ OPENAI_API_KEY=your_key \
278
+ API_KEYS=demo-key-1
279
+ ```
280
+
281
+ ### Using Azure App Service
282
+
283
+ 1. **Create App Service Plan**
284
+ ```bash
285
+ az appservice plan create \
286
+ --name ai-api-plan \
287
+ --resource-group ai-api-rg \
288
+ --is-linux \
289
+ --sku B1
290
+ ```
291
+
292
+ 2. **Create Web App**
293
+ ```bash
294
+ az webapp create \
295
+ --resource-group ai-api-rg \
296
+ --plan ai-api-plan \
297
+ --name ai-api-service \
298
+ --deployment-container-image-name your-registry.azurecr.io/ai-api-service:latest
299
+ ```
300
+
301
+ 3. **Configure settings**
302
+ ```bash
303
+ az webapp config appsettings set \
304
+ --resource-group ai-api-rg \
305
+ --name ai-api-service \
306
+ --settings \
307
+ OPENAI_API_KEY=@Microsoft.KeyVault(SecretUri=...)
308
+ ```
309
+
310
+ ## Environment Variables
311
+
312
+ ### Required Variables
313
+
314
+ | Variable | Description | Example |
315
+ |----------|-------------|---------|
316
+ | `API_KEYS` | Comma-separated API keys | `key1,key2,key3` |
317
+ | `OPENAI_API_KEY` | OpenAI API key (another provider key, or a local Ollama, also works) | `sk-...` |
318
+
319
+ ### Optional Variables
320
+
321
+ | Variable | Description | Default |
322
+ |----------|-------------|---------|
323
+ | `HUGGINGFACE_API_KEY` | HuggingFace API key | - |
324
+ | `ANTHROPIC_API_KEY` | Anthropic API key | - |
325
+ | `PINECONE_API_KEY` | Pinecone vector DB key | - |
326
+ | `RATE_LIMIT_DEFAULT` | Requests/min for default tier | `60` |
327
+ | `RATE_LIMIT_ADMIN` | Requests/min for admin tier | `1000` |
328
+ | `LOG_LEVEL` | Logging level | `info` |
329
+ | `MAX_FILE_SIZE_MB` | Max upload size in MB | `10` |
330
+
331
+ ### Setting Secrets
332
+
333
+ **Encore Cloud:**
334
+ ```bash
335
+ encore secret set OPENAI_API_KEY
336
+ ```
337
+
338
+ **Docker:**
339
+ ```bash
340
+ docker run -e OPENAI_API_KEY=your_key ...
341
+ ```
342
+
343
+ **Kubernetes:**
344
+ ```bash
345
+ kubectl create secret generic api-secrets \
346
+ --from-literal=OPENAI_API_KEY=your_key
347
+ ```
348
+
349
+ **AWS Secrets Manager:**
350
+ ```bash
351
+ aws secretsmanager create-secret \
352
+ --name openai-api-key \
353
+ --secret-string your_key
354
+ ```
355
+
356
+ ## Monitoring
357
+
358
+ ### Health Checks
359
+
360
+ Configure health check endpoint:
361
+ ```
362
+ GET /health
363
+ ```
364
+
365
+ Expected response:
366
+ ```json
367
+ {
368
+ "status": "healthy",
369
+ "version": "1.0.0",
370
+ "services": [...]
371
+ }
372
+ ```
373
+
374
+ ### Metrics
375
+
376
+ Access metrics at:
377
+ ```
378
+ GET /metrics
379
+ ```
380
+
381
+ ### Logging
382
+
383
+ Logs are output as structured JSON:
384
+ ```json
385
+ {
386
+ "timestamp": "2025-10-01T12:00:00Z",
387
+ "level": "info",
388
+ "message": "Request processed",
389
+ "duration_ms": 245
390
+ }
391
+ ```
392
+
393
+ ## Scaling Recommendations
394
+
395
+ ### Horizontal Scaling
396
+ - Start with 2-3 replicas
397
+ - Auto-scale based on CPU (70% threshold)
398
+ - Use load balancer for distribution
399
+
400
+ ### Vertical Scaling
401
+ - Minimum: 1 CPU, 2GB RAM
402
+ - Recommended: 2 CPU, 4GB RAM
403
+ - High traffic: 4 CPU, 8GB RAM
404
+
405
+ ### Database Scaling
406
+ - Use Pinecone for production vector storage
407
+ - Implement Redis for caching
408
+ - Consider read replicas for high traffic
409
+
410
+ ## Troubleshooting
411
+
412
+ ### Common Issues
413
+
414
+ **"No LLM adapter available"**
415
+ - Check that at least one API key is set (OpenAI, HuggingFace, or Anthropic), or that Ollama is reachable
416
+
417
+ **"Rate limit exceeded"**
418
+ - Increase rate limits in environment variables
419
+ - Use admin API key for testing
420
+
421
+ **"Vector DB connection failed"**
422
+ - Service falls back to in-memory storage
423
+ - Check Pinecone credentials
424
+
425
+ **High latency**
426
+ - Enable caching (Redis)
427
+ - Use a region closer to your model providers
428
+ - Optimize model selection
429
+
430
+ ## Support
431
+
432
+ For deployment assistance:
433
+ - GitHub Issues
434
+ - Documentation at docs/
435
+ - Community Discord
DEVELOPMENT.md ADDED
@@ -0,0 +1,106 @@
1
+ # Getting Started
2
+
3
+ This project is an Encore application. Follow the steps below to get it running locally.
4
+
5
+ ## Prerequisites
6
+
7
+ If this is your first time using Encore, you need to install the CLI that runs the local development environment. Use the appropriate command for your system:
8
+
9
+ - **macOS:** `brew install encoredev/tap/encore`
10
+ - **Linux:** `curl -L https://encore.dev/install.sh | bash`
11
+ - **Windows:** `iwr https://encore.dev/install.ps1 | iex`
12
+
13
+ You also need bun for package management. If you don't have it yet, install it with:
14
+
15
+ ```bash
16
+ npm install -g bun
17
+ ```
18
+
19
+ ## Running the Application
20
+
21
+ ### Backend Setup
22
+
23
+ 1. Navigate to the backend directory:
24
+ ```bash
25
+ cd backend
26
+ ```
27
+
28
+ 2. Start the Encore development server:
29
+ ```bash
30
+ encore run
31
+ ```
32
+
33
+ The backend will be available at the URL shown in your terminal (typically `http://localhost:4000`).
34
+
35
+
36
+
37
+
38
+
39
+ ## Deployment
40
+
41
+ ### Self-hosting
42
+ See the [self-hosting instructions](https://encore.dev/docs/self-host/docker-build) for how to use `encore build docker` to create a Docker image and
43
+ configure it.
44
+
45
+ ### Encore Cloud Platform
46
+
47
+ #### Step 1: Login to your Encore Cloud Account
48
+
49
+ Before deploying, ensure you have authenticated the Encore CLI with your Encore account (the same as your Leap account):
50
+
51
+ ```bash
52
+ encore auth login
53
+ ```
54
+
55
+ #### Step 2: Set Up Git Remote
56
+
57
+ Add Encore's git remote to enable direct deployment:
58
+
59
+ ```bash
60
+ git remote add encore encore://scalable-ai-api-service-ysyi
61
+ ```
62
+
63
+ #### Step 3: Deploy Your Application
64
+
65
+ Deploy by pushing your code:
66
+
67
+ ```bash
68
+ git add -A .
69
+ git commit -m "Deploy to Encore Cloud"
70
+ git push encore
71
+ ```
72
+
73
+ Monitor your deployment progress in the [Encore Cloud dashboard](https://app.encore.dev/scalable-ai-api-service-ysyi/deploys).
74
+
75
+ ## GitHub Integration (Recommended for Production)
76
+
77
+ For production applications, we recommend integrating with GitHub instead of using Encore's managed git:
78
+
79
+ ### Connecting Your GitHub Account
80
+
81
+ 1. Open your app in the **Encore Cloud dashboard**
82
+ 2. Navigate to Encore Cloud [GitHub Integration settings](https://app.encore.cloud/scalable-ai-api-service-ysyi/settings/integrations/github)
83
+ 3. Click **Connect Account to GitHub**
84
+ 4. Grant access to your repository
85
+
86
+ Once connected, pushing to your GitHub repository will automatically trigger deployments. Encore Cloud Pro users also get Preview Environments for each pull request.
87
+
88
+ ### Deploy via GitHub
89
+
90
+ After connecting GitHub, deploy by pushing to your repository:
91
+
92
+ ```bash
93
+ git add -A .
94
+ git commit -m "Deploy via GitHub"
95
+ git push origin main
96
+ ```
97
+
98
+ ## Additional Resources
99
+
100
+ - [Encore Documentation](https://encore.dev/docs)
101
+ - [Deployment Guide](https://encore.dev/docs/platform/deploy/deploying)
102
+ - [GitHub Integration](https://encore.dev/docs/platform/integrations/github)
103
+ - [Encore Cloud Dashboard](https://app.encore.dev)
104
+
105
+
106
+
Dockerfile ADDED
@@ -0,0 +1,74 @@
1
+ FROM node:18-alpine AS builder
2
+
3
+ WORKDIR /app
4
+
5
+ COPY package*.json ./
6
+ RUN npm ci
7
+
8
+ COPY . .
9
+ RUN npm run build || echo "Build will happen on startup"
10
+
11
+ FROM node:18
12
+
13
+ WORKDIR /app
14
+
15
+ RUN apt-get update && apt-get install -y curl && \
16
+ curl -fsSL https://ollama.com/install.sh | sh && \
17
+ apt-get clean && rm -rf /var/lib/apt/lists/*
18
+
19
+ COPY --from=builder /app ./
20
+ RUN npm ci --omit=dev
21
+
22
+ ENV PORT=7860
23
+ ENV NODE_ENV=production
24
+ ENV OLLAMA_BASE_URL=http://localhost:11434
25
+ ENV OLLAMA_MODEL=llama2
26
+ ENV OLLAMA_EMBEDDING_MODEL=nomic-embed-text
27
+ ENV API_KEYS=demo-key-1,demo-key-2
28
+ ENV RATE_LIMIT_DEFAULT=100
29
+ ENV RATE_LIMIT_ADMIN=1000
30
+ ENV LOG_LEVEL=info
31
+ ENV ENABLE_BACKGROUND_WORKERS=true
32
+ ENV OLLAMA_MODELS=/data/ollama-models
33
+
34
+ EXPOSE 7860
35
+
36
+ RUN echo '#!/bin/bash\n\
37
+ set -e\n\
38
+ \n\
39
+ echo "=== Starting AI API Service with Ollama ==="\n\
40
+ \n\
41
+ ollama serve &\n\
42
+ OLLAMA_PID=$!\n\
43
+ echo "Ollama started with PID $OLLAMA_PID"\n\
44
+ \n\
45
+ echo "Waiting for Ollama to be ready..."\n\
46
+ for i in {1..30}; do\n\
47
+ if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then\n\
48
+ echo "Ollama is ready!"\n\
49
+ break\n\
50
+ fi\n\
51
+ echo "Waiting... ($i/30)"\n\
52
+ sleep 2\n\
53
+ done\n\
54
+ \n\
55
+ echo "Pulling Ollama model: $OLLAMA_MODEL"\n\
56
+ ollama pull $OLLAMA_MODEL || echo "Warning: Model pull failed, will retry on first request"\n\
57
+ \n\
58
+ if [ "$OLLAMA_EMBEDDING_MODEL" != "$OLLAMA_MODEL" ]; then\n\
59
+ echo "Pulling embedding model: $OLLAMA_EMBEDDING_MODEL"\n\
60
+ ollama pull $OLLAMA_EMBEDDING_MODEL || echo "Warning: Embedding model pull failed"\n\
61
+ fi\n\
62
+ \n\
63
+ echo "Warming up model..."\n\
64
+ timeout 30s ollama run $OLLAMA_MODEL "Hi" > /dev/null 2>&1 || echo "Warmup timed out or failed (continuing)"\n\
65
+ \n\
66
+ echo "Starting AI API Service on port $PORT..."\n\
67
+ echo "Available models: $(ollama list)"\n\
68
+ \n\
69
+ if [ -f .encore/build/backend/main.js ]; then\n\
+   exec node .encore/build/backend/main.js\n\
+ else\n\
+   exec npm start\n\
+ fi\n\
70
+ ' > /app/start.sh && chmod +x /app/start.sh
71
+
72
+ VOLUME /data
73
+
74
+ CMD ["/app/start.sh"]
HUGGINGFACE_OLLAMA_DEPLOY.md ADDED
@@ -0,0 +1,423 @@
1
+ # Deploying AI API Service to Hugging Face Spaces with Ollama
2
+
3
+ This guide shows you how to deploy the AI API service to Hugging Face Spaces using Ollama as your LLM backend (no API keys needed!).
4
+
5
+ ## Why Ollama on Hugging Face Spaces?
6
+
7
+ ✅ **No API costs** - Run models locally in your Space
8
+ ✅ **Privacy** - Data stays within your Space
9
+ ✅ **Model choice** - Use Llama 2, Llama 3, Mistral, Phi, Gemma, etc.
10
+ ✅ **No rate limits** - Only limited by Space hardware
11
+ ✅ **Full control** - Customize models and parameters
12
+
13
+ ## Prerequisites
14
+
15
+ - Hugging Face account (free)
16
+ - Basic knowledge of Git
17
+
18
+ ## Step-by-Step Deployment
19
+
20
+ ### 1. Create a New Space
21
+
22
+ 1. Go to https://huggingface.co/new-space
23
+ 2. Choose:
24
+ - **Name**: `ai-api-ollama` (or your preferred name)
25
+ - **License**: MIT
26
+ - **SDK**: Docker
27
+ - **Hardware**:
28
+ - **CPU Basic (free)**: Works for small models (phi, gemma:2b)
29
+ - **CPU Upgrade ($0.60/hr)**: Better for medium models (llama2, mistral)
30
+ - **GPU T4 ($0.60/hr)**: Recommended for fast inference
31
+ - **GPU A10G ($3.15/hr)**: For large models (llama3:70b)
32
+ 3. Click **Create Space**
33
+
34
+ ### 2. Clone Your Space Repository
35
+
36
+ ```bash
37
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
38
+ cd ai-api-ollama
39
+ ```
40
+
41
+ ### 3. Copy Project Files
42
+
43
+ Copy all files from this project to your Space directory:
44
+
45
+ ```bash
46
+ # From the ai-api-service directory
47
+ cp -r backend examples tests *.md *.json *.yml .dockerignore .env.example ../ai-api-ollama/
48
+ ```
49
+
50
+ ### 4. Create Hugging Face Space Dockerfile
51
+
52
+ Create a new `Dockerfile` optimized for Hugging Face Spaces with Ollama:
53
+
54
+ ```dockerfile
55
+ FROM node:18-alpine AS builder
56
+
57
+ WORKDIR /app
58
+
59
+ # Copy package files
60
+ COPY package*.json ./
61
+ RUN npm ci
62
+
63
+ # Copy source code
64
+ COPY . .
65
+
66
+ # Build the application
67
+ RUN npm run build || echo "Build step skipped - Encore will build on startup"
68
+
69
+ # Production stage with Ollama
70
+ FROM node:18
71
+
72
+ WORKDIR /app
73
+
74
+ # Install Ollama
75
+ RUN curl -fsSL https://ollama.com/install.sh | sh
76
+
77
+ # Copy built application
78
+ COPY --from=builder /app ./
79
+
80
+ # Install production dependencies
81
+ RUN npm ci --omit=dev
82
+
83
+ # Set environment variables for Hugging Face Spaces
84
+ ENV PORT=7860
85
+ ENV OLLAMA_BASE_URL=http://localhost:11434
86
+ ENV OLLAMA_MODEL=llama2
87
+ ENV OLLAMA_EMBEDDING_MODEL=nomic-embed-text
88
+ ENV API_KEYS=demo-key-1,demo-key-2
89
+ ENV RATE_LIMIT_DEFAULT=60
90
+ ENV RATE_LIMIT_ADMIN=1000
91
+ ENV LOG_LEVEL=info
92
+ ENV ENABLE_BACKGROUND_WORKERS=true
93
+
94
+ EXPOSE 7860
95
+
96
+ # Create startup script
97
+ RUN echo '#!/bin/bash\n\
98
+ # Start Ollama in background\n\
99
+ ollama serve &\n\
100
+ OLLAMA_PID=$!\n\
101
+ \n\
102
+ # Wait for Ollama to start\n\
103
+ echo "Waiting for Ollama to start..."\n\
104
+ sleep 5\n\
105
+ \n\
106
+ # Pull the model\n\
107
+ echo "Pulling Ollama model: $OLLAMA_MODEL"\n\
108
+ ollama pull $OLLAMA_MODEL || echo "Model pull failed, will try on first request"\n\
109
+ \n\
110
+ # Pull embedding model if different\n\
111
+ if [ "$OLLAMA_EMBEDDING_MODEL" != "$OLLAMA_MODEL" ]; then\n\
112
+ echo "Pulling embedding model: $OLLAMA_EMBEDDING_MODEL"\n\
113
+ ollama pull $OLLAMA_EMBEDDING_MODEL || echo "Embedding model pull failed"\n\
114
+ fi\n\
115
+ \n\
116
+ # Start the API service\n\
117
+ echo "Starting AI API Service on port $PORT..."\n\
118
+ node .encore/build/backend/main.js || npm start\n\
119
+ ' > /app/start.sh && chmod +x /app/start.sh
120
+
121
+ CMD ["/app/start.sh"]
122
+ ```
123
+
124
+ ### 5. Configure Environment Variables in Space Settings
125
+
126
+ In your Space settings on Hugging Face:
127
+
128
+ 1. Go to **Settings** → **Variables and secrets**
129
+ 2. Add these environment variables:
130
+
131
+ | Variable | Value | Description |
132
+ |----------|-------|-------------|
133
+ | `API_KEYS` | `your-secret-key-here` | Comma-separated API keys for authentication |
134
+ | `ADMIN_API_KEYS` | `admin-key-here` | Admin-level API keys (optional) |
135
+ | `OLLAMA_MODEL` | `llama2` | Default: llama2, or use llama3, mistral, phi, gemma |
136
+ | `OLLAMA_EMBEDDING_MODEL` | `nomic-embed-text` | Embedding model for RAG |
137
+ | `RATE_LIMIT_DEFAULT` | `100` | Requests per minute for default users |
138
+
139
+ **Recommended Models by Hardware:**
140
+
141
+ | Hardware | Recommended Model | Speed | Quality |
142
+ |----------|------------------|-------|---------|
143
+ | CPU Basic | `phi:latest` or `gemma:2b` | Fast | Good |
144
+ | CPU Upgrade | `llama2:latest` or `mistral:latest` | Medium | Better |
145
+ | GPU T4 | `llama3:latest` | Fast | Excellent |
146
+ | GPU A10G | `llama3:70b` | Medium | Best |
147
+
148
+ ### 6. Create README.md for Your Space
149
+
150
+ Create a `README.md` in your Space root:
151
+
152
+ ````markdown
153
+ ---
154
+ title: AI API Service with Ollama
155
+ emoji: 🤖
156
+ colorFrom: blue
157
+ colorTo: purple
158
+ sdk: docker
159
+ pinned: false
160
+ ---
161
+
162
+ # AI API Service with Ollama
163
+
164
+ Production-ready AI API with chat, RAG, image generation, and voice synthesis.
165
+
166
+ ## Features
167
+
168
+ - 💬 Multi-turn chat conversations
169
+ - 📚 RAG (Retrieval-Augmented Generation)
170
+ - 🖼️ Image generation
171
+ - 🎙️ Voice synthesis
172
+ - 📄 Document ingestion
173
+ - 🔒 API key authentication
174
+ - ⚡ Rate limiting
175
+
176
+ ## Quick Start
177
+
178
+ ### API Documentation
179
+
180
+ Base URL: `https://YOUR_USERNAME-ai-api-ollama.hf.space`
181
+
182
+ ### Example Request
183
+
184
+ ```bash
185
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
186
+ -H "Authorization: Bearer demo-key-1" \
187
+ -H "Content-Type: application/json" \
188
+ -d '{
189
+ "conversation": [
190
+ {"role": "user", "content": "Hello! How are you?"}
191
+ ]
192
+ }'
193
+ ```
194
+
195
+ ### Available Endpoints
196
+
197
+ - `GET /health` - Health check
198
+ - `POST /ai/chat` - Chat conversation
199
+ - `POST /rag/query` - Query with retrieval
200
+ - `POST /image/generate` - Generate images
201
+ - `POST /voice/synthesize` - Text to speech
202
+ - `POST /upload` - Upload documents
203
+
204
+ See full API documentation in the repository.
205
+
206
+ ## Using Your Own API Key
207
+
208
+ Replace `demo-key-1` with your configured API key from Space settings.
209
+
210
+ ## Local Development
211
+
212
+ See [QUICKSTART.md](QUICKSTART.md) for local setup instructions.
213
+ ````
214
+
215
+ ### 7. Push to Hugging Face
216
+
217
+ ```bash
218
+ git add .
219
+ git commit -m "Initial deployment with Ollama"
220
+ git push
221
+ ```
222
+
223
+ ### 8. Wait for Build
224
+
225
+ - Hugging Face will automatically build your Docker image
226
+ - This takes 5-10 minutes for first build
227
+ - Watch the **Logs** tab for progress
228
+ - Initial startup will download the Ollama model (2-5 minutes depending on model size)
229
+
230
+ ### 9. Test Your Deployment
231
+
232
+ Once the Space is running:
233
+
234
+ ```bash
235
+ # Replace YOUR_USERNAME with your Hugging Face username
236
+ SPACE_URL="https://YOUR_USERNAME-ai-api-ollama.hf.space"
237
+
238
+ # Health check
239
+ curl $SPACE_URL/health
240
+
241
+ # Chat request
242
+ curl -X POST $SPACE_URL/ai/chat \
243
+ -H "Authorization: Bearer demo-key-1" \
244
+ -H "Content-Type: application/json" \
245
+ -d '{
246
+ "conversation": [
247
+ {"role": "user", "content": "Tell me a joke about AI"}
248
+ ]
249
+ }'
250
+ ```
251
+
252
+ ## Optimizations for Hugging Face Spaces
253
+
254
+ ### 1. Reduce Model Download Time
255
+
256
+ Pre-download models in Dockerfile:
257
+
258
+ ```dockerfile
259
+ # `ollama pull` needs a running server, so start one temporarily during the build
+ RUN ollama serve & sleep 5 && \
+     ollama pull llama2 && \
+     ollama pull nomic-embed-text
261
+ ```
262
+
263
+ ### 2. Use Smaller Models for Free Tier
264
+
265
+ ```env
266
+ OLLAMA_MODEL=phi:latest
267
+ ```
268
+
269
+ Phi is only 1.3GB vs Llama2's 4GB.
270
+
271
+ ### 3. Enable Persistent Storage
272
+
273
+ Hugging Face Spaces have persistent storage in `/data`:
274
+
275
+ ```dockerfile
276
+ # Add to Dockerfile
277
+ VOLUME /data
278
+ ENV OLLAMA_MODELS=/data/ollama-models
279
+ ```
280
+
281
+ This prevents re-downloading models on restart.
282
+
283
+ ### 4. Optimize for Cold Starts
284
+
285
+ Add model warmup in startup script:
286
+
287
+ ```bash
288
+ # Add to start.sh
289
+ echo "Warming up model..."
290
+ timeout 10s ollama run $OLLAMA_MODEL "Hello" > /dev/null 2>&1 || true
291
+ ```
292
+
293
+ ## Cost Comparison
294
+
295
+ | Option | Cost | Pros | Cons |
296
+ |--------|------|------|------|
297
+ | **Free CPU** | $0 | Free! | Slow inference, small models only |
298
+ | **CPU Upgrade** | $0.60/hr (~$432/mo) | Better performance | Still slower than GPU |
299
+ | **GPU T4** | $0.60/hr (~$432/mo) | Fast inference | Limited for huge models |
300
+ | **OpenAI API** | Pay per token | No hosting, fast | Ongoing costs, data sent to OpenAI |
301
+ | **Self-hosted** | VPS costs | Full control | Maintenance required |
302
+
303
+ **Recommendation**: Start with **Free CPU + Phi** for testing, upgrade to **GPU T4 + Llama3** for production.
304
+
305
+ ## Troubleshooting
306
+
307
+ ### Space won't start
308
+
309
+ **Check logs for**:
310
+ - Ollama installation errors → Use official Ollama install script
311
+ - Model download timeout → Use smaller model or upgrade hardware
312
+ - Port conflicts → Ensure PORT=7860
313
+
314
+ ### "No LLM adapter available"
315
+
316
+ **Solution**: The Ollama adapter is always initialized. Check that Ollama is running:
317
+ ```bash
318
+ # In Space terminal
319
+ curl http://localhost:11434/api/tags
320
+ ```
321
+
322
+ ### Slow responses
323
+
324
+ **Solutions**:
325
+ - Use smaller model (phi instead of llama2)
326
+ - Upgrade to GPU hardware
327
+ - Reduce max_tokens in requests
328
+
329
+ ### Model not found
330
+
331
+ **Solution**: Pull model manually:
332
+ ```bash
333
+ # In Space terminal or startup script
334
+ ollama pull llama2
335
+ ```
336
+
337
+ ## Advanced Configuration
338
+
339
+ ### Use Multiple Models
340
+
341
+ ```env
342
+ # In Space settings
343
+ OLLAMA_MODEL=llama3:latest
344
+ ```
345
+
346
+ Then specify model in API requests:
347
+ ```json
348
+ {
349
+ "conversation": [...],
350
+ "model": "llama3"
351
+ }
352
+ ```
353
+
354
+ ### Custom System Prompts
355
+
356
+ ```bash
357
+ curl -X POST $SPACE_URL/ai/chat \
358
+ -H "Authorization: Bearer your-key" \
359
+ -H "Content-Type: application/json" \
360
+ -d '{
361
+ "conversation": [
362
+ {"role": "system", "content": "You are a helpful coding assistant."},
363
+ {"role": "user", "content": "Explain Python decorators"}
364
+ ]
365
+ }'
366
+ ```
367
+
368
+ ### Enable RAG with Documents
369
+
370
+ ```bash
371
+ # Upload a document
372
+ curl -X POST $SPACE_URL/upload \
373
+ -H "Authorization: Bearer your-key" \
+   -F "file=@document.pdf"
374
375
+
376
+ # Query with RAG
377
+ curl -X POST $SPACE_URL/rag/query \
378
+ -H "Authorization: Bearer your-key" \
379
+ -H "Content-Type: application/json" \
380
+ -d '{"query": "What does the document say about X?"}'
381
+ ```
382
+
383
+ ## Monitoring
384
+
385
+ ### Check Space Health
386
+
387
+ ```bash
388
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
389
+ ```
390
+
391
+ ### View Metrics
392
+
393
+ ```bash
394
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
395
+ -H "Authorization: Bearer your-key"
396
+ ```
397
+
398
+ ## Scaling
399
+
400
+ ### Horizontal Scaling
401
+
402
+ Hugging Face Spaces don't support horizontal scaling. For high traffic:
403
+
404
+ 1. **Use multiple Spaces** with load balancer
405
+ 2. **Deploy to cloud** (AWS ECS, GCP Cloud Run) with auto-scaling
406
+ 3. **Use managed API** (OpenAI, Anthropic) for high volume
407
+
408
+ ### Vertical Scaling
409
+
410
+ Upgrade hardware in Space settings:
411
+ - Free CPU → CPU Upgrade (2x faster)
412
+ - CPU → GPU T4 (10x faster)
413
+ - GPU T4 → GPU A10G (2x faster, larger models)
414
+
415
+ ## Support
416
+
417
+ - [GitHub Issues](https://github.com/your-org/ai-api-service/issues)
418
+ - [Hugging Face Discussions](https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/discussions)
419
+ - [Documentation](https://github.com/your-org/ai-api-service)
420
+
421
+ ## License
422
+
423
+ MIT License - see LICENSE file
QUICKSTART.md ADDED
@@ -0,0 +1,319 @@
1
+ # Quick Start Guide
2
+
3
+ Get your AI API Service up and running in 5 minutes!
4
+
5
+ ## Prerequisites
6
+
7
+ - Node.js 18+
8
+ - npm or yarn
9
+ - At least one LLM API key (OpenAI, HuggingFace, or Anthropic), or a local Ollama install
10
+
11
+ ## 5-Minute Setup
12
+
13
+ ### 1. Install Dependencies
14
+
15
+ ```bash
16
+ npm install
17
+ ```
18
+
19
+ ### 2. Configure Environment
20
+
21
+ ```bash
22
+ cp .env.example .env
23
+ ```
24
+
25
+ Edit `.env` and add your API keys:
26
+
27
+ ```env
28
+ OPENAI_API_KEY=sk-your-openai-key
29
+ API_KEYS=demo-key-1,my-secret-key
30
+ ```
31
+
32
+ ### 3. Start the Server
33
+
34
+ ```bash
35
+ npm run dev
36
+ ```
37
+
38
+ The API will be available at `http://localhost:8000`.
39
+
40
+ ### 4. Test the API
41
+
42
+ ```bash
43
+ curl http://localhost:8000/health
44
+ ```
45
+
46
+ Expected response:
47
+ ```json
48
+ {
49
+ "status": "healthy",
50
+ "version": "1.0.0",
51
+ "services": [...],
52
+ "uptime_seconds": 5
53
+ }
54
+ ```
55
+
56
+ ### 5. Make Your First Request
57
+
58
+ ```bash
59
+ curl -X POST http://localhost:8000/ai/chat \
60
+ -H "Authorization: Bearer demo-key-1" \
61
+ -H "Content-Type: application/json" \
62
+ -d '{
63
+ "conversation": [
64
+ {"role": "user", "content": "Hello!"}
65
+ ]
66
+ }'
67
+ ```
68
+
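+ The same request from TypeScript, as a minimal sketch using Node 18's built-in `fetch` (see `examples/js_client.js` for a fuller client):
+
+ ```typescript
+ // Minimal chat call against the local server; requires Node 18+ for fetch.
+ const res = await fetch('http://localhost:8000/ai/chat', {
+   method: 'POST',
+   headers: {
+     'Authorization': 'Bearer demo-key-1',
+     'Content-Type': 'application/json',
+   },
+   body: JSON.stringify({
+     conversation: [{ role: 'user', content: 'Hello!' }],
+   }),
+ });
+
+ const data = await res.json();
+ console.log(data.reply); // the assistant's answer
+ ```
+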
69
+ ## Example Requests
70
+
71
+ ### Chat
72
+ ```bash
73
+ curl -X POST http://localhost:8000/ai/chat \
74
+ -H "Authorization: Bearer demo-key-1" \
75
+ -H "Content-Type: application/json" \
76
+ -d '{"conversation": [{"role": "user", "content": "What is AI?"}]}'
77
+ ```
78
+
79
+ ### RAG Query
80
+ ```bash
81
+ curl -X POST http://localhost:8000/rag/query \
82
+ -H "Authorization: Bearer demo-key-1" \
83
+ -H "Content-Type: application/json" \
84
+ -d '{"query": "What are the key features?", "top_k": 5}'
85
+ ```
86
+
87
+ ### Image Generation
88
+ ```bash
89
+ curl -X POST http://localhost:8000/image/generate \
90
+ -H "Authorization: Bearer demo-key-1" \
91
+ -H "Content-Type: application/json" \
92
+ -d '{"prompt": "A sunset over mountains", "size": "1024x1024"}'
93
+ ```
94
+
95
+ ## What Each Component Does
96
+
97
+ ### 🔐 **Authentication (`/backend/utils/auth.ts`)**
98
+ - Validates API keys from the Authorization header
99
+ - Implements role-based access (default, premium, admin)
100
+ - Used by all protected endpoints
101
+
102
+ ### ⚡ **Rate Limiting (`/backend/utils/rate_limit.ts`)**
103
+ - Token bucket algorithm (sketched below)
104
+ - Configurable limits per tier (60/300/1000 requests/min)
105
+ - Automatic reset after 1 minute
106
+ - Prevents abuse and cost overruns
107
+
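+ A minimal sketch of the token-bucket idea (names are illustrative, not the actual `rate_limit.ts`):
+
+ ```typescript
+ // Illustrative token-bucket limiter: each API key gets `limit` tokens per
+ // minute; a request spends one token, and the bucket refills after 60s.
+ interface Bucket {
+   tokens: number;
+   lastRefill: number; // ms timestamp of the last refill
+ }
+
+ const buckets = new Map<string, Bucket>();
+
+ export function allowRequest(apiKey: string, limit: number): boolean {
+   const now = Date.now();
+   const bucket = buckets.get(apiKey) ?? { tokens: limit, lastRefill: now };
+
+   if (now - bucket.lastRefill >= 60_000) {
+     bucket.tokens = limit;     // automatic reset after 1 minute
+     bucket.lastRefill = now;
+   }
+
+   if (bucket.tokens <= 0) return false; // caller should respond with 429
+   bucket.tokens -= 1;
+   buckets.set(apiKey, bucket);
+   return true;
+ }
+ ```
+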
108
+ ### 🤖 **AI Service (`/backend/services/ai_service.ts`)**
109
+ - Multi-provider LLM routing (OpenAI, HuggingFace, Anthropic)
110
+ - Automatic model selection and fallback (see sketch below)
111
+ - Chat completions with context management
112
+ - Embedding generation for RAG
113
+
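+ "Fallback" here means trying providers in order, as in this sketch built on the repo's `LLMAdapter` interface (the ordering and error handling are illustrative):
+
+ ```typescript
+ // Illustrative provider fallback; ai_service.ts may select differently.
+ import type { LLMAdapter, Message, ChatResponse } from '../types/models';
+
+ export async function completeWithFallback(
+   adapters: LLMAdapter[], // e.g. [openai, anthropic, huggingface, ollama]
+   messages: Message[]
+ ): Promise<ChatResponse> {
+   for (const adapter of adapters) {
+     if (!(await adapter.isAvailable())) continue; // skip unconfigured providers
+     try {
+       return await adapter.generateCompletion(messages);
+     } catch {
+       // on failure, fall through to the next provider
+     }
+   }
+   throw new Error('No LLM adapter available');
+ }
+ ```
+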
114
+ ### 📚 **RAG Service (`/backend/services/rag_service.ts`)**
115
+ - Vector-based document retrieval
116
+ - Automatic context injection into prompts
117
+ - Supports Pinecone or in-memory vector DB
118
+ - Returns sources with similarity scores
119
+
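+ Conceptually a RAG query is embed → retrieve → inject → complete. A sketch on top of the repo's adapter interfaces (the prompt wording and the `metadata.text` field are assumptions):
+
+ ```typescript
+ // Illustrative RAG pipeline; rag_service.ts may shape prompts differently.
+ import type { LLMAdapter, VectorDBAdapter, ChatResponse } from '../types/models';
+
+ export async function ragQuery(
+   llm: LLMAdapter,
+   vectorDb: VectorDBAdapter,
+   query: string,
+   topK = 5
+ ): Promise<ChatResponse> {
+   // 1. Embed the query.
+   const { embeddings } = await llm.generateEmbedding(query);
+
+   // 2. Retrieve the most similar chunks from the vector DB.
+   const matches = await vectorDb.query(embeddings[0], topK);
+
+   // 3. Inject the retrieved text into the prompt as context.
+   const context = matches.map(m => m.metadata.text).join('\n---\n');
+
+   // 4. Answer with the context prepended as a system message.
+   return llm.generateCompletion([
+     { role: 'system', content: `Answer using this context:\n${context}` },
+     { role: 'user', content: query },
+   ]);
+ }
+ ```
+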
120
+ ### 🖼️ **Image Service (`/backend/services/image_service.ts`)**
121
+ - Text-to-image generation
122
+ - Supports DALL-E and Stable Diffusion
123
+ - Configurable sizes and quality
124
+ - Returns base64 or URLs
125
+
126
+ ### 🎙️ **Voice Service (`/backend/services/voice_service.ts`)**
127
+ - Text-to-speech synthesis (TTS)
128
+ - Speech-to-text transcription (STT)
129
+ - Multiple voice options
130
+ - Various audio formats (mp3, opus, etc.)
131
+
132
+ ### 📄 **Document Service (`/backend/services/document_service.ts`)**
133
+ - Upload PDF, DOCX, TXT files
134
+ - Automatic text extraction
135
+ - Chunking with overlap for better retrieval (see sketch below)
136
+ - Background processing with workers
137
+ - Stores chunks in vector DB
138
+
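+ A sketch of overlap chunking with the `CHUNK_SIZE` / `CHUNK_OVERLAP` defaults from `.env.example` (character-based here; the real service may split by tokens or sentences):
+
+ ```typescript
+ // Illustrative fixed-size chunking: consecutive chunks share `overlap`
+ // characters so retrieval does not lose context at chunk boundaries.
+ export function chunkText(text: string, size = 1000, overlap = 200): string[] {
+   const step = Math.max(1, size - overlap); // guard against overlap >= size
+   const chunks: string[] = [];
+   for (let start = 0; start < text.length; start += step) {
+     chunks.push(text.slice(start, start + size));
+   }
+   return chunks;
+ }
+ ```
+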
139
+ ### 🔌 **Adapters**
140
+
141
+ #### **OpenAI Adapter (`/backend/adapters/openai_adapter.ts`)**
142
+ - Chat completions (GPT-4, GPT-3.5)
143
+ - Embeddings (text-embedding-ada-002)
144
+ - Image generation (DALL-E)
145
+ - Voice synthesis and transcription
146
+ - Implements LLMAdapter, ImageAdapter, VoiceAdapter interfaces
147
+
148
+ #### **HuggingFace Adapter (`/backend/adapters/huggingface_adapter.ts`)**
149
+ - Open-source models (Mistral, Llama, etc.)
150
+ - Stable Diffusion for images
151
+ - Sentence transformers for embeddings
152
+ - Free tier available
153
+
154
+ #### **Anthropic Adapter (`/backend/adapters/anthropic_adapter.ts`)**
155
+ - Claude models (Sonnet, Opus)
156
+ - Advanced reasoning capabilities
157
+ - Long context windows
158
+
159
+ #### **Vector DB Adapters (`/backend/adapters/vector_db_adapter.ts`)**
160
+ - **PineconeAdapter**: Production vector storage with managed scaling
161
+ - **InMemoryVectorDB**: Development fallback with cosine similarity
162
+ - Supports metadata filtering and batch operations
163
+
164
+ ### 📊 **Observability**
165
+
166
+ #### **Logger (`/backend/utils/logger.ts`)**
167
+ - Structured JSON logging
168
+ - Configurable log levels (debug, info, warn, error)
169
+ - Automatic timestamping
170
+ - Production-ready format
171
+
172
+ #### **Metrics (`/backend/utils/metrics.ts`)**
173
+ - Request counting by endpoint
174
+ - Error tracking
175
+ - Response time measurement
176
+ - Model usage statistics
177
+ - Vector DB query counts
178
+ - Document processing stats
179
+
180
+ ### 🔄 **Background Workers (`/backend/workers/ingestion_worker.ts`)**
181
+ - Async document processing
182
+ - Configurable concurrency
183
+ - Job status tracking
184
+ - Webhook notifications on completion
185
+ - Automatic retries on failure
186
+
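+ The concurrency-plus-retry pattern, sketched (the queue shape and names are illustrative, not the actual worker):
+
+ ```typescript
+ // Illustrative worker pool: WORKER_CONCURRENCY loops drain a shared queue,
+ // re-queueing failed jobs up to a retry cap. The real ingestion_worker.ts
+ // also tracks job status and sends webhook notifications.
+ type Job = { id: string; run: () => Promise<void>; attempts: number };
+
+ const MAX_ATTEMPTS = 3;
+ const CONCURRENCY = Number(process.env.WORKER_CONCURRENCY ?? 5);
+
+ export async function drainQueue(queue: Job[]): Promise<void> {
+   const workers = Array.from({ length: CONCURRENCY }, async () => {
+     for (let job = queue.shift(); job; job = queue.shift()) {
+       try {
+         await job.run();
+       } catch {
+         if (++job.attempts < MAX_ATTEMPTS) queue.push(job); // retry later
+       }
+     }
+   });
+   await Promise.all(workers);
+ }
+ ```
+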
187
+ ### 🌐 **API Endpoints**
188
+
189
+ All endpoints are in `/backend/api/`:
190
+
191
+ #### **Health & Metrics (`health.ts`)**
192
+ - `GET /health` - Service health with component status
193
+ - `GET /metrics` - Usage metrics and statistics
194
+
195
+ #### **Authentication (`auth.ts`)**
196
+ - `POST /auth/verify` - Validate API key
197
+
198
+ #### **Chat (`chat.ts`)**
199
+ - `POST /ai/chat` - Multi-turn conversation
200
+ - `GET /ai/query` - Simple Q&A
201
+
202
+ #### **RAG (`rag.ts`)**
203
+ - `POST /rag/query` - Query with retrieval
204
+ - `GET /rag/models` - List available models
205
+
206
+ #### **Images (`image.ts`)**
207
+ - `POST /image/generate` - Generate images
208
+
209
+ #### **Voice (`voice.ts`)**
210
+ - `POST /voice/synthesize` - Text to speech
211
+ - `POST /voice/transcribe` - Speech to text
212
+
213
+ #### **Documents (`documents.ts`)**
214
+ - `POST /upload` - Upload document
215
+ - `GET /docs/:id/sources` - Get document chunks
216
+ - `POST /webhook/events` - Processing webhooks
217
+
218
+ ## Architecture Flow
219
+
220
+ ```
221
+ ┌─────────┐
+ │ Client  │
+ └────┬────┘
+      │  Authorization header (Bearer token)
+      ▼
+ ┌─────────────────┐
+ │ Auth Middleware │ ← validates API key, checks rate limit
+ └────┬────────────┘
+      │
+      ▼
+ ┌──────────────┐
+ │ API Endpoint │ ← routes the request
+ └────┬─────────┘
+      ├─ POST /ai/chat          → AI Service
+      ├─ POST /rag/query        → RAG Service → Vector DB → AI Service
+      ├─ POST /image/generate   → Image Service
+      ├─ POST /voice/synthesize → Voice Service
+      ├─ POST /upload           → Document Service → Worker → Vector DB
+      │
+      ▼
+ ┌───────────┐
+ │ Response  │ ← JSON with data + metadata
+ └───────────┘
244
+ ```
245
+
246
+ ## Configuration
247
+
248
+ ### Environment Variables
249
+
250
+ | Variable | What It Does | Example |
251
+ |----------|-------------|---------|
252
+ | `OPENAI_API_KEY` | OpenAI access for GPT models | `sk-...` |
253
+ | `HUGGINGFACE_API_KEY` | HuggingFace models access | `hf_...` |
254
+ | `API_KEYS` | Valid API keys (comma-separated) | `key1,key2` |
255
+ | `RATE_LIMIT_DEFAULT` | Requests/min for basic users | `60` |
256
+ | `RATE_LIMIT_ADMIN` | Requests/min for admins | `1000` |
257
+ | `MAX_FILE_SIZE_MB` | Max document upload size | `10` |
258
+ | `CHUNK_SIZE` | Text chunk size for RAG | `1000` |
259
+ | `LOG_LEVEL` | Logging verbosity | `info` |
260
+
261
+ ### Tier System
262
+
263
+ - **Default**: 60 requests/min
264
+ - **Premium**: 300 requests/min (add to config)
265
+ - **Admin**: 1000 requests/min (via `ADMIN_API_KEYS`)
266
+
267
+ ## Testing
268
+
269
+ Run tests:
270
+ ```bash
271
+ npm test
272
+ ```
273
+
274
+ Run with coverage:
275
+ ```bash
276
+ npm run test:coverage
277
+ ```
278
+
279
+ ## Production Checklist
280
+
281
+ - [ ] Set strong `API_KEYS`
282
+ - [ ] Configure `ADMIN_API_KEYS` separately
283
+ - [ ] Set up Pinecone for vector storage
284
+ - [ ] Increase rate limits based on needs
285
+ - [ ] Enable background workers
286
+ - [ ] Set `LOG_LEVEL=info` or `warn`
287
+ - [ ] Configure CORS origins
288
+ - [ ] Set up monitoring/alerting
289
+ - [ ] Review cost limits on LLM providers
290
+
291
+ ## Troubleshooting
292
+
293
+ **"No LLM adapter available"**
294
+ → Add at least one API key (OPENAI_API_KEY, HUGGINGFACE_API_KEY, or ANTHROPIC_API_KEY), or run Ollama locally
295
+
296
+ **"Invalid API key"**
297
+ → Check Authorization header: `Bearer your-key-here`
298
+
299
+ **"Rate limit exceeded"**
300
+ → Wait 60 seconds or use admin key
301
+
302
+ **Vector DB queries fail**
303
+ → Service falls back to in-memory storage automatically
304
+
305
+ ## Next Steps
306
+
307
+ 1. **Read the full README**: `README.md`
308
+ 2. **Check deployment guide**: `DEPLOYMENT.md`
309
+ 3. **Review examples**: `examples/js_client.js` and `examples/curl.sh`
310
+ 4. **Run tests**: `npm test`
311
+ 5. **Deploy to production**: See DEPLOYMENT.md
312
+
313
+ ## Support
314
+
315
+ - GitHub Issues
316
+ - Documentation in `/docs`
317
+ - Example code in `/examples`
318
+
319
+ Enjoy building with the AI API Service! 🚀
README.md CHANGED
@@ -1,11 +1,12 @@
 
 
 
1
  ---
2
- title: Ai Api Ollama
3
- emoji: 🔥
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
 
7
  pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  ---
5
+ title: AI API Service with Ollama
6
+ emoji: 🤖
7
+ colorFrom: blue
8
+ colorTo: purple
9
  sdk: docker
10
+ app_port: 7860
11
  pinned: false
12
+ ---
 
 
 
backend/adapters/anthropic_adapter.ts ADDED
@@ -0,0 +1,76 @@
1
+ import Anthropic from '@anthropic-ai/sdk';
2
+ import type {
3
+ LLMAdapter,
4
+ Message,
5
+ ChatOptions,
6
+ ChatResponse,
7
+ EmbeddingResponse,
8
+ } from '../types/models';
9
+
10
+ export class AnthropicAdapter implements LLMAdapter {
11
+ private client: Anthropic | null = null;
12
+ private apiKey: string;
13
+ private defaultModel: string;
14
+
15
+ constructor(apiKey: string, defaultModel = 'claude-3-sonnet-20240229') {
16
+ this.apiKey = apiKey;
17
+ this.defaultModel = defaultModel;
18
+
19
+ if (apiKey) {
20
+ this.client = new Anthropic({ apiKey });
21
+ }
22
+ }
23
+
24
+ async isAvailable(): Promise<boolean> {
25
+ if (!this.client) return false;
26
+ try {
27
+ await this.client.messages.create({
28
+ model: this.defaultModel,
29
+ max_tokens: 1,
30
+ messages: [{ role: 'user', content: 'test' }],
31
+ });
32
+ return true;
33
+ } catch {
34
+ return false;
35
+ }
36
+ }
37
+
38
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
39
+ if (!this.client) {
40
+ throw new Error('Anthropic client not initialized. Please provide ANTHROPIC_API_KEY.');
41
+ }
42
+
43
+ const systemMessage = messages.find(m => m.role === 'system');
44
+ const conversationMessages = messages.filter(m => m.role !== 'system');
45
+
46
+ const response = await this.client.messages.create({
47
+ model: this.defaultModel,
48
+ max_tokens: options?.max_tokens || 1000,
49
+ temperature: options?.temperature ?? 0.7,
50
+ top_p: options?.top_p,
51
+ system: systemMessage?.content,
52
+ messages: conversationMessages.map(m => ({
53
+ role: m.role === 'assistant' ? 'assistant' : 'user',
54
+ content: m.content,
55
+ })),
56
+ stop_sequences: options?.stop,
57
+ });
58
+
59
+ const textContent = response.content.find(c => c.type === 'text');
60
+
61
+ return {
62
+ reply: textContent?.type === 'text' ? textContent.text : '',
63
+ model: response.model,
64
+ usage: {
65
+ prompt_tokens: response.usage.input_tokens,
66
+ completion_tokens: response.usage.output_tokens,
67
+ total_tokens: response.usage.input_tokens + response.usage.output_tokens,
68
+ },
69
+ sources: null,
70
+ };
71
+ }
72
+
73
+ async generateEmbedding(_text: string | string[]): Promise<EmbeddingResponse> {
74
+ throw new Error('Anthropic does not support embeddings. Use OpenAI or HuggingFace adapter.');
75
+ }
76
+ }
backend/adapters/huggingface_adapter.ts ADDED
1
+ import { HfInference } from '@huggingface/inference';
2
+ import type {
3
+ LLMAdapter,
4
+ ImageAdapter,
5
+ Message,
6
+ ChatOptions,
7
+ ChatResponse,
8
+ EmbeddingResponse,
9
+ ImageGenerationRequest,
10
+ ImageGenerationResponse,
11
+ } from '../types/models';
12
+
13
+ export class HuggingFaceAdapter implements LLMAdapter, ImageAdapter {
14
+ private client: HfInference | null = null;
15
+ private apiKey: string;
16
+ private defaultModel: string;
17
+ private defaultEmbeddingModel: string;
18
+ private defaultImageModel: string;
19
+
20
+ constructor(
21
+ apiKey: string,
22
+ defaultModel = 'mistralai/Mistral-7B-Instruct-v0.1',
23
+ defaultEmbeddingModel = 'sentence-transformers/all-MiniLM-L6-v2',
24
+ defaultImageModel = 'stabilityai/stable-diffusion-xl-base-1.0'
25
+ ) {
26
+ this.apiKey = apiKey;
27
+ this.defaultModel = defaultModel;
28
+ this.defaultEmbeddingModel = defaultEmbeddingModel;
29
+ this.defaultImageModel = defaultImageModel;
30
+
31
+ if (apiKey) {
32
+ this.client = new HfInference(apiKey);
33
+ }
34
+ }
35
+
36
+ async isAvailable(): Promise<boolean> {
37
+ if (!this.client) return false;
38
+ try {
39
+ await this.client.textGeneration({
40
+ model: this.defaultModel,
41
+ inputs: 'test',
42
+ parameters: { max_new_tokens: 1 },
43
+ });
44
+ return true;
45
+ } catch {
46
+ return false;
47
+ }
48
+ }
49
+
50
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
51
+ if (!this.client) {
52
+ throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
53
+ }
54
+
55
+ const prompt = this.formatMessagesAsPrompt(messages);
56
+
57
+ const response = await this.client.textGeneration({
58
+ model: this.defaultModel,
59
+ inputs: prompt,
60
+ parameters: {
61
+ max_new_tokens: options?.max_tokens || 1000,
62
+ temperature: options?.temperature ?? 0.7,
63
+ top_p: options?.top_p ?? 0.95,
64
+ repetition_penalty: 1.1,
65
+ return_full_text: false,
66
+ },
67
+ });
68
+
69
+ const estimatedTokens = Math.ceil(prompt.length / 4);
70
+ const completionTokens = Math.ceil((response.generated_text?.length || 0) / 4);
71
+
72
+ return {
73
+ reply: response.generated_text || '',
74
+ model: this.defaultModel,
75
+ usage: {
76
+ prompt_tokens: estimatedTokens,
77
+ completion_tokens: completionTokens,
78
+ total_tokens: estimatedTokens + completionTokens,
79
+ },
80
+ sources: null,
81
+ };
82
+ }
83
+
84
+ async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
85
+ if (!this.client) {
86
+ throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
87
+ }
88
+
89
+ const inputs = Array.isArray(text) ? text : [text];
90
+ const embeddings: number[][] = [];
91
+
92
+ for (const input of inputs) {
93
+ const response = await this.client.featureExtraction({
94
+ model: this.defaultEmbeddingModel,
95
+ inputs: input,
96
+ });
97
+
98
+ if (Array.isArray(response) && Array.isArray(response[0])) {
99
+ embeddings.push(response[0] as number[]);
100
+ } else if (Array.isArray(response)) {
101
+ embeddings.push(response as number[]);
102
+ }
103
+ }
104
+
105
+ const totalTokens = inputs.reduce((sum, input) => sum + Math.ceil(input.length / 4), 0);
106
+
107
+ return {
108
+ embeddings,
109
+ model: this.defaultEmbeddingModel,
110
+ usage: {
111
+ prompt_tokens: totalTokens,
112
+ completion_tokens: 0,
113
+ total_tokens: totalTokens,
114
+ },
115
+ };
116
+ }
117
+
118
+ async generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse> {
119
+ if (!this.client) {
120
+ throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
121
+ }
122
+
123
+ const model = options?.model || this.defaultImageModel;
124
+
125
+ const response = await this.client.textToImage({
126
+ model,
127
+ inputs: prompt,
128
+ });
129
+
130
+ let buffer: Buffer;
131
+ if (typeof response === 'object' && 'arrayBuffer' in response) {
132
+ const arrayBuffer = await (response as any).arrayBuffer();
133
+ buffer = Buffer.from(arrayBuffer);
134
+ } else {
135
+ buffer = Buffer.from(response as any);
136
+ }
137
+ const base64Image = buffer.toString('base64');
138
+
139
+ return {
140
+ images: [{
141
+ url: `data:image/png;base64,${base64Image}`,
142
+ }],
143
+ model,
144
+ created: Date.now(),
145
+ };
146
+ }
147
+
148
+ private formatMessagesAsPrompt(messages: Message[]): string {
149
+ let prompt = '';
150
+
151
+ for (const message of messages) {
152
+ if (message.role === 'system') {
153
+ prompt += `System: ${message.content}\n\n`;
154
+ } else if (message.role === 'user') {
155
+ prompt += `User: ${message.content}\n\n`;
156
+ } else if (message.role === 'assistant') {
157
+ prompt += `Assistant: ${message.content}\n\n`;
158
+ }
159
+ }
160
+
161
+ prompt += 'Assistant: ';
162
+ return prompt;
163
+ }
164
+ }
backend/adapters/ollama_adapter.ts ADDED
@@ -0,0 +1,153 @@
1
+ import type {
2
+ LLMAdapter,
3
+ Message,
4
+ ChatOptions,
5
+ ChatResponse,
6
+ EmbeddingResponse,
7
+ } from '../types/models';
8
+ import { logger } from '../utils/logger';
9
+
10
+ export class OllamaAdapter implements LLMAdapter {
11
+ private baseUrl: string;
12
+ private defaultModel: string;
13
+ private defaultEmbeddingModel: string;
14
+
15
+ constructor(
16
+ baseUrl = 'http://localhost:11434',
17
+ defaultModel = 'llama2',
18
+ defaultEmbeddingModel = 'nomic-embed-text'
19
+ ) {
20
+ this.baseUrl = baseUrl;
21
+ this.defaultModel = defaultModel;
22
+ this.defaultEmbeddingModel = defaultEmbeddingModel;
23
+ }
24
+
25
+ async isAvailable(): Promise<boolean> {
26
+ try {
27
+ const response = await fetch(`${this.baseUrl}/api/tags`);
28
+ return response.ok;
29
+ } catch {
30
+ return false;
31
+ }
32
+ }
33
+
34
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
35
+ try {
36
+ const prompt = this.formatMessagesAsPrompt(messages);
37
+
38
+ const response = await fetch(`${this.baseUrl}/api/generate`, {
39
+ method: 'POST',
40
+ headers: {
41
+ 'Content-Type': 'application/json',
42
+ },
43
+ body: JSON.stringify({
44
+ model: this.defaultModel,
45
+ prompt,
46
+ stream: false,
47
+ options: {
48
+ temperature: options?.temperature ?? 0.7,
49
+ num_predict: options?.max_tokens ?? 1000,
50
+ top_p: options?.top_p ?? 0.9,
51
+ stop: options?.stop,
52
+ },
53
+ }),
54
+ });
55
+
56
+ if (!response.ok) {
57
+ throw new Error(`Ollama API error: ${response.statusText}`);
58
+ }
59
+
60
+ const data = await response.json() as any;
61
+
62
+ const estimatedPromptTokens = Math.ceil(prompt.length / 4);
63
+ const estimatedCompletionTokens = Math.ceil((data.response?.length || 0) / 4);
64
+
65
+ return {
66
+ reply: data.response || '',
67
+ model: this.defaultModel,
68
+ usage: {
69
+ prompt_tokens: estimatedPromptTokens,
70
+ completion_tokens: estimatedCompletionTokens,
71
+ total_tokens: estimatedPromptTokens + estimatedCompletionTokens,
72
+ },
73
+ sources: null,
74
+ };
75
+ } catch (error) {
76
+ logger.error('Ollama completion error', {
77
+ error: error instanceof Error ? error.message : String(error),
78
+ });
79
+ throw error;
80
+ }
81
+ }
82
+
83
+ async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
84
+ try {
85
+ const inputs = Array.isArray(text) ? text : [text];
86
+ const embeddings: number[][] = [];
87
+
88
+ for (const input of inputs) {
89
+ const response = await fetch(`${this.baseUrl}/api/embeddings`, {
90
+ method: 'POST',
91
+ headers: {
92
+ 'Content-Type': 'application/json',
93
+ },
94
+ body: JSON.stringify({
95
+ model: this.defaultEmbeddingModel,
96
+ prompt: input,
97
+ }),
98
+ });
99
+
100
+ if (!response.ok) {
101
+ throw new Error(`Ollama embeddings error: ${response.statusText}`);
102
+ }
103
+
104
+ const data = await response.json() as any;
105
+ embeddings.push(data.embedding);
106
+ }
107
+
108
+ const totalTokens = inputs.reduce((sum, input) => sum + Math.ceil(input.length / 4), 0);
109
+
110
+ return {
111
+ embeddings,
112
+ model: this.defaultEmbeddingModel,
113
+ usage: {
114
+ prompt_tokens: totalTokens,
115
+ completion_tokens: 0,
116
+ total_tokens: totalTokens,
117
+ },
118
+ };
119
+ } catch (error) {
120
+ logger.error('Ollama embedding error', {
121
+ error: error instanceof Error ? error.message : String(error),
122
+ });
123
+ throw error;
124
+ }
125
+ }
126
+
127
+ private formatMessagesAsPrompt(messages: Message[]): string {
128
+ let prompt = '';
129
+
130
+ for (const message of messages) {
131
+ if (message.role === 'system') {
132
+ prompt += `System: ${message.content}\n\n`;
133
+ } else if (message.role === 'user') {
134
+ prompt += `User: ${message.content}\n\n`;
135
+ } else if (message.role === 'assistant') {
136
+ prompt += `Assistant: ${message.content}\n\n`;
137
+ }
138
+ }
139
+
140
+ prompt += 'Assistant: ';
141
+ return prompt;
142
+ }
143
+
144
+ setModel(modelName: string): void {
145
+ this.defaultModel = modelName;
146
+ logger.info('Ollama model changed', { model: modelName });
147
+ }
148
+
149
+ setEmbeddingModel(modelName: string): void {
150
+ this.defaultEmbeddingModel = modelName;
151
+ logger.info('Ollama embedding model changed', { model: modelName });
152
+ }
153
+ }
backend/adapters/openai_adapter.ts ADDED
@@ -0,0 +1,193 @@
1
+ import OpenAI, { toFile } from 'openai';
2
+ import type {
3
+ LLMAdapter,
4
+ ImageAdapter,
5
+ VoiceAdapter,
6
+ Message,
7
+ ChatOptions,
8
+ ChatResponse,
9
+ EmbeddingResponse,
10
+ ImageGenerationRequest,
11
+ ImageGenerationResponse,
12
+ VoiceSynthesisRequest,
13
+ VoiceSynthesisResponse,
14
+ TranscriptionRequest,
15
+ TranscriptionResponse,
16
+ } from '../types/models';
17
+
18
+ export class OpenAIAdapter implements LLMAdapter, ImageAdapter, VoiceAdapter {
19
+ private client: OpenAI | null = null;
20
+ private apiKey: string;
21
+ private defaultChatModel: string;
22
+ private defaultEmbeddingModel: string;
23
+ private defaultImageModel: string;
24
+ private defaultVoiceModel: string;
25
+
26
+ constructor(
27
+ apiKey: string,
28
+ defaultChatModel = 'gpt-3.5-turbo',
29
+ defaultEmbeddingModel = 'text-embedding-ada-002',
30
+ defaultImageModel = 'dall-e-3',
31
+ defaultVoiceModel = 'tts-1'
32
+ ) {
33
+ this.apiKey = apiKey;
34
+ this.defaultChatModel = defaultChatModel;
35
+ this.defaultEmbeddingModel = defaultEmbeddingModel;
36
+ this.defaultImageModel = defaultImageModel;
37
+ this.defaultVoiceModel = defaultVoiceModel;
38
+
39
+ if (apiKey) {
40
+ this.client = new OpenAI({ apiKey });
41
+ }
42
+ }
43
+
44
+ async isAvailable(): Promise<boolean> {
45
+ if (!this.client) return false;
46
+ try {
47
+ await this.client.models.list();
48
+ return true;
49
+ } catch {
50
+ return false;
51
+ }
52
+ }
53
+
54
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
55
+ if (!this.client) {
56
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
57
+ }
58
+
59
+ const completion = await this.client.chat.completions.create({
60
+ model: this.defaultChatModel,
61
+ messages: messages.map(m => ({
62
+ role: m.role,
63
+ content: m.content,
64
+ })),
65
+ temperature: options?.temperature ?? 0.7,
66
+ max_tokens: options?.max_tokens ?? 1000,
67
+ top_p: options?.top_p,
68
+ frequency_penalty: options?.frequency_penalty,
69
+ presence_penalty: options?.presence_penalty,
70
+ stop: options?.stop,
71
+ });
72
+
73
+ return {
74
+ reply: completion.choices[0]?.message?.content || '',
75
+ model: completion.model,
76
+ usage: {
77
+ prompt_tokens: completion.usage?.prompt_tokens || 0,
78
+ completion_tokens: completion.usage?.completion_tokens || 0,
79
+ total_tokens: completion.usage?.total_tokens || 0,
80
+ },
81
+ sources: null,
82
+ };
83
+ }
84
+
85
+ async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
86
+ if (!this.client) {
87
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
88
+ }
89
+
90
+ const input = Array.isArray(text) ? text : [text];
91
+
92
+ const response = await this.client.embeddings.create({
93
+ model: this.defaultEmbeddingModel,
94
+ input,
95
+ });
96
+
97
+ if (!response.data) {
98
+ throw new Error('No embedding data returned from OpenAI');
99
+ }
100
+
101
+ return {
102
+ embeddings: response.data.map(d => d.embedding),
103
+ model: response.model,
104
+ usage: {
105
+ prompt_tokens: response.usage.prompt_tokens,
106
+ completion_tokens: 0,
107
+ total_tokens: response.usage.total_tokens,
108
+ },
109
+ };
110
+ }
111
+
112
+ async generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse> {
113
+ if (!this.client) {
114
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
115
+ }
116
+
117
+ const model = options?.model || this.defaultImageModel;
118
+ const isDallE3 = model.includes('dall-e-3');
119
+
120
+ const response = await this.client.images.generate({
121
+ model,
122
+ prompt,
123
+ n: isDallE3 ? 1 : (options?.n || 1),
124
+ size: options?.size || '1024x1024',
125
+ quality: options?.quality,
126
+ style: options?.style,
127
+ });
128
+
129
+ if (!response.data) {
130
+ throw new Error('No image data returned from OpenAI');
131
+ }
132
+
133
+ return {
134
+ images: response.data.map(img => ({
135
+ url: img.url || '',
136
+ revised_prompt: img.revised_prompt,
137
+ b64_json: img.b64_json,
138
+ })),
139
+ model,
140
+ created: response.created,
141
+ };
142
+ }
143
+
144
+ async synthesize(text: string, options?: Partial<VoiceSynthesisRequest>): Promise<VoiceSynthesisResponse> {
145
+ if (!this.client) {
146
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
147
+ }
148
+
149
+ const voice = options?.voice || 'alloy';
150
+ const model = options?.model || this.defaultVoiceModel;
151
+ const format = options?.format || 'mp3';
152
+
153
+ const response = await this.client.audio.speech.create({
154
+ model,
155
+ voice,
156
+ input: text,
157
+ response_format: format as any,
158
+ speed: options?.speed,
159
+ });
160
+
161
+ const buffer = Buffer.from(await response.arrayBuffer());
162
+ const base64Audio = buffer.toString('base64');
163
+
164
+ return {
165
+ audio_url: `data:audio/${format};base64,${base64Audio}`,
166
+ voice,
167
+ format,
168
+ size_bytes: buffer.length,
169
+ };
170
+ }
171
+
172
+ async transcribe(audio: Buffer, options?: Partial<TranscriptionRequest>): Promise<TranscriptionResponse> {
173
+ if (!this.client) {
174
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
175
+ }
176
+
177
+ // The SDK expects an uploadable file object, not a raw Buffer. The filename
+ // (and its extension) here is an assumption; pass the real format if known.
+ const file = await toFile(audio, 'audio.mp3');
178
+
179
+ const response = await this.client.audio.transcriptions.create({
180
+ file,
181
+ model: options?.model || 'whisper-1',
182
+ language: options?.language,
183
+ prompt: options?.prompt,
184
+ });
185
+
186
+ return {
187
+ text: response.text,
188
+ language: options?.language || 'en',
189
+ duration: 0,
190
+ model: 'whisper-1',
191
+ };
192
+ }
193
+ }
backend/adapters/vector_db_adapter.ts ADDED
@@ -0,0 +1,146 @@
1
+ import { Pinecone } from '@pinecone-database/pinecone';
2
+ import type { VectorDBAdapter, VectorSearchResult } from '../types/models';
3
+
4
+ export class PineconeAdapter implements VectorDBAdapter {
5
+ private client: Pinecone | null = null;
6
+ private indexName: string;
7
+ private namespace: string;
8
+ private initialized = false;
9
+
10
+ constructor(apiKey: string, indexName: string, namespace = 'default') {
11
+ this.indexName = indexName;
12
+ this.namespace = namespace;
13
+
14
+ if (apiKey) {
15
+ this.client = new Pinecone({ apiKey });
16
+ }
17
+ }
18
+
19
+ async isAvailable(): Promise<boolean> {
20
+ if (!this.client) return false;
21
+ try {
22
+ await this.client.listIndexes();
23
+ return true;
24
+ } catch {
25
+ return false;
26
+ }
27
+ }
28
+
29
+ async upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void> {
30
+ if (!this.client) {
31
+ throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
32
+ }
33
+
34
+ const index = this.client.index(this.indexName);
35
+
36
+ await index.namespace(this.namespace).upsert(vectors);
37
+ }
38
+
39
+ async query(
40
+ queryVector: number[],
41
+ topK: number,
42
+ filter?: Record<string, any>
43
+ ): Promise<VectorSearchResult[]> {
44
+ if (!this.client) {
45
+ throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
46
+ }
47
+
48
+ const index = this.client.index(this.indexName);
49
+
50
+ const results = await index.namespace(this.namespace).query({
51
+ vector: queryVector,
52
+ topK,
53
+ filter,
54
+ includeMetadata: true,
55
+ });
56
+
57
+ return results.matches.map(match => ({
58
+ id: match.id,
59
+ score: match.score || 0,
60
+ metadata: (match.metadata || {}) as Record<string, any>,
61
+ }));
62
+ }
63
+
64
+ async delete(ids: string[]): Promise<void> {
65
+ if (!this.client) {
66
+ throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
67
+ }
68
+
69
+ const index = this.client.index(this.indexName);
70
+
71
+ await index.namespace(this.namespace).deleteMany(ids);
72
+ }
73
+ }
74
+
75
+ export class InMemoryVectorDB implements VectorDBAdapter {
76
+ private vectors: Map<string, { values: number[]; metadata: Record<string, any> }> = new Map();
77
+
78
+ async isAvailable(): Promise<boolean> {
79
+ return true;
80
+ }
81
+
82
+ async upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void> {
83
+ for (const vector of vectors) {
84
+ this.vectors.set(vector.id, {
85
+ values: vector.values,
86
+ metadata: vector.metadata,
87
+ });
88
+ }
89
+ }
90
+
91
+ async query(
92
+ queryVector: number[],
93
+ topK: number,
94
+ filter?: Record<string, any>
95
+ ): Promise<VectorSearchResult[]> {
96
+ const results: Array<{ id: string; score: number; metadata: Record<string, any> }> = [];
97
+
98
+ for (const [id, vector] of this.vectors.entries()) {
99
+ if (filter && !this.matchesFilter(vector.metadata, filter)) {
100
+ continue;
101
+ }
102
+
103
+ const score = this.cosineSimilarity(queryVector, vector.values);
104
+ results.push({
105
+ id,
106
+ score,
107
+ metadata: vector.metadata,
108
+ });
109
+ }
110
+
111
+ results.sort((a, b) => b.score - a.score);
112
+ return results.slice(0, topK);
113
+ }
114
+
115
+ async delete(ids: string[]): Promise<void> {
116
+ for (const id of ids) {
117
+ this.vectors.delete(id);
118
+ }
119
+ }
120
+
121
+ private cosineSimilarity(a: number[], b: number[]): number {
122
+ if (a.length !== b.length) return 0;
123
+
124
+ let dotProduct = 0;
125
+ let normA = 0;
126
+ let normB = 0;
127
+
128
+ for (let i = 0; i < a.length; i++) {
129
+ dotProduct += a[i] * b[i];
130
+ normA += a[i] * a[i];
131
+ normB += b[i] * b[i];
132
+ }
133
+
134
+ const denominator = Math.sqrt(normA) * Math.sqrt(normB);
135
+ return denominator === 0 ? 0 : dotProduct / denominator;
136
+ }
137
+
138
+ private matchesFilter(metadata: Record<string, any>, filter: Record<string, any>): boolean {
139
+ for (const [key, value] of Object.entries(filter)) {
140
+ if (metadata[key] !== value) {
141
+ return false;
142
+ }
143
+ }
144
+ return true;
145
+ }
146
+ }
backend/api/auth.ts ADDED
@@ -0,0 +1,26 @@
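+ // POST /auth/verify: validates the caller's API key and returns the key's
+ // tier together with its current rate-limit window.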
+ import { api } from "encore.dev/api";
+ import { auth, validateApiKey, getApiKeyInfo } from "../utils/auth";
+ import { getRateLimitInfo } from "../utils/rate_limit";
+ import type { ApiKeyInfo, RateLimitInfo } from "../types/models";
+
+ interface VerifyResponse {
+   valid: boolean;
+   key_info: ApiKeyInfo;
+   rate_limit: RateLimitInfo;
+ }
+
+ export const verify = api<void, VerifyResponse>(
+   { expose: true, method: "POST", path: "/auth/verify", auth: false },
+   async () => {
+     const authHeader = auth();
+     const authData = validateApiKey(authHeader);
+     const keyInfo = getApiKeyInfo(authData.apiKey);
+     const rateLimitInfo = getRateLimitInfo(authData.apiKey, authData.tier);
+
+     return {
+       valid: true,
+       key_info: keyInfo,
+       rate_limit: rateLimitInfo,
+     };
+   }
+ );
backend/api/chat.ts ADDED
@@ -0,0 +1,99 @@
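+ // Chat endpoints: POST /ai/chat for multi-turn conversations and GET /ai/query
+ // for single-shot questions. Both validate the API key, enforce per-tier rate
+ // limits, and re-map limiter errors to 429 responses with retry metadata.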
+ import { api, APIError, Query } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { aiService } from "../services/ai_service";
+ import type { ChatRequest, ChatResponse } from "../types/models";
+
+ export const chat = api<ChatRequest, ChatResponse>(
+   { expose: true, method: "POST", path: "/ai/chat", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/ai/chat");
+
+       if (!req.conversation || req.conversation.length === 0) {
+         throw APIError.invalidArgument("conversation must contain at least one message");
+       }
+
+       const response = await aiService.chat(
+         req.conversation,
+         req.model,
+         req.options
+       );
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface SimpleQueryRequest {
+   q: Query<string>;
+   model?: Query<string>;
+ }
+
+ interface SimpleQueryResponse {
+   answer: string;
+   model: string;
+ }
+
+ export const query = api<SimpleQueryRequest, SimpleQueryResponse>(
+   { expose: true, method: "GET", path: "/ai/query", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/ai/query");
+
+       if (!req.q) {
+         throw APIError.invalidArgument("query parameter 'q' is required");
+       }
+
+       const answer = await aiService.simpleQuery(req.q, req.model);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return {
+         answer,
+         model: req.model || 'default',
+       };
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/documents.ts ADDED
@@ -0,0 +1,119 @@
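+ // Document ingestion endpoints: base64 file uploads, per-document source
+ // listing, and a webhook receiver for ingestion lifecycle events.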
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { documentService } from "../services/document_service";
+ import type {
+   DocumentUploadResponse,
+   DocumentSource,
+   WebhookEvent
+ } from "../types/models";
+
+ interface UploadRequest {
+   filename: string;
+   content_base64: string;
+   metadata?: {
+     title?: string;
+     author?: string;
+     category?: string;
+     tags?: string[];
+   };
+ }
+
+ export const upload = api<UploadRequest, DocumentUploadResponse>(
+   { expose: true, method: "POST", path: "/upload", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/upload");
+
+       if (!req.filename) {
+         throw APIError.invalidArgument("filename is required");
+       }
+
+       if (!req.content_base64) {
+         throw APIError.invalidArgument("content_base64 is required");
+       }
+
+       const content = Buffer.from(req.content_base64, 'base64');
+
+       const response = await documentService.uploadDocument(
+         req.filename,
+         content,
+         req.metadata
+       );
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface GetSourcesRequest {
+   id: string;
+ }
+
+ interface GetSourcesResponse {
+   sources: DocumentSource[];
+ }
+
+ export const getSources = api<GetSourcesRequest, GetSourcesResponse>(
+   { expose: true, method: "GET", path: "/docs/:id/sources", auth: false },
+   async (req) => {
+     try {
+       const authHeader = auth();
+       validateApiKey(authHeader);
+
+       metrics.incrementRequests("/docs/:id/sources");
+
+       const sources = await documentService.getDocumentSources(req.id);
+
+       return { sources };
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error instanceof Error && error.message === 'Document not found') {
+         throw APIError.notFound("document not found");
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface WebhookResponse {
+   received: boolean;
+ }
+
+ export const webhook = api<WebhookEvent, WebhookResponse>(
+   { expose: true, method: "POST", path: "/webhook/events", auth: false },
+   async () => {
+     try {
+       metrics.incrementRequests("/webhook/events");
+
+       return { received: true };
+     } catch (error) {
+       metrics.incrementErrors();
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/encore.service.ts ADDED
@@ -0,0 +1,3 @@
+ import { Service } from "encore.dev/service";
+
+ export default new Service("api");
backend/api/health.ts ADDED
@@ -0,0 +1,55 @@
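+ // Unauthenticated health and metrics endpoints for liveness probes and
+ // monitoring dashboards.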
+ import { api } from "encore.dev/api";
+ import { aiService } from "../services/ai_service";
+ import { ragService } from "../services/rag_service";
+ import { metrics } from "../utils/metrics";
+ import type { HealthCheckResponse, MetricsResponse } from "../types/models";
+
+ const startTime = Date.now();
+ const version = "1.0.0";
+
+ export const health = api<void, HealthCheckResponse>(
+   { expose: true, method: "GET", path: "/health" },
+   async () => {
+     const services = [];
+
+     try {
+       const llmHealth = await aiService.healthCheck();
+       services.push({
+         name: "llm",
+         status: llmHealth.some(h => h.available) ? ("up" as const) : ("down" as const),
+       });
+     } catch {
+       services.push({ name: "llm", status: "down" as const });
+     }
+
+     try {
+       const vectorDbAvailable = await ragService.healthCheck();
+       services.push({
+         name: "vector_db",
+         status: vectorDbAvailable ? ("up" as const) : ("down" as const),
+       });
+     } catch {
+       services.push({ name: "vector_db", status: "down" as const });
+     }
+
+     const allUp = services.every(s => s.status === "up");
+     const status = allUp ? "healthy" : "degraded";
+
+     return {
+       status,
+       timestamp: Date.now(),
+       version,
+       services,
+       uptime_seconds: Math.floor((Date.now() - startTime) / 1000),
+     };
+   }
+ );
+
+ export const getMetrics = api<void, MetricsResponse>(
+   { expose: true, method: "GET", path: "/metrics" },
+   async () => {
+     return metrics.getMetrics();
+   }
+ );
backend/api/image.ts ADDED
@@ -0,0 +1,44 @@
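+ // POST /image/generate: validates the prompt, then delegates to the image
+ // service, which picks an adapter from the requested model name.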
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { imageService } from "../services/image_service";
+ import type { ImageGenerationRequest, ImageGenerationResponse } from "../types/models";
+
+ export const generate = api<ImageGenerationRequest, ImageGenerationResponse>(
+   { expose: true, method: "POST", path: "/image/generate", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/image/generate");
+
+       if (!req.prompt) {
+         throw APIError.invalidArgument("prompt is required");
+       }
+
+       const response = await imageService.generate(req);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/rag.ts ADDED
@@ -0,0 +1,72 @@
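+ // RAG endpoints: POST /rag/query runs retrieval-augmented generation and
+ // GET /rag/models lists the chat adapters currently registered.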
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { ragService } from "../services/rag_service";
+ import { aiService } from "../services/ai_service";
+ import type { RAGQueryRequest, RAGQueryResponse } from "../types/models";
+
+ export const ragQuery = api<RAGQueryRequest, RAGQueryResponse>(
+   { expose: true, method: "POST", path: "/rag/query", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/rag/query");
+
+       if (!req.query) {
+         throw APIError.invalidArgument("query is required");
+       }
+
+       const response = await ragService.query(req);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface ModelsResponse {
+   models: string[];
+   default_model: string;
+ }
+
+ export const getModels = api<void, ModelsResponse>(
+   { expose: true, method: "GET", path: "/rag/models", auth: false },
+   async () => {
+     try {
+       const authHeader = auth();
+       validateApiKey(authHeader);
+
+       metrics.incrementRequests("/rag/models");
+
+       const models = aiService.getAvailableModels();
+
+       return {
+         models,
+         default_model: models[0] || 'gpt-3.5-turbo',
+       };
+     } catch (error) {
+       metrics.incrementErrors();
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/voice.ts ADDED
@@ -0,0 +1,101 @@
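+ // Voice endpoints: POST /voice/synthesize (text-to-speech) and
+ // POST /voice/transcribe (base64-encoded audio to text).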
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { voiceService } from "../services/voice_service";
+ import type {
+   VoiceSynthesisRequest,
+   VoiceSynthesisResponse,
+   TranscriptionRequest,
+   TranscriptionResponse
+ } from "../types/models";
+
+ export const synthesize = api<VoiceSynthesisRequest, VoiceSynthesisResponse>(
+   { expose: true, method: "POST", path: "/voice/synthesize", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/voice/synthesize");
+
+       if (!req.text) {
+         throw APIError.invalidArgument("text is required");
+       }
+
+       const response = await voiceService.synthesize(req);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface TranscribeRequestBody {
+   audio_base64: string;
+   model?: string;
+   language?: string;
+   prompt?: string;
+ }
+
+ export const transcribe = api<TranscribeRequestBody, TranscriptionResponse>(
+   { expose: true, method: "POST", path: "/voice/transcribe", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/voice/transcribe");
+
+       if (!req.audio_base64) {
+         throw APIError.invalidArgument("audio_base64 is required");
+       }
+
+       const audioBuffer = Buffer.from(req.audio_base64, 'base64');
+
+       const response = await voiceService.transcribe(audioBuffer, {
+         audio_url: '',
+         model: req.model,
+         language: req.language,
+         prompt: req.prompt,
+       });
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/encore.app ADDED
@@ -0,0 +1 @@
+ {"id": "scalable-ai-api-service-ysyi", "lang": "typescript"}
backend/package.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "name": "backend",
+   "version": "1.0.0",
+   "type": "module",
+   "packageManager": "bun",
+   "dependencies": {
+     "@anthropic-ai/sdk": "^0.24.1",
+     "@huggingface/inference": "^3.10.0",
+     "@pinecone-database/pinecone": "^6.1.1",
+     "encore.dev": "^1.50.4",
+     "openai": "^4.90.0"
+   },
+   "devDependencies": {
+     "typescript": "^5.8.3"
+   }
+ }
backend/services/ai_service.ts ADDED
@@ -0,0 +1,193 @@
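+ // Routes chat and embedding calls to whichever LLM adapters are configured.
+ // Cloud providers register only when their API keys are present; the Ollama
+ // adapter is always registered so a local model can act as the fallback.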
+ import { OpenAIAdapter } from '../adapters/openai_adapter';
+ import { HuggingFaceAdapter } from '../adapters/huggingface_adapter';
+ import { AnthropicAdapter } from '../adapters/anthropic_adapter';
+ import { OllamaAdapter } from '../adapters/ollama_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type { Message, ChatOptions, ChatResponse, LLMAdapter } from '../types/models';
+
+ const config = loadConfig();
+
+ class AIService {
+   private adapters: Map<string, LLMAdapter> = new Map();
+   private defaultAdapter: LLMAdapter | null = null;
+
+   constructor() {
+     this.initializeAdapters();
+   }
+
+   private initializeAdapters(): void {
+     if (config.openai.apiKey) {
+       const openaiAdapter = new OpenAIAdapter(
+         config.openai.apiKey,
+         config.openai.defaultChatModel,
+         config.openai.defaultEmbeddingModel,
+         config.openai.defaultImageModel,
+         config.openai.defaultVoiceModel
+       );
+       this.adapters.set('openai', openaiAdapter);
+       this.adapters.set('gpt-4', openaiAdapter);
+       this.adapters.set('gpt-3.5-turbo', openaiAdapter);
+       this.adapters.set('gpt-4-turbo', openaiAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = openaiAdapter;
+       }
+     }
+
+     if (config.huggingface.apiKey) {
+       const hfAdapter = new HuggingFaceAdapter(config.huggingface.apiKey);
+       this.adapters.set('huggingface', hfAdapter);
+       this.adapters.set('mistral', hfAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = hfAdapter;
+       }
+     }
+
+     if (config.anthropic.apiKey) {
+       const anthropicAdapter = new AnthropicAdapter(config.anthropic.apiKey);
+       this.adapters.set('anthropic', anthropicAdapter);
+       this.adapters.set('claude', anthropicAdapter);
+       this.adapters.set('claude-3-sonnet', anthropicAdapter);
+       this.adapters.set('claude-3-opus', anthropicAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = anthropicAdapter;
+       }
+     }
+
+     // Ollama needs no API key, so it is always registered as a local fallback.
+     const ollamaBaseUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
+     const ollamaModel = process.env.OLLAMA_MODEL || 'llama2';
+     const ollamaEmbeddingModel = process.env.OLLAMA_EMBEDDING_MODEL || 'nomic-embed-text';
+
+     const ollamaAdapter = new OllamaAdapter(ollamaBaseUrl, ollamaModel, ollamaEmbeddingModel);
+     this.adapters.set('ollama', ollamaAdapter);
+     this.adapters.set('llama', ollamaAdapter);
+     this.adapters.set('llama2', ollamaAdapter);
+     this.adapters.set('llama3', ollamaAdapter);
+     // Note: this re-points 'mistral' at Ollama when HuggingFace registered it above.
+     this.adapters.set('mistral', ollamaAdapter);
+     this.adapters.set('phi', ollamaAdapter);
+     this.adapters.set('gemma', ollamaAdapter);
+
+     if (!this.defaultAdapter) {
+       this.defaultAdapter = ollamaAdapter;
+       logger.info('Using Ollama as default LLM provider');
+     }
+   }
+
+   private getAdapter(model?: string): LLMAdapter {
+     if (!model) {
+       if (!this.defaultAdapter) {
+         throw new Error('No LLM adapter available. Please configure API keys.');
+       }
+       return this.defaultAdapter;
+     }
+
+     // Fuzzy match so variants like 'gpt-4-0613' resolve to the 'gpt-4' adapter.
+     const lowerModel = model.toLowerCase();
+
+     for (const [key, adapter] of this.adapters.entries()) {
+       if (lowerModel.includes(key) || key.includes(lowerModel)) {
+         return adapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       throw new Error('No LLM adapter available. Please configure API keys.');
+     }
+
+     logger.warn(`Model ${model} not found, using default adapter`);
+     return this.defaultAdapter;
+   }
+
+   async chat(messages: Message[], model?: string, options?: ChatOptions): Promise<ChatResponse> {
+     try {
+       const adapter = this.getAdapter(model);
+
+       logger.info('Generating chat completion', {
+         model: model || 'default',
+         messageCount: messages.length
+       });
+
+       const response = await adapter.generateCompletion(messages, options);
+
+       metrics.incrementModelUsage(response.model);
+
+       logger.info('Chat completion generated', {
+         model: response.model,
+         tokensUsed: response.usage.total_tokens,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error generating chat completion', {
+         error: error instanceof Error ? error.message : String(error),
+         model: model || 'default'
+       });
+       throw error;
+     }
+   }
+
+   async simpleQuery(query: string, model?: string, options?: ChatOptions): Promise<string> {
+     const messages: Message[] = [
+       {
+         role: 'system',
+         content: 'You are a helpful assistant. Provide clear, concise answers.',
+       },
+       {
+         role: 'user',
+         content: query,
+       },
+     ];
+
+     const response = await this.chat(messages, model, options);
+     return response.reply;
+   }
+
+   async generateEmbedding(text: string | string[], model?: string) {
+     try {
+       const adapter = this.getAdapter(model);
+
+       logger.info('Generating embeddings', {
+         model: model || 'default',
+         textCount: Array.isArray(text) ? text.length : 1
+       });
+
+       const response = await adapter.generateEmbedding(text);
+
+       logger.info('Embeddings generated', {
+         model: response.model,
+         count: response.embeddings.length,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error generating embeddings', {
+         error: error instanceof Error ? error.message : String(error)
+       });
+       throw error;
+     }
+   }
+
+   getAvailableModels(): string[] {
+     return Array.from(this.adapters.keys());
+   }
+
+   async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
+     const results: { provider: string; available: boolean }[] = [];
+
+     for (const [provider, adapter] of this.adapters.entries()) {
+       const available = await adapter.isAvailable();
+       results.push({ provider, available });
+     }
+
+     return results;
+   }
+ }
+
+ export const aiService = new AIService();
backend/services/document_service.ts ADDED
@@ -0,0 +1,238 @@
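+ // In-memory document store: extracts text, splits it into overlapping chunks,
+ // and hands the chunks to the RAG service for embedding and indexing.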
+ import crypto from 'crypto';
+ import { ragService } from './rag_service';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type {
+   DocumentUploadResponse,
+   DocumentMetadata,
+   DocumentChunk,
+   DocumentSource
+ } from '../types/models';
+
+ function uuidv4(): string {
+   return crypto.randomUUID();
+ }
+
+ const config = loadConfig();
+
+ interface StoredDocument {
+   doc_id: string;
+   filename: string;
+   content: string;
+   metadata: DocumentMetadata;
+   chunks: DocumentChunk[];
+   status: 'processing' | 'completed' | 'failed';
+   error?: string;
+   created_at: number;
+ }
+
+ class DocumentService {
+   private documents = new Map<string, StoredDocument>();
+   private processingQueue: string[] = [];
+
+   async uploadDocument(
+     filename: string,
+     content: Buffer,
+     metadata?: DocumentMetadata
+   ): Promise<DocumentUploadResponse> {
+     try {
+       const doc_id = uuidv4();
+       const size_bytes = content.length;
+
+       const maxSize = config.documents.maxFileSizeMB * 1024 * 1024;
+       if (size_bytes > maxSize) {
+         throw new Error(`File size exceeds maximum of ${config.documents.maxFileSizeMB}MB`);
+       }
+
+       logger.info('Uploading document', { doc_id, filename, size_bytes });
+
+       const textContent = await this.extractText(filename, content);
+
+       const estimatedChunks = Math.ceil(textContent.length / config.documents.chunkSize);
+
+       const document: StoredDocument = {
+         doc_id,
+         filename,
+         content: textContent,
+         metadata: metadata || {},
+         chunks: [],
+         status: 'processing',
+         created_at: Date.now(),
+       };
+
+       this.documents.set(doc_id, document);
+       this.processingQueue.push(doc_id);
+
+       if (config.workers.enabled) {
+         this.processDocumentAsync(doc_id);
+       } else {
+         await this.processDocument(doc_id);
+       }
+
+       return {
+         doc_id,
+         filename,
+         size_bytes,
+         status: document.status,
+         estimated_chunks: estimatedChunks,
+         webhook_url: '/webhook/events',
+       };
+     } catch (error) {
+       logger.error('Error uploading document', {
+         error: error instanceof Error ? error.message : String(error),
+         filename,
+       });
+       throw error;
+     }
+   }
+
+   private async extractText(filename: string, content: Buffer): Promise<string> {
+     const extension = filename.split('.').pop()?.toLowerCase();
+
+     if (extension === 'txt') {
+       return content.toString('utf-8');
+     }
+
+     if (extension === 'pdf' || extension === 'docx') {
+       logger.warn(`${extension} parsing not implemented, treating as text`, { filename });
+       return content.toString('utf-8');
+     }
+
+     return content.toString('utf-8');
+   }
+
+   private async processDocument(doc_id: string): Promise<void> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       logger.error('Document not found', { doc_id });
+       return;
+     }
+
+     try {
+       logger.info('Processing document', { doc_id, filename: document.filename });
+
+       const chunks = this.chunkText(document.content, doc_id, document.metadata);
+       document.chunks = chunks;
+
+       const chunkData = chunks.map(chunk => ({
+         id: chunk.chunk_id,
+         content: chunk.content,
+         metadata: {
+           doc_id: chunk.doc_id,
+           chunk_index: chunk.chunk_index,
+           total_chunks: chunk.total_chunks,
+           ...chunk.metadata,
+         },
+       }));
+
+       await ragService.addDocumentChunks(chunkData);
+
+       document.status = 'completed';
+       metrics.incrementDocumentsProcessed();
+
+       logger.info('Document processed successfully', {
+         doc_id,
+         chunksCreated: chunks.length,
+       });
+     } catch (error) {
+       document.status = 'failed';
+       document.error = error instanceof Error ? error.message : String(error);
+
+       logger.error('Error processing document', {
+         error: document.error,
+         doc_id,
+       });
+     }
+   }
+
+   private async processDocumentAsync(doc_id: string): Promise<void> {
+     setTimeout(async () => {
+       await this.processDocument(doc_id);
+     }, 100);
+   }
+
+   private chunkText(
+     text: string,
+     doc_id: string,
+     metadata: DocumentMetadata
+   ): DocumentChunk[] {
+     const chunkSize = config.documents.chunkSize;
+     const overlap = config.documents.chunkOverlap;
+     const chunks: DocumentChunk[] = [];
+
+     let start = 0;
+     let chunkIndex = 0;
+
+     while (start < text.length) {
+       const end = Math.min(start + chunkSize, text.length);
+       const content = text.slice(start, end);
+
+       const chunk_id = `${doc_id}_chunk_${chunkIndex}`;
+
+       chunks.push({
+         chunk_id,
+         doc_id,
+         content,
+         metadata,
+         chunk_index: chunkIndex,
+         total_chunks: 0,
+       });
+
+       start += chunkSize - overlap;
+       chunkIndex++;
+     }
+
+     const totalChunks = chunks.length;
+     chunks.forEach(chunk => {
+       chunk.total_chunks = totalChunks;
+     });
+
+     return chunks;
+   }
+
+   async getDocumentSources(doc_id: string): Promise<DocumentSource[]> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       throw new Error('Document not found');
+     }
+
+     return document.chunks.map(chunk => ({
+       doc_id: chunk.doc_id,
+       chunk_id: chunk.chunk_id,
+       content: chunk.content,
+       score: 1.0,
+       metadata: chunk.metadata,
+     }));
+   }
+
+   async getDocumentStatus(doc_id: string): Promise<DocumentUploadResponse> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       throw new Error('Document not found');
+     }
+
+     return {
+       doc_id: document.doc_id,
+       filename: document.filename,
+       size_bytes: document.content.length,
+       status: document.status,
+       estimated_chunks: document.chunks.length,
+       error: document.error,
+     };
+   }
+
+   async deleteDocument(doc_id: string): Promise<void> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       throw new Error('Document not found');
+     }
+
+     await ragService.deleteDocument(doc_id);
+     this.documents.delete(doc_id);
+
+     logger.info('Document deleted', { doc_id });
+   }
+ }
+
+ export const documentService = new DocumentService();
backend/services/image_service.ts ADDED
@@ -0,0 +1,122 @@
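+ // Maps image model names (dall-e-*, stable-diffusion, sdxl) onto the OpenAI
+ // and HuggingFace adapters and forwards generation requests.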
+ import { OpenAIAdapter } from '../adapters/openai_adapter';
+ import { HuggingFaceAdapter } from '../adapters/huggingface_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type { ImageAdapter, ImageGenerationRequest, ImageGenerationResponse } from '../types/models';
+
+ const config = loadConfig();
+
+ class ImageService {
+   private adapters: Map<string, ImageAdapter> = new Map();
+   private defaultAdapter: ImageAdapter | null = null;
+
+   constructor() {
+     this.initializeAdapters();
+   }
+
+   private initializeAdapters(): void {
+     if (config.openai.apiKey) {
+       const openaiAdapter = new OpenAIAdapter(
+         config.openai.apiKey,
+         config.openai.defaultChatModel,
+         config.openai.defaultEmbeddingModel,
+         config.openai.defaultImageModel
+       );
+       this.adapters.set('openai', openaiAdapter);
+       this.adapters.set('dall-e', openaiAdapter);
+       this.adapters.set('dall-e-2', openaiAdapter);
+       this.adapters.set('dall-e-3', openaiAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = openaiAdapter;
+       }
+     }
+
+     if (config.huggingface.apiKey) {
+       const hfAdapter = new HuggingFaceAdapter(config.huggingface.apiKey);
+       this.adapters.set('huggingface', hfAdapter);
+       this.adapters.set('stable-diffusion', hfAdapter);
+       this.adapters.set('sdxl', hfAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = hfAdapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       logger.warn('No image generation adapters initialized. Please configure API keys.');
+     }
+   }
+
+   private getAdapter(model?: string): ImageAdapter {
+     if (!model) {
+       if (!this.defaultAdapter) {
+         throw new Error('No image adapter available. Please configure API keys.');
+       }
+       return this.defaultAdapter;
+     }
+
+     const lowerModel = model.toLowerCase();
+
+     for (const [key, adapter] of this.adapters.entries()) {
+       if (lowerModel.includes(key) || key.includes(lowerModel)) {
+         return adapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       throw new Error('No image adapter available. Please configure API keys.');
+     }
+
+     logger.warn(`Model ${model} not found, using default adapter`);
+     return this.defaultAdapter;
+   }
+
+   async generate(request: ImageGenerationRequest): Promise<ImageGenerationResponse> {
+     try {
+       const adapter = this.getAdapter(request.model);
+
+       logger.info('Generating image', {
+         prompt: request.prompt.substring(0, 100),
+         model: request.model || 'default',
+         size: request.size,
+         n: request.n,
+       });
+
+       const response = await adapter.generateImage(request.prompt, request);
+
+       metrics.incrementModelUsage(response.model);
+
+       logger.info('Image generated successfully', {
+         model: response.model,
+         imageCount: response.images.length,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error generating image', {
+         error: error instanceof Error ? error.message : String(error),
+         model: request.model || 'default',
+       });
+       throw error;
+     }
+   }
+
+   getAvailableModels(): string[] {
+     return Array.from(this.adapters.keys());
+   }
+
+   async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
+     const results: { provider: string; available: boolean }[] = [];
+
+     for (const [provider, adapter] of this.adapters.entries()) {
+       const available = await adapter.isAvailable();
+       results.push({ provider, available });
+     }
+
+     return results;
+   }
+ }
+
+ export const imageService = new ImageService();
backend/services/rag_service.ts ADDED
@@ -0,0 +1,182 @@
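+ // RAG pipeline: embed the query, retrieve the top-k chunks from the vector
+ // store, splice them into a grounding prompt, and ask the chat model.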
+ import { aiService } from './ai_service';
+ import { PineconeAdapter, InMemoryVectorDB } from '../adapters/vector_db_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type {
+   RAGQueryRequest,
+   RAGQueryResponse,
+   DocumentSource,
+   VectorDBAdapter
+ } from '../types/models';
+
+ const config = loadConfig();
+
+ class RAGService {
+   private vectorDB: VectorDBAdapter;
+
+   constructor() {
+     this.vectorDB = this.initializeVectorDB();
+   }
+
+   private initializeVectorDB(): VectorDBAdapter {
+     if (config.pinecone.apiKey) {
+       logger.info('Initializing Pinecone vector DB');
+       return new PineconeAdapter(
+         config.pinecone.apiKey,
+         config.pinecone.indexName
+       );
+     }
+
+     logger.warn('Pinecone not configured, using in-memory vector DB');
+     return new InMemoryVectorDB();
+   }
+
+   async query(request: RAGQueryRequest): Promise<RAGQueryResponse> {
+     const startTime = Date.now();
+
+     try {
+       logger.info('Processing RAG query', {
+         query: request.query.substring(0, 100),
+         topK: request.top_k || 5
+       });
+
+       let sources: DocumentSource[] = [];
+       let contextPrompt = request.query;
+
+       if (request.use_retrieval !== false) {
+         const embeddingResponse = await aiService.generateEmbedding(request.query);
+         const queryVector = embeddingResponse.embeddings[0];
+
+         metrics.incrementVectorDbQueries();
+
+         const results = await this.vectorDB.query(
+           queryVector,
+           request.top_k || 5,
+           request.filters
+         );
+
+         sources = results.map(result => ({
+           doc_id: result.metadata.doc_id || result.id,
+           chunk_id: result.id,
+           content: result.metadata.content || '',
+           score: result.score,
+           metadata: result.metadata,
+         }));
+
+         if (sources.length > 0) {
+           const context = sources
+             .map(s => `[Source: ${s.doc_id}]\n${s.content}`)
+             .join('\n\n');
+
+           contextPrompt = this.buildRAGPrompt(request.query, context);
+         }
+       }
+
+       const messages = [
+         {
+           role: 'system' as const,
+           content: 'You are a helpful assistant. Answer questions based on the provided context. If the context doesn\'t contain relevant information, say so.',
+         },
+         {
+           role: 'user' as const,
+           content: contextPrompt,
+         },
+       ];
+
+       const chatResponse = await aiService.chat(messages, request.model);
+
+       const retrievalTimeMs = Date.now() - startTime;
+
+       logger.info('RAG query completed', {
+         sourcesFound: sources.length,
+         retrievalTimeMs,
+         model: chatResponse.model,
+       });
+
+       return {
+         answer: chatResponse.reply,
+         sources,
+         model: chatResponse.model,
+         usage: chatResponse.usage,
+         retrieval_time_ms: retrievalTimeMs,
+       };
+     } catch (error) {
+       logger.error('Error processing RAG query', {
+         error: error instanceof Error ? error.message : String(error),
+       });
+       throw error;
+     }
+   }
+
+   async addDocumentChunks(chunks: Array<{
+     id: string;
+     content: string;
+     metadata: Record<string, any>;
+   }>): Promise<void> {
+     try {
+       logger.info('Adding document chunks to vector DB', { count: chunks.length });
+
+       // Embed all chunks in one batch, then store each vector with its text
+       // copied into metadata so retrieval can return the content directly.
+       const texts = chunks.map(c => c.content);
+       const embeddingResponse = await aiService.generateEmbedding(texts);
+
+       const vectors = chunks.map((chunk, index) => ({
+         id: chunk.id,
+         values: embeddingResponse.embeddings[index],
+         metadata: {
+           ...chunk.metadata,
+           content: chunk.content,
+         },
+       }));
+
+       await this.vectorDB.upsert(vectors);
+
+       logger.info('Document chunks added successfully', { count: chunks.length });
+     } catch (error) {
+       logger.error('Error adding document chunks', {
+         error: error instanceof Error ? error.message : String(error),
+       });
+       throw error;
+     }
+   }
+
+   async deleteDocument(docId: string): Promise<void> {
+     try {
+       logger.info('Deleting document from vector DB', { docId });
+
+       // Metadata-only lookup: an empty query vector works with the in-memory
+       // adapter (every entry matches with score 0), but a managed index such
+       // as Pinecone would reject it and needs a metadata-based delete instead.
+       const results = await this.vectorDB.query([], 10000, { doc_id: docId });
+       const chunkIds = results.map(r => r.id);
+
+       if (chunkIds.length > 0) {
+         await this.vectorDB.delete(chunkIds);
+       }
+
+       logger.info('Document deleted successfully', { docId, chunksDeleted: chunkIds.length });
+     } catch (error) {
+       logger.error('Error deleting document', {
+         error: error instanceof Error ? error.message : String(error),
+         docId,
+       });
+       throw error;
+     }
+   }
+
+   private buildRAGPrompt(query: string, context: string): string {
+     return `Context information is below:
+ ---
+ ${context}
+ ---
+
+ Based on the context above, please answer the following question. If the context doesn't contain enough information to answer the question, please say so.
+
+ Question: ${query}
+
+ Answer:`;
+   }
+
+   async healthCheck(): Promise<boolean> {
+     return await this.vectorDB.isAvailable();
+   }
+ }
+
+ export const ragService = new RAGService();
backend/services/voice_service.ts ADDED
@@ -0,0 +1,149 @@
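+ // Wraps the OpenAI adapter for text-to-speech (tts-1 family) and Whisper
+ // transcription behind a common VoiceAdapter lookup.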
+ import { OpenAIAdapter } from '../adapters/openai_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type {
+   VoiceAdapter,
+   VoiceSynthesisRequest,
+   VoiceSynthesisResponse,
+   TranscriptionRequest,
+   TranscriptionResponse
+ } from '../types/models';
+
+ const config = loadConfig();
+
+ class VoiceService {
+   private adapters: Map<string, VoiceAdapter> = new Map();
+   private defaultAdapter: VoiceAdapter | null = null;
+
+   constructor() {
+     this.initializeAdapters();
+   }
+
+   private initializeAdapters(): void {
+     if (config.openai.apiKey) {
+       const openaiAdapter = new OpenAIAdapter(
+         config.openai.apiKey,
+         config.openai.defaultChatModel,
+         config.openai.defaultEmbeddingModel,
+         config.openai.defaultImageModel,
+         config.openai.defaultVoiceModel
+       );
+       this.adapters.set('openai', openaiAdapter);
+       this.adapters.set('tts-1', openaiAdapter);
+       this.adapters.set('tts-1-hd', openaiAdapter);
+       this.adapters.set('whisper', openaiAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = openaiAdapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       logger.warn('No voice synthesis adapters initialized. Please configure API keys.');
+     }
+   }
+
+   private getAdapter(model?: string): VoiceAdapter {
+     if (!model) {
+       if (!this.defaultAdapter) {
+         throw new Error('No voice adapter available. Please configure API keys.');
+       }
+       return this.defaultAdapter;
+     }
+
+     const lowerModel = model.toLowerCase();
+
+     for (const [key, adapter] of this.adapters.entries()) {
+       if (lowerModel.includes(key) || key.includes(lowerModel)) {
+         return adapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       throw new Error('No voice adapter available. Please configure API keys.');
+     }
+
+     logger.warn(`Model ${model} not found, using default adapter`);
+     return this.defaultAdapter;
+   }
+
+   async synthesize(request: VoiceSynthesisRequest): Promise<VoiceSynthesisResponse> {
+     try {
+       const adapter = this.getAdapter(request.model);
+
+       logger.info('Synthesizing speech', {
+         textLength: request.text.length,
+         voice: request.voice || 'default',
+         model: request.model || 'default',
+       });
+
+       const response = await adapter.synthesize(request.text, request);
+
+       metrics.incrementModelUsage(request.model || 'tts-1');
+
+       logger.info('Speech synthesized successfully', {
+         voice: response.voice,
+         format: response.format,
+         sizeBytes: response.size_bytes,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error synthesizing speech', {
+         error: error instanceof Error ? error.message : String(error),
+         model: request.model || 'default',
+       });
+       throw error;
+     }
+   }
+
+   async transcribe(audio: Buffer, request: TranscriptionRequest): Promise<TranscriptionResponse> {
+     try {
+       const adapter = this.getAdapter(request.model);
+
+       if (!adapter.transcribe) {
+         throw new Error('Transcription not supported by this adapter');
+       }
+
+       logger.info('Transcribing audio', {
+         model: request.model || 'default',
+         language: request.language,
+       });
+
+       const response = await adapter.transcribe(audio, request);
+
+       metrics.incrementModelUsage(request.model || 'whisper-1');
+
+       logger.info('Audio transcribed successfully', {
+         textLength: response.text.length,
+         language: response.language,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error transcribing audio', {
+         error: error instanceof Error ? error.message : String(error),
+         model: request.model || 'default',
+       });
+       throw error;
+     }
+   }
+
+   getAvailableModels(): string[] {
+     return Array.from(this.adapters.keys());
+   }
+
+   async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
+     const results: { provider: string; available: boolean }[] = [];
+
+     for (const [provider, adapter] of this.adapters.entries()) {
+       const available = await adapter.isAvailable();
+       results.push({ provider, available });
+     }
+
+     return results;
+   }
+ }
+
+ export const voiceService = new VoiceService();
backend/tsconfig.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "$schema": "https://json.schemastore.org/tsconfig",
+   "compilerOptions": {
+     /* Basic Options */
+     "lib": ["ES2022"],
+     "target": "ES2022",
+     "module": "ES2022",
+     "types": ["node"],
+     "paths": {
+       "~encore/*": ["./encore.gen/*"]
+     },
+
+     /* Workspace Settings */
+     "composite": true,
+
+     /* Strict Type-Checking Options */
+     "strict": true,
+
+     /* Module Resolution Options */
+     "moduleResolution": "bundler",
+     "allowSyntheticDefaultImports": true,
+     "isolatedModules": true,
+     "sourceMap": true,
+
+     "declaration": true,
+
+     /* Advanced Options */
+     "forceConsistentCasingInFileNames": true,
+     "skipLibCheck": true
+   }
+ }
backend/types/config.ts ADDED
@@ -0,0 +1,93 @@
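+ // Central typed view of all environment configuration, with development-safe
+ // defaults so the service can boot without a .env file.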
+ export interface AppConfig {
+   openai: {
+     apiKey: string;
+     defaultChatModel: string;
+     defaultEmbeddingModel: string;
+     defaultImageModel: string;
+     defaultVoiceModel: string;
+   };
+   huggingface: {
+     apiKey: string;
+     defaultModel: string;
+   };
+   anthropic: {
+     apiKey: string;
+     defaultModel: string;
+   };
+   pinecone: {
+     apiKey: string;
+     environment: string;
+     indexName: string;
+   };
+   auth: {
+     apiKeys: string[];
+     adminApiKeys: string[];
+   };
+   rateLimit: {
+     default: number;
+     premium: number;
+     admin: number;
+   };
+   documents: {
+     maxFileSizeMB: number;
+     chunkSize: number;
+     chunkOverlap: number;
+   };
+   workers: {
+     enabled: boolean;
+     concurrency: number;
+   };
+   server: {
+     port: number;
+     logLevel: string;
+     corsOrigins: string[];
+   };
+ }
+
+ export function loadConfig(): AppConfig {
+   return {
+     openai: {
+       apiKey: process.env.OPENAI_API_KEY || '',
+       defaultChatModel: process.env.DEFAULT_CHAT_MODEL || 'gpt-3.5-turbo',
+       defaultEmbeddingModel: process.env.DEFAULT_EMBEDDING_MODEL || 'text-embedding-ada-002',
+       defaultImageModel: process.env.DEFAULT_IMAGE_MODEL || 'dall-e-3',
+       defaultVoiceModel: process.env.DEFAULT_VOICE_MODEL || 'tts-1',
+     },
+     huggingface: {
+       apiKey: process.env.HUGGINGFACE_API_KEY || '',
+       defaultModel: process.env.HF_DEFAULT_MODEL || 'mistralai/Mistral-7B-Instruct-v0.1',
+     },
+     anthropic: {
+       apiKey: process.env.ANTHROPIC_API_KEY || '',
+       defaultModel: process.env.ANTHROPIC_DEFAULT_MODEL || 'claude-3-sonnet-20240229',
+     },
+     pinecone: {
+       apiKey: process.env.PINECONE_API_KEY || '',
+       environment: process.env.PINECONE_ENVIRONMENT || 'us-west1-gcp',
+       indexName: process.env.PINECONE_INDEX_NAME || 'ai-api-vectors',
+     },
+     auth: {
+       apiKeys: (process.env.API_KEYS || 'demo-key-1,demo-key-2').split(',').map(k => k.trim()),
+       adminApiKeys: (process.env.ADMIN_API_KEYS || '').split(',').map(k => k.trim()).filter(Boolean),
+     },
+     rateLimit: {
+       default: parseInt(process.env.RATE_LIMIT_DEFAULT || '60', 10),
+       premium: parseInt(process.env.RATE_LIMIT_PREMIUM || '300', 10),
+       admin: parseInt(process.env.RATE_LIMIT_ADMIN || '1000', 10),
+     },
+     documents: {
+       maxFileSizeMB: parseInt(process.env.MAX_FILE_SIZE_MB || '10', 10),
+       chunkSize: parseInt(process.env.CHUNK_SIZE || '1000', 10),
+       chunkOverlap: parseInt(process.env.CHUNK_OVERLAP || '200', 10),
+     },
+     workers: {
+       enabled: process.env.ENABLE_BACKGROUND_WORKERS === 'true',
+       concurrency: parseInt(process.env.WORKER_CONCURRENCY || '5', 10),
+     },
+     server: {
+       port: parseInt(process.env.PORT || '8000', 10),
+       logLevel: process.env.LOG_LEVEL || 'info',
+       corsOrigins: (process.env.CORS_ORIGINS || 'http://localhost:3000').split(',').map(o => o.trim()),
+     },
+   };
+ }
backend/types/models.ts ADDED
@@ -0,0 +1,256 @@
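+ // Shared request/response types plus the adapter interfaces (LLMAdapter,
+ // VectorDBAdapter, and friends) that each provider integration implements.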
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ export interface Message {
+   role: 'system' | 'user' | 'assistant';
+   content: string;
+   timestamp?: number;
+ }
+
+ export interface ChatRequest {
+   conversation: Message[];
+   model?: string;
+   options?: ChatOptions;
+ }
+
+ export interface ChatOptions {
+   temperature?: number;
+   max_tokens?: number;
+   top_p?: number;
+   frequency_penalty?: number;
+   presence_penalty?: number;
+   stop?: string[];
+ }
+
+ export interface ChatResponse {
+   reply: string;
+   model: string;
+   usage: TokenUsage;
+   sources?: DocumentSource[] | null;
+   conversation_id?: string;
+ }
+
+ export interface TokenUsage {
+   prompt_tokens: number;
+   completion_tokens: number;
+   total_tokens: number;
+ }
+
+ export interface RAGQueryRequest {
+   query: string;
+   top_k?: number;
+   model?: string;
+   use_retrieval?: boolean;
+   filters?: Record<string, any>;
+ }
+
+ export interface RAGQueryResponse {
+   answer: string;
+   sources: DocumentSource[];
+   model: string;
+   usage: TokenUsage;
+   retrieval_time_ms?: number;
+ }
+
+ export interface DocumentSource {
+   doc_id: string;
+   chunk_id: string;
+   content: string;
+   score: number;
+   metadata?: Record<string, any>;
+ }
+
+ export interface ImageGenerationRequest {
+   prompt: string;
+   model?: string;
+   size?: '256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792';
+   n?: number;
+   quality?: 'standard' | 'hd';
+   style?: 'vivid' | 'natural';
+ }
+
+ export interface ImageGenerationResponse {
+   images: GeneratedImage[];
+   model: string;
+   created: number;
+ }
+
+ export interface GeneratedImage {
+   url: string;
+   revised_prompt?: string;
+   b64_json?: string;
+ }
+
+ export interface VoiceSynthesisRequest {
+   text: string;
+   voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
+   model?: string;
+   format?: 'mp3' | 'opus' | 'aac' | 'flac';
+   speed?: number;
+ }
+
+ export interface VoiceSynthesisResponse {
+   audio_url: string;
+   voice: string;
+   format: string;
+   duration_ms?: number;
+   size_bytes?: number;
+ }
+
+ export interface TranscriptionRequest {
+   audio_url: string;
+   model?: string;
+   language?: string;
+   prompt?: string;
+ }
+
+ export interface TranscriptionResponse {
+   text: string;
+   language: string;
+   duration: number;
+   model: string;
+ }
+
+ export interface DocumentUploadRequest {
+   filename: string;
+   content: Buffer;
+   metadata?: DocumentMetadata;
+ }
+
+ export interface DocumentMetadata {
+   title?: string;
+   author?: string;
+   category?: string;
+   tags?: string[];
+   [key: string]: any;
+ }
+
+ export interface DocumentUploadResponse {
+   doc_id: string;
+   filename: string;
+   size_bytes: number;
+   status: 'processing' | 'completed' | 'failed';
+   estimated_chunks?: number;
+   webhook_url?: string;
+   error?: string;
+ }
+
+ export interface DocumentChunk {
+   chunk_id: string;
+   doc_id: string;
+   content: string;
+   embedding?: number[];
+   metadata: DocumentMetadata;
+   chunk_index: number;
+   total_chunks: number;
+ }
+
+ export interface HealthCheckResponse {
+   status: 'healthy' | 'degraded' | 'unhealthy';
+   timestamp: number;
+   version: string;
+   services: ServiceHealth[];
+   uptime_seconds: number;
+ }
+
+ export interface ServiceHealth {
+   name: string;
+   status: 'up' | 'down' | 'degraded';
+   latency_ms?: number;
+   error?: string;
+ }
+
+ export interface MetricsResponse {
+   timestamp: number;
+   requests_total: number;
+   requests_by_endpoint: Record<string, number>;
+   errors_total: number;
+   rate_limit_hits: number;
+   active_connections: number;
+   average_response_time_ms: number;
+   model_usage: Record<string, number>;
+   vector_db_queries: number;
+   documents_processed: number;
+ }
+
+ export interface ApiKeyInfo {
+   key_hash: string;
+   tier: 'default' | 'premium' | 'admin';
+   rate_limit: number;
+   created_at: number;
+   last_used?: number;
+ }
+
+ export interface RateLimitInfo {
+   limit: number;
+   remaining: number;
+   reset_at: number;
+   tier: string;
+ }
+
+ export interface WebhookEvent {
+   event_type: 'document.ingestion.completed' | 'document.ingestion.failed';
+   doc_id: string;
+   timestamp: number;
+   data: {
+     chunks_created?: number;
+     error?: string;
+     status: string;
+   };
+ }
+
+ export interface EmbeddingRequest {
+   text: string | string[];
+   model?: string;
+ }
+
+ export interface EmbeddingResponse {
+   embeddings: number[][];
+   model: string;
+   usage: TokenUsage;
+ }
+
+ export interface VectorSearchRequest {
+   query_vector: number[];
+   top_k: number;
+   filter?: Record<string, any>;
+   namespace?: string;
+ }
+
+ export interface VectorSearchResult {
+   id: string;
+   score: number;
+   metadata: Record<string, any>;
+ }
+
+ export type ModelProvider = 'openai' | 'huggingface' | 'anthropic' | 'local';
+
+ export interface ModelConfig {
+   provider: ModelProvider;
+   model_name: string;
+   api_key?: string;
+   max_tokens?: number;
+   temperature?: number;
+   endpoint?: string;
+ }
+
+ export interface LLMAdapter {
+   generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse>;
+   generateEmbedding(text: string | string[]): Promise<EmbeddingResponse>;
+   isAvailable(): Promise<boolean>;
+ }
+
+ export interface VectorDBAdapter {
+   upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void>;
+   query(queryVector: number[], topK: number, filter?: Record<string, any>): Promise<VectorSearchResult[]>;
+   delete(ids: string[]): Promise<void>;
+   isAvailable(): Promise<boolean>;
+ }
+
+ export interface ImageAdapter {
+   generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse>;
+   isAvailable(): Promise<boolean>;
+ }
+
+ export interface VoiceAdapter {
+   synthesize(text: string, options?: Partial<VoiceSynthesisRequest>): Promise<VoiceSynthesisResponse>;
+   transcribe?(audio: Buffer, options?: Partial<TranscriptionRequest>): Promise<TranscriptionResponse>;
+   isAvailable(): Promise<boolean>;
+ }
backend/utils/auth.ts ADDED
@@ -0,0 +1,69 @@
+ import { Header as HeaderType } from "encore.dev/api";
+ import { loadConfig } from "../types/config";
+ import type { ApiKeyInfo } from "../types/models";
+
+ const config = loadConfig();
+
+ export interface AuthData {
+   apiKey: string;
+   tier: 'default' | 'premium' | 'admin';
+ }
+
+ // Placeholder used to declare the "authorization" header parameter on Encore endpoints.
+ export function auth(): HeaderType<"authorization"> {
+   return "" as HeaderType<"authorization">;
+ }
+
+ export function validateApiKey(authHeader?: string): AuthData {
+   if (!authHeader) {
+     throw new Error('Missing Authorization header');
+   }
+
+   const apiKey = authHeader.replace(/^Bearer\s+/i, '').trim();
+
+   if (!apiKey) {
+     throw new Error('Invalid Authorization header format');
+   }
+
+   if (config.auth.adminApiKeys.includes(apiKey)) {
+     return { apiKey, tier: 'admin' };
+   }
+
+   if (config.auth.apiKeys.includes(apiKey)) {
+     return { apiKey, tier: 'default' };
+   }
+
+   throw new Error('Invalid API key');
+ }
+
+ export function getApiKeyInfo(apiKey: string): ApiKeyInfo {
+   let tier: 'default' | 'premium' | 'admin' = 'default';
+   if (config.auth.adminApiKeys.includes(apiKey)) {
+     tier = 'admin';
+   }
+
+   let rateLimit = config.rateLimit.default;
+   if (tier === 'admin') {
+     rateLimit = config.rateLimit.admin;
+   }
+
+   return {
+     key_hash: hashApiKey(apiKey),
+     tier,
+     rate_limit: rateLimit,
+     created_at: Date.now(),
+   };
+ }
+
+ // Non-cryptographic 31x string hash, used only so raw keys are never stored
+ // or logged; not suitable as a security measure.
+ function hashApiKey(apiKey: string): string {
+   let hash = 0;
+   for (let i = 0; i < apiKey.length; i++) {
+     const char = apiKey.charCodeAt(i);
+     hash = ((hash << 5) - hash) + char;
+     hash = hash & hash;
+   }
+   return Math.abs(hash).toString(16);
+ }
+
+ export function requireAuth(authHeader?: string): AuthData {
+   return validateApiKey(authHeader);
+ }
backend/utils/logger.ts ADDED
@@ -0,0 +1,48 @@
+ type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+
+ class Logger {
+   private level: LogLevel;
+
+   constructor(level: LogLevel = 'info') {
+     this.level = level;
+   }
+
+   private shouldLog(level: LogLevel): boolean {
+     const levels: LogLevel[] = ['debug', 'info', 'warn', 'error'];
+     return levels.indexOf(level) >= levels.indexOf(this.level);
+   }
+
+   private log(level: LogLevel, message: string, meta?: Record<string, any>): void {
+     if (!this.shouldLog(level)) return;
+
+     const timestamp = new Date().toISOString();
+     const logEntry = {
+       timestamp,
+       level,
+       message,
+       ...meta,
+     };
+
+     console.log(JSON.stringify(logEntry));
+   }
+
+   debug(message: string, meta?: Record<string, any>): void {
+     this.log('debug', message, meta);
+   }
+
+   info(message: string, meta?: Record<string, any>): void {
+     this.log('info', message, meta);
+   }
+
+   warn(message: string, meta?: Record<string, any>): void {
+     this.log('warn', message, meta);
+   }
+
+   error(message: string, meta?: Record<string, any>): void {
+     this.log('error', message, meta);
+   }
+ }
+
+ export const logger = new Logger(
+   (process.env.LOG_LEVEL as LogLevel) || 'info'
+ );
backend/utils/metrics.ts ADDED
@@ -0,0 +1,90 @@
+ import type { MetricsResponse } from '../types/models';
+
+ class MetricsCollector {
+   private startTime = Date.now();
+   private requestsTotal = 0;
+   private requestsByEndpoint = new Map<string, number>();
+   private errorsTotal = 0;
+   private rateLimitHits = 0;
+   private responseTimes: number[] = [];
+   private modelUsage = new Map<string, number>();
+   private vectorDbQueries = 0;
+   private documentsProcessed = 0;
+
+   incrementRequests(endpoint: string): void {
+     this.requestsTotal++;
+     const count = this.requestsByEndpoint.get(endpoint) || 0;
+     this.requestsByEndpoint.set(endpoint, count + 1);
+   }
+
+   incrementErrors(): void {
+     this.errorsTotal++;
+   }
+
+   incrementRateLimitHits(): void {
+     this.rateLimitHits++;
+   }
+
+   recordResponseTime(timeMs: number): void {
+     this.responseTimes.push(timeMs);
+     if (this.responseTimes.length > 1000) {
+       this.responseTimes.shift();
+     }
+   }
+
+   incrementModelUsage(model: string): void {
+     const count = this.modelUsage.get(model) || 0;
+     this.modelUsage.set(model, count + 1);
+   }
+
+   incrementVectorDbQueries(): void {
+     this.vectorDbQueries++;
+   }
+
+   incrementDocumentsProcessed(): void {
+     this.documentsProcessed++;
+   }
+
+   getMetrics(): MetricsResponse {
+     const avgResponseTime = this.responseTimes.length > 0
+       ? this.responseTimes.reduce((a, b) => a + b, 0) / this.responseTimes.length
+       : 0;
+
+     const requestsByEndpoint: Record<string, number> = {};
+     for (const [endpoint, count] of this.requestsByEndpoint.entries()) {
+       requestsByEndpoint[endpoint] = count;
+     }
+
+     const modelUsageObj: Record<string, number> = {};
+     for (const [model, count] of this.modelUsage.entries()) {
+       modelUsageObj[model] = count;
+     }
+
+     return {
+       timestamp: Date.now(),
+       requests_total: this.requestsTotal,
+       requests_by_endpoint: requestsByEndpoint,
+       errors_total: this.errorsTotal,
+       rate_limit_hits: this.rateLimitHits,
+       active_connections: 0,
+       average_response_time_ms: Math.round(avgResponseTime),
+       model_usage: modelUsageObj,
+       vector_db_queries: this.vectorDbQueries,
+       documents_processed: this.documentsProcessed,
+     };
+   }
+
+   reset(): void {
+     this.startTime = Date.now();
+     this.requestsTotal = 0;
+     this.requestsByEndpoint.clear();
+     this.errorsTotal = 0;
+     this.rateLimitHits = 0;
+     this.responseTimes = [];
+     this.modelUsage.clear();
+     this.vectorDbQueries = 0;
+     this.documentsProcessed = 0;
+   }
+ }
+
+ export const metrics = new MetricsCollector();
backend/utils/rate_limit.ts ADDED
@@ -0,0 +1,114 @@
+ import { loadConfig } from "../types/config";
+ import type { RateLimitInfo } from "../types/models";
+
+ const config = loadConfig();
+
+ interface RateLimitBucket {
+   tokens: number;
+   lastRefill: number;
+ }
+
+ // Fixed-window limiter: each key gets `limit` tokens that are fully refilled
+ // once per minute, rather than continuously as in a classic token bucket.
+ class RateLimiter {
+   private buckets = new Map<string, RateLimitBucket>();
+   private readonly refillInterval = 60000;
+
+   checkRateLimit(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+     const limit = this.getLimitForTier(tier);
+     const now = Date.now();
+
+     let bucket = this.buckets.get(apiKey);
+
+     if (!bucket) {
+       bucket = {
+         tokens: limit,
+         lastRefill: now,
+       };
+       this.buckets.set(apiKey, bucket);
+     }
+
+     const timeSinceRefill = now - bucket.lastRefill;
+     if (timeSinceRefill >= this.refillInterval) {
+       bucket.tokens = limit;
+       bucket.lastRefill = now;
+     }
+
+     if (bucket.tokens <= 0) {
+       const resetAt = bucket.lastRefill + this.refillInterval;
+       // Thrown as a plain object so the API layer can map it to an HTTP 429.
+       throw {
+         statusCode: 429,
+         message: 'Rate limit exceeded',
+         limit,
+         remaining: 0,
+         resetAt,
+       };
+     }
+
+     bucket.tokens -= 1;
+
+     const resetAt = bucket.lastRefill + this.refillInterval;
+
+     return {
+       limit,
+       remaining: bucket.tokens,
+       reset_at: resetAt,
+       tier,
+     };
+   }
+
+   private getLimitForTier(tier: 'default' | 'premium' | 'admin'): number {
+     switch (tier) {
+       case 'admin':
+         return config.rateLimit.admin;
+       case 'premium':
+         return config.rateLimit.premium;
+       default:
+         return config.rateLimit.default;
+     }
+   }
+
+   getRateLimitInfo(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+     const limit = this.getLimitForTier(tier);
+     const bucket = this.buckets.get(apiKey);
+
+     if (!bucket) {
+       return {
+         limit,
+         remaining: limit,
+         reset_at: Date.now() + this.refillInterval,
+         tier,
+       };
+     }
+
+     return {
+       limit,
+       remaining: bucket.tokens,
+       reset_at: bucket.lastRefill + this.refillInterval,
+       tier,
+     };
+   }
+
+   // Drop buckets that have been idle for two refill windows to bound memory use.
+   cleanup(): void {
+     const now = Date.now();
+     const maxAge = this.refillInterval * 2;
+
+     for (const [key, bucket] of this.buckets.entries()) {
+       if (now - bucket.lastRefill > maxAge) {
+         this.buckets.delete(key);
+       }
+     }
+   }
+ }
+
+ export const rateLimiter = new RateLimiter();
+
+ setInterval(() => {
+   rateLimiter.cleanup();
+ }, 300000);
+
+ export function checkRateLimit(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+   return rateLimiter.checkRateLimit(apiKey, tier);
+ }
+
+ export function getRateLimitInfo(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+   return rateLimiter.getRateLimitInfo(apiKey, tier);
+ }
backend/vite-env.d.ts ADDED
@@ -0,0 +1 @@
+ /// <reference types="vite/client" />
backend/workers/ingestion_worker.ts ADDED
@@ -0,0 +1,128 @@
+ import { logger } from '../utils/logger';
+ import type { WebhookEvent } from '../types/models';
+
+ interface IngestionJob {
+   doc_id: string;
+   filename: string;
+   status: 'pending' | 'processing' | 'completed' | 'failed';
+   created_at: number;
+   completed_at?: number;
+   error?: string;
+ }
+
+ class IngestionWorker {
+   private jobs = new Map<string, IngestionJob>();
+   private isRunning = false;
+   private concurrency: number;
+
+   constructor(concurrency = 5) {
+     this.concurrency = concurrency;
+   }
+
+   async start(): Promise<void> {
+     if (this.isRunning) {
+       logger.warn('Ingestion worker already running');
+       return;
+     }
+
+     this.isRunning = true;
+     logger.info('Ingestion worker started', { concurrency: this.concurrency });
+
+     this.processQueue();
+   }
+
+   async stop(): Promise<void> {
+     this.isRunning = false;
+     logger.info('Ingestion worker stopped');
+   }
+
+   async addJob(doc_id: string, filename: string): Promise<void> {
+     const job: IngestionJob = {
+       doc_id,
+       filename,
+       status: 'pending',
+       created_at: Date.now(),
+     };
+
+     this.jobs.set(doc_id, job);
+     logger.info('Job added to ingestion queue', { doc_id, filename });
+   }
+
+   // Poll loop: picks up at most `concurrency` pending jobs per iteration.
+   private async processQueue(): Promise<void> {
+     while (this.isRunning) {
+       const pendingJobs = Array.from(this.jobs.values())
+         .filter(job => job.status === 'pending')
+         .slice(0, this.concurrency);
+
+       if (pendingJobs.length === 0) {
+         await this.sleep(1000);
+         continue;
+       }
+
+       await Promise.all(
+         pendingJobs.map(job => this.processJob(job))
+       );
+     }
+   }
+
+   private async processJob(job: IngestionJob): Promise<void> {
+     try {
+       job.status = 'processing';
+       logger.info('Processing ingestion job', { doc_id: job.doc_id });
+
+       // Simulate 1-3s of parsing/embedding work; a real worker would call
+       // the document service here.
+       await this.sleep(Math.random() * 2000 + 1000);
+
+       job.status = 'completed';
+       job.completed_at = Date.now();
+
+       logger.info('Ingestion job completed', { doc_id: job.doc_id });
+
+       await this.sendWebhook({
+         event_type: 'document.ingestion.completed',
+         doc_id: job.doc_id,
+         timestamp: Date.now(),
+         data: {
+           chunks_created: Math.floor(Math.random() * 20) + 5, // simulated chunk count
+           status: 'completed',
+         },
+       });
+     } catch (error) {
+       job.status = 'failed';
+       job.error = error instanceof Error ? error.message : String(error);
+       job.completed_at = Date.now();
+
+       logger.error('Ingestion job failed', {
+         doc_id: job.doc_id,
+         error: job.error,
+       });
+
+       await this.sendWebhook({
+         event_type: 'document.ingestion.failed',
+         doc_id: job.doc_id,
+         timestamp: Date.now(),
+         data: {
+           error: job.error,
+           status: 'failed',
+         },
+       });
+     }
+   }
+
+   // Stub: logs the event; a real implementation would POST it to a
+   // registered webhook URL.
+   private async sendWebhook(event: WebhookEvent): Promise<void> {
+     logger.info('Webhook event', event);
+   }
+
+   private sleep(ms: number): Promise<void> {
+     return new Promise(resolve => setTimeout(resolve, ms));
+   }
+
+   getJobStatus(doc_id: string): IngestionJob | undefined {
+     return this.jobs.get(doc_id);
+   }
+
+   getAllJobs(): IngestionJob[] {
+     return Array.from(this.jobs.values());
+   }
+ }
+
+ export const ingestionWorker = new IngestionWorker();
docker-compose.yml ADDED
@@ -0,0 +1,51 @@
+ version: '3.8'
+
+ services:
+   api:
+     build: .
+     ports:
+       - "8000:8000"
+     environment:
+       - OPENAI_API_KEY=${OPENAI_API_KEY}
+       - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY}
+       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
+       - PINECONE_API_KEY=${PINECONE_API_KEY}
+       - PINECONE_ENVIRONMENT=${PINECONE_ENVIRONMENT:-us-west1-gcp}
+       - PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME:-ai-api-vectors}
+       - API_KEYS=${API_KEYS:-demo-key-1,demo-key-2}
+       - ADMIN_API_KEYS=${ADMIN_API_KEYS}
+       - RATE_LIMIT_DEFAULT=${RATE_LIMIT_DEFAULT:-60}
+       - RATE_LIMIT_PREMIUM=${RATE_LIMIT_PREMIUM:-300}
+       - RATE_LIMIT_ADMIN=${RATE_LIMIT_ADMIN:-1000}
+       - DEFAULT_CHAT_MODEL=${DEFAULT_CHAT_MODEL:-gpt-3.5-turbo}
+       - DEFAULT_EMBEDDING_MODEL=${DEFAULT_EMBEDDING_MODEL:-text-embedding-ada-002}
+       - DEFAULT_IMAGE_MODEL=${DEFAULT_IMAGE_MODEL:-dall-e-3}
+       - DEFAULT_VOICE_MODEL=${DEFAULT_VOICE_MODEL:-tts-1}
+       - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-10}
+       - CHUNK_SIZE=${CHUNK_SIZE:-1000}
+       - CHUNK_OVERLAP=${CHUNK_OVERLAP:-200}
+       - ENABLE_BACKGROUND_WORKERS=${ENABLE_BACKGROUND_WORKERS:-true}
+       - WORKER_CONCURRENCY=${WORKER_CONCURRENCY:-5}
+       - LOG_LEVEL=${LOG_LEVEL:-info}
+       - CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:3000}
+     volumes:
+       - ./data:/app/data
+     restart: unless-stopped
+     healthcheck:
+       test: ["CMD", "wget", "--spider", "-q", "http://localhost:8000/health"]
+       interval: 30s
+       timeout: 10s
+       retries: 3
+       start_period: 40s
+
+   redis:
+     image: redis:7-alpine
+     ports:
+       - "6379:6379"
+     volumes:
+       - redis_data:/data
+     restart: unless-stopped
+     command: redis-server --appendonly yes
+
+ volumes:
+   redis_data:
examples/curl.sh ADDED
@@ -0,0 +1,116 @@
+ #!/bin/bash
+
+ API_URL="http://localhost:8000"
+ API_KEY="demo-key-1"
+
+ echo "=== AI API Service - Example Requests ==="
+ echo ""
+
+ echo "1. Health Check"
+ echo "==============="
+ curl -s "${API_URL}/health" | jq .
+ echo ""
+ echo ""
+
+ echo "2. Verify API Key"
+ echo "================="
+ curl -s -X POST "${API_URL}/auth/verify" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
+ echo ""
+
+ echo "3. Simple Query"
+ echo "==============="
+ curl -s "${API_URL}/ai/query?q=What%20is%20machine%20learning%3F" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
+ echo ""
+
+ echo "4. Chat Conversation"
+ echo "===================="
+ curl -s -X POST "${API_URL}/ai/chat" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {
+         "role": "user",
+         "content": "Explain quantum computing in simple terms"
+       }
+     ],
+     "options": {
+       "temperature": 0.7,
+       "max_tokens": 200
+     }
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "5. RAG Query (with retrieval)"
+ echo "============================="
+ curl -s -X POST "${API_URL}/rag/query" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What are the key features?",
+     "top_k": 5,
+     "use_retrieval": true
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "6. Image Generation"
+ echo "==================="
+ curl -s -X POST "${API_URL}/image/generate" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "prompt": "A serene mountain landscape at sunset",
+     "size": "1024x1024",
+     "n": 1
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "7. Voice Synthesis"
+ echo "=================="
+ curl -s -X POST "${API_URL}/voice/synthesize" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "text": "Hello, this is a test of the voice synthesis system.",
+     "voice": "alloy",
+     "format": "mp3"
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "8. Document Upload"
+ echo "=================="
+ CONTENT=$(echo "This is a sample document for testing." | base64)
+ curl -s -X POST "${API_URL}/upload" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d "{
+     \"filename\": \"sample.txt\",
+     \"content_base64\": \"${CONTENT}\",
+     \"metadata\": {
+       \"title\": \"Sample Document\",
+       \"category\": \"test\"
+     }
+   }" | jq .
+ echo ""
+ echo ""
+
+ echo "9. Get Metrics"
+ echo "=============="
+ curl -s "${API_URL}/metrics" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
+ echo ""
+
+ echo "10. Get Available Models"
+ echo "========================"
+ curl -s "${API_URL}/rag/models" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
examples/js_client.js ADDED
@@ -0,0 +1,203 @@
+ const API_URL = 'http://localhost:8000';
+ const API_KEY = 'demo-key-1';
+
+ class AIAPIClient {
+   constructor(apiUrl, apiKey) {
+     this.apiUrl = apiUrl;
+     this.apiKey = apiKey;
+   }
+
+   async request(endpoint, options = {}) {
+     const url = `${this.apiUrl}${endpoint}`;
+     const headers = {
+       'Authorization': `Bearer ${this.apiKey}`,
+       'Content-Type': 'application/json',
+       ...options.headers,
+     };
+
+     const response = await fetch(url, {
+       ...options,
+       headers,
+     });
+
+     if (!response.ok) {
+       // Fall back to the status text when the error body is not valid JSON.
+       const error = await response.json().catch(() => ({}));
+       throw new Error(`API Error: ${error.message || response.statusText}`);
+     }
+
+     return response.json();
+   }
+
+   async healthCheck() {
+     return this.request('/health', { method: 'GET' });
+   }
+
+   async verifyApiKey() {
+     return this.request('/auth/verify', { method: 'POST' });
+   }
+
+   async chat(conversation, model = null, options = {}) {
+     return this.request('/ai/chat', {
+       method: 'POST',
+       body: JSON.stringify({
+         conversation,
+         model,
+         options,
+       }),
+     });
+   }
+
+   async simpleQuery(query, model = null) {
+     const params = new URLSearchParams({ q: query });
+     if (model) params.append('model', model);
+
+     return this.request(`/ai/query?${params}`, { method: 'GET' });
+   }
+
+   async ragQuery(query, topK = 5, model = null, useRetrieval = true, filters = null) {
+     return this.request('/rag/query', {
+       method: 'POST',
+       body: JSON.stringify({
+         query,
+         top_k: topK,
+         model,
+         use_retrieval: useRetrieval,
+         filters,
+       }),
+     });
+   }
+
+   async generateImage(prompt, options = {}) {
+     return this.request('/image/generate', {
+       method: 'POST',
+       body: JSON.stringify({
+         prompt,
+         ...options,
+       }),
+     });
+   }
+
+   async synthesizeVoice(text, voice = 'alloy', format = 'mp3', speed = 1.0) {
+     return this.request('/voice/synthesize', {
+       method: 'POST',
+       body: JSON.stringify({
+         text,
+         voice,
+         format,
+         speed,
+       }),
+     });
+   }
+
+   async transcribeAudio(audioBase64, model = null, language = null) {
+     return this.request('/voice/transcribe', {
+       method: 'POST',
+       body: JSON.stringify({
+         audio_base64: audioBase64,
+         model,
+         language,
+       }),
+     });
+   }
+
+   async uploadDocument(filename, contentBase64, metadata = {}) {
+     return this.request('/upload', {
+       method: 'POST',
+       body: JSON.stringify({
+         filename,
+         content_base64: contentBase64,
+         metadata,
+       }),
+     });
+   }
+
+   async getDocumentSources(docId) {
+     return this.request(`/docs/${docId}/sources`, { method: 'GET' });
+   }
+
+   async getMetrics() {
+     return this.request('/metrics', { method: 'GET' });
+   }
+
+   async getAvailableModels() {
+     return this.request('/rag/models', { method: 'GET' });
+   }
+ }
+
+ async function main() {
+   const client = new AIAPIClient(API_URL, API_KEY);
+
+   try {
+     console.log('=== AI API Client Examples ===\n');
+
+     console.log('1. Health Check');
+     const health = await client.healthCheck();
+     console.log(JSON.stringify(health, null, 2));
+     console.log('\n');
+
+     console.log('2. Simple Query');
+     const queryResult = await client.simpleQuery('What is artificial intelligence?');
+     console.log(JSON.stringify(queryResult, null, 2));
+     console.log('\n');
+
+     console.log('3. Chat Conversation');
+     const chatResult = await client.chat([
+       { role: 'user', content: 'Tell me a fun fact about space' }
+     ], null, { temperature: 0.8, max_tokens: 150 });
+     console.log(JSON.stringify(chatResult, null, 2));
+     console.log('\n');
+
+     console.log('4. RAG Query');
+     const ragResult = await client.ragQuery(
+       'What are the main features?',
+       5,
+       null,
+       true
+     );
+     console.log(JSON.stringify(ragResult, null, 2));
+     console.log('\n');
+
+     console.log('5. Image Generation');
+     const imageResult = await client.generateImage(
+       'A futuristic cityscape at night',
+       { size: '1024x1024', n: 1 }
+     );
+     console.log('Image generated:', imageResult.images[0].url.substring(0, 100) + '...');
+     console.log('\n');
+
+     console.log('6. Voice Synthesis');
+     const voiceResult = await client.synthesizeVoice(
+       'Welcome to the AI API service.',
+       'alloy',
+       'mp3'
+     );
+     console.log('Audio generated:', voiceResult.audio_url.substring(0, 100) + '...');
+     console.log('\n');
+
+     console.log('7. Document Upload');
+     const docContent = Buffer.from('This is a sample document.').toString('base64');
+     const uploadResult = await client.uploadDocument(
+       'sample.txt',
+       docContent,
+       { title: 'Sample', category: 'test' }
+     );
+     console.log(JSON.stringify(uploadResult, null, 2));
+     console.log('\n');
+
+     console.log('8. Get Metrics');
+     const metrics = await client.getMetrics();
+     console.log(JSON.stringify(metrics, null, 2));
+     console.log('\n');
+
+   } catch (error) {
+     console.error('Error:', error.message);
+   }
+ }
+
+ // Run the examples when executed directly under Node (no `window` global).
+ if (typeof window === 'undefined') {
+   main();
+ }
+
+ if (typeof module !== 'undefined' && module.exports) {
+   module.exports = AIAPIClient;
+ }
package.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "name": "leap-app",
+   "version": "1.0.0",
+   "type": "module",
+   "packageManager": "bun",
+   "workspaces": [
+     "backend"
+   ]
+ }
strcture.md ADDED
@@ -0,0 +1,493 @@
+ # AI API Service
+
+ A production-ready, scalable AI API service built with TypeScript and Encore.ts. Supports conversational chat, RAG (Retrieval-Augmented Generation), image generation, voice synthesis, and document ingestion.
+
+ ## 🏗️ Architecture
+
+ ```
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ API Gateway Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │ Auth Filter │→ │ Rate Limiter │→ │ Routes │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ Service Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │ AI Service │ │ RAG Service │ │Image Service │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │Voice Service │ │ Doc Service │ │Worker Service│ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ Adapter Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │OpenAI Adapter│ │ HF Adapter │ │Anthropic Adp │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │Vector DB Adp │ │Embedding Adp │ │ Local Models │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ Storage Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │ Pinecone │ │ In-Memory │ │ File Storage │ │
+ │ │ (Vector DB) │ │ (Fallback) │ │ (Documents) │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+ ```
+
+ ## ✨ Features
+
+ ### Core Capabilities
+ - **Multi-turn Chat** - Conversational AI with context management
+ - **RAG (Retrieval-Augmented Generation)** - Query documents with AI-powered retrieval
+ - **Image Generation** - Text-to-image using DALL-E or Stable Diffusion
+ - **Voice Synthesis** - Text-to-speech with multiple voice options
+ - **Document Ingestion** - Upload PDFs, DOCX, TXT with automatic chunking & embedding (see the chunking sketch below)
+
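+ The chunking step is governed by `CHUNK_SIZE` and `CHUNK_OVERLAP`. A minimal sketch of a sliding-window chunker under those defaults (illustrative only; the shipped document service may differ in detail):
+
+ ```ts
+ // Illustrative sliding-window chunker driven by CHUNK_SIZE / CHUNK_OVERLAP.
+ // Assumes chunkOverlap < chunkSize.
+ function chunkText(text: string, chunkSize = 1000, chunkOverlap = 200): string[] {
+   const chunks: string[] = [];
+   const step = chunkSize - chunkOverlap;
+   for (let start = 0; start < text.length; start += step) {
+     chunks.push(text.slice(start, start + chunkSize));
+     if (start + chunkSize >= text.length) break;
+   }
+   return chunks;
+ }
+
+ // 2,500 characters -> chunks of [1000, 1000, 900], each overlapping the
+ // previous chunk by 200 characters.
+ console.log(chunkText('a'.repeat(2500)).map(c => c.length));
+ ```
+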
+ ### Model Support
+ - **OpenAI** - GPT-4, GPT-3.5-turbo, DALL-E, Whisper, TTS
+ - **HuggingFace** - Open-source models via Inference API
+ - **Anthropic** - Claude models
+ - **Local Models** - Run transformers locally (optional)
+
+ ### Enterprise Features
+ - **API Key Authentication** - Secure access control
+ - **Role-based Rate Limiting** - Default, Premium, Admin tiers
+ - **Multi-model Routing** - Select models via API or policy
+ - **Background Workers** - Async document processing
+ - **Observability** - Health checks, metrics, structured logging
+ - **CORS Support** - Cross-origin requests
+
+ ## 📋 API Endpoints
+
+ ### Health & Metrics
+ ```bash
+ GET  /health          # Service health check
+ GET  /metrics         # Prometheus-style metrics
+ POST /auth/verify     # Verify API key validity
+ ```
+
+ ### AI Chat
+ ```bash
+ POST /ai/chat         # Multi-turn conversation
+ GET  /ai/query        # Simple question answering
+ ```
+
+ ### RAG (Retrieval-Augmented Generation)
+ ```bash
+ POST /rag/query       # Query with document retrieval
+ GET  /rag/models      # List available models
+ ```
+
+ ### Image Generation
+ ```bash
+ POST /image/generate  # Generate images from text
+ ```
+
+ ### Voice Synthesis
+ ```bash
+ POST /voice/synthesize  # Text to speech
+ POST /voice/transcribe  # Speech to text (optional)
+ ```
+
+ ### Document Management
+ ```bash
+ POST /upload              # Upload document for ingestion
+ GET  /docs/:id/sources    # Get document chunks
+ POST /webhook/events      # Ingestion completion webhook
+ ```
+
+ ## 🚀 Quick Start
+
+ ### Prerequisites
+ - Node.js 18+ and npm
+ - Encore CLI: `npm install -g encore`
+ - API keys (OpenAI, HuggingFace, etc.)
+
+ ### Local Development
+
+ 1. **Clone and install dependencies**
+    ```bash
+    npm install
+    ```
+
+ 2. **Configure environment variables**
+    ```bash
+    cp .env.example .env
+    # Edit .env with your API keys
+    ```
+
+ 3. **Run the development server**
+    ```bash
+    encore run
+    ```
+
+    The API will be available at `http://localhost:8000`.
+
+ 4. **Run tests**
+    ```bash
+    npm test
+    ```
+
+ ## 🔑 Environment Variables
+
+ | Variable | Description | Required | Default |
+ |----------|-------------|----------|---------|
+ | `OPENAI_API_KEY` | OpenAI API key for GPT models | No* | - |
+ | `HUGGINGFACE_API_KEY` | HuggingFace API key | No* | - |
+ | `ANTHROPIC_API_KEY` | Anthropic API key for Claude | No* | - |
+ | `PINECONE_API_KEY` | Pinecone vector DB key | No | In-memory fallback |
+ | `API_KEYS` | Comma-separated valid API keys | Yes | `demo-key-1` |
+ | `ADMIN_API_KEYS` | Admin-level API keys | No | - |
+ | `RATE_LIMIT_DEFAULT` | Requests/min for default tier | No | 60 |
+ | `RATE_LIMIT_PREMIUM` | Requests/min for premium tier | No | 300 |
+ | `DEFAULT_CHAT_MODEL` | Default LLM model | No | `gpt-3.5-turbo` |
+
+ *At least one LLM provider key is required
+
+ ## 📖 API Usage Examples
+
+ ### 1. Chat Endpoint
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/ai/chat \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {"role": "user", "content": "What is machine learning?"}
+     ],
+     "model": "gpt-3.5-turbo",
+     "options": {
+       "temperature": 0.7,
+       "max_tokens": 500
+     }
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "reply": "Machine learning is a subset of artificial intelligence...",
+   "model": "gpt-3.5-turbo",
+   "usage": {
+     "prompt_tokens": 15,
+     "completion_tokens": 120,
+     "total_tokens": 135
+   },
+   "sources": null
+ }
+ ```
+
+ ### 2. RAG Query Endpoint
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/rag/query \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What are the key features of our product?",
+     "top_k": 5,
+     "model": "gpt-4",
+     "use_retrieval": true
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "answer": "Based on the documentation, the key features include...",
+   "sources": [
+     {
+       "doc_id": "doc_123",
+       "chunk_id": "chunk_5",
+       "content": "Our product features...",
+       "score": 0.92
+     }
+   ],
+   "model": "gpt-4",
+   "usage": {
+     "prompt_tokens": 450,
+     "completion_tokens": 180,
+     "total_tokens": 630
+   }
+ }
+ ```
+
+ ### 3. Image Generation
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/image/generate \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "prompt": "A futuristic city with flying cars at sunset",
+     "model": "dall-e-3",
+     "size": "1024x1024",
+     "n": 1
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "images": [
+     {
+       "url": "https://...",
+       "revised_prompt": "A futuristic city with flying cars..."
+     }
+   ],
+   "model": "dall-e-3",
+   "created": 1698765432
+ }
+ ```
+
+ ### 4. Voice Synthesis
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/voice/synthesize \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "text": "Hello, this is a test of the voice synthesis system.",
+     "voice": "alloy",
+     "format": "mp3"
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "audio_url": "data:audio/mp3;base64,//uQx...",
+   "voice": "alloy",
+   "format": "mp3",
+   "duration_ms": 3200
+ }
+ ```
+
+ ### 5. Document Upload
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/upload \
+   -H "Authorization: Bearer demo-key-1" \
+   -F "file=@document.pdf" \
+   -F "metadata={\"title\":\"Product Guide\",\"category\":\"documentation\"}"
+ ```
+
+ **Response:**
+ ```json
+ {
+   "doc_id": "doc_abc123",
+   "filename": "document.pdf",
+   "size_bytes": 245760,
+   "status": "processing",
+   "estimated_chunks": 15,
+   "webhook_url": "/webhook/events"
+ }
+ ```
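+
+ When ingestion finishes, the worker emits a webhook event. A representative `document.ingestion.completed` payload, following the `WebhookEvent` shape defined in `backend/types/models.ts` (the IDs and chunk count here are illustrative):
+
+ ```json
+ {
+   "event_type": "document.ingestion.completed",
+   "doc_id": "doc_abc123",
+   "timestamp": 1698765432000,
+   "data": {
+     "chunks_created": 15,
+     "status": "completed"
+   }
+ }
+ ```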
+
+ ## 🧪 Testing
+
+ Run the test suite:
+ ```bash
+ npm test
+ ```
+
+ Run with coverage:
+ ```bash
+ npm run test:coverage
+ ```
+
+ Tests include:
+ - Unit tests for all adapters
+ - Integration tests for API endpoints
+ - Mock implementations for external services
+ - Rate limiting validation
+ - Authentication checks
+
+ ## 🐳 Docker Deployment
+
+ ### Build Docker Image
+ ```bash
+ docker build -t ai-api-service .
+ ```
+
+ ### Run with Docker Compose
+ ```bash
+ docker-compose up
+ ```
+
+ This starts:
+ - API service on port 8000
+ - Redis for rate limiting (optional)
+ - Background workers
+
+ ## ☁️ Cloud Deployment
+
+ ### Deploy to Encore Cloud (Recommended)
+
+ 1. **Install the Encore CLI**
+    ```bash
+    npm install -g encore
+    ```
+
+ 2. **Log in to Encore**
+    ```bash
+    encore auth login
+    ```
+
+ 3. **Deploy**
+    ```bash
+    encore deploy
+    ```
+
+ ### Deploy to Hugging Face Spaces
+
+ 1. **Create a new Space** at https://huggingface.co/spaces
+
+ 2. **Add a Dockerfile**
+    ```dockerfile
+    FROM node:18-alpine
+    WORKDIR /app
+    COPY package*.json ./
+    RUN npm ci --only=production
+    COPY . .
+    RUN npm run build
+    EXPOSE 7860
+    ENV PORT=7860
+    CMD ["npm", "start"]
+    ```
+
+ 3. **Configure secrets** in the Space settings:
+    - `OPENAI_API_KEY`
+    - `HUGGINGFACE_API_KEY`
+    - `API_KEYS`
+
+ 4. **Push to the Space**
+    ```bash
+    git remote add space https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
+    git push space main
+    ```
+
+ ### Deploy to Generic Cloud (AWS, GCP, Azure)
+
+ 1. **Build the production image**
+    ```bash
+    docker build -t ai-api-service:latest .
+    ```
+
+ 2. **Push to a container registry**
+    ```bash
+    docker tag ai-api-service:latest your-registry/ai-api-service:latest
+    docker push your-registry/ai-api-service:latest
+    ```
+
+ 3. **Deploy to a container service**
+    - AWS ECS/Fargate
+    - GCP Cloud Run
+    - Azure Container Instances
+
+ 4. **Set environment variables** in the cloud console
+
+ ## 📊 Scaling Considerations
+
+ ### Horizontal Scaling
+ - **Stateless design** - All state lives in external services (Pinecone, Redis)
+ - **Load balancing** - Use an ALB/NLB in front of multiple instances
+ - **Auto-scaling** - Scale based on CPU/memory or request rate
+
+ ### Vector Database
+ - **Pinecone** - Managed, scales automatically
+ - **Milvus** - Self-hosted, requires cluster setup
+ - **In-memory** - Development only, not for production (see the sketch below)
+
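+ At its core, the in-memory fallback only needs a brute-force cosine-similarity scan. A minimal sketch of the `VectorDBAdapter` contract from `backend/types/models.ts` (an illustration of the idea, not the shipped `InMemoryVectorDB`):
+
+ ```ts
+ type VectorSearchResult = { id: string; score: number; metadata: Record<string, any> };
+ type Row = { id: string; values: number[]; metadata: Record<string, any> };
+
+ function cosine(a: number[], b: number[]): number {
+   let dot = 0, na = 0, nb = 0;
+   for (let i = 0; i < a.length; i++) {
+     dot += a[i] * b[i];
+     na += a[i] * a[i];
+     nb += b[i] * b[i];
+   }
+   return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
+ }
+
+ class TinyVectorStore {
+   private rows: Row[] = [];
+
+   async upsert(vectors: Row[]): Promise<void> {
+     for (const v of vectors) {
+       // Replace any existing vector with the same id.
+       this.rows = this.rows.filter(r => r.id !== v.id);
+       this.rows.push(v);
+     }
+   }
+
+   async query(queryVector: number[], topK: number, filter?: Record<string, any>): Promise<VectorSearchResult[]> {
+     return this.rows
+       .filter(r => !filter || Object.entries(filter).every(([k, v]) => r.metadata[k] === v))
+       .map(r => ({ id: r.id, score: cosine(queryVector, r.values), metadata: r.metadata }))
+       .sort((a, b) => b.score - a.score)
+       .slice(0, topK);
+   }
+
+   async delete(ids: string[]): Promise<void> {
+     this.rows = this.rows.filter(r => !ids.includes(r.id));
+   }
+
+   async isAvailable(): Promise<boolean> {
+     return true;
+   }
+ }
+ ```
+
+ The scan is O(n) per query, which is fine for development datasets and is exactly why production deployments should point at Pinecone instead.
+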
+ ### Background Workers
+ - **Concurrent processing** - Adjust `WORKER_CONCURRENCY`
+ - **Queue depth** - Monitor pending ingestion jobs
+ - **Retry logic** - Failed jobs auto-retry with backoff
+
+ ### Cost Optimization
+ - **Model selection** - Use cheaper models (GPT-3.5 vs GPT-4)
+ - **Caching** - Cache frequent queries (not implemented; add Redis)
+ - **Batch processing** - Group document ingestions
+ - **Rate limiting** - Prevent abuse and cost overruns
+
+ ## 🔒 Security Best Practices
+
+ 1. **API Keys** - Rotate regularly, use environment variables
+ 2. **Rate Limiting** - Prevent abuse and DDoS
+ 3. **Input Validation** - All requests validated with Zod schemas (see the sketch below)
+ 4. **CORS** - Configure allowed origins
+ 5. **File Upload** - Size limits, type validation
+ 6. **Secrets Management** - Use Encore secrets or cloud secret managers
+
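+ For validation, a minimal sketch of what such a schema could look like for the chat endpoint (illustrative; the field names mirror `ChatRequest` in `backend/types/models.ts`, and the shipped schemas may differ):
+
+ ```ts
+ import { z } from 'zod';
+
+ // Illustrative Zod schema mirroring the ChatRequest type.
+ const MessageSchema = z.object({
+   role: z.enum(['system', 'user', 'assistant']),
+   content: z.string().min(1),
+   timestamp: z.number().optional(),
+ });
+
+ const ChatRequestSchema = z.object({
+   conversation: z.array(MessageSchema).min(1),
+   model: z.string().optional(),
+   options: z.object({
+     temperature: z.number().min(0).max(2).optional(),
+     max_tokens: z.number().int().positive().optional(),
+   }).optional(),
+ });
+
+ // Reject malformed bodies before they reach any model adapter.
+ export function validateChatRequest(body: unknown) {
+   const parsed = ChatRequestSchema.safeParse(body);
+   if (!parsed.success) {
+     throw new Error(`Invalid request: ${parsed.error.message}`);
+   }
+   return parsed.data;
+ }
+ ```
+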
+ ## 🛠️ Troubleshooting
+
+ ### Common Issues
+
+ **"Invalid API key" errors**
+ - Check that the `.env` file has the correct keys
+ - Verify the API key has credits/quota
+ - Ensure there are no extra spaces in the keys
+
+ **Rate limit exceeded**
+ - Increase the `RATE_LIMIT_*` values
+ - Use an admin API key for testing
+ - Check the Prometheus metrics for usage
+
+ A rejected request surfaces an error like the sketch below.
+
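+ The limiter in `backend/utils/rate_limit.ts` rejects with an object of this shape, which the gateway layer maps to an HTTP 429 (values shown are for the default tier; the exact response formatting depends on the gateway):
+
+ ```json
+ {
+   "statusCode": 429,
+   "message": "Rate limit exceeded",
+   "limit": 60,
+   "remaining": 0,
+   "resetAt": 1698765492000
+ }
+ ```
+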
+ **Vector DB connection fails**
+ - Check the Pinecone API key and environment
+ - The service falls back to in-memory storage automatically
+ - Verify network connectivity
+
+ **Document upload fails**
+ - Check that the file size is below `MAX_FILE_SIZE_MB`
+ - Verify the file format (PDF, DOCX, TXT)
+ - Check disk space for temp files
+
+ ## 📚 Client Libraries
+
+ See the `examples/` directory for:
+ - `js_client.js` - JavaScript/Node.js client (see the usage sketch below)
+ - `curl.sh` - Curl command examples
+ - `python_client.py` - Python client (coming soon)
+
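+ A minimal usage sketch of the bundled client (assumes `AIAPIClient` has been loaded from `examples/js_client.js`, e.g. via `require()` in a CommonJS context; the demo key comes from `.env.example`):
+
+ ```ts
+ // Sketch: AIAPIClient is assumed to be in scope, loaded from examples/js_client.js.
+ declare const AIAPIClient: any;
+
+ const client = new AIAPIClient('http://localhost:8000', 'demo-key-1');
+
+ // One chat turn with a low temperature for a short, stable reply.
+ const res = await client.chat(
+   [{ role: 'user', content: 'Summarize what this service does.' }],
+   null,
+   { temperature: 0.2, max_tokens: 100 },
+ );
+ console.log(res.reply);
+ ```
+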
+ ## 🤝 Contributing
+
+ 1. Fork the repository
+ 2. Create a feature branch
+ 3. Make your changes
+ 4. Add tests
+ 5. Submit a pull request
+
+ ## 📝 License
+
+ MIT License - see the LICENSE file for details
+
+ ## 🆘 Support
+
+ - GitHub Issues: [Report bugs](https://github.com/your-org/ai-api-service/issues)
+ - Documentation: [Full API reference](https://docs.your-service.com)
+ - Email: [email protected]
+
+ ## 🗺️ Roadmap
+
+ - [ ] Caching layer (Redis)
+ - [ ] Streaming responses (SSE)
+ - [ ] Multi-language support
+ - [ ] Fine-tuning pipeline
+ - [ ] Analytics dashboard
+ - [ ] Webhook integrations
+ - [ ] GraphQL API
+ - [ ] gRPC support
tests/api.test.ts ADDED
@@ -0,0 +1,233 @@
+ import { describe, it, expect, beforeAll, vi } from 'vitest';
+
+ describe('AI API Service Tests', () => {
+   beforeAll(() => {
+     process.env.API_KEYS = 'test-key-1,test-key-2';
+     process.env.ADMIN_API_KEYS = 'admin-key-1';
+     process.env.OPENAI_API_KEY = 'sk-test-mock-key';
+   });
+
+   describe('Authentication', () => {
+     it('should validate correct API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       const result = validateApiKey('Bearer test-key-1');
+       expect(result.apiKey).toBe('test-key-1');
+       expect(result.tier).toBe('default');
+     });
+
+     it('should validate admin API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       const result = validateApiKey('Bearer admin-key-1');
+       expect(result.apiKey).toBe('admin-key-1');
+       expect(result.tier).toBe('admin');
+     });
+
+     it('should reject invalid API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       expect(() => validateApiKey('Bearer invalid-key')).toThrow('Invalid API key');
+     });
+
+     it('should reject missing API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       expect(() => validateApiKey('')).toThrow('Missing Authorization header');
+     });
+   });
+
+   describe('Rate Limiting', () => {
+     it('should allow requests within rate limit', () => {
+       const { rateLimiter } = require('../backend/utils/rate_limit');
+       const info = rateLimiter.checkRateLimit('test-key-1', 'default');
+       expect(info.remaining).toBeGreaterThanOrEqual(0);
+       expect(info.limit).toBeGreaterThan(0);
+     });
+
+     it('should have higher limit for admin tier', () => {
+       const { rateLimiter } = require('../backend/utils/rate_limit');
+       const defaultInfo = rateLimiter.getRateLimitInfo('test-key-1', 'default');
+       const adminInfo = rateLimiter.getRateLimitInfo('admin-key-1', 'admin');
+       expect(adminInfo.limit).toBeGreaterThan(defaultInfo.limit);
+     });
+   });
+
+   describe('Vector DB', () => {
+     it('should store and retrieve vectors from in-memory DB', async () => {
+       const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
+       const db = new InMemoryVectorDB();
+
+       await db.upsert([
+         {
+           id: 'test-1',
+           values: [1, 0, 0],
+           metadata: { content: 'Test document 1' },
+         },
+         {
+           id: 'test-2',
+           values: [0, 1, 0],
+           metadata: { content: 'Test document 2' },
+         },
+       ]);
+
+       const results = await db.query([1, 0, 0], 2);
+       expect(results.length).toBe(2);
+       expect(results[0].id).toBe('test-1');
+       expect(results[0].score).toBeGreaterThan(results[1].score);
+     });
+
+     it('should filter results based on metadata', async () => {
+       const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
+       const db = new InMemoryVectorDB();
+
+       await db.upsert([
+         {
+           id: 'doc-1',
+           values: [1, 0, 0],
+           metadata: { category: 'tech', content: 'Tech document' },
+         },
+         {
+           id: 'doc-2',
+           values: [0.9, 0, 0],
+           metadata: { category: 'science', content: 'Science document' },
+         },
+       ]);
+
+       const results = await db.query([1, 0, 0], 5, { category: 'tech' });
+       expect(results.length).toBe(1);
+       expect(results[0].id).toBe('doc-1');
+     });
+
+     it('should delete vectors', async () => {
+       const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
+       const db = new InMemoryVectorDB();
+
+       await db.upsert([
+         { id: 'delete-1', values: [1, 0, 0], metadata: {} },
+       ]);
+
+       let results = await db.query([1, 0, 0], 5);
+       expect(results.length).toBe(1);
+
+       await db.delete(['delete-1']);
+
+       results = await db.query([1, 0, 0], 5);
+       expect(results.length).toBe(0);
+     });
+   });
+
+   describe('Document Service', () => {
+     it('should chunk text correctly', () => {
+       const { documentService } = require('../backend/services/document_service');
+
+       const text = 'a'.repeat(2500);
+       const chunks = documentService['chunkText'](text, 'doc-1', {});
+
+       expect(chunks.length).toBeGreaterThan(1);
+       expect(chunks[0].chunk_index).toBe(0);
+       expect(chunks[0].doc_id).toBe('doc-1');
+     });
+
+     it('should extract text from txt file', async () => {
+       const { documentService } = require('../backend/services/document_service');
+
+       const content = Buffer.from('This is a test document', 'utf-8');
+       const text = await documentService['extractText']('test.txt', content);
+
+       expect(text).toBe('This is a test document');
+     });
+   });
+
+   describe('Metrics', () => {
+     it('should track requests', () => {
+       const { metrics } = require('../backend/utils/metrics');
+
+       const initialMetrics = metrics.getMetrics();
+       metrics.incrementRequests('/test');
+       const updatedMetrics = metrics.getMetrics();
+
+       expect(updatedMetrics.requests_total).toBeGreaterThan(initialMetrics.requests_total);
+     });
+
+     it('should track errors', () => {
+       const { metrics } = require('../backend/utils/metrics');
+
+       const initialMetrics = metrics.getMetrics();
+       metrics.incrementErrors();
+       const updatedMetrics = metrics.getMetrics();
+
+       expect(updatedMetrics.errors_total).toBeGreaterThan(initialMetrics.errors_total);
+     });
+
+     it('should track response times', () => {
+       const { metrics } = require('../backend/utils/metrics');
+
+       metrics.recordResponseTime(100);
+       metrics.recordResponseTime(200);
+       const metricsData = metrics.getMetrics();
+
+       expect(metricsData.average_response_time_ms).toBeGreaterThan(0);
+     });
+   });
+
+   describe('Logger', () => {
+     it('should log messages at appropriate levels', () => {
+       const { logger } = require('../backend/utils/logger');
+       const consoleSpy = vi.spyOn(console, 'log');
+
+       logger.info('Test message');
+       expect(consoleSpy).toHaveBeenCalled();
+
+       consoleSpy.mockRestore();
+     });
+   });
+
+   describe('Configuration', () => {
+     it('should load default configuration', () => {
+       const { loadConfig } = require('../backend/types/config');
+       const config = loadConfig();
+
+       expect(config.auth.apiKeys).toContain('test-key-1');
+       expect(config.rateLimit.default).toBeGreaterThan(0);
+       expect(config.documents.maxFileSizeMB).toBeGreaterThan(0);
+     });
+
+     it('should parse comma-separated API keys', () => {
+       const { loadConfig } = require('../backend/types/config');
+       const config = loadConfig();
+
+       expect(Array.isArray(config.auth.apiKeys)).toBe(true);
+       expect(config.auth.apiKeys.length).toBeGreaterThan(0);
+     });
+   });
+
+   describe('AI Service', () => {
+     it('should initialize with available adapters', () => {
+       const { aiService } = require('../backend/services/ai_service');
+       const models = aiService.getAvailableModels();
+       expect(Array.isArray(models)).toBe(true);
+     });
+   });
+
+   describe('RAG Service', () => {
+     it('should build RAG prompt correctly', () => {
+       const { ragService } = require('../backend/services/rag_service');
+       const prompt = ragService['buildRAGPrompt'](
+         'What is AI?',
+         'AI stands for Artificial Intelligence'
+       );
+       expect(prompt).toContain('What is AI?');
+       expect(prompt).toContain('AI stands for Artificial Intelligence');
+     });
+   });
+
+   describe('Ingestion Worker', () => {
+     it('should track job status', async () => {
+       const { ingestionWorker } = require('../backend/workers/ingestion_worker');
+
+       await ingestionWorker.addJob('job-1', 'test.pdf');
+       const job = ingestionWorker.getJobStatus('job-1');
+
+       expect(job).toBeDefined();
+       expect(job?.doc_id).toBe('job-1');
+       expect(job?.status).toBe('pending');
+     });
+   });
+ });