cygon committed
Commit d61feef · 1 Parent(s): 05538b8

Initial deployment with Ollama support

.dockerignore ADDED
@@ -0,0 +1,18 @@
+ node_modules
+ npm-debug.log
+ .env
+ .env.local
+ .git
+ .gitignore
+ README.md
+ .vscode
+ .idea
+ *.md
+ !README.md
+ .DS_Store
+ dist
+ build
+ coverage
+ .encore
+ data
+ *.log
.env.example ADDED
@@ -0,0 +1,43 @@
+ # API Keys and Credentials (at least one required, or use Ollama)
+ OPENAI_API_KEY=your_openai_api_key_here
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
+
+ # Ollama Configuration (local LLM - no API key needed)
+ OLLAMA_BASE_URL=http://localhost:11434
+ OLLAMA_MODEL=llama2
+ OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+ # Vector Database Configuration
+ PINECONE_API_KEY=your_pinecone_api_key_here
+ PINECONE_ENVIRONMENT=us-west1-gcp
+ PINECONE_INDEX_NAME=ai-api-vectors
+
+ # Authentication
+ API_KEYS=demo-key-1,demo-key-2,admin-key-3
+ ADMIN_API_KEYS=admin-key-3
+
+ # Rate Limiting (requests per minute)
+ RATE_LIMIT_DEFAULT=60
+ RATE_LIMIT_PREMIUM=300
+ RATE_LIMIT_ADMIN=1000
+
+ # Model Configuration
+ DEFAULT_CHAT_MODEL=llama2
+ DEFAULT_EMBEDDING_MODEL=nomic-embed-text
+ DEFAULT_IMAGE_MODEL=dall-e-3
+ DEFAULT_VOICE_MODEL=tts-1
+
+ # Service Configuration
+ PORT=8000
+ LOG_LEVEL=info
+ CORS_ORIGINS=http://localhost:3000,http://localhost:5173
+
+ # Document Processing
+ MAX_FILE_SIZE_MB=10
+ CHUNK_SIZE=1000
+ CHUNK_OVERLAP=200
+
+ # Background Workers
+ ENABLE_BACKGROUND_WORKERS=true
+ WORKER_CONCURRENCY=5
.gitignore ADDED
@@ -0,0 +1,6 @@
+ .encore
+ encore.gen.go
+ encore.gen.cue
+ /.encore
+ node_modules
+ /encore.gen
CHANGELOG.md ADDED
@@ -0,0 +1,152 @@
+ # Changelog
+
+ All notable changes to the AI API Service will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [1.0.0] - 2025-10-01
+
+ ### Added
+
+ #### Core Features
+ - **Multi-turn Chat API** - Conversational AI with context management supporting multiple LLM providers
+ - **RAG (Retrieval-Augmented Generation)** - Query documents with AI-powered vector retrieval
+ - **Image Generation** - Text-to-image using DALL-E or Stable Diffusion
+ - **Voice Synthesis** - Text-to-speech with multiple voice options via OpenAI TTS
+ - **Speech Recognition** - Audio transcription using Whisper
+ - **Document Ingestion** - Upload and process PDF, DOCX, TXT files with automatic chunking
+
+ #### Model Support
+ - OpenAI integration (GPT-4, GPT-3.5-turbo, DALL-E, TTS, Whisper)
+ - HuggingFace Inference API support (Mistral, Stable Diffusion, embeddings)
+ - Anthropic Claude models (Claude 3 Sonnet, Opus)
+ - Local model support (optional, via transformers)
+
+ #### Vector Database
+ - Pinecone adapter for production vector storage
+ - In-memory vector DB fallback for development
+ - Cosine similarity search
+ - Metadata filtering support
+
+ #### Authentication & Security
+ - API Key authentication with Bearer token support
+ - Role-based access control (default, premium, admin tiers)
+ - Token bucket rate limiting (configurable per tier)
+ - Input validation with TypeScript type safety
+
+ #### Observability
+ - Structured JSON logging with configurable log levels
+ - Prometheus-style metrics endpoint
+ - Health check endpoint with service status
+ - Request/response time tracking
+ - Model usage statistics
+
+ #### Background Processing
+ - Async document ingestion workers
+ - Configurable worker concurrency
+ - Webhook notifications for completion events
+ - Automatic text chunking with overlap
+
+ #### Developer Experience
+ - Comprehensive TypeScript types
+ - Auto-generated API clients
+ - Example curl scripts
+ - JavaScript/Node.js client library
+ - Full test suite with vitest
+ - Detailed API documentation
+
+ #### Deployment
+ - Docker support with multi-stage builds
+ - Docker Compose for local development
+ - Environment-based configuration
+ - Health checks and graceful shutdown
+ - Production-ready error handling
+
+ ### API Endpoints
+
+ #### Health & Monitoring
+ - `GET /health` - Service health check with component status
+ - `GET /metrics` - Request metrics and usage statistics
+
+ #### Authentication
+ - `POST /auth/verify` - Validate API key and check rate limits
+
+ #### AI Chat
+ - `POST /ai/chat` - Multi-turn conversation with context
+ - `GET /ai/query` - Simple question answering
+
+ #### RAG
+ - `POST /rag/query` - Query with document retrieval
+ - `GET /rag/models` - List available LLM models
+
+ #### Image Generation
+ - `POST /image/generate` - Generate images from text prompts
+
+ #### Voice
+ - `POST /voice/synthesize` - Text to speech synthesis
+ - `POST /voice/transcribe` - Speech to text transcription
+
+ #### Documents
+ - `POST /upload` - Upload and ingest documents
+ - `GET /docs/:id/sources` - Retrieve document chunks
+ - `POST /webhook/events` - Ingestion completion webhooks
+
+ ### Configuration
+
+ Environment variables for all services:
+ - LLM provider API keys (OpenAI, HuggingFace, Anthropic)
+ - Vector DB configuration (Pinecone)
+ - Rate limiting settings per tier
+ - Document processing parameters
+ - Worker configuration
+ - CORS and security settings
+
+ ### Testing
+
+ - Unit tests for all core services
+ - Integration tests for API endpoints
+ - Mock implementations for external services
+ - Rate limiting validation
+ - Authentication flow tests
+ - Vector DB operations tests
+
+ ### Documentation
+
+ - Comprehensive README with architecture diagram
+ - API reference with curl examples
+ - Environment variable guide
+ - Deployment instructions (Docker, Hugging Face Spaces, cloud providers)
+ - Scaling considerations and best practices
+ - Cost optimization guidelines
+ - Troubleshooting guide
+
+ ### Known Limitations
+
+ - Maximum file upload size: 10MB (configurable)
+ - In-memory vector DB not suitable for production
+ - No built-in caching layer (add Redis for production)
+ - Synchronous API calls (streaming support coming soon)
+
+ ### Future Roadmap
+
+ - Server-Sent Events (SSE) for streaming responses
+ - Redis caching layer for frequent queries
+ - Multi-language support for responses
+ - Fine-tuning pipeline integration
+ - Analytics dashboard
+ - Webhook integrations for third-party services
+ - GraphQL API support
+ - gRPC endpoints for high-performance use cases
+ - Kubernetes deployment manifests
+ - Auto-scaling configuration
+
+ ---
+
+ ## Release Notes
+
+ This is the initial release of the AI API Service, a production-ready TypeScript API for integrating multiple AI capabilities into chatbots, LLM applications, and intelligent systems.
+
+ The service is built on Encore.ts for type-safe backend development and includes comprehensive documentation, tests, and deployment configurations.
+
+ For questions, issues, or contributions, please visit the GitHub repository.
COMPLETE_DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,1529 @@
+ # Complete Step-by-Step Guide: Deploy AI API with Ollama to Hugging Face Spaces
+ ## (Absolute Beginner-Friendly Guide)
+
+ **What you'll build**: A fully working AI API running on Hugging Face Spaces that anyone can access via the internet, powered by Ollama (no OpenAI key needed).
+
+ **Time needed**: 30-45 minutes
+ **Cost**: FREE (or $0.60/hour for faster GPU)
+ **No prior experience needed!**
+
+ ---
+
+ ## 📋 **What You Need Before Starting**
+
+ 1. ✅ A Hugging Face account (we'll create this if you don't have one)
+ 2. ✅ Git installed on your computer
+ 3. ✅ Basic ability to copy/paste and follow instructions
+ 4. ✅ This project's code files (you already have these)
+
+ ---
+
+ ## 🎯 **PART 1: Create Hugging Face Account & Space**
+
+ ### **Step 1.1: Create Hugging Face Account** (Skip if you have one)
+
+ 1. Open your web browser
+ 2. Go to: https://huggingface.co/join
+ 3. Fill in:
+    - **Email**: Your email address
+    - **Username**: Pick a username (you'll need this later - write it down!)
+    - **Password**: Choose a strong password
+ 4. Click **"Sign Up"**
+ 5. Check your email and click the verification link
+ 6. You're now logged into Hugging Face!
+
+ ### **Step 1.2: Create a New Space**
+
+ 1. **Go to**: https://huggingface.co/new-space
+
+ 2. **Fill in the form**:
+
+    | Field | What to Enter | Example |
+    |-------|---------------|---------|
+    | **Owner** | Your username | `yourname` |
+    | **Space name** | `ai-api-ollama` | (or anything you like) |
+    | **License** | Select "MIT" | |
+    | **Select the Space SDK** | Click on **"Docker"** | ⚠️ IMPORTANT: Must be Docker! |
+    | **Space hardware** | Select **"CPU basic - Free"** for now | (We'll upgrade later if needed) |
+    | **Repo type** | Leave as **"Public"** | (or Private if you prefer) |
+
+ 3. **Click "Create Space"** button at the bottom
+
+ 4. **IMPORTANT - Write down your Space URL**:
+    ```
+    https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
+    ```
+    Replace `YOUR_USERNAME` with your actual username.
+
+ 5. You'll see a page with instructions - **ignore them for now**, we'll do it differently.
+
+ ---
+
+ ## 🔧 **PART 2: Install Git and Set Up Authentication**
+
+ ### **Step 2.1: Check if Git is Installed**
+
+ **On Windows**:
+ 1. Press `Windows Key + R`
+ 2. Type `cmd` and press Enter
+ 3. Type: `git --version`
+ 4. If you see a version number (like `git version 2.40.0`), you have Git ✅
+ 5. If you see an error, download Git from: https://git-scm.com/download/win
+
+ **On Mac**:
+ 1. Press `Command + Space`
+ 2. Type `terminal` and press Enter
+ 3. Type: `git --version`
+ 4. If you see a version number, you have Git ✅
+ 5. If not, it will prompt you to install Xcode Command Line Tools - click Install
+
+ **On Linux**:
+ ```bash
+ git --version
+ ```
+ If not installed:
+ ```bash
+ sudo apt-get update
+ sudo apt-get install git
+ ```
+
+ ### **Step 2.2: Create Hugging Face Access Token**
+
+ 1. Go to: https://huggingface.co/settings/tokens
+ 2. Click **"New token"** button
+ 3. Fill in:
+    - **Name**: `git-access` (or anything you like)
+    - **Role**: Select **"Write"**
+ 4. Click **"Generate token"**
+ 5. **CRITICAL**: Copy the token and save it somewhere safe (Notepad, password manager)
+    - It looks like: `hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
+    - ⚠️ **You won't be able to see this again!**
+
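+ If you'd rather not paste the token at every Git prompt, one convenience is embedding it in the clone URL (a sketch, using the placeholders from this guide; note the token then sits in plain text in `.git/config`, so only do this on a machine you trust):
+
+ ```bash
+ # Sketch: clone with the token embedded in the URL (replace both placeholders).
+ git clone https://YOUR_USERNAME:hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx@huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
+ ```
+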
+ ---
+
+ ## 💻 **PART 3: Clone Your Space to Your Computer**
+
+ ### **Step 3.1: Open Terminal/Command Prompt**
+
+ **Windows**:
+ 1. Press `Windows Key + R`
+ 2. Type `cmd` and press Enter
+ 3. Navigate to where you want to work (e.g., Desktop):
+    ```
+    cd Desktop
+    ```
+
+ **Mac/Linux**:
+ 1. Open Terminal
+ 2. Navigate to where you want to work:
+    ```bash
+    cd ~/Desktop
+    ```
+
+ ### **Step 3.2: Clone the Space Repository**
+
+ 1. **Copy this command** (replace YOUR_USERNAME with your actual Hugging Face username):
+    ```bash
+    git clone https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
+    ```
+
+ 2. **Example**:
+    ```bash
+    git clone https://huggingface.co/spaces/johndoe/ai-api-ollama
+    ```
+
+ 3. **Press Enter**
+
+ 4. When prompted for username and password:
+    - **Username**: Your Hugging Face username
+    - **Password**: **Paste your token** (NOT your password!) - the one that starts with `hf_`
+
+ 5. You should see:
+    ```
+    Cloning into 'ai-api-ollama'...
+    ```
+
+ 6. **Verify the folder was created**:
+    ```bash
+    cd ai-api-ollama
+    ls
+    ```
+    (On Windows use `dir` instead of `ls`)
+
+ ---
+
+ ## 📂 **PART 4: Copy Project Files to Space**
+
+ ### **Step 4.1: Locate Your AI API Service Files**
+
+ You should have the project files in a folder. Let's say they're in:
+ - Windows: `C:\Users\YourName\Downloads\ai-api-service\`
+ - Mac/Linux: `~/Downloads/ai-api-service/`
+
+ ### **Step 4.2: Copy ALL Files to Space Folder**
+
+ **Option A: Using File Explorer (Easiest)**
+
+ **Windows**:
+ 1. Open File Explorer
+ 2. Navigate to your original `ai-api-service` folder
+ 3. Press `Ctrl + A` to select all files
+ 4. Press `Ctrl + C` to copy
+ 5. Navigate to `Desktop\ai-api-ollama` (your Space folder)
+ 6. Press `Ctrl + V` to paste
+ 7. When asked about replacing files, click **"Replace"**
+
+ **Mac**:
+ 1. Open Finder
+ 2. Navigate to your original `ai-api-service` folder
+ 3. Press `Cmd + A` to select all files
+ 4. Press `Cmd + C` to copy
+ 5. Navigate to `Desktop/ai-api-ollama` (your Space folder)
+ 6. Press `Cmd + V` to paste
+
+ **Option B: Using Command Line**
+
+ From the terminal, in your Space folder:
+
+ **Windows**:
+ ```bash
+ xcopy /E /I "C:\Users\YourName\Downloads\ai-api-service\*" .
+ ```
+
+ **Mac/Linux**:
+ ```bash
+ cp -r ~/Downloads/ai-api-service/* .
+ ```
+
+ ### **Step 4.3: Verify Files Were Copied**
+
+ In your terminal (inside the `ai-api-ollama` folder):
+
+ ```bash
+ ls
+ ```
+
+ You should see these folders/files:
+ - `backend/`
+ - `examples/`
+ - `tests/`
+ - `package.json`
+ - `README.md`
+ - `.env.example`
+ - `Dockerfile.huggingface`
+ - And many more files...
+
+ ✅ If you see these, you're good to proceed!
+
+ ---
+
+ ## 🐳 **PART 5: Prepare the Dockerfile for Hugging Face**
+
+ ### **Step 5.1: Rename the Dockerfile**
+
+ Hugging Face expects a file named exactly `Dockerfile` (no extension).
+
+ **Windows Command Prompt**:
+ ```bash
+ ren Dockerfile.huggingface Dockerfile
+ ```
+
+ **Mac/Linux Terminal**:
+ ```bash
+ mv Dockerfile.huggingface Dockerfile
+ ```
+
+ ### **Step 5.2: Verify the Dockerfile**
+
+ ```bash
+ cat Dockerfile
+ ```
+
+ You should see content starting with `FROM node:18-alpine AS builder`
+
+ ✅ Good to go!
+
+ ---
+
+ ## 📝 **PART 6: Create Space Configuration Files**
+
+ ### **Step 6.1: Create README.md for Your Space**
+
+ This file tells Hugging Face how to run your Space.
+
+ **Create a new file called `README.md`** in your `ai-api-ollama` folder:
+
+ **Windows**:
+ ```bash
+ notepad README.md
+ ```
+
+ **Mac/Linux**:
+ ```bash
+ nano README.md
+ ```
+
+ **Copy and paste this EXACT content** (replace YOUR_USERNAME):
+
+ ````markdown
+ ---
+ title: AI API Service with Ollama
+ emoji: 🤖
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ ---
+
+ # AI API Service with Ollama
+
+ A production-ready AI API service powered by Ollama. No OpenAI API key needed!
+
+ ## 🚀 Features
+
+ - 💬 **Multi-turn Chat** - Conversational AI with Llama2/Llama3
+ - 📚 **RAG** - Retrieval-Augmented Generation with vector search
+ - 🖼️ **Image Generation** - Text-to-image (requires additional API key)
+ - 🎙️ **Voice Synthesis** - Text-to-speech (requires additional API key)
+ - 📄 **Document Processing** - Upload and query PDFs, DOCX, TXT
+ - 🔒 **Authentication** - Secure API key-based access
+ - ⚡ **Rate Limiting** - Prevent abuse
+
+ ## 📡 API Endpoint
+
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ ## 🔑 Quick Start
+
+ ### Health Check
+
+ ```bash
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
+ ```
+
+ ### Chat Example
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {"role": "user", "content": "Explain machine learning in simple terms"}
+     ]
+   }'
+ ```
+
+ ### RAG Example
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/rag/query \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What are transformers in AI?",
+     "top_k": 5
+   }'
+ ```
+
+ ## 🔐 Authentication
+
+ Default API key: `demo-key-1`
+
+ **⚠️ IMPORTANT**: Change this in Space settings for production use!
+
+ ## 📚 Available Endpoints
+
+ | Endpoint | Method | Description |
+ |----------|--------|-------------|
+ | `/health` | GET | Service health check |
+ | `/metrics` | GET | Usage metrics |
+ | `/ai/chat` | POST | Multi-turn conversation |
+ | `/ai/query` | GET | Simple question answering |
+ | `/rag/query` | POST | Query with document retrieval |
+ | `/image/generate` | POST | Generate images (needs API key) |
+ | `/voice/synthesize` | POST | Text to speech (needs API key) |
+ | `/upload` | POST | Upload documents |
+
+ ## ⚙️ Configuration
+
+ Configured with Ollama running **inside the Space** for true serverless deployment.
+
+ **Current Settings**:
+ - Model: Llama 2 (7B)
+ - Embedding Model: nomic-embed-text
+ - Hardware: See Space settings
+
+ ## 🎯 Use Cases
+
+ - Chatbot backend for web/mobile apps
+ - Document Q&A system
+ - AI-powered search
+ - Content generation API
+ - Educational AI assistant
+
+ ## 📖 Documentation
+
+ Full API documentation: [See repository](https://github.com/your-username/ai-api-service)
+
+ ## 💡 Tips
+
+ 1. **First request is slow** - Ollama loads the model on first use (~30 seconds)
+ 2. **Subsequent requests are fast** - Model stays in memory
+ 3. **Use persistent hardware** - Upgrade from CPU to GPU for better performance
+ 4. **Monitor costs** - Free tier works great for testing, upgrade for production
+
+ ## 🆘 Support
+
+ Having issues? Check the logs or open an issue on GitHub.
+
+ ---
+
+ Built with [Encore.ts](https://encore.dev) and [Ollama](https://ollama.ai)
+ ````
+
+ **Save the file**:
+ - Notepad: File → Save
+ - Nano: Press `Ctrl + O`, then `Enter`, then `Ctrl + X`
+
+ ---
+
+ ## 🔐 **PART 7: Configure Environment Variables in Space Settings**
+
+ ### **Step 7.1: Go to Your Space Settings**
+
+ 1. Open your browser
+ 2. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
+ 3. Scroll down to **"Variables and secrets"** section
+
+ ### **Step 7.2: Add Environment Variables**
+
+ Click **"New variable"** for each of these:
+
+ #### **Variable 1: API_KEYS**
+ - **Name**: `API_KEYS`
+ - **Value**: `my-secret-key-12345,another-key-67890`
+ - ⚠️ **IMPORTANT**: Replace with your own random keys!
+ - Use strong, random strings (20+ characters)
+ - Separate multiple keys with commas (no spaces)
+ - Click **"Save"**
+
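+ One quick way to produce strong keys is `openssl rand`, which is available by default on most Mac/Linux systems (a minimal sketch; any source of 20+ random characters works just as well):
+
+ ```bash
+ # Generate two random 40-character hex keys and print them in API_KEYS format.
+ KEY1=$(openssl rand -hex 20)
+ KEY2=$(openssl rand -hex 20)
+ echo "API_KEYS=$KEY1,$KEY2"
+ ```
+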
+ #### **Variable 2: ADMIN_API_KEYS** (Optional but recommended)
+ - **Name**: `ADMIN_API_KEYS`
+ - **Value**: `admin-super-secret-key-99999`
+ - ⚠️ Make this DIFFERENT from regular API keys
+ - This bypasses rate limits
+ - Click **"Save"**
+
+ #### **Variable 3: OLLAMA_MODEL**
+ - **Name**: `OLLAMA_MODEL`
+ - **Value**: Choose one:
+   - `phi:latest` (Fastest, smallest - 1.3GB - **RECOMMENDED FOR FREE CPU**)
+   - `llama2:latest` (Good quality - 4GB)
+   - `llama3:latest` (Best quality - 4.7GB - needs GPU)
+   - `mistral:latest` (Very good - 4GB)
+ - Click **"Save"**
+
+ **Recommendation for FREE tier**: Use `phi:latest`
+
+ #### **Variable 4: OLLAMA_EMBEDDING_MODEL**
+ - **Name**: `OLLAMA_EMBEDDING_MODEL`
+ - **Value**: `nomic-embed-text`
+ - Leave as is, this works great for RAG
+ - Click **"Save"**
+
+ #### **Variable 5: RATE_LIMIT_DEFAULT**
+ - **Name**: `RATE_LIMIT_DEFAULT`
+ - **Value**: `100`
+ - This means 100 requests per minute for regular API keys
+ - Click **"Save"**
+
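+ To see the limiter in action once the Space is running, you can fire off more requests than the limit allows; with `RATE_LIMIT_DEFAULT=100`, the last few status codes printed by this sketch should be `429` (it assumes the URL and key used throughout this guide):
+
+ ```bash
+ # Send 105 rapid requests and print only the HTTP status codes.
+ for i in $(seq 1 105); do
+   curl -s -o /dev/null -w "%{http_code}\n" \
+     "https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/query?q=ping" \
+     -H "Authorization: Bearer my-secret-key-12345"
+ done
+ ```
+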
+ #### **Variable 6: LOG_LEVEL** (Optional)
+ - **Name**: `LOG_LEVEL`
+ - **Value**: `info`
+ - Click **"Save"**
+
+ ### **Step 7.3: Verify Your Variables**
+
+ You should now see these variables listed:
+ - ✅ `API_KEYS`
+ - ✅ `ADMIN_API_KEYS` (if you added it)
+ - ✅ `OLLAMA_MODEL`
+ - ✅ `OLLAMA_EMBEDDING_MODEL`
+ - ✅ `RATE_LIMIT_DEFAULT`
+
+ ---
+
+ ## 📤 **PART 8: Push Code to Hugging Face**
+
+ Now we'll upload all the files to Hugging Face.
+
+ ### **Step 8.1: Configure Git (First Time Only)**
+
+ In your terminal (inside the `ai-api-ollama` folder):
+
+ ```bash
+ git config user.email "you@example.com"
+ git config user.name "Your Name"
+ ```
+
+ Replace with your actual email and name.
+
+ ### **Step 8.2: Add All Files to Git**
+
+ ```bash
+ git add .
+ ```
+
+ The `.` means "add all files in this folder"
+
+ ### **Step 8.3: Commit the Files**
+
+ ```bash
+ git commit -m "Initial deployment with Ollama support"
+ ```
+
+ You should see output like:
+ ```
+ [main abc1234] Initial deployment with Ollama support
+ XX files changed, XXX insertions(+)
+ ```
+
+ ### **Step 8.4: Push to Hugging Face**
+
+ ```bash
+ git push
+ ```
+
+ When prompted for credentials:
+ - **Username**: Your Hugging Face username
+ - **Password**: Your Hugging Face token (starts with `hf_`)
+
+ You'll see:
+ ```
+ Enumerating objects: XX, done.
+ Counting objects: 100% (XX/XX), done.
+ Writing objects: 100% (XX/XX), XX.XX MiB | XX.XX MiB/s, done.
+ ```
+
+ ✅ **Success!** Your code is now on Hugging Face.
+
+ ---
+
+ ## ⏳ **PART 9: Wait for Build & Monitor Progress**
+
+ ### **Step 9.1: Go to Your Space**
+
+ 1. Open browser: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama`
+ 2. You'll see a yellow "Building" status at the top
+
+ ### **Step 9.2: Watch the Build Logs**
+
+ 1. Click on the **"Logs"** tab (near the top)
+ 2. You'll see real-time output like:
+    ```
+    Building Docker image...
+    Step 1/15 : FROM node:18-alpine AS builder
+    ...
+    ```
+
+ ### **Step 9.3: What to Expect (Timeline)**
+
+ | Time | What's Happening | What You'll See |
+ |------|------------------|-----------------|
+ | 0-2 min | Docker image building | `Building Docker image...` |
+ | 2-5 min | Installing Node dependencies | `npm install...` |
+ | 5-8 min | Installing Ollama | `Installing Ollama...` |
+ | 8-10 min | Starting services | `Starting Ollama...` |
+ | 10-15 min | **Downloading Ollama model** | `Pulling model: phi:latest` ⏳ **LONGEST STEP** |
+ | 15+ min | Warming up model | `Warming up model...` |
+ | Final | **Space is RUNNING** | 🟢 Green "Running" status |
+
+ **Total time**: 15-20 minutes for first deployment
+
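+ Rather than watching the logs the whole time, you can poll the health endpoint from your terminal until the Space comes up (a sketch; `curl -sf` exits non-zero until the endpoint responds successfully):
+
+ ```bash
+ # Poll every 60 seconds until /health answers successfully.
+ until curl -sf https://YOUR_USERNAME-ai-api-ollama.hf.space/health > /dev/null; do
+   echo "Still building..."
+   sleep 60
+ done
+ echo "Space is up!"
+ ```
+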
+ ### **Step 9.4: Troubleshooting Build Errors**
+
+ If you see **red error messages**:
+
+ **Common Error 1**: `npm install failed`
+ - **Fix**: Check that `package.json` was copied correctly
+ - Re-run: `git add package.json && git commit -m "fix package.json" && git push`
+
+ **Common Error 2**: `Port 7860 already in use`
+ - **Fix**: This shouldn't happen, but if it does, check README.md has `app_port: 7860`
+
+ **Common Error 3**: `Model download timeout`
+ - **Fix**: Use a smaller model like `phi:latest` in environment variables
+ - Or upgrade to GPU hardware (see Part 11)
+
+ **Common Error 4**: `Out of memory`
+ - **Fix**: Model too big for free CPU. Use `phi:latest` or upgrade to paid tier
+
+ ### **Step 9.5: Verify Space is Running**
+
+ When build completes:
+ 1. Status changes to 🟢 **"Running"**
+ 2. You'll see in logs: `Starting AI API Service on port 7860...`
+ 3. **Your API is now LIVE!**
+
+ ---
+
+ ## 🎉 **PART 10: Test Your Live API**
+
+ ### **Step 10.1: Get Your Space URL**
+
+ Your API is available at:
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ **Example**:
+ ```
+ https://johndoe-ai-api-ollama.hf.space
+ ```
+
+ ### **Step 10.2: Test Health Endpoint**
+
+ **Option A: Use Browser**
+ 1. Open your browser
+ 2. Go to: `https://YOUR_USERNAME-ai-api-ollama.hf.space/health`
+ 3. You should see JSON like:
+    ```json
+    {
+      "status": "healthy",
+      "version": "1.0.0",
+      "services": [...]
+    }
+    ```
+
+ ✅ If you see this, your API is working!
+
+ **Option B: Use Command Line**
+
+ ```bash
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
+ ```
+
+ ### **Step 10.3: Test Chat Endpoint**
+
+ **Copy this command** (replace YOUR_USERNAME and use one of your API keys):
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {
+         "role": "user",
+         "content": "Hello! Can you explain what you are in one sentence?"
+       }
+     ]
+   }'
+ ```
+
+ **Expected response** (takes 5-30 seconds for first request):
+ ```json
+ {
+   "reply": "I am an AI assistant powered by Llama, designed to help answer questions...",
+   "model": "llama2",
+   "usage": {
+     "prompt_tokens": 25,
+     "completion_tokens": 50,
+     "total_tokens": 75
+   },
+   "sources": null
+ }
+ ```
+
+ ✅ **Success!** Your AI API is working!
+
+ ### **Step 10.4: Test RAG Endpoint (Optional)**
+
+ First, upload a document:
+
+ ```bash
+ # Create a test document
+ echo "The AI API Service is a production-ready API for chatbots. It supports Ollama, OpenAI, and HuggingFace." > test.txt
+
+ # Convert to base64 (strip newlines so the JSON stays valid)
+ base64 test.txt | tr -d '\n' > test.txt.b64
+
+ # Upload (Mac/Linux)
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/upload \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d "{
+     \"filename\": \"test.txt\",
+     \"content_base64\": \"$(cat test.txt.b64)\",
+     \"metadata\": {\"title\": \"Test Document\"}
+   }"
+ ```
+
+ Then query it:
+
+ ```bash
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/rag/query \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What does the API support?",
+     "top_k": 3
+   }'
+ ```
+
+ ---
+
+ ## 📊 **PART 11: Monitor and Optimize (Optional)**
+
+ ### **Step 11.1: Check Metrics**
+
+ ```bash
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
+   -H "Authorization: Bearer my-secret-key-12345"
+ ```
+
+ You'll see:
+ - Total requests
+ - Errors
+ - Response times
+ - Model usage
+
+ ### **Step 11.2: Upgrade Hardware (If Needed)**
+
+ If your Space is slow or timing out:
+
+ 1. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
+ 2. Scroll to **"Space hardware"**
+ 3. Click **"Change hardware"**
+ 4. Select:
+    - **CPU upgrade** ($0.60/hr) - 2x faster than free
+    - **GPU T4** ($0.60/hr) - 10x faster, supports bigger models
+    - **GPU A10G** ($3.15/hr) - Best performance
+ 5. Click **"Update Space"**
+ 6. Space will restart with new hardware (~5 minutes)
+
+ ### **Step 11.3: Use Bigger Models**
+
+ Once you have GPU:
+
+ 1. Go to Settings → Variables and secrets
+ 2. Edit `OLLAMA_MODEL`
+ 3. Change to: `llama3:latest` or `mistral:latest`
+ 4. Save
+ 5. Space will restart and download new model
+
+ ---
+
+ ## 🔒 **PART 12: Security Best Practices**
+
+ ### **Step 12.1: Change Default API Keys**
+
+ **⚠️ CRITICAL FOR PRODUCTION**
+
+ 1. Go to Space Settings → Variables
+ 2. Edit `API_KEYS`
+ 3. Replace `demo-key-1` with strong random keys:
+    ```
+    ak_live_a8f7d9e2c1b4f5a7d8e9c2b1a5f7,ak_live_b9c2d1e3f4a5b7c8d9e1f2a3b5
+    ```
+ 4. **Never share these keys publicly!**
+
+ ### **Step 12.2: Make Space Private (Optional)**
+
+ 1. Go to: `https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/settings`
+ 2. Scroll to **"Rename or change repo visibility"**
+ 3. Click **"Make private"**
+ 4. Confirm
+
+ Now only you can see the Space, but the API still works for anyone with the URL and API key.
+
+ ### **Step 12.3: Monitor Usage**
+
+ Check logs regularly:
+ 1. Go to Space → Logs tab
+ 2. Look for suspicious activity:
+    - Many failed authentication attempts
+    - Unusually high request volume
+    - Error patterns
+
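+ The metrics endpoint exposes the same signals programmatically; a sketch that uses `jq` (assumed to be installed) to pull out the counters worth alerting on:
+
+ ```bash
+ # Fetch metrics and extract the error and rate-limit counters.
+ curl -s https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
+   -H "Authorization: Bearer admin-super-secret-key-99999" \
+   | jq '{errors_total, rate_limit_hits}'
+ ```
+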
+ ---
+
+ ## 🎯 **PART 13: Using Your API in Applications**
+
+ ### **Example: JavaScript/TypeScript Web App**
+
+ ```javascript
+ // Save as: app.js
+
+ const API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space';
+ const API_KEY = 'my-secret-key-12345'; // Your actual key
+
+ async function chat(message) {
+   const response = await fetch(`${API_URL}/ai/chat`, {
+     method: 'POST',
+     headers: {
+       'Authorization': `Bearer ${API_KEY}`,
+       'Content-Type': 'application/json',
+     },
+     body: JSON.stringify({
+       conversation: [
+         { role: 'user', content: message }
+       ]
+     })
+   });
+
+   const data = await response.json();
+   return data.reply;
+ }
+
+ // Usage
+ chat('Hello!').then(reply => {
+   console.log('AI:', reply);
+ });
+ ```
+
+ ### **Example: Python Application**
+
+ ```python
+ # Save as: app.py
+
+ import requests
+
+ API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space'
+ API_KEY = 'my-secret-key-12345'
+
+ def chat(message):
+     response = requests.post(
+         f'{API_URL}/ai/chat',
+         headers={
+             'Authorization': f'Bearer {API_KEY}',
+             'Content-Type': 'application/json'
+         },
+         json={
+             'conversation': [
+                 {'role': 'user', 'content': message}
+             ]
+         }
+     )
+     return response.json()['reply']
+
+ # Usage
+ reply = chat('Hello!')
+ print(f'AI: {reply}')
+ ```
+
+ ### **Example: Mobile App (React Native)**
+
+ ```javascript
+ // Save as: ChatService.js
+
+ const API_URL = 'https://YOUR_USERNAME-ai-api-ollama.hf.space';
+ const API_KEY = 'my-secret-key-12345';
+
+ export async function sendMessage(message) {
+   try {
+     const response = await fetch(`${API_URL}/ai/chat`, {
+       method: 'POST',
+       headers: {
+         'Authorization': `Bearer ${API_KEY}`,
+         'Content-Type': 'application/json',
+       },
+       body: JSON.stringify({
+         conversation: [
+           { role: 'user', content: message }
+         ]
+       })
+     });
+
+     if (!response.ok) {
+       throw new Error('API request failed');
+     }
+
+     const data = await response.json();
+     return data.reply;
+   } catch (error) {
+     console.error('Chat error:', error);
+     throw error;
+   }
+ }
+ ```
+
+ ---
+
+ ## 🆘 **PART 14: Troubleshooting Common Issues**
+
+ ### **Issue 1: "Space is building for too long"**
+
+ **Symptoms**: Build takes 30+ minutes
+
+ **Causes**:
+ - Large model download (llama3 is 4.7GB)
+ - Slow internet on Hugging Face servers
+ - Free tier resource limits
+
+ **Solutions**:
+ 1. Use smaller model: `phi:latest` (1.3GB)
+ 2. Upgrade to GPU hardware for faster downloads
+ 3. Wait patiently - first build is always slow
+
+ ---
+
+ ### **Issue 2: "Space crashed / Runtime error"**
+
+ **Symptoms**: Red "Runtime error" status
+
+ **Check logs for**:
+
+ **Error**: `Out of memory`
+ - **Fix**: Model too big for hardware
+ - **Solution**: Use `phi:latest` or upgrade to GPU T4
+
+ **Error**: `Port 7860 already in use`
+ - **Fix**: Check README.md has correct `app_port: 7860`
+ - **Solution**: Edit README.md and push again
+
+ **Error**: `Ollama failed to start`
+ - **Fix**: Dockerfile issue
+ - **Solution**: Verify Dockerfile was renamed correctly
+
+ ---
+
+ ### **Issue 3: "API returns 401 Unauthorized"**
+
+ **Symptoms**:
+ ```json
+ {"error": "Invalid API key"}
+ ```
+
+ **Solutions**:
+ 1. **Check your Authorization header**:
+    ```bash
+    # Correct format:
+    -H "Authorization: Bearer my-secret-key-12345"
+
+    # NOT:
+    -H "Authorization: my-secret-key-12345"  # Missing "Bearer"
+    ```
+
+ 2. **Verify API key is in Space settings**:
+    - Go to Settings → Variables
+    - Check `API_KEYS` contains your key
+    - Keys are case-sensitive!
+
+ 3. **Try the default key**:
+    ```bash
+    -H "Authorization: Bearer demo-key-1"
+    ```
+
+ ---
+
+ ### **Issue 4: "API is very slow (30+ seconds)"**
+
+ **Causes**:
+ - First request loads model into memory (normal)
+ - Free CPU tier is slow
+ - Model is too large for hardware
+
+ **Solutions**:
+ 1. **First request is always slow** - subsequent requests are fast
+ 2. **Upgrade to GPU T4**:
+    - Settings → Space hardware → GPU T4
+    - 10x faster inference
+ 3. **Use smaller model**: `phi:latest`
+ 4. **Add model warmup** (already in Dockerfile):
+    - Keeps model loaded
+    - Reduces cold start time
+
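+ To put numbers on it, `curl` can report the total request time directly; a sketch for comparing cold and warm latency (run it twice and compare):
+
+ ```bash
+ # Measure end-to-end latency for a single chat request.
+ curl -s -o /dev/null -w "total: %{time_total}s\n" \
+   -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
+   -H "Authorization: Bearer my-secret-key-12345" \
+   -H "Content-Type: application/json" \
+   -d '{"conversation":[{"role":"user","content":"ping"}]}'
+ ```
+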
+ ---
+
+ ### **Issue 5: "Cannot upload documents"**
+
+ **Error**: `File too large`
+
+ **Fix**:
+ - Default max size is 10MB
+ - To increase, add environment variable:
+   ```
+   MAX_FILE_SIZE_MB=50
+   ```
+
+ **Error**: `Invalid file format`
+
+ **Fix**:
+ - Only supports: PDF, DOCX, TXT
+ - Ensure file extension is correct
+ - Check file is not corrupted
+
+ ---
+
+ ### **Issue 6: "RAG returns no results"**
+
+ **Symptoms**: Empty `sources` array in response
+
+ **Causes**:
+ 1. No documents uploaded yet
+ 2. Query doesn't match document content
+ 3. Embedding model not loaded
+
+ **Solutions**:
+ 1. **Upload a document first**:
+    ```bash
+    curl -X POST https://YOUR_API/upload \
+      -H "Authorization: Bearer YOUR_KEY" \
+      -d '{"filename": "doc.txt", "content_base64": "..."}'
+    ```
+
+ 2. **Wait for processing** (check logs):
+    ```
+    Document processed successfully: doc_abc123
+    ```
+
+ 3. **Try broader query**:
+    - Instead of: "What is the exact price?"
+    - Try: "pricing information"
+
+ ---
+
+ ### **Issue 7: "How do I see errors?"**
+
+ **Steps**:
+ 1. Go to your Space
+ 2. Click **"Logs"** tab
+ 3. Look for lines with:
+    ```
+    "level": "error"
+    ```
+ 4. Read the `"message"` field
+
+ **Common errors and fixes**:
+
+ ```json
+ {"level":"error","message":"Invalid API key"}
+ ```
+ → Fix: Check Authorization header
+
+ ```json
+ {"level":"error","message":"Rate limit exceeded"}
+ ```
+ → Fix: Wait 60 seconds or use admin key
+
+ ```json
+ {"level":"error","message":"Ollama API error"}
+ ```
+ → Fix: Model not loaded, wait for startup to complete
+
+ ---
+
+ ### **Issue 8: "Space keeps restarting"**
+
+ **Symptoms**: Status alternates between Building and Running
+
+ **Causes**:
+ - Application crashes on startup
+ - Out of memory
+ - Port configuration issue
+
+ **Debug steps**:
+ 1. Check logs for crash reason
+ 2. Verify environment variables are set
+ 3. Try smaller model
+ 4. Contact Hugging Face support if persistent
+
+ ---
+
+ ## 📖 **PART 15: Complete API Reference**
+
+ ### **Base URL**
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ ### **Authentication**
+ All endpoints (except `/health`) require:
+ ```
+ Authorization: Bearer YOUR_API_KEY
+ ```
+
+ ---
+
+ ### **1. Health Check**
+
+ **Endpoint**: `GET /health`
+
+ **No authentication required**
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/health
+ ```
+
+ **Response**:
+ ```json
+ {
+   "status": "healthy",
+   "version": "1.0.0",
+   "services": [
+     {"name": "llm", "status": "up"},
+     {"name": "vector_db", "status": "up"}
+   ],
+   "uptime_seconds": 3600
+ }
+ ```
+
+ ---
+
+ ### **2. Metrics**
+
+ **Endpoint**: `GET /metrics`
+
+ **Requires authentication**
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/metrics \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "timestamp": 1698765432000,
+   "requests_total": 150,
+   "requests_by_endpoint": {
+     "/ai/chat": 100,
+     "/rag/query": 50
+   },
+   "errors_total": 5,
+   "rate_limit_hits": 2,
+   "average_response_time_ms": 1250
+ }
+ ```
+
+ ---
+
+ ### **3. Simple Chat**
+
+ **Endpoint**: `POST /ai/chat`
+
+ **Request**:
+ ```json
+ {
+   "conversation": [
+     {"role": "user", "content": "Hello!"}
+   ],
+   "model": "llama2",
+   "options": {
+     "temperature": 0.7,
+     "max_tokens": 500
+   }
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "reply": "Hello! How can I help you today?",
+   "model": "llama2",
+   "usage": {
+     "prompt_tokens": 10,
+     "completion_tokens": 20,
+     "total_tokens": 30
+   },
+   "sources": null
+ }
+ ```
+
+ **Example**:
+ ```bash
+ curl -X POST https://YOUR_API/ai/chat \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {"role": "user", "content": "Explain AI in one sentence"}
+     ]
+   }'
+ ```
+
+ ---
+
+ ### **4. Multi-turn Conversation**
+
+ **Endpoint**: `POST /ai/chat`
+
+ **Request** (with context):
+ ```json
+ {
+   "conversation": [
+     {"role": "user", "content": "What is 2+2?"},
+     {"role": "assistant", "content": "2+2 equals 4."},
+     {"role": "user", "content": "What about 2+3?"}
+   ]
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "reply": "2+3 equals 5.",
+   "model": "llama2",
+   "usage": {...}
+ }
+ ```
+
+ ---
+
+ ### **5. RAG Query**
+
+ **Endpoint**: `POST /rag/query`
+
+ **Request**:
+ ```json
+ {
+   "query": "What are the main features?",
+   "top_k": 5,
+   "model": "llama2",
+   "use_retrieval": true
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "answer": "The main features include...",
+   "sources": [
+     {
+       "doc_id": "doc_123",
+       "chunk_id": "chunk_5",
+       "content": "Feature description...",
+       "score": 0.92,
+       "metadata": {"title": "Documentation"}
+     }
+   ],
+   "model": "llama2",
+   "usage": {...},
+   "retrieval_time_ms": 250
+ }
+ ```
+
+ **Example**:
+ ```bash
+ curl -X POST https://YOUR_API/rag/query \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What is machine learning?",
+     "top_k": 3
+   }'
+ ```
+
+ ---
+
+ ### **6. Upload Document**
+
+ **Endpoint**: `POST /upload`
+
+ **Request**:
+ ```json
+ {
+   "filename": "document.txt",
+   "content_base64": "VGhpcyBpcyBhIHRlc3Q=",
+   "metadata": {
+     "title": "Test Document",
+     "category": "docs"
+   }
+ }
+ ```
+
+ **Response**:
+ ```json
+ {
+   "doc_id": "doc_abc123",
+   "filename": "document.txt",
+   "size_bytes": 1024,
+   "status": "processing",
+   "estimated_chunks": 5
+ }
+ ```
+
+ **Example (Linux/Mac)**:
+ ```bash
+ # Encode file to base64 (strip newlines so the JSON stays valid)
+ base64 document.txt | tr -d '\n' > document.b64
+
+ # Upload
+ curl -X POST https://YOUR_API/upload \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d "{
+     \"filename\": \"document.txt\",
+     \"content_base64\": \"$(cat document.b64)\",
+     \"metadata\": {\"title\": \"My Document\"}
+   }"
+ ```
+
+ ---
+
+ ### **7. Get Document Sources**
+
+ **Endpoint**: `GET /docs/:id/sources`
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/docs/doc_abc123/sources \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "sources": [
+     {
+       "doc_id": "doc_abc123",
+       "chunk_id": "chunk_0",
+       "content": "This is the first chunk...",
+       "score": 1.0,
+       "metadata": {...}
+     }
+   ]
+ }
+ ```
+
+ ---
+
+ ### **8. Simple Query**
+
+ **Endpoint**: `GET /ai/query?q=QUESTION`
+
+ **Example**:
+ ```bash
+ curl "https://YOUR_API/ai/query?q=What+is+AI" \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "answer": "AI stands for Artificial Intelligence...",
+   "model": "llama2"
+ }
+ ```
+
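+ For questions with spaces or punctuation, letting `curl` do the URL-encoding is less error-prone than writing `+` by hand (a sketch; `-G` turns the data into query parameters):
+
+ ```bash
+ # Let curl URL-encode the question for you.
+ curl -G "https://YOUR_API/ai/query" \
+   --data-urlencode "q=What is AI?" \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+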
+ ---
+
+ ### **9. Get Available Models**
+
+ **Endpoint**: `GET /rag/models`
+
+ **Example**:
+ ```bash
+ curl https://YOUR_API/rag/models \
+   -H "Authorization: Bearer YOUR_KEY"
+ ```
+
+ **Response**:
+ ```json
+ {
+   "models": ["ollama", "llama", "llama2", "llama3", "mistral"],
+   "default_model": "llama2"
+ }
+ ```
+
+ ---
+
+ ## 🎓 **PART 16: Advanced Tips & Tricks**
+
+ ### **Tip 1: Optimize Response Time**
+
+ **Add warmup requests** to keep model in memory:
+
+ Create a simple cron job or scheduled task (cron entries must stay on a single line):
+ ```bash
+ # Every 5 minutes, make a request to keep model loaded
+ */5 * * * * curl -s -X POST https://YOUR_API/ai/chat -H "Authorization: Bearer YOUR_KEY" -H "Content-Type: application/json" -d '{"conversation":[{"role":"user","content":"ping"}]}'
+ ```
+
+ ---
+
+ ### **Tip 2: Use System Prompts for Consistency**
+
+ ```bash
+ curl -X POST https://YOUR_API/ai/chat \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {
+         "role": "system",
+         "content": "You are a friendly customer support agent. Be helpful and concise."
+       },
+       {
+         "role": "user",
+         "content": "How do I reset my password?"
+       }
+     ]
+   }'
+ ```
+
+ ---
+
+ ### **Tip 3: Batch Document Upload**
+
+ Upload multiple documents efficiently:
+
+ ```bash
+ # Create script: batch_upload.sh
+
+ for file in docs/*.txt; do
+   echo "Uploading $file..."
+   base64 "$file" | tr -d '\n' > temp.b64
+   curl -X POST https://YOUR_API/upload \
+     -H "Authorization: Bearer YOUR_KEY" \
+     -H "Content-Type: application/json" \
+     -d "{
+       \"filename\": \"$(basename "$file")\",
+       \"content_base64\": \"$(cat temp.b64)\"
+     }"
+   sleep 2 # Rate limiting
+ done
+
+ rm temp.b64
+ ```
+
+ ---
+
+ ### **Tip 4: Monitor Costs**
+
+ If using paid hardware:
+
+ 1. Check Hugging Face billing: https://huggingface.co/settings/billing
+ 2. Set up budget alerts
+ 3. Monitor Space uptime
+ 4. Pause Space when not in use:
+    - Settings → "Pause Space"
+    - Saves money, stops billing
+    - Resume anytime
+
+ ---
+
+ ### **Tip 5: Create API Key Tiers**
+
+ **In Space Settings**, set up different keys for different users:
+
+ ```
+ # Free tier - limited rate
+ API_KEYS=free_user_key_1,free_user_key_2
+
+ # Premium tier - higher rate
+ PREMIUM_API_KEYS=premium_user_key_1
+
+ # Admin tier - unlimited
+ ADMIN_API_KEYS=admin_key_1
+ ```
+
+ Then adjust rate limits:
+ ```
+ RATE_LIMIT_DEFAULT=60
+ RATE_LIMIT_PREMIUM=300
+ RATE_LIMIT_ADMIN=10000
+ ```
+
+ ---
+
+ ## ✅ **Final Checklist**
+
+ Before going live, verify:
+
+ - [ ] Space is running (green status)
+ - [ ] Health check returns `"status": "healthy"`
+ - [ ] Chat endpoint responds correctly
+ - [ ] Changed default API keys to strong random strings
+ - [ ] Tested with your own API key
+ - [ ] Documented your API keys securely (password manager)
+ - [ ] Set appropriate rate limits
+ - [ ] Chose right model for your hardware
+ - [ ] Tested all endpoints you plan to use
+ - [ ] Reviewed logs for errors
+ - [ ] (Optional) Upgraded hardware if needed
+ - [ ] (Optional) Made Space private if needed
+
+ ---
+
+ ## 🎉 **Congratulations!**
+
+ You now have:
+ ✅ A fully functional AI API running on Hugging Face Spaces
+ ✅ Powered by Ollama (no OpenAI costs!)
+ ✅ Accessible from anywhere via HTTPS
+ ✅ Secure with API key authentication
+ ✅ Ready to integrate into your apps
+
+ **Your API URL**:
+ ```
+ https://YOUR_USERNAME-ai-api-ollama.hf.space
+ ```
+
+ **Share your API** (securely):
+ - Give URL + API key to developers
+ - Use in web apps, mobile apps, scripts
+ - Process millions of requests
+ - Scale as needed
+
+ ---
+
+ ## 📞 **Need Help?**
+
+ **If you're stuck**:
+ 1. ✅ Re-read the relevant section
+ 2. ✅ Check Space logs for errors
+ 3. ✅ Try the troubleshooting section
+ 4. ✅ Open an issue on GitHub
+ 5. ✅ Ask on Hugging Face forums
+
+ **Common beginner mistakes**:
+ - Forgot to rename `Dockerfile.huggingface` to `Dockerfile`
+ - Used wrong API key format (missing "Bearer")
+ - Chose model too large for hardware
+ - Didn't wait for initial model download
+
+ ---
+
+ ## 📚 **What's Next?**
+
+ Now that your API is live:
+
+ 1. **Build a chat interface**:
+    - React app
+    - Vue app
+    - Mobile app
+    - WordPress plugin
+
+ 2. **Add more features**:
+    - User accounts
+    - Usage analytics
+    - Custom models
+    - Advanced RAG
+
+ 3. **Scale up**:
+    - Upgrade hardware
+    - Add caching
+    - Load balancing
+    - CDN
+
+ 4. **Monetize** (optional):
+    - Charge for API access
+    - Offer different tiers
+    - White-label for clients
+
+ ---
+
+ **You did it! 🎉🚀**
+
+ Your AI-powered API is now live and ready to change the world!
DEPLOYMENT.md ADDED
@@ -0,0 +1,435 @@
+ # Deployment Guide
+
+ This guide covers deploying the AI API Service to various platforms.
+
+ ## Table of Contents
+ - [Local Development](#local-development)
+ - [Docker Deployment](#docker-deployment)
+ - [Encore Cloud](#encore-cloud)
+ - [Hugging Face Spaces](#hugging-face-spaces)
+ - [AWS Deployment](#aws-deployment)
+ - [Google Cloud Platform](#google-cloud-platform)
+ - [Azure Deployment](#azure-deployment)
+ - [Environment Variables](#environment-variables)
+
+ ## Local Development
+
+ ### Prerequisites
+ - Node.js 18+
+ - npm or yarn
+ - Encore CLI
+
+ ### Steps
+
+ 1. **Install Encore CLI**
+    ```bash
+    npm install -g encore
+    ```
+
+ 2. **Install dependencies**
+    ```bash
+    npm install
+    ```
+
+ 3. **Configure environment**
+    ```bash
+    cp .env.example .env
+    # Edit .env with your API keys
+    ```
+
+ 4. **Run development server**
+    ```bash
+    encore run
+    ```
+
+ The API will be available at `http://localhost:8000`
+
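+ A quick smoke test once the server is up (assuming the default port):
+
+ ```bash
+ # Verify the service responds before moving on.
+ curl http://localhost:8000/health
+ ```
+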
47
+ ## Docker Deployment
48
+
49
+ ### Build and Run Locally
50
+
51
+ ```bash
52
+ docker-compose up -d
53
+ ```
54
+
55
+ This starts:
56
+ - API service on port 8000
57
+ - Redis for caching (optional)
58
+
59
+ ### Build Production Image
60
+
61
+ ```bash
62
+ docker build -t ai-api-service:latest .
63
+ ```
64
+
65
+ ### Run Production Container
66
+
67
+ ```bash
68
+ docker run -d \
69
+ -p 8000:8000 \
70
+ -e OPENAI_API_KEY=your_key \
71
+ -e API_KEYS=your_api_keys \
72
+ --name ai-api \
73
+ ai-api-service:latest
74
+ ```
75
+
76
+ ## Encore Cloud
77
+
78
+ Encore Cloud provides the easiest deployment experience with automatic infrastructure provisioning.
79
+
80
+ ### Steps
81
+
82
+ 1. **Install Encore CLI**
83
+ ```bash
84
+ npm install -g encore
85
+ ```
86
+
87
+ 2. **Login to Encore**
88
+ ```bash
89
+ encore auth login
90
+ ```
91
+
92
+ 3. **Create app (first time)**
93
+ ```bash
94
+ encore app create ai-api-service
95
+ ```
96
+
97
+ 4. **Set secrets**
98
+ ```bash
99
+ encore secret set OPENAI_API_KEY
100
+ encore secret set HUGGINGFACE_API_KEY
101
+ encore secret set PINECONE_API_KEY
102
+ ```
103
+
104
+ 5. **Deploy**
105
+ ```bash
106
+ encore deploy
107
+ ```
108
+
109
+ Your API will be deployed with:
110
+ - Auto-scaling
111
+ - Load balancing
112
+ - SSL/TLS certificates
113
+ - Monitoring and logs
114
+ - Database backups
115
+
116
+ ## Hugging Face Spaces
117
+
118
+ Deploy as a Docker Space on Hugging Face for easy sharing.
119
+
120
+ ### Steps
121
+
122
+ 1. **Create new Space**
123
+ - Go to https://huggingface.co/new-space
124
+ - Select "Docker" as SDK
125
+ - Choose hardware tier (CPU or GPU)
126
+
127
+ 2. **Clone Space repository**
128
+ ```bash
129
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
130
+ cd YOUR_SPACE
131
+ ```
132
+
133
+ 3. **Copy project files**
134
+ ```bash
135
+ cp -r /path/to/ai-api-service/* .
136
+ ```
137
+
138
+ 4. **Create Dockerfile for HF Spaces**
139
+ ```dockerfile
140
+ FROM node:18-alpine
141
+
142
+ WORKDIR /app
143
+
144
+ COPY package*.json ./
145
+ RUN npm ci --only=production
146
+
147
+ COPY . .
148
+
149
+ ENV PORT=7860
150
+ EXPOSE 7860
151
+
152
+ CMD ["npm", "start"]
153
+ ```
154
+
155
+ 5. **Configure secrets in Space settings**
156
+ - `OPENAI_API_KEY`
157
+ - `HUGGINGFACE_API_KEY`
158
+ - `API_KEYS`
159
+
160
+ 6. **Push to Space**
161
+ ```bash
162
+ git add .
163
+ git commit -m "Initial deployment"
164
+ git push
165
+ ```
166
+
167
+ ## AWS Deployment
168
+
169
+ ### Using AWS ECS (Elastic Container Service)
170
+
171
+ 1. **Push image to ECR**
172
+ ```bash
173
+ aws ecr create-repository --repository-name ai-api-service
174
+
175
+ docker build -t ai-api-service .
176
+
177
+ aws ecr get-login-password --region us-east-1 | \
178
+ docker login --username AWS --password-stdin \
179
+ YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com
180
+
181
+ docker tag ai-api-service:latest \
182
+ YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest
183
+
184
+ docker push YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest
185
+ ```
186
+
187
+ 2. **Create ECS Task Definition**
188
+ ```json
189
+ {
190
+ "family": "ai-api-service",
191
+ "networkMode": "awsvpc",
192
+ "requiresCompatibilities": ["FARGATE"],
193
+ "cpu": "1024",
194
+ "memory": "2048",
195
+ "containerDefinitions": [{
196
+ "name": "ai-api",
197
+ "image": "YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/ai-api-service:latest",
198
+ "portMappings": [{
199
+ "containerPort": 8000,
200
+ "protocol": "tcp"
201
+ }],
202
+ "environment": [],
203
+ "secrets": [{
204
+ "name": "OPENAI_API_KEY",
205
+ "valueFrom": "arn:aws:secretsmanager:us-east-1:ACCOUNT:secret:openai-api-key"
206
+ }]
207
+ }]
208
+ }
209
+ ```
210
+
211
+ 3. **Create ECS Service with ALB**
212
+ - Configure Application Load Balancer
213
+ - Set up target group (port 8000)
214
+ - Configure auto-scaling
215
+ - Add health checks
216
+
217
+ ### Using AWS Lambda (API Gateway)
218
+
219
+ For serverless deployment, wrap endpoints with AWS Lambda handlers.
220
+
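+ A sketch of that pattern using the `serverless-http` package (illustrative only: it assumes an Express-style `app` export, which this Encore service does not provide out of the box):
+
+ ```typescript
+ // Hypothetical Lambda wrapper via serverless-http; './app' is an assumed
+ // Express-compatible export, not something this repo currently ships.
+ import serverless from 'serverless-http';
+ import type { APIGatewayProxyEvent, APIGatewayProxyResult, Context } from 'aws-lambda';
+ import { app } from './app';
+
+ const proxy = serverless(app);
+
+ export const handler = (
+   event: APIGatewayProxyEvent,
+   context: Context
+ ): Promise<APIGatewayProxyResult> =>
+   // serverless-http translates the API Gateway event into an ordinary HTTP request
+   proxy(event, context) as Promise<APIGatewayProxyResult>;
+ ```
+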
221
+ ## Google Cloud Platform
222
+
223
+ ### Using Cloud Run
224
+
225
+ 1. **Build and push to GCR**
226
+ ```bash
227
+ gcloud builds submit --tag gcr.io/PROJECT_ID/ai-api-service
228
+
229
+ gcloud run deploy ai-api-service \
230
+ --image gcr.io/PROJECT_ID/ai-api-service \
231
+ --platform managed \
232
+ --region us-central1 \
233
+ --allow-unauthenticated \
234
+ --set-env-vars OPENAI_API_KEY=your_key
235
+ ```
236
+
237
+ 2. **Configure secrets**
238
+ ```bash
239
+ echo -n "your_openai_key" | \
240
+ gcloud secrets create openai-api-key --data-file=-
241
+
242
+ gcloud run services update ai-api-service \
243
+ --update-secrets OPENAI_API_KEY=openai-api-key:latest
244
+ ```
245
+
246
+ ### Using GKE (Kubernetes)
247
+
248
+ 1. **Create cluster**
249
+ ```bash
250
+ gcloud container clusters create ai-api-cluster \
251
+ --num-nodes=3 \
252
+ --machine-type=n1-standard-2
253
+ ```
254
+
255
+ 2. **Deploy application**
256
+ ```bash
257
+ kubectl apply -f k8s/deployment.yaml
258
+ kubectl apply -f k8s/service.yaml
259
+ kubectl apply -f k8s/ingress.yaml
260
+ ```
261
+
262
+ ## Azure Deployment
263
+
264
+ ### Using Azure Container Instances
265
+
266
+ ```bash
267
+ az container create \
268
+ --resource-group ai-api-rg \
269
+ --name ai-api-service \
270
+ --image your-registry.azurecr.io/ai-api-service:latest \
271
+ --cpu 2 \
272
+ --memory 4 \
273
+ --ports 8000 \
274
+ --environment-variables \
275
+ PORT=8000 \
276
+ --secure-environment-variables \
277
+ OPENAI_API_KEY=your_key \
278
+ API_KEYS=demo-key-1
279
+ ```
280
+
281
+ ### Using Azure App Service
282
+
283
+ 1. **Create App Service Plan**
284
+ ```bash
285
+ az appservice plan create \
286
+ --name ai-api-plan \
287
+ --resource-group ai-api-rg \
288
+ --is-linux \
289
+ --sku B1
290
+ ```
291
+
292
+ 2. **Create Web App**
293
+ ```bash
294
+ az webapp create \
295
+ --resource-group ai-api-rg \
296
+ --plan ai-api-plan \
297
+ --name ai-api-service \
298
+ --deployment-container-image-name your-registry.azurecr.io/ai-api-service:latest
299
+ ```
300
+
301
+ 3. **Configure settings**
302
+ ```bash
303
+ az webapp config appsettings set \
304
+ --resource-group ai-api-rg \
305
+ --name ai-api-service \
306
+ --settings \
307
+ OPENAI_API_KEY=@Microsoft.KeyVault(SecretUri=...)
308
+ ```
309
+
310
+ ## Environment Variables
311
+
312
+ ### Required Variables
313
+
314
+ | Variable | Description | Example |
315
+ |----------|-------------|---------|
316
+ | `API_KEYS` | Comma-separated API keys | `key1,key2,key3` |
317
+ | `OPENAI_API_KEY` | OpenAI API key (another provider key, or a local Ollama, also works) | `sk-...` |
318
+
319
+ ### Optional Variables
320
+
321
+ | Variable | Description | Default |
322
+ |----------|-------------|---------|
323
+ | `HUGGINGFACE_API_KEY` | HuggingFace API key | - |
324
+ | `ANTHROPIC_API_KEY` | Anthropic API key | - |
325
+ | `PINECONE_API_KEY` | Pinecone vector DB key | - |
326
+ | `RATE_LIMIT_DEFAULT` | Requests/min for default tier | `60` |
327
+ | `RATE_LIMIT_ADMIN` | Requests/min for admin tier | `1000` |
328
+ | `LOG_LEVEL` | Logging level | `info` |
329
+ | `MAX_FILE_SIZE_MB` | Max upload size in MB | `10` |
330
+
331
+ ### Setting Secrets
332
+
333
+ **Encore Cloud:**
334
+ ```bash
335
+ encore secret set OPENAI_API_KEY
336
+ ```
337
+
338
+ **Docker:**
339
+ ```bash
340
+ docker run -e OPENAI_API_KEY=your_key ...
341
+ ```
342
+
343
+ **Kubernetes:**
344
+ ```bash
345
+ kubectl create secret generic api-secrets \
346
+ --from-literal=OPENAI_API_KEY=your_key
347
+ ```
348
+
349
+ **AWS Secrets Manager:**
350
+ ```bash
351
+ aws secretsmanager create-secret \
352
+ --name openai-api-key \
353
+ --secret-string your_key
354
+ ```
355
+
356
+ ## Monitoring
357
+
358
+ ### Health Checks
359
+
360
+ Configure health check endpoint:
361
+ ```
362
+ GET /health
363
+ ```
364
+
365
+ Expected response:
366
+ ```json
367
+ {
368
+ "status": "healthy",
369
+ "version": "1.0.0",
370
+ "services": [...]
371
+ }
372
+ ```
373
+
374
+ ### Metrics
375
+
376
+ Access metrics at:
377
+ ```
378
+ GET /metrics
379
+ ```
380
+
381
+ ### Logging
382
+
383
+ Logs are output as structured JSON:
384
+ ```json
385
+ {
386
+ "timestamp": "2025-10-01T12:00:00Z",
387
+ "level": "info",
388
+ "message": "Request processed",
389
+ "duration_ms": 245
390
+ }
391
+ ```
392
+
393
+ ## Scaling Recommendations
394
+
395
+ ### Horizontal Scaling
396
+ - Start with 2-3 replicas
397
+ - Auto-scale based on CPU (70% threshold)
398
+ - Use load balancer for distribution
399
+
400
+ ### Vertical Scaling
401
+ - Minimum: 1 CPU, 2GB RAM
402
+ - Recommended: 2 CPU, 4GB RAM
403
+ - High traffic: 4 CPU, 8GB RAM
404
+
405
+ ### Database Scaling
406
+ - Use Pinecone for production vector storage
407
+ - Implement Redis for caching
408
+ - Consider read replicas for high traffic
409
+
410
+ ## Troubleshooting
411
+
412
+ ### Common Issues
413
+
414
+ **"No LLM adapter available"**
415
+ - Check that at least one API key is set (OpenAI, HuggingFace, or Anthropic), or that Ollama is reachable
416
+
417
+ **"Rate limit exceeded"**
418
+ - Increase rate limits in environment variables
419
+ - Use admin API key for testing
420
+
421
+ **"Vector DB connection failed"**
422
+ - Service falls back to in-memory storage
423
+ - Check Pinecone credentials
424
+
425
+ **High latency**
426
+ - Enable caching (Redis)
427
+ - Use a region closer to your model providers
428
+ - Optimize model selection
429
+
430
+ ## Support
431
+
432
+ For deployment assistance:
433
+ - GitHub Issues
434
+ - Documentation at docs/
435
+ - Community Discord
DEVELOPMENT.md ADDED
@@ -0,0 +1,106 @@
1
+ # Getting Started
2
+
3
+ This project is an Encore application. Follow the steps below to get it running locally.
4
+
5
+ ## Prerequisites
6
+
7
+ If this is your first time using Encore, you need to install the CLI that runs the local development environment. Use the appropriate command for your system:
8
+
9
+ - **macOS:** `brew install encoredev/tap/encore`
10
+ - **Linux:** `curl -L https://encore.dev/install.sh | bash`
11
+ - **Windows:** `iwr https://encore.dev/install.ps1 | iex`
12
+
13
+ You also need bun for package management. If you don't have it yet, install it with:
14
+
15
+ ```bash
16
+ npm install -g bun
17
+ ```
18
+
19
+ ## Running the Application
20
+
21
+ ### Backend Setup
22
+
23
+ 1. Navigate to the backend directory:
24
+ ```bash
25
+ cd backend
26
+ ```
27
+
28
+ 2. Start the Encore development server:
29
+ ```bash
30
+ encore run
31
+ ```
32
+
33
+ The backend will be available at the URL shown in your terminal (typically `http://localhost:4000`).
34
+
35
+
36
+
37
+
38
+
39
+ ## Deployment
40
+
41
+ ### Self-hosting
42
+ See the [self-hosting instructions](https://encore.dev/docs/self-host/docker-build) for how to use `encore build docker` to create a Docker image and
43
+ configure it.
44
+
45
+ ### Encore Cloud Platform
46
+
47
+ #### Step 1: Login to your Encore Cloud Account
48
+
49
+ Before deploying, ensure you have authenticated the Encore CLI with your Encore account (the same as your Leap account):
50
+
51
+ ```bash
52
+ encore auth login
53
+ ```
54
+
55
+ #### Step 2: Set Up Git Remote
56
+
57
+ Add Encore's git remote to enable direct deployment:
58
+
59
+ ```bash
60
+ git remote add encore encore://scalable-ai-api-service-ysyi
61
+ ```
62
+
63
+ #### Step 3: Deploy Your Application
64
+
65
+ Deploy by pushing your code:
66
+
67
+ ```bash
68
+ git add -A .
69
+ git commit -m "Deploy to Encore Cloud"
70
+ git push encore
71
+ ```
72
+
73
+ Monitor your deployment progress in the [Encore Cloud dashboard](https://app.encore.dev/scalable-ai-api-service-ysyi/deploys).
74
+
75
+ ## GitHub Integration (Recommended for Production)
76
+
77
+ For production applications, we recommend integrating with GitHub instead of using Encore's managed git:
78
+
79
+ ### Connecting Your GitHub Account
80
+
81
+ 1. Open your app in the **Encore Cloud dashboard**
82
+ 2. Navigate to Encore Cloud [GitHub Integration settings](https://app.encore.cloud/scalable-ai-api-service-ysyi/settings/integrations/github)
83
+ 3. Click **Connect Account to GitHub**
84
+ 4. Grant access to your repository
85
+
86
+ Once connected, pushing to your GitHub repository will automatically trigger deployments. Encore Cloud Pro users also get Preview Environments for each pull request.
87
+
88
+ ### Deploy via GitHub
89
+
90
+ After connecting GitHub, deploy by pushing to your repository:
91
+
92
+ ```bash
93
+ git add -A .
94
+ git commit -m "Deploy via GitHub"
95
+ git push origin main
96
+ ```
97
+
98
+ ## Additional Resources
99
+
100
+ - [Encore Documentation](https://encore.dev/docs)
101
+ - [Deployment Guide](https://encore.dev/docs/platform/deploy/deploying)
102
+ - [GitHub Integration](https://encore.dev/docs/platform/integrations/github)
103
+ - [Encore Cloud Dashboard](https://app.encore.dev)
104
+
105
+
106
+
Dockerfile ADDED
@@ -0,0 +1,74 @@
1
+ FROM node:18-alpine AS builder
2
+
3
+ WORKDIR /app
4
+
5
+ COPY package*.json ./
6
+ RUN npm ci
7
+
8
+ COPY . .
9
+ RUN npm run build || echo "Build will happen on startup"
10
+
11
+ FROM node:18
12
+
13
+ WORKDIR /app
14
+
15
+ RUN apt-get update && apt-get install -y curl && \
16
+ curl -fsSL https://ollama.com/install.sh | sh && \
17
+ apt-get clean && rm -rf /var/lib/apt/lists/*
18
+
19
+ COPY --from=builder /app ./
20
+ RUN npm ci --omit=dev
21
+
22
+ ENV PORT=7860
23
+ ENV NODE_ENV=production
24
+ ENV OLLAMA_BASE_URL=http://localhost:11434
25
+ ENV OLLAMA_MODEL=llama2
26
+ ENV OLLAMA_EMBEDDING_MODEL=nomic-embed-text
27
+ ENV API_KEYS=demo-key-1,demo-key-2
28
+ ENV RATE_LIMIT_DEFAULT=100
29
+ ENV RATE_LIMIT_ADMIN=1000
30
+ ENV LOG_LEVEL=info
31
+ ENV ENABLE_BACKGROUND_WORKERS=true
32
+ ENV OLLAMA_MODELS=/data/ollama-models
33
+
34
+ EXPOSE 7860
35
+
36
+ RUN echo '#!/bin/bash\n\
37
+ set -e\n\
38
+ \n\
39
+ echo "=== Starting AI API Service with Ollama ==="\n\
40
+ \n\
41
+ ollama serve &\n\
42
+ OLLAMA_PID=$!\n\
43
+ echo "Ollama started with PID $OLLAMA_PID"\n\
44
+ \n\
45
+ echo "Waiting for Ollama to be ready..."\n\
46
+ for i in {1..30}; do\n\
47
+ if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then\n\
48
+ echo "Ollama is ready!"\n\
49
+ break\n\
50
+ fi\n\
51
+ echo "Waiting... ($i/30)"\n\
52
+ sleep 2\n\
53
+ done\n\
54
+ \n\
55
+ echo "Pulling Ollama model: $OLLAMA_MODEL"\n\
56
+ ollama pull $OLLAMA_MODEL || echo "Warning: Model pull failed, will retry on first request"\n\
57
+ \n\
58
+ if [ "$OLLAMA_EMBEDDING_MODEL" != "$OLLAMA_MODEL" ]; then\n\
59
+ echo "Pulling embedding model: $OLLAMA_EMBEDDING_MODEL"\n\
60
+ ollama pull $OLLAMA_EMBEDDING_MODEL || echo "Warning: Embedding model pull failed"\n\
61
+ fi\n\
62
+ \n\
63
+ echo "Warming up model..."\n\
64
+ timeout 30s ollama run $OLLAMA_MODEL "Hi" > /dev/null 2>&1 || echo "Warmup timed out or failed (continuing)"\n\
65
+ \n\
66
+ echo "Starting AI API Service on port $PORT..."\n\
67
+ echo "Available models: $(ollama list)"\n\
68
+ \n\
69
+ if [ -f .encore/build/backend/main.js ]; then\n\
+   exec node .encore/build/backend/main.js\n\
+ else\n\
+   exec npm start\n\
+ fi\n\
70
+ ' > /app/start.sh && chmod +x /app/start.sh
71
+
72
+ VOLUME /data
73
+
74
+ CMD ["/app/start.sh"]
HUGGINGFACE_OLLAMA_DEPLOY.md ADDED
@@ -0,0 +1,423 @@
1
+ # Deploying AI API Service to Hugging Face Spaces with Ollama
2
+
3
+ This guide shows you how to deploy the AI API service to Hugging Face Spaces using Ollama as your LLM backend (no API keys needed!).
4
+
5
+ ## Why Ollama on Hugging Face Spaces?
6
+
7
+ ✅ **No API costs** - Run models locally in your Space
8
+ ✅ **Privacy** - Data stays within your Space
9
+ ✅ **Model choice** - Use Llama 2, Llama 3, Mistral, Phi, Gemma, etc.
10
+ ✅ **No rate limits** - Only limited by Space hardware
11
+ ✅ **Full control** - Customize models and parameters
12
+
13
+ ## Prerequisites
14
+
15
+ - Hugging Face account (free)
16
+ - Basic knowledge of Git
17
+
18
+ ## Step-by-Step Deployment
19
+
20
+ ### 1. Create a New Space
21
+
22
+ 1. Go to https://huggingface.co/new-space
23
+ 2. Choose:
24
+ - **Name**: `ai-api-ollama` (or your preferred name)
25
+ - **License**: MIT
26
+ - **SDK**: Docker
27
+ - **Hardware**:
28
+ - **CPU Basic (free)**: Works for small models (phi, gemma:2b)
29
+ - **CPU Upgrade ($0.60/hr)**: Better for medium models (llama2, mistral)
30
+ - **GPU T4 ($0.60/hr)**: Recommended for fast inference
31
+ - **GPU A10G ($3.15/hr)**: For large models (llama3:70b)
32
+ 3. Click **Create Space**
33
+
34
+ ### 2. Clone Your Space Repository
35
+
36
+ ```bash
37
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama
38
+ cd ai-api-ollama
39
+ ```
40
+
41
+ ### 3. Copy Project Files
42
+
43
+ Copy all files from this project to your Space directory:
44
+
45
+ ```bash
46
+ # From the ai-api-service directory
47
+ cp -r backend examples tests *.md *.json *.yml .dockerignore .env.example ../ai-api-ollama/
48
+ ```
49
+
50
+ ### 4. Create Hugging Face Space Dockerfile
51
+
52
+ Create a new `Dockerfile` optimized for Hugging Face Spaces with Ollama:
53
+
54
+ ```dockerfile
55
+ FROM node:18-alpine AS builder
56
+
57
+ WORKDIR /app
58
+
59
+ # Copy package files
60
+ COPY package*.json ./
61
+ RUN npm ci
62
+
63
+ # Copy source code
64
+ COPY . .
65
+
66
+ # Build the application
67
+ RUN npm run build || echo "Build step skipped - Encore will build on startup"
68
+
69
+ # Production stage with Ollama
70
+ FROM node:18
71
+
72
+ WORKDIR /app
73
+
74
+ # Install Ollama
75
+ RUN curl -fsSL https://ollama.com/install.sh | sh
76
+
77
+ # Copy built application
78
+ COPY --from=builder /app ./
79
+
80
+ # Install production dependencies
81
+ RUN npm ci --omit=dev
82
+
83
+ # Set environment variables for Hugging Face Spaces
84
+ ENV PORT=7860
85
+ ENV OLLAMA_BASE_URL=http://localhost:11434
86
+ ENV OLLAMA_MODEL=llama2
87
+ ENV OLLAMA_EMBEDDING_MODEL=nomic-embed-text
88
+ ENV API_KEYS=demo-key-1,demo-key-2
89
+ ENV RATE_LIMIT_DEFAULT=60
90
+ ENV RATE_LIMIT_ADMIN=1000
91
+ ENV LOG_LEVEL=info
92
+ ENV ENABLE_BACKGROUND_WORKERS=true
93
+
94
+ EXPOSE 7860
95
+
96
+ # Create startup script
97
+ RUN echo '#!/bin/bash\n\
98
+ # Start Ollama in background\n\
99
+ ollama serve &\n\
100
+ OLLAMA_PID=$!\n\
101
+ \n\
102
+ # Wait for Ollama to start\n\
103
+ echo "Waiting for Ollama to start..."\n\
104
+ sleep 5\n\
105
+ \n\
106
+ # Pull the model\n\
107
+ echo "Pulling Ollama model: $OLLAMA_MODEL"\n\
108
+ ollama pull $OLLAMA_MODEL || echo "Model pull failed, will try on first request"\n\
109
+ \n\
110
+ # Pull embedding model if different\n\
111
+ if [ "$OLLAMA_EMBEDDING_MODEL" != "$OLLAMA_MODEL" ]; then\n\
112
+ echo "Pulling embedding model: $OLLAMA_EMBEDDING_MODEL"\n\
113
+ ollama pull $OLLAMA_EMBEDDING_MODEL || echo "Embedding model pull failed"\n\
114
+ fi\n\
115
+ \n\
116
+ # Start the API service\n\
117
+ echo "Starting AI API Service on port $PORT..."\n\
118
+ node .encore/build/backend/main.js || npm start\n\
119
+ ' > /app/start.sh && chmod +x /app/start.sh
120
+
121
+ CMD ["/app/start.sh"]
122
+ ```
123
+
124
+ ### 5. Configure Environment Variables in Space Settings
125
+
126
+ In your Space settings on Hugging Face:
127
+
128
+ 1. Go to **Settings** → **Variables and secrets**
129
+ 2. Add these environment variables:
130
+
131
+ | Variable | Value | Description |
132
+ |----------|-------|-------------|
133
+ | `API_KEYS` | `your-secret-key-here` | Comma-separated API keys for authentication |
134
+ | `ADMIN_API_KEYS` | `admin-key-here` | Admin-level API keys (optional) |
135
+ | `OLLAMA_MODEL` | `llama2` | Default: llama2, or use llama3, mistral, phi, gemma |
136
+ | `OLLAMA_EMBEDDING_MODEL` | `nomic-embed-text` | Embedding model for RAG |
137
+ | `RATE_LIMIT_DEFAULT` | `100` | Requests per minute for default users |
138
+
139
+ **Recommended Models by Hardware:**
140
+
141
+ | Hardware | Recommended Model | Speed | Quality |
142
+ |----------|------------------|-------|---------|
143
+ | CPU Basic | `phi:latest` or `gemma:2b` | Fast | Good |
144
+ | CPU Upgrade | `llama2:latest` or `mistral:latest` | Medium | Better |
145
+ | GPU T4 | `llama3:latest` | Fast | Excellent |
146
+ | GPU A10G | `llama3:70b` | Medium | Best |
147
+
148
+ ### 6. Create README.md for Your Space
149
+
150
+ Create a `README.md` in your Space root:
151
+
152
+ ````markdown
153
+ ---
154
+ title: AI API Service with Ollama
155
+ emoji: 🤖
156
+ colorFrom: blue
157
+ colorTo: purple
158
+ sdk: docker
159
+ pinned: false
160
+ ---
161
+
162
+ # AI API Service with Ollama
163
+
164
+ Production-ready AI API with chat, RAG, image generation, and voice synthesis.
165
+
166
+ ## Features
167
+
168
+ - 💬 Multi-turn chat conversations
169
+ - 📚 RAG (Retrieval-Augmented Generation)
170
+ - 🖼️ Image generation
171
+ - 🎙️ Voice synthesis
172
+ - 📄 Document ingestion
173
+ - 🔒 API key authentication
174
+ - ⚡ Rate limiting
175
+
176
+ ## Quick Start
177
+
178
+ ### API Documentation
179
+
180
+ Base URL: `https://YOUR_USERNAME-ai-api-ollama.hf.space`
181
+
182
+ ### Example Request
183
+
184
+ ```bash
185
+ curl -X POST https://YOUR_USERNAME-ai-api-ollama.hf.space/ai/chat \
186
+ -H "Authorization: Bearer demo-key-1" \
187
+ -H "Content-Type: application/json" \
188
+ -d '{
189
+ "conversation": [
190
+ {"role": "user", "content": "Hello! How are you?"}
191
+ ]
192
+ }'
193
+ ```
194
+
195
+ ### Available Endpoints
196
+
197
+ - `GET /health` - Health check
198
+ - `POST /ai/chat` - Chat conversation
199
+ - `POST /rag/query` - Query with retrieval
200
+ - `POST /image/generate` - Generate images
201
+ - `POST /voice/synthesize` - Text to speech
202
+ - `POST /upload` - Upload documents
203
+
204
+ See full API documentation in the repository.
205
+
206
+ ## Using Your Own API Key
207
+
208
+ Replace `demo-key-1` with your configured API key from Space settings.
209
+
210
+ ## Local Development
211
+
212
+ See [QUICKSTART.md](QUICKSTART.md) for local setup instructions.
213
+ ````
214
+
215
+ ### 7. Push to Hugging Face
216
+
217
+ ```bash
218
+ git add .
219
+ git commit -m "Initial deployment with Ollama"
220
+ git push
221
+ ```
222
+
223
+ ### 8. Wait for Build
224
+
225
+ - Hugging Face will automatically build your Docker image
226
+ - This takes 5-10 minutes for first build
227
+ - Watch the **Logs** tab for progress
228
+ - Initial startup will download the Ollama model (2-5 minutes depending on model size)
229
+
230
+ ### 9. Test Your Deployment
231
+
232
+ Once the Space is running:
233
+
234
+ ```bash
235
+ # Replace YOUR_USERNAME with your Hugging Face username
236
+ SPACE_URL="https://YOUR_USERNAME-ai-api-ollama.hf.space"
237
+
238
+ # Health check
239
+ curl $SPACE_URL/health
240
+
241
+ # Chat request
242
+ curl -X POST $SPACE_URL/ai/chat \
243
+ -H "Authorization: Bearer demo-key-1" \
244
+ -H "Content-Type: application/json" \
245
+ -d '{
246
+ "conversation": [
247
+ {"role": "user", "content": "Tell me a joke about AI"}
248
+ ]
249
+ }'
250
+ ```
251
+
252
+ ## Optimizations for Hugging Face Spaces
253
+
254
+ ### 1. Reduce Model Download Time
255
+
256
+ Pre-download models in Dockerfile:
257
+
258
+ ```dockerfile
259
+ # `ollama pull` needs a running server, so start one temporarily during the build
+ RUN ollama serve & sleep 5 && \
+     ollama pull llama2 && \
+     ollama pull nomic-embed-text
261
+ ```
262
+
263
+ ### 2. Use Smaller Models for Free Tier
264
+
265
+ ```env
266
+ OLLAMA_MODEL=phi:latest
267
+ ```
268
+
269
+ Phi is only 1.3GB vs Llama2's 4GB.
270
+
271
+ ### 3. Enable Persistent Storage
272
+
273
+ Hugging Face Spaces have persistent storage in `/data`:
274
+
275
+ ```dockerfile
276
+ # Add to Dockerfile
277
+ VOLUME /data
278
+ ENV OLLAMA_MODELS=/data/ollama-models
279
+ ```
280
+
281
+ This prevents re-downloading models on restart.
282
+
283
+ ### 4. Optimize for Cold Starts
284
+
285
+ Add model warmup in startup script:
286
+
287
+ ```bash
288
+ # Add to start.sh
289
+ echo "Warming up model..."
290
+ timeout 10s ollama run $OLLAMA_MODEL "Hello" > /dev/null 2>&1 || true
291
+ ```
292
+
293
+ ## Cost Comparison
294
+
295
+ | Option | Cost | Pros | Cons |
296
+ |--------|------|------|------|
297
+ | **Free CPU** | $0 | Free! | Slow inference, small models only |
298
+ | **CPU Upgrade** | $0.60/hr (~$432/mo) | Better performance | Still slower than GPU |
299
+ | **GPU T4** | $0.60/hr (~$432/mo) | Fast inference | Limited for huge models |
300
+ | **OpenAI API** | Pay per token | No hosting, fast | Ongoing costs, data sent to OpenAI |
301
+ | **Self-hosted** | VPS costs | Full control | Maintenance required |
302
+
303
+ **Recommendation**: Start with **Free CPU + Phi** for testing, upgrade to **GPU T4 + Llama3** for production.
304
+
305
+ ## Troubleshooting
306
+
307
+ ### Space won't start
308
+
309
+ **Check logs for**:
310
+ - Ollama installation errors → Use official Ollama install script
311
+ - Model download timeout → Use smaller model or upgrade hardware
312
+ - Port conflicts → Ensure PORT=7860
313
+
314
+ ### "No LLM adapter available"
315
+
316
+ **Solution**: The Ollama adapter is always initialized. Check that Ollama is running:
317
+ ```bash
318
+ # In Space terminal
319
+ curl http://localhost:11434/api/tags
320
+ ```
321
+
322
+ ### Slow responses
323
+
324
+ **Solutions**:
325
+ - Use smaller model (phi instead of llama2)
326
+ - Upgrade to GPU hardware
327
+ - Reduce max_tokens in requests
328
+
329
+ ### Model not found
330
+
331
+ **Solution**: Pull model manually:
332
+ ```bash
333
+ # In Space terminal or startup script
334
+ ollama pull llama2
335
+ ```
336
+
337
+ ## Advanced Configuration
338
+
339
+ ### Use Multiple Models
340
+
341
+ ```env
342
+ # In Space settings
343
+ OLLAMA_MODEL=llama3:latest
344
+ ```
345
+
346
+ Then specify model in API requests:
347
+ ```json
348
+ {
349
+ "conversation": [...],
350
+ "model": "llama3"
351
+ }
352
+ ```
353
+
354
+ ### Custom System Prompts
355
+
356
+ ```bash
357
+ curl -X POST $SPACE_URL/ai/chat \
358
+ -H "Authorization: Bearer your-key" \
359
+ -H "Content-Type: application/json" \
360
+ -d '{
361
+ "conversation": [
362
+ {"role": "system", "content": "You are a helpful coding assistant."},
363
+ {"role": "user", "content": "Explain Python decorators"}
364
+ ]
365
+ }'
366
+ ```
367
+
368
+ ### Enable RAG with Documents
369
+
370
+ ```bash
371
+ # Upload a document
372
+ curl -X POST $SPACE_URL/upload \
373
+ -H "Authorization: Bearer your-key" \
+   -F "file=@document.pdf"
374
375
+
376
+ # Query with RAG
377
+ curl -X POST $SPACE_URL/rag/query \
378
+ -H "Authorization: Bearer your-key" \
379
+ -H "Content-Type: application/json" \
380
+ -d '{"query": "What does the document say about X?"}'
381
+ ```
382
+
383
+ ## Monitoring
384
+
385
+ ### Check Space Health
386
+
387
+ ```bash
388
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/health
389
+ ```
390
+
391
+ ### View Metrics
392
+
393
+ ```bash
394
+ curl https://YOUR_USERNAME-ai-api-ollama.hf.space/metrics \
395
+ -H "Authorization: Bearer your-key"
396
+ ```
397
+
398
+ ## Scaling
399
+
400
+ ### Horizontal Scaling
401
+
402
+ Hugging Face Spaces don't support horizontal scaling. For high traffic:
403
+
404
+ 1. **Use multiple Spaces** with load balancer
405
+ 2. **Deploy to cloud** (AWS ECS, GCP Cloud Run) with auto-scaling
406
+ 3. **Use managed API** (OpenAI, Anthropic) for high volume
407
+
408
+ ### Vertical Scaling
409
+
410
+ Upgrade hardware in Space settings:
411
+ - Free CPU → CPU Upgrade (2x faster)
412
+ - CPU → GPU T4 (10x faster)
413
+ - GPU T4 → GPU A10G (2x faster, larger models)
414
+
415
+ ## Support
416
+
417
+ - [GitHub Issues](https://github.com/your-org/ai-api-service/issues)
418
+ - [Hugging Face Discussions](https://huggingface.co/spaces/YOUR_USERNAME/ai-api-ollama/discussions)
419
+ - [Documentation](https://github.com/your-org/ai-api-service)
420
+
421
+ ## License
422
+
423
+ MIT License - see LICENSE file
QUICKSTART.md ADDED
@@ -0,0 +1,319 @@
1
+ # Quick Start Guide
2
+
3
+ Get your AI API Service up and running in 5 minutes!
4
+
5
+ ## Prerequisites
6
+
7
+ - Node.js 18+
8
+ - npm or yarn
9
+ - At least one LLM API key (OpenAI, HuggingFace, or Anthropic), or a local Ollama install
10
+
11
+ ## 5-Minute Setup
12
+
13
+ ### 1. Install Dependencies
14
+
15
+ ```bash
16
+ npm install
17
+ ```
18
+
19
+ ### 2. Configure Environment
20
+
21
+ ```bash
22
+ cp .env.example .env
23
+ ```
24
+
25
+ Edit `.env` and add your API keys:
26
+
27
+ ```env
28
+ OPENAI_API_KEY=sk-your-openai-key
29
+ API_KEYS=demo-key-1,my-secret-key
30
+ ```
31
+
32
+ ### 3. Start the Server
33
+
34
+ ```bash
35
+ npm run dev
36
+ ```
37
+
38
+ The API will be available at `http://localhost:8000`.
39
+
40
+ ### 4. Test the API
41
+
42
+ ```bash
43
+ curl http://localhost:8000/health
44
+ ```
45
+
46
+ Expected response:
47
+ ```json
48
+ {
49
+ "status": "healthy",
50
+ "version": "1.0.0",
51
+ "services": [...],
52
+ "uptime_seconds": 5
53
+ }
54
+ ```
55
+
56
+ ### 5. Make Your First Request
57
+
58
+ ```bash
59
+ curl -X POST http://localhost:8000/ai/chat \
60
+ -H "Authorization: Bearer demo-key-1" \
61
+ -H "Content-Type: application/json" \
62
+ -d '{
63
+ "conversation": [
64
+ {"role": "user", "content": "Hello!"}
65
+ ]
66
+ }'
67
+ ```
68
+
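+ The same request from TypeScript, as a minimal sketch using Node 18's built-in `fetch` (see `examples/js_client.js` for a fuller client):
+
+ ```typescript
+ // Minimal chat call against the local server; requires Node 18+ for fetch.
+ const res = await fetch('http://localhost:8000/ai/chat', {
+   method: 'POST',
+   headers: {
+     'Authorization': 'Bearer demo-key-1',
+     'Content-Type': 'application/json',
+   },
+   body: JSON.stringify({
+     conversation: [{ role: 'user', content: 'Hello!' }],
+   }),
+ });
+
+ const data = await res.json();
+ console.log(data.reply); // the assistant's answer
+ ```
+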
69
+ ## Example Requests
70
+
71
+ ### Chat
72
+ ```bash
73
+ curl -X POST http://localhost:8000/ai/chat \
74
+ -H "Authorization: Bearer demo-key-1" \
75
+ -H "Content-Type: application/json" \
76
+ -d '{"conversation": [{"role": "user", "content": "What is AI?"}]}'
77
+ ```
78
+
79
+ ### RAG Query
80
+ ```bash
81
+ curl -X POST http://localhost:8000/rag/query \
82
+ -H "Authorization: Bearer demo-key-1" \
83
+ -H "Content-Type: application/json" \
84
+ -d '{"query": "What are the key features?", "top_k": 5}'
85
+ ```
86
+
87
+ ### Image Generation
88
+ ```bash
89
+ curl -X POST http://localhost:8000/image/generate \
90
+ -H "Authorization: Bearer demo-key-1" \
91
+ -H "Content-Type: application/json" \
92
+ -d '{"prompt": "A sunset over mountains", "size": "1024x1024"}'
93
+ ```
94
+
95
+ ## What Each Component Does
96
+
97
+ ### 🔐 **Authentication (`/backend/utils/auth.ts`)**
98
+ - Validates API keys from the Authorization header
99
+ - Implements role-based access (default, premium, admin)
100
+ - Used by all protected endpoints
101
+
102
+ ### ⚡ **Rate Limiting (`/backend/utils/rate_limit.ts`)**
103
+ - Token bucket algorithm (sketched below)
104
+ - Configurable limits per tier (60/300/1000 requests/min)
105
+ - Automatic reset after 1 minute
106
+ - Prevents abuse and cost overruns
107
+
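+ A minimal sketch of the token-bucket idea (names are illustrative, not the actual `rate_limit.ts`):
+
+ ```typescript
+ // Illustrative token-bucket limiter: each API key gets `limit` tokens per
+ // minute; a request spends one token, and the bucket refills after 60s.
+ interface Bucket {
+   tokens: number;
+   lastRefill: number; // ms timestamp of the last refill
+ }
+
+ const buckets = new Map<string, Bucket>();
+
+ export function allowRequest(apiKey: string, limit: number): boolean {
+   const now = Date.now();
+   const bucket = buckets.get(apiKey) ?? { tokens: limit, lastRefill: now };
+
+   if (now - bucket.lastRefill >= 60_000) {
+     bucket.tokens = limit;     // automatic reset after 1 minute
+     bucket.lastRefill = now;
+   }
+
+   if (bucket.tokens <= 0) return false; // caller should respond with 429
+   bucket.tokens -= 1;
+   buckets.set(apiKey, bucket);
+   return true;
+ }
+ ```
+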
108
+ ### 🤖 **AI Service (`/backend/services/ai_service.ts`)**
109
+ - Multi-provider LLM routing (OpenAI, HuggingFace, Anthropic)
110
+ - Automatic model selection and fallback (see sketch below)
111
+ - Chat completions with context management
112
+ - Embedding generation for RAG
113
+
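+ "Fallback" here means trying providers in order, as in this sketch built on the repo's `LLMAdapter` interface (the ordering and error handling are illustrative):
+
+ ```typescript
+ // Illustrative provider fallback; ai_service.ts may select differently.
+ import type { LLMAdapter, Message, ChatResponse } from '../types/models';
+
+ export async function completeWithFallback(
+   adapters: LLMAdapter[], // e.g. [openai, anthropic, huggingface, ollama]
+   messages: Message[]
+ ): Promise<ChatResponse> {
+   for (const adapter of adapters) {
+     if (!(await adapter.isAvailable())) continue; // skip unconfigured providers
+     try {
+       return await adapter.generateCompletion(messages);
+     } catch {
+       // on failure, fall through to the next provider
+     }
+   }
+   throw new Error('No LLM adapter available');
+ }
+ ```
+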
114
+ ### 📚 **RAG Service (`/backend/services/rag_service.ts`)**
115
+ - Vector-based document retrieval
116
+ - Automatic context injection into prompts
117
+ - Supports Pinecone or in-memory vector DB
118
+ - Returns sources with similarity scores
119
+
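+ Conceptually a RAG query is embed → retrieve → inject → complete. A sketch on top of the repo's adapter interfaces (the prompt wording and the `metadata.text` field are assumptions):
+
+ ```typescript
+ // Illustrative RAG pipeline; rag_service.ts may shape prompts differently.
+ import type { LLMAdapter, VectorDBAdapter, ChatResponse } from '../types/models';
+
+ export async function ragQuery(
+   llm: LLMAdapter,
+   vectorDb: VectorDBAdapter,
+   query: string,
+   topK = 5
+ ): Promise<ChatResponse> {
+   // 1. Embed the query.
+   const { embeddings } = await llm.generateEmbedding(query);
+
+   // 2. Retrieve the most similar chunks from the vector DB.
+   const matches = await vectorDb.query(embeddings[0], topK);
+
+   // 3. Inject the retrieved text into the prompt as context.
+   const context = matches.map(m => m.metadata.text).join('\n---\n');
+
+   // 4. Answer with the context prepended as a system message.
+   return llm.generateCompletion([
+     { role: 'system', content: `Answer using this context:\n${context}` },
+     { role: 'user', content: query },
+   ]);
+ }
+ ```
+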
120
+ ### 🖼️ **Image Service (`/backend/services/image_service.ts`)**
121
+ - Text-to-image generation
122
+ - Supports DALL-E and Stable Diffusion
123
+ - Configurable sizes and quality
124
+ - Returns base64 or URLs
125
+
126
+ ### 🎙️ **Voice Service (`/backend/services/voice_service.ts`)**
127
+ - Text-to-speech synthesis (TTS)
128
+ - Speech-to-text transcription (STT)
129
+ - Multiple voice options
130
+ - Various audio formats (mp3, opus, etc.)
131
+
132
+ ### 📄 **Document Service (`/backend/services/document_service.ts`)**
133
+ - Upload PDF, DOCX, TXT files
134
+ - Automatic text extraction
135
+ - Chunking with overlap for better retrieval (see sketch below)
136
+ - Background processing with workers
137
+ - Stores chunks in vector DB
138
+
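+ A sketch of overlap chunking with the `CHUNK_SIZE` / `CHUNK_OVERLAP` defaults from `.env.example` (character-based here; the real service may split by tokens or sentences):
+
+ ```typescript
+ // Illustrative fixed-size chunking: consecutive chunks share `overlap`
+ // characters so retrieval does not lose context at chunk boundaries.
+ export function chunkText(text: string, size = 1000, overlap = 200): string[] {
+   const step = Math.max(1, size - overlap); // guard against overlap >= size
+   const chunks: string[] = [];
+   for (let start = 0; start < text.length; start += step) {
+     chunks.push(text.slice(start, start + size));
+   }
+   return chunks;
+ }
+ ```
+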
139
+ ### 🔌 **Adapters**
140
+
141
+ #### **OpenAI Adapter (`/backend/adapters/openai_adapter.ts`)**
142
+ - Chat completions (GPT-4, GPT-3.5)
143
+ - Embeddings (text-embedding-ada-002)
144
+ - Image generation (DALL-E)
145
+ - Voice synthesis and transcription
146
+ - Implements LLMAdapter, ImageAdapter, VoiceAdapter interfaces
147
+
148
+ #### **HuggingFace Adapter (`/backend/adapters/huggingface_adapter.ts`)**
149
+ - Open-source models (Mistral, Llama, etc.)
150
+ - Stable Diffusion for images
151
+ - Sentence transformers for embeddings
152
+ - Free tier available
153
+
154
+ #### **Anthropic Adapter (`/backend/adapters/anthropic_adapter.ts`)**
155
+ - Claude models (Sonnet, Opus)
156
+ - Advanced reasoning capabilities
157
+ - Long context windows
158
+
159
+ #### **Vector DB Adapters (`/backend/adapters/vector_db_adapter.ts`)**
160
+ - **PineconeAdapter**: Production vector storage with managed scaling
161
+ - **InMemoryVectorDB**: Development fallback with cosine similarity
162
+ - Supports metadata filtering and batch operations
163
+
164
+ ### 📊 **Observability**
165
+
166
+ #### **Logger (`/backend/utils/logger.ts`)**
167
+ - Structured JSON logging
168
+ - Configurable log levels (debug, info, warn, error)
169
+ - Automatic timestamping
170
+ - Production-ready format
171
+
172
+ #### **Metrics (`/backend/utils/metrics.ts`)**
173
+ - Request counting by endpoint
174
+ - Error tracking
175
+ - Response time measurement
176
+ - Model usage statistics
177
+ - Vector DB query counts
178
+ - Document processing stats
179
+
180
+ ### 🔄 **Background Workers (`/backend/workers/ingestion_worker.ts`)**
181
+ - Async document processing
182
+ - Configurable concurrency
183
+ - Job status tracking
184
+ - Webhook notifications on completion
185
+ - Automatic retries on failure
186
+
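+ The concurrency-plus-retry pattern, sketched (the queue shape and names are illustrative, not the actual worker):
+
+ ```typescript
+ // Illustrative worker pool: WORKER_CONCURRENCY loops drain a shared queue,
+ // re-queueing failed jobs up to a retry cap. The real ingestion_worker.ts
+ // also tracks job status and sends webhook notifications.
+ type Job = { id: string; run: () => Promise<void>; attempts: number };
+
+ const MAX_ATTEMPTS = 3;
+ const CONCURRENCY = Number(process.env.WORKER_CONCURRENCY ?? 5);
+
+ export async function drainQueue(queue: Job[]): Promise<void> {
+   const workers = Array.from({ length: CONCURRENCY }, async () => {
+     for (let job = queue.shift(); job; job = queue.shift()) {
+       try {
+         await job.run();
+       } catch {
+         if (++job.attempts < MAX_ATTEMPTS) queue.push(job); // retry later
+       }
+     }
+   });
+   await Promise.all(workers);
+ }
+ ```
+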
187
+ ### 🌐 **API Endpoints**
188
+
189
+ All endpoints are in `/backend/api/`:
190
+
191
+ #### **Health & Metrics (`health.ts`)**
192
+ - `GET /health` - Service health with component status
193
+ - `GET /metrics` - Usage metrics and statistics
194
+
195
+ #### **Authentication (`auth.ts`)**
196
+ - `POST /auth/verify` - Validate API key
197
+
198
+ #### **Chat (`chat.ts`)**
199
+ - `POST /ai/chat` - Multi-turn conversation
200
+ - `GET /ai/query` - Simple Q&A
201
+
202
+ #### **RAG (`rag.ts`)**
203
+ - `POST /rag/query` - Query with retrieval
204
+ - `GET /rag/models` - List available models
205
+
206
+ #### **Images (`image.ts`)**
207
+ - `POST /image/generate` - Generate images
208
+
209
+ #### **Voice (`voice.ts`)**
210
+ - `POST /voice/synthesize` - Text to speech
211
+ - `POST /voice/transcribe` - Speech to text
212
+
213
+ #### **Documents (`documents.ts`)**
214
+ - `POST /upload` - Upload document
215
+ - `GET /docs/:id/sources` - Get document chunks
216
+ - `POST /webhook/events` - Processing webhooks
217
+
218
+ ## Architecture Flow
219
+
220
+ ```
221
+ ┌─────────┐
+ │ Client  │
+ └────┬────┘
+      │  Authorization header (Bearer token)
+      ▼
+ ┌─────────────────┐
+ │ Auth Middleware │ ← validates API key, checks rate limit
+ └────┬────────────┘
+      │
+      ▼
+ ┌──────────────┐
+ │ API Endpoint │ ← routes the request
+ └────┬─────────┘
+      ├─ POST /ai/chat          → AI Service
+      ├─ POST /rag/query        → RAG Service → Vector DB → AI Service
+      ├─ POST /image/generate   → Image Service
+      ├─ POST /voice/synthesize → Voice Service
+      ├─ POST /upload           → Document Service → Worker → Vector DB
+      │
+      ▼
+ ┌───────────┐
+ │ Response  │ ← JSON with data + metadata
+ └───────────┘
244
+ ```
245
+
246
+ ## Configuration
247
+
248
+ ### Environment Variables
249
+
250
+ | Variable | What It Does | Example |
251
+ |----------|-------------|---------|
252
+ | `OPENAI_API_KEY` | OpenAI access for GPT models | `sk-...` |
253
+ | `HUGGINGFACE_API_KEY` | HuggingFace models access | `hf_...` |
254
+ | `API_KEYS` | Valid API keys (comma-separated) | `key1,key2` |
255
+ | `RATE_LIMIT_DEFAULT` | Requests/min for basic users | `60` |
256
+ | `RATE_LIMIT_ADMIN` | Requests/min for admins | `1000` |
257
+ | `MAX_FILE_SIZE_MB` | Max document upload size | `10` |
258
+ | `CHUNK_SIZE` | Text chunk size for RAG | `1000` |
259
+ | `LOG_LEVEL` | Logging verbosity | `info` |
260
+
261
+ ### Tier System
262
+
263
+ - **Default**: 60 requests/min
264
+ - **Premium**: 300 requests/min (add to config)
265
+ - **Admin**: 1000 requests/min (via `ADMIN_API_KEYS`)
266
+
267
+ ## Testing
268
+
269
+ Run tests:
270
+ ```bash
271
+ npm test
272
+ ```
273
+
274
+ Run with coverage:
275
+ ```bash
276
+ npm run test:coverage
277
+ ```
278
+
279
+ ## Production Checklist
280
+
281
+ - [ ] Set strong `API_KEYS`
282
+ - [ ] Configure `ADMIN_API_KEYS` separately
283
+ - [ ] Set up Pinecone for vector storage
284
+ - [ ] Increase rate limits based on needs
285
+ - [ ] Enable background workers
286
+ - [ ] Set `LOG_LEVEL=info` or `warn`
287
+ - [ ] Configure CORS origins
288
+ - [ ] Set up monitoring/alerting
289
+ - [ ] Review cost limits on LLM providers
290
+
291
+ ## Troubleshooting
292
+
293
+ **"No LLM adapter available"**
294
+ → Add at least one API key (OPENAI_API_KEY, HUGGINGFACE_API_KEY, or ANTHROPIC_API_KEY), or run Ollama locally
295
+
296
+ **"Invalid API key"**
297
+ → Check Authorization header: `Bearer your-key-here`
298
+
299
+ **"Rate limit exceeded"**
300
+ → Wait 60 seconds or use admin key
301
+
302
+ **Vector DB queries fail**
303
+ → Service falls back to in-memory storage automatically
304
+
305
+ ## Next Steps
306
+
307
+ 1. **Read the full README**: `README.md`
308
+ 2. **Check deployment guide**: `DEPLOYMENT.md`
309
+ 3. **Review examples**: `examples/js_client.js` and `examples/curl.sh`
310
+ 4. **Run tests**: `npm test`
311
+ 5. **Deploy to production**: See DEPLOYMENT.md
312
+
313
+ ## Support
314
+
315
+ - GitHub Issues
316
+ - Documentation in `/docs`
317
+ - Example code in `/examples`
318
+
319
+ Enjoy building with the AI API Service! 🚀
README.md CHANGED
@@ -1,11 +1,12 @@
 
 
 
1
  ---
2
- title: Ai Api Ollama
3
- emoji: 🔥
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
 
7
  pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  ---
5
+ title: AI API Service with Ollama
6
+ emoji: 🤖
7
+ colorFrom: blue
8
+ colorTo: purple
9
  sdk: docker
10
+ app_port: 7860
11
  pinned: false
12
+ ---
 
 
 
backend/adapters/anthropic_adapter.ts ADDED
@@ -0,0 +1,76 @@
1
+ import Anthropic from '@anthropic-ai/sdk';
2
+ import type {
3
+ LLMAdapter,
4
+ Message,
5
+ ChatOptions,
6
+ ChatResponse,
7
+ EmbeddingResponse,
8
+ } from '../types/models';
9
+
10
+ export class AnthropicAdapter implements LLMAdapter {
11
+ private client: Anthropic | null = null;
12
+ private apiKey: string;
13
+ private defaultModel: string;
14
+
15
+ constructor(apiKey: string, defaultModel = 'claude-3-sonnet-20240229') {
16
+ this.apiKey = apiKey;
17
+ this.defaultModel = defaultModel;
18
+
19
+ if (apiKey) {
20
+ this.client = new Anthropic({ apiKey });
21
+ }
22
+ }
23
+
24
+ async isAvailable(): Promise<boolean> {
25
+ if (!this.client) return false;
26
+ try {
27
+ await this.client.messages.create({
28
+ model: this.defaultModel,
29
+ max_tokens: 1,
30
+ messages: [{ role: 'user', content: 'test' }],
31
+ });
32
+ return true;
33
+ } catch {
34
+ return false;
35
+ }
36
+ }
37
+
38
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
39
+ if (!this.client) {
40
+ throw new Error('Anthropic client not initialized. Please provide ANTHROPIC_API_KEY.');
41
+ }
42
+
43
+ const systemMessage = messages.find(m => m.role === 'system');
44
+ const conversationMessages = messages.filter(m => m.role !== 'system');
45
+
46
+ const response = await this.client.messages.create({
47
+ model: this.defaultModel,
48
+ max_tokens: options?.max_tokens || 1000,
49
+ temperature: options?.temperature ?? 0.7,
50
+ top_p: options?.top_p,
51
+ system: systemMessage?.content,
52
+ messages: conversationMessages.map(m => ({
53
+ role: m.role === 'assistant' ? 'assistant' : 'user',
54
+ content: m.content,
55
+ })),
56
+ stop_sequences: options?.stop,
57
+ });
58
+
59
+ const textContent = response.content.find(c => c.type === 'text');
60
+
61
+ return {
62
+ reply: textContent?.type === 'text' ? textContent.text : '',
63
+ model: response.model,
64
+ usage: {
65
+ prompt_tokens: response.usage.input_tokens,
66
+ completion_tokens: response.usage.output_tokens,
67
+ total_tokens: response.usage.input_tokens + response.usage.output_tokens,
68
+ },
69
+ sources: null,
70
+ };
71
+ }
72
+
73
+ async generateEmbedding(_text: string | string[]): Promise<EmbeddingResponse> {
74
+ throw new Error('Anthropic does not support embeddings. Use OpenAI or HuggingFace adapter.');
75
+ }
76
+ }
backend/adapters/huggingface_adapter.ts ADDED
1
+ import { HfInference } from '@huggingface/inference';
2
+ import type {
3
+ LLMAdapter,
4
+ ImageAdapter,
5
+ Message,
6
+ ChatOptions,
7
+ ChatResponse,
8
+ EmbeddingResponse,
9
+ ImageGenerationRequest,
10
+ ImageGenerationResponse,
11
+ } from '../types/models';
12
+
13
+ export class HuggingFaceAdapter implements LLMAdapter, ImageAdapter {
14
+ private client: HfInference | null = null;
15
+ private apiKey: string;
16
+ private defaultModel: string;
17
+ private defaultEmbeddingModel: string;
18
+ private defaultImageModel: string;
19
+
20
+ constructor(
21
+ apiKey: string,
22
+ defaultModel = 'mistralai/Mistral-7B-Instruct-v0.1',
23
+ defaultEmbeddingModel = 'sentence-transformers/all-MiniLM-L6-v2',
24
+ defaultImageModel = 'stabilityai/stable-diffusion-xl-base-1.0'
25
+ ) {
26
+ this.apiKey = apiKey;
27
+ this.defaultModel = defaultModel;
28
+ this.defaultEmbeddingModel = defaultEmbeddingModel;
29
+ this.defaultImageModel = defaultImageModel;
30
+
31
+ if (apiKey) {
32
+ this.client = new HfInference(apiKey);
33
+ }
34
+ }
35
+
36
+ async isAvailable(): Promise<boolean> {
37
+ if (!this.client) return false;
38
+ try {
39
+ await this.client.textGeneration({
40
+ model: this.defaultModel,
41
+ inputs: 'test',
42
+ parameters: { max_new_tokens: 1 },
43
+ });
44
+ return true;
45
+ } catch {
46
+ return false;
47
+ }
48
+ }
49
+
50
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
51
+ if (!this.client) {
52
+ throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
53
+ }
54
+
55
+ const prompt = this.formatMessagesAsPrompt(messages);
56
+
57
+ const response = await this.client.textGeneration({
58
+ model: this.defaultModel,
59
+ inputs: prompt,
60
+ parameters: {
61
+ max_new_tokens: options?.max_tokens || 1000,
62
+ temperature: options?.temperature ?? 0.7,
63
+ top_p: options?.top_p ?? 0.95,
64
+ repetition_penalty: 1.1,
65
+ return_full_text: false,
66
+ },
67
+ });
68
+
69
+ const estimatedTokens = Math.ceil(prompt.length / 4);
70
+ const completionTokens = Math.ceil((response.generated_text?.length || 0) / 4);
71
+
72
+ return {
73
+ reply: response.generated_text || '',
74
+ model: this.defaultModel,
75
+ usage: {
76
+ prompt_tokens: estimatedTokens,
77
+ completion_tokens: completionTokens,
78
+ total_tokens: estimatedTokens + completionTokens,
79
+ },
80
+ sources: null,
81
+ };
82
+ }
83
+
84
+ async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
85
+ if (!this.client) {
86
+ throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
87
+ }
88
+
89
+ const inputs = Array.isArray(text) ? text : [text];
90
+ const embeddings: number[][] = [];
91
+
92
+ for (const input of inputs) {
93
+ const response = await this.client.featureExtraction({
94
+ model: this.defaultEmbeddingModel,
95
+ inputs: input,
96
+ });
97
+
98
+ if (Array.isArray(response) && Array.isArray(response[0])) {
99
+ embeddings.push(response[0] as number[]);
100
+ } else if (Array.isArray(response)) {
101
+ embeddings.push(response as number[]);
102
+ }
103
+ }
104
+
105
+ const totalTokens = inputs.reduce((sum, input) => sum + Math.ceil(input.length / 4), 0);
106
+
107
+ return {
108
+ embeddings,
109
+ model: this.defaultEmbeddingModel,
110
+ usage: {
111
+ prompt_tokens: totalTokens,
112
+ completion_tokens: 0,
113
+ total_tokens: totalTokens,
114
+ },
115
+ };
116
+ }
117
+
118
+ async generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse> {
119
+ if (!this.client) {
120
+ throw new Error('HuggingFace client not initialized. Please provide HUGGINGFACE_API_KEY.');
121
+ }
122
+
123
+ const model = options?.model || this.defaultImageModel;
124
+
125
+ const response = await this.client.textToImage({
126
+ model,
127
+ inputs: prompt,
128
+ });
129
+
130
+ let buffer: Buffer;
131
+ if (typeof response === 'object' && 'arrayBuffer' in response) {
132
+ const arrayBuffer = await (response as any).arrayBuffer();
133
+ buffer = Buffer.from(arrayBuffer);
134
+ } else {
135
+ buffer = Buffer.from(response as any);
136
+ }
137
+ const base64Image = buffer.toString('base64');
138
+
139
+ return {
140
+ images: [{
141
+ url: `data:image/png;base64,${base64Image}`,
142
+ }],
143
+ model,
144
+ created: Date.now(),
145
+ };
146
+ }
147
+
148
+ private formatMessagesAsPrompt(messages: Message[]): string {
149
+ let prompt = '';
150
+
151
+ for (const message of messages) {
152
+ if (message.role === 'system') {
153
+ prompt += `System: ${message.content}\n\n`;
154
+ } else if (message.role === 'user') {
155
+ prompt += `User: ${message.content}\n\n`;
156
+ } else if (message.role === 'assistant') {
157
+ prompt += `Assistant: ${message.content}\n\n`;
158
+ }
159
+ }
160
+
161
+ prompt += 'Assistant: ';
162
+ return prompt;
163
+ }
164
+ }
backend/adapters/ollama_adapter.ts ADDED
@@ -0,0 +1,153 @@
1
+ import type {
2
+ LLMAdapter,
3
+ Message,
4
+ ChatOptions,
5
+ ChatResponse,
6
+ EmbeddingResponse,
7
+ } from '../types/models';
8
+ import { logger } from '../utils/logger';
9
+
10
+ export class OllamaAdapter implements LLMAdapter {
11
+ private baseUrl: string;
12
+ private defaultModel: string;
13
+ private defaultEmbeddingModel: string;
14
+
15
+ constructor(
16
+ baseUrl = 'http://localhost:11434',
17
+ defaultModel = 'llama2',
18
+ defaultEmbeddingModel = 'nomic-embed-text'
19
+ ) {
20
+ this.baseUrl = baseUrl;
21
+ this.defaultModel = defaultModel;
22
+ this.defaultEmbeddingModel = defaultEmbeddingModel;
23
+ }
24
+
25
+ async isAvailable(): Promise<boolean> {
26
+ try {
27
+ const response = await fetch(`${this.baseUrl}/api/tags`);
28
+ return response.ok;
29
+ } catch {
30
+ return false;
31
+ }
32
+ }
33
+
34
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
35
+ try {
36
+ const prompt = this.formatMessagesAsPrompt(messages);
37
+
38
+ const response = await fetch(`${this.baseUrl}/api/generate`, {
39
+ method: 'POST',
40
+ headers: {
41
+ 'Content-Type': 'application/json',
42
+ },
43
+ body: JSON.stringify({
44
+ model: this.defaultModel,
45
+ prompt,
46
+ stream: false,
47
+ options: {
48
+ temperature: options?.temperature ?? 0.7,
49
+ num_predict: options?.max_tokens ?? 1000,
50
+ top_p: options?.top_p ?? 0.9,
51
+ stop: options?.stop,
52
+ },
53
+ }),
54
+ });
55
+
56
+ if (!response.ok) {
57
+ throw new Error(`Ollama API error: ${response.statusText}`);
58
+ }
59
+
60
+ const data = await response.json() as any;
61
+
62
+ const estimatedPromptTokens = Math.ceil(prompt.length / 4);
63
+ const estimatedCompletionTokens = Math.ceil((data.response?.length || 0) / 4);
64
+
65
+ return {
66
+ reply: data.response || '',
67
+ model: this.defaultModel,
68
+ usage: {
69
+ prompt_tokens: estimatedPromptTokens,
70
+ completion_tokens: estimatedCompletionTokens,
71
+ total_tokens: estimatedPromptTokens + estimatedCompletionTokens,
72
+ },
73
+ sources: null,
74
+ };
75
+ } catch (error) {
76
+ logger.error('Ollama completion error', {
77
+ error: error instanceof Error ? error.message : String(error),
78
+ });
79
+ throw error;
80
+ }
81
+ }
82
+
83
+ async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
84
+ try {
85
+ const inputs = Array.isArray(text) ? text : [text];
86
+ const embeddings: number[][] = [];
87
+
88
+ for (const input of inputs) {
89
+ const response = await fetch(`${this.baseUrl}/api/embeddings`, {
90
+ method: 'POST',
91
+ headers: {
92
+ 'Content-Type': 'application/json',
93
+ },
94
+ body: JSON.stringify({
95
+ model: this.defaultEmbeddingModel,
96
+ prompt: input,
97
+ }),
98
+ });
99
+
100
+ if (!response.ok) {
101
+ throw new Error(`Ollama embeddings error: ${response.statusText}`);
102
+ }
103
+
104
+ const data = await response.json() as any;
105
+ embeddings.push(data.embedding);
106
+ }
107
+
108
+ const totalTokens = inputs.reduce((sum, input) => sum + Math.ceil(input.length / 4), 0);
109
+
110
+ return {
111
+ embeddings,
112
+ model: this.defaultEmbeddingModel,
113
+ usage: {
114
+ prompt_tokens: totalTokens,
115
+ completion_tokens: 0,
116
+ total_tokens: totalTokens,
117
+ },
118
+ };
119
+ } catch (error) {
120
+ logger.error('Ollama embedding error', {
121
+ error: error instanceof Error ? error.message : String(error),
122
+ });
123
+ throw error;
124
+ }
125
+ }
126
+
127
+ private formatMessagesAsPrompt(messages: Message[]): string {
128
+ let prompt = '';
129
+
130
+ for (const message of messages) {
131
+ if (message.role === 'system') {
132
+ prompt += `System: ${message.content}\n\n`;
133
+ } else if (message.role === 'user') {
134
+ prompt += `User: ${message.content}\n\n`;
135
+ } else if (message.role === 'assistant') {
136
+ prompt += `Assistant: ${message.content}\n\n`;
137
+ }
138
+ }
139
+
140
+ prompt += 'Assistant: ';
141
+ return prompt;
142
+ }
143
+
144
+ setModel(modelName: string): void {
145
+ this.defaultModel = modelName;
146
+ logger.info('Ollama model changed', { model: modelName });
147
+ }
148
+
149
+ setEmbeddingModel(modelName: string): void {
150
+ this.defaultEmbeddingModel = modelName;
151
+ logger.info('Ollama embedding model changed', { model: modelName });
152
+ }
153
+ }
backend/adapters/openai_adapter.ts ADDED
@@ -0,0 +1,193 @@
1
+ import OpenAI, { toFile } from 'openai';
2
+ import type {
3
+ LLMAdapter,
4
+ ImageAdapter,
5
+ VoiceAdapter,
6
+ Message,
7
+ ChatOptions,
8
+ ChatResponse,
9
+ EmbeddingResponse,
10
+ ImageGenerationRequest,
11
+ ImageGenerationResponse,
12
+ VoiceSynthesisRequest,
13
+ VoiceSynthesisResponse,
14
+ TranscriptionRequest,
15
+ TranscriptionResponse,
16
+ } from '../types/models';
17
+
18
+ export class OpenAIAdapter implements LLMAdapter, ImageAdapter, VoiceAdapter {
19
+ private client: OpenAI | null = null;
20
+ private apiKey: string;
21
+ private defaultChatModel: string;
22
+ private defaultEmbeddingModel: string;
23
+ private defaultImageModel: string;
24
+ private defaultVoiceModel: string;
25
+
26
+ constructor(
27
+ apiKey: string,
28
+ defaultChatModel = 'gpt-3.5-turbo',
29
+ defaultEmbeddingModel = 'text-embedding-ada-002',
30
+ defaultImageModel = 'dall-e-3',
31
+ defaultVoiceModel = 'tts-1'
32
+ ) {
33
+ this.apiKey = apiKey;
34
+ this.defaultChatModel = defaultChatModel;
35
+ this.defaultEmbeddingModel = defaultEmbeddingModel;
36
+ this.defaultImageModel = defaultImageModel;
37
+ this.defaultVoiceModel = defaultVoiceModel;
38
+
39
+ if (apiKey) {
40
+ this.client = new OpenAI({ apiKey });
41
+ }
42
+ }
43
+
44
+ async isAvailable(): Promise<boolean> {
45
+ if (!this.client) return false;
46
+ try {
47
+ await this.client.models.list();
48
+ return true;
49
+ } catch {
50
+ return false;
51
+ }
52
+ }
53
+
54
+ async generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse> {
55
+ if (!this.client) {
56
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
57
+ }
58
+
59
+ const completion = await this.client.chat.completions.create({
60
+ model: this.defaultChatModel,
61
+ messages: messages.map(m => ({
62
+ role: m.role,
63
+ content: m.content,
64
+ })),
65
+ temperature: options?.temperature ?? 0.7,
66
+ max_tokens: options?.max_tokens ?? 1000,
67
+ top_p: options?.top_p,
68
+ frequency_penalty: options?.frequency_penalty,
69
+ presence_penalty: options?.presence_penalty,
70
+ stop: options?.stop,
71
+ });
72
+
73
+ return {
74
+ reply: completion.choices[0]?.message?.content || '',
75
+ model: completion.model,
76
+ usage: {
77
+ prompt_tokens: completion.usage?.prompt_tokens || 0,
78
+ completion_tokens: completion.usage?.completion_tokens || 0,
79
+ total_tokens: completion.usage?.total_tokens || 0,
80
+ },
81
+ sources: null,
82
+ };
83
+ }
84
+
85
+ async generateEmbedding(text: string | string[]): Promise<EmbeddingResponse> {
86
+ if (!this.client) {
87
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
88
+ }
89
+
90
+ const input = Array.isArray(text) ? text : [text];
91
+
92
+ const response = await this.client.embeddings.create({
93
+ model: this.defaultEmbeddingModel,
94
+ input,
95
+ });
96
+
97
+ if (!response.data) {
98
+ throw new Error('No embedding data returned from OpenAI');
99
+ }
100
+
101
+ return {
102
+ embeddings: response.data.map(d => d.embedding),
103
+ model: response.model,
104
+ usage: {
105
+ prompt_tokens: response.usage.prompt_tokens,
106
+ completion_tokens: 0,
107
+ total_tokens: response.usage.total_tokens,
108
+ },
109
+ };
110
+ }
111
+
112
+ async generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse> {
113
+ if (!this.client) {
114
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
115
+ }
116
+
117
+ const model = options?.model || this.defaultImageModel;
118
+ const isDallE3 = model.includes('dall-e-3');
119
+
120
+ const response = await this.client.images.generate({
121
+ model,
122
+ prompt,
123
+ n: isDallE3 ? 1 : (options?.n || 1),
124
+ size: options?.size || '1024x1024',
125
+ quality: options?.quality,
126
+ style: options?.style,
127
+ });
128
+
129
+ if (!response.data) {
130
+ throw new Error('No image data returned from OpenAI');
131
+ }
132
+
133
+ return {
134
+ images: response.data.map(img => ({
135
+ url: img.url || '',
136
+ revised_prompt: img.revised_prompt,
137
+ b64_json: img.b64_json,
138
+ })),
139
+ model,
140
+ created: response.created,
141
+ };
142
+ }
143
+
144
+ async synthesize(text: string, options?: Partial<VoiceSynthesisRequest>): Promise<VoiceSynthesisResponse> {
145
+ if (!this.client) {
146
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
147
+ }
148
+
149
+ const voice = options?.voice || 'alloy';
150
+ const model = options?.model || this.defaultVoiceModel;
151
+ const format = options?.format || 'mp3';
152
+
153
+ const response = await this.client.audio.speech.create({
154
+ model,
155
+ voice,
156
+ input: text,
157
+ response_format: format as any,
158
+ speed: options?.speed,
159
+ });
160
+
161
+ const buffer = Buffer.from(await response.arrayBuffer());
162
+ const base64Audio = buffer.toString('base64');
163
+
164
+ return {
165
+ audio_url: `data:audio/${format};base64,${base64Audio}`,
166
+ voice,
167
+ format,
168
+ size_bytes: buffer.length,
169
+ };
170
+ }
171
+
172
+ async transcribe(audio: Buffer, options?: Partial<TranscriptionRequest>): Promise<TranscriptionResponse> {
173
+ if (!this.client) {
174
+ throw new Error('OpenAI client not initialized. Please provide OPENAI_API_KEY.');
175
+ }
176
+
177
+ // The SDK expects an uploadable file object, not a raw Buffer. The filename
+ // (and its extension) here is an assumption; pass the real format if known.
+ const file = await toFile(audio, 'audio.mp3');
178
+
179
+ const response = await this.client.audio.transcriptions.create({
180
+ file,
181
+ model: options?.model || 'whisper-1',
182
+ language: options?.language,
183
+ prompt: options?.prompt,
184
+ });
185
+
186
+ return {
187
+ text: response.text,
188
+ language: options?.language || 'en',
189
+ duration: 0,
190
+ model: 'whisper-1',
191
+ };
192
+ }
193
+ }
backend/adapters/vector_db_adapter.ts ADDED
@@ -0,0 +1,146 @@
1
+ import { Pinecone } from '@pinecone-database/pinecone';
2
+ import type { VectorDBAdapter, VectorSearchResult } from '../types/models';
3
+
4
+ export class PineconeAdapter implements VectorDBAdapter {
5
+ private client: Pinecone | null = null;
6
+ private indexName: string;
7
+ private namespace: string;
8
+ private initialized = false;
9
+
10
+ constructor(apiKey: string, indexName: string, namespace = 'default') {
11
+ this.indexName = indexName;
12
+ this.namespace = namespace;
13
+
14
+ if (apiKey) {
15
+ this.client = new Pinecone({ apiKey });
16
+ }
17
+ }
18
+
19
+ async isAvailable(): Promise<boolean> {
20
+ if (!this.client) return false;
21
+ try {
22
+ await this.client.listIndexes();
23
+ return true;
24
+ } catch {
25
+ return false;
26
+ }
27
+ }
28
+
29
+ async upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void> {
30
+ if (!this.client) {
31
+ throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
32
+ }
33
+
34
+ const index = this.client.index(this.indexName);
35
+
36
+ await index.namespace(this.namespace).upsert(vectors);
37
+ }
38
+
39
+ async query(
40
+ queryVector: number[],
41
+ topK: number,
42
+ filter?: Record<string, any>
43
+ ): Promise<VectorSearchResult[]> {
44
+ if (!this.client) {
45
+ throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
46
+ }
47
+
48
+ const index = this.client.index(this.indexName);
49
+
50
+ const results = await index.namespace(this.namespace).query({
51
+ vector: queryVector,
52
+ topK,
53
+ filter,
54
+ includeMetadata: true,
55
+ });
56
+
57
+ return results.matches.map(match => ({
58
+ id: match.id,
59
+ score: match.score || 0,
60
+ metadata: (match.metadata || {}) as Record<string, any>,
61
+ }));
62
+ }
63
+
64
+ async delete(ids: string[]): Promise<void> {
65
+ if (!this.client) {
66
+ throw new Error('Pinecone client not initialized. Please provide PINECONE_API_KEY.');
67
+ }
68
+
69
+ const index = this.client.index(this.indexName);
70
+
71
+ await index.namespace(this.namespace).deleteMany(ids);
72
+ }
73
+ }
74
+
75
+ export class InMemoryVectorDB implements VectorDBAdapter {
76
+ private vectors: Map<string, { values: number[]; metadata: Record<string, any> }> = new Map();
77
+
78
+ async isAvailable(): Promise<boolean> {
79
+ return true;
80
+ }
81
+
82
+ async upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void> {
83
+ for (const vector of vectors) {
84
+ this.vectors.set(vector.id, {
85
+ values: vector.values,
86
+ metadata: vector.metadata,
87
+ });
88
+ }
89
+ }
90
+
91
+ async query(
92
+ queryVector: number[],
93
+ topK: number,
94
+ filter?: Record<string, any>
95
+ ): Promise<VectorSearchResult[]> {
96
+ const results: Array<{ id: string; score: number; metadata: Record<string, any> }> = [];
97
+
98
+ for (const [id, vector] of this.vectors.entries()) {
99
+ if (filter && !this.matchesFilter(vector.metadata, filter)) {
100
+ continue;
101
+ }
102
+
103
+ const score = this.cosineSimilarity(queryVector, vector.values);
104
+ results.push({
105
+ id,
106
+ score,
107
+ metadata: vector.metadata,
108
+ });
109
+ }
110
+
111
+ results.sort((a, b) => b.score - a.score);
112
+ return results.slice(0, topK);
113
+ }
114
+
115
+ async delete(ids: string[]): Promise<void> {
116
+ for (const id of ids) {
117
+ this.vectors.delete(id);
118
+ }
119
+ }
120
+
121
+ private cosineSimilarity(a: number[], b: number[]): number {
122
+ if (a.length !== b.length) return 0;
123
+
124
+ let dotProduct = 0;
125
+ let normA = 0;
126
+ let normB = 0;
127
+
128
+ for (let i = 0; i < a.length; i++) {
129
+ dotProduct += a[i] * b[i];
130
+ normA += a[i] * a[i];
131
+ normB += b[i] * b[i];
132
+ }
133
+
134
+ const denominator = Math.sqrt(normA) * Math.sqrt(normB);
135
+ return denominator === 0 ? 0 : dotProduct / denominator;
136
+ }
137
+
138
+ private matchesFilter(metadata: Record<string, any>, filter: Record<string, any>): boolean {
139
+ for (const [key, value] of Object.entries(filter)) {
140
+ if (metadata[key] !== value) {
141
+ return false;
142
+ }
143
+ }
144
+ return true;
145
+ }
146
+ }
backend/api/auth.ts ADDED
@@ -0,0 +1,26 @@
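+ // POST /auth/verify: validates the caller's API key and returns the key's
+ // tier together with its current rate-limit window.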
+ import { api } from "encore.dev/api";
+ import { auth, validateApiKey, getApiKeyInfo } from "../utils/auth";
+ import { getRateLimitInfo } from "../utils/rate_limit";
+ import type { ApiKeyInfo, RateLimitInfo } from "../types/models";
+
+ interface VerifyResponse {
+   valid: boolean;
+   key_info: ApiKeyInfo;
+   rate_limit: RateLimitInfo;
+ }
+
+ export const verify = api<void, VerifyResponse>(
+   { expose: true, method: "POST", path: "/auth/verify", auth: false },
+   async () => {
+     const authHeader = auth();
+     const authData = validateApiKey(authHeader);
+     const keyInfo = getApiKeyInfo(authData.apiKey);
+     const rateLimitInfo = getRateLimitInfo(authData.apiKey, authData.tier);
+
+     return {
+       valid: true,
+       key_info: keyInfo,
+       rate_limit: rateLimitInfo,
+     };
+   }
+ );
backend/api/chat.ts ADDED
@@ -0,0 +1,99 @@
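+ // Chat endpoints: POST /ai/chat for multi-turn conversations and GET /ai/query
+ // for single-shot questions. Both validate the API key, enforce per-tier rate
+ // limits, and re-map limiter errors to 429 responses with retry metadata.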
+ import { api, APIError, Query } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { aiService } from "../services/ai_service";
+ import type { ChatRequest, ChatResponse } from "../types/models";
+
+ export const chat = api<ChatRequest, ChatResponse>(
+   { expose: true, method: "POST", path: "/ai/chat", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/ai/chat");
+
+       if (!req.conversation || req.conversation.length === 0) {
+         throw APIError.invalidArgument("conversation must contain at least one message");
+       }
+
+       const response = await aiService.chat(
+         req.conversation,
+         req.model,
+         req.options
+       );
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface SimpleQueryRequest {
+   q: Query<string>;
+   model?: Query<string>;
+ }
+
+ interface SimpleQueryResponse {
+   answer: string;
+   model: string;
+ }
+
+ export const query = api<SimpleQueryRequest, SimpleQueryResponse>(
+   { expose: true, method: "GET", path: "/ai/query", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/ai/query");
+
+       if (!req.q) {
+         throw APIError.invalidArgument("query parameter 'q' is required");
+       }
+
+       const answer = await aiService.simpleQuery(req.q, req.model);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return {
+         answer,
+         model: req.model || 'default',
+       };
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/documents.ts ADDED
@@ -0,0 +1,119 @@
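+ // Document ingestion endpoints: base64 file uploads, per-document source
+ // listing, and a webhook receiver for ingestion lifecycle events.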
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { documentService } from "../services/document_service";
+ import type {
+   DocumentUploadResponse,
+   DocumentSource,
+   WebhookEvent
+ } from "../types/models";
+
+ interface UploadRequest {
+   filename: string;
+   content_base64: string;
+   metadata?: {
+     title?: string;
+     author?: string;
+     category?: string;
+     tags?: string[];
+   };
+ }
+
+ export const upload = api<UploadRequest, DocumentUploadResponse>(
+   { expose: true, method: "POST", path: "/upload", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/upload");
+
+       if (!req.filename) {
+         throw APIError.invalidArgument("filename is required");
+       }
+
+       if (!req.content_base64) {
+         throw APIError.invalidArgument("content_base64 is required");
+       }
+
+       const content = Buffer.from(req.content_base64, 'base64');
+
+       const response = await documentService.uploadDocument(
+         req.filename,
+         content,
+         req.metadata
+       );
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface GetSourcesRequest {
+   id: string;
+ }
+
+ interface GetSourcesResponse {
+   sources: DocumentSource[];
+ }
+
+ export const getSources = api<GetSourcesRequest, GetSourcesResponse>(
+   { expose: true, method: "GET", path: "/docs/:id/sources", auth: false },
+   async (req) => {
+     try {
+       const authHeader = auth();
+       validateApiKey(authHeader);
+
+       metrics.incrementRequests("/docs/:id/sources");
+
+       const sources = await documentService.getDocumentSources(req.id);
+
+       return { sources };
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error instanceof Error && error.message === 'Document not found') {
+         throw APIError.notFound("document not found");
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface WebhookResponse {
+   received: boolean;
+ }
+
+ export const webhook = api<WebhookEvent, WebhookResponse>(
+   { expose: true, method: "POST", path: "/webhook/events", auth: false },
+   async () => {
+     try {
+       metrics.incrementRequests("/webhook/events");
+
+       return { received: true };
+     } catch (error) {
+       metrics.incrementErrors();
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/encore.service.ts ADDED
@@ -0,0 +1,3 @@
+ import { Service } from "encore.dev/service";
+
+ export default new Service("api");
backend/api/health.ts ADDED
@@ -0,0 +1,55 @@
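+ // Unauthenticated health and metrics endpoints for liveness probes and
+ // monitoring dashboards.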
+ import { api } from "encore.dev/api";
+ import { aiService } from "../services/ai_service";
+ import { ragService } from "../services/rag_service";
+ import { metrics } from "../utils/metrics";
+ import type { HealthCheckResponse, MetricsResponse } from "../types/models";
+
+ const startTime = Date.now();
+ const version = "1.0.0";
+
+ export const health = api<void, HealthCheckResponse>(
+   { expose: true, method: "GET", path: "/health" },
+   async () => {
+     const services = [];
+
+     try {
+       const llmHealth = await aiService.healthCheck();
+       services.push({
+         name: "llm",
+         status: llmHealth.some(h => h.available) ? ("up" as const) : ("down" as const),
+       });
+     } catch {
+       services.push({ name: "llm", status: "down" as const });
+     }
+
+     try {
+       const vectorDbAvailable = await ragService.healthCheck();
+       services.push({
+         name: "vector_db",
+         status: vectorDbAvailable ? ("up" as const) : ("down" as const),
+       });
+     } catch {
+       services.push({ name: "vector_db", status: "down" as const });
+     }
+
+     const allUp = services.every(s => s.status === "up");
+     const status = allUp ? "healthy" : "degraded";
+
+     return {
+       status,
+       timestamp: Date.now(),
+       version,
+       services,
+       uptime_seconds: Math.floor((Date.now() - startTime) / 1000),
+     };
+   }
+ );
+
+ export const getMetrics = api<void, MetricsResponse>(
+   { expose: true, method: "GET", path: "/metrics" },
+   async () => {
+     return metrics.getMetrics();
+   }
+ );
backend/api/image.ts ADDED
@@ -0,0 +1,44 @@
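+ // POST /image/generate: validates the prompt, then delegates to the image
+ // service, which picks an adapter from the requested model name.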
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { imageService } from "../services/image_service";
+ import type { ImageGenerationRequest, ImageGenerationResponse } from "../types/models";
+
+ export const generate = api<ImageGenerationRequest, ImageGenerationResponse>(
+   { expose: true, method: "POST", path: "/image/generate", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/image/generate");
+
+       if (!req.prompt) {
+         throw APIError.invalidArgument("prompt is required");
+       }
+
+       const response = await imageService.generate(req);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/rag.ts ADDED
@@ -0,0 +1,72 @@
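+ // RAG endpoints: POST /rag/query runs retrieval-augmented generation and
+ // GET /rag/models lists the chat adapters currently registered.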
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { ragService } from "../services/rag_service";
+ import { aiService } from "../services/ai_service";
+ import type { RAGQueryRequest, RAGQueryResponse } from "../types/models";
+
+ export const ragQuery = api<RAGQueryRequest, RAGQueryResponse>(
+   { expose: true, method: "POST", path: "/rag/query", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/rag/query");
+
+       if (!req.query) {
+         throw APIError.invalidArgument("query is required");
+       }
+
+       const response = await ragService.query(req);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface ModelsResponse {
+   models: string[];
+   default_model: string;
+ }
+
+ export const getModels = api<void, ModelsResponse>(
+   { expose: true, method: "GET", path: "/rag/models", auth: false },
+   async () => {
+     try {
+       const authHeader = auth();
+       validateApiKey(authHeader);
+
+       metrics.incrementRequests("/rag/models");
+
+       const models = aiService.getAvailableModels();
+
+       return {
+         models,
+         default_model: models[0] || 'gpt-3.5-turbo',
+       };
+     } catch (error) {
+       metrics.incrementErrors();
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/api/voice.ts ADDED
@@ -0,0 +1,101 @@
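+ // Voice endpoints: POST /voice/synthesize (text-to-speech) and
+ // POST /voice/transcribe (base64-encoded audio to text).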
+ import { api, APIError } from "encore.dev/api";
+ import { auth, validateApiKey } from "../utils/auth";
+ import { checkRateLimit } from "../utils/rate_limit";
+ import { metrics } from "../utils/metrics";
+ import { voiceService } from "../services/voice_service";
+ import type {
+   VoiceSynthesisRequest,
+   VoiceSynthesisResponse,
+   TranscriptionRequest,
+   TranscriptionResponse
+ } from "../types/models";
+
+ export const synthesize = api<VoiceSynthesisRequest, VoiceSynthesisResponse>(
+   { expose: true, method: "POST", path: "/voice/synthesize", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/voice/synthesize");
+
+       if (!req.text) {
+         throw APIError.invalidArgument("text is required");
+       }
+
+       const response = await voiceService.synthesize(req);
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
+
+ interface TranscribeRequestBody {
+   audio_base64: string;
+   model?: string;
+   language?: string;
+   prompt?: string;
+ }
+
+ export const transcribe = api<TranscribeRequestBody, TranscriptionResponse>(
+   { expose: true, method: "POST", path: "/voice/transcribe", auth: false },
+   async (req) => {
+     const startTime = Date.now();
+
+     try {
+       const authHeader = auth();
+       const authData = validateApiKey(authHeader);
+       checkRateLimit(authData.apiKey, authData.tier);
+
+       metrics.incrementRequests("/voice/transcribe");
+
+       if (!req.audio_base64) {
+         throw APIError.invalidArgument("audio_base64 is required");
+       }
+
+       const audioBuffer = Buffer.from(req.audio_base64, 'base64');
+
+       const response = await voiceService.transcribe(audioBuffer, {
+         audio_url: '',
+         model: req.model,
+         language: req.language,
+         prompt: req.prompt,
+       });
+
+       metrics.recordResponseTime(Date.now() - startTime);
+
+       return response;
+     } catch (error) {
+       metrics.incrementErrors();
+
+       if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 429) {
+         const err = error as any;
+         throw APIError.resourceExhausted(err.message).withDetails({
+           limit: err.limit,
+           remaining: err.remaining,
+           reset_at: err.resetAt,
+         });
+       }
+
+       throw error instanceof Error ? error : APIError.internal(String(error));
+     }
+   }
+ );
backend/encore.app ADDED
@@ -0,0 +1 @@
+ {"id": "scalable-ai-api-service-ysyi", "lang": "typescript"}
backend/package.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "name": "backend",
+   "version": "1.0.0",
+   "type": "module",
+   "packageManager": "bun",
+   "dependencies": {
+     "@anthropic-ai/sdk": "^0.24.1",
+     "@huggingface/inference": "^3.10.0",
+     "@pinecone-database/pinecone": "^6.1.1",
+     "encore.dev": "^1.50.4",
+     "openai": "^4.90.0"
+   },
+   "devDependencies": {
+     "typescript": "^5.8.3"
+   }
+ }
backend/services/ai_service.ts ADDED
@@ -0,0 +1,193 @@
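+ // Routes chat and embedding calls to whichever LLM adapters are configured.
+ // Cloud providers register only when their API keys are present; the Ollama
+ // adapter is always registered so a local model can act as the fallback.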
+ import { OpenAIAdapter } from '../adapters/openai_adapter';
+ import { HuggingFaceAdapter } from '../adapters/huggingface_adapter';
+ import { AnthropicAdapter } from '../adapters/anthropic_adapter';
+ import { OllamaAdapter } from '../adapters/ollama_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type { Message, ChatOptions, ChatResponse, LLMAdapter } from '../types/models';
+
+ const config = loadConfig();
+
+ class AIService {
+   private adapters: Map<string, LLMAdapter> = new Map();
+   private defaultAdapter: LLMAdapter | null = null;
+
+   constructor() {
+     this.initializeAdapters();
+   }
+
+   private initializeAdapters(): void {
+     if (config.openai.apiKey) {
+       const openaiAdapter = new OpenAIAdapter(
+         config.openai.apiKey,
+         config.openai.defaultChatModel,
+         config.openai.defaultEmbeddingModel,
+         config.openai.defaultImageModel,
+         config.openai.defaultVoiceModel
+       );
+       this.adapters.set('openai', openaiAdapter);
+       this.adapters.set('gpt-4', openaiAdapter);
+       this.adapters.set('gpt-3.5-turbo', openaiAdapter);
+       this.adapters.set('gpt-4-turbo', openaiAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = openaiAdapter;
+       }
+     }
+
+     if (config.huggingface.apiKey) {
+       const hfAdapter = new HuggingFaceAdapter(config.huggingface.apiKey);
+       this.adapters.set('huggingface', hfAdapter);
+       this.adapters.set('mistral', hfAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = hfAdapter;
+       }
+     }
+
+     if (config.anthropic.apiKey) {
+       const anthropicAdapter = new AnthropicAdapter(config.anthropic.apiKey);
+       this.adapters.set('anthropic', anthropicAdapter);
+       this.adapters.set('claude', anthropicAdapter);
+       this.adapters.set('claude-3-sonnet', anthropicAdapter);
+       this.adapters.set('claude-3-opus', anthropicAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = anthropicAdapter;
+       }
+     }
+
+     // Ollama needs no API key, so it is always registered as a local fallback.
+     const ollamaBaseUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
+     const ollamaModel = process.env.OLLAMA_MODEL || 'llama2';
+     const ollamaEmbeddingModel = process.env.OLLAMA_EMBEDDING_MODEL || 'nomic-embed-text';
+
+     const ollamaAdapter = new OllamaAdapter(ollamaBaseUrl, ollamaModel, ollamaEmbeddingModel);
+     this.adapters.set('ollama', ollamaAdapter);
+     this.adapters.set('llama', ollamaAdapter);
+     this.adapters.set('llama2', ollamaAdapter);
+     this.adapters.set('llama3', ollamaAdapter);
+     // Note: this re-points 'mistral' at Ollama when HuggingFace registered it above.
+     this.adapters.set('mistral', ollamaAdapter);
+     this.adapters.set('phi', ollamaAdapter);
+     this.adapters.set('gemma', ollamaAdapter);
+
+     if (!this.defaultAdapter) {
+       this.defaultAdapter = ollamaAdapter;
+       logger.info('Using Ollama as default LLM provider');
+     }
+   }
+
+   private getAdapter(model?: string): LLMAdapter {
+     if (!model) {
+       if (!this.defaultAdapter) {
+         throw new Error('No LLM adapter available. Please configure API keys.');
+       }
+       return this.defaultAdapter;
+     }
+
+     // Fuzzy match so variants like 'gpt-4-0613' resolve to the 'gpt-4' adapter.
+     const lowerModel = model.toLowerCase();
+
+     for (const [key, adapter] of this.adapters.entries()) {
+       if (lowerModel.includes(key) || key.includes(lowerModel)) {
+         return adapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       throw new Error('No LLM adapter available. Please configure API keys.');
+     }
+
+     logger.warn(`Model ${model} not found, using default adapter`);
+     return this.defaultAdapter;
+   }
+
+   async chat(messages: Message[], model?: string, options?: ChatOptions): Promise<ChatResponse> {
+     try {
+       const adapter = this.getAdapter(model);
+
+       logger.info('Generating chat completion', {
+         model: model || 'default',
+         messageCount: messages.length
+       });
+
+       const response = await adapter.generateCompletion(messages, options);
+
+       metrics.incrementModelUsage(response.model);
+
+       logger.info('Chat completion generated', {
+         model: response.model,
+         tokensUsed: response.usage.total_tokens,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error generating chat completion', {
+         error: error instanceof Error ? error.message : String(error),
+         model: model || 'default'
+       });
+       throw error;
+     }
+   }
+
+   async simpleQuery(query: string, model?: string, options?: ChatOptions): Promise<string> {
+     const messages: Message[] = [
+       {
+         role: 'system',
+         content: 'You are a helpful assistant. Provide clear, concise answers.',
+       },
+       {
+         role: 'user',
+         content: query,
+       },
+     ];
+
+     const response = await this.chat(messages, model, options);
+     return response.reply;
+   }
+
+   async generateEmbedding(text: string | string[], model?: string) {
+     try {
+       const adapter = this.getAdapter(model);
+
+       logger.info('Generating embeddings', {
+         model: model || 'default',
+         textCount: Array.isArray(text) ? text.length : 1
+       });
+
+       const response = await adapter.generateEmbedding(text);
+
+       logger.info('Embeddings generated', {
+         model: response.model,
+         count: response.embeddings.length,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error generating embeddings', {
+         error: error instanceof Error ? error.message : String(error)
+       });
+       throw error;
+     }
+   }
+
+   getAvailableModels(): string[] {
+     return Array.from(this.adapters.keys());
+   }
+
+   async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
+     const results: { provider: string; available: boolean }[] = [];
+
+     for (const [provider, adapter] of this.adapters.entries()) {
+       const available = await adapter.isAvailable();
+       results.push({ provider, available });
+     }
+
+     return results;
+   }
+ }
+
+ export const aiService = new AIService();
backend/services/document_service.ts ADDED
@@ -0,0 +1,238 @@
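+ // In-memory document store: extracts text, splits it into overlapping chunks,
+ // and hands the chunks to the RAG service for embedding and indexing.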
+ import crypto from 'crypto';
+ import { ragService } from './rag_service';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type {
+   DocumentUploadResponse,
+   DocumentMetadata,
+   DocumentChunk,
+   DocumentSource
+ } from '../types/models';
+
+ function uuidv4(): string {
+   return crypto.randomUUID();
+ }
+
+ const config = loadConfig();
+
+ interface StoredDocument {
+   doc_id: string;
+   filename: string;
+   content: string;
+   metadata: DocumentMetadata;
+   chunks: DocumentChunk[];
+   status: 'processing' | 'completed' | 'failed';
+   error?: string;
+   created_at: number;
+ }
+
+ class DocumentService {
+   private documents = new Map<string, StoredDocument>();
+   private processingQueue: string[] = [];
+
+   async uploadDocument(
+     filename: string,
+     content: Buffer,
+     metadata?: DocumentMetadata
+   ): Promise<DocumentUploadResponse> {
+     try {
+       const doc_id = uuidv4();
+       const size_bytes = content.length;
+
+       const maxSize = config.documents.maxFileSizeMB * 1024 * 1024;
+       if (size_bytes > maxSize) {
+         throw new Error(`File size exceeds maximum of ${config.documents.maxFileSizeMB}MB`);
+       }
+
+       logger.info('Uploading document', { doc_id, filename, size_bytes });
+
+       const textContent = await this.extractText(filename, content);
+
+       const estimatedChunks = Math.ceil(textContent.length / config.documents.chunkSize);
+
+       const document: StoredDocument = {
+         doc_id,
+         filename,
+         content: textContent,
+         metadata: metadata || {},
+         chunks: [],
+         status: 'processing',
+         created_at: Date.now(),
+       };
+
+       this.documents.set(doc_id, document);
+       this.processingQueue.push(doc_id);
+
+       if (config.workers.enabled) {
+         this.processDocumentAsync(doc_id);
+       } else {
+         await this.processDocument(doc_id);
+       }
+
+       return {
+         doc_id,
+         filename,
+         size_bytes,
+         status: document.status,
+         estimated_chunks: estimatedChunks,
+         webhook_url: '/webhook/events',
+       };
+     } catch (error) {
+       logger.error('Error uploading document', {
+         error: error instanceof Error ? error.message : String(error),
+         filename,
+       });
+       throw error;
+     }
+   }
+
+   private async extractText(filename: string, content: Buffer): Promise<string> {
+     const extension = filename.split('.').pop()?.toLowerCase();
+
+     if (extension === 'txt') {
+       return content.toString('utf-8');
+     }
+
+     if (extension === 'pdf' || extension === 'docx') {
+       logger.warn(`${extension} parsing not implemented, treating as text`, { filename });
+       return content.toString('utf-8');
+     }
+
+     return content.toString('utf-8');
+   }
+
+   private async processDocument(doc_id: string): Promise<void> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       logger.error('Document not found', { doc_id });
+       return;
+     }
+
+     try {
+       logger.info('Processing document', { doc_id, filename: document.filename });
+
+       const chunks = this.chunkText(document.content, doc_id, document.metadata);
+       document.chunks = chunks;
+
+       const chunkData = chunks.map(chunk => ({
+         id: chunk.chunk_id,
+         content: chunk.content,
+         metadata: {
+           doc_id: chunk.doc_id,
+           chunk_index: chunk.chunk_index,
+           total_chunks: chunk.total_chunks,
+           ...chunk.metadata,
+         },
+       }));
+
+       await ragService.addDocumentChunks(chunkData);
+
+       document.status = 'completed';
+       metrics.incrementDocumentsProcessed();
+
+       logger.info('Document processed successfully', {
+         doc_id,
+         chunksCreated: chunks.length,
+       });
+     } catch (error) {
+       document.status = 'failed';
+       document.error = error instanceof Error ? error.message : String(error);
+
+       logger.error('Error processing document', {
+         error: document.error,
+         doc_id,
+       });
+     }
+   }
+
+   private async processDocumentAsync(doc_id: string): Promise<void> {
+     setTimeout(async () => {
+       await this.processDocument(doc_id);
+     }, 100);
+   }
+
+   private chunkText(
+     text: string,
+     doc_id: string,
+     metadata: DocumentMetadata
+   ): DocumentChunk[] {
+     const chunkSize = config.documents.chunkSize;
+     const overlap = config.documents.chunkOverlap;
+     const chunks: DocumentChunk[] = [];
+
+     let start = 0;
+     let chunkIndex = 0;
+
+     while (start < text.length) {
+       const end = Math.min(start + chunkSize, text.length);
+       const content = text.slice(start, end);
+
+       const chunk_id = `${doc_id}_chunk_${chunkIndex}`;
+
+       chunks.push({
+         chunk_id,
+         doc_id,
+         content,
+         metadata,
+         chunk_index: chunkIndex,
+         total_chunks: 0,
+       });
+
+       start += chunkSize - overlap;
+       chunkIndex++;
+     }
+
+     const totalChunks = chunks.length;
+     chunks.forEach(chunk => {
+       chunk.total_chunks = totalChunks;
+     });
+
+     return chunks;
+   }
+
+   async getDocumentSources(doc_id: string): Promise<DocumentSource[]> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       throw new Error('Document not found');
+     }
+
+     return document.chunks.map(chunk => ({
+       doc_id: chunk.doc_id,
+       chunk_id: chunk.chunk_id,
+       content: chunk.content,
+       score: 1.0,
+       metadata: chunk.metadata,
+     }));
+   }
+
+   async getDocumentStatus(doc_id: string): Promise<DocumentUploadResponse> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       throw new Error('Document not found');
+     }
+
+     return {
+       doc_id: document.doc_id,
+       filename: document.filename,
+       size_bytes: document.content.length,
+       status: document.status,
+       estimated_chunks: document.chunks.length,
+       error: document.error,
+     };
+   }
+
+   async deleteDocument(doc_id: string): Promise<void> {
+     const document = this.documents.get(doc_id);
+     if (!document) {
+       throw new Error('Document not found');
+     }
+
+     await ragService.deleteDocument(doc_id);
+     this.documents.delete(doc_id);
+
+     logger.info('Document deleted', { doc_id });
+   }
+ }
+
+ export const documentService = new DocumentService();
backend/services/image_service.ts ADDED
@@ -0,0 +1,122 @@
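+ // Maps image model names (dall-e-*, stable-diffusion, sdxl) onto the OpenAI
+ // and HuggingFace adapters and forwards generation requests.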
+ import { OpenAIAdapter } from '../adapters/openai_adapter';
+ import { HuggingFaceAdapter } from '../adapters/huggingface_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type { ImageAdapter, ImageGenerationRequest, ImageGenerationResponse } from '../types/models';
+
+ const config = loadConfig();
+
+ class ImageService {
+   private adapters: Map<string, ImageAdapter> = new Map();
+   private defaultAdapter: ImageAdapter | null = null;
+
+   constructor() {
+     this.initializeAdapters();
+   }
+
+   private initializeAdapters(): void {
+     if (config.openai.apiKey) {
+       const openaiAdapter = new OpenAIAdapter(
+         config.openai.apiKey,
+         config.openai.defaultChatModel,
+         config.openai.defaultEmbeddingModel,
+         config.openai.defaultImageModel
+       );
+       this.adapters.set('openai', openaiAdapter);
+       this.adapters.set('dall-e', openaiAdapter);
+       this.adapters.set('dall-e-2', openaiAdapter);
+       this.adapters.set('dall-e-3', openaiAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = openaiAdapter;
+       }
+     }
+
+     if (config.huggingface.apiKey) {
+       const hfAdapter = new HuggingFaceAdapter(config.huggingface.apiKey);
+       this.adapters.set('huggingface', hfAdapter);
+       this.adapters.set('stable-diffusion', hfAdapter);
+       this.adapters.set('sdxl', hfAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = hfAdapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       logger.warn('No image generation adapters initialized. Please configure API keys.');
+     }
+   }
+
+   private getAdapter(model?: string): ImageAdapter {
+     if (!model) {
+       if (!this.defaultAdapter) {
+         throw new Error('No image adapter available. Please configure API keys.');
+       }
+       return this.defaultAdapter;
+     }
+
+     const lowerModel = model.toLowerCase();
+
+     for (const [key, adapter] of this.adapters.entries()) {
+       if (lowerModel.includes(key) || key.includes(lowerModel)) {
+         return adapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       throw new Error('No image adapter available. Please configure API keys.');
+     }
+
+     logger.warn(`Model ${model} not found, using default adapter`);
+     return this.defaultAdapter;
+   }
+
+   async generate(request: ImageGenerationRequest): Promise<ImageGenerationResponse> {
+     try {
+       const adapter = this.getAdapter(request.model);
+
+       logger.info('Generating image', {
+         prompt: request.prompt.substring(0, 100),
+         model: request.model || 'default',
+         size: request.size,
+         n: request.n,
+       });
+
+       const response = await adapter.generateImage(request.prompt, request);
+
+       metrics.incrementModelUsage(response.model);
+
+       logger.info('Image generated successfully', {
+         model: response.model,
+         imageCount: response.images.length,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error generating image', {
+         error: error instanceof Error ? error.message : String(error),
+         model: request.model || 'default',
+       });
+       throw error;
+     }
+   }
+
+   getAvailableModels(): string[] {
+     return Array.from(this.adapters.keys());
+   }
+
+   async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
+     const results: { provider: string; available: boolean }[] = [];
+
+     for (const [provider, adapter] of this.adapters.entries()) {
+       const available = await adapter.isAvailable();
+       results.push({ provider, available });
+     }
+
+     return results;
+   }
+ }
+
+ export const imageService = new ImageService();
backend/services/rag_service.ts ADDED
@@ -0,0 +1,182 @@
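+ // RAG pipeline: embed the query, retrieve the top-k chunks from the vector
+ // store, splice them into a grounding prompt, and ask the chat model.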
+ import { aiService } from './ai_service';
+ import { PineconeAdapter, InMemoryVectorDB } from '../adapters/vector_db_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type {
+   RAGQueryRequest,
+   RAGQueryResponse,
+   DocumentSource,
+   VectorDBAdapter
+ } from '../types/models';
+
+ const config = loadConfig();
+
+ class RAGService {
+   private vectorDB: VectorDBAdapter;
+
+   constructor() {
+     this.vectorDB = this.initializeVectorDB();
+   }
+
+   private initializeVectorDB(): VectorDBAdapter {
+     if (config.pinecone.apiKey) {
+       logger.info('Initializing Pinecone vector DB');
+       return new PineconeAdapter(
+         config.pinecone.apiKey,
+         config.pinecone.indexName
+       );
+     }
+
+     logger.warn('Pinecone not configured, using in-memory vector DB');
+     return new InMemoryVectorDB();
+   }
+
+   async query(request: RAGQueryRequest): Promise<RAGQueryResponse> {
+     const startTime = Date.now();
+
+     try {
+       logger.info('Processing RAG query', {
+         query: request.query.substring(0, 100),
+         topK: request.top_k || 5
+       });
+
+       let sources: DocumentSource[] = [];
+       let contextPrompt = request.query;
+
+       if (request.use_retrieval !== false) {
+         const embeddingResponse = await aiService.generateEmbedding(request.query);
+         const queryVector = embeddingResponse.embeddings[0];
+
+         metrics.incrementVectorDbQueries();
+
+         const results = await this.vectorDB.query(
+           queryVector,
+           request.top_k || 5,
+           request.filters
+         );
+
+         sources = results.map(result => ({
+           doc_id: result.metadata.doc_id || result.id,
+           chunk_id: result.id,
+           content: result.metadata.content || '',
+           score: result.score,
+           metadata: result.metadata,
+         }));
+
+         if (sources.length > 0) {
+           const context = sources
+             .map(s => `[Source: ${s.doc_id}]\n${s.content}`)
+             .join('\n\n');
+
+           contextPrompt = this.buildRAGPrompt(request.query, context);
+         }
+       }
+
+       const messages = [
+         {
+           role: 'system' as const,
+           content: 'You are a helpful assistant. Answer questions based on the provided context. If the context doesn\'t contain relevant information, say so.',
+         },
+         {
+           role: 'user' as const,
+           content: contextPrompt,
+         },
+       ];
+
+       const chatResponse = await aiService.chat(messages, request.model);
+
+       const retrievalTimeMs = Date.now() - startTime;
+
+       logger.info('RAG query completed', {
+         sourcesFound: sources.length,
+         retrievalTimeMs,
+         model: chatResponse.model,
+       });
+
+       return {
+         answer: chatResponse.reply,
+         sources,
+         model: chatResponse.model,
+         usage: chatResponse.usage,
+         retrieval_time_ms: retrievalTimeMs,
+       };
+     } catch (error) {
+       logger.error('Error processing RAG query', {
+         error: error instanceof Error ? error.message : String(error),
+       });
+       throw error;
+     }
+   }
+
+   async addDocumentChunks(chunks: Array<{
+     id: string;
+     content: string;
+     metadata: Record<string, any>;
+   }>): Promise<void> {
+     try {
+       logger.info('Adding document chunks to vector DB', { count: chunks.length });
+
+       // Embed all chunks in one batch, then store each vector with its text
+       // copied into metadata so retrieval can return the content directly.
+       const texts = chunks.map(c => c.content);
+       const embeddingResponse = await aiService.generateEmbedding(texts);
+
+       const vectors = chunks.map((chunk, index) => ({
+         id: chunk.id,
+         values: embeddingResponse.embeddings[index],
+         metadata: {
+           ...chunk.metadata,
+           content: chunk.content,
+         },
+       }));
+
+       await this.vectorDB.upsert(vectors);
+
+       logger.info('Document chunks added successfully', { count: chunks.length });
+     } catch (error) {
+       logger.error('Error adding document chunks', {
+         error: error instanceof Error ? error.message : String(error),
+       });
+       throw error;
+     }
+   }
+
+   async deleteDocument(docId: string): Promise<void> {
+     try {
+       logger.info('Deleting document from vector DB', { docId });
+
+       // Metadata-only lookup: an empty query vector works with the in-memory
+       // adapter (every entry matches with score 0), but a managed index such
+       // as Pinecone would reject it and needs a metadata-based delete instead.
+       const results = await this.vectorDB.query([], 10000, { doc_id: docId });
+       const chunkIds = results.map(r => r.id);
+
+       if (chunkIds.length > 0) {
+         await this.vectorDB.delete(chunkIds);
+       }
+
+       logger.info('Document deleted successfully', { docId, chunksDeleted: chunkIds.length });
+     } catch (error) {
+       logger.error('Error deleting document', {
+         error: error instanceof Error ? error.message : String(error),
+         docId,
+       });
+       throw error;
+     }
+   }
+
+   private buildRAGPrompt(query: string, context: string): string {
+     return `Context information is below:
+ ---
+ ${context}
+ ---
+
+ Based on the context above, please answer the following question. If the context doesn't contain enough information to answer the question, please say so.
+
+ Question: ${query}
+
+ Answer:`;
+   }
+
+   async healthCheck(): Promise<boolean> {
+     return await this.vectorDB.isAvailable();
+   }
+ }
+
+ export const ragService = new RAGService();
backend/services/voice_service.ts ADDED
@@ -0,0 +1,149 @@
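+ // Wraps the OpenAI adapter for text-to-speech (tts-1 family) and Whisper
+ // transcription behind a common VoiceAdapter lookup.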
+ import { OpenAIAdapter } from '../adapters/openai_adapter';
+ import { loadConfig } from '../types/config';
+ import { logger } from '../utils/logger';
+ import { metrics } from '../utils/metrics';
+ import type {
+   VoiceAdapter,
+   VoiceSynthesisRequest,
+   VoiceSynthesisResponse,
+   TranscriptionRequest,
+   TranscriptionResponse
+ } from '../types/models';
+
+ const config = loadConfig();
+
+ class VoiceService {
+   private adapters: Map<string, VoiceAdapter> = new Map();
+   private defaultAdapter: VoiceAdapter | null = null;
+
+   constructor() {
+     this.initializeAdapters();
+   }
+
+   private initializeAdapters(): void {
+     if (config.openai.apiKey) {
+       const openaiAdapter = new OpenAIAdapter(
+         config.openai.apiKey,
+         config.openai.defaultChatModel,
+         config.openai.defaultEmbeddingModel,
+         config.openai.defaultImageModel,
+         config.openai.defaultVoiceModel
+       );
+       this.adapters.set('openai', openaiAdapter);
+       this.adapters.set('tts-1', openaiAdapter);
+       this.adapters.set('tts-1-hd', openaiAdapter);
+       this.adapters.set('whisper', openaiAdapter);
+
+       if (!this.defaultAdapter) {
+         this.defaultAdapter = openaiAdapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       logger.warn('No voice synthesis adapters initialized. Please configure API keys.');
+     }
+   }
+
+   private getAdapter(model?: string): VoiceAdapter {
+     if (!model) {
+       if (!this.defaultAdapter) {
+         throw new Error('No voice adapter available. Please configure API keys.');
+       }
+       return this.defaultAdapter;
+     }
+
+     const lowerModel = model.toLowerCase();
+
+     for (const [key, adapter] of this.adapters.entries()) {
+       if (lowerModel.includes(key) || key.includes(lowerModel)) {
+         return adapter;
+       }
+     }
+
+     if (!this.defaultAdapter) {
+       throw new Error('No voice adapter available. Please configure API keys.');
+     }
+
+     logger.warn(`Model ${model} not found, using default adapter`);
+     return this.defaultAdapter;
+   }
+
+   async synthesize(request: VoiceSynthesisRequest): Promise<VoiceSynthesisResponse> {
+     try {
+       const adapter = this.getAdapter(request.model);
+
+       logger.info('Synthesizing speech', {
+         textLength: request.text.length,
+         voice: request.voice || 'default',
+         model: request.model || 'default',
+       });
+
+       const response = await adapter.synthesize(request.text, request);
+
+       metrics.incrementModelUsage(request.model || 'tts-1');
+
+       logger.info('Speech synthesized successfully', {
+         voice: response.voice,
+         format: response.format,
+         sizeBytes: response.size_bytes,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error synthesizing speech', {
+         error: error instanceof Error ? error.message : String(error),
+         model: request.model || 'default',
+       });
+       throw error;
+     }
+   }
+
+   async transcribe(audio: Buffer, request: TranscriptionRequest): Promise<TranscriptionResponse> {
+     try {
+       const adapter = this.getAdapter(request.model);
+
+       if (!adapter.transcribe) {
+         throw new Error('Transcription not supported by this adapter');
+       }
+
+       logger.info('Transcribing audio', {
+         model: request.model || 'default',
+         language: request.language,
+       });
+
+       const response = await adapter.transcribe(audio, request);
+
+       metrics.incrementModelUsage(request.model || 'whisper-1');
+
+       logger.info('Audio transcribed successfully', {
+         textLength: response.text.length,
+         language: response.language,
+       });
+
+       return response;
+     } catch (error) {
+       logger.error('Error transcribing audio', {
+         error: error instanceof Error ? error.message : String(error),
+         model: request.model || 'default',
+       });
+       throw error;
+     }
+   }
+
+   getAvailableModels(): string[] {
+     return Array.from(this.adapters.keys());
+   }
+
+   async healthCheck(): Promise<{ provider: string; available: boolean }[]> {
+     const results: { provider: string; available: boolean }[] = [];
+
+     for (const [provider, adapter] of this.adapters.entries()) {
+       const available = await adapter.isAvailable();
+       results.push({ provider, available });
+     }
+
+     return results;
+   }
+ }
+
+ export const voiceService = new VoiceService();
backend/tsconfig.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "$schema": "https://json.schemastore.org/tsconfig",
+   "compilerOptions": {
+     /* Basic Options */
+     "lib": ["ES2022"],
+     "target": "ES2022",
+     "module": "ES2022",
+     "types": ["node"],
+     "paths": {
+       "~encore/*": ["./encore.gen/*"]
+     },
+
+     /* Workspace Settings */
+     "composite": true,
+
+     /* Strict Type-Checking Options */
+     "strict": true,
+
+     /* Module Resolution Options */
+     "moduleResolution": "bundler",
+     "allowSyntheticDefaultImports": true,
+     "isolatedModules": true,
+     "sourceMap": true,
+
+     "declaration": true,
+
+     /* Advanced Options */
+     "forceConsistentCasingInFileNames": true,
+     "skipLibCheck": true
+   }
+ }
backend/types/config.ts ADDED
@@ -0,0 +1,93 @@
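+ // Central typed view of all environment configuration, with development-safe
+ // defaults so the service can boot without a .env file.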
+ export interface AppConfig {
+   openai: {
+     apiKey: string;
+     defaultChatModel: string;
+     defaultEmbeddingModel: string;
+     defaultImageModel: string;
+     defaultVoiceModel: string;
+   };
+   huggingface: {
+     apiKey: string;
+     defaultModel: string;
+   };
+   anthropic: {
+     apiKey: string;
+     defaultModel: string;
+   };
+   pinecone: {
+     apiKey: string;
+     environment: string;
+     indexName: string;
+   };
+   auth: {
+     apiKeys: string[];
+     adminApiKeys: string[];
+   };
+   rateLimit: {
+     default: number;
+     premium: number;
+     admin: number;
+   };
+   documents: {
+     maxFileSizeMB: number;
+     chunkSize: number;
+     chunkOverlap: number;
+   };
+   workers: {
+     enabled: boolean;
+     concurrency: number;
+   };
+   server: {
+     port: number;
+     logLevel: string;
+     corsOrigins: string[];
+   };
+ }
+
+ export function loadConfig(): AppConfig {
+   return {
+     openai: {
+       apiKey: process.env.OPENAI_API_KEY || '',
+       defaultChatModel: process.env.DEFAULT_CHAT_MODEL || 'gpt-3.5-turbo',
+       defaultEmbeddingModel: process.env.DEFAULT_EMBEDDING_MODEL || 'text-embedding-ada-002',
+       defaultImageModel: process.env.DEFAULT_IMAGE_MODEL || 'dall-e-3',
+       defaultVoiceModel: process.env.DEFAULT_VOICE_MODEL || 'tts-1',
+     },
+     huggingface: {
+       apiKey: process.env.HUGGINGFACE_API_KEY || '',
+       defaultModel: process.env.HF_DEFAULT_MODEL || 'mistralai/Mistral-7B-Instruct-v0.1',
+     },
+     anthropic: {
+       apiKey: process.env.ANTHROPIC_API_KEY || '',
+       defaultModel: process.env.ANTHROPIC_DEFAULT_MODEL || 'claude-3-sonnet-20240229',
+     },
+     pinecone: {
+       apiKey: process.env.PINECONE_API_KEY || '',
+       environment: process.env.PINECONE_ENVIRONMENT || 'us-west1-gcp',
+       indexName: process.env.PINECONE_INDEX_NAME || 'ai-api-vectors',
+     },
+     auth: {
+       apiKeys: (process.env.API_KEYS || 'demo-key-1,demo-key-2').split(',').map(k => k.trim()),
+       adminApiKeys: (process.env.ADMIN_API_KEYS || '').split(',').map(k => k.trim()).filter(Boolean),
+     },
+     rateLimit: {
+       default: parseInt(process.env.RATE_LIMIT_DEFAULT || '60', 10),
+       premium: parseInt(process.env.RATE_LIMIT_PREMIUM || '300', 10),
+       admin: parseInt(process.env.RATE_LIMIT_ADMIN || '1000', 10),
+     },
+     documents: {
+       maxFileSizeMB: parseInt(process.env.MAX_FILE_SIZE_MB || '10', 10),
+       chunkSize: parseInt(process.env.CHUNK_SIZE || '1000', 10),
+       chunkOverlap: parseInt(process.env.CHUNK_OVERLAP || '200', 10),
+     },
+     workers: {
+       enabled: process.env.ENABLE_BACKGROUND_WORKERS === 'true',
+       concurrency: parseInt(process.env.WORKER_CONCURRENCY || '5', 10),
+     },
+     server: {
+       port: parseInt(process.env.PORT || '8000', 10),
+       logLevel: process.env.LOG_LEVEL || 'info',
+       corsOrigins: (process.env.CORS_ORIGINS || 'http://localhost:3000').split(',').map(o => o.trim()),
+     },
+   };
+ }
backend/types/models.ts ADDED
@@ -0,0 +1,256 @@
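+ // Shared request/response types plus the adapter interfaces (LLMAdapter,
+ // VectorDBAdapter, and friends) that each provider integration implements.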
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ export interface Message {
+   role: 'system' | 'user' | 'assistant';
+   content: string;
+   timestamp?: number;
+ }
+
+ export interface ChatRequest {
+   conversation: Message[];
+   model?: string;
+   options?: ChatOptions;
+ }
+
+ export interface ChatOptions {
+   temperature?: number;
+   max_tokens?: number;
+   top_p?: number;
+   frequency_penalty?: number;
+   presence_penalty?: number;
+   stop?: string[];
+ }
+
+ export interface ChatResponse {
+   reply: string;
+   model: string;
+   usage: TokenUsage;
+   sources?: DocumentSource[] | null;
+   conversation_id?: string;
+ }
+
+ export interface TokenUsage {
+   prompt_tokens: number;
+   completion_tokens: number;
+   total_tokens: number;
+ }
+
+ export interface RAGQueryRequest {
+   query: string;
+   top_k?: number;
+   model?: string;
+   use_retrieval?: boolean;
+   filters?: Record<string, any>;
+ }
+
+ export interface RAGQueryResponse {
+   answer: string;
+   sources: DocumentSource[];
+   model: string;
+   usage: TokenUsage;
+   retrieval_time_ms?: number;
+ }
+
+ export interface DocumentSource {
+   doc_id: string;
+   chunk_id: string;
+   content: string;
+   score: number;
+   metadata?: Record<string, any>;
+ }
+
+ export interface ImageGenerationRequest {
+   prompt: string;
+   model?: string;
+   size?: '256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792';
+   n?: number;
+   quality?: 'standard' | 'hd';
+   style?: 'vivid' | 'natural';
+ }
+
+ export interface ImageGenerationResponse {
+   images: GeneratedImage[];
+   model: string;
+   created: number;
+ }
+
+ export interface GeneratedImage {
+   url: string;
+   revised_prompt?: string;
+   b64_json?: string;
+ }
+
+ export interface VoiceSynthesisRequest {
+   text: string;
+   voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
+   model?: string;
+   format?: 'mp3' | 'opus' | 'aac' | 'flac';
+   speed?: number;
+ }
+
+ export interface VoiceSynthesisResponse {
+   audio_url: string;
+   voice: string;
+   format: string;
+   duration_ms?: number;
+   size_bytes?: number;
+ }
+
+ export interface TranscriptionRequest {
+   audio_url: string;
+   model?: string;
+   language?: string;
+   prompt?: string;
+ }
+
+ export interface TranscriptionResponse {
+   text: string;
+   language: string;
+   duration: number;
+   model: string;
+ }
+
+ export interface DocumentUploadRequest {
+   filename: string;
+   content: Buffer;
+   metadata?: DocumentMetadata;
+ }
+
+ export interface DocumentMetadata {
+   title?: string;
+   author?: string;
+   category?: string;
+   tags?: string[];
+   [key: string]: any;
+ }
+
+ export interface DocumentUploadResponse {
+   doc_id: string;
+   filename: string;
+   size_bytes: number;
+   status: 'processing' | 'completed' | 'failed';
+   estimated_chunks?: number;
+   webhook_url?: string;
+   error?: string;
+ }
+
+ export interface DocumentChunk {
+   chunk_id: string;
+   doc_id: string;
+   content: string;
+   embedding?: number[];
+   metadata: DocumentMetadata;
+   chunk_index: number;
+   total_chunks: number;
+ }
+
+ export interface HealthCheckResponse {
+   status: 'healthy' | 'degraded' | 'unhealthy';
+   timestamp: number;
+   version: string;
+   services: ServiceHealth[];
+   uptime_seconds: number;
+ }
+
+ export interface ServiceHealth {
+   name: string;
+   status: 'up' | 'down' | 'degraded';
+   latency_ms?: number;
+   error?: string;
+ }
+
+ export interface MetricsResponse {
+   timestamp: number;
+   requests_total: number;
+   requests_by_endpoint: Record<string, number>;
+   errors_total: number;
+   rate_limit_hits: number;
+   active_connections: number;
+   average_response_time_ms: number;
+   model_usage: Record<string, number>;
+   vector_db_queries: number;
+   documents_processed: number;
+ }
+
+ export interface ApiKeyInfo {
+   key_hash: string;
+   tier: 'default' | 'premium' | 'admin';
+   rate_limit: number;
+   created_at: number;
+   last_used?: number;
+ }
+
+ export interface RateLimitInfo {
+   limit: number;
+   remaining: number;
+   reset_at: number;
+   tier: string;
+ }
+
+ export interface WebhookEvent {
+   event_type: 'document.ingestion.completed' | 'document.ingestion.failed';
+   doc_id: string;
+   timestamp: number;
+   data: {
+     chunks_created?: number;
+     error?: string;
+     status: string;
+   };
+ }
+
+ export interface EmbeddingRequest {
+   text: string | string[];
+   model?: string;
+ }
+
+ export interface EmbeddingResponse {
+   embeddings: number[][];
+   model: string;
+   usage: TokenUsage;
+ }
+
+ export interface VectorSearchRequest {
+   query_vector: number[];
+   top_k: number;
+   filter?: Record<string, any>;
+   namespace?: string;
+ }
+
+ export interface VectorSearchResult {
+   id: string;
+   score: number;
+   metadata: Record<string, any>;
+ }
+
+ export type ModelProvider = 'openai' | 'huggingface' | 'anthropic' | 'local';
+
+ export interface ModelConfig {
+   provider: ModelProvider;
+   model_name: string;
+   api_key?: string;
+   max_tokens?: number;
+   temperature?: number;
+   endpoint?: string;
+ }
+
+ export interface LLMAdapter {
+   generateCompletion(messages: Message[], options?: ChatOptions): Promise<ChatResponse>;
+   generateEmbedding(text: string | string[]): Promise<EmbeddingResponse>;
+   isAvailable(): Promise<boolean>;
+ }
+
+ export interface VectorDBAdapter {
+   upsert(vectors: { id: string; values: number[]; metadata: Record<string, any> }[]): Promise<void>;
+   query(queryVector: number[], topK: number, filter?: Record<string, any>): Promise<VectorSearchResult[]>;
+   delete(ids: string[]): Promise<void>;
+   isAvailable(): Promise<boolean>;
+ }
+
+ export interface ImageAdapter {
+   generateImage(prompt: string, options?: Partial<ImageGenerationRequest>): Promise<ImageGenerationResponse>;
+   isAvailable(): Promise<boolean>;
+ }
+
+ export interface VoiceAdapter {
+   synthesize(text: string, options?: Partial<VoiceSynthesisRequest>): Promise<VoiceSynthesisResponse>;
+   transcribe?(audio: Buffer, options?: Partial<TranscriptionRequest>): Promise<TranscriptionResponse>;
+   isAvailable(): Promise<boolean>;
+ }
backend/utils/auth.ts ADDED
@@ -0,0 +1,69 @@
+ import { Header as HeaderType } from "encore.dev/api";
+ import { loadConfig } from "../types/config";
+ import type { ApiKeyInfo } from "../types/models";
+
+ const config = loadConfig();
+
+ export interface AuthData {
+   apiKey: string;
+   tier: 'default' | 'premium' | 'admin';
+ }
+
+ // Placeholder used to declare the "authorization" header parameter on Encore endpoints.
+ export function auth(): HeaderType<"authorization"> {
+   return "" as HeaderType<"authorization">;
+ }
+
+ export function validateApiKey(authHeader?: string): AuthData {
+   if (!authHeader) {
+     throw new Error('Missing Authorization header');
+   }
+
+   const apiKey = authHeader.replace(/^Bearer\s+/i, '').trim();
+
+   if (!apiKey) {
+     throw new Error('Invalid Authorization header format');
+   }
+
+   if (config.auth.adminApiKeys.includes(apiKey)) {
+     return { apiKey, tier: 'admin' };
+   }
+
+   if (config.auth.apiKeys.includes(apiKey)) {
+     return { apiKey, tier: 'default' };
+   }
+
+   throw new Error('Invalid API key');
+ }
+
+ export function getApiKeyInfo(apiKey: string): ApiKeyInfo {
+   let tier: 'default' | 'premium' | 'admin' = 'default';
+   if (config.auth.adminApiKeys.includes(apiKey)) {
+     tier = 'admin';
+   }
+
+   let rateLimit = config.rateLimit.default;
+   if (tier === 'admin') {
+     rateLimit = config.rateLimit.admin;
+   }
+
+   return {
+     key_hash: hashApiKey(apiKey),
+     tier,
+     rate_limit: rateLimit,
+     created_at: Date.now(),
+   };
+ }
+
+ // Non-cryptographic 31x string hash, used only so raw keys are never stored
+ // or logged; not suitable as a security measure.
+ function hashApiKey(apiKey: string): string {
+   let hash = 0;
+   for (let i = 0; i < apiKey.length; i++) {
+     const char = apiKey.charCodeAt(i);
+     hash = ((hash << 5) - hash) + char;
+     hash = hash & hash;
+   }
+   return Math.abs(hash).toString(16);
+ }
+
+ export function requireAuth(authHeader?: string): AuthData {
+   return validateApiKey(authHeader);
+ }
backend/utils/logger.ts ADDED
@@ -0,0 +1,48 @@
+ type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+
+ class Logger {
+   private level: LogLevel;
+
+   constructor(level: LogLevel = 'info') {
+     this.level = level;
+   }
+
+   private shouldLog(level: LogLevel): boolean {
+     const levels: LogLevel[] = ['debug', 'info', 'warn', 'error'];
+     return levels.indexOf(level) >= levels.indexOf(this.level);
+   }
+
+   private log(level: LogLevel, message: string, meta?: Record<string, any>): void {
+     if (!this.shouldLog(level)) return;
+
+     const timestamp = new Date().toISOString();
+     const logEntry = {
+       timestamp,
+       level,
+       message,
+       ...meta,
+     };
+
+     console.log(JSON.stringify(logEntry));
+   }
+
+   debug(message: string, meta?: Record<string, any>): void {
+     this.log('debug', message, meta);
+   }
+
+   info(message: string, meta?: Record<string, any>): void {
+     this.log('info', message, meta);
+   }
+
+   warn(message: string, meta?: Record<string, any>): void {
+     this.log('warn', message, meta);
+   }
+
+   error(message: string, meta?: Record<string, any>): void {
+     this.log('error', message, meta);
+   }
+ }
+
+ export const logger = new Logger(
+   (process.env.LOG_LEVEL as LogLevel) || 'info'
+ );
backend/utils/metrics.ts ADDED
@@ -0,0 +1,90 @@
+ import type { MetricsResponse } from '../types/models';
+
+ class MetricsCollector {
+   private startTime = Date.now();
+   private requestsTotal = 0;
+   private requestsByEndpoint = new Map<string, number>();
+   private errorsTotal = 0;
+   private rateLimitHits = 0;
+   private responseTimes: number[] = [];
+   private modelUsage = new Map<string, number>();
+   private vectorDbQueries = 0;
+   private documentsProcessed = 0;
+
+   incrementRequests(endpoint: string): void {
+     this.requestsTotal++;
+     const count = this.requestsByEndpoint.get(endpoint) || 0;
+     this.requestsByEndpoint.set(endpoint, count + 1);
+   }
+
+   incrementErrors(): void {
+     this.errorsTotal++;
+   }
+
+   incrementRateLimitHits(): void {
+     this.rateLimitHits++;
+   }
+
+   recordResponseTime(timeMs: number): void {
+     this.responseTimes.push(timeMs);
+     if (this.responseTimes.length > 1000) {
+       this.responseTimes.shift();
+     }
+   }
+
+   incrementModelUsage(model: string): void {
+     const count = this.modelUsage.get(model) || 0;
+     this.modelUsage.set(model, count + 1);
+   }
+
+   incrementVectorDbQueries(): void {
+     this.vectorDbQueries++;
+   }
+
+   incrementDocumentsProcessed(): void {
+     this.documentsProcessed++;
+   }
+
+   getMetrics(): MetricsResponse {
+     const avgResponseTime = this.responseTimes.length > 0
+       ? this.responseTimes.reduce((a, b) => a + b, 0) / this.responseTimes.length
+       : 0;
+
+     const requestsByEndpoint: Record<string, number> = {};
+     for (const [endpoint, count] of this.requestsByEndpoint.entries()) {
+       requestsByEndpoint[endpoint] = count;
+     }
+
+     const modelUsageObj: Record<string, number> = {};
+     for (const [model, count] of this.modelUsage.entries()) {
+       modelUsageObj[model] = count;
+     }
+
+     return {
+       timestamp: Date.now(),
+       requests_total: this.requestsTotal,
+       requests_by_endpoint: requestsByEndpoint,
+       errors_total: this.errorsTotal,
+       rate_limit_hits: this.rateLimitHits,
+       active_connections: 0,
+       average_response_time_ms: Math.round(avgResponseTime),
+       model_usage: modelUsageObj,
+       vector_db_queries: this.vectorDbQueries,
+       documents_processed: this.documentsProcessed,
+     };
+   }
+
+   reset(): void {
+     this.startTime = Date.now();
+     this.requestsTotal = 0;
+     this.requestsByEndpoint.clear();
+     this.errorsTotal = 0;
+     this.rateLimitHits = 0;
+     this.responseTimes = [];
+     this.modelUsage.clear();
+     this.vectorDbQueries = 0;
+     this.documentsProcessed = 0;
+   }
+ }
+
+ export const metrics = new MetricsCollector();
backend/utils/rate_limit.ts ADDED
@@ -0,0 +1,114 @@
+ import { loadConfig } from "../types/config";
+ import type { RateLimitInfo } from "../types/models";
+
+ const config = loadConfig();
+
+ interface RateLimitBucket {
+   tokens: number;
+   lastRefill: number;
+ }
+
+ // Fixed-window limiter: each key gets `limit` tokens that are fully refilled
+ // once per minute, rather than continuously as in a classic token bucket.
+ class RateLimiter {
+   private buckets = new Map<string, RateLimitBucket>();
+   private readonly refillInterval = 60000;
+
+   checkRateLimit(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+     const limit = this.getLimitForTier(tier);
+     const now = Date.now();
+
+     let bucket = this.buckets.get(apiKey);
+
+     if (!bucket) {
+       bucket = {
+         tokens: limit,
+         lastRefill: now,
+       };
+       this.buckets.set(apiKey, bucket);
+     }
+
+     const timeSinceRefill = now - bucket.lastRefill;
+     if (timeSinceRefill >= this.refillInterval) {
+       bucket.tokens = limit;
+       bucket.lastRefill = now;
+     }
+
+     if (bucket.tokens <= 0) {
+       const resetAt = bucket.lastRefill + this.refillInterval;
+       // Thrown as a plain object so the API layer can map it to an HTTP 429.
+       throw {
+         statusCode: 429,
+         message: 'Rate limit exceeded',
+         limit,
+         remaining: 0,
+         resetAt,
+       };
+     }
+
+     bucket.tokens -= 1;
+
+     const resetAt = bucket.lastRefill + this.refillInterval;
+
+     return {
+       limit,
+       remaining: bucket.tokens,
+       reset_at: resetAt,
+       tier,
+     };
+   }
+
+   private getLimitForTier(tier: 'default' | 'premium' | 'admin'): number {
+     switch (tier) {
+       case 'admin':
+         return config.rateLimit.admin;
+       case 'premium':
+         return config.rateLimit.premium;
+       default:
+         return config.rateLimit.default;
+     }
+   }
+
+   getRateLimitInfo(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+     const limit = this.getLimitForTier(tier);
+     const bucket = this.buckets.get(apiKey);
+
+     if (!bucket) {
+       return {
+         limit,
+         remaining: limit,
+         reset_at: Date.now() + this.refillInterval,
+         tier,
+       };
+     }
+
+     return {
+       limit,
+       remaining: bucket.tokens,
+       reset_at: bucket.lastRefill + this.refillInterval,
+       tier,
+     };
+   }
+
+   // Drop buckets that have been idle for two refill windows to bound memory use.
+   cleanup(): void {
+     const now = Date.now();
+     const maxAge = this.refillInterval * 2;
+
+     for (const [key, bucket] of this.buckets.entries()) {
+       if (now - bucket.lastRefill > maxAge) {
+         this.buckets.delete(key);
+       }
+     }
+   }
+ }
+
+ export const rateLimiter = new RateLimiter();
+
+ setInterval(() => {
+   rateLimiter.cleanup();
+ }, 300000);
+
+ export function checkRateLimit(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+   return rateLimiter.checkRateLimit(apiKey, tier);
+ }
+
+ export function getRateLimitInfo(apiKey: string, tier: 'default' | 'premium' | 'admin'): RateLimitInfo {
+   return rateLimiter.getRateLimitInfo(apiKey, tier);
+ }
backend/vite-env.d.ts ADDED
@@ -0,0 +1 @@
+ /// <reference types="vite/client" />
backend/workers/ingestion_worker.ts ADDED
@@ -0,0 +1,128 @@
+ import { logger } from '../utils/logger';
+ import type { WebhookEvent } from '../types/models';
+
+ interface IngestionJob {
+   doc_id: string;
+   filename: string;
+   status: 'pending' | 'processing' | 'completed' | 'failed';
+   created_at: number;
+   completed_at?: number;
+   error?: string;
+ }
+
+ class IngestionWorker {
+   private jobs = new Map<string, IngestionJob>();
+   private isRunning = false;
+   private concurrency: number;
+
+   constructor(concurrency = 5) {
+     this.concurrency = concurrency;
+   }
+
+   async start(): Promise<void> {
+     if (this.isRunning) {
+       logger.warn('Ingestion worker already running');
+       return;
+     }
+
+     this.isRunning = true;
+     logger.info('Ingestion worker started', { concurrency: this.concurrency });
+
+     this.processQueue();
+   }
+
+   async stop(): Promise<void> {
+     this.isRunning = false;
+     logger.info('Ingestion worker stopped');
+   }
+
+   async addJob(doc_id: string, filename: string): Promise<void> {
+     const job: IngestionJob = {
+       doc_id,
+       filename,
+       status: 'pending',
+       created_at: Date.now(),
+     };
+
+     this.jobs.set(doc_id, job);
+     logger.info('Job added to ingestion queue', { doc_id, filename });
+   }
+
+   // Poll loop: picks up at most `concurrency` pending jobs per iteration.
+   private async processQueue(): Promise<void> {
+     while (this.isRunning) {
+       const pendingJobs = Array.from(this.jobs.values())
+         .filter(job => job.status === 'pending')
+         .slice(0, this.concurrency);
+
+       if (pendingJobs.length === 0) {
+         await this.sleep(1000);
+         continue;
+       }
+
+       await Promise.all(
+         pendingJobs.map(job => this.processJob(job))
+       );
+     }
+   }
+
+   private async processJob(job: IngestionJob): Promise<void> {
+     try {
+       job.status = 'processing';
+       logger.info('Processing ingestion job', { doc_id: job.doc_id });
+
+       // Simulate 1-3s of parsing/embedding work; a real worker would call
+       // the document service here.
+       await this.sleep(Math.random() * 2000 + 1000);
+
+       job.status = 'completed';
+       job.completed_at = Date.now();
+
+       logger.info('Ingestion job completed', { doc_id: job.doc_id });
+
+       await this.sendWebhook({
+         event_type: 'document.ingestion.completed',
+         doc_id: job.doc_id,
+         timestamp: Date.now(),
+         data: {
+           chunks_created: Math.floor(Math.random() * 20) + 5, // simulated chunk count
+           status: 'completed',
+         },
+       });
+     } catch (error) {
+       job.status = 'failed';
+       job.error = error instanceof Error ? error.message : String(error);
+       job.completed_at = Date.now();
+
+       logger.error('Ingestion job failed', {
+         doc_id: job.doc_id,
+         error: job.error,
+       });
+
+       await this.sendWebhook({
+         event_type: 'document.ingestion.failed',
+         doc_id: job.doc_id,
+         timestamp: Date.now(),
+         data: {
+           error: job.error,
+           status: 'failed',
+         },
+       });
+     }
+   }
+
+   // Stub: logs the event; a real implementation would POST it to a
+   // registered webhook URL.
+   private async sendWebhook(event: WebhookEvent): Promise<void> {
+     logger.info('Webhook event', event);
+   }
+
+   private sleep(ms: number): Promise<void> {
+     return new Promise(resolve => setTimeout(resolve, ms));
+   }
+
+   getJobStatus(doc_id: string): IngestionJob | undefined {
+     return this.jobs.get(doc_id);
+   }
+
+   getAllJobs(): IngestionJob[] {
+     return Array.from(this.jobs.values());
+   }
+ }
+
+ export const ingestionWorker = new IngestionWorker();
docker-compose.yml ADDED
@@ -0,0 +1,51 @@
+ version: '3.8'
+
+ services:
+   api:
+     build: .
+     ports:
+       - "8000:8000"
+     environment:
+       - OPENAI_API_KEY=${OPENAI_API_KEY}
+       - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY}
+       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
+       - PINECONE_API_KEY=${PINECONE_API_KEY}
+       - PINECONE_ENVIRONMENT=${PINECONE_ENVIRONMENT:-us-west1-gcp}
+       - PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME:-ai-api-vectors}
+       - API_KEYS=${API_KEYS:-demo-key-1,demo-key-2}
+       - ADMIN_API_KEYS=${ADMIN_API_KEYS}
+       - RATE_LIMIT_DEFAULT=${RATE_LIMIT_DEFAULT:-60}
+       - RATE_LIMIT_PREMIUM=${RATE_LIMIT_PREMIUM:-300}
+       - RATE_LIMIT_ADMIN=${RATE_LIMIT_ADMIN:-1000}
+       - DEFAULT_CHAT_MODEL=${DEFAULT_CHAT_MODEL:-gpt-3.5-turbo}
+       - DEFAULT_EMBEDDING_MODEL=${DEFAULT_EMBEDDING_MODEL:-text-embedding-ada-002}
+       - DEFAULT_IMAGE_MODEL=${DEFAULT_IMAGE_MODEL:-dall-e-3}
+       - DEFAULT_VOICE_MODEL=${DEFAULT_VOICE_MODEL:-tts-1}
+       - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-10}
+       - CHUNK_SIZE=${CHUNK_SIZE:-1000}
+       - CHUNK_OVERLAP=${CHUNK_OVERLAP:-200}
+       - ENABLE_BACKGROUND_WORKERS=${ENABLE_BACKGROUND_WORKERS:-true}
+       - WORKER_CONCURRENCY=${WORKER_CONCURRENCY:-5}
+       - LOG_LEVEL=${LOG_LEVEL:-info}
+       - CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:3000}
+     volumes:
+       - ./data:/app/data
+     restart: unless-stopped
+     healthcheck:
+       test: ["CMD", "wget", "--spider", "-q", "http://localhost:8000/health"]
+       interval: 30s
+       timeout: 10s
+       retries: 3
+       start_period: 40s
+
+   redis:
+     image: redis:7-alpine
+     ports:
+       - "6379:6379"
+     volumes:
+       - redis_data:/data
+     restart: unless-stopped
+     command: redis-server --appendonly yes
+
+ volumes:
+   redis_data:
examples/curl.sh ADDED
@@ -0,0 +1,116 @@
+ #!/bin/bash
+
+ API_URL="http://localhost:8000"
+ API_KEY="demo-key-1"
+
+ echo "=== AI API Service - Example Requests ==="
+ echo ""
+
+ echo "1. Health Check"
+ echo "==============="
+ curl -s "${API_URL}/health" | jq .
+ echo ""
+ echo ""
+
+ echo "2. Verify API Key"
+ echo "================="
+ curl -s -X POST "${API_URL}/auth/verify" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
+ echo ""
+
+ echo "3. Simple Query"
+ echo "==============="
+ curl -s "${API_URL}/ai/query?q=What%20is%20machine%20learning%3F" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
+ echo ""
+
+ echo "4. Chat Conversation"
+ echo "===================="
+ curl -s -X POST "${API_URL}/ai/chat" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {
+         "role": "user",
+         "content": "Explain quantum computing in simple terms"
+       }
+     ],
+     "options": {
+       "temperature": 0.7,
+       "max_tokens": 200
+     }
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "5. RAG Query (with retrieval)"
+ echo "============================="
+ curl -s -X POST "${API_URL}/rag/query" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What are the key features?",
+     "top_k": 5,
+     "use_retrieval": true
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "6. Image Generation"
+ echo "==================="
+ curl -s -X POST "${API_URL}/image/generate" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "prompt": "A serene mountain landscape at sunset",
+     "size": "1024x1024",
+     "n": 1
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "7. Voice Synthesis"
+ echo "=================="
+ curl -s -X POST "${API_URL}/voice/synthesize" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "text": "Hello, this is a test of the voice synthesis system.",
+     "voice": "alloy",
+     "format": "mp3"
+   }' | jq .
+ echo ""
+ echo ""
+
+ echo "8. Document Upload"
+ echo "=================="
+ CONTENT=$(echo "This is a sample document for testing." | base64)
+ curl -s -X POST "${API_URL}/upload" \
+   -H "Authorization: Bearer ${API_KEY}" \
+   -H "Content-Type: application/json" \
+   -d "{
+     \"filename\": \"sample.txt\",
+     \"content_base64\": \"${CONTENT}\",
+     \"metadata\": {
+       \"title\": \"Sample Document\",
+       \"category\": \"test\"
+     }
+   }" | jq .
+ echo ""
+ echo ""
+
+ echo "9. Get Metrics"
+ echo "=============="
+ curl -s "${API_URL}/metrics" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
+ echo ""
+
+ echo "10. Get Available Models"
+ echo "========================"
+ curl -s "${API_URL}/rag/models" \
+   -H "Authorization: Bearer ${API_KEY}" | jq .
+ echo ""
examples/js_client.js ADDED
@@ -0,0 +1,203 @@
+ const API_URL = 'http://localhost:8000';
+ const API_KEY = 'demo-key-1';
+
+ class AIAPIClient {
+   constructor(apiUrl, apiKey) {
+     this.apiUrl = apiUrl;
+     this.apiKey = apiKey;
+   }
+
+   async request(endpoint, options = {}) {
+     const url = `${this.apiUrl}${endpoint}`;
+     const headers = {
+       'Authorization': `Bearer ${this.apiKey}`,
+       'Content-Type': 'application/json',
+       ...options.headers,
+     };
+
+     const response = await fetch(url, {
+       ...options,
+       headers,
+     });
+
+     if (!response.ok) {
+       // Fall back to the status text when the error body is not valid JSON.
+       const error = await response.json().catch(() => ({}));
+       throw new Error(`API Error: ${error.message || response.statusText}`);
+     }
+
+     return response.json();
+   }
+
+   async healthCheck() {
+     return this.request('/health', { method: 'GET' });
+   }
+
+   async verifyApiKey() {
+     return this.request('/auth/verify', { method: 'POST' });
+   }
+
+   async chat(conversation, model = null, options = {}) {
+     return this.request('/ai/chat', {
+       method: 'POST',
+       body: JSON.stringify({
+         conversation,
+         model,
+         options,
+       }),
+     });
+   }
+
+   async simpleQuery(query, model = null) {
+     const params = new URLSearchParams({ q: query });
+     if (model) params.append('model', model);
+
+     return this.request(`/ai/query?${params}`, { method: 'GET' });
+   }
+
+   async ragQuery(query, topK = 5, model = null, useRetrieval = true, filters = null) {
+     return this.request('/rag/query', {
+       method: 'POST',
+       body: JSON.stringify({
+         query,
+         top_k: topK,
+         model,
+         use_retrieval: useRetrieval,
+         filters,
+       }),
+     });
+   }
+
+   async generateImage(prompt, options = {}) {
+     return this.request('/image/generate', {
+       method: 'POST',
+       body: JSON.stringify({
+         prompt,
+         ...options,
+       }),
+     });
+   }
+
+   async synthesizeVoice(text, voice = 'alloy', format = 'mp3', speed = 1.0) {
+     return this.request('/voice/synthesize', {
+       method: 'POST',
+       body: JSON.stringify({
+         text,
+         voice,
+         format,
+         speed,
+       }),
+     });
+   }
+
+   async transcribeAudio(audioBase64, model = null, language = null) {
+     return this.request('/voice/transcribe', {
+       method: 'POST',
+       body: JSON.stringify({
+         audio_base64: audioBase64,
+         model,
+         language,
+       }),
+     });
+   }
+
+   async uploadDocument(filename, contentBase64, metadata = {}) {
+     return this.request('/upload', {
+       method: 'POST',
+       body: JSON.stringify({
+         filename,
+         content_base64: contentBase64,
+         metadata,
+       }),
+     });
+   }
+
+   async getDocumentSources(docId) {
+     return this.request(`/docs/${docId}/sources`, { method: 'GET' });
+   }
+
+   async getMetrics() {
+     return this.request('/metrics', { method: 'GET' });
+   }
+
+   async getAvailableModels() {
+     return this.request('/rag/models', { method: 'GET' });
+   }
+ }
+
+ async function main() {
+   const client = new AIAPIClient(API_URL, API_KEY);
+
+   try {
+     console.log('=== AI API Client Examples ===\n');
+
+     console.log('1. Health Check');
+     const health = await client.healthCheck();
+     console.log(JSON.stringify(health, null, 2));
+     console.log('\n');
+
+     console.log('2. Simple Query');
+     const queryResult = await client.simpleQuery('What is artificial intelligence?');
+     console.log(JSON.stringify(queryResult, null, 2));
+     console.log('\n');
+
+     console.log('3. Chat Conversation');
+     const chatResult = await client.chat([
+       { role: 'user', content: 'Tell me a fun fact about space' }
+     ], null, { temperature: 0.8, max_tokens: 150 });
+     console.log(JSON.stringify(chatResult, null, 2));
+     console.log('\n');
+
+     console.log('4. RAG Query');
+     const ragResult = await client.ragQuery(
+       'What are the main features?',
+       5,
+       null,
+       true
+     );
+     console.log(JSON.stringify(ragResult, null, 2));
+     console.log('\n');
+
+     console.log('5. Image Generation');
+     const imageResult = await client.generateImage(
+       'A futuristic cityscape at night',
+       { size: '1024x1024', n: 1 }
+     );
+     console.log('Image generated:', imageResult.images[0].url.substring(0, 100) + '...');
+     console.log('\n');
+
+     console.log('6. Voice Synthesis');
+     const voiceResult = await client.synthesizeVoice(
+       'Welcome to the AI API service.',
+       'alloy',
+       'mp3'
+     );
+     console.log('Audio generated:', voiceResult.audio_url.substring(0, 100) + '...');
+     console.log('\n');
+
+     console.log('7. Document Upload');
+     const docContent = Buffer.from('This is a sample document.').toString('base64');
+     const uploadResult = await client.uploadDocument(
+       'sample.txt',
+       docContent,
+       { title: 'Sample', category: 'test' }
+     );
+     console.log(JSON.stringify(uploadResult, null, 2));
+     console.log('\n');
+
+     console.log('8. Get Metrics');
+     const metrics = await client.getMetrics();
+     console.log(JSON.stringify(metrics, null, 2));
+     console.log('\n');
+
+   } catch (error) {
+     console.error('Error:', error.message);
+   }
+ }
+
+ // Run the examples when executed directly under Node (no `window` global).
+ if (typeof window === 'undefined') {
+   main();
+ }
+
+ if (typeof module !== 'undefined' && module.exports) {
+   module.exports = AIAPIClient;
+ }
package.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "name": "leap-app",
+   "version": "1.0.0",
+   "type": "module",
+   "packageManager": "bun",
+   "workspaces": [
+     "backend"
+   ]
+ }
strcture.md ADDED
@@ -0,0 +1,493 @@
+ # AI API Service
+
+ A production-ready, scalable AI API service built with TypeScript and Encore.ts. Supports conversational chat, RAG (Retrieval-Augmented Generation), image generation, voice synthesis, and document ingestion.
+
+ ## 🏗️ Architecture
+
+ ```
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ API Gateway Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │ Auth Filter │→ │ Rate Limiter │→ │ Routes │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ Service Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │ AI Service │ │ RAG Service │ │Image Service │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │Voice Service │ │ Doc Service │ │Worker Service│ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ Adapter Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │OpenAI Adapter│ │ HF Adapter │ │Anthropic Adp │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │Vector DB Adp │ │Embedding Adp │ │ Local Models │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────────────────┐
+ │ Storage Layer │
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+ │ │ Pinecone │ │ In-Memory │ │ File Storage │ │
+ │ │ (Vector DB) │ │ (Fallback) │ │ (Documents) │ │
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
+ └─────────────────────────────────────────────────────────────────┘
+ ```
+
+ ## ✨ Features
+
+ ### Core Capabilities
+ - **Multi-turn Chat** - Conversational AI with context management
+ - **RAG (Retrieval-Augmented Generation)** - Query documents with AI-powered retrieval
+ - **Image Generation** - Text-to-image using DALL-E or Stable Diffusion
+ - **Voice Synthesis** - Text-to-speech with multiple voice options
+ - **Document Ingestion** - Upload PDFs, DOCX, TXT with automatic chunking & embedding (see the chunking sketch below)
+
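+ The chunking step is governed by `CHUNK_SIZE` and `CHUNK_OVERLAP`. A minimal sketch of a sliding-window chunker under those defaults (illustrative only; the shipped document service may differ in detail):
+
+ ```ts
+ // Illustrative sliding-window chunker driven by CHUNK_SIZE / CHUNK_OVERLAP.
+ // Assumes chunkOverlap < chunkSize.
+ function chunkText(text: string, chunkSize = 1000, chunkOverlap = 200): string[] {
+   const chunks: string[] = [];
+   const step = chunkSize - chunkOverlap;
+   for (let start = 0; start < text.length; start += step) {
+     chunks.push(text.slice(start, start + chunkSize));
+     if (start + chunkSize >= text.length) break;
+   }
+   return chunks;
+ }
+
+ // 2,500 characters -> chunks of [1000, 1000, 900], each overlapping the
+ // previous chunk by 200 characters.
+ console.log(chunkText('a'.repeat(2500)).map(c => c.length));
+ ```
+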
+ ### Model Support
+ - **OpenAI** - GPT-4, GPT-3.5-turbo, DALL-E, Whisper, TTS
+ - **HuggingFace** - Open-source models via Inference API
+ - **Anthropic** - Claude models
+ - **Local Models** - Run transformers locally (optional)
+
+ ### Enterprise Features
+ - **API Key Authentication** - Secure access control
+ - **Role-based Rate Limiting** - Default, Premium, Admin tiers
+ - **Multi-model Routing** - Select models via API or policy
+ - **Background Workers** - Async document processing
+ - **Observability** - Health checks, metrics, structured logging
+ - **CORS Support** - Cross-origin requests
+
+ ## 📋 API Endpoints
+
+ ### Health & Metrics
+ ```bash
+ GET  /health          # Service health check
+ GET  /metrics         # Prometheus-style metrics
+ POST /auth/verify     # Verify API key validity
+ ```
+
+ ### AI Chat
+ ```bash
+ POST /ai/chat         # Multi-turn conversation
+ GET  /ai/query        # Simple question answering
+ ```
+
+ ### RAG (Retrieval-Augmented Generation)
+ ```bash
+ POST /rag/query       # Query with document retrieval
+ GET  /rag/models      # List available models
+ ```
+
+ ### Image Generation
+ ```bash
+ POST /image/generate  # Generate images from text
+ ```
+
+ ### Voice Synthesis
+ ```bash
+ POST /voice/synthesize  # Text to speech
+ POST /voice/transcribe  # Speech to text (optional)
+ ```
+
+ ### Document Management
+ ```bash
+ POST /upload              # Upload document for ingestion
+ GET  /docs/:id/sources    # Get document chunks
+ POST /webhook/events      # Ingestion completion webhook
+ ```
+
+ ## 🚀 Quick Start
+
+ ### Prerequisites
+ - Node.js 18+ and npm
+ - Encore CLI: `npm install -g encore`
+ - API keys (OpenAI, HuggingFace, etc.)
+
+ ### Local Development
+
+ 1. **Clone and install dependencies**
+    ```bash
+    npm install
+    ```
+
+ 2. **Configure environment variables**
+    ```bash
+    cp .env.example .env
+    # Edit .env with your API keys
+    ```
+
+ 3. **Run the development server**
+    ```bash
+    encore run
+    ```
+
+    The API will be available at `http://localhost:8000`.
+
+ 4. **Run tests**
+    ```bash
+    npm test
+    ```
+
+ ## 🔑 Environment Variables
+
+ | Variable | Description | Required | Default |
+ |----------|-------------|----------|---------|
+ | `OPENAI_API_KEY` | OpenAI API key for GPT models | No* | - |
+ | `HUGGINGFACE_API_KEY` | HuggingFace API key | No* | - |
+ | `ANTHROPIC_API_KEY` | Anthropic API key for Claude | No* | - |
+ | `PINECONE_API_KEY` | Pinecone vector DB key | No | In-memory fallback |
+ | `API_KEYS` | Comma-separated valid API keys | Yes | `demo-key-1` |
+ | `ADMIN_API_KEYS` | Admin-level API keys | No | - |
+ | `RATE_LIMIT_DEFAULT` | Requests/min for default tier | No | 60 |
+ | `RATE_LIMIT_PREMIUM` | Requests/min for premium tier | No | 300 |
+ | `DEFAULT_CHAT_MODEL` | Default LLM model | No | `gpt-3.5-turbo` |
+
+ *At least one LLM provider key is required
+
+ ## 📖 API Usage Examples
+
+ ### 1. Chat Endpoint
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/ai/chat \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "conversation": [
+       {"role": "user", "content": "What is machine learning?"}
+     ],
+     "model": "gpt-3.5-turbo",
+     "options": {
+       "temperature": 0.7,
+       "max_tokens": 500
+     }
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "reply": "Machine learning is a subset of artificial intelligence...",
+   "model": "gpt-3.5-turbo",
+   "usage": {
+     "prompt_tokens": 15,
+     "completion_tokens": 120,
+     "total_tokens": 135
+   },
+   "sources": null
+ }
+ ```
+
+ ### 2. RAG Query Endpoint
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/rag/query \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "query": "What are the key features of our product?",
+     "top_k": 5,
+     "model": "gpt-4",
+     "use_retrieval": true
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "answer": "Based on the documentation, the key features include...",
+   "sources": [
+     {
+       "doc_id": "doc_123",
+       "chunk_id": "chunk_5",
+       "content": "Our product features...",
+       "score": 0.92
+     }
+   ],
+   "model": "gpt-4",
+   "usage": {
+     "prompt_tokens": 450,
+     "completion_tokens": 180,
+     "total_tokens": 630
+   }
+ }
+ ```
+
+ ### 3. Image Generation
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/image/generate \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "prompt": "A futuristic city with flying cars at sunset",
+     "model": "dall-e-3",
+     "size": "1024x1024",
+     "n": 1
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "images": [
+     {
+       "url": "https://...",
+       "revised_prompt": "A futuristic city with flying cars..."
+     }
+   ],
+   "model": "dall-e-3",
+   "created": 1698765432
+ }
+ ```
+
+ ### 4. Voice Synthesis
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/voice/synthesize \
+   -H "Authorization: Bearer demo-key-1" \
+   -H "Content-Type: application/json" \
+   -d '{
+     "text": "Hello, this is a test of the voice synthesis system.",
+     "voice": "alloy",
+     "format": "mp3"
+   }'
+ ```
+
+ **Response:**
+ ```json
+ {
+   "audio_url": "data:audio/mp3;base64,//uQx...",
+   "voice": "alloy",
+   "format": "mp3",
+   "duration_ms": 3200
+ }
+ ```
+
+ ### 5. Document Upload
+
+ **Request:**
+ ```bash
+ curl -X POST http://localhost:8000/upload \
+   -H "Authorization: Bearer demo-key-1" \
+   -F "file=@document.pdf" \
+   -F "metadata={\"title\":\"Product Guide\",\"category\":\"documentation\"}"
+ ```
+
+ **Response:**
+ ```json
+ {
+   "doc_id": "doc_abc123",
+   "filename": "document.pdf",
+   "size_bytes": 245760,
+   "status": "processing",
+   "estimated_chunks": 15,
+   "webhook_url": "/webhook/events"
+ }
+ ```
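+
+ When ingestion finishes, the worker emits a webhook event. A representative `document.ingestion.completed` payload, following the `WebhookEvent` shape defined in `backend/types/models.ts` (the IDs and chunk count here are illustrative):
+
+ ```json
+ {
+   "event_type": "document.ingestion.completed",
+   "doc_id": "doc_abc123",
+   "timestamp": 1698765432000,
+   "data": {
+     "chunks_created": 15,
+     "status": "completed"
+   }
+ }
+ ```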
+
+ ## 🧪 Testing
+
+ Run the test suite:
+ ```bash
+ npm test
+ ```
+
+ Run with coverage:
+ ```bash
+ npm run test:coverage
+ ```
+
+ Tests include:
+ - Unit tests for all adapters
+ - Integration tests for API endpoints
+ - Mock implementations for external services
+ - Rate limiting validation
+ - Authentication checks
+
+ ## 🐳 Docker Deployment
+
+ ### Build Docker Image
+ ```bash
+ docker build -t ai-api-service .
+ ```
+
+ ### Run with Docker Compose
+ ```bash
+ docker-compose up
+ ```
+
+ This starts:
+ - API service on port 8000
+ - Redis for rate limiting (optional)
+ - Background workers
+
+ ## ☁️ Cloud Deployment
+
+ ### Deploy to Encore Cloud (Recommended)
+
+ 1. **Install the Encore CLI**
+    ```bash
+    npm install -g encore
+    ```
+
+ 2. **Log in to Encore**
+    ```bash
+    encore auth login
+    ```
+
+ 3. **Deploy**
+    ```bash
+    encore deploy
+    ```
+
+ ### Deploy to Hugging Face Spaces
+
+ 1. **Create a new Space** at https://huggingface.co/spaces
+
+ 2. **Add a Dockerfile**
+    ```dockerfile
+    FROM node:18-alpine
+    WORKDIR /app
+    COPY package*.json ./
+    RUN npm ci --only=production
+    COPY . .
+    RUN npm run build
+    EXPOSE 7860
+    ENV PORT=7860
+    CMD ["npm", "start"]
+    ```
+
+ 3. **Configure secrets** in the Space settings:
+    - `OPENAI_API_KEY`
+    - `HUGGINGFACE_API_KEY`
+    - `API_KEYS`
+
+ 4. **Push to the Space**
+    ```bash
+    git remote add space https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
+    git push space main
+    ```
+
+ ### Deploy to Generic Cloud (AWS, GCP, Azure)
+
+ 1. **Build the production image**
+    ```bash
+    docker build -t ai-api-service:latest .
+    ```
+
+ 2. **Push to a container registry**
+    ```bash
+    docker tag ai-api-service:latest your-registry/ai-api-service:latest
+    docker push your-registry/ai-api-service:latest
+    ```
+
+ 3. **Deploy to a container service**
+    - AWS ECS/Fargate
+    - GCP Cloud Run
+    - Azure Container Instances
+
+ 4. **Set environment variables** in the cloud console
+
+ ## 📊 Scaling Considerations
+
+ ### Horizontal Scaling
+ - **Stateless design** - All state lives in external services (Pinecone, Redis)
+ - **Load balancing** - Use an ALB/NLB in front of multiple instances
+ - **Auto-scaling** - Scale based on CPU/memory or request rate
+
+ ### Vector Database
+ - **Pinecone** - Managed, scales automatically
+ - **Milvus** - Self-hosted, requires cluster setup
+ - **In-memory** - Development only, not for production (see the sketch below)
+
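+ At its core, the in-memory fallback only needs a brute-force cosine-similarity scan. A minimal sketch of the `VectorDBAdapter` contract from `backend/types/models.ts` (an illustration of the idea, not the shipped `InMemoryVectorDB`):
+
+ ```ts
+ type VectorSearchResult = { id: string; score: number; metadata: Record<string, any> };
+ type Row = { id: string; values: number[]; metadata: Record<string, any> };
+
+ function cosine(a: number[], b: number[]): number {
+   let dot = 0, na = 0, nb = 0;
+   for (let i = 0; i < a.length; i++) {
+     dot += a[i] * b[i];
+     na += a[i] * a[i];
+     nb += b[i] * b[i];
+   }
+   return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
+ }
+
+ class TinyVectorStore {
+   private rows: Row[] = [];
+
+   async upsert(vectors: Row[]): Promise<void> {
+     for (const v of vectors) {
+       // Replace any existing vector with the same id.
+       this.rows = this.rows.filter(r => r.id !== v.id);
+       this.rows.push(v);
+     }
+   }
+
+   async query(queryVector: number[], topK: number, filter?: Record<string, any>): Promise<VectorSearchResult[]> {
+     return this.rows
+       .filter(r => !filter || Object.entries(filter).every(([k, v]) => r.metadata[k] === v))
+       .map(r => ({ id: r.id, score: cosine(queryVector, r.values), metadata: r.metadata }))
+       .sort((a, b) => b.score - a.score)
+       .slice(0, topK);
+   }
+
+   async delete(ids: string[]): Promise<void> {
+     this.rows = this.rows.filter(r => !ids.includes(r.id));
+   }
+
+   async isAvailable(): Promise<boolean> {
+     return true;
+   }
+ }
+ ```
+
+ The scan is O(n) per query, which is fine for development datasets and is exactly why production deployments should point at Pinecone instead.
+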
+ ### Background Workers
+ - **Concurrent processing** - Adjust `WORKER_CONCURRENCY`
+ - **Queue depth** - Monitor pending ingestion jobs
+ - **Retry logic** - Failed jobs auto-retry with backoff
+
+ ### Cost Optimization
+ - **Model selection** - Use cheaper models (GPT-3.5 vs GPT-4)
+ - **Caching** - Cache frequent queries (not implemented; add Redis)
+ - **Batch processing** - Group document ingestions
+ - **Rate limiting** - Prevent abuse and cost overruns
+
+ ## 🔒 Security Best Practices
+
+ 1. **API Keys** - Rotate regularly, use environment variables
+ 2. **Rate Limiting** - Prevent abuse and DDoS
+ 3. **Input Validation** - All requests validated with Zod schemas (see the sketch below)
+ 4. **CORS** - Configure allowed origins
+ 5. **File Upload** - Size limits, type validation
+ 6. **Secrets Management** - Use Encore secrets or cloud secret managers
+
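+ For validation, a minimal sketch of what such a schema could look like for the chat endpoint (illustrative; the field names mirror `ChatRequest` in `backend/types/models.ts`, and the shipped schemas may differ):
+
+ ```ts
+ import { z } from 'zod';
+
+ // Illustrative Zod schema mirroring the ChatRequest type.
+ const MessageSchema = z.object({
+   role: z.enum(['system', 'user', 'assistant']),
+   content: z.string().min(1),
+   timestamp: z.number().optional(),
+ });
+
+ const ChatRequestSchema = z.object({
+   conversation: z.array(MessageSchema).min(1),
+   model: z.string().optional(),
+   options: z.object({
+     temperature: z.number().min(0).max(2).optional(),
+     max_tokens: z.number().int().positive().optional(),
+   }).optional(),
+ });
+
+ // Reject malformed bodies before they reach any model adapter.
+ export function validateChatRequest(body: unknown) {
+   const parsed = ChatRequestSchema.safeParse(body);
+   if (!parsed.success) {
+     throw new Error(`Invalid request: ${parsed.error.message}`);
+   }
+   return parsed.data;
+ }
+ ```
+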
+ ## 🛠️ Troubleshooting
+
+ ### Common Issues
+
+ **"Invalid API key" errors**
+ - Check that the `.env` file has the correct keys
+ - Verify the API key has credits/quota
+ - Ensure there are no extra spaces in the keys
+
+ **Rate limit exceeded**
+ - Increase the `RATE_LIMIT_*` values
+ - Use an admin API key for testing
+ - Check the Prometheus metrics for usage
+
+ A rejected request surfaces an error like the sketch below.
+
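+ The limiter in `backend/utils/rate_limit.ts` rejects with an object of this shape, which the gateway layer maps to an HTTP 429 (values shown are for the default tier; the exact response formatting depends on the gateway):
+
+ ```json
+ {
+   "statusCode": 429,
+   "message": "Rate limit exceeded",
+   "limit": 60,
+   "remaining": 0,
+   "resetAt": 1698765492000
+ }
+ ```
+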
+ **Vector DB connection fails**
+ - Check the Pinecone API key and environment
+ - The service falls back to in-memory storage automatically
+ - Verify network connectivity
+
+ **Document upload fails**
+ - Check that the file size is below `MAX_FILE_SIZE_MB`
+ - Verify the file format (PDF, DOCX, TXT)
+ - Check disk space for temp files
+
+ ## 📚 Client Libraries
+
+ See the `examples/` directory for:
+ - `js_client.js` - JavaScript/Node.js client (see the usage sketch below)
+ - `curl.sh` - Curl command examples
+ - `python_client.py` - Python client (coming soon)
+
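+ A minimal usage sketch of the bundled client (assumes `AIAPIClient` has been loaded from `examples/js_client.js`, e.g. via `require()` in a CommonJS context; the demo key comes from `.env.example`):
+
+ ```ts
+ // Sketch: AIAPIClient is assumed to be in scope, loaded from examples/js_client.js.
+ declare const AIAPIClient: any;
+
+ const client = new AIAPIClient('http://localhost:8000', 'demo-key-1');
+
+ // One chat turn with a low temperature for a short, stable reply.
+ const res = await client.chat(
+   [{ role: 'user', content: 'Summarize what this service does.' }],
+   null,
+   { temperature: 0.2, max_tokens: 100 },
+ );
+ console.log(res.reply);
+ ```
+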
+ ## 🤝 Contributing
+
+ 1. Fork the repository
+ 2. Create a feature branch
+ 3. Make your changes
+ 4. Add tests
+ 5. Submit a pull request
+
+ ## 📝 License
+
+ MIT License - see the LICENSE file for details
+
+ ## 🆘 Support
+
+ - GitHub Issues: [Report bugs](https://github.com/your-org/ai-api-service/issues)
+ - Documentation: [Full API reference](https://docs.your-service.com)
+ - Email: [email protected]
+
+ ## 🗺️ Roadmap
+
+ - [ ] Caching layer (Redis)
+ - [ ] Streaming responses (SSE)
+ - [ ] Multi-language support
+ - [ ] Fine-tuning pipeline
+ - [ ] Analytics dashboard
+ - [ ] Webhook integrations
+ - [ ] GraphQL API
+ - [ ] gRPC support
tests/api.test.ts ADDED
@@ -0,0 +1,233 @@
+ import { describe, it, expect, beforeAll, vi } from 'vitest';
+
+ describe('AI API Service Tests', () => {
+   beforeAll(() => {
+     process.env.API_KEYS = 'test-key-1,test-key-2';
+     process.env.ADMIN_API_KEYS = 'admin-key-1';
+     process.env.OPENAI_API_KEY = 'sk-test-mock-key';
+   });
+
+   describe('Authentication', () => {
+     it('should validate correct API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       const result = validateApiKey('Bearer test-key-1');
+       expect(result.apiKey).toBe('test-key-1');
+       expect(result.tier).toBe('default');
+     });
+
+     it('should validate admin API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       const result = validateApiKey('Bearer admin-key-1');
+       expect(result.apiKey).toBe('admin-key-1');
+       expect(result.tier).toBe('admin');
+     });
+
+     it('should reject invalid API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       expect(() => validateApiKey('Bearer invalid-key')).toThrow('Invalid API key');
+     });
+
+     it('should reject missing API key', () => {
+       const { validateApiKey } = require('../backend/utils/auth');
+       expect(() => validateApiKey('')).toThrow('Missing Authorization header');
+     });
+   });
+
+   describe('Rate Limiting', () => {
+     it('should allow requests within rate limit', () => {
+       const { rateLimiter } = require('../backend/utils/rate_limit');
+       const info = rateLimiter.checkRateLimit('test-key-1', 'default');
+       expect(info.remaining).toBeGreaterThanOrEqual(0);
+       expect(info.limit).toBeGreaterThan(0);
+     });
+
+     it('should have higher limit for admin tier', () => {
+       const { rateLimiter } = require('../backend/utils/rate_limit');
+       const defaultInfo = rateLimiter.getRateLimitInfo('test-key-1', 'default');
+       const adminInfo = rateLimiter.getRateLimitInfo('admin-key-1', 'admin');
+       expect(adminInfo.limit).toBeGreaterThan(defaultInfo.limit);
+     });
+   });
+
+   describe('Vector DB', () => {
+     it('should store and retrieve vectors from in-memory DB', async () => {
+       const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
+       const db = new InMemoryVectorDB();
+
+       await db.upsert([
+         {
+           id: 'test-1',
+           values: [1, 0, 0],
+           metadata: { content: 'Test document 1' },
+         },
+         {
+           id: 'test-2',
+           values: [0, 1, 0],
+           metadata: { content: 'Test document 2' },
+         },
+       ]);
+
+       const results = await db.query([1, 0, 0], 2);
+       expect(results.length).toBe(2);
+       expect(results[0].id).toBe('test-1');
+       expect(results[0].score).toBeGreaterThan(results[1].score);
+     });
+
+     it('should filter results based on metadata', async () => {
+       const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
+       const db = new InMemoryVectorDB();
+
+       await db.upsert([
+         {
+           id: 'doc-1',
+           values: [1, 0, 0],
+           metadata: { category: 'tech', content: 'Tech document' },
+         },
+         {
+           id: 'doc-2',
+           values: [0.9, 0, 0],
+           metadata: { category: 'science', content: 'Science document' },
+         },
+       ]);
+
+       const results = await db.query([1, 0, 0], 5, { category: 'tech' });
+       expect(results.length).toBe(1);
+       expect(results[0].id).toBe('doc-1');
+     });
+
+     it('should delete vectors', async () => {
+       const { InMemoryVectorDB } = require('../backend/adapters/vector_db_adapter');
+       const db = new InMemoryVectorDB();
+
+       await db.upsert([
+         { id: 'delete-1', values: [1, 0, 0], metadata: {} },
+       ]);
+
+       let results = await db.query([1, 0, 0], 5);
+       expect(results.length).toBe(1);
+
+       await db.delete(['delete-1']);
+
+       results = await db.query([1, 0, 0], 5);
+       expect(results.length).toBe(0);
+     });
+   });
+
+   describe('Document Service', () => {
+     it('should chunk text correctly', () => {
+       const { documentService } = require('../backend/services/document_service');
+
+       const text = 'a'.repeat(2500);
+       const chunks = documentService['chunkText'](text, 'doc-1', {});
+
+       expect(chunks.length).toBeGreaterThan(1);
+       expect(chunks[0].chunk_index).toBe(0);
+       expect(chunks[0].doc_id).toBe('doc-1');
+     });
+
+     it('should extract text from txt file', async () => {
+       const { documentService } = require('../backend/services/document_service');
+
+       const content = Buffer.from('This is a test document', 'utf-8');
+       const text = await documentService['extractText']('test.txt', content);
+
+       expect(text).toBe('This is a test document');
+     });
+   });
+
+   describe('Metrics', () => {
+     it('should track requests', () => {
+       const { metrics } = require('../backend/utils/metrics');
+
+       const initialMetrics = metrics.getMetrics();
+       metrics.incrementRequests('/test');
+       const updatedMetrics = metrics.getMetrics();
+
+       expect(updatedMetrics.requests_total).toBeGreaterThan(initialMetrics.requests_total);
+     });
+
+     it('should track errors', () => {
+       const { metrics } = require('../backend/utils/metrics');
+
+       const initialMetrics = metrics.getMetrics();
+       metrics.incrementErrors();
+       const updatedMetrics = metrics.getMetrics();
+
+       expect(updatedMetrics.errors_total).toBeGreaterThan(initialMetrics.errors_total);
+     });
+
+     it('should track response times', () => {
+       const { metrics } = require('../backend/utils/metrics');
+
+       metrics.recordResponseTime(100);
+       metrics.recordResponseTime(200);
+       const metricsData = metrics.getMetrics();
+
+       expect(metricsData.average_response_time_ms).toBeGreaterThan(0);
+     });
+   });
+
+   describe('Logger', () => {
+     it('should log messages at appropriate levels', () => {
+       const { logger } = require('../backend/utils/logger');
+       const consoleSpy = vi.spyOn(console, 'log');
+
+       logger.info('Test message');
+       expect(consoleSpy).toHaveBeenCalled();
+
+       consoleSpy.mockRestore();
+     });
+   });
+
+   describe('Configuration', () => {
+     it('should load default configuration', () => {
+       const { loadConfig } = require('../backend/types/config');
+       const config = loadConfig();
+
+       expect(config.auth.apiKeys).toContain('test-key-1');
+       expect(config.rateLimit.default).toBeGreaterThan(0);
+       expect(config.documents.maxFileSizeMB).toBeGreaterThan(0);
+     });
+
+     it('should parse comma-separated API keys', () => {
+       const { loadConfig } = require('../backend/types/config');
+       const config = loadConfig();
+
+       expect(Array.isArray(config.auth.apiKeys)).toBe(true);
+       expect(config.auth.apiKeys.length).toBeGreaterThan(0);
+     });
+   });
+
+   describe('AI Service', () => {
+     it('should initialize with available adapters', () => {
+       const { aiService } = require('../backend/services/ai_service');
+       const models = aiService.getAvailableModels();
+       expect(Array.isArray(models)).toBe(true);
+     });
+   });
+
+   describe('RAG Service', () => {
+     it('should build RAG prompt correctly', () => {
+       const { ragService } = require('../backend/services/rag_service');
+       const prompt = ragService['buildRAGPrompt'](
+         'What is AI?',
+         'AI stands for Artificial Intelligence'
+       );
+       expect(prompt).toContain('What is AI?');
+       expect(prompt).toContain('AI stands for Artificial Intelligence');
+     });
+   });
+
+   describe('Ingestion Worker', () => {
+     it('should track job status', async () => {
+       const { ingestionWorker } = require('../backend/workers/ingestion_worker');
+
+       await ingestionWorker.addJob('job-1', 'test.pdf');
+       const job = ingestionWorker.getJobStatus('job-1');
+
+       expect(job).toBeDefined();
+       expect(job?.doc_id).toBe('job-1');
+       expect(job?.status).toBe('pending');
+     });
+   });
+ });