from fastapi import FastAPI, Request, HTTPException, Header
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from typing import Optional, List, Dict, Any
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time
import os
import json
import secrets
from datetime import datetime, timedelta

# Indian Standard Time helper
def get_ist_time():
    """Get the current time in IST (UTC+5:30)."""
    return datetime.utcnow() + timedelta(hours=5, minutes=30)

# Load the model directly in the Space (free and unlimited).
# AJ-Mini: unlimited free assistant by AJ STUDIOZ.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
BRANDING_NAME = "AJ-Mini v1.0"
DEVELOPER = "AJ STUDIOZ"

# Initialize model and tokenizer with optimization.
print(f"Loading {BRANDING_NAME} (based on {MODEL_NAME})...")
print(f"Developed by {DEVELOPER}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    device_map="auto",
)
model.eval()  # evaluation mode: disables dropout for inference
print(f"{BRANDING_NAME} loaded successfully!")


def query_ollama_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
    """Query the model loaded directly in the Space, optimized for speed.

    (The name is a holdover from an earlier Ollama backend.) Returns an object
    that mimics the small subset of `requests.Response` used by the endpoints
    below: `.status_code`, `.json()`, and `.text`.
    """
    try:
        # Cap generation length (reduced from 256 to 150) for faster CPU inference.
        max_tokens = min(max_tokens, 150)

        # Tokenize the input, truncating long prompts.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

        # Generate with gradient computation disabled for faster inference.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=temperature > 0.1,
                top_p=0.9,
                top_k=40,  # reduced from 50 to 40 for speed
                repetition_penalty=1.15,  # reduce repetition
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_beams=1,  # single-beam decoding for speed
                no_repeat_ngram_size=3,  # block repeated trigrams
            )

        # Keep only the newly generated tokens (drop the echoed prompt).
        input_length = inputs.input_ids.shape[1]
        generated_tokens = outputs[0][input_length:]
        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        class SuccessResponse:
            status_code = 200
            text = generated_text

            def json(self):
                return [{"generated_text": generated_text}]

        return SuccessResponse()
    except Exception as e:
        class ErrorResponse:
            status_code = 500
            text = str(e)

            def json(self):
                return {"error": str(e)}

        return ErrorResponse()
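
# Usage sketch: the endpoints below consume the duck-typed return value
# exactly like a requests.Response, e.g.:
#
#   resp = query_ollama_model("User: hi\n\nAssistant:", max_tokens=64)
#   if resp.status_code == 200:
#       text = resp.json()[0]["generated_text"]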

# Simple API key validation for the AJ format.
VALID_API_KEY_PREFIX = "aj_"


def validate_anthropic_key(api_key: Optional[str]) -> bool:
    """Validate an Anthropic-style API key."""
    if not api_key:
        return False
    return api_key.startswith("sk-ant-") and len(api_key) > 20


def validate_api_key(api_key: Optional[str]) -> bool:
    """Validate API key format; accepts both AJ and Anthropic formats."""
    if not api_key:
        return False
    return (api_key.startswith(VALID_API_KEY_PREFIX) and len(api_key) > 10) or validate_anthropic_key(api_key)


def extract_api_key(authorization: Optional[str]) -> Optional[str]:
    """Extract the API key from an Authorization header."""
    if not authorization:
        return None
    if authorization.startswith("Bearer "):
        return authorization[7:]
    return authorization


def extract_anthropic_key(x_api_key: Optional[str]) -> Optional[str]:
    """Extract the API key from an x-api-key header (Anthropic style)."""
    return x_api_key
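
# Examples of accepted credentials (illustrative keys, not real ones):
#   Authorization: Bearer aj_1234567890abc   -> validate_api_key(...) is True
#   x-api-key: sk-ant-abcdefghijklmnopqrstu  -> validate_anthropic_key(...) is True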

app = FastAPI(
    title="AJ STUDIOZ Mini API",
    version="1.0",
    description="AJ-Mini: Unlimited free AI assistant by AJ STUDIOZ - Powered by TinyLlama-1.1B",
)

# Enable CORS for browser clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
async def root():
    """Service index: model info, pricing, endpoints, and auth formats."""
    return {
        "service": "AJ STUDIOZ Mini API",
        "version": "1.0",
        "model": "AJ-Mini v1.0 (TinyLlama-1.1B-Chat)",
        "developed_by": "AJ STUDIOZ",
        "status": "online",
        "provider": "AJ STUDIOZ",
        "website": "https://ajstudioz.co.in",
        "pricing": {
            "plan": "LIFETIME FREE",
            "rate_limits": "UNLIMITED",
            "cost": "FREE FOREVER",
            "usage_cap": "NONE",
        },
        "description": "AJ-Mini: Unlimited free AI assistant by AJ STUDIOZ with Claude API compatibility, OpenAI support, and reliable 24/7 availability",
        "capabilities": [
            "General conversation & assistance",
            "Basic code generation & help",
            "Multi-language support",
            "Simple problem solving",
            "Anthropic Claude API compatible",
            "OpenAI-compatible API",
            "Text generation & responses",
            "Markdown formatting",
            "24/7 availability",
            "Enterprise security",
            "Unlimited usage - FREE FOREVER",
        ],
        "endpoints": {
            "v1_messages": "/v1/messages - Anthropic Claude-compatible endpoint",
            "v1_chat": "/v1/chat/completions - OpenAI-compatible chat endpoint",
            "v1_completions": "/v1/completions - OpenAI-compatible completions",
            "v1_models": "/v1/models - List available models",
            "chat": "/chat - Simple chat interface",
            "generate": "/api/generate - Direct generation API",
        },
        "authentication": {
            "anthropic": "x-api-key: sk-ant-<your_key>",
            "openai": "Authorization: Bearer aj_<your_key>",
            "note": "Both formats accepted for compatibility",
        },
    }


@app.post("/v1/messages")
async def anthropic_messages(
    request: Request,
    x_api_key: Optional[str] = Header(None, alias="x-api-key"),
    anthropic_version: Optional[str] = Header(None, alias="anthropic-version"),
):
    """Anthropic Claude-compatible messages endpoint."""
    # Validate the API key.
    api_key = extract_anthropic_key(x_api_key)
    if not validate_api_key(api_key):
        return JSONResponse(
            status_code=401,
            content={
                "type": "error",
                "error": {
                    "type": "authentication_error",
                    "message": "Invalid API key. Use format: sk-ant-<your_key> or aj_<your_key>",
                },
            },
        )
    try:
        data = await request.json()
        messages = data.get("messages", [])
        model_name = data.get("model", "claude-sonnet-4-20250514")  # echoed back; does not change the local model
        max_tokens = data.get("max_tokens", 1024)
        temperature = data.get("temperature", 1.0)
        stream = data.get("stream", False)  # accepted but ignored: streaming is not implemented
        if not messages:
            return JSONResponse(
                status_code=400,
                content={
                    "type": "error",
                    "error": {
                        "type": "invalid_request_error",
                        "message": "messages is required",
                    },
                },
            )

        # Flatten the chat into a single text prompt for the local model.
        prompt_parts = ["You are AJ Mini, a helpful AI assistant created by AJ STUDIOZ. You provide reliable assistance and are available 24/7, unlimited and free.\n"]
        for msg in messages:
            role = msg.get("role")
            content = msg.get("content")
            if isinstance(content, list):
                # Handle structured content blocks (text, images, etc.); keep only text parts.
                text_parts = [c.get("text", "") for c in content if c.get("type") == "text"]
                content = " ".join(text_parts)
            if role == "user":
                prompt_parts.append(f"User: {content}")
            elif role == "assistant":
                prompt_parts.append(f"Assistant: {content}")
            elif role == "system":
                prompt_parts.insert(0, content)
        prompt_parts.append("Assistant:")
        full_prompt = "\n\n".join(prompt_parts)
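
        # The flattened prompt has this shape (illustrative):
        #
        #   You are AJ Mini, a helpful AI assistant created by AJ STUDIOZ. ...
        #
        #   User: Hello
        #
        #   Assistant: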
        response = query_ollama_model(full_prompt, max_tokens, temperature)
        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                assistant_message = result[0].get('generated_text', '')
            else:
                assistant_message = result.get('generated_text', '')
        else:
            raise HTTPException(status_code=500, detail=f"Model error: {response.text}")

        # Return an Anthropic-compatible response. The usage numbers are rough
        # whitespace word counts, not true tokenizer counts.
        return {
            "id": f"msg_{secrets.token_hex(12)}",
            "type": "message",
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": assistant_message,
                }
            ],
            "model": model_name,
            "stop_reason": "end_turn",
            "stop_sequence": None,
            "usage": {
                "input_tokens": sum(len(str(m.get("content", "")).split()) for m in messages),
                "output_tokens": len(assistant_message.split()),
            },
        }
    except HTTPException:
        raise
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "type": "error",
                "error": {
                    "type": "api_error",
                    "message": str(e),
                },
            },
        )


@app.get("/v1/models")
async def list_models(authorization: Optional[str] = Header(None)):
    """OpenAI-compatible models endpoint."""
    api_key = extract_api_key(authorization)
    if not validate_api_key(api_key):
        raise HTTPException(status_code=401, detail="Invalid API key. Use format: aj_your_key")
    return {
        "object": "list",
        "data": [
            {
                "id": "aj-mini",
                "object": "model",
                "created": 1730505600,
                "owned_by": "aj-studioz",
                "permission": [],
                "root": "aj-mini",
                "parent": None,
            },
            {
                "id": "aj-mini-v1",
                "object": "model",
                "created": 1730505600,
                "owned_by": "aj-studioz",
                "permission": [],
                "root": "aj-mini-v1",
                "parent": None,
            },
        ],
    }


@app.post("/v1/chat/completions")
async def chat_completions(request: Request, authorization: Optional[str] = Header(None)):
    """OpenAI-compatible chat completions endpoint - public access for AJ STUDIOZ."""
    # API key validation is disabled for public access; uncomment to enforce it.
    # api_key = extract_api_key(authorization)
    # if not validate_api_key(api_key):
    #     raise HTTPException(
    #         status_code=401,
    #         detail={
    #             "error": {
    #                 "message": "Invalid API key. Your API key should start with 'aj_'",
    #                 "type": "invalid_request_error",
    #                 "code": "invalid_api_key",
    #             }
    #         },
    #     )
    try:
        data = await request.json()
        messages = data.get("messages", [])
        model_name = data.get("model", "aj-mini")
        max_tokens = data.get("max_tokens", 256)
        temperature = data.get("temperature", 0.7)
        if not messages:
            raise HTTPException(status_code=400, detail="Messages are required")

        # Prepend a default system prompt if the caller did not supply one.
        has_system = any(msg.get("role") == "system" for msg in messages)
        if not has_system:
            messages.insert(0, {
                "role": "system",
                "content": "You are AJ-Mini v1.0, a helpful AI assistant developed by AJ STUDIOZ. You are friendly, concise, and helpful. When asked who you are, introduce yourself as AJ-Mini created by AJ STUDIOZ.",
            })

        # Flatten the chat into a single text prompt.
        prompt_parts = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                prompt_parts.append(f"{content}")
            elif role == "user":
                prompt_parts.append(f"User: {content}")
            elif role == "assistant":
                prompt_parts.append(f"Assistant: {content}")
        prompt = "\n\n".join(prompt_parts) + "\n\nAssistant:"
        completion_id = f"chatcmpl-{secrets.token_hex(12)}"

        response = query_ollama_model(prompt, max_tokens, temperature)
        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                assistant_message = result[0].get('generated_text', '')
            else:
                assistant_message = result.get('generated_text', '')
        else:
            raise HTTPException(status_code=500, detail=f"Model error: {response.text}")

        # OpenAI-compatible response; usage numbers are whitespace word counts.
        return {
            "id": completion_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model_name,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": assistant_message,
                    },
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": len(prompt.split()),
                "completion_tokens": len(assistant_message.split()),
                "total_tokens": len(prompt.split()) + len(assistant_message.split()),
            },
            "system_fingerprint": "aj-mini-v1.0",
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/v1/completions")
async def completions(request: Request, authorization: Optional[str] = Header(None)):
    """OpenAI-compatible completions endpoint."""
    api_key = extract_api_key(authorization)
    if not validate_api_key(api_key):
        raise HTTPException(status_code=401, detail="Invalid API key")
    try:
        data = await request.json()
        prompt = data.get("prompt", "")
        model_name = data.get("model", "aj-mini")
        max_tokens = data.get("max_tokens", 2000)
        temperature = data.get("temperature", 0.7)
        if not prompt:
            raise HTTPException(status_code=400, detail="Prompt is required")

        response = query_ollama_model(prompt, max_tokens, temperature)
        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                completion_text = result[0].get('generated_text', '')
            else:
                completion_text = result.get('generated_text', '')
        else:
            raise HTTPException(status_code=500, detail=f"Model error: {response.text}")

        return {
            "id": f"cmpl-{secrets.token_hex(12)}",
            "object": "text_completion",
            "created": int(time.time()),
            "model": model_name,
            "choices": [
                {
                    "text": completion_text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": len(prompt.split()),
                "completion_tokens": len(completion_text.split()),
                "total_tokens": len(prompt.split()) + len(completion_text.split()),
            },
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/chat")
async def chat(request: Request):
    """Simple chat interface."""
    try:
        data = await request.json()
        message = data.get("message", "")
        if not message:
            return JSONResponse({"error": "Message is required"}, status_code=400)

        # Single-turn prompt for the local model.
        full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
        response = query_ollama_model(full_message, 500, 0.7)
        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                reply = result[0].get('generated_text', '')
            else:
                reply = result.get('generated_text', '')
            return JSONResponse({
                "reply": reply,
                "model": "AJ-Mini v1.0",
                "provider": "AJ STUDIOZ",
            })
        else:
            return JSONResponse(
                {"error": "Model error", "details": response.text},
                status_code=500,
            )
    except Exception as e:
        return JSONResponse(
            {"error": "Failed to process request", "details": str(e)},
            status_code=500,
        )


@app.post("/api/generate")
async def generate(request: Request):
    """Direct API for text generation."""
    try:
        data = await request.json()
        prompt = data.get("prompt", "")
        max_tokens = data.get("max_tokens", 1000)
        temperature = data.get("temperature", 0.7)
        if not prompt:
            return JSONResponse({"error": "Prompt is required"}, status_code=400)

        response = query_ollama_model(prompt, max_tokens, temperature)
        if response.status_code == 200:
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                response_text = result[0].get('generated_text', '')
            else:
                response_text = result.get('generated_text', '')
            return JSONResponse({
                "response": response_text,
                "model": "AJ-Mini v1.0",
                "done": True,
            })
        else:
            return JSONResponse(
                {"error": "Model error", "details": response.text},
                status_code=500,
            )
    except Exception as e:
        return JSONResponse(
            {"error": "Failed to process request", "details": str(e)},
            status_code=500,
        )


@app.get("/health")  # route path assumed; not listed in the root index
async def health():
    """Fast health check endpoint - no model query."""
    return {
        "status": "healthy",
        "service": "AJ STUDIOZ Mini API",
        "model": "AJ-Mini v1.0",
        "version": "1.0",
        "developer": "AJ STUDIOZ",
        "platform": "HuggingFace Spaces (CPU)",
        "availability": "Unlimited FREE",
        "timestamp": get_ist_time().strftime("%Y-%m-%d %H:%M:%S IST"),
        "note": "Use POST /v1/chat/completions for inference",
    }


@app.get("/test")  # route path assumed; not listed in the root index
async def quick_test():
    """Ultra-fast test endpoint for ReqBin - responds in under 200ms."""
    return {
        "status": "ok",
        "message": "AJ-Mini v1.0 is operational",
        "model": "aj-mini",
        "latency": "< 200ms",
        "endpoint": "POST /v1/chat/completions",
        "example": {
            "model": "aj-mini",
            "messages": [{"role": "user", "content": "Hello"}],
        },
        "developer": "AJ STUDIOZ",
        "availability": "UNLIMITED FREE",
        "timestamp": get_ist_time().strftime("%Y-%m-%d %H:%M:%S IST"),
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
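
# --- Example client usage (sketch; assumes the Space is reachable at
# http://localhost:7860 - substitute your own host and key) ---
#
# import requests
#
# # OpenAI-style chat completion (no key required on this endpoint):
# r = requests.post(
#     "http://localhost:7860/v1/chat/completions",
#     json={"model": "aj-mini", "messages": [{"role": "user", "content": "Hello"}]},
# )
# print(r.json()["choices"][0]["message"]["content"])
#
# # Anthropic-style message (requires an aj_ or sk-ant- key):
# r = requests.post(
#     "http://localhost:7860/v1/messages",
#     headers={"x-api-key": "aj_your_key"},
#     json={"model": "aj-mini", "max_tokens": 128,
#           "messages": [{"role": "user", "content": "Hello"}]},
# )
# print(r.json()["content"][0]["text"])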