Spaces:
Paused
Paused
Commit
·
9124b31
0
Parent(s):
initialized
Browse files- Dockerfile +20 -0
- README.md +30 -0
- main.py +106 -0
- requirements.txt +4 -0
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use an official Python runtime as a parent image
|
| 2 |
+
FROM python:3.9-slim
|
| 3 |
+
|
| 4 |
+
# Set the working directory in the container
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Copy the dependencies file to the working directory
|
| 8 |
+
COPY requirements.txt .
|
| 9 |
+
|
| 10 |
+
# Install any needed packages specified in requirements.txt
|
| 11 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
+
|
| 13 |
+
# Copy the main application file to the working directory
|
| 14 |
+
COPY main.py .
|
| 15 |
+
|
| 16 |
+
# Make port 7860 available to the world outside this container
|
| 17 |
+
EXPOSE 7860
|
| 18 |
+
|
| 19 |
+
# Serve the FastAPI app defined in main.py with uvicorn when the container launches
|
| 20 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Vertex to Gemini Proxy
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
secrets:
|
| 9 |
+
- PROXY_API_KEY
|
| 10 |
+
- VERTEX_EXPRESS_KEYS
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Vertex to Gemini Proxy
|
| 14 |
+
|
| 15 |
+
This Hugging Face Space hosts a FastAPI application that accepts requests in the Gemini API format and forwards them to Vertex AI, authenticating upstream with Vertex AI Express keys.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- **Authentication**: Protects the proxy with an API key (authentication is skipped if `PROXY_API_KEY` is not set).
|
| 20 |
+
- **Key Rotation**: Rotates through a list of Vertex Express keys.
|
| 21 |
+
- **Project ID Extraction**: Automatically determines the Google Cloud Project ID from the Vertex Express key.
|
| 22 |
+
- **Dynamic Proxy**: Forwards requests to the appropriate Gemini model and function.
|
| 23 |
+
- **Streaming Support**: Relays streaming (`streamGenerateContent`) responses from Vertex AI back to the client.
|
| 24 |
+
- **Model-Specific Logic**: Modifies request bodies for specific models as needed.
|
| 25 |
+
|
| 26 |
+
## Usage
|
| 27 |
+
|
| 28 |
+
1. Set the `PROXY_API_KEY` and `VERTEX_EXPRESS_KEYS` secrets in your Hugging Face Space settings.
|
| 29 |
+
2. Make requests to the Space URL, following the Gemini API format.
|
| 30 |
+
3. Provide the `PROXY_API_KEY` in the `x-goog-api-key` header or as a `key` query parameter.
|
main.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import httpx
|
| 4 |
+
from fastapi import FastAPI, Request, HTTPException, Security
|
| 5 |
+
from fastapi.responses import StreamingResponse
|
| 6 |
+
from fastapi.security import APIKeyHeader, APIKeyQuery
|
| 7 |
+
from itertools import cycle
|
| 8 |
+
import asyncio
|
| 9 |
+
|
| 10 |
+
# --- Configuration ---
|
| 11 |
+
# Shared secret that callers must present; when unset, get_api_key skips authentication.
PROXY_API_KEY = os.environ.get("PROXY_API_KEY")
# Comma-separated list of Vertex Express keys, e.g. "key1,key2".
VERTEX_EXPRESS_KEYS_STR = os.environ.get("VERTEX_EXPRESS_KEYS")
# Blank entries are dropped so that a trailing comma or "a,,b" never puts an
# empty key into the rotation.
VERTEX_EXPRESS_KEYS = [
    key.strip()
    for key in (VERTEX_EXPRESS_KEYS_STR or "").split(",")
    if key.strip()
]

# Fail fast at import time: the proxy cannot serve anything without a key.
if not VERTEX_EXPRESS_KEYS:
    raise ValueError("VERTEX_EXPRESS_KEYS environment variable not set or empty.")

# --- Globals ---
app = FastAPI()
# Maps a Vertex Express key to the project ID extracted for it.
project_id_cache = {}
# Endless round-robin iterator over the configured keys.
key_rotator = cycle(VERTEX_EXPRESS_KEYS)
# Held while advancing key_rotator.
key_lock = asyncio.Lock()

# --- API Key Security ---
# Both schemes use auto_error=False so a missing value reaches get_api_key as
# None instead of being rejected by FastAPI before our own check runs.
api_key_query = APIKeyQuery(name="key", auto_error=False)
api_key_header = APIKeyHeader(name="x-goog-api-key", auto_error=False)
|
| 27 |
+
|
| 28 |
+
async def get_api_key(
    key_query: str = Security(api_key_query),
    key_header: str = Security(api_key_header),
):
    """Validate the proxy API key supplied by the caller.

    The key may arrive in the ``key`` query parameter or in the
    ``x-goog-api-key`` header; a match on either one is accepted.

    Returns:
        The matching key, or ``None`` when ``PROXY_API_KEY`` is not set
        (authentication is skipped in that case).

    Raises:
        HTTPException: 401 when a key is required and neither value matches.
    """
    # Function-scope import keeps this block self-contained.
    from hmac import compare_digest

    if not PROXY_API_KEY:
        # If no PROXY_API_KEY is set, authentication is skipped
        return None

    expected = PROXY_API_KEY.encode("utf-8")
    for candidate in (key_query, key_header):
        if candidate is None:
            continue
        # Constant-time comparison avoids leaking how much of the secret
        # matched; bytes are used because compare_digest only accepts
        # ASCII-only str arguments.
        if compare_digest(candidate.encode("utf-8"), expected):
            return candidate

    raise HTTPException(status_code=401, detail="Invalid or missing API Key")
|
| 41 |
+
|
| 42 |
+
# --- Project ID Extraction ---
|
| 43 |
+
async def get_project_id(key: str):
    """Return the Google Cloud project ID associated with a Vertex Express key.

    The ID is obtained by POSTing an empty JSON body to the probe URL below
    and, when the endpoint answers 404, reading the
    ``projects/<id>/locations/`` fragment out of the error message. Results
    are memoised in ``project_id_cache``.
    NOTE(review): the probe model name looks deliberately chosen to provoke
    that 404 -- confirm before changing it.

    Args:
        key: A Vertex Express API key.

    Returns:
        The project ID extracted from the upstream error message.

    Raises:
        HTTPException: 500 when the upstream reply does not yield a project
            ID; 502 when the upstream endpoint cannot be reached.
    """
    cached = project_id_cache.get(key)
    if cached is not None:
        return cached

    url = f"https://aiplatform.googleapis.com/v1/publishers/google/models/gemini-2.6-pro:generateContent?key={key}"
    headers = {'Content-Type': 'application/json'}

    async with httpx.AsyncClient() as client:
        try:
            # content= is the httpx parameter for a raw string body; passing a
            # str through data= is deprecated.
            response = await client.post(url, headers=headers, content='{}')
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                # The 404 body is not guaranteed to be JSON of the expected
                # shape, so every level is checked before use.
                try:
                    payload = e.response.json()
                except ValueError:
                    payload = None
                error = payload.get("error") if isinstance(payload, dict) else None
                message = error.get("message", "") if isinstance(error, dict) else ""
                if not isinstance(message, str):
                    message = ""
                match = re.search(r"projects/([^/]+)/locations/", message)
                if match:
                    project_id = match.group(1)
                    project_id_cache[key] = project_id
                    return project_id
            raise HTTPException(status_code=500, detail=f"Failed to extract project ID: {e.response.text}") from e
        except httpx.RequestError as e:
            # Only the exception type is reported: the request URL carries the
            # key and must not leak into a client-visible message.
            raise HTTPException(status_code=502, detail=f"Failed to reach Vertex AI: {type(e).__name__}") from e

    # Reached when the probe unexpectedly succeeds, leaving no error to parse.
    raise HTTPException(status_code=500, detail="Could not extract project ID from any key.")
|
| 66 |
+
|
| 67 |
+
# --- Proxy Endpoint ---
|
| 68 |
+
@app.post("/v1beta/models/{model_path:path}")
async def proxy(request: Request, model_path: str, api_key: str = Security(get_api_key)):
    """Forward a Gemini-style request to Vertex AI and relay the reply.

    Args:
        request: Incoming request; its JSON body, headers and query string
            are forwarded (minus credentials and hop-by-hop headers).
        model_path: Everything after ``/v1beta/models/``, e.g.
            ``<model>:generateContent``.
        api_key: Result of ``get_api_key``; unused here, declared so the
            dependency enforces authentication.

    Returns:
        A ``StreamingResponse`` when ``model_path`` contains
        ``streamGenerateContent``, otherwise a plain ``Response``. Both carry
        the upstream status code and content type.

    Raises:
        HTTPException: 400 when the request body is not valid JSON; whatever
            ``get_project_id`` raises is propagated.
    """
    # Function-scope import: Response is not in the module's import list.
    from fastapi import Response

    async with key_lock:
        express_key = next(key_rotator)

    project_id = await get_project_id(express_key)

    try:
        request_body = await request.json()
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc

    # Model-specific request body modification
    if "gemini-2.5-flash-image-preview" in model_path and isinstance(request_body, dict):
        generation_config = request_body.get("generationConfig")
        if isinstance(generation_config, dict):
            generation_config.pop("thinkingConfig", None)

    target_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/us-central1/publishers/google/models/{model_path}"
    # Forward the caller's query parameters (e.g. alt=sse for streaming) but
    # replace the proxy key with the upstream Express key.
    params = [(k, v) for k, v in request.query_params.multi_items() if k != "key"]
    params.append(("key", express_key))

    # Credentials must not leak upstream, and the body is re-serialised below,
    # so the caller's framing headers no longer describe it.
    skipped_headers = {
        'host', 'authorization', 'x-goog-api-key',
        'content-length', 'transfer-encoding', 'connection',
    }
    forward_headers = {k: v for k, v in request.headers.items() if k.lower() not in skipped_headers}

    # The client must outlive this function on the streaming path, so it is
    # closed explicitly rather than with "async with".
    client = httpx.AsyncClient(timeout=None)
    try:
        upstream = await client.send(
            client.build_request(
                method=request.method,
                url=target_url,
                params=params,
                headers=forward_headers,
                json=request_body,
            ),
            stream=True,
        )
    except BaseException:
        await client.aclose()
        raise

    media_type = upstream.headers.get("content-type")

    if "streamGenerateContent" in model_path:
        async def stream_response():
            # Relay chunks as they arrive; release the connection once the
            # stream ends or the consumer goes away.
            try:
                async for chunk in upstream.aiter_bytes():
                    yield chunk
            finally:
                await upstream.aclose()
                await client.aclose()

        return StreamingResponse(stream_response(), status_code=upstream.status_code, media_type=media_type)

    try:
        response_data = await upstream.aread()
    finally:
        await upstream.aclose()
        await client.aclose()
    # Returning raw bytes would make FastAPI JSON-encode them a second time;
    # an explicit Response passes the upstream payload through untouched.
    return Response(content=response_data, status_code=upstream.status_code, media_type=media_type)
|
| 102 |
+
|
| 103 |
+
if __name__ == "__main__":
    # Direct-execution entry point. Hugging Face Spaces run on port 7860.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
httpx
|
| 4 |
+
python-dotenv
|