Spaces:

bibibi12345
/

ve2gem

Paused

App Files Files Community

bibibi12345 committed on Aug 26

Commit

9124b31

0 Parent(s):

initialized

Browse files

Files changed (4) hide show

Dockerfile +20 -0
README.md +30 -0
main.py +106 -0
requirements.txt +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+# Set the working directory in the container
+WORKDIR /app
+# Copy the dependencies file to the working directory
+COPY requirements.txt .
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the main application file to the working directory
+COPY main.py .
+# Make port 7860 available to the world outside this container
+EXPOSE 7860
+# Serve the `app` object from main.py with uvicorn on port 7860 when the container launches
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,30 @@

+---
+title: Vertex to Gemini Proxy
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: docker
+app_port: 7860
+secrets:
+  - PROXY_API_KEY
+  - VERTEX_EXPRESS_KEYS
+---
+# Vertex to Gemini Proxy
+This Hugging Face Space hosts a FastAPI application that accepts requests in the Gemini API format and forwards them to Vertex AI, authenticating upstream with Vertex Express keys.
+## Features
+- **Authentication**: Protects the proxy with an API key.
+- **Key Rotation**: Rotates through a list of Vertex Express keys.
+- **Project ID Extraction**: Automatically determines the Google Cloud Project ID from the Vertex Express key.
+- **Dynamic Proxy**: Forwards requests to the appropriate Gemini model and function.
+- **Streaming Support**: Handles streaming responses from the Gemini API.
+- **Model-Specific Logic**: Modifies request bodies for specific models as needed.
+## Usage
+1.  Set the `PROXY_API_KEY` and `VERTEX_EXPRESS_KEYS` secrets in your Hugging Face Space settings.
+2.  Make requests to the Space URL, following the Gemini API format.
+3.  Provide the `PROXY_API_KEY` in the `x-goog-api-key` header or as a `key` query parameter.

main.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import os
+import re
+import httpx
+from fastapi import FastAPI, Request, HTTPException, Security
+from fastapi.responses import StreamingResponse
+from fastapi.security import APIKeyHeader, APIKeyQuery
+from itertools import cycle
+import asyncio
+# --- Configuration ---
+PROXY_API_KEY = os.environ.get("PROXY_API_KEY")
+VERTEX_EXPRESS_KEYS_STR = os.environ.get("VERTEX_EXPRESS_KEYS")
+VERTEX_EXPRESS_KEYS = [key.strip() for key in VERTEX_EXPRESS_KEYS_STR.split(',')] if VERTEX_EXPRESS_KEYS_STR else []
+if not VERTEX_EXPRESS_KEYS:
+    raise ValueError("VERTEX_EXPRESS_KEYS environment variable not set or empty.")
+# --- Globals ---
+app = FastAPI()
+project_id_cache = {}
+key_rotator = cycle(VERTEX_EXPRESS_KEYS)
+key_lock = asyncio.Lock()
+# --- API Key Security ---
+api_key_query = APIKeyQuery(name="key", auto_error=False)
+api_key_header = APIKeyHeader(name="x-goog-api-key", auto_error=False)
+async def get_api_key(
+    key_query: str = Security(api_key_query),
+    key_header: str = Security(api_key_header),
+):
+    if PROXY_API_KEY:
+        if key_query == PROXY_API_KEY:
+            return key_query
+        if key_header == PROXY_API_KEY:
+            return key_header
+        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
+    else:
+        # If no PROXY_API_KEY is set, authentication is skipped
+        return None
+# --- Project ID Extraction ---
+async def get_project_id(key: str):
+    if key in project_id_cache:
+        return project_id_cache[key]
+    url = f"https://aiplatform.googleapis.com/v1/publishers/google/models/gemini-2.6-pro:generateContent?key={key}"
+    headers = {'Content-Type': 'application/json'}
+    data = '{}'
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.post(url, headers=headers, data=data)
+            response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            if e.response.status_code == 404:
+                error_message = e.response.json().get("error", {}).get("message", "")
+                match = re.search(r"projects/([^/]+)/locations/", error_message)
+                if match:
+                    project_id = match.group(1)
+                    project_id_cache[key] = project_id
+                    return project_id
+            raise HTTPException(status_code=500, detail=f"Failed to extract project ID: {e.response.text}")
+    raise HTTPException(status_code=500, detail="Could not extract project ID from any key.")
+# --- Proxy Endpoint ---
+@app.post("/v1beta/models/{model_path:path}")
+async def proxy(request: Request, model_path: str, api_key: str = Security(get_api_key)):
+    async with key_lock:
+        express_key = next(key_rotator)
+    project_id = await get_project_id(express_key)
+    request_body = await request.json()
+    # Model-specific request body modification
+    if "gemini-2.5-flash-image-preview" in model_path:
+        if "generationConfig" in request_body and "thinkingConfig" in request_body.get("generationConfig", {}):
+            del request_body["generationConfig"]["thinkingConfig"]
+    target_url = f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/us-central1/publishers/google/models/{model_path}?key={express_key}"
+    async def stream_response(response):
+        async for chunk in response.aiter_bytes():
+            yield chunk
+    async with httpx.AsyncClient(timeout=None) as client:
+        req = client.build_request(
+            method=request.method,
+            url=target_url,
+            headers={k: v for k, v in request.headers.items() if k.lower() not in ['host', 'authorization', 'x-goog-api-key']},
+            json=request_body,
+        )
+        response = await client.send(req, stream=True)
+    if "streamGenerateContent" in model_path:
+        return StreamingResponse(stream_response(response), media_type=response.headers.get("content-type"))
+    else:
+        response_data = await response.aread()
+        return response_data
+if __name__ == "__main__":
+    import uvicorn
+    # Hugging Face Spaces run on port 7860
+    uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+fastapi
+uvicorn
+httpx
+python-dotenv