Spaces:

samwell
/

medrax2

Sleeping

App Files Files Community

Emily Xie committed on Aug 11

Commit

b2aba7d

1 Parent(s): c34de72

MedGemma fixes

Browse files

Files changed (4) hide show

main.py +8 -4
medrax/tools/vqa/medgemma/medgemma.py +37 -7
medrax/tools/vqa/medgemma/medgemma_client.py +12 -4
medrax/tools/vqa/medgemma/medgemma_setup.py +23 -2

main.py CHANGED Viewed

@@ -91,7 +91,7 @@ def initialize_agent(
         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),
-        "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(cache_dir=model_dir, device=device, api_url=MEDGEMMA_API_URL)
     }
     # Initialize only selected tools or all if none specified
@@ -184,9 +184,13 @@ if __name__ == "__main__":
         # "PythonSandboxTool",  # Add the Python sandbox tool
     ]
     # Setup the MedGemma environment if the MedGemmaVQATool is selected
     if "MedGemmaVQATool" in selected_tools:
-        setup_medgemma_env()
     # Configure the Retrieval Augmented Generation (RAG) system
     # This allows the agent to access and use medical knowledge documents
@@ -210,9 +214,9 @@ if __name__ == "__main__":
     agent, tools_dict = initialize_agent(
         prompt_file="medrax/docs/system_prompts.txt",
         tools_to_use=selected_tools,
-        model_dir="/model-weights",
         temp_dir="temp2",  # Change this to the path of the temporary directory
-        device="cuda:0",
         model="gpt-5",  # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro, gpt-5
         temperature=1.0,
         model_kwargs=model_kwargs,

         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),
+        "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(cache_dir=model_dir, device=device, load_in_8bit=True, api_url=MEDGEMMA_API_URL)
     }
     # Initialize only selected tools or all if none specified
         # "PythonSandboxTool",  # Add the Python sandbox tool
     ]
+    # Share a single cache directory and device across tools
+    shared_model_dir = os.getenv("MODEL_WEIGHTS_DIR", "/model-weights")
+    shared_device = os.getenv("MEDRAX_DEVICE", "cuda:0")
     # Setup the MedGemma environment if the MedGemmaVQATool is selected
     if "MedGemmaVQATool" in selected_tools:
+        setup_medgemma_env(cache_dir=shared_model_dir, device=shared_device)
     # Configure the Retrieval Augmented Generation (RAG) system
     # This allows the agent to access and use medical knowledge documents
     agent, tools_dict = initialize_agent(
         prompt_file="medrax/docs/system_prompts.txt",
         tools_to_use=selected_tools,
+        model_dir=shared_model_dir,
         temp_dir="temp2",  # Change this to the path of the temporary directory
+        device=shared_device,
         model="gpt-5",  # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro, gpt-5
         temperature=1.0,
         model_kwargs=model_kwargs,

medrax/tools/vqa/medgemma/medgemma.py CHANGED Viewed

@@ -98,7 +98,7 @@ class MedGemmaModel:
         device: Optional[str] = "cuda",
         dtype: torch.dtype = torch.bfloat16,
         cache_dir: Optional[str] = None,
-        load_in_4bit: bool = True,
         **kwargs: Any,
     ) -> None:
         """Initialize the MedGemmaModel.
@@ -108,7 +108,7 @@ class MedGemmaModel:
             device: Device to run model on - "cuda" or "cpu" (default: "cuda")
             dtype: Data type for model weights - bfloat16 recommended for efficiency (default: torch.bfloat16)
             cache_dir: Directory to cache downloaded models (default: None)
-            load_in_4bit: Whether to load model in 4-bit quantization for memory efficiency (default: True)
             **kwargs: Additional arguments passed to the model pipeline
         Raises:
@@ -138,8 +138,8 @@ class MedGemmaModel:
             "use_cache": True,
         }
-        if load_in_4bit:
-            model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
         model_kwargs["device_map"] = {"": self.device}
         try:
@@ -288,6 +288,7 @@ app = FastAPI(
 )
 medgemma_model: Optional[MedGemmaModel] = None
 @app.on_event("startup")
 async def startup_event():
@@ -306,7 +307,32 @@ async def startup_event():
     """
     global medgemma_model
     try:
-        medgemma_model = MedGemmaModel()
         print("MedGemma model loaded successfully.")
     except RuntimeError as e:
         print(f"Error loading MedGemma model: {e}")
@@ -379,8 +405,12 @@ async def analyze_images(
             raise HTTPException(status_code=500, detail=f"Failed to save uploaded image: {str(e)}")
     try:
-        # Generate AI analysis
-        response_text = await medgemma_model.aget_response(image_paths, prompt, system_prompt, max_new_tokens)
         # Prepare success response
         metadata = {

         device: Optional[str] = "cuda",
         dtype: torch.dtype = torch.bfloat16,
         cache_dir: Optional[str] = None,
+        load_in_8bit: bool = True,
         **kwargs: Any,
     ) -> None:
         """Initialize the MedGemmaModel.
             device: Device to run model on - "cuda" or "cpu" (default: "cuda")
             dtype: Data type for model weights - bfloat16 recommended for efficiency (default: torch.bfloat16)
             cache_dir: Directory to cache downloaded models (default: None)
+            load_in_8bit: Whether to load model in 8-bit quantization for memory efficiency (default: True)
             **kwargs: Additional arguments passed to the model pipeline
         Raises:
             "use_cache": True,
         }
+        if load_in_8bit:
+            model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
         model_kwargs["device_map"] = {"": self.device}
         try:
 )
 medgemma_model: Optional[MedGemmaModel] = None
+inference_semaphore: Optional[asyncio.Semaphore] = None
 @app.on_event("startup")
 async def startup_event():
     """
     global medgemma_model
     try:
+        # Allow overriding Hugging Face cache directory and device via env vars
+        cache_dir_env = os.getenv("MEDGEMMA_CACHE_DIR")
+        device_env = os.getenv("MEDGEMMA_DEVICE")
+        max_concurrency_env = os.getenv("MEDGEMMA_MAX_CONCURRENCY", "1")
+        # Ensure the cache directory is writable; if not, fall back to a user cache
+        if cache_dir_env:
+            try:
+                os.makedirs(cache_dir_env, exist_ok=True)
+                if not os.access(cache_dir_env, os.W_OK):
+                    raise PermissionError("Cache dir not writable")
+            except Exception:
+                fallback = os.path.join(Path.home(), ".cache", "medrax", "medgemma")
+                os.makedirs(fallback, exist_ok=True)
+                print(f"Warning: MEDGEMMA_CACHE_DIR '{cache_dir_env}' not writable. Falling back to '{fallback}'.")
+                cache_dir_env = fallback
+        medgemma_model = MedGemmaModel(cache_dir=cache_dir_env, device=device_env)
+        # Initialize concurrency gate
+        try:
+            max_concurrency = int(max_concurrency_env)
+        except ValueError:
+            max_concurrency = 1
+        max_concurrency = max(1, max_concurrency)
+        global inference_semaphore
+        inference_semaphore = asyncio.Semaphore(max_concurrency)
         print("MedGemma model loaded successfully.")
     except RuntimeError as e:
         print(f"Error loading MedGemma model: {e}")
             raise HTTPException(status_code=500, detail=f"Failed to save uploaded image: {str(e)}")
     try:
+        # Generate AI analysis with concurrency gating to avoid GPU contention timeouts
+        global inference_semaphore
+        if inference_semaphore is None:
+            inference_semaphore = asyncio.Semaphore(1)
+        async with inference_semaphore:
+            response_text = await medgemma_model.aget_response(image_paths, prompt, system_prompt, max_new_tokens)
         # Prepare success response
         metadata = {

medrax/tools/vqa/medgemma/medgemma_client.py CHANGED Viewed

@@ -59,15 +59,21 @@ class MedGemmaAPIClientTool(BaseTool):
     # API configuration
     api_url: str  # The URL of the running FastAPI service
-    def __init__(self, api_url: str, **kwargs: Any):
         """Initialize the MedGemmaAPIClientTool.
         Args:
             api_url: The URL of the running MedGemma FastAPI service
             **kwargs: Additional arguments passed to BaseTool
         """
-        super().__init__(api_url=api_url, **kwargs)
     def _prepare_request_data(
         self, image_paths: List[str], prompt: str, system_prompt: str, max_new_tokens: int
@@ -149,7 +155,8 @@ class MedGemmaAPIClientTool(BaseTool):
             Tuple of output dictionary and metadata
         """
         # httpx is a modern HTTP client that supports sync and async
-        timeout_config = httpx.Timeout(300.0, connect=10.0)
         client = httpx.Client(timeout=timeout_config)
         try:
@@ -233,11 +240,12 @@ class MedGemmaAPIClientTool(BaseTool):
                     image_paths, prompt, system_prompt, max_new_tokens
                 )
                 response = await client.post(
                     f"{self.api_url}/analyze-images/",
                     data=data,
                     files=files_to_send,
-                    timeout=120.0
                 )
                 response.raise_for_status()

     # API configuration
     api_url: str  # The URL of the running FastAPI service
+    cache_dir: Optional[str] = None # Not used by the client directly, but accepted to keep a uniform constructor
+    device: Optional[str] = None
+    def __init__(self, api_url: str, cache_dir: Optional[str] = None, device: Optional[str] = None, timeout_seconds: Optional[float] = None, **kwargs: Any):
         """Initialize the MedGemmaAPIClientTool.
         Args:
             api_url: The URL of the running MedGemma FastAPI service
+            cache_dir: Optional local cache directory for model weights (accepted for interface consistency)
+            device: Optional device spec (accepted for interface consistency)
+            timeout_seconds: Optional request timeout override (seconds)
             **kwargs: Additional arguments passed to BaseTool
         """
+        super().__init__(api_url=api_url, cache_dir=cache_dir, device=device, **kwargs)
+        self._timeout_seconds = timeout_seconds
     def _prepare_request_data(
         self, image_paths: List[str], prompt: str, system_prompt: str, max_new_tokens: int
             Tuple of output dictionary and metadata
         """
         # httpx is a modern HTTP client that supports sync and async
+        timeout_value = self._timeout_seconds if self._timeout_seconds is not None else 600.0
+        timeout_config = httpx.Timeout(timeout_value, connect=10.0)
         client = httpx.Client(timeout=timeout_config)
         try:
                     image_paths, prompt, system_prompt, max_new_tokens
                 )
+                timeout_value = self._timeout_seconds if self._timeout_seconds is not None else 600.0
                 response = await client.post(
                     f"{self.api_url}/analyze-images/",
                     data=data,
                     files=files_to_send,
+                    timeout=timeout_value
                 )
                 response.raise_for_status()

medrax/tools/vqa/medgemma/medgemma_setup.py CHANGED Viewed

@@ -3,7 +3,23 @@ from pathlib import Path
 import subprocess
 import venv
-def setup_medgemma_env():
     """Set up MedGemma virtual environment and launch the FastAPI service.
     This function performs the following steps:
@@ -55,10 +71,15 @@ def setup_medgemma_env():
     # Launch MedGemma FastAPI service
     print("Launching MedGemma FastAPI service...")
     subprocess.Popen([
         str(python_executable),
         str(medgemma_path)
-    ])
     # Note: stdout and stderr redirection commented out for debugging
     # stdout=subprocess.DEVNULL,
     # stderr=subprocess.DEVNULL,

 import subprocess
 import venv
+def _resolve_writable_cache_dir(preferred: str | None) -> str:
+    """Return a writable cache directory, falling back to user cache if needed."""
+    # Preferred path first
+    if preferred:
+        try:
+            os.makedirs(preferred, exist_ok=True)
+            if os.access(preferred, os.W_OK):
+                return preferred
+        except Exception:
+            pass
+    # Fallback path under user's home
+    fallback = os.path.join(Path.home(), ".cache", "medrax", "medgemma")
+    os.makedirs(fallback, exist_ok=True)
+    return fallback
+def setup_medgemma_env(cache_dir: str | None = None, device: str | None = None):
     """Set up MedGemma virtual environment and launch the FastAPI service.
     This function performs the following steps:
     # Launch MedGemma FastAPI service
     print("Launching MedGemma FastAPI service...")
+    env = os.environ.copy()
+    resolved_cache = _resolve_writable_cache_dir(cache_dir)
+    env["MEDGEMMA_CACHE_DIR"] = resolved_cache
+    if device:
+        env["MEDGEMMA_DEVICE"] = device
     subprocess.Popen([
         str(python_executable),
         str(medgemma_path)
+    ], env=env)
     # Note: stdout and stderr redirection commented out for debugging
     # stdout=subprocess.DEVNULL,
     # stderr=subprocess.DEVNULL,