Adibvafa committed on
Commit 9a2c640 · 1 Parent(s): f1b429f

Improve style

Files changed (3)
  1. api.py +73 -78
  2. interface.py +10 -23
  3. main.py +50 -112
api.py CHANGED
@@ -32,12 +32,13 @@ from medrax.agent import Agent
32
  class QueryRequest(BaseModel):
33
  """
34
  Request model for text-only queries.
35
-
36
  Attributes:
37
  question (str): The question or query to ask the agent
38
  system_prompt (Optional[str]): Custom system prompt to override default
39
  thread_id (Optional[str]): Optional thread ID for conversation continuity
40
  """
 
41
  question: str = Field(..., description="The question or query to ask the agent")
42
  system_prompt: Optional[str] = Field(None, description="Custom system prompt to override default")
43
  thread_id: Optional[str] = Field(None, description="Optional thread ID for conversation continuity")
@@ -46,13 +47,14 @@ class QueryRequest(BaseModel):
46
  class QueryResponse(BaseModel):
47
  """
48
  Response model for API queries.
49
-
50
  Attributes:
51
  response (str): The agent's text response
52
  thread_id (str): The thread ID used for this conversation
53
  tools_used (List[str]): List of tools that were executed
54
  processing_time (float): Time taken to process the request in seconds
55
  """
 
56
  response: str = Field(..., description="The agent's text response")
57
  thread_id: str = Field(..., description="The thread ID used for this conversation")
58
  tools_used: List[str] = Field(..., description="List of tools that were executed")
@@ -62,15 +64,15 @@ class QueryResponse(BaseModel):
62
  class MedRAXAPI:
63
  """
64
  FastAPI application wrapper for the MedRAX agent.
65
-
66
  This class provides a clean interface for creating and managing the API endpoints
67
  while maintaining separation of concerns from the core agent functionality.
68
  """
69
-
70
  def __init__(self, agent: Agent, tools_dict: Dict[str, Any], temp_dir: str = "temp_api"):
71
  """
72
  Initialize the MedRAX API.
73
-
74
  Args:
75
  agent (Agent): The initialized MedRAX agent
76
  tools_dict (Dict[str, Any]): Dictionary of available tools
@@ -80,16 +82,16 @@ class MedRAXAPI:
80
  self.tools_dict = tools_dict
81
  self.temp_dir = Path(temp_dir)
82
  self.temp_dir.mkdir(exist_ok=True)
83
-
84
  # Create FastAPI app
85
  self.app = FastAPI(
86
  title="MedRAX API",
87
  description="Medical Reasoning Agent for Chest X-ray Analysis",
88
  version="2.0.0",
89
  docs_url="/docs",
90
- redoc_url="/redoc"
91
  )
92
-
93
  # Add CORS middleware
94
  self.app.add_middleware(
95
  CORSMiddleware,
@@ -98,161 +100,154 @@ class MedRAXAPI:
98
  allow_methods=["*"],
99
  allow_headers=["*"],
100
  )
101
-
102
  # Register routes
103
  self._register_routes()
104
-
105
  def _register_routes(self):
106
  """Register all API routes."""
107
-
108
  @self.app.get("/health")
109
  async def health_check():
110
  """Health check endpoint."""
111
  return {"status": "healthy", "service": "MedRAX API"}
112
-
113
  @self.app.get("/tools")
114
  async def list_tools():
115
  """List available tools."""
116
- return {
117
- "available_tools": list(self.tools_dict.keys()),
118
- "total_count": len(self.tools_dict)
119
- }
120
-
121
  @self.app.post("/query", response_model=QueryResponse)
122
  async def query_text_only(request: QueryRequest):
123
  """
124
  Process a text-only query without images.
125
-
126
  Args:
127
  request (QueryRequest): The query request
128
-
129
  Returns:
130
  QueryResponse: The agent's response
131
  """
132
  return await self._process_query(
133
- question=request.question,
134
- system_prompt=request.system_prompt,
135
- thread_id=request.thread_id,
136
- images=None
137
  )
138
-
139
  @self.app.post("/query-with-images", response_model=QueryResponse)
140
  async def query_with_images(
141
  question: str = Form(..., description="The question or query to ask the agent"),
142
  system_prompt: Optional[str] = Form(None, description="Custom system prompt to override default"),
143
  thread_id: Optional[str] = Form(None, description="Optional thread ID for conversation continuity"),
144
- images: List[UploadFile] = File(..., description="One or more medical images to analyze")
145
  ):
146
  """
147
  Process a query with one or more images.
148
-
149
  Args:
150
  question (str): The question or query to ask the agent
151
  system_prompt (Optional[str]): Custom system prompt to override default
152
  thread_id (Optional[str]): Optional thread ID for conversation continuity
153
  images (List[UploadFile]): List of uploaded image files
154
-
155
  Returns:
156
  QueryResponse: The agent's response
157
  """
158
  # Validate image files
159
  if not images or len(images) == 0:
160
  raise HTTPException(status_code=400, detail="At least one image is required")
161
-
162
  # Validate file types
163
- allowed_types = {'image/jpeg', 'image/jpg', 'image/png', 'image/bmp', 'image/tiff', 'application/dicom'}
164
  for image in images:
165
  if image.content_type not in allowed_types:
166
  raise HTTPException(
167
- status_code=400,
168
- detail=f"Unsupported file type: {image.content_type}. Allowed types: {allowed_types}"
169
  )
170
-
171
  return await self._process_query(
172
- question=question,
173
- system_prompt=system_prompt,
174
- thread_id=thread_id,
175
- images=images
176
  )
177
-
178
  async def _process_query(
179
  self,
180
  question: str,
181
  system_prompt: Optional[str] = None,
182
  thread_id: Optional[str] = None,
183
- images: Optional[List[UploadFile]] = None
184
  ) -> QueryResponse:
185
  """
186
  Internal method to process queries through the agent.
187
-
188
  Args:
189
  question (str): The question to ask
190
  system_prompt (Optional[str]): Custom system prompt
191
  thread_id (Optional[str]): Thread ID for conversation
192
  images (Optional[List[UploadFile]]): List of images
193
-
194
  Returns:
195
  QueryResponse: The processed response
196
  """
197
  start_time = time.time()
198
-
199
  # Generate thread ID if not provided
200
  if not thread_id:
201
  thread_id = str(uuid.uuid4())
202
-
203
  try:
204
  # Prepare messages
205
  messages = []
206
  image_paths = []
207
-
208
  # Handle image uploads
209
  if images:
210
  for i, image in enumerate(images):
211
  # Save uploaded file temporarily
212
  temp_path = self.temp_dir / f"{thread_id}_{i}_{image.filename}"
213
-
214
  with open(temp_path, "wb") as buffer:
215
  content = await image.read()
216
  buffer.write(content)
217
-
218
  image_paths.append(str(temp_path))
219
-
220
  # Add image path for tools
221
  messages.append({"role": "user", "content": f"image_path: {temp_path}"})
222
-
223
  # Add base64 encoded image for multimodal processing
224
  image_base64 = base64.b64encode(content).decode("utf-8")
225
-
226
  # Determine MIME type
227
  mime_type = "image/jpeg" # Default
228
  if image.content_type:
229
  mime_type = image.content_type
230
- elif temp_path.suffix.lower() in ['.png']:
231
  mime_type = "image/png"
232
-
233
- messages.append({
234
- "role": "user",
235
- "content": [
236
- {
237
- "type": "image_url",
238
- "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
239
- }
240
- ],
241
- })
242
-
243
  # Add text question
244
  messages.append({"role": "user", "content": [{"type": "text", "text": question}]})
245
-
246
  # Process through agent workflow
247
  response_text = ""
248
  tools_used = []
249
-
250
  # Temporarily update system prompt if provided
251
  original_prompt = None
252
  if system_prompt:
253
  original_prompt = self.agent.system_prompt
254
  self.agent.system_prompt = system_prompt
255
-
256
  try:
257
  async for chunk in self._stream_agent_response(messages, thread_id):
258
  if chunk.get("type") == "text":
@@ -263,23 +258,23 @@ class MedRAXAPI:
263
  # Restore original system prompt
264
  if original_prompt is not None:
265
  self.agent.system_prompt = original_prompt
266
-
267
  # Clean up temporary files
268
  for image_path in image_paths:
269
  try:
270
  Path(image_path).unlink(missing_ok=True)
271
  except Exception:
272
  pass # Ignore cleanup errors
273
-
274
  processing_time = time.time() - start_time
275
-
276
  return QueryResponse(
277
  response=response_text.strip(),
278
  thread_id=thread_id,
279
  tools_used=list(set(tools_used)), # Remove duplicates
280
- processing_time=processing_time
281
  )
282
-
283
  except Exception as e:
284
  # Clean up on error
285
  for image_path in image_paths:
@@ -287,17 +282,17 @@ class MedRAXAPI:
287
  Path(image_path).unlink(missing_ok=True)
288
  except Exception:
289
  pass
290
-
291
  raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")
292
-
293
  async def _stream_agent_response(self, messages: List[Dict], thread_id: str):
294
  """
295
  Stream responses from the agent workflow.
296
-
297
  Args:
298
  messages (List[Dict]): Messages to process
299
  thread_id (str): Thread ID for the conversation
300
-
301
  Yields:
302
  Dict: Response chunks with type and content
303
  """
@@ -309,24 +304,24 @@ class MedRAXAPI:
309
  ):
310
  if not isinstance(chunk, dict):
311
  continue
312
-
313
  for node_name, node_output in chunk.items():
314
  if "messages" not in node_output:
315
  continue
316
-
317
  for msg in node_output["messages"]:
318
  if isinstance(msg, AIMessage) and msg.content:
319
  # Clean up temp paths from response
320
  clean_content = re.sub(r"temp[^\s]*", "", msg.content).strip()
321
  if clean_content:
322
  yield {"type": "text", "content": clean_content}
323
-
324
  elif isinstance(msg, ToolMessage):
325
  # Extract tool name from the message
326
  tool_call_id = msg.tool_call_id
327
  # We'll track tool usage but not include detailed output in API response
328
  yield {"type": "tool", "tool_name": "tool_executed"}
329
-
330
  except Exception as e:
331
  yield {"type": "error", "content": str(e)}
332
 
@@ -334,12 +329,12 @@ class MedRAXAPI:
334
  def create_api(agent: Agent, tools_dict: Dict[str, Any], temp_dir: str = "temp_api") -> FastAPI:
335
  """
336
  Create and configure the MedRAX FastAPI application.
337
-
338
  Args:
339
  agent (Agent): The initialized MedRAX agent
340
  tools_dict (Dict[str, Any]): Dictionary of available tools
341
  temp_dir (str): Directory for temporary file storage
342
-
343
  Returns:
344
  FastAPI: Configured FastAPI application
345
  """
 
32
  class QueryRequest(BaseModel):
33
  """
34
  Request model for text-only queries.
35
+
36
  Attributes:
37
  question (str): The question or query to ask the agent
38
  system_prompt (Optional[str]): Custom system prompt to override default
39
  thread_id (Optional[str]): Optional thread ID for conversation continuity
40
  """
41
+
42
  question: str = Field(..., description="The question or query to ask the agent")
43
  system_prompt: Optional[str] = Field(None, description="Custom system prompt to override default")
44
  thread_id: Optional[str] = Field(None, description="Optional thread ID for conversation continuity")
 
47
  class QueryResponse(BaseModel):
48
  """
49
  Response model for API queries.
50
+
51
  Attributes:
52
  response (str): The agent's text response
53
  thread_id (str): The thread ID used for this conversation
54
  tools_used (List[str]): List of tools that were executed
55
  processing_time (float): Time taken to process the request in seconds
56
  """
57
+
58
  response: str = Field(..., description="The agent's text response")
59
  thread_id: str = Field(..., description="The thread ID used for this conversation")
60
  tools_used: List[str] = Field(..., description="List of tools that were executed")
 
64
  class MedRAXAPI:
65
  """
66
  FastAPI application wrapper for the MedRAX agent.
67
+
68
  This class provides a clean interface for creating and managing the API endpoints
69
  while maintaining separation of concerns from the core agent functionality.
70
  """
71
+
72
  def __init__(self, agent: Agent, tools_dict: Dict[str, Any], temp_dir: str = "temp_api"):
73
  """
74
  Initialize the MedRAX API.
75
+
76
  Args:
77
  agent (Agent): The initialized MedRAX agent
78
  tools_dict (Dict[str, Any]): Dictionary of available tools
 
82
  self.tools_dict = tools_dict
83
  self.temp_dir = Path(temp_dir)
84
  self.temp_dir.mkdir(exist_ok=True)
85
+
86
  # Create FastAPI app
87
  self.app = FastAPI(
88
  title="MedRAX API",
89
  description="Medical Reasoning Agent for Chest X-ray Analysis",
90
  version="2.0.0",
91
  docs_url="/docs",
92
+ redoc_url="/redoc",
93
  )
94
+
95
  # Add CORS middleware
96
  self.app.add_middleware(
97
  CORSMiddleware,
 
100
  allow_methods=["*"],
101
  allow_headers=["*"],
102
  )
103
+
104
  # Register routes
105
  self._register_routes()
106
+
107
  def _register_routes(self):
108
  """Register all API routes."""
109
+
110
  @self.app.get("/health")
111
  async def health_check():
112
  """Health check endpoint."""
113
  return {"status": "healthy", "service": "MedRAX API"}
114
+
115
  @self.app.get("/tools")
116
  async def list_tools():
117
  """List available tools."""
118
+ return {"available_tools": list(self.tools_dict.keys()), "total_count": len(self.tools_dict)}
119
+
120
  @self.app.post("/query", response_model=QueryResponse)
121
  async def query_text_only(request: QueryRequest):
122
  """
123
  Process a text-only query without images.
124
+
125
  Args:
126
  request (QueryRequest): The query request
127
+
128
  Returns:
129
  QueryResponse: The agent's response
130
  """
131
  return await self._process_query(
132
+ question=request.question, system_prompt=request.system_prompt, thread_id=request.thread_id, images=None
133
  )
134
+
135
  @self.app.post("/query-with-images", response_model=QueryResponse)
136
  async def query_with_images(
137
  question: str = Form(..., description="The question or query to ask the agent"),
138
  system_prompt: Optional[str] = Form(None, description="Custom system prompt to override default"),
139
  thread_id: Optional[str] = Form(None, description="Optional thread ID for conversation continuity"),
140
+ images: List[UploadFile] = File(..., description="One or more medical images to analyze"),
141
  ):
142
  """
143
  Process a query with one or more images.
144
+
145
  Args:
146
  question (str): The question or query to ask the agent
147
  system_prompt (Optional[str]): Custom system prompt to override default
148
  thread_id (Optional[str]): Optional thread ID for conversation continuity
149
  images (List[UploadFile]): List of uploaded image files
150
+
151
  Returns:
152
  QueryResponse: The agent's response
153
  """
154
  # Validate image files
155
  if not images or len(images) == 0:
156
  raise HTTPException(status_code=400, detail="At least one image is required")
157
+
158
  # Validate file types
159
+ allowed_types = {"image/jpeg", "image/jpg", "image/png", "image/bmp", "image/tiff", "application/dicom"}
160
  for image in images:
161
  if image.content_type not in allowed_types:
162
  raise HTTPException(
163
+ status_code=400,
164
+ detail=f"Unsupported file type: {image.content_type}. Allowed types: {allowed_types}",
165
  )
166
+
167
  return await self._process_query(
168
+ question=question, system_prompt=system_prompt, thread_id=thread_id, images=images
169
  )
170
+
171
  async def _process_query(
172
  self,
173
  question: str,
174
  system_prompt: Optional[str] = None,
175
  thread_id: Optional[str] = None,
176
+ images: Optional[List[UploadFile]] = None,
177
  ) -> QueryResponse:
178
  """
179
  Internal method to process queries through the agent.
180
+
181
  Args:
182
  question (str): The question to ask
183
  system_prompt (Optional[str]): Custom system prompt
184
  thread_id (Optional[str]): Thread ID for conversation
185
  images (Optional[List[UploadFile]]): List of images
186
+
187
  Returns:
188
  QueryResponse: The processed response
189
  """
190
  start_time = time.time()
191
+
192
  # Generate thread ID if not provided
193
  if not thread_id:
194
  thread_id = str(uuid.uuid4())
195
+
196
  try:
197
  # Prepare messages
198
  messages = []
199
  image_paths = []
200
+
201
  # Handle image uploads
202
  if images:
203
  for i, image in enumerate(images):
204
  # Save uploaded file temporarily
205
  temp_path = self.temp_dir / f"{thread_id}_{i}_{image.filename}"
206
+
207
  with open(temp_path, "wb") as buffer:
208
  content = await image.read()
209
  buffer.write(content)
210
+
211
  image_paths.append(str(temp_path))
212
+
213
  # Add image path for tools
214
  messages.append({"role": "user", "content": f"image_path: {temp_path}"})
215
+
216
  # Add base64 encoded image for multimodal processing
217
  image_base64 = base64.b64encode(content).decode("utf-8")
218
+
219
  # Determine MIME type
220
  mime_type = "image/jpeg" # Default
221
  if image.content_type:
222
  mime_type = image.content_type
223
+ elif temp_path.suffix.lower() in [".png"]:
224
  mime_type = "image/png"
225
+
226
+ messages.append(
227
+ {
228
+ "role": "user",
229
+ "content": [
230
+ {
231
+ "type": "image_url",
232
+ "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
233
+ }
234
+ ],
235
+ }
236
+ )
237
+
238
  # Add text question
239
  messages.append({"role": "user", "content": [{"type": "text", "text": question}]})
240
+
241
  # Process through agent workflow
242
  response_text = ""
243
  tools_used = []
244
+
245
  # Temporarily update system prompt if provided
246
  original_prompt = None
247
  if system_prompt:
248
  original_prompt = self.agent.system_prompt
249
  self.agent.system_prompt = system_prompt
250
+
251
  try:
252
  async for chunk in self._stream_agent_response(messages, thread_id):
253
  if chunk.get("type") == "text":
 
258
  # Restore original system prompt
259
  if original_prompt is not None:
260
  self.agent.system_prompt = original_prompt
261
+
262
  # Clean up temporary files
263
  for image_path in image_paths:
264
  try:
265
  Path(image_path).unlink(missing_ok=True)
266
  except Exception:
267
  pass # Ignore cleanup errors
268
+
269
  processing_time = time.time() - start_time
270
+
271
  return QueryResponse(
272
  response=response_text.strip(),
273
  thread_id=thread_id,
274
  tools_used=list(set(tools_used)), # Remove duplicates
275
+ processing_time=processing_time,
276
  )
277
+
278
  except Exception as e:
279
  # Clean up on error
280
  for image_path in image_paths:
 
282
  Path(image_path).unlink(missing_ok=True)
283
  except Exception:
284
  pass
285
+
286
  raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")
287
+
288
  async def _stream_agent_response(self, messages: List[Dict], thread_id: str):
289
  """
290
  Stream responses from the agent workflow.
291
+
292
  Args:
293
  messages (List[Dict]): Messages to process
294
  thread_id (str): Thread ID for the conversation
295
+
296
  Yields:
297
  Dict: Response chunks with type and content
298
  """
 
304
  ):
305
  if not isinstance(chunk, dict):
306
  continue
307
+
308
  for node_name, node_output in chunk.items():
309
  if "messages" not in node_output:
310
  continue
311
+
312
  for msg in node_output["messages"]:
313
  if isinstance(msg, AIMessage) and msg.content:
314
  # Clean up temp paths from response
315
  clean_content = re.sub(r"temp[^\s]*", "", msg.content).strip()
316
  if clean_content:
317
  yield {"type": "text", "content": clean_content}
318
+
319
  elif isinstance(msg, ToolMessage):
320
  # Extract tool name from the message
321
  tool_call_id = msg.tool_call_id
322
  # We'll track tool usage but not include detailed output in API response
323
  yield {"type": "tool", "tool_name": "tool_executed"}
324
+
325
  except Exception as e:
326
  yield {"type": "error", "content": str(e)}
327
 
 
329
  def create_api(agent: Agent, tools_dict: Dict[str, Any], temp_dir: str = "temp_api") -> FastAPI:
330
  """
331
  Create and configure the MedRAX FastAPI application.
332
+
333
  Args:
334
  agent (Agent): The initialized MedRAX agent
335
  tools_dict (Dict[str, Any]): Dictionary of available tools
336
  temp_dir (str): Directory for temporary file storage
337
+
338
  Returns:
339
  FastAPI: Configured FastAPI application
340
  """
interface.py CHANGED
@@ -68,9 +68,7 @@ class ChatInterface:
68
 
69
  return self.display_file_path
70
 
71
- def add_message(
72
- self, message: str, display_image: str, history: List[dict]
73
- ) -> Tuple[List[dict], gr.Textbox]:
74
  """
75
  Add a new message to the chat history.
76
 
@@ -155,9 +153,7 @@ class ChatInterface:
155
  if isinstance(msg, AIMessageChunk) and msg.content:
156
  accumulated_content += msg.content
157
  if final_message is None:
158
- final_message = ChatMessage(
159
- role="assistant", content=accumulated_content
160
- )
161
  chat_history.append(final_message)
162
  else:
163
  final_message.content = accumulated_content
@@ -169,9 +165,7 @@ class ChatInterface:
169
  if final_message:
170
  final_message.content = final_content
171
  else:
172
- chat_history.append(
173
- ChatMessage(role="assistant", content=final_content)
174
- )
175
  yield chat_history, self.display_file_path, ""
176
 
177
  if msg.tool_calls:
@@ -204,7 +198,7 @@ class ChatInterface:
204
  except json.JSONDecodeError:
205
  result = msg.content
206
  tool_output_str = str(msg.content)
207
-
208
  # Display tool usage card
209
  tool_args_str = json.dumps(tool_args, indent=2)
210
  description = f"**Input:**\n```json\n{tool_args_str}\n```\n\n**Output:**\n```json\n{tool_output_str}\n```"
@@ -231,7 +225,7 @@ class ChatInterface:
231
  image_path = result[0]["image_path"]
232
  except (TypeError, KeyError, IndexError):
233
  pass
234
-
235
  if image_path:
236
  self.display_file_path = image_path
237
  chat_history.append(
@@ -240,16 +234,13 @@ class ChatInterface:
240
  content={"path": self.display_file_path},
241
  )
242
  )
243
-
244
  # Yield a single update for this tool event
245
  yield chat_history, self.display_file_path, ""
246
 
247
-
248
  except Exception as e:
249
  chat_history.append(
250
- ChatMessage(
251
- role="assistant", content=f"❌ Error: {str(e)}", metadata={"title": "Error"}
252
- )
253
  )
254
  yield chat_history, self.display_file_path, ""
255
 
@@ -300,9 +291,7 @@ def create_demo(agent, tools_dict):
300
  )
301
 
302
  with gr.Column(scale=3):
303
- image_display = gr.Image(
304
- label="Image", type="filepath", height=600, container=True
305
- )
306
  with gr.Row():
307
  upload_button = gr.UploadButton(
308
  "📎 Upload X-Ray",
@@ -325,9 +314,7 @@ def create_demo(agent, tools_dict):
325
  def handle_file_upload(file):
326
  return interface.handle_upload(file.name)
327
 
328
- chat_msg = txt.submit(
329
- interface.add_message, inputs=[txt, image_display, chatbot], outputs=[chatbot, txt]
330
- )
331
  bot_msg = chat_msg.then(
332
  interface.process_message,
333
  inputs=[txt, image_display, chatbot],
@@ -341,4 +328,4 @@ def create_demo(agent, tools_dict):
341
 
342
  new_chat_btn.click(new_chat, outputs=[chatbot, image_display])
343
 
344
- return demo
 
68
 
69
  return self.display_file_path
70
 
71
+ def add_message(self, message: str, display_image: str, history: List[dict]) -> Tuple[List[dict], gr.Textbox]:
72
  """
73
  Add a new message to the chat history.
74
 
 
153
  if isinstance(msg, AIMessageChunk) and msg.content:
154
  accumulated_content += msg.content
155
  if final_message is None:
156
+ final_message = ChatMessage(role="assistant", content=accumulated_content)
157
  chat_history.append(final_message)
158
  else:
159
  final_message.content = accumulated_content
 
165
  if final_message:
166
  final_message.content = final_content
167
  else:
168
+ chat_history.append(ChatMessage(role="assistant", content=final_content))
169
  yield chat_history, self.display_file_path, ""
170
 
171
  if msg.tool_calls:
 
198
  except json.JSONDecodeError:
199
  result = msg.content
200
  tool_output_str = str(msg.content)
201
+
202
  # Display tool usage card
203
  tool_args_str = json.dumps(tool_args, indent=2)
204
  description = f"**Input:**\n```json\n{tool_args_str}\n```\n\n**Output:**\n```json\n{tool_output_str}\n```"
 
225
  image_path = result[0]["image_path"]
226
  except (TypeError, KeyError, IndexError):
227
  pass
228
+
229
  if image_path:
230
  self.display_file_path = image_path
231
  chat_history.append(
 
234
  content={"path": self.display_file_path},
235
  )
236
  )
237
+
238
  # Yield a single update for this tool event
239
  yield chat_history, self.display_file_path, ""
240
 
 
241
  except Exception as e:
242
  chat_history.append(
243
+ ChatMessage(role="assistant", content=f"❌ Error: {str(e)}", metadata={"title": "Error"})
244
  )
245
  yield chat_history, self.display_file_path, ""
246
 
 
291
  )
292
 
293
  with gr.Column(scale=3):
294
+ image_display = gr.Image(label="Image", type="filepath", height=600, container=True)
295
  with gr.Row():
296
  upload_button = gr.UploadButton(
297
  "📎 Upload X-Ray",
 
314
  def handle_file_upload(file):
315
  return interface.handle_upload(file.name)
316
 
317
+ chat_msg = txt.submit(interface.add_message, inputs=[txt, image_display, chatbot], outputs=[chatbot, txt])
318
  bot_msg = chat_msg.then(
319
  interface.process_message,
320
  inputs=[txt, image_display, chatbot],
 
328
 
329
  new_chat_btn.click(new_chat, outputs=[chatbot, image_display])
330
 
331
+ return demo
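
The interface.py changes above are purely stylistic, but the wiring they touch is Gradio's event chaining: `txt.submit(interface.add_message, ...)` records the user turn, and `.then(interface.process_message, ...)` streams the assistant turn. The toy demo below is a hypothetical, self-contained illustration of that same pattern; the function bodies and the placeholder response are not MedRAX code.

```python
# Hypothetical illustration of the submit(...).then(...) chaining used in create_demo.
import gradio as gr

def add_message(message, history):
    # Append the user turn and clear the textbox, like ChatInterface.add_message.
    return history + [{"role": "user", "content": message}], ""

def respond(history):
    # Placeholder assistant turn; MedRAX streams the agent's response here instead.
    return history + [{"role": "assistant", "content": "(agent response goes here)"}]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    txt = gr.Textbox(placeholder="Ask a question...")
    txt.submit(add_message, inputs=[txt, chatbot], outputs=[chatbot, txt]).then(
        respond, inputs=[chatbot], outputs=[chatbot]
    )

if __name__ == "__main__":
    demo.launch()
```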
main.py CHANGED
@@ -76,9 +76,7 @@ def initialize_agent(
76
  "ChestXRaySegmentationTool": lambda: ChestXRaySegmentationTool(device=device),
77
  "LlavaMedTool": lambda: LlavaMedTool(cache_dir=model_dir, device=device, load_in_8bit=True),
78
  "CheXagentXRayVQATool": lambda: CheXagentXRayVQATool(cache_dir=model_dir, device=device),
79
- "ChestXRayReportGeneratorTool": lambda: ChestXRayReportGeneratorTool(
80
- cache_dir=model_dir, device=device
81
- ),
82
  "XRayPhraseGroundingTool": lambda: XRayPhraseGroundingTool(
83
  cache_dir=model_dir, temp_dir=temp_dir, load_in_8bit=True, device=device
84
  ),
@@ -90,15 +88,13 @@ def initialize_agent(
90
  "MedicalRAGTool": lambda: RAGTool(config=rag_config),
91
  "WebBrowserTool": lambda: WebBrowserTool(),
92
  "DuckDuckGoSearchTool": lambda: DuckDuckGoSearchTool(),
93
- "MedSAM2Tool": lambda: MedSAM2Tool(
94
- device=device, cache_dir=model_dir, temp_dir=temp_dir
95
- ),
96
  "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(
97
  cache_dir=model_dir,
98
  device=device,
99
  load_in_8bit=True,
100
- api_url=os.getenv("MEDGEMMA_API_URL", "http://0.0.0.0:8002")
101
- )
102
  }
103
 
104
  # Initialize only selected tools or all if none specified
@@ -106,7 +102,7 @@ def initialize_agent(
106
 
107
  if tools_to_use is None:
108
  tools_to_use = []
109
-
110
  for tool_name in tools_to_use:
111
  if tool_name == "PythonSandboxTool":
112
  try:
@@ -116,16 +112,13 @@ def initialize_agent(
116
  print("Skipping PythonSandboxTool")
117
  if tool_name in all_tools:
118
  tools_dict[tool_name] = all_tools[tool_name]()
119
-
120
 
121
  # Set up checkpointing for conversation state
122
  checkpointer = MemorySaver()
123
 
124
  # Create the language model using the factory
125
  try:
126
- llm = ModelFactory.create_model(
127
- model_name=model, temperature=temperature, **model_kwargs
128
- )
129
  except ValueError as e:
130
  print(f"Error creating language model: {e}")
131
  print(f"Available model providers: {list(ModelFactory._model_providers.keys())}")
@@ -145,7 +138,7 @@ def initialize_agent(
145
  def run_gradio_interface(agent, tools_dict, host="0.0.0.0", port=8686):
146
  """
147
  Run the Gradio web interface.
148
-
149
  Args:
150
  agent: The initialized MedRAX agent
151
  tools_dict: Dictionary of available tools
@@ -160,7 +153,7 @@ def run_gradio_interface(agent, tools_dict, host="0.0.0.0", port=8686):
160
  def run_api_server(agent, tools_dict, host="0.0.0.0", port=8585, public=False):
161
  """
162
  Run the FastAPI server.
163
-
164
  Args:
165
  agent: The initialized MedRAX agent
166
  tools_dict: Dictionary of available tools
@@ -169,21 +162,23 @@ def run_api_server(agent, tools_dict, host="0.0.0.0", port=8585, public=False):
169
  public (bool): Whether to expose via ngrok tunnel
170
  """
171
  print(f"Starting API server on {host}:{port}")
172
-
173
  if public:
174
  try:
175
  public_tunnel = ngrok.connect(port)
176
  public_url = public_tunnel.public_url
177
- print(f"🌍 Public URL: {public_url}\n🌍 API Documentation: {public_url}/docs\n🌍 Share this URL with your friend!\n{'=' * 60}")
178
  except ImportError:
179
  print("⚠️ pyngrok not installed. Install with: pip install pyngrok\nRunning locally only...")
180
  public = False
181
  except Exception as e:
182
  print(f"⚠️ Failed to create public tunnel: {e}\nRunning locally only...")
183
  public = False
184
-
185
  app = create_api(agent, tools_dict)
186
-
187
  try:
188
  uvicorn.run(app, host=host, port=port)
189
  finally:
@@ -198,121 +193,74 @@ def run_api_server(agent, tools_dict, host="0.0.0.0", port=8585, public=False):
198
  def parse_arguments():
199
  """Parse command line arguments."""
200
  parser = argparse.ArgumentParser(description="MedRAX - Medical Reasoning Agent for Chest X-ray")
201
-
202
  # Server configuration
203
  parser.add_argument(
204
- "--mode",
205
- choices=["gradio", "api", "both"],
206
  default="gradio",
207
- help="Run mode: 'gradio' for web interface, 'api' for REST API, 'both' for both services"
208
  )
209
  parser.add_argument("--gradio-host", default="0.0.0.0", help="Gradio host address")
210
  parser.add_argument("--gradio-port", type=int, default=8686, help="Gradio port")
211
  parser.add_argument("--api-host", default="0.0.0.0", help="API host address")
212
  parser.add_argument("--api-port", type=int, default=8000, help="API port")
213
  parser.add_argument("--public", action="store_true", help="Make API publicly accessible via ngrok tunnel")
214
-
215
  # Model and system configuration
216
  parser.add_argument(
217
- "--model-dir",
218
  default="/model-weights",
219
- help="Directory containing model weights (default: uses MODEL_WEIGHTS_DIR env var or '/model-weights')"
220
  )
221
  parser.add_argument(
222
- "--device",
223
- default="cuda",
224
- help="Device to run models on (default: uses MEDRAX_DEVICE env var or 'cuda:1')"
225
  )
226
  parser.add_argument(
227
- "--model",
228
  default="gpt-4.1",
229
- help="Model to use (default: gpt-4.1). Examples: gpt-4.1-2025-04-14, gemini-2.5-pro, gpt-5"
230
- )
231
- parser.add_argument(
232
- "--temperature",
233
- type=float,
234
- default=1.0,
235
- help="Temperature for the model (default: 1.0)"
236
- )
237
- parser.add_argument(
238
- "--temp-dir",
239
- default="temp2",
240
- help="Directory for temporary files (default: temp2)"
241
  )
 
  parser.add_argument(
243
- "--prompt-file",
244
  default="medrax/docs/system_prompts.txt",
245
- help="Path to file containing system prompts (default: medrax/docs/system_prompts.txt)"
246
  )
247
  parser.add_argument(
248
- "--system-prompt",
249
- default="MEDICAL_ASSISTANT",
250
- help="System prompt to use (default: MEDICAL_ASSISTANT)"
251
  )
252
-
253
  # RAG configuration
254
  parser.add_argument(
255
- "--rag-model",
256
- default="command-a-03-2025",
257
- help="Chat model for RAG responses (default: command-a-03-2025)"
258
- )
259
- parser.add_argument(
260
- "--rag-embedding-model",
261
- default="embed-v4.0",
262
- help="Embedding model for RAG system (default: embed-v4.0)"
263
- )
264
- parser.add_argument(
265
- "--rag-rerank-model",
266
- default="rerank-v3.5",
267
- help="Reranking model for RAG system (default: rerank-v3.5)"
268
- )
269
- parser.add_argument(
270
- "--rag-temperature",
271
- type=float,
272
- default=0.3,
273
- help="Temperature for RAG model (default: 0.3)"
274
  )
275
  parser.add_argument(
276
- "--pinecone-index",
277
- default="medrax2",
278
- help="Pinecone index name (default: medrax2)"
279
  )
280
  parser.add_argument(
281
- "--chunk-size",
282
- type=int,
283
- default=1500,
284
- help="RAG chunk size (default: 1500)"
285
  )
286
- parser.add_argument(
287
- "--chunk-overlap",
288
- type=int,
289
- default=300,
290
- help="RAG chunk overlap (default: 300)"
291
- )
292
- parser.add_argument(
293
- "--retriever-k",
294
- type=int,
295
- default=3,
296
- help="Number of documents to retrieve (default: 3)"
297
- )
298
- parser.add_argument(
299
- "--rag-docs-dir",
300
- default="rag_docs",
301
- help="Directory for RAG documents (default: rag_docs)"
302
- )
303
-
304
  # Tools configuration
305
  parser.add_argument(
306
- "--tools",
307
  nargs="*",
308
- help="Specific tools to enable (if not provided, uses default set). Available tools: " +
309
- "ImageVisualizerTool, DicomProcessorTool, MedSAM2Tool, ChestXRaySegmentationTool, " +
310
- "ChestXRayGeneratorTool, TorchXRayVisionClassifierTool, ArcPlusClassifierTool, " +
311
- "ChestXRayReportGeneratorTool, XRayPhraseGroundingTool, MedGemmaVQATool, " +
312
- "XRayVQATool, LlavaMedTool, MedicalRAGTool, WebBrowserTool, DuckDuckGoSearchTool, " +
313
- "PythonSandboxTool"
314
  )
315
-
316
  return parser.parse_args()
317
 
318
 
@@ -334,36 +282,27 @@ if __name__ == "__main__":
334
  # Image Processing Tools
335
  "ImageVisualizerTool", # For displaying images in the UI
336
  # "DicomProcessorTool", # For processing DICOM medical image files
337
-
338
  # Segmentation Tools
339
  "MedSAM2Tool", # For advanced medical image segmentation using MedSAM2
340
  "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
341
-
342
  # Generation Tools
343
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
344
-
345
  # Classification Tools
346
  "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
347
  "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
348
-
349
  # Report Generation Tools
350
  "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
351
-
352
  # Grounding Tools
353
  "XRayPhraseGroundingTool", # For locating described features in X-rays
354
-
355
  # VQA Tools
356
  # "MedGemmaVQATool", # Google MedGemma VQA tool
357
  "XRayVQATool", # For visual question answering on X-rays
358
  # "LlavaMedTool", # For multimodal medical image understanding
359
-
360
  # RAG Tools
361
  "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
362
-
363
  # Search Tools
364
  # "WebBrowserTool", # For web browsing and search capabilities
365
  "DuckDuckGoSearchTool", # For privacy-focused web search using DuckDuckGo
366
-
367
  # Development Tools
368
  # "PythonSandboxTool", # Add the Python sandbox tool
369
  ]
@@ -424,11 +363,10 @@ if __name__ == "__main__":
424
  elif args.mode == "both":
425
  # Run both services in separate threads
426
  api_thread = threading.Thread(
427
- target=run_api_server,
428
- args=(agent, tools_dict, args.api_host, args.api_port, args.public)
429
  )
430
  api_thread.daemon = True
431
  api_thread.start()
432
-
433
  # Run Gradio in main thread
434
  run_gradio_interface(agent, tools_dict, args.gradio_host, args.gradio_port)
 
76
  "ChestXRaySegmentationTool": lambda: ChestXRaySegmentationTool(device=device),
77
  "LlavaMedTool": lambda: LlavaMedTool(cache_dir=model_dir, device=device, load_in_8bit=True),
78
  "CheXagentXRayVQATool": lambda: CheXagentXRayVQATool(cache_dir=model_dir, device=device),
79
+ "ChestXRayReportGeneratorTool": lambda: ChestXRayReportGeneratorTool(cache_dir=model_dir, device=device),
80
  "XRayPhraseGroundingTool": lambda: XRayPhraseGroundingTool(
81
  cache_dir=model_dir, temp_dir=temp_dir, load_in_8bit=True, device=device
82
  ),
 
88
  "MedicalRAGTool": lambda: RAGTool(config=rag_config),
89
  "WebBrowserTool": lambda: WebBrowserTool(),
90
  "DuckDuckGoSearchTool": lambda: DuckDuckGoSearchTool(),
91
+ "MedSAM2Tool": lambda: MedSAM2Tool(device=device, cache_dir=model_dir, temp_dir=temp_dir),
92
  "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(
93
  cache_dir=model_dir,
94
  device=device,
95
  load_in_8bit=True,
96
+ api_url=os.getenv("MEDGEMMA_API_URL", "http://0.0.0.0:8002"),
97
+ ),
98
  }
99
 
100
  # Initialize only selected tools or all if none specified
 
102
 
103
  if tools_to_use is None:
104
  tools_to_use = []
105
+
106
  for tool_name in tools_to_use:
107
  if tool_name == "PythonSandboxTool":
108
  try:
 
112
  print("Skipping PythonSandboxTool")
113
  if tool_name in all_tools:
114
  tools_dict[tool_name] = all_tools[tool_name]()
 
115
 
116
  # Set up checkpointing for conversation state
117
  checkpointer = MemorySaver()
118
 
119
  # Create the language model using the factory
120
  try:
121
+ llm = ModelFactory.create_model(model_name=model, temperature=temperature, **model_kwargs)
122
  except ValueError as e:
123
  print(f"Error creating language model: {e}")
124
  print(f"Available model providers: {list(ModelFactory._model_providers.keys())}")
 
138
  def run_gradio_interface(agent, tools_dict, host="0.0.0.0", port=8686):
139
  """
140
  Run the Gradio web interface.
141
+
142
  Args:
143
  agent: The initialized MedRAX agent
144
  tools_dict: Dictionary of available tools
 
153
  def run_api_server(agent, tools_dict, host="0.0.0.0", port=8585, public=False):
154
  """
155
  Run the FastAPI server.
156
+
157
  Args:
158
  agent: The initialized MedRAX agent
159
  tools_dict: Dictionary of available tools
 
162
  public (bool): Whether to expose via ngrok tunnel
163
  """
164
  print(f"Starting API server on {host}:{port}")
165
+
166
  if public:
167
  try:
168
  public_tunnel = ngrok.connect(port)
169
  public_url = public_tunnel.public_url
170
+ print(
171
+ f"🌍 Public URL: {public_url}\n🌍 API Documentation: {public_url}/docs\n🌍 Share this URL with your friend!\n{'=' * 60}"
172
+ )
173
  except ImportError:
174
  print("⚠️ pyngrok not installed. Install with: pip install pyngrok\nRunning locally only...")
175
  public = False
176
  except Exception as e:
177
  print(f"⚠️ Failed to create public tunnel: {e}\nRunning locally only...")
178
  public = False
179
+
180
  app = create_api(agent, tools_dict)
181
+
182
  try:
183
  uvicorn.run(app, host=host, port=port)
184
  finally:
 
193
  def parse_arguments():
194
  """Parse command line arguments."""
195
  parser = argparse.ArgumentParser(description="MedRAX - Medical Reasoning Agent for Chest X-ray")
196
+
197
  # Server configuration
198
  parser.add_argument(
199
+ "--mode",
200
+ choices=["gradio", "api", "both"],
201
  default="gradio",
202
+ help="Run mode: 'gradio' for web interface, 'api' for REST API, 'both' for both services",
203
  )
204
  parser.add_argument("--gradio-host", default="0.0.0.0", help="Gradio host address")
205
  parser.add_argument("--gradio-port", type=int, default=8686, help="Gradio port")
206
  parser.add_argument("--api-host", default="0.0.0.0", help="API host address")
207
  parser.add_argument("--api-port", type=int, default=8000, help="API port")
208
  parser.add_argument("--public", action="store_true", help="Make API publicly accessible via ngrok tunnel")
209
+
210
  # Model and system configuration
211
  parser.add_argument(
212
+ "--model-dir",
213
  default="/model-weights",
214
+ help="Directory containing model weights (default: uses MODEL_WEIGHTS_DIR env var or '/model-weights')",
215
  )
216
  parser.add_argument(
217
+ "--device", default="cuda", help="Device to run models on (default: uses MEDRAX_DEVICE env var or 'cuda:1')"
218
  )
219
  parser.add_argument(
220
+ "--model",
221
  default="gpt-4.1",
222
+ help="Model to use (default: gpt-4.1). Examples: gpt-4.1-2025-04-14, gemini-2.5-pro, gpt-5",
223
  )
224
+ parser.add_argument("--temperature", type=float, default=1.0, help="Temperature for the model (default: 1.0)")
225
+ parser.add_argument("--temp-dir", default="temp2", help="Directory for temporary files (default: temp2)")
226
  parser.add_argument(
227
+ "--prompt-file",
228
  default="medrax/docs/system_prompts.txt",
229
+ help="Path to file containing system prompts (default: medrax/docs/system_prompts.txt)",
230
  )
231
  parser.add_argument(
232
+ "--system-prompt", default="MEDICAL_ASSISTANT", help="System prompt to use (default: MEDICAL_ASSISTANT)"
233
  )
234
+
235
  # RAG configuration
236
  parser.add_argument(
237
+ "--rag-model", default="command-a-03-2025", help="Chat model for RAG responses (default: command-a-03-2025)"
238
  )
239
  parser.add_argument(
240
+ "--rag-embedding-model", default="embed-v4.0", help="Embedding model for RAG system (default: embed-v4.0)"
241
  )
242
  parser.add_argument(
243
+ "--rag-rerank-model", default="rerank-v3.5", help="Reranking model for RAG system (default: rerank-v3.5)"
244
  )
245
+ parser.add_argument("--rag-temperature", type=float, default=0.3, help="Temperature for RAG model (default: 0.3)")
246
+ parser.add_argument("--pinecone-index", default="medrax2", help="Pinecone index name (default: medrax2)")
247
+ parser.add_argument("--chunk-size", type=int, default=1500, help="RAG chunk size (default: 1500)")
248
+ parser.add_argument("--chunk-overlap", type=int, default=300, help="RAG chunk overlap (default: 300)")
249
+ parser.add_argument("--retriever-k", type=int, default=3, help="Number of documents to retrieve (default: 3)")
250
+ parser.add_argument("--rag-docs-dir", default="rag_docs", help="Directory for RAG documents (default: rag_docs)")
251
+
252
  # Tools configuration
253
  parser.add_argument(
254
+ "--tools",
255
  nargs="*",
256
+ help="Specific tools to enable (if not provided, uses default set). Available tools: "
257
+ + "ImageVisualizerTool, DicomProcessorTool, MedSAM2Tool, ChestXRaySegmentationTool, "
258
+ + "ChestXRayGeneratorTool, TorchXRayVisionClassifierTool, ArcPlusClassifierTool, "
259
+ + "ChestXRayReportGeneratorTool, XRayPhraseGroundingTool, MedGemmaVQATool, "
260
+ + "XRayVQATool, LlavaMedTool, MedicalRAGTool, WebBrowserTool, DuckDuckGoSearchTool, "
261
+ + "PythonSandboxTool",
262
  )
263
+
264
  return parser.parse_args()
265
 
266
 
 
282
  # Image Processing Tools
283
  "ImageVisualizerTool", # For displaying images in the UI
284
  # "DicomProcessorTool", # For processing DICOM medical image files
 
285
  # Segmentation Tools
286
  "MedSAM2Tool", # For advanced medical image segmentation using MedSAM2
287
  "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
 
288
  # Generation Tools
289
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
 
290
  # Classification Tools
291
  "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
292
  "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
 
293
  # Report Generation Tools
294
  "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
 
295
  # Grounding Tools
296
  "XRayPhraseGroundingTool", # For locating described features in X-rays
 
297
  # VQA Tools
298
  # "MedGemmaVQATool", # Google MedGemma VQA tool
299
  "XRayVQATool", # For visual question answering on X-rays
300
  # "LlavaMedTool", # For multimodal medical image understanding
 
301
  # RAG Tools
302
  "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
 
303
  # Search Tools
304
  # "WebBrowserTool", # For web browsing and search capabilities
305
  "DuckDuckGoSearchTool", # For privacy-focused web search using DuckDuckGo
 
306
  # Development Tools
307
  # "PythonSandboxTool", # Add the Python sandbox tool
308
  ]
 
363
  elif args.mode == "both":
364
  # Run both services in separate threads
365
  api_thread = threading.Thread(
366
+ target=run_api_server, args=(agent, tools_dict, args.api_host, args.api_port, args.public)
367
  )
368
  api_thread.daemon = True
369
  api_thread.start()
370
+
371
  # Run Gradio in main thread
372
  run_gradio_interface(agent, tools_dict, args.gradio_host, args.gradio_port)
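
Taken together, the argparse cleanup above leaves the command-line surface unchanged, so the script can still be invoked along these lines. The invocations below are illustrative only; they assume main.py is run from the repository root with API keys and model weights already configured.

```bash
# Web UI only (default mode) on the default Gradio port 8686
python main.py --mode gradio

# REST API only, exposed through an ngrok tunnel
python main.py --mode api --api-port 8000 --public

# Both services, with an explicit subset of tools enabled
python main.py --mode both \
    --tools ImageVisualizerTool ChestXRaySegmentationTool MedicalRAGTool DuckDuckGoSearchTool
```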