VictorLJZ committed
Commit 4c987e5 · 2 parents: c94d452 2639ef4

Merge branch 'main' into emily/medgemma

.gitignore CHANGED
@@ -178,3 +178,5 @@ medrax-pdfs/
 model-weights/
 
 .DS_Store
+
+benchmarking/data/
README.md CHANGED
@@ -29,6 +29,7 @@ MedRAX is built on a robust technical foundation:
 - **Disease Classification**: Leverages DenseNet-121 from TorchXRayVision for detecting 18 pathology classes
 - **X-ray Generation**: Utilizes RoentGen for synthetic CXR generation
 - **Web Browser**: Provides web search capabilities and URL content retrieval using Google Custom Search API
+- **DuckDuckGo Search**: Offers privacy-focused web search capabilities using DuckDuckGo search engine for medical research, fact-checking, and accessing current medical information without API keys
 - **Python Sandbox**: Executes Python code in a secure, stateful sandbox environment using `langchain-sandbox` and Pyodide. Supports custom data analysis, calculations, and dynamic package installations. Pre-configured with medical analysis packages including pandas, numpy, pydicom, SimpleITK, scikit-image, Pillow, scikit-learn, matplotlib, seaborn, and openpyxl. **Requires Deno runtime.**
 - **Utilities**: Includes DICOM processing, visualization tools, and custom plotting capabilities
 <br><br>
@@ -164,6 +165,7 @@ selected_tools = [
     "ChestXRaySegmentationTool",
     "PythonSandboxTool",  # Python code execution
     "WebBrowserTool",  # Web search and URL access
+    "DuckDuckGoSearchTool",  # Privacy-focused web search
     # Add or remove tools as needed
 ]
 
@@ -179,17 +181,10 @@ agent, tools_dict = initialize_agent(
 
 The following tools will automatically download their model weights when initialized:
 
-### Classification Tools
+### Classification Tool
 ```python
 # TorchXRayVision-based classifier (original)
 TorchXRayVisionClassifierTool(device=device)
-
-# ArcPlus SwinTransformer-based classifier (new)
-ArcPlusClassifierTool(
-    model_path="/path/to/Ark6_swinLarge768_ep50.pth.tar",  # Optional
-    num_classes=18,  # Default
-    device=device
-)
 ```
 
 ### Segmentation Tool
@@ -283,6 +278,7 @@ No additional model weights required:
 ImageVisualizerTool()
 DicomProcessorTool(temp_dir=temp_dir)
 WebBrowserTool()  # Requires Google Search API credentials
+DuckDuckGoSearchTool()  # No API key required, privacy-focused search
 ```
 <br>
 
@@ -301,6 +297,25 @@ ChestXRayGeneratorTool(
 2. Place weights in `{model_dir}/roentgen`
 3. Optional tool, can be excluded if not needed
 
+### ArcPlus SwinTransformer-based Classifier
+```python
+ArcPlusClassifierTool(
+    model_path="/path/to/Ark6_swinLarge768_ep50.pth.tar",  # Optional
+    num_classes=18,  # Default
+    device=device
+)
+```
+
+The ArcPlus classifier requires manual setup as the pre-trained model is not publicly available for automatic download:
+
+1. **Request Access**: Visit [https://github.com/jlianglab/Ark](https://github.com/jlianglab/Ark) and request the pretrained model through their Google Forms
+2. **Download Model**: Once approved, download the `Ark6_swinLarge768_ep50.pth.tar` file
+3. **Place in Directory**: Drag the downloaded file into your `model-weights` directory
+4. **Initialize Tool**: The tool will automatically look for the model file in the specified `cache_dir`
+
+The ArcPlus model provides advanced chest X-ray classification across 6 medical datasets (MIMIC, CheXpert, NIH, RSNA, VinDr, Shenzhen) with 52+ pathology categories.
+```
+
 ### Knowledge Base Setup (MedicalRAGTool)
 
 The `MedicalRAGTool` uses a Pinecone vector database to store and retrieve medical knowledge. To use this tool, you need to set up a Pinecone account and a Cohere account.
@@ -403,6 +418,8 @@ If you are running a local LLM using frameworks like [Ollama](https://ollama.com
 
 **WebBrowserTool**: Requires Google Custom Search API credentials, which can be set in the `.env` file.
 
+**DuckDuckGoSearchTool**: No API key required. Uses DuckDuckGo's privacy-focused search engine for medical research and fact-checking.
+
 **PythonSandboxTool**: Requires Deno runtime installation:
 ```bash
 # Verify Deno is installed
benchmarking/benchmarks/base.py CHANGED
@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
 from typing import Dict, List, Optional, Any, Iterator, Tuple
 from dataclasses import dataclass
 from pathlib import Path
+import random
 
 
 @dataclass
@@ -31,17 +32,31 @@ class Benchmark(ABC):
         Args:
             data_dir (str): Directory containing benchmark data
             **kwargs: Additional configuration parameters
+                random_seed (int): Random seed for shuffling data (default: None, no shuffling)
         """
         self.data_dir = Path(data_dir)
         self.config = kwargs
         self.data_points = []
         self._load_data()
+        self._shuffle_data()
 
     @abstractmethod
     def _load_data(self) -> None:
         """Load benchmark data from the data directory."""
         pass
 
+    def _shuffle_data(self) -> None:
+        """Shuffle the data points if a random seed is provided.
+
+        This method is called automatically after data loading to ensure
+        reproducible benchmark runs when a random seed is specified.
+        """
+        random_seed = self.config.get("random_seed", None)
+        if random_seed is not None:
+            random.seed(random_seed)
+            random.shuffle(self.data_points)
+            print(f"Shuffled {len(self.data_points)} data points with seed {random_seed}")
+
     def get_data_point(self, index: int) -> BenchmarkDataPoint:
         """Get a specific data point by index.
 
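For context, a minimal sketch (not from this commit) of how the new `random_seed` option behaves, assuming `Benchmark` is importable from `benchmarking.benchmarks.base` and that `_load_data` is the only abstract method a subclass must provide:

```python
# Hypothetical illustration: two instances built with the same seed end up
# with their data points in the same shuffled order.
from benchmarking.benchmarks.base import Benchmark


class DemoBenchmark(Benchmark):
    """Stand-in subclass used only for this sketch."""

    def _load_data(self) -> None:
        # Real benchmarks populate self.data_points with BenchmarkDataPoint objects;
        # plain integers are enough to show the shuffling behaviour.
        self.data_points = list(range(10))


a = DemoBenchmark(data_dir=".", random_seed=42)
b = DemoBenchmark(data_dir=".", random_seed=42)
assert a.data_points == b.data_points  # same seed -> same order
```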
benchmarking/benchmarks/rexvqa_benchmark.py CHANGED
@@ -2,10 +2,12 @@
 
 import json
 import os
-from typing import Dict, List, Optional, Any
+from typing import Dict, Optional, Any
 from datasets import load_dataset
 from .base import Benchmark, BenchmarkDataPoint
 from pathlib import Path
+import subprocess
+from huggingface_hub import hf_hub_download, list_repo_files
 
 
 class ReXVQABenchmark(Benchmark):
@@ -47,11 +49,128 @@ class ReXVQABenchmark(Benchmark):
 
         super().__init__(data_dir, **kwargs)
 
+    @staticmethod
+    def download_rexgradient_images(output_dir: str = "benchmarking/data/rexvqa", repo_id: str = "rajpurkarlab/ReXGradient-160K"):
+        """Download and extract ReXGradient-160K images if not already present."""
+        output_dir = Path(output_dir)
+        tar_path = output_dir / "deid_png.tar"
+        images_dir = output_dir / "images"
+
+        # Check if images already exist
+        if images_dir.exists() and any(images_dir.rglob("*.png")):
+            print(f"Images already exist in {images_dir}, skipping download.")
+            return
+        output_dir.mkdir(parents=True, exist_ok=True)
+        print(f"Output directory: {output_dir}")
+        try:
+            print("Listing files in repository...")
+            files = list_repo_files(repo_id, repo_type='dataset')
+            part_files = [f for f in files if f.startswith("deid_png.part")]
+            if not part_files:
+                print("No part files found. The images might be in a different format.")
+                return
+            print(f"Found {len(part_files)} part files.")
+            # Download part files
+            for part_file in part_files:
+                output_path = output_dir / part_file
+                if output_path.exists():
+                    print(f"Skipping {part_file} (already exists)")
+                    continue
+                print(f"Downloading {part_file}...")
+                hf_hub_download(
+                    repo_id=repo_id,
+                    filename=part_file,
+                    local_dir=output_dir,
+                    local_dir_use_symlinks=False,
+                    repo_type='dataset'
+                )
+            # Concatenate part files
+            if not tar_path.exists():
+                print("\nConcatenating part files...")
+                with open(tar_path, 'wb') as tar_file:
+                    for part_file in sorted(part_files):
+                        part_path = output_dir / part_file
+                        if part_path.exists():
+                            print(f"Adding {part_file}...")
+                            with open(part_path, 'rb') as f:
+                                tar_file.write(f.read())
+                        else:
+                            print(f"Warning: {part_file} not found, skipping...")
+            else:
+                print(f"Tar file already exists: {tar_path}")
+            # Extract tar file
+            if tar_path.exists():
+                print("\nExtracting images...")
+                images_dir.mkdir(exist_ok=True)
+                if any(images_dir.rglob("*.png")):
+                    print("Images already extracted.")
+                else:
+                    try:
+                        subprocess.run([
+                            "tar", "-xf", str(tar_path),
+                            "-C", str(images_dir)
+                        ], check=True)
+                        print("Extraction completed!")
+                    except subprocess.CalledProcessError as e:
+                        print(f"Error extracting tar file: {e}")
+                        return
+                    except FileNotFoundError:
+                        print("Error: 'tar' command not found. Please install tar or extract manually.")
+                        return
+                png_files = list(images_dir.rglob("*.png"))
+                print(f"Extracted {len(png_files)} PNG images to {images_dir}")
+
+                # Clean up part and tar files after successful extraction
+                print("Cleaning up part and tar files...")
+                # Remove deid_png.part* files
+                for part_file in output_dir.glob("deid_png.part*"):
+                    try:
+                        part_file.unlink()
+                        print(f"Deleted {part_file}")
+                    except Exception as e:
+                        print(f"Could not delete {part_file}: {e}")
+                # Remove deid_png.tar
+                if tar_path.exists():
+                    try:
+                        tar_path.unlink()
+                        print(f"Deleted {tar_path}")
+                    except Exception as e:
+                        print(f"Could not delete {tar_path}: {e}")
+        except Exception as e:
+            print(f"Error: {e}")
+
+    @staticmethod
+    def download_test_vqa_data_json(output_dir: str = "benchmarking/data/rexvqa", repo_id: str = "rajpurkarlab/ReXVQA"):
+        """Download test_vqa_data.json from the ReXVQA HuggingFace repo if not already present."""
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        json_path = output_dir / "metadata" / "test_vqa_data.json"
+        if json_path.exists():
+            print(f"test_vqa_data.json already exists at {json_path}, skipping download.")
+            return
+        print(f"Downloading test_vqa_data.json to {json_path}...")
+        try:
+            hf_hub_download(
+                repo_id=repo_id,
+                filename="metadata/test_vqa_data.json",
+                local_dir=output_dir,
+                local_dir_use_symlinks=False,
+                repo_type='dataset'
+            )
+            print("Download complete.")
+        except Exception as e:
+            print(f"Error downloading test_vqa_data.json: {e}")
+            print("You may need to accept the license agreement on HuggingFace.")
+
     def _load_data(self) -> None:
         """Load ReXVQA data from local JSON file."""
         try:
+            # Check for images and test_vqa_data.json, download if missing
+            self.download_test_vqa_data_json()
+            self.download_rexgradient_images()
+
             # Construct path to the JSON file
-            json_file_path = os.path.join("benchmarking", "data", "rexvqa", "test_vqa_data.json")
+            json_file_path = os.path.join("benchmarking", "data", "rexvqa", "metadata", "test_vqa_data.json")
 
             # Check if file exists
             if not os.path.exists(json_file_path):
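If the dataset needs to be staged ahead of a run, the two new static methods can also be called directly; a small sketch (not part of the diff above), assuming the module path `benchmarking.benchmarks.rexvqa_benchmark`:

```python
# Pre-fetch sketch: _load_data() already calls both of these automatically,
# so running them separately is only useful for warming the cache or debugging downloads.
from benchmarking.benchmarks.rexvqa_benchmark import ReXVQABenchmark

# Downloads metadata/test_vqa_data.json from rajpurkarlab/ReXVQA (skipped if already present).
ReXVQABenchmark.download_test_vqa_data_json(output_dir="benchmarking/data/rexvqa")

# Downloads the deid_png.part* archives from rajpurkarlab/ReXGradient-160K, concatenates
# them into deid_png.tar, and extracts the PNGs into images/ (skipped if already present).
ReXVQABenchmark.download_rexgradient_images(output_dir="benchmarking/data/rexvqa")
```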
benchmarking/cli.py CHANGED
@@ -3,34 +3,40 @@
 import argparse
 import sys
 
-from .llm_providers import *
+from .llm_providers.base import LLMProvider
 from .benchmarks import *
 from .runner import BenchmarkRunner, BenchmarkRunConfig
 
 
-def create_llm_provider(model_name: str, provider_type: str, **kwargs) -> LLMProvider:
+def create_llm_provider(model_name: str, provider_type: str, system_prompt: str, **kwargs) -> LLMProvider:
     """Create an LLM provider based on the model name and type.
 
     Args:
         model_name (str): Name of the model
-        provider_type (str): Type of provider (openai, google, openrouter, anthropic, medrax)
+        provider_type (str): Type of provider (openai, google, openrouter, medrax)
+        system_prompt (str): System prompt identifier to load from file
         **kwargs: Additional configuration parameters
 
     Returns:
         LLMProvider: The configured LLM provider
     """
-    provider_map = {
-        "openai": OpenAIProvider,
-        "google": GoogleProvider,
-        "openrouter": OpenRouterProvider,
-        "medrax": MedRAXProvider,
-    }
-
-    if provider_type not in provider_map:
-        raise ValueError(f"Unknown provider type: {provider_type}. Available: {list(provider_map.keys())}")
-
-    provider_class = provider_map[provider_type]
-    return provider_class(model_name, **kwargs)
+    # Lazy imports to avoid slow startup
+    if provider_type == "openai":
+        from .llm_providers.openai_provider import OpenAIProvider
+        provider_class = OpenAIProvider
+    elif provider_type == "google":
+        from .llm_providers.google_provider import GoogleProvider
+        provider_class = GoogleProvider
+    elif provider_type == "openrouter":
+        from .llm_providers.openrouter_provider import OpenRouterProvider
+        provider_class = OpenRouterProvider
+    elif provider_type == "medrax":
+        from .llm_providers.medrax_provider import MedRAXProvider
+        provider_class = MedRAXProvider
+    else:
+        raise ValueError(f"Unknown provider type: {provider_type}. Available: openai, google, openrouter, medrax")
+
+    return provider_class(model_name, system_prompt, **kwargs)
 
 
 def create_benchmark(benchmark_name: str, data_dir: str, **kwargs) -> Benchmark:
@@ -63,12 +69,14 @@ def run_benchmark_command(args) -> None:
     # Create LLM provider
     provider_kwargs = {}
 
-    llm_provider = create_llm_provider(args.model, args.provider, **provider_kwargs)
+    llm_provider = create_llm_provider(model_name=args.model, provider_type=args.provider, system_prompt=args.system_prompt, **provider_kwargs)
 
     # Create benchmark
     benchmark_kwargs = {}
+    if args.random_seed is not None:
+        benchmark_kwargs["random_seed"] = args.random_seed
 
-    benchmark = create_benchmark(args.benchmark, args.data_dir, **benchmark_kwargs)
+    benchmark = create_benchmark(benchmark_name=args.benchmark, data_dir=args.data_dir, **benchmark_kwargs)
 
     # Create runner config
     config = BenchmarkRunConfig(
@@ -111,16 +119,32 @@ def main():
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
 
     # Run benchmark command
-    run_parser = subparsers.add_parser("run", help="Run a benchmark")
-    run_parser.add_argument("--model", required=True, help="Model name (e.g., gpt-4o, gemini-2.5-pro)")
-    run_parser.add_argument("--provider", required=True, choices=["openai", "google", "openrouter", "medrax"], help="LLM provider")
-    run_parser.add_argument("--benchmark", required=True, choices=["rexvqa", "chestagentbench"], help="Benchmark to run")
-    run_parser.add_argument("--data-dir", required=True, help="Directory containing benchmark data")
-    run_parser.add_argument("--output-dir", default="benchmark_results", help="Output directory for results")
-    run_parser.add_argument("--max-questions", type=int, help="Maximum number of questions to process")
-    run_parser.add_argument("--temperature", type=float, default=0.7, help="Model temperature")
-    run_parser.add_argument("--top-p", type=float, default=0.95, help="Top-p value")
-    run_parser.add_argument("--max-tokens", type=int, default=1000, help="Maximum tokens per response")
+    run_parser = subparsers.add_parser("run", help="Run a benchmark evaluation")
+    run_parser.add_argument("--model", required=True,
+                            help="Model name (e.g., gpt-4o, gpt-4.1-2025-04-14, gemini-2.5-pro)")
+    run_parser.add_argument("--provider", required=True,
+                            choices=["openai", "google", "openrouter", "medrax"],
+                            help="LLM provider to use")
+    run_parser.add_argument("--system-prompt", required=True,
+                            choices=["MEDICAL_ASSISTANT", "CHESTAGENTBENCH_PROMPT"],
+                            help="System prompt: MEDICAL_ASSISTANT (general) or CHESTAGENTBENCH_PROMPT (benchmarks)")
+    run_parser.add_argument("--benchmark", required=True,
+                            choices=["rexvqa", "chestagentbench"],
+                            help="Benchmark dataset: rexvqa (radiology VQA) or chestagentbench (chest X-ray reasoning)")
+    run_parser.add_argument("--data-dir", required=True,
+                            help="Directory containing benchmark data files")
+    run_parser.add_argument("--output-dir", default="benchmark_results",
+                            help="Output directory for results (default: benchmark_results)")
+    run_parser.add_argument("--max-questions", type=int,
+                            help="Maximum number of questions to process (default: all)")
+    run_parser.add_argument("--temperature", type=float, default=1,
+                            help="Model temperature for response generation (default: 0.7)")
+    run_parser.add_argument("--top-p", type=float, default=0.95,
+                            help="Top-p nucleus sampling parameter (default: 0.95)")
+    run_parser.add_argument("--max-tokens", type=int, default=5000,
+                            help="Maximum tokens per model response (default: 5000)")
+    run_parser.add_argument("--random-seed", type=int, default=42,
+                            help="Random seed for shuffling benchmark data (enables reproducible runs, default: None)")
 
     run_parser.set_defaults(func=run_benchmark_command)
 
benchmarking/data/rexvqa/download_rexgradient_images.py DELETED
@@ -1,172 +0,0 @@
-#!/usr/bin/env python3
-"""
-Utility script to download and extract ReXGradient-160K images.
-
-This script helps users download the actual PNG images from the ReXGradient-160K dataset,
-which are stored as part files on HuggingFace and need to be concatenated and extracted.
-
-Usage:
-    python download_rexgradient_images.py --output_dir /path/to/images
-"""
-
-import argparse
-import subprocess
-from pathlib import Path
-from huggingface_hub import hf_hub_download, list_repo_files
-import requests
-from tqdm import tqdm
-
-
-def download_file(url, output_path, chunk_size=8192):
-    """Download a file with progress bar."""
-    response = requests.get(url, stream=True)
-    total_size = int(response.headers.get('content-length', 0))
-
-    with open(output_path, 'wb') as f:
-        with tqdm(total=total_size, unit='B', unit_scale=True, desc=output_path.name) as pbar:
-            for chunk in response.iter_content(chunk_size=chunk_size):
-                if chunk:
-                    f.write(chunk)
-                    pbar.update(len(chunk))
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Download ReXGradient-160K images")
-    parser.add_argument(
-        "--output_dir",
-        type=str,
-        required=True,
-        help="Directory to save extracted images"
-    )
-    parser.add_argument(
-        "--repo_id",
-        type=str,
-        default="rajpurkarlab/ReXGradient-160K",
-        help="HuggingFace repository ID"
-    )
-    parser.add_argument(
-        "--skip_download",
-        action="store_true",
-        help="Skip downloading and only extract if files exist"
-    )
-
-    args = parser.parse_args()
-
-    output_dir = Path(args.output_dir)
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    print(f"Output directory: {output_dir}")
-
-    # Check if we need to accept the license first
-    print("Note: You may need to accept the dataset license on HuggingFace first:")
-    print(f"Visit: https://huggingface.co/datasets/{args.repo_id}")
-    print("Click 'Access repository' and accept the license agreement.")
-    print()
-
-    try:
-        # List files in the repository
-        print("Listing files in repository...")
-        files = list_repo_files(args.repo_id, repo_type='dataset')
-        part_files = [f for f in files if f.startswith("deid_png.part")]
-
-        if not part_files:
-            print("No part files found. The images might be in a different format.")
-            print("Available files:")
-            for f in files:
-                print(f"  - {f}")
-            return
-
-        print(f"Found {len(part_files)} part files:")
-        for f in part_files:
-            print(f"  - {f}")
-
-        # Download part files
-        if not args.skip_download:
-            print("\nDownloading part files...")
-            for part_file in part_files:
-                output_path = output_dir / part_file
-                if output_path.exists():
-                    print(f"Skipping {part_file} (already exists)")
-                    continue
-
-                print(f"Downloading {part_file}...")
-                try:
-                    hf_hub_download(
-                        repo_id=args.repo_id,
-                        filename=part_file,
-                        local_dir=output_dir,
-                        local_dir_use_symlinks=False,
-                        repo_type='dataset'
-                    )
-                except Exception as e:
-                    print(f"Error downloading {part_file}: {e}")
-                    print("You may need to accept the license agreement on HuggingFace.")
-                    return
-
-        # Concatenate part files
-        tar_path = output_dir / "deid_png.tar"
-        if not tar_path.exists():
-            print("\nConcatenating part files...")
-            with open(tar_path, 'wb') as tar_file:
-                for part_file in sorted(part_files):
-                    part_path = output_dir / part_file
-                    if part_path.exists():
-                        print(f"Adding {part_file}...")
-                        with open(part_path, 'rb') as f:
-                            tar_file.write(f.read())
-                    else:
-                        print(f"Warning: {part_file} not found, skipping...")
-        else:
-            print(f"Tar file already exists: {tar_path}")
-
-        # Extract tar file
-        if tar_path.exists():
-            print("\nExtracting images...")
-            images_dir = output_dir / "images"
-            images_dir.mkdir(exist_ok=True)
-
-            # Check if already extracted
-            if any(images_dir.glob("*.png")):
-                print("Images already extracted.")
-            else:
-                try:
-                    subprocess.run([
-                        "tar", "-xf", str(tar_path),
-                        "-C", str(images_dir)
-                    ], check=True)
-                    print("Extraction completed!")
-                except subprocess.CalledProcessError as e:
-                    print(f"Error extracting tar file: {e}")
-                    return
-                except FileNotFoundError:
-                    print("Error: 'tar' command not found. Please install tar or extract manually.")
-                    return
-
-            # Count extracted images
-            png_files = list(images_dir.glob("*.png"))
-            print(f"Extracted {len(png_files)} PNG images to {images_dir}")
-
-            # Show some example filenames
-            if png_files:
-                print("\nExample image filenames:")
-                for f in png_files[:5]:
-                    print(f"  - {f.name}")
-                if len(png_files) > 5:
-                    print(f"  ... and {len(png_files) - 5} more")
-
-        print(f"\nSetup complete! Use this directory as images_dir in ReXVQABenchmark:")
-        print(f"images_dir='{images_dir}'")
-
-    except Exception as e:
-        print(f"Error: {e}")
-        print("\nManual setup instructions:")
-        print("1. Visit https://huggingface.co/datasets/rajpurkarlab/ReXGradient-160K")
-        print("2. Accept the license agreement")
-        print("3. Download the deid_png.part* files")
-        print("4. Concatenate: cat deid_png.part* > deid_png.tar")
-        print("5. Extract: tar -xf deid_png.tar")
-        print("6. Use the extracted directory as images_dir")
-
-
-if __name__ == "__main__":
-    main()
benchmarking/llm_providers/base.py CHANGED
@@ -25,7 +25,7 @@ class LLMResponse:
     content: str
     usage: Optional[Dict[str, Any]] = None
    duration: Optional[float] = None
-    raw_response: Optional[Any] = None
+    chunk_history: Optional[Any] = None
 
 
 class LLMProvider(ABC):
@@ -35,22 +35,24 @@
     text + image input -> text output across different models and APIs.
     """
 
-    def __init__(self, model_name: str, **kwargs):
+    def __init__(self, model_name: str, system_prompt: str, **kwargs):
         """Initialize the LLM provider.
 
         Args:
             model_name (str): Name of the model to use
+            system_prompt (str): System prompt identifier to load from file
             **kwargs: Additional configuration parameters
         """
         self.model_name = model_name
         self.config = kwargs
+        self.prompt_name = system_prompt  # Store the original prompt identifier
 
-        # Always load system prompt from file
+        # Load system prompt content from file
         try:
             prompts = load_prompts_from_file("medrax/docs/system_prompts.txt")
-            self.system_prompt = prompts.get("CHESTAGENTBENCH_PROMPT", None)
+            self.system_prompt = prompts.get(system_prompt, None)
             if self.system_prompt is None:
-                print(f"Warning: System prompt not found in medrax/docs/system_prompts.txt.")
+                print(f"Warning: System prompt '{system_prompt}' not found in medrax/docs/system_prompts.txt.")
         except Exception as e:
             print(f"Error loading system prompt: {e}")
             self.system_prompt = None
@@ -102,8 +104,12 @@
         Returns:
             str: Base64 encoded image string
         """
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
+        try:
+            with open(image_path, "rb") as image_file:
+                return base64.b64encode(image_file.read()).decode('utf-8')
+        except Exception as e:
+            print(f"ERROR: _encode_image failed for {image_path} (type: {type(image_path)}): {e}")
+            raise
 
     def _validate_image_paths(self, image_paths: List[str]) -> List[str]:
         """Validate that image paths exist and are readable.
benchmarking/llm_providers/google_provider.py CHANGED
@@ -92,13 +92,11 @@ class GoogleProvider(LLMProvider):
             return LLMResponse(
                 content=content,
                 usage=usage,
-                duration=duration,
-                raw_response=response
+                duration=duration
             )
 
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
+                duration=time.time() - start_time
             )
benchmarking/llm_providers/medrax_provider.py CHANGED
@@ -1,10 +1,10 @@
 """MedRAX LLM provider implementation."""
 
 import time
-import shutil
-from pathlib import Path
+import re
 
 from .base import LLMProvider, LLMRequest, LLMResponse
+from langchain_core.messages import AIMessage, HumanMessage
 
 from medrax.rag.rag import RAGConfig
 from main import initialize_agent
@@ -13,18 +13,19 @@ from main import initialize_agent
 class MedRAXProvider(LLMProvider):
     """MedRAX LLM provider that uses the full MedRAX agent system."""
 
-    def __init__(self, model_name: str, **kwargs):
+    def __init__(self, model_name: str, system_prompt: str, **kwargs):
         """Initialize MedRAX provider.
 
         Args:
             model_name (str): Base LLM model name (e.g., "gpt-4.1-2025-04-14")
+            system_prompt (str): System prompt to use
             **kwargs: Additional configuration parameters
         """
         self.model_name = model_name
         self.agent = None
         self.tools_dict = None
-
-        super().__init__(model_name, **kwargs)
+
+        super().__init__(model_name, system_prompt, **kwargs)
 
     def _setup(self) -> None:
         """Set up MedRAX agent system."""
@@ -32,19 +33,14 @@
         print("Starting server...")
 
         selected_tools = [
-            # "ImageVisualizerTool",  # For displaying images in the UI
-            # "DicomProcessorTool",  # For processing DICOM medical image files
-            # "TorchXRayVisionClassifierTool",  # For classifying chest X-ray images using TorchXRayVision
-            # "ArcPlusClassifierTool",  # For advanced chest X-ray classification using ArcPlus
-            # "ChestXRaySegmentationTool",  # For segmenting anatomical regions in chest X-rays
-            # "ChestXRayReportGeneratorTool",  # For generating medical reports from X-rays
-            # "XRayVQATool",  # For visual question answering on X-rays
-            # "LlavaMedTool",  # For multimodal medical image understanding
-            # "XRayPhraseGroundingTool",  # For locating described features in X-rays
-            # "ChestXRayGeneratorTool",  # For generating synthetic chest X-rays
-            "WebBrowserTool",  # For web browsing and search capabilities
+            "ChestXRayReportGeneratorTool",  # For generating medical reports from X-rays
             "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
-            # "PythonSandboxTool",  # Add the Python sandbox tool
+            "WebBrowserTool",  # For web browsing and search capabilities
+            "TorchXRayVisionClassifierTool",  # For classifying chest X-ray images using TorchXRayVision
+            "ArcPlusClassifierTool",  # For advanced chest X-ray classification using ArcPlus
+            "DuckDuckGoSearchTool",  # For privacy-focused web search using DuckDuckGo
+            "XRayVQATool",  # For visual question answering on X-rays
+            "XRayPhraseGroundingTool",  # For locating described features in X-rays
         ]
 
         rag_config = RAGConfig(
@@ -55,7 +51,7 @@
             pinecone_index_name="medrax2",  # Name for the Pinecone index
             chunk_size=1500,
             chunk_overlap=300,
-            retriever_k=7,
+            retriever_k=3,
             local_docs_dir="rag_docs",  # Change this to the path of the documents for RAG
             huggingface_datasets=["VictorLJZ/medrax2"],  # List of HuggingFace datasets to load
             dataset_split="train",  # Which split of the datasets to use
@@ -69,13 +65,13 @@
             tools_to_use=selected_tools,
             model_dir="/model-weights",
             temp_dir="temp",  # Change this to the path of the temporary directory
-            device="cpu",
+            device="cuda:0",
             model=self.model_name,  # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
-            temperature=0.7,
+            temperature=0.3,
             top_p=0.95,
             model_kwargs=model_kwargs,
             rag_config=rag_config,
-            debug=True,
+            system_prompt=self.prompt_name,
         )
 
         self.agent = agent
@@ -101,8 +97,7 @@
         if self.agent is None:
             return LLMResponse(
                 content="Error: MedRAX agent not initialized",
-                duration=time.time() - start_time,
-                raw_response=None
+                duration=time.time() - start_time
             )
 
         try:
@@ -110,78 +105,118 @@
             messages = []
             thread_id = str(int(time.time() * 1000))  # Unique thread ID
 
-            # Copy images to session temp directory and provide paths
-            image_paths = []
             if request.images:
                 valid_images = self._validate_image_paths(request.images)
                 print(f"Processing {len(valid_images)} images")
                 for i, image_path in enumerate(valid_images):
-                    print(f"Original image path: {image_path}")
-                    # Copy image to session temp directory
-                    dest_path = Path("temp") / f"image_{i}_{Path(image_path).name}"
-                    print(f"Destination path: {dest_path}")
-                    shutil.copy2(image_path, dest_path)
-                    image_paths.append(str(dest_path))
-
-                    # Verify file exists after copy
-                    if not dest_path.exists():
-                        print(f"ERROR: File not found after copy: {dest_path}")
-                    else:
-                        print(f"File successfully copied: {dest_path}")
-
                     # Add image path message for tools
-                    messages.append({
-                        "role": "user",
-                        "content": f"image_path: {dest_path}"
-                    })
+                    messages.append(HumanMessage(content=f"image_path: {image_path}"))
 
                     # Add image content for multimodal LLM
-                    with open(image_path, "rb") as img_file:
-                        img_base64 = self._encode_image(image_path)
-
-                    messages.append({
-                        "role": "user",
-                        "content": [{
+                    try:
+                        with open(image_path, "rb") as img_file:
+                            img_base64 = self._encode_image(image_path)
+
+                        messages.append(HumanMessage(content=[{
                             "type": "image_url",
                             "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}
-                        }]
-                    })
+                        }]))
+                    except Exception as e:
+                        print(f"ERROR: Image encoding failed for {image_path}: {e}")
+                        raise
 
             # Add text message
-            messages.append({
-                "role": "user",
-                "content": [{
+            if request.images:
+                # If there are images, add text as part of multimodal content
+                messages.append(HumanMessage(content=[{
                     "type": "text",
                     "text": request.text
-                }]
-            })
+                }]))
+            else:
+                # If no images, add text as simple string
+                messages.append(HumanMessage(content=request.text))
 
-            # Run the agent
-            response_content = ""
+            # Run the agent with proper message type handling
+            final_response = ""
+            chunk_history = []
 
             for chunk in self.agent.workflow.stream(
                 {"messages": messages},
                 {"configurable": {"thread_id": thread_id}},
                 stream_mode="updates"
             ):
-                if isinstance(chunk, dict):
-                    for node_name, node_output in chunk.items():
-                        if "messages" in node_output:
-                            for msg in node_output["messages"]:
-                                if hasattr(msg, 'content') and msg.content:
-                                    response_content += str(msg.content)
+                if not isinstance(chunk, dict):
+                    continue
+
+                for node_name, node_output in chunk.items():
+                    # Log chunk and get serializable version
+                    serializable_chunk = self._log_chunk(node_output, node_name)
+                    chunk_history.append(serializable_chunk)
+
+                    if "messages" not in node_output:
+                        continue
+
+                    for msg in node_output["messages"]:
+                        if isinstance(msg, AIMessage) and msg.content:
+                            # Handle case where content is a list
+                            content = msg.content
+                            if isinstance(content, list):
+                                content = " ".join(content)
+                            # Clean up the content (remove temp paths, etc.)
+                            final_response = re.sub(r"temp/[^\s]*", "", content).strip()
+
+            # Determine the final response
+            if final_response:
+                response_content = final_response
+            else:
+                # Fallback if no LLM response was received
+                response_content = "No response generated"
 
             duration = time.time() - start_time
 
             return LLMResponse(
-                content=response_content.strip(),
+                content=response_content,
                 usage={"agent_tools": list(self.tools_dict.keys())},
                 duration=duration,
-                raw_response={"thread_id": thread_id, "image_paths": image_paths}
+                chunk_history=chunk_history
            )
 
         except Exception as e:
+            print(f"ERROR: MedRAX agent failed: {e}")
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
+                duration=time.time() - start_time
             )
+
+    def _log_chunk(self, chunk: dict, node_name: str) -> dict:
+        """Log and process a chunk from the agent workflow.
+
+        Args:
+            chunk (dict): The chunk data from the agent workflow
+            node_name (str): Name of the node that produced the chunk
+
+        Returns:
+            dict: Serializable version of the chunk for debugging
+        """
+        # Log every chunk for debugging
+        print(f"Chunk from node '{node_name}': {type(chunk)}")
+
+        # Store serializable version of chunk for debugging
+        serializable_chunk = {
+            "node_name": node_name,
+            "node_type": type(chunk).__name__,
+        }
+
+        # Log messages in this chunk
+        if "messages" in chunk and isinstance(chunk, dict):
+            chunk_messages = []
+            for msg in chunk["messages"]:
+                msg_info = {
+                    "type": type(msg).__name__,
+                    "content": str(msg.content) if hasattr(msg, 'content') else str(msg)
+                }
+                chunk_messages.append(msg_info)
+                print(f"Message in chunk: {msg_info}")
+            serializable_chunk["messages"] = chunk_messages
+
+        return serializable_chunk
benchmarking/llm_providers/openai_provider.py CHANGED
@@ -101,13 +101,11 @@ class OpenAIProvider(LLMProvider):
             return LLMResponse(
                 content=content,
                 usage=usage,
-                duration=duration,
-                raw_response=response
+                duration=duration
             )
 
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
+                duration=time.time() - start_time
             )
benchmarking/llm_providers/openrouter_provider.py CHANGED
@@ -78,12 +78,10 @@ class OpenRouterProvider(LLMProvider):
             return LLMResponse(
                 content=content,
                 usage=usage,
-                duration=duration,
-                raw_response=response
+                duration=duration
             )
         except Exception as e:
             return LLMResponse(
                 content=f"Error: {str(e)}",
-                duration=time.time() - start_time,
-                raw_response=None
+                duration=time.time() - start_time
             )
benchmarking/runner.py CHANGED
@@ -24,6 +24,7 @@ class BenchmarkResult:
     duration: float
     usage: Optional[Dict[str, Any]] = None
     error: Optional[str] = None
+    chunk_history: Optional[Dict[str, Any]] = None
     metadata: Optional[Dict[str, Any]] = None
 
 
@@ -138,9 +139,11 @@ class BenchmarkRunner:
                 # Add to results
                 self.results.append(result)
 
+                # Save individual result immediately
+                self._save_individual_result(result)
+
                 # Log progress
                 if processed % 10 == 0:
-                    self._save_intermediate_results()
                     accuracy = (correct / processed) * 100
                     avg_duration = total_duration / processed
 
@@ -163,6 +166,9 @@
                     error=str(e)
                 )
                 self.results.append(error_result)
+
+                # Save individual error result immediately
+                self._save_individual_result(error_result)
                 continue
 
         # Save final results
@@ -220,6 +226,7 @@
                 is_correct=is_correct,
                 duration=duration,
                 usage=response.usage,
+                chunk_history=response.chunk_history,
                 metadata={
                     "data_point_metadata": data_point.metadata,
                     "case_id": data_point.case_id,
@@ -238,6 +245,7 @@
                 is_correct=False,
                 duration=duration,
                 error=str(e),
+                chunk_history=None,
                 metadata={
                     "data_point_metadata": data_point.metadata,
                     "case_id": data_point.case_id,
@@ -254,9 +262,9 @@
         Returns:
             str: The extracted answer
         """
-        # First, look for the '<|A|>' format
-        final_answer_pattern = r'\s*<\|([A-F])\|>'
-        match = re.search(final_answer_pattern, response_text)
+        # Look for the '\boxed{A}' format
+        boxed_pattern = r'\\boxed\{([A-Fa-f])\}'
+        match = re.search(boxed_pattern, response_text)
         if match:
             return match.group(1).upper()
 
@@ -286,11 +294,55 @@
 
         return model_letter == correct_letter
 
-    def _save_intermediate_results(self) -> None:
-        """Save intermediate results to disk."""
-        results_file = self.output_dir / f"{self.run_id}_intermediate.json"
+    def _save_individual_result(self, result: BenchmarkResult) -> None:
+        """Save a single result to its own JSON file.
+
+        Args:
+            result (BenchmarkResult): The result to save
+        """
+        # Sanitize data_point_id for filename (remove invalid characters)
+        safe_id = re.sub(r'[^\w\-_.]', '_', result.data_point_id)
+
+        # Create filename with benchmark name and data point ID
+        filename = f"{self.config.benchmark_name}_{safe_id}.json"
+        result_file = self.output_dir / "individual_results" / filename
+
+        # Create individual_results directory if it doesn't exist
+        result_file.parent.mkdir(exist_ok=True)
+
+        # Convert result to serializable format
+        result_data = {
+            "timestamp": datetime.now().isoformat(),
+            "run_id": self.run_id,
+            "data_point_id": result.data_point_id,
+            "question": result.question,
+            "model_answer": result.model_answer,
+            "correct_answer": result.correct_answer,
+            "is_correct": result.is_correct,
+            "duration": result.duration,
+            "usage": result.usage,
+            "error": result.error,
+            "chunk_history": result.chunk_history,
+            "metadata": result.metadata
+        }
+
+        # Save to file
+        with open(result_file, 'w') as f:
+            json.dump(result_data, f, indent=2)
+
+    def _save_final_results(self, benchmark: Benchmark) -> Dict[str, Any]:
+        """Save final results and return summary.
+
+        Args:
+            benchmark (Benchmark): The benchmark that was run
+
+        Returns:
+            Dict[str, Any]: Summary of results
+        """
+        # Save detailed results
+        results_file = self.output_dir / f"{self.run_id}_results.json"
 
-        # Convert results to serializable format
+        # Convert results to serializable format for final file
         results_data = []
         for result in self.results:
             results_data.append({
@@ -307,19 +359,6 @@
 
         with open(results_file, 'w') as f:
             json.dump(results_data, f, indent=2)
-
-    def _save_final_results(self, benchmark: Benchmark) -> Dict[str, Any]:
-        """Save final results and return summary.
-
-        Args:
-            benchmark (Benchmark): The benchmark that was run
-
-        Returns:
-            Dict[str, Any]: Summary of results
-        """
-        # Save detailed results
-        results_file = self.output_dir / f"{self.run_id}_results.json"
-        self._save_intermediate_results()
 
         # Calculate summary statistics
         total_questions = len(self.results)
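The answer-extraction switch above (from `<|A|>` to `\boxed{A}`) can be sanity-checked in isolation; a quick sketch of what the new pattern matches:

```python
# The new pattern accepts upper- or lower-case option letters inside \boxed{...}
# and normalises them to upper case, mirroring what _extract_answer does.
import re

boxed_pattern = r'\\boxed\{([A-Fa-f])\}'

sample = r"The findings are most consistent with atelectasis, so the answer is \boxed{c}."
match = re.search(boxed_pattern, sample)
assert match is not None and match.group(1).upper() == "C"
```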
interface.py CHANGED
@@ -192,7 +192,11 @@ class ChatInterface:
                 tool_args = pending_call["args"]
 
                 try:
-                    tool_output_json = json.loads(msg.content)
+                    # Handle case where tool returns tuple (output, metadata)
+                    content = msg.content
+                    content_tuple = ast.literal_eval(content)
+                    content = json.dumps(content_tuple[0])
+                    tool_output_json = json.loads(content)
                     tool_output_str = json.dumps(tool_output_json, indent=2)
                 except (json.JSONDecodeError, TypeError):
                     tool_output_str = str(msg.content)
@@ -217,10 +221,11 @@
 
                 if tool_name == "image_visualizer":
                     try:
-                        result = json.loads(msg.content)
-                        # Handle case where tool returns array [output, metadata]
-                        if isinstance(result, list) and len(result) > 0:
-                            result = result[0]  # Take the first element (output)
+                        # Handle case where tool returns tuple (output, metadata)
+                        content = msg.content
+                        content_tuple = ast.literal_eval(content)
+                        result = content_tuple[0]
+
                         if isinstance(result, dict) and "image_path" in result:
                             self.display_file_path = result["image_path"]
                             chat_history.append(
main.py CHANGED
@@ -41,7 +41,7 @@ def initialize_agent(
41
  top_p: float = 0.95,
42
  rag_config: Optional[RAGConfig] = None,
43
  model_kwargs: Dict[str, Any] = {},
44
- debug: bool = False,
45
  ):
46
  """Initialize the MedRAX agent with specified tools and configuration.
47
 
@@ -56,6 +56,7 @@ def initialize_agent(
56
  top_p (float, optional): Top P for the model. Defaults to 0.95.
57
  rag_config (RAGConfig, optional): Configuration for the RAG tool. Defaults to None.
58
  model_kwargs (dict, optional): Additional keyword arguments for model.
 
59
  debug (bool, optional): Whether to enable debug mode. Defaults to False.
60
 
61
  Returns:
@@ -63,7 +64,7 @@ def initialize_agent(
63
  """
64
  # Load system prompts from file
65
  prompts = load_prompts_from_file(prompt_file)
66
- prompt = prompts["MEDICAL_ASSISTANT"]
67
 
68
  # Define the URL of the MedGemma FastAPI service.
69
  MEDGEMMA_API_URL = os.getenv("MEDGEMMA_API_URL", "http://127.0.0.1:8002")
@@ -126,7 +127,6 @@ def initialize_agent(
126
  log_dir="logs",
127
  system_prompt=prompt,
128
  checkpointer=checkpointer,
129
- debug=debug,
130
  )
131
  print("Agent initialized")
132
 
@@ -145,19 +145,20 @@ if __name__ == "__main__":
145
  selected_tools = [
146
  "ImageVisualizerTool", # For displaying images in the UI
147
  # "DicomProcessorTool", # For processing DICOM medical image files
148
- # "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
149
- # "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
150
- # "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
151
- # "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
152
- # "XRayVQATool", # For visual question answering on X-rays
153
  # "LlavaMedTool", # For multimodal medical image understanding
154
- # "XRayPhraseGroundingTool", # For locating described features in X-rays
155
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
156
  # "MedSAM2Tool", # For advanced medical image segmentation using MedSAM2
157
  # "WebBrowserTool", # For web browsing and search capabilities
158
  # "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
159
  # "PythonSandboxTool", # Add the Python sandbox tool
160
  "MedGemmaVQATool" # Google MedGemma VQA tool
 
161
  ]
162
 
163
  # Setup the MedGemma environment if the MedGemmaVQATool is selected
@@ -174,7 +175,7 @@ if __name__ == "__main__":
174
  pinecone_index_name="medrax2", # Name for the Pinecone index
175
  chunk_size=1500,
176
  chunk_overlap=300,
177
- retriever_k=7,
178
  local_docs_dir="rag_docs", # Change this to the path of the documents for RAG
179
  huggingface_datasets=["VictorLJZ/medrax2"], # List of HuggingFace datasets to load
180
  dataset_split="train", # Which split of the datasets to use
@@ -186,15 +187,15 @@ if __name__ == "__main__":
186
  agent, tools_dict = initialize_agent(
187
  prompt_file="medrax/docs/system_prompts.txt",
188
  tools_to_use=selected_tools,
189
- model_dir="model-weights",
190
  temp_dir="temp", # Change this to the path of the temporary directory
191
- device="cuda",
192
  model="gpt-4.1-2025-04-14", # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
193
  temperature=0.7,
194
  top_p=0.95,
195
  model_kwargs=model_kwargs,
196
  rag_config=rag_config,
197
- debug=True,
198
  )
199
 
200
  # Create and launch the web interface
 
41
  top_p: float = 0.95,
42
  rag_config: Optional[RAGConfig] = None,
43
  model_kwargs: Dict[str, Any] = {},
44
+ system_prompt: str = "MEDICAL_ASSISTANT",
45
  ):
46
  """Initialize the MedRAX agent with specified tools and configuration.
47
 
 
56
  top_p (float, optional): Top P for the model. Defaults to 0.95.
57
  rag_config (RAGConfig, optional): Configuration for the RAG tool. Defaults to None.
58
  model_kwargs (dict, optional): Additional keyword arguments for model.
59
+ system_prompt (str, optional): System prompt to use. Defaults to "MEDICAL_ASSISTANT".
60
  debug (bool, optional): Whether to enable debug mode. Defaults to False.
61
 
62
  Returns:
 
64
  """
65
  # Load system prompts from file
66
  prompts = load_prompts_from_file(prompt_file)
67
+ prompt = prompts[system_prompt]
68
 
69
  # Define the URL of the MedGemma FastAPI service.
70
  MEDGEMMA_API_URL = os.getenv("MEDGEMMA_API_URL", "http://127.0.0.1:8002")
 
127
  log_dir="logs",
128
  system_prompt=prompt,
129
  checkpointer=checkpointer,
 
130
  )
131
  print("Agent initialized")
132
 
 
145
  selected_tools = [
146
  "ImageVisualizerTool", # For displaying images in the UI
147
  # "DicomProcessorTool", # For processing DICOM medical image files
148
+ "TorchXRayVisionClassifierTool", # For classifying chest X-ray images using TorchXRayVision
149
+ "ArcPlusClassifierTool", # For advanced chest X-ray classification using ArcPlus
150
+ "ChestXRaySegmentationTool", # For segmenting anatomical regions in chest X-rays
151
+ "ChestXRayReportGeneratorTool", # For generating medical reports from X-rays
152
+ "XRayVQATool", # For visual question answering on X-rays
153
  # "LlavaMedTool", # For multimodal medical image understanding
154
+ "XRayPhraseGroundingTool", # For locating described features in X-rays
155
  # "ChestXRayGeneratorTool", # For generating synthetic chest X-rays
156
  # "MedSAM2Tool", # For advanced medical image segmentation using MedSAM2
157
  # "WebBrowserTool", # For web browsing and search capabilities
158
  # "MedicalRAGTool", # For retrieval-augmented generation with medical knowledge
159
  # "PythonSandboxTool", # Add the Python sandbox tool
160
  "MedGemmaVQATool" # Google MedGemma VQA tool
161
+ "DuckDuckGoSearchTool", # For privacy-focused web search using DuckDuckGo
162
  ]
163
 
164
  # Setup the MedGemma environment if the MedGemmaVQATool is selected
 
175
  pinecone_index_name="medrax2", # Name for the Pinecone index
176
  chunk_size=1500,
177
  chunk_overlap=300,
178
+ retriever_k=3,
179
  local_docs_dir="rag_docs", # Change this to the path of the documents for RAG
180
  huggingface_datasets=["VictorLJZ/medrax2"], # List of HuggingFace datasets to load
181
  dataset_split="train", # Which split of the datasets to use
 
187
  agent, tools_dict = initialize_agent(
188
  prompt_file="medrax/docs/system_prompts.txt",
189
  tools_to_use=selected_tools,
190
+ model_dir="/model-weights",
191
  temp_dir="temp", # Change this to the path of the temporary directory
192
+ device="cuda:0",
193
  model="gpt-4.1-2025-04-14", # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
194
  temperature=0.7,
195
  top_p=0.95,
196
  model_kwargs=model_kwargs,
197
  rag_config=rag_config,
198
+ system_prompt="MEDICAL_ASSISTANT",
199
  )
200
 
201
  # Create and launch the web interface
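For orientation, here is a minimal sketch of how the `agent` returned by `initialize_agent` might be queried directly; the `thread_id`, image path, and question are placeholders, and the call assumes the LangGraph workflow compiled with a checkpointer in `medrax/agent/agent.py` below.

```python
from langchain_core.messages import HumanMessage

# Hypothetical direct invocation of the initialized agent; thread_id and the
# question are illustrative only and not part of main.py.
config = {"configurable": {"thread_id": "demo-session"}}
result = agent.workflow.invoke(
    {"messages": [HumanMessage(content="Is there cardiomegaly in /tmp/cxr.png?")]},
    config=config,
)
print(result["messages"][-1].content)  # final model response after any tool calls
```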
medrax/agent/__init__.py CHANGED
@@ -1 +1 @@
- from .agent import State, Agent
+ from .agent import AgentState, Agent
medrax/agent/agent.py CHANGED
@@ -5,9 +5,8 @@ from dotenv import load_dotenv
  from datetime import datetime
  from typing import List, Dict, Any, TypedDict, Annotated, Optional
 
- from langgraph.prebuilt import create_react_agent
- from langchain_core.messages import AnyMessage
- from langgraph.prebuilt.chat_agent_executor import AgentState
+ from langgraph.graph import StateGraph, END
+ from langchain_core.messages import AnyMessage, SystemMessage, ToolMessage
  from langchain_core.language_models import BaseLanguageModel
  from langchain_core.tools import BaseTool
 
@@ -33,19 +32,17 @@ class ToolCallLog(TypedDict):
      content: str
 
 
- class State(AgentState):
+ class AgentState(TypedDict):
      """
-     A AgentState representing the state of an agent.
+     A TypedDict representing the state of an agent.
 
      Attributes:
-         session_bytes (bytes): The pickled state of the sandbox session. This is
-             required for stateful tools and should not be modified directly.
-         session_metadata (dict): Metadata associated with the sandbox session.
+         messages (Annotated[List[AnyMessage], operator.add]): A list of messages
+             representing the conversation history. The operator.add annotation
+             indicates that new messages should be appended to this list.
      """
 
-     # Required for the stateful PyodideSandboxTool
-     session_bytes: bytes = b""
-     session_metadata: dict = {}
+     messages: Annotated[List[AnyMessage], operator.add]
 
 
  class Agent:
@@ -55,7 +52,7 @@ class Agent:
 
      Attributes:
          model (BaseLanguageModel): The language model used for processing.
-         tools (List[BaseTool]): A list of available tools.
+         tools (Dict[str, BaseTool]): A dictionary of available tools.
          checkpointer (Any): Manages and persists the agent's state.
          system_prompt (str): The system instructions for the agent.
          workflow (StateGraph): The compiled workflow for the agent's processing.
@@ -71,7 +68,6 @@ class Agent:
          system_prompt: str = "",
          log_tools: bool = True,
          log_dir: Optional[str] = "logs",
-         debug: bool = False,
      ):
          """
          Initialize the Agent.
@@ -83,7 +79,6 @@ class Agent:
              system_prompt (str, optional): System instructions. Defaults to "".
              log_tools (bool, optional): Whether to log tool calls. Defaults to True.
              log_dir (str, optional): Directory to save logs. Defaults to 'logs'.
-             debug (bool, optional): Whether to enable debug mode. Defaults to False.
          """
          self.system_prompt = system_prompt
          self.log_tools = log_tools
@@ -92,12 +87,107 @@ class Agent:
          self.log_path = Path(log_dir or "logs")
          self.log_path.mkdir(exist_ok=True)
 
-         self.workflow = create_react_agent(
-             model=model,
-             tools=tools,
-             checkpointer=checkpointer,
-             state_schema=State,
-             prompt=system_prompt if system_prompt else None,
-             debug=debug,
+         # Define the agent workflow
+         workflow = StateGraph(AgentState)
+         workflow.add_node("process", self.process_request)
+         workflow.add_node("execute", self.execute_tools)
+         workflow.add_conditional_edges(
+             "process", self.has_tool_calls, {True: "execute", False: END}
          )
+         workflow.add_edge("execute", "process")
+         workflow.set_entry_point("process")
+
+         self.workflow = workflow.compile(checkpointer=checkpointer)
          self.tools = {t.name: t for t in tools}
+         self.model = model.bind_tools(tools)
+
+     def process_request(self, state: AgentState) -> Dict[str, List[AnyMessage]]:
+         """
+         Process the request using the language model.
+
+         Args:
+             state (AgentState): The current state of the agent.
+
+         Returns:
+             Dict[str, List[AnyMessage]]: A dictionary containing the model's response.
+         """
+         messages = state["messages"]
+         if self.system_prompt:
+             messages = [SystemMessage(content=self.system_prompt)] + messages
+         response = self.model.invoke(messages)
+         return {"messages": [response]}
+
+     def has_tool_calls(self, state: AgentState) -> bool:
+         """
+         Check if the response contains any tool calls.
+
+         Args:
+             state (AgentState): The current state of the agent.
+
+         Returns:
+             bool: True if tool calls exist, False otherwise.
+         """
+         response = state["messages"][-1]
+         return len(response.tool_calls) > 0
+
+     def execute_tools(self, state: AgentState) -> Dict[str, List[ToolMessage]]:
+         """
+         Execute tool calls from the model's response.
+
+         Args:
+             state (AgentState): The current state of the agent.
+
+         Returns:
+             Dict[str, List[ToolMessage]]: A dictionary containing tool execution results.
+         """
+         tool_calls = state["messages"][-1].tool_calls
+         results = []
+
+         for call in tool_calls:
+             print(f"Executing tool: {call}")
+             if call["name"] not in self.tools:
+                 print("\n....invalid tool....")
+                 result = "invalid tool, please retry"
+             else:
+                 result = self.tools[call["name"]].invoke(call["args"])
+
+             results.append(
+                 ToolMessage(
+                     tool_call_id=call["id"],
+                     name=call["name"],
+                     args=call["args"],
+                     content=str(result),
+                 )
+             )
+
+         self._save_tool_calls(results)
+         print("Returning to model processing!")
+
+         return {"messages": results}
+
+     def _save_tool_calls(self, tool_calls: List[ToolMessage]) -> None:
+         """
+         Save tool calls to a JSON file with timestamp-based naming.
+
+         Args:
+             tool_calls (List[ToolMessage]): List of tool calls to save.
+         """
+         if not self.log_tools:
+             return
+
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+         filename = self.log_path / f"tool_calls_{timestamp}.json"
+
+         logs: List[ToolCallLog] = []
+         for call in tool_calls:
+             log_entry = {
+                 "tool_call_id": call.tool_call_id,
+                 "name": call.name,
+                 "args": call.args,
+                 "content": call.content,
+                 "timestamp": datetime.now().isoformat(),
+             }
+             logs.append(log_entry)
+
+         with open(filename, "w") as f:
+             json.dump(logs, f, indent=4)
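This diff replaces LangGraph's prebuilt ReAct executor with an explicit two-node graph that loops `process` → `execute` → `process` until the model stops emitting tool calls. A self-contained toy sketch of the same routing pattern, with stand-in nodes instead of an LLM, may make the control flow easier to follow; all names in it are illustrative.

```python
import operator
from typing import Annotated, List, TypedDict

from langgraph.graph import StateGraph, END


class ToyState(TypedDict):
    messages: Annotated[List[str], operator.add]


def process(state: ToyState) -> dict:
    # Stand-in for Agent.process_request (the LLM call)
    return {"messages": ["model-response"]}


def execute(state: ToyState) -> dict:
    # Stand-in for Agent.execute_tools
    return {"messages": ["tool-result"]}


def needs_tools(state: ToyState) -> bool:
    # Stand-in for Agent.has_tool_calls; here we stop after one tool pass
    return len(state["messages"]) < 2


graph = StateGraph(ToyState)
graph.add_node("process", process)
graph.add_node("execute", execute)
graph.add_conditional_edges("process", needs_tools, {True: "execute", False: END})
graph.add_edge("execute", "process")
graph.set_entry_point("process")
app = graph.compile()

print(app.invoke({"messages": []}))  # runs process -> execute -> process, then ends
```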
medrax/docs/system_prompts.txt CHANGED
@@ -22,5 +22,5 @@ Solve using your own vision and reasoning and use tools (if available) to comple
  You can make multiple tool calls in parallel or in sequence as needed for comprehensive answers.
  Think critically about and criticize the tool outputs.
  If you need to look up some information before asking a follow up question, you are allowed to do that.
- When encountering a multiple-choice question, your final response should end with "Final answer: <|A|>" from list of possible choices A, B, C, D, E, F.
+ When encountering a multiple-choice question, your final response should end with "Final answer: \boxed{A}" from list of possible choices A, B, C, D, E, F.
  It is extremely important that you strictly answer in the format mentioned above.
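Because the answer convention changes from `<|A|>` to `\boxed{A}`, any benchmarking or grading script that parses model output would need an updated pattern. A hypothetical extraction snippet (not part of this commit):

```python
import re

# Hypothetical parser for the new "Final answer: \boxed{X}" convention.
ANSWER_RE = re.compile(r"Final answer:\s*\\boxed\{([A-F])\}")


def extract_choice(response: str) -> str | None:
    match = ANSWER_RE.search(response)
    return match.group(1) if match else None


print(extract_choice(r"...reasoning... Final answer: \boxed{C}"))  # prints "C"
```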
medrax/tools/__init__.py CHANGED
@@ -5,10 +5,10 @@ from .report_generation import *
  from .segmentation import *
  from .vqa import *
  from .grounding import *
- from .generation import *
+ from .xray_generation import *
  from .dicom import *
  from .utils import *
  from .rag import *
- from .web_browser import *
+ from .browsing import *
  from .python_tool import *
  from .medsam2 import *
medrax/tools/browsing/__init__.py ADDED
@@ -0,0 +1,13 @@
+ """Web browsing tools for MedRAX2 medical agents."""
+
+ from .duckduckgo import DuckDuckGoSearchTool, WebSearchInput
+ from .web_browser import WebBrowserTool, WebBrowserSchema, SearchQuerySchema, VisitUrlSchema
+
+ __all__ = [
+     "DuckDuckGoSearchTool",
+     "WebSearchInput",
+     "WebBrowserTool",
+     "WebBrowserSchema",
+     "SearchQuerySchema",
+     "VisitUrlSchema"
+ ]
medrax/tools/browsing/duckduckgo.py ADDED
@@ -0,0 +1,403 @@
1
+ """
2
+ Web search tool for MedRAX2 medical agents.
3
+
4
+ Provides DuckDuckGo search capabilities for medical agents to retrieve
5
+ real-time information from the web with proper error handling
6
+ and result formatting. Designed specifically for medical research,
7
+ fact-checking, and accessing current medical information.
8
+ """
9
+
10
+ import asyncio
11
+ import logging
12
+ import time
13
+ from datetime import datetime
14
+ from typing import Dict, Any, Tuple
15
+
16
+ from langchain_core.callbacks import (
17
+ AsyncCallbackManagerForToolRun,
18
+ CallbackManagerForToolRun,
19
+ )
20
+ from langchain_core.tools import BaseTool
21
+ from pydantic import BaseModel, Field
22
+
23
+ try:
24
+ from duckduckgo_search import DDGS
25
+ except ImportError:
26
+ DDGS = None
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ class WebSearchInput(BaseModel):
32
+ """Input schema for web search tool."""
33
+
34
+ query: str = Field(
35
+ ...,
36
+ description="The search query to look up on the web. Be specific and include relevant medical keywords for better results.",
37
+ min_length=1,
38
+ max_length=500,
39
+ )
40
+ max_results: int = Field(
41
+ default=5,
42
+ description="Maximum number of search results to return (1-10)",
43
+ ge=1,
44
+ le=10,
45
+ )
46
+ region: str = Field(
47
+ default="us-en",
48
+ description="Region for search results (e.g., 'us-en', 'uk-en', 'ca-en')",
49
+ )
50
+
51
+
52
+ class DuckDuckGoSearchTool(BaseTool):
53
+ """
54
+ Tool that performs web searches using DuckDuckGo search engine for medical research.
55
+
56
+ This tool provides access to real-time web information through DuckDuckGo's
57
+ search API, specifically designed for medical agents that need to retrieve current
58
+ medical information, verify facts, or find resources on medical topics.
59
+
60
+ Features:
61
+ - Real-time web search capability for medical information
62
+ - Configurable number of results (1-10)
63
+ - Regional search support for localized medical results
64
+ - Robust error handling for network issues
65
+ - Structured result formatting for easy parsing
66
+ - Privacy-focused (DuckDuckGo doesn't track users)
67
+ - Medical-focused search optimization
68
+
69
+ Use Cases:
70
+ - Medical fact checking and verification
71
+ - Finding current medical news and updates
72
+ - Researching specific medical topics or questions
73
+ - Gathering multiple perspectives on medical issues
74
+ - Locating official medical resources and documentation
75
+ - Accessing current clinical guidelines and research
76
+
77
+ Rate Limiting:
78
+ DuckDuckGo has rate limits. Avoid making too many rapid requests
79
+ to prevent temporary blocking.
80
+ """
81
+
82
+ name: str = "duckduckgo_search"
83
+ description: str = (
84
+ "Search the web using DuckDuckGo to find current medical information, research, and resources. "
85
+ "Input should be a clear search query with relevant medical keywords. The tool returns a list of relevant web results "
86
+ "with titles, URLs, and brief snippets. Useful for medical fact-checking, finding current medical events, "
87
+ "researching medical topics, and gathering information from reliable medical sources. "
88
+ "Results are privacy-focused and don't track user searches. Optimized for medical research and clinical information."
89
+ )
90
+ args_schema: type[BaseModel] = WebSearchInput
91
+ return_direct: bool = False
92
+
93
+ def __init__(self, **kwargs):
94
+ """Initialize the DuckDuckGo search tool."""
95
+ super().__init__(**kwargs)
96
+
97
+ if DDGS is None:
98
+ logger.error(
99
+ "duckduckgo-search package not installed. Install with: pip install duckduckgo-search"
100
+ )
101
+ raise ImportError(
102
+ "duckduckgo-search package is required for web search functionality"
103
+ )
104
+
105
+ logger.info("DuckDuckGo search tool initialized successfully")
106
+
107
+ def _perform_search_sync(
108
+ self, query: str, max_results: int = 5, region: str = "us-en"
109
+ ) -> Dict[str, Any]:
110
+ """
111
+ Perform the actual web search using DuckDuckGo synchronously.
112
+
113
+ Args:
114
+ query (str): The search query.
115
+ max_results (int): Maximum number of results to return.
116
+ region (str): Region for localized results.
117
+
118
+ Returns:
119
+ Dict[str, Any]: Structured search results.
120
+ """
121
+ logger.info(
122
+ f"Performing web search: '{query}' (max_results={max_results}, region={region})"
123
+ )
124
+
125
+ try:
126
+ # Initialize DDGS with error handling
127
+ with DDGS() as ddgs:
128
+ # Perform the search
129
+ search_results = list(
130
+ ddgs.text(
131
+ keywords=query,
132
+ region=region,
133
+ safesearch="moderate",
134
+ timelimit=None,
135
+ max_results=max_results,
136
+ )
137
+ )
138
+
139
+ # Format results for the agent
140
+ formatted_results = []
141
+ for i, result in enumerate(search_results, 1):
142
+ formatted_result = {
143
+ "rank": i,
144
+ "title": result.get("title", "No title"),
145
+ "url": result.get("href", "No URL"),
146
+ "snippet": result.get("body", "No description available"),
147
+ "source": "DuckDuckGo",
148
+ }
149
+ formatted_results.append(formatted_result)
150
+
151
+ # Create summary for the agent
152
+ if formatted_results:
153
+ summary = (
154
+ f"Found {len(formatted_results)} results for '{query}'. Top results include: "
155
+ + ", ".join([f"{r['title']}" for r in formatted_results[:3]])
156
+ )
157
+ else:
158
+ summary = f"No results found for '{query}'"
159
+
160
+ # Log successful completion
161
+ logger.info(
162
+ f"Web search completed successfully: {len(formatted_results)} results"
163
+ )
164
+
165
+ return {
166
+ "query": query,
167
+ "results_count": len(formatted_results),
168
+ "results": formatted_results,
169
+ "summary": summary,
170
+ "search_engine": "DuckDuckGo",
171
+ "timestamp": datetime.now().isoformat(),
172
+ }
173
+
174
+ except Exception as e:
175
+ error_msg = f"Web search failed for query '{query}': {str(e)}"
176
+ logger.error(f"{error_msg}")
177
+
178
+ return {
179
+ "query": query,
180
+ "results_count": 0,
181
+ "results": [],
182
+ "error": error_msg,
183
+ "search_engine": "DuckDuckGo",
184
+ "timestamp": datetime.now().isoformat(),
185
+ }
186
+
187
+ def _run(
188
+ self,
189
+ query: str,
190
+ max_results: int = 5,
191
+ region: str = "us-en",
192
+ run_manager: CallbackManagerForToolRun | None = None,
193
+ ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
194
+ """
195
+ Execute the web search synchronously.
196
+
197
+ Args:
198
+ query (str): Search query
199
+ max_results (int): Maximum number of results
200
+ region (str): Search region
201
+ run_manager: Callback manager (unused)
202
+
203
+ Returns:
204
+ Tuple[Dict[str, Any], Dict[str, Any]]: A tuple containing:
205
+ - output: Dictionary with search results
206
+ - metadata: Dictionary with execution metadata
207
+ """
208
+ # Create metadata structure
209
+ metadata = {
210
+ "query": query,
211
+ "max_results": max_results,
212
+ "region": region,
213
+ "timestamp": time.time(),
214
+ "tool": "duckduckgo_search",
215
+ "operation": "search",
216
+ }
217
+
218
+ try:
219
+ result = self._perform_search_sync(query, max_results, region)
220
+
221
+ # Check if search was successful
222
+ if "error" in result:
223
+ metadata["analysis_status"] = "failed"
224
+ metadata["error_details"] = result["error"]
225
+ else:
226
+ metadata["analysis_status"] = "completed"
227
+ metadata["results_count"] = result.get("results_count", 0)
228
+
229
+ return result, metadata
230
+
231
+ except Exception as e:
232
+ error_result = {
233
+ "query": query,
234
+ "results_count": 0,
235
+ "results": [],
236
+ "error": str(e),
237
+ "search_engine": "DuckDuckGo",
238
+ "timestamp": datetime.now().isoformat(),
239
+ }
240
+ metadata["analysis_status"] = "failed"
241
+ metadata["error_details"] = str(e)
242
+
243
+ return error_result, metadata
244
+
245
+ async def _arun(
246
+ self,
247
+ query: str,
248
+ max_results: int = 5,
249
+ region: str = "us-en",
250
+ run_manager: AsyncCallbackManagerForToolRun | None = None,
251
+ ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
252
+ """
253
+ Execute the web search asynchronously.
254
+
255
+ Args:
256
+ query (str): Search query
257
+ max_results (int): Maximum number of results
258
+ region (str): Search region
259
+ run_manager: Callback manager (unused)
260
+
261
+ Returns:
262
+ Tuple[Dict[str, Any], Dict[str, Any]]: A tuple containing:
263
+ - output: Dictionary with search results
264
+ - metadata: Dictionary with execution metadata
265
+ """
266
+ # Try to get LangGraph stream writer for progress updates
267
+ writer = None
268
+ try:
269
+ from langgraph.config import get_stream_writer
270
+
271
+ writer = get_stream_writer()
272
+ except Exception:
273
+ # Stream writer not available (outside LangGraph context)
274
+ pass
275
+
276
+ if writer:
277
+ writer(
278
+ {
279
+ "tool_name": "DuckDuckGoSearchTool",
280
+ "status": "started",
281
+ "query": query,
282
+ "max_results": max_results,
283
+ "step": "Initiating web search",
284
+ }
285
+ )
286
+
287
+ try:
288
+ if writer:
289
+ writer(
290
+ {
291
+ "tool_name": "DuckDuckGoSearchTool",
292
+ "status": "searching",
293
+ "step": "Fetching results from DuckDuckGo API",
294
+ }
295
+ )
296
+
297
+ # Use asyncio to run sync search in executor
298
+ loop = asyncio.get_event_loop()
299
+ result, metadata = await loop.run_in_executor(
300
+ None, self._run, query, max_results, region
301
+ )
302
+
303
+ if writer:
304
+ # Parse result to get count for progress update
305
+ results_count = result.get("results_count", 0)
306
+ writer(
307
+ {
308
+ "tool_name": "DuckDuckGoSearchTool",
309
+ "status": "completed",
310
+ "step": f"Search completed with {results_count} results",
311
+ "results_count": results_count,
312
+ }
313
+ )
314
+
315
+ return result, metadata
316
+
317
+ except Exception as e:
318
+ if writer:
319
+ writer(
320
+ {
321
+ "tool_name": "DuckDuckGoSearchTool",
322
+ "status": "error",
323
+ "step": f"Search failed: {str(e)}",
324
+ "error": str(e),
325
+ }
326
+ )
327
+
328
+ error_result = {
329
+ "query": query,
330
+ "results_count": 0,
331
+ "results": [],
332
+ "error": str(e),
333
+ "search_engine": "DuckDuckGo",
334
+ "timestamp": datetime.now().isoformat(),
335
+ }
336
+
337
+ metadata = {
338
+ "query": query,
339
+ "max_results": max_results,
340
+ "region": region,
341
+ "timestamp": time.time(),
342
+ "tool": "duckduckgo_search",
343
+ "operation": "search",
344
+ "analysis_status": "failed",
345
+ "error_details": str(e),
346
+ }
347
+
348
+ return error_result, metadata
349
+
350
+ def get_search_summary(
351
+ self, query: str, max_results: int = 3
352
+ ) -> dict[str, str | list[str]]:
353
+ """
354
+ Get a quick summary of search results for a given query.
355
+
356
+ Args:
357
+ query (str): The search query.
358
+ max_results (int): Maximum number of results to summarize.
359
+
360
+ Returns:
361
+ Dict[str, Union[str, List[str]]]: Summary of search results.
362
+ """
363
+ try:
364
+ result, _ = self._run(query, max_results)
365
+
366
+ if "error" in result:
367
+ return {
368
+ "query": query,
369
+ "status": "error",
370
+ "error": result["error"],
371
+ "results": [],
372
+ }
373
+
374
+ # Extract key information
375
+ results = result.get("results", [])
376
+ titles = [r["title"] for r in results]
377
+ urls = [r["url"] for r in results]
378
+ snippets = [
379
+ (
380
+ r["snippet"][:100] + "..."
381
+ if len(r["snippet"]) > 100
382
+ else r["snippet"]
383
+ )
384
+ for r in results
385
+ ]
386
+
387
+ return {
388
+ "query": query,
389
+ "status": "success",
390
+ "total_results": result.get("results_count", 0),
391
+ "titles": titles,
392
+ "urls": urls,
393
+ "snippets": snippets,
394
+ }
395
+
396
+ except Exception as e:
397
+ logger.error(f"Error getting search summary: {e}")
398
+ return {
399
+ "query": query,
400
+ "status": "error",
401
+ "error": str(e),
402
+ "results": [],
403
+ }
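A short usage sketch for the new tool, based on the `_run` signature and result format defined above; the query is only an example, the call bypasses the LangChain invocation layer for brevity, and it assumes the `duckduckgo-search` package is installed.

```python
from medrax.tools.browsing import DuckDuckGoSearchTool

# Example standalone invocation outside the agent loop.
search_tool = DuckDuckGoSearchTool()
output, metadata = search_tool._run(
    query="latest ACR guidelines for incidental pulmonary nodules",
    max_results=3,
)
for hit in output["results"]:
    print(hit["rank"], hit["title"], hit["url"])
```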
medrax/tools/{web_browser.py → browsing/web_browser.py} RENAMED
File without changes
medrax/tools/classification/arcplus.py CHANGED
@@ -345,7 +345,8 @@ class ArcPlusClassifierTool(BaseTool):
          predictions = predictions[: len(self.disease_list)]
 
          # Create output dictionary mapping disease names to probabilities
-         output = dict(zip(self.disease_list, predictions.astype(float)))
+         # Convert numpy floats to native Python floats for proper serialization
+         output = dict(zip(self.disease_list, [float(pred) for pred in predictions]))
 
          metadata = {
              "image_path": image_path,
medrax/tools/llava_med.py DELETED
@@ -1,193 +0,0 @@
1
- from typing import Any, Dict, Optional, Tuple, Type
2
- from pydantic import BaseModel, Field
3
-
4
- import torch
5
-
6
- from langchain_core.callbacks import (
7
- AsyncCallbackManagerForToolRun,
8
- CallbackManagerForToolRun,
9
- )
10
- from langchain_core.tools import BaseTool
11
-
12
- from PIL import Image
13
-
14
-
15
- from medrax.llava.conversation import conv_templates
16
- from medrax.llava.model.builder import load_pretrained_model
17
- from medrax.llava.mm_utils import tokenizer_image_token, process_images
18
- from medrax.llava.constants import (
19
- IMAGE_TOKEN_INDEX,
20
- DEFAULT_IMAGE_TOKEN,
21
- DEFAULT_IM_START_TOKEN,
22
- DEFAULT_IM_END_TOKEN,
23
- )
24
-
25
-
26
- class LlavaMedInput(BaseModel):
27
- """Input for the LLaVA-Med Visual QA tool. Only supports JPG or PNG images."""
28
-
29
- question: str = Field(..., description="The question to ask about the medical image")
30
- image_path: Optional[str] = Field(
31
- None,
32
- description="Path to the medical image file (optional), only supports JPG or PNG images",
33
- )
34
-
35
-
36
- class LlavaMedTool(BaseTool):
37
- """Tool that performs medical visual question answering using LLaVA-Med.
38
-
39
- This tool uses a large language model fine-tuned on medical images to answer
40
- questions about medical images. It can handle both image-based questions and
41
- general medical questions without images.
42
- """
43
-
44
- name: str = "llava_med_qa"
45
- description: str = (
46
- "A tool that answers questions about biomedical images and general medical questions using LLaVA-Med. "
47
- "While it can process chest X-rays, it may not be as reliable for detailed chest X-ray analysis. "
48
- "Input should be a question and optionally a path to a medical image file."
49
- )
50
- args_schema: Type[BaseModel] = LlavaMedInput
51
- tokenizer: Any = None
52
- model: Any = None
53
- image_processor: Any = None
54
- context_len: int = 200000
55
-
56
- def __init__(
57
- self,
58
- model_path: str = "microsoft/llava-med-v1.5-mistral-7b",
59
- cache_dir: str = "/model-weights",
60
- low_cpu_mem_usage: bool = True,
61
- torch_dtype: torch.dtype = torch.bfloat16,
62
- device: str = "cuda",
63
- load_in_4bit: bool = False,
64
- load_in_8bit: bool = False,
65
- **kwargs,
66
- ):
67
- super().__init__()
68
- self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
69
- model_path=model_path,
70
- model_base=None,
71
- model_name=model_path,
72
- load_in_4bit=load_in_4bit,
73
- load_in_8bit=load_in_8bit,
74
- cache_dir=cache_dir,
75
- low_cpu_mem_usage=low_cpu_mem_usage,
76
- torch_dtype=torch_dtype,
77
- device=device,
78
- **kwargs,
79
- )
80
- self.model.eval()
81
-
82
- def _process_input(
83
- self, question: str, image_path: Optional[str] = None
84
- ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
85
- if self.model.config.mm_use_im_start_end:
86
- question = (
87
- DEFAULT_IM_START_TOKEN
88
- + DEFAULT_IMAGE_TOKEN
89
- + DEFAULT_IM_END_TOKEN
90
- + "\n"
91
- + question
92
- )
93
- else:
94
- question = DEFAULT_IMAGE_TOKEN + "\n" + question
95
-
96
- conv = conv_templates["vicuna_v1"].copy()
97
- conv.append_message(conv.roles[0], question)
98
- conv.append_message(conv.roles[1], None)
99
- prompt = conv.get_prompt()
100
-
101
- input_ids = (
102
- tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
103
- .unsqueeze(0)
104
- .cuda()
105
- )
106
-
107
- image_tensor = None
108
- if image_path:
109
- image = Image.open(image_path)
110
- image_tensor = process_images([image], self.image_processor, self.model.config)[0]
111
- image_tensor = image_tensor.unsqueeze(0).half().cuda()
112
-
113
- return input_ids, image_tensor
114
-
115
- def _run(
116
- self,
117
- question: str,
118
- image_path: Optional[str] = None,
119
- run_manager: Optional[CallbackManagerForToolRun] = None,
120
- ) -> Tuple[Dict[str, Any], Dict]:
121
- """Answer a medical question, optionally based on an input image.
122
-
123
- Args:
124
- question (str): The medical question to answer.
125
- image_path (Optional[str]): The path to the medical image file (if applicable).
126
- run_manager (Optional[CallbackManagerForToolRun]): The callback manager for the tool run.
127
-
128
- Returns:
129
- Tuple[Dict[str, Any], Dict]: A tuple containing the output dictionary and metadata dictionary.
130
-
131
- Raises:
132
- Exception: If there's an error processing the input or generating the answer.
133
- """
134
- try:
135
- input_ids, image_tensor = self._process_input(question, image_path)
136
- input_ids = input_ids.to(device=self.model.device)
137
- image_tensor = image_tensor.to(device=self.model.device, dtype=self.model.dtype)
138
-
139
- with torch.inference_mode():
140
- output_ids = self.model.generate(
141
- input_ids,
142
- images=image_tensor,
143
- do_sample=False,
144
- temperature=0.2,
145
- max_new_tokens=500,
146
- use_cache=True,
147
- )
148
-
149
- answer = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
150
-
151
- output = {
152
- "answer": answer,
153
- }
154
-
155
- metadata = {
156
- "question": question,
157
- "image_path": image_path,
158
- "analysis_status": "completed",
159
- }
160
- return output, metadata
161
- except Exception as e:
162
- output = {"error": f"Error generating answer: {str(e)}"}
163
- metadata = {
164
- "question": question,
165
- "image_path": image_path,
166
- "analysis_status": "failed",
167
- }
168
- return output, metadata
169
-
170
- async def _arun(
171
- self,
172
- question: str,
173
- image_path: Optional[str] = None,
174
- run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
175
- ) -> Tuple[Dict[str, Any], Dict]:
176
- """Asynchronously answer a medical question, optionally based on an input image.
177
-
178
- This method currently calls the synchronous version, as the model inference
179
- is not inherently asynchronous. For true asynchronous behavior, consider
180
- using a separate thread or process.
181
-
182
- Args:
183
- question (str): The medical question to answer.
184
- image_path (Optional[str]): The path to the medical image file (if applicable).
185
- run_manager (Optional[AsyncCallbackManagerForToolRun]): The async callback manager for the tool run.
186
-
187
- Returns:
188
- Tuple[Dict[str, Any], Dict]: A tuple containing the output dictionary and metadata dictionary.
189
-
190
- Raises:
191
- Exception: If there's an error processing the input or generating the answer.
192
- """
193
- return self._run(question, image_path)
medrax/tools/rag.py CHANGED
@@ -48,14 +48,14 @@ class RAGTool(BaseTool):
          self.rag = CohereRAG(config)
          self.chain = self.rag.initialize_rag(with_memory=True)
 
-     def _run(self, query: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+     def _run(self, query: str) -> Tuple[Dict[str, Any], Dict]:
          """Execute the RAG tool with the given query.
 
          Args:
              query (str): Medical question to answer
 
          Returns:
-             Tuple[Dict[str, Any], Dict[str, Any]]: Output dictionary and metadata dictionary
+             Tuple[Dict[str, Any], Dict]: Output dictionary and metadata dictionary
          """
          try:
              result = self.chain.invoke({"query": query})
@@ -87,14 +87,14 @@ class RAGTool(BaseTool):
          }
          return output, metadata
 
-     async def _arun(self, query: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+     async def _arun(self, query: str) -> Tuple[Dict[str, Any], Dict]:
          """Async version of _run.
 
          Args:
              query (str): Medical question to answer
 
          Returns:
-             Tuple[Dict[str, Any], Dict[str, Any]]: Output dictionary and metadata dictionary
+             Tuple[Dict[str, Any], Dict]: Output dictionary and metadata dictionary
 
          Raises:
              NotImplementedError: Async not implemented yet
medrax/tools/segmentation/__init__.py ADDED
@@ -0,0 +1,12 @@
+ """Medical image segmentation tools for MedRAX2."""
+
+ from .segmentation import ChestXRaySegmentationTool, ChestXRaySegmentationInput, OrganMetrics
+ from .medsam2 import MedSAM2Tool, MedSAM2Input
+
+ __all__ = [
+     "ChestXRaySegmentationTool",
+     "ChestXRaySegmentationInput",
+     "OrganMetrics",
+     "MedSAM2Tool",
+     "MedSAM2Input"
+ ]
medrax/tools/{medsam2.py → segmentation/medsam2.py} RENAMED
@@ -15,7 +15,7 @@ from langchain_core.callbacks import (
  from langchain_core.tools import BaseTool
 
  # Add MedSAM2 to Python path for proper module resolution
- medsam2_path = str(Path(__file__).parent.parent.parent / "MedSAM2")
+ medsam2_path = str(Path(__file__).parent.parent.parent.parent / "MedSAM2")
  if medsam2_path not in sys.path:
      sys.path.append(medsam2_path)
 
@@ -93,7 +93,7 @@ class MedSAM2Tool(BaseTool):
      if GlobalHydra.instance().is_initialized():
          GlobalHydra.instance().clear()
 
-     config_dir = Path(__file__).parent.parent.parent / "MedSAM2" / "sam2" / "configs"
+     config_dir = Path(__file__).parent.parent.parent.parent / "MedSAM2" / "sam2" / "configs"
      initialize_config_dir(config_dir=str(config_dir), version_base="1.2")
 
      hf_hub_download(
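The extra `.parent` reflects the module moving one directory deeper (`medrax/tools/medsam2.py` → `medrax/tools/segmentation/medsam2.py`), so one more hop is needed to reach the repository root that contains `MedSAM2/`. A quick check with a hypothetical absolute path:

```python
from pathlib import Path

# Hypothetical absolute location of the moved module.
module = Path("/repo/medrax/tools/segmentation/medsam2.py")

print(module.parent.parent.parent)         # /repo/medrax  (old depth, now one level short)
print(module.parent.parent.parent.parent)  # /repo         (repo root holding MedSAM2/)
```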
medrax/tools/{segmentation.py → segmentation/segmentation.py} RENAMED
File without changes
medrax/tools/vqa/llava_med.py CHANGED
@@ -117,7 +117,7 @@ class LlavaMedTool(BaseTool):
          question: str,
          image_path: Optional[str] = None,
          run_manager: Optional[CallbackManagerForToolRun] = None,
-     ) -> Tuple[str, Dict]:
+     ) -> Tuple[Dict[str, Any], Dict]:
          """Answer a medical question, optionally based on an input image.
 
          Args:
@@ -126,7 +126,7 @@ class LlavaMedTool(BaseTool):
              run_manager (Optional[CallbackManagerForToolRun]): The callback manager for the tool run.
 
          Returns:
-             Tuple[str, Dict]: A tuple containing the model's answer and any additional metadata.
+             Tuple[Dict[str, Any], Dict]: A tuple containing the output dictionary and metadata dictionary.
 
          Raises:
              Exception: If there's an error processing the input or generating the answer.
@@ -146,7 +146,12 @@ class LlavaMedTool(BaseTool):
                      use_cache=True,
                  )
 
-             output = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
+             answer = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
+
+             output = {
+                 "answer": answer,
+             }
+
              metadata = {
                  "question": question,
                  "image_path": image_path,
@@ -154,18 +159,20 @@ class LlavaMedTool(BaseTool):
              }
              return output, metadata
          except Exception as e:
-             return f"Error generating answer: {str(e)}", {
+             output = {"error": f"Error generating answer: {str(e)}"}
+             metadata = {
                  "question": question,
                  "image_path": image_path,
                  "analysis_status": "failed",
              }
+             return output, metadata
 
      async def _arun(
          self,
          question: str,
          image_path: Optional[str] = None,
          run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
-     ) -> Tuple[str, Dict]:
+     ) -> Tuple[Dict[str, Any], Dict]:
          """Asynchronously answer a medical question, optionally based on an input image.
 
          This method currently calls the synchronous version, as the model inference
@@ -178,9 +185,9 @@ class LlavaMedTool(BaseTool):
              run_manager (Optional[AsyncCallbackManagerForToolRun]): The async callback manager for the tool run.
 
          Returns:
-             Tuple[str, Dict]: A tuple containing the model's answer and any additional metadata.
+             Tuple[Dict[str, Any], Dict]: A tuple containing the output dictionary and metadata dictionary.
 
          Raises:
              Exception: If there's an error processing the input or generating the answer.
          """
-         return self._run(question, image_path)
+         return self._run(question, image_path)
medrax/tools/vqa/xray_vqa.py CHANGED
@@ -183,4 +183,4 @@ class CheXagentXRayVQATool(BaseTool):
          run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
      ) -> Tuple[Dict[str, Any], Dict]:
          """Async version of _run."""
-         return self._run(image_paths, prompt, max_new_tokens)
+         return self._run(image_paths, prompt, max_new_tokens)
medrax/tools/{generation.py → xray_generation.py} RENAMED
File without changes
medrax/tools/xray_vqa.py DELETED
@@ -1,186 +0,0 @@
1
- from typing import Dict, List, Optional, Tuple, Type, Any
2
- from pathlib import Path
3
- from pydantic import BaseModel, Field
4
-
5
- import torch
6
- import transformers
7
- from transformers import AutoModelForCausalLM, AutoTokenizer
8
- from langchain_core.callbacks import (
9
- AsyncCallbackManagerForToolRun,
10
- CallbackManagerForToolRun,
11
- )
12
- from langchain_core.tools import BaseTool
13
-
14
-
15
- class XRayVQAToolInput(BaseModel):
16
- """Input schema for the CheXagent Tool."""
17
-
18
- image_paths: List[str] = Field(
19
- ..., description="List of paths to chest X-ray images to analyze"
20
- )
21
- prompt: str = Field(..., description="Question or instruction about the chest X-ray images")
22
- max_new_tokens: int = Field(
23
- 512, description="Maximum number of tokens to generate in the response"
24
- )
25
-
26
-
27
- class XRayVQATool(BaseTool):
28
- """Tool that leverages CheXagent for comprehensive chest X-ray analysis."""
29
-
30
- name: str = "chest_xray_expert"
31
- description: str = (
32
- "A versatile tool for analyzing chest X-rays. "
33
- "Can perform multiple tasks including: visual question answering, report generation, "
34
- "abnormality detection, comparative analysis, anatomical description, "
35
- "and clinical interpretation. Input should be paths to X-ray images "
36
- "and a natural language prompt describing the analysis needed."
37
- )
38
- args_schema: Type[BaseModel] = XRayVQAToolInput
39
- return_direct: bool = True
40
- cache_dir: Optional[str] = None
41
- device: Optional[str] = None
42
- dtype: torch.dtype = torch.bfloat16
43
- tokenizer: Optional[AutoTokenizer] = None
44
- model: Optional[AutoModelForCausalLM] = None
45
-
46
- def __init__(
47
- self,
48
- model_name: str = "StanfordAIMI/CheXagent-2-3b",
49
- device: Optional[str] = "cuda",
50
- dtype: torch.dtype = torch.bfloat16,
51
- cache_dir: Optional[str] = None,
52
- **kwargs: Any,
53
- ) -> None:
54
- """Initialize the XRayVQATool.
55
-
56
- Args:
57
- model_name: Name of the CheXagent model to use
58
- device: Device to run model on (cuda/cpu)
59
- dtype: Data type for model weights
60
- cache_dir: Directory to cache downloaded models
61
- **kwargs: Additional arguments
62
- """
63
- super().__init__(**kwargs)
64
-
65
- # Dangerous code, but works for now
66
- import transformers
67
-
68
- original_transformers_version = transformers.__version__
69
- transformers.__version__ = "4.40.0"
70
-
71
- self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
72
- self.dtype = dtype
73
- self.cache_dir = cache_dir
74
-
75
- # Load tokenizer and model
76
- self.tokenizer = AutoTokenizer.from_pretrained(
77
- model_name,
78
- trust_remote_code=True,
79
- cache_dir=cache_dir,
80
- )
81
- self.model = AutoModelForCausalLM.from_pretrained(
82
- model_name,
83
- device_map=self.device,
84
- trust_remote_code=True,
85
- cache_dir=cache_dir,
86
- )
87
- self.model = self.model.to(dtype=self.dtype)
88
- self.model.eval()
89
-
90
- transformers.__version__ = original_transformers_version
91
-
92
- def _generate_response(self, image_paths: List[str], prompt: str, max_new_tokens: int) -> str:
93
- """Generate response using CheXagent model.
94
-
95
- Args:
96
- image_paths: List of paths to chest X-ray images
97
- prompt: Question or instruction about the images
98
- max_new_tokens: Maximum number of tokens to generate
99
- Returns:
100
- str: Model's response
101
- """
102
- query = self.tokenizer.from_list_format(
103
- [*[{"image": path} for path in image_paths], {"text": prompt}]
104
- )
105
- conv = [
106
- {"from": "system", "value": "You are a helpful assistant."},
107
- {"from": "human", "value": query},
108
- ]
109
- input_ids = self.tokenizer.apply_chat_template(
110
- conv, add_generation_prompt=True, return_tensors="pt"
111
- ).to(device=self.device)
112
-
113
- # Run inference
114
- with torch.inference_mode():
115
- output = self.model.generate(
116
- input_ids,
117
- do_sample=False,
118
- num_beams=1,
119
- temperature=1.0,
120
- top_p=1.0,
121
- use_cache=True,
122
- max_new_tokens=max_new_tokens,
123
- )[0]
124
- response = self.tokenizer.decode(output[input_ids.size(1) : -1])
125
-
126
- return response
127
-
128
- def _run(
129
- self,
130
- image_paths: List[str],
131
- prompt: str,
132
- max_new_tokens: int = 512,
133
- run_manager: Optional[CallbackManagerForToolRun] = None,
134
- ) -> Tuple[Dict[str, Any], Dict]:
135
- """Execute the chest X-ray analysis.
136
-
137
- Args:
138
- image_paths: List of paths to chest X-ray images
139
- prompt: Question or instruction about the images
140
- max_new_tokens: Maximum number of tokens to generate
141
- run_manager: Optional callback manager
142
-
143
- Returns:
144
- Tuple[Dict[str, Any], Dict]: Output dictionary and metadata dictionary
145
- """
146
- try:
147
- # Verify image paths
148
- for path in image_paths:
149
- if not Path(path).is_file():
150
- raise FileNotFoundError(f"Image file not found: {path}")
151
-
152
- response = self._generate_response(image_paths, prompt, max_new_tokens)
153
-
154
- output = {
155
- "response": response,
156
- }
157
-
158
- metadata = {
159
- "image_paths": image_paths,
160
- "prompt": prompt,
161
- "max_new_tokens": max_new_tokens,
162
- "analysis_status": "completed",
163
- }
164
-
165
- return output, metadata
166
-
167
- except Exception as e:
168
- output = {"error": str(e)}
169
- metadata = {
170
- "image_paths": image_paths,
171
- "prompt": prompt,
172
- "max_new_tokens": max_new_tokens,
173
- "analysis_status": "failed",
174
- "error_details": str(e),
175
- }
176
- return output, metadata
177
-
178
- async def _arun(
179
- self,
180
- image_paths: List[str],
181
- prompt: str,
182
- max_new_tokens: int = 512,
183
- run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
184
- ) -> Tuple[Dict[str, Any], Dict]:
185
- """Async version of _run."""
186
- return self._run(image_paths, prompt, max_new_tokens)
pyproject.toml CHANGED
@@ -50,7 +50,7 @@ dependencies = [
      "fastapi>=0.68.0",
      "einops>=0.3.0",
      "einops-exts>=0.0.4",
-     "timm>=0.5.0",
+     "timm==0.5.4",
      "tiktoken>=0.3.0",
      "openai>=0.27.0",
      "backoff>=1.10.0",
@@ -75,6 +75,7 @@ dependencies = [
      "seaborn>=0.12.0",
      "huggingface_hub>=0.17.0",
      "iopath>=0.1.10",
+     "duckduckgo-search>=4.0.0",
  ]
 
  [project.optional-dependencies]