Spaces:

samwell
/

medrax2

Sleeping

App Files Community

victorli committed on Aug 4, 2025

Commit

e97f266

1 Parent(s): 8d52c4a

cleared merge issues

Browse files

Files changed (4) hide show

benchmarking/benchmarks/rexvqa_benchmark.py +6 -6
benchmarking/llm_providers/medrax_provider.py +10 -10
main.py +2 -1
pyproject.toml +0 -3

benchmarking/benchmarks/rexvqa_benchmark.py CHANGED Viewed

@@ -34,20 +34,20 @@ class ReXVQABenchmark(Benchmark):
             data_dir (str): Directory to store/cache downloaded data
             **kwargs: Additional configuration parameters
                 split (str): Dataset split to use (default: 'test')
-                cache_dir (str): Directory for caching HuggingFace datasets
                 trust_remote_code (bool): Whether to trust remote code (default: False)
                 max_questions (int): Maximum number of questions to load (default: None, load all)
                 images_dir (str): Directory containing extracted PNG images (default: None)
         """
         self.split = kwargs.get("split", "test")
-        self.cache_dir = kwargs.get("cache_dir", None)
         self.trust_remote_code = kwargs.get("trust_remote_code", False)
         self.max_questions = kwargs.get("max_questions", None)
-        self.images_dir = "benchmarking/data/rexvqa/images/deid_png"
         self.image_dataset = None
         self.image_mapping = {}  # Maps study_id to image data
         super().__init__(data_dir, **kwargs)
     @staticmethod
     def download_rexgradient_images(output_dir: str = "benchmarking/data/rexvqa", repo_id: str = "rajpurkarlab/ReXGradient-160K"):
@@ -166,8 +166,8 @@ class ReXVQABenchmark(Benchmark):
         """Load ReXVQA data from local JSON file."""
         try:
             # Check for images and test_vqa_data.json, download if missing
-            self.download_test_vqa_data_json()
-            self.download_rexgradient_images()
             # Construct path to the JSON file
             json_file_path = os.path.join("benchmarking", "data", "rexvqa", "metadata", "test_vqa_data.json")
@@ -197,7 +197,7 @@ class ReXVQABenchmark(Benchmark):
                 self.image_dataset = load_dataset(
                     "rajpurkarlab/ReXGradient-160K",
                     split="test",
-                    cache_dir=self.cache_dir,
                     trust_remote_code=self.trust_remote_code
                 )
                 print(f"Loaded {len(self.image_dataset)} image metadata entries from ReXGradient-160K")

             data_dir (str): Directory to store/cache downloaded data
             **kwargs: Additional configuration parameters
                 split (str): Dataset split to use (default: 'test')
                 trust_remote_code (bool): Whether to trust remote code (default: False)
                 max_questions (int): Maximum number of questions to load (default: None, load all)
                 images_dir (str): Directory containing extracted PNG images (default: None)
         """
         self.split = kwargs.get("split", "test")
         self.trust_remote_code = kwargs.get("trust_remote_code", False)
         self.max_questions = kwargs.get("max_questions", None)
         self.image_dataset = None
         self.image_mapping = {}  # Maps study_id to image data
         super().__init__(data_dir, **kwargs)
+        # Set images_dir after parent initialization
+        self.images_dir = f"{self.data_dir}/images/deid_png"
     @staticmethod
     def download_rexgradient_images(output_dir: str = "benchmarking/data/rexvqa", repo_id: str = "rajpurkarlab/ReXGradient-160K"):
         """Load ReXVQA data from local JSON file."""
         try:
             # Check for images and test_vqa_data.json, download if missing
+            self.download_test_vqa_data_json(self.data_dir)
+            self.download_rexgradient_images(self.data_dir)
             # Construct path to the JSON file
             json_file_path = os.path.join("benchmarking", "data", "rexvqa", "metadata", "test_vqa_data.json")
                 self.image_dataset = load_dataset(
                     "rajpurkarlab/ReXGradient-160K",
                     split="test",
+                    cache_dir=self.data_dir,
                     trust_remote_code=self.trust_remote_code
                 )
                 print(f"Loaded {len(self.image_dataset)} image metadata entries from ReXGradient-160K")

benchmarking/llm_providers/medrax_provider.py CHANGED Viewed

@@ -33,15 +33,15 @@ class MedRAXProvider(LLMProvider):
             print("Starting server...")
             selected_tools = [
-                "ChestXRayReportGeneratorTool",  # For generating medical reports from X-rays
-                "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
-                "WebBrowserTool",  # For web browsing and search capabilities
-                "TorchXRayVisionClassifierTool",  # For classifying chest X-ray images using TorchXRayVision
-                "ArcPlusClassifierTool",  # For advanced chest X-ray classification using ArcPlus
-                "DuckDuckGoSearchTool",  # For privacy-focused web search using DuckDuckGo
-                "XRayVQATool",  # For visual question answering on X-rays
-                "XRayPhraseGroundingTool",  # For locating described features in X-rays
                 "MedGemmaVQATool"
             ]
             rag_config = RAGConfig(
@@ -64,11 +64,11 @@ class MedRAXProvider(LLMProvider):
             agent, tools_dict = initialize_agent(
                 prompt_file="medrax/docs/system_prompts.txt",
                 tools_to_use=selected_tools,
-                model_dir="/model-weights",
                 temp_dir="temp",  # Change this to the path of the temporary directory
                 device="cuda:0",
                 model=self.model_name,  # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
-                temperature=0.3,
                 top_p=0.95,
                 model_kwargs=model_kwargs,
                 rag_config=rag_config,

             print("Starting server...")
             selected_tools = [
+                # "TorchXRayVisionClassifierTool",  # For classifying chest X-ray images using TorchXRayVision
+                # "ArcPlusClassifierTool",  # For advanced chest X-ray classification using ArcPlus
+                # "ChestXRayReportGeneratorTool",  # For generating medical reports from X-rays
+                # "XRayVQATool",  # For visual question answering on X-rays
                 "MedGemmaVQATool"
+                # "XRayPhraseGroundingTool",  # For locating described features in X-rays
+                # "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
+                # "WebBrowserTool",  # For web browsing and search capabilities
+                # "DuckDuckGoSearchTool",  # For privacy-focused web search using DuckDuckGo
             ]
             rag_config = RAGConfig(
             agent, tools_dict = initialize_agent(
                 prompt_file="medrax/docs/system_prompts.txt",
                 tools_to_use=selected_tools,
+                model_dir="/scratch/ssd004/scratch/victorli/model-weights",
                 temp_dir="temp",  # Change this to the path of the temporary directory
                 device="cuda:0",
                 model=self.model_name,  # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
+                temperature=1.0,
                 top_p=0.95,
                 model_kwargs=model_kwargs,
                 rag_config=rag_config,

main.py CHANGED Viewed

@@ -33,7 +33,7 @@ _ = load_dotenv()
 def initialize_agent(
     prompt_file: str,
     tools_to_use: Optional[List[str]] = None,
-    model_dir: str = "/model-weights",
     temp_dir: str = "temp",
     device: str = "cpu",
     model: str = "gpt-4.1-2025-04-14",
@@ -88,6 +88,7 @@ def initialize_agent(
         "DicomProcessorTool": lambda: DicomProcessorTool(temp_dir=temp_dir),
         "MedicalRAGTool": lambda: RAGTool(config=rag_config),
         "WebBrowserTool": lambda: WebBrowserTool(),
         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),

 def initialize_agent(
     prompt_file: str,
     tools_to_use: Optional[List[str]] = None,
+    model_dir: str = "/scratch/ssd004/scratch/victorli/model-weights",
     temp_dir: str = "temp",
     device: str = "cpu",
     model: str = "gpt-4.1-2025-04-14",
         "DicomProcessorTool": lambda: DicomProcessorTool(temp_dir=temp_dir),
         "MedicalRAGTool": lambda: RAGTool(config=rag_config),
         "WebBrowserTool": lambda: WebBrowserTool(),
+        "DuckDuckGoSearchTool": lambda: DuckDuckGoSearchTool(),
         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),

pyproject.toml CHANGED Viewed

@@ -57,7 +57,6 @@ dependencies = [
     "torch>=2.2.0",
     "torchvision>=0.10.0",
     "scikit-image>=0.18.0",
-    "gradio>=5.0.0",
     "opencv-python>=4.8.0",
     "matplotlib>=3.8.0",
     "diffusers>=0.20.0",
@@ -65,13 +64,11 @@ dependencies = [
     "pylibjpeg>=1.0.0",
     "jupyter>=1.0.0",
     "albumentations>=1.0.0",
-    "pyarrow>=10.0.0",
     "chromadb>=0.0.10",
     "pinecone-client>=3.2.2",
     "langchain-pinecone>=0.0.1",
     "langchain-google-genai>=0.1.0",
     "ray>=2.9.0",
-    "langchain-sandbox>=0.0.6",
     "seaborn>=0.12.0",
     "huggingface_hub>=0.17.0",
     "iopath>=0.1.10",

     "torch>=2.2.0",
     "torchvision>=0.10.0",
     "scikit-image>=0.18.0",
     "opencv-python>=4.8.0",
     "matplotlib>=3.8.0",
     "diffusers>=0.20.0",
     "pylibjpeg>=1.0.0",
     "jupyter>=1.0.0",
     "albumentations>=1.0.0",
     "chromadb>=0.0.10",
     "pinecone-client>=3.2.2",
     "langchain-pinecone>=0.0.1",
     "langchain-google-genai>=0.1.0",
     "ray>=2.9.0",
     "seaborn>=0.12.0",
     "huggingface_hub>=0.17.0",
     "iopath>=0.1.10",