Adibvafa committed on
Commit 7393de8 · 1 Parent(s): 9a2c640

Improve style

Files changed (38)
  1. medrax/agent/agent.py +1 -4
  2. medrax/llava/conversation.py +1 -3
  3. medrax/llava/eval/eval_multimodal_chat_gpt_score.py +3 -6
  4. medrax/llava/eval/llm.py +8 -23
  5. medrax/llava/eval/model_vqa.py +2 -8
  6. medrax/llava/eval/summarize_gpt_review.py +3 -7
  7. medrax/llava/mm_utils.py +4 -14
  8. medrax/llava/model/builder.py +4 -12
  9. medrax/llava/model/language_model/llava_mistral.py +1 -3
  10. medrax/llava/model/llava_arch.py +13 -39
  11. medrax/llava/model/multimodal_encoder/builder.py +2 -8
  12. medrax/llava/model/multimodal_projector/builder.py +1 -3
  13. medrax/llava/serve/cli.py +1 -3
  14. medrax/llava/serve/controller.py +3 -6
  15. medrax/llava/serve/gradio_web_server.py +4 -12
  16. medrax/llava/serve/model_worker.py +6 -14
  17. medrax/llava/serve/test_message.py +2 -6
  18. medrax/llava/utils.py +1 -3
  19. medrax/models/model_factory.py +6 -15
  20. medrax/rag/rag.py +3 -9
  21. medrax/tools/browsing/__init__.py +3 -3
  22. medrax/tools/browsing/duckduckgo.py +12 -33
  23. medrax/tools/browsing/web_browser.py +3 -9
  24. medrax/tools/classification/__init__.py +1 -6
  25. medrax/tools/classification/arcplus.py +5 -17
  26. medrax/tools/classification/torchxrayvision.py +1 -3
  27. medrax/tools/dicom.py +1 -3
  28. medrax/tools/grounding.py +4 -13
  29. medrax/tools/rag.py +1 -1
  30. medrax/tools/report_generation.py +4 -14
  31. medrax/tools/segmentation/__init__.py +1 -7
  32. medrax/tools/segmentation/medsam2.py +70 -78
  33. medrax/tools/segmentation/segmentation.py +10 -30
  34. medrax/tools/utils.py +5 -15
  35. medrax/tools/vqa/__init__.py +4 -4
  36. medrax/tools/vqa/llava_med.py +4 -12
  37. medrax/tools/vqa/xray_vqa.py +6 -12
  38. medrax/tools/xray_generation.py +12 -23
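All 38 files follow the same pattern: statements that were wrapped across several lines are collapsed onto a single line, leaving runtime behavior unchanged. A minimal before/after sketch of that pattern, adapted from the medrax/agent/agent.py hunk shown below (the surrounding StateGraph setup is assumed unchanged):

# Before: the call is wrapped across three lines
workflow.add_conditional_edges(
    "agent", self.has_tool_calls, {True: "tools", False: END}
)

# After: the same call on a single line
workflow.add_conditional_edges("agent", self.has_tool_calls, {True: "tools", False: END})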
medrax/agent/agent.py CHANGED
@@ -62,9 +62,7 @@ class Agent:
  workflow = StateGraph(AgentState)
  workflow.add_node("agent", self.process_request)
  workflow.add_node("tools", self.tool_node)
- workflow.add_conditional_edges(
- "agent", self.has_tool_calls, {True: "tools", False: END}
- )
+ workflow.add_conditional_edges("agent", self.has_tool_calls, {True: "tools", False: END})
  workflow.add_edge("tools", "agent")
  workflow.set_entry_point("agent")

@@ -99,4 +97,3 @@ class Agent:
  """
  response = state["messages"][-1]
  return len(response.tool_calls) > 0
-
medrax/llava/conversation.py CHANGED
@@ -230,9 +230,7 @@ class Conversation:
  buffered = BytesIO()
  image.save(buffered, format="JPEG")
  img_b64_str = base64.b64encode(buffered.getvalue()).decode()
- img_str = (
- f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />'
- )
+ img_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />'
  msg = img_str + msg.replace("<image>", "").strip()
  ret.append([msg, None])
  else:
medrax/llava/eval/eval_multimodal_chat_gpt_score.py CHANGED
@@ -14,6 +14,7 @@ INSTRUCT_PROMPT = """We would like to request your feedback on the performance o
  Please first output a single line containing only two values indicating the scores for Assistant 1 and 2, respectively. The two scores are separated by a space. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."""
  ROLE = "Assistant"

+
  # Generate instruction for GPT-4 to score the two answers.
  def conv_to_str(fig_label, fig_caption, fig_context, question, ans1, ans2):
  return (
@@ -127,17 +128,13 @@ def main(args):

  if __name__ == "__main__":
  parser = argparse.ArgumentParser("GPT-4 Multimodal Chat Scoring", add_help=True)
- parser.add_argument(
- "--answers-file", default="", metavar="FILE", help="path to model answer file"
- )
+ parser.add_argument("--answers-file", default="", metavar="FILE", help="path to model answer file")
  parser.add_argument(
  "--question-file",
  default="data/questions/llava_med_eval_qa50_qa.jsonl",
  metavar="FILE",
  help="path to multichat questions file",
  )
- parser.add_argument(
- "--scores-file", default="", metavar="FILE", help="path to save gpt-4 score file"
- )
+ parser.add_argument("--scores-file", default="", metavar="FILE", help="path to save gpt-4 score file")
  args = parser.parse_args()
  main(args)
medrax/llava/eval/llm.py CHANGED
@@ -21,9 +21,7 @@ class LLM(abc.ABC):
  raise NotImplementedError("Subclasses should implement this!")

  @abstractmethod
- def split_input(
- self, fixed_instruction, few_shot_examples, splittable_input, input_header, output_header
- ):
+ def split_input(self, fixed_instruction, few_shot_examples, splittable_input, input_header, output_header):
  raise NotImplementedError("Subclasses should implement this!")


@@ -49,9 +47,7 @@ class GPT(LLM):
  def __init__(self, model_id):
  self.temperature = 0.0
  self.top_k = 1
- self.encoding = tiktoken.encoding_for_model(
- "-".join(model_id.split("-", 2)[:2]).replace("5", ".5")
- )
+ self.encoding = tiktoken.encoding_for_model("-".join(model_id.split("-", 2)[:2]).replace("5", ".5"))
  self.openai_api = "default"
  self.model_id = model_id
  self.max_length = self.deployment_max_length_dict[model_id]
@@ -61,9 +57,7 @@
  azure_endpoint=self.openai_cxn_dict[self.openai_api]["endpoint"],
  )

- def gen_messages(
- self, fixed_instruction, few_shot_examples, input, input_header, output_header
- ):
+ def gen_messages(self, fixed_instruction, few_shot_examples, input, input_header, output_header):
  messages = [
  {
  "role": "system",
@@ -120,18 +114,13 @@ class GPT(LLM):
  ):
  return asyncio.run(self.dispatch_openai_requests(messages_list))

- def split_input(
- self, fixed_instruction, few_shot_examples, splittable_input, input_header, output_header
- ):
+ def split_input(self, fixed_instruction, few_shot_examples, splittable_input, input_header, output_header):
  # Tokenize fixed_prompt
  fixed_token_ids = self.encoding.encode(
- fixed_instruction
- + " ".join([x["user"] + " " + x["assistant"] for x in few_shot_examples])
+ fixed_instruction + " ".join([x["user"] + " " + x["assistant"] for x in few_shot_examples])
  )
  # Calculate remaining token length
- remaining_token_len = math.ceil(
- (self.prompt_percent * self.max_length) - len(fixed_token_ids)
- )
+ remaining_token_len = math.ceil((self.prompt_percent * self.max_length) - len(fixed_token_ids))

  # Tokenize splittable_input
  split_token_ids = self.encoding.encode(splittable_input)
@@ -141,14 +130,10 @@
  split_token_ids[i : i + remaining_token_len + 10]
  for i in range(0, len(split_token_ids), remaining_token_len)
  ]
- split_input_list = [
- self.encoding.decode(split_token_ids) for split_token_ids in split_token_ids_list
- ]
+ split_input_list = [self.encoding.decode(split_token_ids) for split_token_ids in split_token_ids_list]

  # Take the fixed_prompt, few_shot_examples, splitted inputs, and input/output headers and generate list of prompt strings.
  return [
- self.gen_messages(
- fixed_instruction, few_shot_examples, split_input, input_header, output_header
- )
+ self.gen_messages(fixed_instruction, few_shot_examples, split_input, input_header, output_header)
  for split_input in split_input_list
  ]
medrax/llava/eval/model_vqa.py CHANGED
@@ -45,9 +45,7 @@ def eval_model(args):
  disable_torch_init()
  model_path = os.path.expanduser(args.model_path)
  model_name = get_model_name_from_path(model_path)
- tokenizer, model, image_processor, context_len = load_pretrained_model(
- model_path, args.model_base, model_name
- )
+ tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)

  questions = [json.loads(q) for q in open(os.path.expanduser(args.question_file), "r")]
  questions = get_chunk(questions, args.num_chunks, args.chunk_idx)
@@ -69,11 +67,7 @@
  conv.append_message(conv.roles[1], None)
  prompt = conv.get_prompt()

- input_ids = (
- tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
- .unsqueeze(0)
- .cuda()
- )
+ input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).cuda()

  image = Image.open(os.path.join(args.image_folder, image_file))
  image_tensor = process_images([image], image_processor, model.config)[0]
medrax/llava/eval/summarize_gpt_review.py CHANGED
@@ -14,8 +14,7 @@ def get_domain(x):
  def main(args):
  scores_data = util.load_file_jsonl(args.scores_file)
  predictions = [
- (x["question_id"], x["type"], get_domain(x), x["gpt_eval"].split("\n")[0].split(" "))
- for x in scores_data
+ (x["question_id"], x["type"], get_domain(x), x["gpt_eval"].split("\n")[0].split(" ")) for x in scores_data
  ]

  score_type_dict = defaultdict(lambda: defaultdict(list))
@@ -33,8 +32,7 @@ def main(args):
  result[q_type]["gpt4_score"] = util.get_avg(score_dict[1])
  result[q_type]["pred_score"] = util.get_avg(score_dict[2])
  result[q_type]["pred_relative_score"] = (
- util.get_avg([float(s2) / float(s1) for s1, s2 in zip(score_dict[1], score_dict[2])])
- * 100
+ util.get_avg([float(s2) / float(s1) for s1, s2 in zip(score_dict[1], score_dict[2])]) * 100
  )
  result[q_type]["data_size"] = len(score_dict[1])

@@ -55,8 +53,6 @@ def main(args):

  if __name__ == "__main__":
  parser = argparse.ArgumentParser("GPT-4 Multimodal Chat Eval Postprocessing", add_help=True)
- parser.add_argument(
- "--scores-file", default="", metavar="FILE", help="input path to gpt-4 score file"
- )
+ parser.add_argument("--scores-file", default="", metavar="FILE", help="input path to gpt-4 score file")
  args = parser.parse_args()
  main(args)
medrax/llava/mm_utils.py CHANGED
@@ -35,9 +35,7 @@ def process_images(images, image_processor, model_cfg):
  for image in images:
  if image_aspect_ratio == "pad":
  if image.mode == "L":
- background_color = int(
- 255 * sum(image_processor.image_mean) / len(image_processor.image_mean)
- )
+ background_color = int(255 * sum(image_processor.image_mean) / len(image_processor.image_mean))
  else:
  background_color = tuple(int(x * 255) for x in image_processor.image_mean)
  image = expand2square(image, background_color)
@@ -48,9 +46,7 @@
  return new_images


- def tokenizer_image_token(
- prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None
- ):
+ def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None):
  prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<image>")]

  def insert_separator(X, sep):
@@ -58,11 +54,7 @@ def tokenizer_image_token(

  input_ids = []
  offset = 0
- if (
- len(prompt_chunks) > 0
- and len(prompt_chunks[0]) > 0
- and prompt_chunks[0][0] == tokenizer.bos_token_id
- ):
+ if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id:
  offset = 1
  input_ids.append(prompt_chunks[0][0])

@@ -100,9 +92,7 @@ class KeywordsStoppingCriteria(StoppingCriteria):
  self.tokenizer = tokenizer
  self.start_len = input_ids.shape[1]

- def call_for_batch(
- self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
- ) -> bool:
+ def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
  offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len)
  self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
  for keyword_id in self.keyword_ids:
medrax/llava/model/builder.py CHANGED
@@ -59,9 +59,7 @@ def load_pretrained_model(
  # PEFT model
  from peft import PeftModel

- tokenizer = AutoTokenizer.from_pretrained(
- model_base, use_fast=False, cache_dir=cache_dir
- )
+ tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False, cache_dir=cache_dir)
  model = AutoModelForCausalLM.from_pretrained(
  model_base,
  low_cpu_mem_usage=True,
@@ -78,9 +76,7 @@
  else:
  use_fast = False
  if "mpt" in model_name.lower():
- tokenizer = AutoTokenizer.from_pretrained(
- model_path, use_fast=True, cache_dir=cache_dir
- )
+ tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, cache_dir=cache_dir)
  model = AutoModelForCausalLM.from_pretrained(
  model_path,
  low_cpu_mem_usage=True,
@@ -90,9 +86,7 @@
  **kwargs,
  )
  else:
- tokenizer = AutoTokenizer.from_pretrained(
- model_path, use_fast=False, cache_dir=cache_dir
- )
+ tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, cache_dir=cache_dir)
  model = AutoModelForCausalLM.from_pretrained(
  model_path,
  low_cpu_mem_usage=True,
@@ -109,9 +103,7 @@
  if mm_use_im_patch_token:
  tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True)
  if mm_use_im_start_end:
- tokenizer.add_tokens(
- [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True
- )
+ tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True)
  model.resize_token_embeddings(len(tokenizer))

  vision_tower = model.get_vision_tower()
medrax/llava/model/language_model/llava_mistral.py CHANGED
@@ -125,9 +125,7 @@ class LlavaMistralForCausalLM(MistralForCausalLM, LlavaMetaForCausalLM):
  **kwargs,
  )

- def prepare_inputs_for_generation(
- self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
- ):
+ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs):
  images = kwargs.pop("images", None)
  image_sizes = kwargs.pop("image_sizes", None)
  inputs = super().prepare_inputs_for_generation(
medrax/llava/model/llava_arch.py CHANGED
@@ -104,9 +104,7 @@ class LlavaMetaModel:
  checkpoint_folder = os.path.dirname(pretrain_mm_mlp_adapter)
  ckpts = glob(f"{checkpoint_folder}/checkpoint-*", recursive=False)
  if len(ckpts) > 0:
- vision_module_weights = torch.load(
- f"{ckpts[-1]}/mm_projector.bin", map_location="cpu"
- )
+ vision_module_weights = torch.load(f"{ckpts[-1]}/mm_projector.bin", map_location="cpu")
  model_dict = get_w(vision_module_weights, "vision_tower")
  print(f"Loading vision module weights from {ckpts[-1]}/mm_projector.bin")
  # print keys in model_dict
@@ -170,9 +168,7 @@ class LlavaMetaForCausalLM(ABC):
  image_features = self.encode_images(images).to(self.device)

  # TODO: image start / end is not implemented here to support pretraining.
- if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(
- self.config, "mm_use_im_start_end", False
- ):
+ if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr(self.config, "mm_use_im_start_end", False):
  raise NotImplementedError

  # Let's just add dummy tensors if they do not exist,
@@ -188,21 +184,15 @@ class LlavaMetaForCausalLM(ABC):
  else:
  attention_mask = attention_mask.bool()
  if position_ids is None:
- position_ids = torch.arange(
- 0, input_ids.shape[1], dtype=torch.long, device=input_ids.device
- )
+ position_ids = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device)

  if labels is None:
  labels = torch.full_like(input_ids, IGNORE_INDEX)

  input_ids = [
- cur_input_ids[cur_attention_mask]
- for cur_input_ids, cur_attention_mask in zip(input_ids, attention_mask)
- ]
- labels = [
- cur_labels[cur_attention_mask]
- for cur_labels, cur_attention_mask in zip(labels, attention_mask)
+ cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in zip(input_ids, attention_mask)
  ]
+ labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)]

  new_input_embeds = []
  new_labels = []
@@ -219,20 +209,14 @@ class LlavaMetaForCausalLM(ABC):
  continue

  image_token_indices = (
- [-1]
- + torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0].tolist()
- + [cur_input_ids.shape[0]]
+ [-1] + torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0].tolist() + [cur_input_ids.shape[0]]
  )
  cur_input_ids_noim = []
  cur_labels = labels[batch_idx]
  cur_labels_noim = []
  for i in range(len(image_token_indices) - 1):
- cur_input_ids_noim.append(
- cur_input_ids[image_token_indices[i] + 1 : image_token_indices[i + 1]]
- )
- cur_labels_noim.append(
- cur_labels[image_token_indices[i] + 1 : image_token_indices[i + 1]]
- )
+ cur_input_ids_noim.append(cur_input_ids[image_token_indices[i] + 1 : image_token_indices[i + 1]])
+ cur_labels_noim.append(cur_labels[image_token_indices[i] + 1 : image_token_indices[i + 1]])

  split_sizes = [x.shape[0] for x in cur_labels_noim]
  cur_input_embeds = self.get_model().embed_tokens(torch.cat(cur_input_ids_noim))
@@ -279,12 +263,8 @@ class LlavaMetaForCausalLM(ABC):
  dtype=new_labels[0].dtype,
  device=new_labels[0].device,
  )
- attention_mask = torch.zeros(
- (batch_size, max_len), dtype=attention_mask.dtype, device=attention_mask.device
- )
- position_ids = torch.zeros(
- (batch_size, max_len), dtype=position_ids.dtype, device=position_ids.device
- )
+ attention_mask = torch.zeros((batch_size, max_len), dtype=attention_mask.dtype, device=attention_mask.device)
+ position_ids = torch.zeros((batch_size, max_len), dtype=position_ids.dtype, device=position_ids.device)

  for i, (cur_new_embed, cur_new_labels) in enumerate(zip(new_input_embeds, new_labels)):
  cur_len = cur_new_embed.shape[0]
@@ -351,9 +331,7 @@ class LlavaMetaForCausalLM(ABC):
  self.resize_token_embeddings(len(tokenizer))

  if model_args.mm_use_im_start_end:
- num_new_tokens = tokenizer.add_tokens(
- [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True
- )
+ num_new_tokens = tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True)
  self.resize_token_embeddings(len(tokenizer))

  if num_new_tokens > 0:
@@ -361,9 +339,7 @@ class LlavaMetaForCausalLM(ABC):
  output_embeddings = self.get_output_embeddings().weight.data

  input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
- output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(
- dim=0, keepdim=True
- )
+ output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)

  input_embeddings[-num_new_tokens:] = input_embeddings_avg
  output_embeddings[-num_new_tokens:] = output_embeddings_avg
@@ -375,9 +351,7 @@ class LlavaMetaForCausalLM(ABC):
  p.requires_grad = False

  if model_args.pretrain_mm_mlp_adapter:
- mm_projector_weights = torch.load(
- model_args.pretrain_mm_mlp_adapter, map_location="cpu"
- )
+ mm_projector_weights = torch.load(model_args.pretrain_mm_mlp_adapter, map_location="cpu")
  embed_tokens_weight = mm_projector_weights["model.embed_tokens.weight"]
  assert num_new_tokens == 2
  if input_embeddings.shape == embed_tokens_weight.shape:
medrax/llava/model/multimodal_encoder/builder.py CHANGED
@@ -3,13 +3,7 @@ from .clip_encoder import CLIPVisionTower


  def build_vision_tower(vision_tower_cfg, **kwargs):
- vision_tower = getattr(
- vision_tower_cfg, "mm_vision_tower", getattr(vision_tower_cfg, "vision_tower", None)
- )
+ vision_tower = getattr(vision_tower_cfg, "mm_vision_tower", getattr(vision_tower_cfg, "vision_tower", None))
  is_absolute_path_exists = os.path.exists(vision_tower)
- if (
- is_absolute_path_exists
- or vision_tower.startswith("openai")
- or vision_tower.startswith("laion")
- ):
+ if is_absolute_path_exists or vision_tower.startswith("openai") or vision_tower.startswith("laion"):
  return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
medrax/llava/model/multimodal_projector/builder.py CHANGED
@@ -19,9 +19,7 @@ class SimpleResBlock(nn.Module):
  super().__init__()
  self.pre_norm = nn.LayerNorm(channels)

- self.proj = nn.Sequential(
- nn.Linear(channels, channels), nn.GELU(), nn.Linear(channels, channels)
- )
+ self.proj = nn.Sequential(nn.Linear(channels, channels), nn.GELU(), nn.Linear(channels, channels))

  def forward(self, x):
  x = self.pre_norm(x)
medrax/llava/serve/cli.py CHANGED
@@ -94,9 +94,7 @@ def main(args):
  if image is not None:
  # first message
  if model.config.mm_use_im_start_end:
- inp = (
- DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + inp
- )
+ inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + inp
  else:
  inp = DEFAULT_IMAGE_TOKEN + "\n" + inp
  conv.append_message(conv.roles[0], inp)
medrax/llava/serve/controller.py CHANGED
@@ -2,6 +2,7 @@
  A controller manages distributed workers.
  It sends worker addresses to clients.
  """
+
  import argparse
  import dataclasses
  from enum import Enum, auto
@@ -199,9 +200,7 @@ class Controller:
  yield json.dumps(ret).encode() + b"\0"

  try:
- response = requests.post(
- worker_addr + "/worker_generate_stream", json=params, stream=True, timeout=5
- )
+ response = requests.post(worker_addr + "/worker_generate_stream", json=params, stream=True, timeout=5)
  for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
  if chunk:
  yield chunk + b"\0"
@@ -240,9 +239,7 @@ app = FastAPI()
  @app.post("/register_worker")
  async def register_worker(request: Request):
  data = await request.json()
- controller.register_worker(
- data["worker_name"], data["check_heart_beat"], data.get("worker_status", None)
- )
+ controller.register_worker(data["worker_name"], data["check_heart_beat"], data.get("worker_status", None))


  @app.post("/refresh_all_workers")
medrax/llava/serve/gradio_web_server.py CHANGED
@@ -216,9 +216,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
  all_image_hash = [hashlib.md5(image.tobytes()).hexdigest() for image in all_images]
  for image, hash in zip(all_images, all_image_hash):
  t = datetime.datetime.now()
- filename = os.path.join(
- LOGDIR, "serve_images", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg"
- )
+ filename = os.path.join(LOGDIR, "serve_images", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg")
  if not os.path.isfile(filename):
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  image.save(filename)
@@ -230,9 +228,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
  "temperature": float(temperature),
  "top_p": float(top_p),
  "max_new_tokens": min(int(max_new_tokens), 1536),
- "stop": state.sep
- if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT]
- else state.sep2,
+ "stop": state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2,
  "images": f"List of {len(state.get_images())} images: {all_image_hash}",
  }
  logger.info(f"==== request ====\n{pload}")
@@ -330,9 +326,7 @@ block_css = """


  def build_demo(embed_mode):
- textbox = gr.Textbox(
- show_label=False, placeholder="Enter text and press ENTER", container=False
- )
+ textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
  with gr.Blocks(title="LLaVA", theme=gr.themes.Default(), css=block_css) as demo:
  state = gr.State()

@@ -468,9 +462,7 @@ def build_demo(embed_mode):
  [state, chatbot] + btn_list,
  )

- clear_btn.click(
- clear_history, None, [state, chatbot, textbox, imagebox] + btn_list, queue=False
- )
+ clear_btn.click(clear_history, None, [state, chatbot, textbox, imagebox] + btn_list, queue=False)

  textbox.submit(
  add_text,
medrax/llava/serve/model_worker.py CHANGED
@@ -1,6 +1,7 @@
  """
  A model worker executes the model.
  """
+
  import argparse
  import asyncio
  import json
@@ -155,9 +156,7 @@ class ModelWorker:
  if images is not None and len(images) > 0 and self.is_multimodal:
  if len(images) > 0:
  if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
- raise ValueError(
- "Number of images does not match number of <image> tokens in prompt"
- )
+ raise ValueError("Number of images does not match number of <image> tokens in prompt")

  images = [load_image_from_base64(image) for image in images]
  images = process_images(images, image_processor, model.config)
@@ -172,9 +171,7 @@
  replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN
  prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)

- num_image_tokens = (
- prompt.count(replace_token) * model.get_vision_tower().num_patches
- )
+ num_image_tokens = prompt.count(replace_token) * model.get_vision_tower().num_patches
  else:
  images = None
  image_args = {"images": images}
@@ -196,19 +193,14 @@
  )
  keywords = [stop_str]
  stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
- streamer = TextIteratorStreamer(
- tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15
- )
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)

- max_new_tokens = min(
- max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens
- )
+ max_new_tokens = min(max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)

  if max_new_tokens < 1:
  yield json.dumps(
  {
- "text": ori_prompt
- + "Exceeds max token length. Please start a new conversation, thanks.",
+ "text": ori_prompt + "Exceeds max token length. Please start a new conversation, thanks.",
  "error_code": 0,
  }
  ).encode() + b"\0"
medrax/llava/serve/test_message.py CHANGED
@@ -17,9 +17,7 @@ def main():
  models.sort()
  print(f"Models: {models}")

- ret = requests.post(
- controller_addr + "/get_worker_address", json={"model": args.model_name}
- )
+ ret = requests.post(controller_addr + "/get_worker_address", json={"model": args.model_name})
  worker_addr = ret.json()["address"]
  print(f"worker_addr: {worker_addr}")

@@ -38,9 +36,7 @@ def main():
  "temperature": 0.7,
  "stop": conv.sep2,
  }
- response = requests.post(
- worker_addr + "/worker_generate_stream", headers=headers, json=pload, stream=True
- )
+ response = requests.post(worker_addr + "/worker_generate_stream", headers=headers, json=pload, stream=True)

  print(prompt, end="")
  for chunk in response.iter_lines(chunk_size=8192, decode_unicode=False, delimiter=b"\0"):
medrax/llava/utils.py CHANGED
@@ -45,9 +45,7 @@ def build_logger(logger_name, logger_filename):
  if handler is None:
  os.makedirs(LOGDIR, exist_ok=True)
  filename = os.path.join(LOGDIR, logger_filename)
- handler = logging.handlers.TimedRotatingFileHandler(
- filename, when="D", utc=True, encoding="UTF-8"
- )
+ handler = logging.handlers.TimedRotatingFileHandler(filename, when="D", utc=True, encoding="UTF-8")
  handler.setFormatter(formatter)

  for name, item in logging.root.manager.loggerDict.items():
medrax/models/model_factory.py CHANGED
@@ -29,7 +29,7 @@ class ModelFactory:
  "base_url_key": "OPENAI_BASE_URL",
  },
  "gemini": {
- "class": ChatGoogleGenerativeAI,
+ "class": ChatGoogleGenerativeAI,
  "env_key": "GOOGLE_API_KEY",
  "base_url_key": "GOOGLE_BASE_URL",
  },
@@ -42,14 +42,12 @@ class ModelFactory:
  "grok": {
  "class": ChatXAI,
  "env_key": "XAI_API_KEY",
- }
+ },
  # Add more providers with default configurations here
  }

  @classmethod
- def register_provider(
- cls, prefix: str, model_class: Type[BaseLanguageModel], env_key: str, **kwargs
- ) -> None:
+ def register_provider(cls, prefix: str, model_class: Type[BaseLanguageModel], env_key: str, **kwargs) -> None:
  """Register a new model provider.

  Args:
@@ -61,9 +59,7 @@ class ModelFactory:
  cls._model_providers[prefix] = {"class": model_class, "env_key": env_key, **kwargs}

  @classmethod
- def create_model(
- cls, model_name: str, temperature: float = 0.7, **kwargs
- ) -> BaseLanguageModel:
+ def create_model(cls, model_name: str, temperature: float = 0.7, **kwargs) -> BaseLanguageModel:
  """Create and return an instance of the appropriate language model.

  Args:
@@ -79,9 +75,7 @@ class ModelFactory:
  ValueError: If the required API key is missing
  """
  # Find the matching provider based on model name prefix
- provider_prefix = next(
- (prefix for prefix in cls._model_providers if model_name.startswith(prefix)), None
- )
+ provider_prefix = next((prefix for prefix in cls._model_providers if model_name.startswith(prefix)), None)

  if not provider_prefix:
  raise ValueError(
@@ -138,7 +132,4 @@ class ModelFactory:
  Dict[str, Dict[str, Any]]: Dictionary of registered providers and their configurations
  """
  # Return a copy to prevent accidental modification
- return {
- k: {kk: vv for kk, vv in v.items() if kk != "class"}
- for k, v in cls._model_providers.items()
- }
+ return {k: {kk: vv for kk, vv in v.items() if kk != "class"} for k, v in cls._model_providers.items()}
medrax/rag/rag.py CHANGED
@@ -107,9 +107,7 @@ class CohereRAG:
  # Initialize Pinecone
  self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
  if not self.pinecone_api_key:
- raise ValueError(
- "PINECONE_API_KEY environment variable not set. Please get a key from app.pinecone.io"
- )
+ raise ValueError("PINECONE_API_KEY environment variable not set. Please get a key from app.pinecone.io")
  self.pinecone = Pinecone(api_key=self.pinecone_api_key)
  self.index_name = self.config.pinecone_index_name

@@ -161,9 +159,7 @@ class CohereRAG:
  )

  print(f"Connecting to existing Pinecone index: {self.index_name}")
- vectorstore = PineconeVectorStore.from_existing_index(
- index_name=self.index_name, embedding=self.embeddings
- )
+ vectorstore = PineconeVectorStore.from_existing_index(index_name=self.index_name, embedding=self.embeddings)

  # Check if the index is empty and needs to be populated
  try:
@@ -329,9 +325,7 @@ class CohereRAG:
  )
  documents.append(doc)

- print(
- f"Loaded {len(documents)} document chunks from HuggingFace dataset: {dataset_name}"
- )
+ print(f"Loaded {len(documents)} document chunks from HuggingFace dataset: {dataset_name}")
  return documents

  except Exception as e:
medrax/tools/browsing/__init__.py CHANGED
@@ -6,8 +6,8 @@ from .web_browser import WebBrowserTool, WebBrowserSchema, SearchQuerySchema, Vi
  __all__ = [
  "DuckDuckGoSearchTool",
  "WebSearchInput",
- "WebBrowserTool",
+ "WebBrowserTool",
  "WebBrowserSchema",
  "SearchQuerySchema",
- "VisitUrlSchema"
- ]
+ "VisitUrlSchema",
+ ]
medrax/tools/browsing/duckduckgo.py CHANGED
@@ -95,18 +95,12 @@ class DuckDuckGoSearchTool(BaseTool):
  super().__init__(**kwargs)

  if DDGS is None:
- logger.error(
- "duckduckgo-search package not installed. Install with: pip install duckduckgo-search"
- )
- raise ImportError(
- "duckduckgo-search package is required for web search functionality"
- )
+ logger.error("duckduckgo-search package not installed. Install with: pip install duckduckgo-search")
+ raise ImportError("duckduckgo-search package is required for web search functionality")

  logger.info("DuckDuckGo search tool initialized successfully")

- def _perform_search_sync(
- self, query: str, max_results: int = 5, region: str = "us-en"
- ) -> Dict[str, Any]:
+ def _perform_search_sync(self, query: str, max_results: int = 5, region: str = "us-en") -> Dict[str, Any]:
  """
  Perform the actual web search using DuckDuckGo synchronously.

@@ -118,9 +112,7 @@ class DuckDuckGoSearchTool(BaseTool):
  Returns:
  Dict[str, Any]: Structured search results.
  """
- logger.info(
- f"Performing web search: '{query}' (max_results={max_results}, region={region})"
- )
+ logger.info(f"Performing web search: '{query}' (max_results={max_results}, region={region})")

  try:
  # Initialize DDGS with error handling
@@ -158,9 +150,7 @@ class DuckDuckGoSearchTool(BaseTool):
  summary = f"No results found for '{query}'"

  # Log successful completion
- logger.info(
- f"Web search completed successfully: {len(formatted_results)} results"
- )
+ logger.info(f"Web search completed successfully: {len(formatted_results)} results")

  return {
  "query": query,
@@ -217,7 +207,7 @@ class DuckDuckGoSearchTool(BaseTool):

  try:
  result = self._perform_search_sync(query, max_results, region)
-
+
  # Check if search was successful
  if "error" in result:
  metadata["analysis_status"] = "failed"
@@ -239,7 +229,7 @@ class DuckDuckGoSearchTool(BaseTool):
  }
  metadata["analysis_status"] = "failed"
  metadata["error_details"] = str(e)
-
+
  return error_result, metadata

  async def _arun(
@@ -296,9 +286,7 @@ class DuckDuckGoSearchTool(BaseTool):

  # Use asyncio to run sync search in executor
  loop = asyncio.get_event_loop()
- result, metadata = await loop.run_in_executor(
- None, self._run, query, max_results, region
- )
+ result, metadata = await loop.run_in_executor(None, self._run, query, max_results, region)

  if writer:
  # Parse result to get count for progress update
@@ -333,7 +321,7 @@ class DuckDuckGoSearchTool(BaseTool):
  "search_engine": "DuckDuckGo",
  "timestamp": datetime.now().isoformat(),
  }
-
+
  metadata = {
  "query": query,
  "max_results": max_results,
@@ -344,12 +332,10 @@ class DuckDuckGoSearchTool(BaseTool):
  "analysis_status": "failed",
  "error_details": str(e),
  }
-
+
  return error_result, metadata

- def get_search_summary(
- self, query: str, max_results: int = 3
- ) -> dict[str, str | list[str]]:
+ def get_search_summary(self, query: str, max_results: int = 3) -> dict[str, str | list[str]]:
  """
  Get a quick summary of search results for a given query.

@@ -375,14 +361,7 @@ class DuckDuckGoSearchTool(BaseTool):
  results = result.get("results", [])
  titles = [r["title"] for r in results]
  urls = [r["url"] for r in results]
- snippets = [
- (
- r["snippet"][:100] + "..."
- if len(r["snippet"]) > 100
- else r["snippet"]
- )
- for r in results
- ]
+ snippets = [(r["snippet"][:100] + "..." if len(r["snippet"]) > 100 else r["snippet"]) for r in results]

  return {
  "query": query,
medrax/tools/browsing/web_browser.py CHANGED
@@ -78,9 +78,7 @@ class WebBrowserTool(BaseTool):
  max_results: int = 5
  args_schema: Type[BaseModel] = WebBrowserSchema

- def __init__(
- self, search_api_key: Optional[str] = None, search_engine_id: Optional[str] = None, **kwargs
- ):
+ def __init__(self, search_api_key: Optional[str] = None, search_engine_id: Optional[str] = None, **kwargs):
  """Initialize the web browser tool with optional search API credentials.

  Args:
@@ -145,9 +143,7 @@ class WebBrowserTool(BaseTool):
  except Exception as e:
  return {"error": f"Search failed: {str(e)}"}

- def visit_url(
- self, url: str, max_content_length: int = 5000, max_links: int = 5
- ) -> Dict[str, Any]:
+ def visit_url(self, url: str, max_content_length: int = 5000, max_links: int = 5) -> Dict[str, Any]:
  """Visit a URL and extract its content with comprehensive parsing.

  Args:
@@ -218,9 +214,7 @@ class WebBrowserTool(BaseTool):
  return {
  "title": title,
  "content": (
- text_content[:max_content_length]
- if len(text_content) > max_content_length
- else text_content
+ text_content[:max_content_length] if len(text_content) > max_content_length else text_content
  ),
  "url": url,
  "links": links[:max_links],  # Limit to max_links
medrax/tools/classification/__init__.py CHANGED
@@ -3,9 +3,4 @@
  from .torchxrayvision import TorchXRayVisionClassifierTool, TorchXRayVisionInput
  from .arcplus import ArcPlusClassifierTool, ArcPlusInput

- __all__ = [
- "TorchXRayVisionClassifierTool",
- "TorchXRayVisionInput",
- "ArcPlusClassifierTool",
- "ArcPlusInput"
- ]
+ __all__ = ["TorchXRayVisionClassifierTool", "TorchXRayVisionInput", "ArcPlusClassifierTool", "ArcPlusInput"]
medrax/tools/classification/arcplus.py CHANGED
@@ -38,9 +38,7 @@ class OmniSwinTransformer(SwinTransformer):

  self.omni_heads = []
  for num_classes in num_classes_list:
- self.omni_heads.append(
- nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
- )
+ self.omni_heads.append(nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity())
  self.omni_heads = nn.ModuleList(self.omni_heads)

  def forward(self, x, head_n=None):
@@ -62,9 +60,7 @@
  class ArcPlusInput(BaseModel):
  """Input for ArcPlus chest X-ray analysis tool. Only supports JPG or PNG images."""

- image_path: str = Field(
- ..., description="Path to the radiology image file, only supports JPG or PNG images"
- )
+ image_path: str = Field(..., description="Path to the radiology image file, only supports JPG or PNG images")


  class ArcPlusClassifierTool(BaseTool):
@@ -249,11 +245,7 @@ class ArcPlusClassifierTool(BaseTool):

  # Remove "module." prefix if present (improved logic from example)
  if any([True if "module." in k else False for k in state_dict.keys()]):
- state_dict = {
- k.replace("module.", ""): v
- for k, v in state_dict.items()
- if k.startswith("module.")
- }
+ state_dict = {k.replace("module.", ""): v for k, v in state_dict.items() if k.startswith("module.")}

  # Load the model weights
  msg = self.model.load_state_dict(state_dict, strict=False)
@@ -333,14 +325,10 @@ class ArcPlusClassifierTool(BaseTool):

  # Map predictions to disease names
  if len(predictions) != len(self.disease_list):
- print(
- f"Warning: Expected {len(self.disease_list)} predictions, got {len(predictions)}"
- )
+ print(f"Warning: Expected {len(self.disease_list)} predictions, got {len(predictions)}")
  # Pad or truncate as needed
  if len(predictions) < len(self.disease_list):
- predictions = np.pad(
- predictions, (0, len(self.disease_list) - len(predictions))
- )
+ predictions = np.pad(predictions, (0, len(self.disease_list) - len(predictions)))
  else:
  predictions = predictions[: len(self.disease_list)]

medrax/tools/classification/torchxrayvision.py CHANGED
@@ -16,9 +16,7 @@ from langchain_core.tools import BaseTool
16
  class TorchXRayVisionInput(BaseModel):
17
  """Input for TorchXRayVision chest X-ray analysis tools. Only supports JPG or PNG images."""
18
 
19
- image_path: str = Field(
20
- ..., description="Path to the radiology image file, only supports JPG or PNG images"
21
- )
22
 
23
 
24
  class TorchXRayVisionClassifierTool(BaseTool):
 
16
  class TorchXRayVisionInput(BaseModel):
17
  """Input for TorchXRayVision chest X-ray analysis tools. Only supports JPG or PNG images."""
18
 
19
+ image_path: str = Field(..., description="Path to the radiology image file, only supports JPG or PNG images")
 
 
20
 
21
 
22
  class TorchXRayVisionClassifierTool(BaseTool):
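
A minimal sketch of the single-line pydantic Field style this commit standardizes on, with a hypothetical model name and the same description string used above:

from pydantic import BaseModel, Field

class ExampleImageInput(BaseModel):
    # Required field (the Ellipsis) with its description kept on one line.
    image_path: str = Field(..., description="Path to the radiology image file, only supports JPG or PNG images")

print(ExampleImageInput(image_path="chest.png").image_path)  # chest.png
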
medrax/tools/dicom.py CHANGED
@@ -14,9 +14,7 @@ class DicomProcessorInput(BaseModel):
14
  """Input schema for the DICOM Processor Tool."""
15
 
16
  dicom_path: str = Field(..., description="Path to the DICOM file")
17
- window_center: Optional[float] = Field(
18
- None, description="Window center for contrast adjustment"
19
- )
20
  window_width: Optional[float] = Field(None, description="Window width for contrast adjustment")
21
 
22
 
 
14
  """Input schema for the DICOM Processor Tool."""
15
 
16
  dicom_path: str = Field(..., description="Path to the DICOM file")
17
+ window_center: Optional[float] = Field(None, description="Window center for contrast adjustment")
 
 
18
  window_width: Optional[float] = Field(None, description="Window width for contrast adjustment")
19
 
20
 
medrax/tools/grounding.py CHANGED
@@ -89,11 +89,8 @@ class XRayPhraseGroundingTool(BaseTool):
89
  trust_remote_code=True,
90
  quantization_config=quantization_config,
91
  )
92
- self.processor = AutoProcessor.from_pretrained(
93
- model_path, cache_dir=cache_dir, trust_remote_code=True
94
- )
95
 
96
-
97
  self.model = self.model.eval()
98
 
99
  self.temp_dir = Path(temp_dir if temp_dir else tempfile.mkdtemp())
@@ -167,12 +164,8 @@ class XRayPhraseGroundingTool(BaseTool):
167
  )
168
 
169
  prompt_length = inputs["input_ids"].shape[-1]
170
- decoded_text = self.processor.decode(
171
- output[0][prompt_length:], skip_special_tokens=True
172
- )
173
- predictions = self.processor.convert_output_to_plaintext_or_grounded_sequence(
174
- decoded_text
175
- )
176
 
177
  metadata = {
178
  "image_path": image_path,
@@ -199,9 +192,7 @@ class XRayPhraseGroundingTool(BaseTool):
199
  # Convert model bboxes to list format and get original image bboxes
200
  model_bboxes = [list(bbox) for bbox in pred_bboxes]
201
  original_bboxes = [
202
- self.processor.adjust_box_for_original_image_size(
203
- bbox, width=image.size[0], height=image.size[1]
204
- )
205
  for bbox in model_bboxes
206
  ]
207
 
 
89
  trust_remote_code=True,
90
  quantization_config=quantization_config,
91
  )
92
+ self.processor = AutoProcessor.from_pretrained(model_path, cache_dir=cache_dir, trust_remote_code=True)
 
 
93
 
 
94
  self.model = self.model.eval()
95
 
96
  self.temp_dir = Path(temp_dir if temp_dir else tempfile.mkdtemp())
 
164
  )
165
 
166
  prompt_length = inputs["input_ids"].shape[-1]
167
+ decoded_text = self.processor.decode(output[0][prompt_length:], skip_special_tokens=True)
168
+ predictions = self.processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)
 
 
 
 
169
 
170
  metadata = {
171
  "image_path": image_path,
 
192
  # Convert model bboxes to list format and get original image bboxes
193
  model_bboxes = [list(bbox) for bbox in pred_bboxes]
194
  original_bboxes = [
195
+ self.processor.adjust_box_for_original_image_size(bbox, width=image.size[0], height=image.size[1])
 
 
196
  for bbox in model_bboxes
197
  ]
198
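
As an aside, a tiny sketch of the "decode only the newly generated tokens" pattern behind the reflowed processor.decode call, with plain lists standing in for tensors and made-up token ids:

prompt_tokens = [101, 7592, 2088]                 # stand-in ids for the tokenized prompt
model_output = prompt_tokens + [2023, 2003, 102]  # generation echoes the prompt, then continues
prompt_length = len(prompt_tokens)
new_tokens = model_output[prompt_length:]         # only this slice is passed to the decoder
print(new_tokens)  # [2023, 2003, 102]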
 
medrax/tools/rag.py CHANGED
@@ -14,7 +14,7 @@ class RAGTool(BaseTool):
14
 
15
  The knowledge base includes:
16
  - Medical textbooks and reference materials
17
- - Research papers and clinical studies
18
  - Medical manuals and guidelines
19
  - Specialized medical literature
20
 
 
14
 
15
  The knowledge base includes:
16
  - Medical textbooks and reference materials
17
+ - Research papers and clinical studies
18
  - Medical manuals and guidelines
19
  - Specialized medical literature
20
 
medrax/tools/report_generation.py CHANGED
@@ -22,9 +22,7 @@ from transformers import (
22
  class ChestXRayInput(BaseModel):
23
  """Input for chest X-ray analysis tools. Only supports JPG or PNG images."""
24
 
25
- image_path: str = Field(
26
- ..., description="Path to the radiology image file, only supports JPG or PNG images"
27
- )
28
 
29
 
30
  class ChestXRayReportGeneratorTool(BaseTool):
@@ -170,12 +168,8 @@ class ChestXRayReportGeneratorTool(BaseTool):
170
  """
171
  try:
172
  # Process image for both models
173
- findings_pixels = self._process_image(
174
- image_path, self.findings_processor, self.findings_model
175
- )
176
- impression_pixels = self._process_image(
177
- image_path, self.impression_processor, self.impression_model
178
- )
179
 
180
  # Generate both sections
181
  with torch.inference_mode():
@@ -187,11 +181,7 @@ class ChestXRayReportGeneratorTool(BaseTool):
187
  )
188
 
189
  # Combine into formatted report
190
- report = (
191
- "CHEST X-RAY REPORT\n\n"
192
- f"FINDINGS:\n{findings_text}\n\n"
193
- f"IMPRESSION:\n{impression_text}"
194
- )
195
 
196
  output = {
197
  "report": report,
 
22
  class ChestXRayInput(BaseModel):
23
  """Input for chest X-ray analysis tools. Only supports JPG or PNG images."""
24
 
25
+ image_path: str = Field(..., description="Path to the radiology image file, only supports JPG or PNG images")
 
 
26
 
27
 
28
  class ChestXRayReportGeneratorTool(BaseTool):
 
168
  """
169
  try:
170
  # Process image for both models
171
+ findings_pixels = self._process_image(image_path, self.findings_processor, self.findings_model)
172
+ impression_pixels = self._process_image(image_path, self.impression_processor, self.impression_model)
 
 
 
 
173
 
174
  # Generate both sections
175
  with torch.inference_mode():
 
181
  )
182
 
183
  # Combine into formatted report
184
+ report = "CHEST X-RAY REPORT\n\n" f"FINDINGS:\n{findings_text}\n\n" f"IMPRESSION:\n{impression_text}"
 
 
 
 
185
 
186
  output = {
187
  "report": report,
medrax/tools/segmentation/__init__.py CHANGED
@@ -3,10 +3,4 @@
3
  from .segmentation import ChestXRaySegmentationTool, ChestXRaySegmentationInput, OrganMetrics
4
  from .medsam2 import MedSAM2Tool, MedSAM2Input
5
 
6
- __all__ = [
7
- "ChestXRaySegmentationTool",
8
- "ChestXRaySegmentationInput",
9
- "OrganMetrics",
10
- "MedSAM2Tool",
11
- "MedSAM2Input"
12
- ]
 
3
  from .segmentation import ChestXRaySegmentationTool, ChestXRaySegmentationInput, OrganMetrics
4
  from .medsam2 import MedSAM2Tool, MedSAM2Input
5
 
6
+ __all__ = ["ChestXRaySegmentationTool", "ChestXRaySegmentationInput", "OrganMetrics", "MedSAM2Tool", "MedSAM2Input"]
 
 
 
 
 
 
medrax/tools/segmentation/medsam2.py CHANGED
@@ -26,7 +26,6 @@ from hydra import initialize_config_dir
26
  from hydra.core.global_hydra import GlobalHydra
27
 
28
 
29
-
30
  class MedSAM2Input(BaseModel):
31
  """Input schema for the MedSAM2 Tool."""
32
 
@@ -47,7 +46,7 @@ class MedSAM2Input(BaseModel):
47
 
48
  class MedSAM2Tool(BaseTool):
49
  """Advanced medical image segmentation tool using MedSAM2.
50
-
51
  This tool provides state-of-the-art medical image segmentation capabilities using
52
  the MedSAM2 model, which is specifically adapted for medical imaging from Meta's SAM2.
53
  Supports interactive prompting with boxes, points, or automatic segmentation.
@@ -92,18 +91,15 @@ class MedSAM2Tool(BaseTool):
92
  # This works around the issue with initialize_config_module in sam2
93
  if GlobalHydra.instance().is_initialized():
94
  GlobalHydra.instance().clear()
95
-
96
  config_dir = Path(__file__).parent.parent.parent.parent / "MedSAM2" / "sam2" / "configs"
97
  initialize_config_dir(config_dir=str(config_dir), version_base="1.2")
98
-
99
  hf_hub_download(
100
- repo_id=model_path,
101
- filename=model_file,
102
- local_dir=self.cache_dir,
103
- local_dir_use_symlinks=False
104
  )
105
 
106
- config_path = model_cfg.replace('.yaml', '')
107
  sam2_model = build_sam2(config_path, str(self.cache_dir / model_file), device=device)
108
  self.predictor = SAM2ImagePredictor(sam2_model)
109
 
@@ -114,37 +110,37 @@ class MedSAM2Tool(BaseTool):
114
  """Load and preprocess image for medical analysis."""
115
  try:
116
  # Handle different image formats
117
- if image_path.lower().endswith('.dcm'):
118
  # DICOM files - would need DICOM processor
119
  raise ValueError("DICOM files not directly supported. Please convert to standard image format first.")
120
-
121
  # Load standard image formats
122
  image = Image.open(image_path)
123
-
124
  # For medical images, convert to grayscale first if needed, then to RGB
125
- if image.mode == 'L': # Grayscale
126
  # Convert grayscale to RGB for SAM2
127
- image = image.convert('RGB')
128
- elif image.mode != 'RGB':
129
- if image.mode == 'RGBA':
130
  # Create white background for RGBA
131
- background = Image.new('RGB', image.size, (255, 255, 255))
132
  background.paste(image, mask=image.split()[-1])
133
  image = background
134
  else:
135
- image = image.convert('RGB')
136
-
137
  # Convert to numpy array
138
  image_np = np.array(image)
139
-
140
  # Ensure image is in proper range [0, 255]
141
  if image_np.max() <= 1.0:
142
  image_np = (image_np * 255).astype(np.uint8)
143
  else:
144
  image_np = image_np.astype(np.uint8)
145
-
146
  return image_np
147
-
148
  except Exception as e:
149
  raise ValueError(f"Failed to load image {image_path}: {str(e)}")
150
 
@@ -152,55 +148,53 @@ class MedSAM2Tool(BaseTool):
152
  """Process and validate prompts."""
153
  if prompt_type == "auto":
154
  return None, None, None
155
-
156
  if prompt_coords is None:
157
  if prompt_type != "auto":
158
  raise ValueError(f"Prompt coordinates required for prompt type '{prompt_type}'")
159
  return None, None, None
160
-
161
  if prompt_type == "box":
162
  if len(prompt_coords) != 4:
163
  raise ValueError("Box prompt requires 4 coordinates: [x1,y1,x2,y2]")
164
-
165
  x1, y1, x2, y2 = prompt_coords
166
  # Validate coordinates
167
  if x1 >= x2 or y1 >= y2:
168
  raise ValueError("Invalid box coordinates: x1 < x2 and y1 < y2 required")
169
-
170
  input_box = np.array([[x1, y1, x2, y2]])
171
  return input_box, None, None
172
-
173
  elif prompt_type == "point":
174
  if len(prompt_coords) != 2:
175
  raise ValueError("Point prompt requires 2 coordinates: [x,y]")
176
-
177
  x, y = prompt_coords
178
  input_point = np.array([[x, y]])
179
  input_label = np.array([1]) # Positive point
180
  return None, input_point, input_label
181
-
182
  else:
183
  raise ValueError(f"Unknown prompt type: {prompt_type}")
184
 
185
  def _create_visualization(self, image: np.ndarray, masks: np.ndarray, prompt_info: Dict) -> str:
186
  """Create visualization of segmentation results."""
187
  plt.figure(figsize=(10, 10))
188
-
189
  # Convert RGB image to grayscale for background display
190
  if len(image.shape) == 3:
191
  # Convert RGB to grayscale using standard luminance formula
192
- gray_image = 0.299 * image[:,:,0] + 0.587 * image[:,:,1] + 0.114 * image[:,:,2]
193
  else:
194
  gray_image = image
195
-
196
  # Display grayscale background
197
- plt.imshow(
198
- gray_image, cmap="gray", extent=[0, image.shape[1], image.shape[0], 0]
199
- )
200
-
201
  # Generate color palette for multiple masks
202
  colors = plt.cm.rainbow(np.linspace(0, 1, len(masks)))
203
-
204
  # Process and overlay each mask
205
  for idx, (mask, color) in enumerate(zip(masks, colors)):
206
  if mask.sum() > 0:
@@ -208,33 +202,31 @@ class MedSAM2Tool(BaseTool):
208
  mask_bool = mask.astype(bool)
209
  colored_mask = np.zeros((*mask_bool.shape, 4))
210
  colored_mask[mask_bool] = (*color[:3], 0.3) # 30% transparency like segmentation tool
211
- plt.imshow(
212
- colored_mask, extent=[0, image.shape[1], image.shape[0], 0]
213
- )
214
-
215
  # Add legend entry for each mask
216
  mask_label = f"Mask {idx + 1} (score: {prompt_info.get('scores', [0])[idx] if idx < len(prompt_info.get('scores', [])) else 0:.3f})"
217
  plt.plot([], [], color=color, label=mask_label, linewidth=3)
218
-
219
  # Add prompt visualization with consistent styling
220
- if prompt_info.get('box') is not None:
221
- box = prompt_info['box'][0]
222
  x1, y1, x2, y2 = box
223
- plt.plot([x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1], 'g-', linewidth=2, label='Box Prompt')
224
-
225
- if prompt_info.get('point') is not None:
226
- point = prompt_info['point'][0]
227
- plt.plot(point[0], point[1], 'go', markersize=10, label='Point Prompt')
228
-
229
  plt.title("Segmentation Overlay")
230
  plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
231
  plt.axis("off")
232
-
233
  # Save visualization with higher DPI like segmentation tool
234
  viz_path = self.temp_dir / f"medsam2_result_{uuid.uuid4().hex[:8]}.png"
235
- plt.savefig(viz_path, bbox_inches='tight', dpi=300)
236
  plt.close()
237
-
238
  return str(viz_path)
239
 
240
  def _run(
@@ -249,28 +241,28 @@ class MedSAM2Tool(BaseTool):
249
  try:
250
  # Load image
251
  image = self._load_image(image_path)
252
-
253
  # Set image for predictor
254
  self.predictor.set_image(image)
255
-
256
  # Process prompts
257
- input_box, input_point, input_label = self._process_prompts(
258
- prompt_type, prompt_coords, image.shape[:2]
259
- )
260
-
261
  # Run inference
262
  if prompt_type == "auto":
263
  # For auto segmentation, try multiple approaches and select best result
264
  h, w = image.shape[:2]
265
-
266
  # Try multiple points in key areas for medical images
267
- sample_points = np.array([
268
- [w//3, h//3], # Upper left lung area
269
- [2*w//3, h//3], # Upper right lung area
270
- [w//2, 2*h//3], # Lower center area
271
- ])
 
 
272
  sample_labels = np.array([1, 1, 1]) # All positive points
273
-
274
  masks, scores, logits = self.predictor.predict(
275
  point_coords=sample_points,
276
  point_labels=sample_labels,
@@ -283,29 +275,29 @@ class MedSAM2Tool(BaseTool):
283
  box=input_box,
284
  multimask_output=True,
285
  )
286
-
287
  # Create visualization
288
  prompt_info = {
289
- 'box': input_box,
290
- 'point': input_point,
291
- 'type': prompt_type,
292
- 'scores': scores # Add scores for legend display
293
  }
294
  viz_path = self._create_visualization(image, masks, prompt_info)
295
-
296
  # Create output dictionary (main results)
297
  output = {
298
  "segmentation_image_path": viz_path,
299
- "confidence_scores": scores.tolist() if hasattr(scores, 'tolist') else list(scores),
300
  "num_masks": len(masks),
301
  "best_mask_score": float(scores[0]) if len(scores) > 0 else 0.0,
302
  "mask_summary": {
303
  "total_masks": len(masks),
304
  "mask_shapes": [list(mask.shape) for mask in masks],
305
- "segmented_area_pixels": [int(mask.sum()) for mask in masks]
306
  },
307
  }
308
-
309
  # Create metadata dictionary
310
  metadata = {
311
  "image_path": image_path,
@@ -317,9 +309,9 @@ class MedSAM2Tool(BaseTool):
317
  "num_masks_generated": len(masks),
318
  "analysis_status": "completed",
319
  }
320
-
321
  return output, metadata
322
-
323
  except Exception as e:
324
  error_output = {"error": str(e)}
325
  error_metadata = {
@@ -338,4 +330,4 @@ class MedSAM2Tool(BaseTool):
338
  run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
339
  ) -> Tuple[Dict[str, Any], Dict]:
340
  """Async version of _run."""
341
- return self._run(image_path, prompt_type, prompt_coords, slice_index, run_manager)
 
26
  from hydra.core.global_hydra import GlobalHydra
27
 
28
 
 
29
  class MedSAM2Input(BaseModel):
30
  """Input schema for the MedSAM2 Tool."""
31
 
 
46
 
47
  class MedSAM2Tool(BaseTool):
48
  """Advanced medical image segmentation tool using MedSAM2.
49
+
50
  This tool provides state-of-the-art medical image segmentation capabilities using
51
  the MedSAM2 model, which is specifically adapted for medical imaging from Meta's SAM2.
52
  Supports interactive prompting with boxes, points, or automatic segmentation.
 
91
  # This works around the issue with initialize_config_module in sam2
92
  if GlobalHydra.instance().is_initialized():
93
  GlobalHydra.instance().clear()
94
+
95
  config_dir = Path(__file__).parent.parent.parent.parent / "MedSAM2" / "sam2" / "configs"
96
  initialize_config_dir(config_dir=str(config_dir), version_base="1.2")
97
+
98
  hf_hub_download(
99
+ repo_id=model_path, filename=model_file, local_dir=self.cache_dir, local_dir_use_symlinks=False
 
 
 
100
  )
101
 
102
+ config_path = model_cfg.replace(".yaml", "")
103
  sam2_model = build_sam2(config_path, str(self.cache_dir / model_file), device=device)
104
  self.predictor = SAM2ImagePredictor(sam2_model)
105
 
 
110
  """Load and preprocess image for medical analysis."""
111
  try:
112
  # Handle different image formats
113
+ if image_path.lower().endswith(".dcm"):
114
  # DICOM files - would need DICOM processor
115
  raise ValueError("DICOM files not directly supported. Please convert to standard image format first.")
116
+
117
  # Load standard image formats
118
  image = Image.open(image_path)
119
+
120
  # For medical images, convert to grayscale first if needed, then to RGB
121
+ if image.mode == "L": # Grayscale
122
  # Convert grayscale to RGB for SAM2
123
+ image = image.convert("RGB")
124
+ elif image.mode != "RGB":
125
+ if image.mode == "RGBA":
126
  # Create white background for RGBA
127
+ background = Image.new("RGB", image.size, (255, 255, 255))
128
  background.paste(image, mask=image.split()[-1])
129
  image = background
130
  else:
131
+ image = image.convert("RGB")
132
+
133
  # Convert to numpy array
134
  image_np = np.array(image)
135
+
136
  # Ensure image is in proper range [0, 255]
137
  if image_np.max() <= 1.0:
138
  image_np = (image_np * 255).astype(np.uint8)
139
  else:
140
  image_np = image_np.astype(np.uint8)
141
+
142
  return image_np
143
+
144
  except Exception as e:
145
  raise ValueError(f"Failed to load image {image_path}: {str(e)}")
146
 
 
148
  """Process and validate prompts."""
149
  if prompt_type == "auto":
150
  return None, None, None
151
+
152
  if prompt_coords is None:
153
  if prompt_type != "auto":
154
  raise ValueError(f"Prompt coordinates required for prompt type '{prompt_type}'")
155
  return None, None, None
156
+
157
  if prompt_type == "box":
158
  if len(prompt_coords) != 4:
159
  raise ValueError("Box prompt requires 4 coordinates: [x1,y1,x2,y2]")
160
+
161
  x1, y1, x2, y2 = prompt_coords
162
  # Validate coordinates
163
  if x1 >= x2 or y1 >= y2:
164
  raise ValueError("Invalid box coordinates: x1 < x2 and y1 < y2 required")
165
+
166
  input_box = np.array([[x1, y1, x2, y2]])
167
  return input_box, None, None
168
+
169
  elif prompt_type == "point":
170
  if len(prompt_coords) != 2:
171
  raise ValueError("Point prompt requires 2 coordinates: [x,y]")
172
+
173
  x, y = prompt_coords
174
  input_point = np.array([[x, y]])
175
  input_label = np.array([1]) # Positive point
176
  return None, input_point, input_label
177
+
178
  else:
179
  raise ValueError(f"Unknown prompt type: {prompt_type}")
180
 
181
  def _create_visualization(self, image: np.ndarray, masks: np.ndarray, prompt_info: Dict) -> str:
182
  """Create visualization of segmentation results."""
183
  plt.figure(figsize=(10, 10))
184
+
185
  # Convert RGB image to grayscale for background display
186
  if len(image.shape) == 3:
187
  # Convert RGB to grayscale using standard luminance formula
188
+ gray_image = 0.299 * image[:, :, 0] + 0.587 * image[:, :, 1] + 0.114 * image[:, :, 2]
189
  else:
190
  gray_image = image
191
+
192
  # Display grayscale background
193
+ plt.imshow(gray_image, cmap="gray", extent=[0, image.shape[1], image.shape[0], 0])
194
+
 
 
195
  # Generate color palette for multiple masks
196
  colors = plt.cm.rainbow(np.linspace(0, 1, len(masks)))
197
+
198
  # Process and overlay each mask
199
  for idx, (mask, color) in enumerate(zip(masks, colors)):
200
  if mask.sum() > 0:
 
202
  mask_bool = mask.astype(bool)
203
  colored_mask = np.zeros((*mask_bool.shape, 4))
204
  colored_mask[mask_bool] = (*color[:3], 0.3) # 30% transparency like segmentation tool
205
+ plt.imshow(colored_mask, extent=[0, image.shape[1], image.shape[0], 0])
206
+
 
 
207
  # Add legend entry for each mask
208
  mask_label = f"Mask {idx + 1} (score: {prompt_info.get('scores', [0])[idx] if idx < len(prompt_info.get('scores', [])) else 0:.3f})"
209
  plt.plot([], [], color=color, label=mask_label, linewidth=3)
210
+
211
  # Add prompt visualization with consistent styling
212
+ if prompt_info.get("box") is not None:
213
+ box = prompt_info["box"][0]
214
  x1, y1, x2, y2 = box
215
+ plt.plot([x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1], "g-", linewidth=2, label="Box Prompt")
216
+
217
+ if prompt_info.get("point") is not None:
218
+ point = prompt_info["point"][0]
219
+ plt.plot(point[0], point[1], "go", markersize=10, label="Point Prompt")
220
+
221
  plt.title("Segmentation Overlay")
222
  plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
223
  plt.axis("off")
224
+
225
  # Save visualization with higher DPI like segmentation tool
226
  viz_path = self.temp_dir / f"medsam2_result_{uuid.uuid4().hex[:8]}.png"
227
+ plt.savefig(viz_path, bbox_inches="tight", dpi=300)
228
  plt.close()
229
+
230
  return str(viz_path)
231
 
232
  def _run(
 
241
  try:
242
  # Load image
243
  image = self._load_image(image_path)
244
+
245
  # Set image for predictor
246
  self.predictor.set_image(image)
247
+
248
  # Process prompts
249
+ input_box, input_point, input_label = self._process_prompts(prompt_type, prompt_coords, image.shape[:2])
250
+
 
 
251
  # Run inference
252
  if prompt_type == "auto":
253
  # For auto segmentation, try multiple approaches and select best result
254
  h, w = image.shape[:2]
255
+
256
  # Try multiple points in key areas for medical images
257
+ sample_points = np.array(
258
+ [
259
+ [w // 3, h // 3], # Upper left lung area
260
+ [2 * w // 3, h // 3], # Upper right lung area
261
+ [w // 2, 2 * h // 3], # Lower center area
262
+ ]
263
+ )
264
  sample_labels = np.array([1, 1, 1]) # All positive points
265
+
266
  masks, scores, logits = self.predictor.predict(
267
  point_coords=sample_points,
268
  point_labels=sample_labels,
 
275
  box=input_box,
276
  multimask_output=True,
277
  )
278
+
279
  # Create visualization
280
  prompt_info = {
281
+ "box": input_box,
282
+ "point": input_point,
283
+ "type": prompt_type,
284
+ "scores": scores, # Add scores for legend display
285
  }
286
  viz_path = self._create_visualization(image, masks, prompt_info)
287
+
288
  # Create output dictionary (main results)
289
  output = {
290
  "segmentation_image_path": viz_path,
291
+ "confidence_scores": scores.tolist() if hasattr(scores, "tolist") else list(scores),
292
  "num_masks": len(masks),
293
  "best_mask_score": float(scores[0]) if len(scores) > 0 else 0.0,
294
  "mask_summary": {
295
  "total_masks": len(masks),
296
  "mask_shapes": [list(mask.shape) for mask in masks],
297
+ "segmented_area_pixels": [int(mask.sum()) for mask in masks],
298
  },
299
  }
300
+
301
  # Create metadata dictionary
302
  metadata = {
303
  "image_path": image_path,
 
309
  "num_masks_generated": len(masks),
310
  "analysis_status": "completed",
311
  }
312
+
313
  return output, metadata
314
+
315
  except Exception as e:
316
  error_output = {"error": str(e)}
317
  error_metadata = {
 
330
  run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
331
  ) -> Tuple[Dict[str, Any], Dict]:
332
  """Async version of _run."""
333
+ return self._run(image_path, prompt_type, prompt_coords, slice_index, run_manager)
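
For orientation, a standalone sketch of the box/point/auto prompt validation that the reformatted _process_prompts hunks implement. It mirrors the logic above with numpy only and does not call the tool itself.

import numpy as np

def process_prompts(prompt_type, prompt_coords):
    if prompt_type == "auto":
        return None, None, None
    if prompt_type == "box":
        x1, y1, x2, y2 = prompt_coords
        if x1 >= x2 or y1 >= y2:
            raise ValueError("Invalid box coordinates: x1 < x2 and y1 < y2 required")
        return np.array([[x1, y1, x2, y2]]), None, None
    if prompt_type == "point":
        x, y = prompt_coords
        return None, np.array([[x, y]]), np.array([1])  # single positive click
    raise ValueError(f"Unknown prompt type: {prompt_type}")

print(process_prompts("box", [10, 20, 100, 200])[0])  # [[ 10  20 100 200]]
print(process_prompts("point", [64, 64])[1])          # [[64 64]]
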
medrax/tools/segmentation/segmentation.py CHANGED
@@ -41,9 +41,7 @@ class OrganMetrics(BaseModel):
41
  area_pixels: int = Field(..., description="Area in pixels")
42
  area_cm2: float = Field(..., description="Approximate area in cm²")
43
  centroid: Tuple[float, float] = Field(..., description="(y, x) coordinates of centroid")
44
- bbox: Tuple[int, int, int, int] = Field(
45
- ..., description="Bounding box coordinates (min_y, min_x, max_y, max_x)"
46
- )
47
 
48
  # Size metrics
49
  width: int = Field(..., description="Width of the organ in pixels")
@@ -51,9 +49,7 @@ class OrganMetrics(BaseModel):
51
  aspect_ratio: float = Field(..., description="Height/width ratio")
52
 
53
  # Position metrics
54
- relative_position: Dict[str, float] = Field(
55
- ..., description="Position relative to image boundaries (0-1 scale)"
56
- )
57
 
58
  # Analysis metrics
59
  mean_intensity: float = Field(..., description="Mean pixel intensity in the organ region")
@@ -90,9 +86,7 @@ class ChestXRaySegmentationTool(BaseTool):
90
  self.model = self.model.to(self.device)
91
  self.model.eval()
92
 
93
- self.transform = torchvision.transforms.Compose(
94
- [xrv.datasets.XRayCenterCrop(), xrv.datasets.XRayResizer(512)]
95
- )
96
 
97
  self.temp_dir = temp_dir if isinstance(temp_dir, Path) else Path(temp_dir)
98
  self.temp_dir.mkdir(exist_ok=True)
@@ -115,9 +109,7 @@ class ChestXRaySegmentationTool(BaseTool):
115
  "Spine": 13,
116
  }
117
 
118
- def _align_mask_to_original(
119
- self, mask: np.ndarray, original_shape: Tuple[int, int]
120
- ) -> np.ndarray:
121
  """
122
  Align a mask from the transformed (cropped/resized) space back to the full original image.
123
  Assumes that the transform does a center crop to a square of side = min(original height, width)
@@ -170,23 +162,17 @@ class ChestXRaySegmentationTool(BaseTool):
170
  bbox=tuple(map(int, props.bbox)),
171
  width=int(props.bbox[3] - props.bbox[1]),
172
  height=int(props.bbox[2] - props.bbox[0]),
173
- aspect_ratio=float(
174
- (props.bbox[2] - props.bbox[0]) / max(1, props.bbox[3] - props.bbox[1])
175
- ),
176
  relative_position=relative_pos,
177
  mean_intensity=float(mean_intensity),
178
  std_intensity=float(std_intensity),
179
  confidence_score=float(confidence),
180
  )
181
 
182
- def _save_visualization(
183
- self, original_img: np.ndarray, pred_masks: torch.Tensor, organ_indices: List[int]
184
- ) -> str:
185
  """Save visualization of original image with segmentation masks overlaid."""
186
  plt.figure(figsize=(10, 10))
187
- plt.imshow(
188
- original_img, cmap="gray", extent=[0, original_img.shape[1], original_img.shape[0], 0]
189
- )
190
 
191
  # Generate color palette for organs
192
  colors = plt.cm.rainbow(np.linspace(0, 1, len(organ_indices)))
@@ -202,14 +188,10 @@ class ChestXRaySegmentationTool(BaseTool):
202
  # Create a colored overlay with transparency
203
  colored_mask = np.zeros((*original_img.shape, 4))
204
  colored_mask[mask > 0] = (*color[:3], 0.3)
205
- plt.imshow(
206
- colored_mask, extent=[0, original_img.shape[1], original_img.shape[0], 0]
207
- )
208
 
209
  # Add legend entry for the organ
210
- organ_name = list(self.organ_map.keys())[
211
- list(self.organ_map.values()).index(organ_idx)
212
- ]
213
  plt.plot([], [], color=color, label=organ_name, linewidth=3)
214
 
215
  plt.title("Segmentation Overlay")
@@ -266,9 +248,7 @@ class ChestXRaySegmentationTool(BaseTool):
266
  for idx, organ_name in zip(organ_indices, organs):
267
  mask = pred_masks[0, idx].cpu().numpy()
268
  if mask.sum() > 0:
269
- metrics = self._compute_organ_metrics(
270
- mask, original_img, float(pred_probs[0, idx].mean().cpu())
271
- )
272
  if metrics:
273
  results[organ_name] = metrics
274
 
 
41
  area_pixels: int = Field(..., description="Area in pixels")
42
  area_cm2: float = Field(..., description="Approximate area in cm²")
43
  centroid: Tuple[float, float] = Field(..., description="(y, x) coordinates of centroid")
44
+ bbox: Tuple[int, int, int, int] = Field(..., description="Bounding box coordinates (min_y, min_x, max_y, max_x)")
 
 
45
 
46
  # Size metrics
47
  width: int = Field(..., description="Width of the organ in pixels")
 
49
  aspect_ratio: float = Field(..., description="Height/width ratio")
50
 
51
  # Position metrics
52
+ relative_position: Dict[str, float] = Field(..., description="Position relative to image boundaries (0-1 scale)")
 
 
53
 
54
  # Analysis metrics
55
  mean_intensity: float = Field(..., description="Mean pixel intensity in the organ region")
 
86
  self.model = self.model.to(self.device)
87
  self.model.eval()
88
 
89
+ self.transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(), xrv.datasets.XRayResizer(512)])
 
 
90
 
91
  self.temp_dir = temp_dir if isinstance(temp_dir, Path) else Path(temp_dir)
92
  self.temp_dir.mkdir(exist_ok=True)
 
109
  "Spine": 13,
110
  }
111
 
112
+ def _align_mask_to_original(self, mask: np.ndarray, original_shape: Tuple[int, int]) -> np.ndarray:
 
 
113
  """
114
  Align a mask from the transformed (cropped/resized) space back to the full original image.
115
  Assumes that the transform does a center crop to a square of side = min(original height, width)
 
162
  bbox=tuple(map(int, props.bbox)),
163
  width=int(props.bbox[3] - props.bbox[1]),
164
  height=int(props.bbox[2] - props.bbox[0]),
165
+ aspect_ratio=float((props.bbox[2] - props.bbox[0]) / max(1, props.bbox[3] - props.bbox[1])),
 
 
166
  relative_position=relative_pos,
167
  mean_intensity=float(mean_intensity),
168
  std_intensity=float(std_intensity),
169
  confidence_score=float(confidence),
170
  )
171
 
172
+ def _save_visualization(self, original_img: np.ndarray, pred_masks: torch.Tensor, organ_indices: List[int]) -> str:
 
 
173
  """Save visualization of original image with segmentation masks overlaid."""
174
  plt.figure(figsize=(10, 10))
175
+ plt.imshow(original_img, cmap="gray", extent=[0, original_img.shape[1], original_img.shape[0], 0])
 
 
176
 
177
  # Generate color palette for organs
178
  colors = plt.cm.rainbow(np.linspace(0, 1, len(organ_indices)))
 
188
  # Create a colored overlay with transparency
189
  colored_mask = np.zeros((*original_img.shape, 4))
190
  colored_mask[mask > 0] = (*color[:3], 0.3)
191
+ plt.imshow(colored_mask, extent=[0, original_img.shape[1], original_img.shape[0], 0])
 
 
192
 
193
  # Add legend entry for the organ
194
+ organ_name = list(self.organ_map.keys())[list(self.organ_map.values()).index(organ_idx)]
 
 
195
  plt.plot([], [], color=color, label=organ_name, linewidth=3)
196
 
197
  plt.title("Segmentation Overlay")
 
248
  for idx, organ_name in zip(organ_indices, organs):
249
  mask = pred_masks[0, idx].cpu().numpy()
250
  if mask.sum() > 0:
251
+ metrics = self._compute_organ_metrics(mask, original_img, float(pred_probs[0, idx].mean().cpu()))
 
 
252
  if metrics:
253
  results[organ_name] = metrics
254
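
A small sketch of the organ-name reverse lookup condensed above. "Spine": 13 comes from the hunk; the other entries and indices are illustrative.

organ_map = {"Left Lung": 4, "Right Lung": 5, "Heart": 8, "Spine": 13}  # indices besides Spine are made up
organ_idx = 13
organ_name = list(organ_map.keys())[list(organ_map.values()).index(organ_idx)]
print(organ_name)  # Spine

An inverted dict, {v: k for k, v in organ_map.items()}, would avoid the double list() scan, but the commit keeps the original lookup and only reflows it.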
 
medrax/tools/utils.py CHANGED
@@ -16,18 +16,10 @@ class ImageVisualizerInput(BaseModel):
16
 
17
  image_path: str = Field(..., description="Path to the image file to display, only supports JPG or PNG images")
18
  title: Optional[str] = Field(None, description="Optional title to display above the image")
19
- description: Optional[str] = Field(
20
- None, description="Optional description to display below the image"
21
- )
22
- width: Optional[int] = Field(
23
- 10, description="Optional figure width in inches"
24
- )
25
- height: Optional[int] = Field(
26
- 10, description="Optional figure height in inches"
27
- )
28
- cmap: Optional[str] = Field(
29
- "rgb", description="Optional colormap to use for displaying the image"
30
- )
31
 
32
 
33
  class ImageVisualizerTool(BaseTool):
@@ -65,9 +57,7 @@ class ImageVisualizerTool(BaseTool):
65
 
66
  # Add description if provided
67
  if description:
68
- plt.figtext(
69
- 0.5, 0.01, description, wrap=True, horizontalalignment="center", fontsize=10
70
- )
71
 
72
  # Adjust margins to minimize whitespace while preventing overlap
73
  plt.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
 
16
 
17
  image_path: str = Field(..., description="Path to the image file to display, only supports JPG or PNG images")
18
  title: Optional[str] = Field(None, description="Optional title to display above the image")
19
+ description: Optional[str] = Field(None, description="Optional description to display below the image")
20
+ width: Optional[int] = Field(10, description="Optional figure width in inches")
21
+ height: Optional[int] = Field(10, description="Optional figure height in inches")
22
+ cmap: Optional[str] = Field("rgb", description="Optional colormap to use for displaying the image")
 
 
 
 
 
 
 
 
23
 
24
 
25
  class ImageVisualizerTool(BaseTool):
 
57
 
58
  # Add description if provided
59
  if description:
60
+ plt.figtext(0.5, 0.01, description, wrap=True, horizontalalignment="center", fontsize=10)
 
 
61
 
62
  # Adjust margins to minimize whitespace while preventing overlap
63
  plt.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.95)
medrax/tools/vqa/__init__.py CHANGED
@@ -1,16 +1,16 @@
1
  """Visual Question Answering tools for medical images."""
2
 
3
  from .llava_med import LlavaMedTool, LlavaMedInput
4
- from .xray_vqa import CheXagentXRayVQATool, XRayVQAToolInput
5
  from .medgemma.medgemma_client import MedGemmaAPIClientTool, MedGemmaVQAInput
6
  from .medgemma.medgemma_setup import setup_medgemma_env
7
 
8
  __all__ = [
9
  "LlavaMedTool",
10
  "LlavaMedInput",
11
- "CheXagentXRayVQATool",
12
  "XRayVQAToolInput",
13
  "MedGemmaAPIClientTool",
14
  "MedGemmaVQAInput",
15
- "setup_medgemma_env"
16
- ]
 
1
  """Visual Question Answering tools for medical images."""
2
 
3
  from .llava_med import LlavaMedTool, LlavaMedInput
4
+ from .xray_vqa import CheXagentXRayVQATool, XRayVQAToolInput
5
  from .medgemma.medgemma_client import MedGemmaAPIClientTool, MedGemmaVQAInput
6
  from .medgemma.medgemma_setup import setup_medgemma_env
7
 
8
  __all__ = [
9
  "LlavaMedTool",
10
  "LlavaMedInput",
11
+ "CheXagentXRayVQATool",
12
  "XRayVQAToolInput",
13
  "MedGemmaAPIClientTool",
14
  "MedGemmaVQAInput",
15
+ "setup_medgemma_env",
16
+ ]
medrax/tools/vqa/llava_med.py CHANGED
@@ -83,13 +83,7 @@ class LlavaMedTool(BaseTool):
83
  self, question: str, image_path: Optional[str] = None
84
  ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
85
  if self.model.config.mm_use_im_start_end:
86
- question = (
87
- DEFAULT_IM_START_TOKEN
88
- + DEFAULT_IMAGE_TOKEN
89
- + DEFAULT_IM_END_TOKEN
90
- + "\n"
91
- + question
92
- )
93
  else:
94
  question = DEFAULT_IMAGE_TOKEN + "\n" + question
95
 
@@ -99,9 +93,7 @@ class LlavaMedTool(BaseTool):
99
  prompt = conv.get_prompt()
100
 
101
  input_ids = (
102
- tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
103
- .unsqueeze(0)
104
- .cuda()
105
  )
106
 
107
  image_tensor = None
@@ -147,11 +139,11 @@ class LlavaMedTool(BaseTool):
147
  )
148
 
149
  answer = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
150
-
151
  output = {
152
  "answer": answer,
153
  }
154
-
155
  metadata = {
156
  "question": question,
157
  "image_path": image_path,
 
83
  self, question: str, image_path: Optional[str] = None
84
  ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
85
  if self.model.config.mm_use_im_start_end:
86
+ question = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + question
 
 
 
 
 
 
87
  else:
88
  question = DEFAULT_IMAGE_TOKEN + "\n" + question
89
 
 
93
  prompt = conv.get_prompt()
94
 
95
  input_ids = (
96
+ tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).cuda()
 
 
97
  )
98
 
99
  image_tensor = None
 
139
  )
140
 
141
  answer = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
142
+
143
  output = {
144
  "answer": answer,
145
  }
146
+
147
  metadata = {
148
  "question": question,
149
  "image_path": image_path,
medrax/tools/vqa/xray_vqa.py CHANGED
@@ -15,13 +15,9 @@ from langchain_core.tools import BaseTool
15
  class XRayVQAToolInput(BaseModel):
16
  """Input schema for the CheXagent Tool."""
17
 
18
- image_paths: List[str] = Field(
19
- ..., description="List of paths to chest X-ray images to analyze"
20
- )
21
  prompt: str = Field(..., description="Question or instruction about the chest X-ray images")
22
- max_new_tokens: int = Field(
23
- 512, description="Maximum number of tokens to generate in the response"
24
- )
25
 
26
 
27
  class CheXagentXRayVQATool(BaseTool):
@@ -99,16 +95,14 @@ class CheXagentXRayVQATool(BaseTool):
99
  Returns:
100
  str: Model's response
101
  """
102
- query = self.tokenizer.from_list_format(
103
- [*[{"image": path} for path in image_paths], {"text": prompt}]
104
- )
105
  conv = [
106
  {"from": "system", "value": "You are a helpful assistant."},
107
  {"from": "human", "value": query},
108
  ]
109
- input_ids = self.tokenizer.apply_chat_template(
110
- conv, add_generation_prompt=True, return_tensors="pt"
111
- ).to(device=self.device)
112
 
113
  # Run inference
114
  with torch.inference_mode():
 
15
  class XRayVQAToolInput(BaseModel):
16
  """Input schema for the CheXagent Tool."""
17
 
18
+ image_paths: List[str] = Field(..., description="List of paths to chest X-ray images to analyze")
 
 
19
  prompt: str = Field(..., description="Question or instruction about the chest X-ray images")
20
+ max_new_tokens: int = Field(512, description="Maximum number of tokens to generate in the response")
 
 
21
 
22
 
23
  class CheXagentXRayVQATool(BaseTool):
 
95
  Returns:
96
  str: Model's response
97
  """
98
+ query = self.tokenizer.from_list_format([*[{"image": path} for path in image_paths], {"text": prompt}])
 
 
99
  conv = [
100
  {"from": "system", "value": "You are a helpful assistant."},
101
  {"from": "human", "value": query},
102
  ]
103
+ input_ids = self.tokenizer.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(
104
+ device=self.device
105
+ )
106
 
107
  # Run inference
108
  with torch.inference_mode():
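
A data-only sketch of the query list that the reflowed from_list_format call receives; no model or tokenizer is needed, and the file names are invented:

image_paths = ["frontal.png", "lateral.png"]  # placeholder paths
prompt = "Is there cardiomegaly?"
query_items = [*[{"image": path} for path in image_paths], {"text": prompt}]
print(query_items)
# [{'image': 'frontal.png'}, {'image': 'lateral.png'}, {'text': 'Is there cardiomegaly?'}]
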
medrax/tools/xray_generation.py CHANGED
@@ -11,26 +11,15 @@ from langchain_core.tools import BaseTool
11
 
12
  class ChestXRayGeneratorInput(BaseModel):
13
  """Input schema for the Chest X-Ray Generator Tool."""
14
-
15
  prompt: str = Field(
16
- ...,
17
- description="Description of the medical condition to generate (e.g., 'big left-sided pleural effusion')"
18
- )
19
- height: int = Field(
20
- 512,
21
- description="Height of generated image in pixels"
22
- )
23
- width: int = Field(
24
- 512,
25
- description="Width of generated image in pixels"
26
- )
27
- num_inference_steps: int = Field(
28
- 75,
29
- description="Number of denoising steps (higher = better quality but slower)"
30
  )
 
 
 
31
  guidance_scale: float = Field(
32
- 4.0,
33
- description="How closely to follow the prompt (higher = more faithful but less diverse)"
34
  )
35
 
36
 
@@ -60,11 +49,11 @@ class ChestXRayGeneratorTool(BaseTool):
60
  ):
61
  """Initialize the chest X-ray generator tool."""
62
  super().__init__()
63
-
64
  self.device = torch.device(device) if device else "cuda"
65
  self.model = StableDiffusionPipeline.from_pretrained(model_path, cache_dir=cache_dir)
66
  self.model = self.model.to(torch.float32).to(self.device)
67
-
68
  self.temp_dir = Path(temp_dir if temp_dir else tempfile.mkdtemp())
69
  self.temp_dir.mkdir(exist_ok=True)
70
 
@@ -97,7 +86,7 @@ class ChestXRayGeneratorTool(BaseTool):
97
  num_inference_steps=num_inference_steps,
98
  height=height,
99
  width=width,
100
- guidance_scale=guidance_scale
101
  )
102
 
103
  # Save generated image
@@ -107,7 +96,7 @@ class ChestXRayGeneratorTool(BaseTool):
107
  output = {
108
  "image_path": str(image_path),
109
  }
110
-
111
  metadata = {
112
  "prompt": prompt,
113
  "num_inference_steps": num_inference_steps,
@@ -126,7 +115,7 @@ class ChestXRayGeneratorTool(BaseTool):
126
  "prompt": prompt,
127
  "analysis_status": "failed",
128
  "error_details": str(e),
129
- }
130
  )
131
 
132
  async def _arun(
@@ -139,4 +128,4 @@ class ChestXRayGeneratorTool(BaseTool):
139
  run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
140
  ) -> Tuple[Dict[str, str], Dict]:
141
  """Async version of _run."""
142
- return self._run(prompt, num_inference_steps, guidance_scale, height, width)
 
11
 
12
  class ChestXRayGeneratorInput(BaseModel):
13
  """Input schema for the Chest X-Ray Generator Tool."""
14
+
15
  prompt: str = Field(
16
+ ..., description="Description of the medical condition to generate (e.g., 'big left-sided pleural effusion')"
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  )
18
+ height: int = Field(512, description="Height of generated image in pixels")
19
+ width: int = Field(512, description="Width of generated image in pixels")
20
+ num_inference_steps: int = Field(75, description="Number of denoising steps (higher = better quality but slower)")
21
  guidance_scale: float = Field(
22
+ 4.0, description="How closely to follow the prompt (higher = more faithful but less diverse)"
 
23
  )
24
 
25
 
 
49
  ):
50
  """Initialize the chest X-ray generator tool."""
51
  super().__init__()
52
+
53
  self.device = torch.device(device) if device else "cuda"
54
  self.model = StableDiffusionPipeline.from_pretrained(model_path, cache_dir=cache_dir)
55
  self.model = self.model.to(torch.float32).to(self.device)
56
+
57
  self.temp_dir = Path(temp_dir if temp_dir else tempfile.mkdtemp())
58
  self.temp_dir.mkdir(exist_ok=True)
59
 
 
86
  num_inference_steps=num_inference_steps,
87
  height=height,
88
  width=width,
89
+ guidance_scale=guidance_scale,
90
  )
91
 
92
  # Save generated image
 
96
  output = {
97
  "image_path": str(image_path),
98
  }
99
+
100
  metadata = {
101
  "prompt": prompt,
102
  "num_inference_steps": num_inference_steps,
 
115
  "prompt": prompt,
116
  "analysis_status": "failed",
117
  "error_details": str(e),
118
+ },
119
  )
120
 
121
  async def _arun(
 
128
  run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
129
  ) -> Tuple[Dict[str, str], Dict]:
130
  """Async version of _run."""
131
+ return self._run(prompt, num_inference_steps, guidance_scale, height, width)
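
Finally, a hedged sketch of how the generation call whose trailing comma was fixed above is typically driven through a diffusers StableDiffusionPipeline. The model id is a placeholder and the weights would need to be available locally or downloadable, so treat this as illustrative rather than the tool's exact setup.

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("some-org/chest-xray-diffusion")  # hypothetical model id
pipe = pipe.to(torch.float32).to("cuda")

result = pipe(
    "big left-sided pleural effusion",  # example prompt from the input schema's description
    num_inference_steps=75,
    height=512,
    width=512,
    guidance_scale=4.0,
)
result.images[0].save("generated_cxr.png")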