Spaces:
Running on Zero
Refactor bias detection and output parsing; update requirements
Browse files
- Replaced regex-based sign extraction with MedGemma integration in bias_detector.py.
- Enhanced output_parser.py to utilize llm_output_parser for improved JSON extraction.
- Updated prompts.py to clarify bias source options.
- Added llm-output-parser dependency in requirements.txt.
- agents/bias_detector.py +27 -39
- agents/output_parser.py +13 -1
- agents/prompts.py +1 -1
- requirements.txt +1 -0
agents/bias_detector.py
CHANGED
|
@@ -4,7 +4,7 @@ Runs MedSigLIP sign verification on imaging findings mentioned by the Diagnostic
|
|
| 4 |
Outputs structured JSON.
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
import
|
| 8 |
import logging
|
| 9 |
from agents.state import PipelineState
|
| 10 |
from agents.prompts import BIAS_DETECTOR_SYSTEM, BIAS_DETECTOR_USER
|
|
@@ -13,22 +13,21 @@ from models import medgemma_client, medsiglip_client
|
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
]
|
| 25 |
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
def _extract_signs(findings: object) -> list[str]:
|
| 28 |
-
"""Extract imaging signs mentioned in the Diagnostician's findings.
|
| 29 |
|
| 30 |
-
|
| 31 |
-
"""
|
| 32 |
if isinstance(findings, list):
|
| 33 |
chunks: list[str] = []
|
| 34 |
for item in findings:
|
|
@@ -41,31 +40,20 @@ def _extract_signs(findings: object) -> list[str]:
|
|
| 41 |
else:
|
| 42 |
findings_text = str(findings)
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
if cleaned not in found and len(cleaned) > 5:
|
| 59 |
-
found.append(cleaned)
|
| 60 |
-
|
| 61 |
-
# Deduplicate, limit to 8
|
| 62 |
-
seen = set()
|
| 63 |
-
unique = []
|
| 64 |
-
for s in found:
|
| 65 |
-
if s not in seen:
|
| 66 |
-
seen.add(s)
|
| 67 |
-
unique.append(s)
|
| 68 |
-
return unique[:8]
|
| 69 |
|
| 70 |
|
| 71 |
def run(state: PipelineState) -> PipelineState:
|
|
|
|
| 4 |
Outputs structured JSON.
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
import json
|
| 8 |
import logging
|
| 9 |
from agents.state import PipelineState
|
| 10 |
from agents.prompts import BIAS_DETECTOR_SYSTEM, BIAS_DETECTOR_USER
|
|
|
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
+
_SIGN_EXTRACTION_PROMPT = """\
|
| 17 |
+
Extract radiological signs (imaging abnormalities) from the following diagnostic findings.
|
| 18 |
+
Return ONLY a JSON array of short sign names.
|
| 19 |
+
Rules:
|
| 20 |
+
- Only include imaging abnormalities that could be visually verified on a medical image.
|
| 21 |
+
- Do NOT include normal anatomical structures, abstract diagnoses, clinical impressions, or treatment recommendations.
|
| 22 |
+
- Maximum 8 signs. If more exist, keep the most clinically significant ones.
|
| 23 |
+
- Return an empty array [] if no signs are found.
|
|
|
|
| 24 |
|
| 25 |
+
Findings:
|
| 26 |
+
{findings_text}"""
|
| 27 |
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
def _extract_signs(findings: object) -> list[str]:
|
| 30 |
+
"""Extract signs from findings using MedGemma."""
|
| 31 |
if isinstance(findings, list):
|
| 32 |
chunks: list[str] = []
|
| 33 |
for item in findings:
|
|
|
|
| 40 |
else:
|
| 41 |
findings_text = str(findings)
|
| 42 |
|
| 43 |
+
if not findings_text.strip():
|
| 44 |
+
return []
|
| 45 |
+
|
| 46 |
+
try:
|
| 47 |
+
raw = medgemma_client.generate_text(
|
| 48 |
+
_SIGN_EXTRACTION_PROMPT.format(findings_text=findings_text),
|
| 49 |
+
)
|
| 50 |
+
parsed = json.loads(raw.strip().strip("`").removeprefix("json").strip())
|
| 51 |
+
if isinstance(parsed, list):
|
| 52 |
+
return [str(s).strip().lower() for s in parsed if isinstance(s, str)][:8]
|
| 53 |
+
except (json.JSONDecodeError, Exception) as e:
|
| 54 |
+
logger.warning("LLM sign extraction failed, raw output: %s — %s", raw, e)
|
| 55 |
+
|
| 56 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
def run(state: PipelineState) -> PipelineState:
|
agents/output_parser.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
"""
|
| 2 |
JSON output parser for LLM responses.
|
| 3 |
-
Uses json_repair
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import logging
|
| 7 |
from collections.abc import Mapping
|
| 8 |
from json_repair import repair_json
|
|
|
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
@@ -48,6 +50,16 @@ def parse_json_response(text: str) -> dict:
|
|
| 48 |
if isinstance(result, list):
|
| 49 |
return _coerce_list_root(result)
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
raise ValueError(
|
| 52 |
f"Could not parse JSON from LLM output (got {type(result).__name__}, length={len(text)})"
|
| 53 |
)
|
|
|
|
| 1 |
"""
|
| 2 |
JSON output parser for LLM responses.
|
| 3 |
+
Uses json_repair for malformed JSON, and llm_output_parser as fallback
|
| 4 |
+
to extract JSON from mixed text/markdown LLM output.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import logging
|
| 8 |
from collections.abc import Mapping
|
| 9 |
from json_repair import repair_json
|
| 10 |
+
from llm_output_parser import parse_json as extract_json
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
| 50 |
if isinstance(result, list):
|
| 51 |
return _coerce_list_root(result)
|
| 52 |
|
| 53 |
+
# Fallback: json_repair returned a plain string (model output natural language).
|
| 54 |
+
# Use llm_output_parser to extract JSON from mixed text/markdown.
|
| 55 |
+
if isinstance(result, str):
|
| 56 |
+
logger.warning("json_repair returned str, trying llm_output_parser extraction")
|
| 57 |
+
extracted = extract_json(text, allow_incomplete=True, strict=False)
|
| 58 |
+
if isinstance(extracted, Mapping):
|
| 59 |
+
return dict(extracted)
|
| 60 |
+
if isinstance(extracted, list):
|
| 61 |
+
return _coerce_list_root(extracted)
|
| 62 |
+
|
| 63 |
raise ValueError(
|
| 64 |
f"Could not parse JSON from LLM output (got {type(result).__name__}, length={len(text)})"
|
| 65 |
)
|
agents/prompts.py
CHANGED
|
@@ -56,7 +56,7 @@ Compare both assessments objectively. Neither is assumed correct. Respond with J
|
|
| 56 |
"discrepancy_summary": "how the two assessments differ — note which points are uncertain",
|
| 57 |
"identified_biases": [
|
| 58 |
{{
|
| 59 |
-
"source": "
|
| 60 |
"type": "bias type",
|
| 61 |
"evidence": "why you suspect this bias",
|
| 62 |
"severity": "choose from LOW | MEDIUM | HIGH"
|
|
|
|
| 56 |
"discrepancy_summary": "how the two assessments differ — note which points are uncertain",
|
| 57 |
"identified_biases": [
|
| 58 |
{{
|
| 59 |
+
"source": "choose from HUMAN | AI | BOTH",
|
| 60 |
"type": "bias type",
|
| 61 |
"evidence": "why you suspect this bias",
|
| 62 |
"severity": "choose from LOW | MEDIUM | HIGH"
|
requirements.txt
CHANGED
|
@@ -8,3 +8,4 @@ Pillow>=10.0.0
|
|
| 8 |
numpy>=1.24.0
|
| 9 |
scipy>=1.10.0
|
| 10 |
json-repair>=0.30.0
|
|
|
|
|
|
| 8 |
numpy>=1.24.0
|
| 9 |
scipy>=1.10.0
|
| 10 |
json-repair>=0.30.0
|
| 11 |
+
llm-output-parser>=0.3.0
|