# Source: MoizMedgemma27b / app.py (Hugging Face Space)
# Last commit: "Update app.py" by Muhammadidrees (20a300a, verified)
import math
import os
import re
import threading
import uuid
from datetime import datetime
from xml.sax.saxutils import escape as xml_escape

import gradio as gr
import torch
from huggingface_hub import login
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Startup banner: log the torch build and CUDA availability before the
# model load below.
print("=" * 50)
print("🚀 Starting AI Health Assistant")
print(f"Torch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print("=" * 50)
# ------------------------------------------------------
# 🔹 STEP 1: Authentication for HF Spaces
# ------------------------------------------------------
# HUGGINGFACE_HUB_TOKEN takes precedence; HF_TOKEN is the fallback.
# Surrounding whitespace (easy to pick up when pasting a secret) is removed
# so that a blank value is treated the same as a missing one.
hf_token = (os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
if not hf_token:
    # Fail at import time: the tokenizer/model downloads below pass this token.
    raise ValueError(
        "❌ No token found!\n"
        "Please add your Hugging Face token in Space Settings → Repository secrets.\n"
        "Name it either: HUGGINGFACE_HUB_TOKEN or HF_TOKEN\n"
        "Create a token at: https://huggingface.co/settings/tokens"
    )
# Only the length is logged, never the token itself.
print(f"✅ HF Token found (length: {len(hf_token)})")
print("🔐 Logging in to Hugging Face...")
login(token=hf_token)
print("✅ Login successful!")
# ------------------------------------------------------
# 🔹 STEP 2: Load model and tokenizer
# ------------------------------------------------------
# The checkpoint can be swapped without editing code by setting MODEL_ID,
# for example:
#   google/medgemma-27b-text-it          (requires an access request)
#   mistralai/Mistral-7B-Instruct-v0.3
# When MODEL_ID is unset or empty the previous hard-coded default is used.
model_id = os.getenv("MODEL_ID") or "google/gemma-2-9b-it"

print("🔄 Loading tokenizer...")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    # generate() is later given pad_token_id, so make sure one exists.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("✅ Tokenizer loaded successfully!")
except Exception as e:
    # Log a readable message, then re-raise so startup still fails loudly.
    print(f"❌ Error loading tokenizer: {e}")
    raise

print("🔄 Loading model... (this may take several minutes)")
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=hf_token,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    raise
# ------------------------------------------------------
# 🔹 STEP 3: Input validation helpers
# ------------------------------------------------------
def validate_numeric(value, name, min_val=0, max_val=None):
    """Coerce *value* to a float and check it against an inclusive range.

    Args:
        value: Anything ``float()`` accepts (number or numeric string).
        name: Field label used in the error message.
        min_val: Inclusive lower bound.
        max_val: Inclusive upper bound, or ``None`` for no upper bound.

    Returns:
        ``(True, number)`` when the value is valid, otherwise
        ``(False, message)`` where ``message`` explains the failure.
    """
    try:
        num = float(value)
    except (ValueError, TypeError):
        return False, f"{name} must be a valid number"
    # NaN compares False against every bound and inf has no upper limit when
    # max_val is None, so both would otherwise slip through the range checks.
    if not math.isfinite(num):
        return False, f"{name} must be a valid number"
    if num < min_val:
        return False, f"{name} must be >= {min_val}"
    # Compare against None explicitly: an upper bound of 0 is a real bound.
    if max_val is not None and num > max_val:
        return False, f"{name} must be <= {max_val}"
    return True, num
# ------------------------------------------------------
# 🔹 STEP 4: PDF Generation Function
# ------------------------------------------------------
# A Markdown table delimiter row, e.g. "|---|:---:|" or "| --- | --- |".
_TABLE_DELIMITER_ROW = re.compile(r'\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?')


def _split_table_row(row):
    """Split one Markdown table row into a list of stripped cell strings."""
    row = row.strip()
    cells = row.split('|')
    # Only an outer pipe produces a throwaway empty fragment. Dropping the
    # first/last fragment unconditionally would lose a real cell whenever the
    # row is missing its leading or trailing pipe.
    if row.startswith('|'):
        cells = cells[1:]
    if row.endswith('|'):
        cells = cells[:-1]
    return [cell.strip() for cell in cells]


def parse_markdown_table(md_text):
    """Extract every Markdown pipe table found in *md_text*.

    A table starts at a line whose first non-blank character is ``|`` and
    continues while the following lines contain a ``|``. The delimiter row
    directly under the header (``|---|---|``) is skipped when present; a
    header followed immediately by data rows is accepted as well.

    Args:
        md_text: Text that may contain zero or more Markdown tables.

    Returns:
        A list of tables. Each table is a list of rows (header first) and
        each row is a list of cell strings. Blocks with fewer than two rows
        are not reported.
    """
    tables = []
    lines = md_text.split('\n')
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if not line.startswith('|'):
            i += 1
            continue

        table_lines = [line]
        i += 1
        # Skip the line under the header only when it really is a delimiter
        # row, so tables written without one keep their first data row.
        if i < len(lines) and _TABLE_DELIMITER_ROW.fullmatch(lines[i].strip()):
            i += 1
        while i < len(lines) and '|' in lines[i]:
            table_lines.append(lines[i].strip())
            i += 1

        parsed_table = [cells for cells in map(_split_table_row, table_lines) if cells]
        if len(parsed_table) > 1:
            tables.append(parsed_table)
    return tables
def _markdown_to_markup(text):
    """Escape *text* for a ReportLab Paragraph and render ``**bold**`` spans."""
    # Paragraph parses its input as markup, so raw "<", ">" and "&" coming
    # from the model must be escaped before any tags are added.
    return re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', xml_escape(text))


def _build_ai_table(table_data, total_width, header_style, cell_style):
    """Turn parsed Markdown rows into a styled Table whose cells wrap."""
    n_cols = max(len(row) for row in table_data)
    rows = []
    for row_index, raw_cells in enumerate(table_data):
        style = header_style if row_index == 0 else cell_style
        # Pad short rows so every row has the same number of columns.
        padded = list(raw_cells) + [''] * (n_cols - len(raw_cells))
        # Paragraph cells wrap inside their column; plain strings do not.
        rows.append([Paragraph(_markdown_to_markup(cell), style) for cell in padded])

    pdf_table = Table(rows, colWidths=[total_width / n_cols] * n_cols, repeatRows=1)
    pdf_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c5aa0')),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f8f9fa')]),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
    ]))
    return pdf_table


def create_pdf_report(patient_data, biomarkers, ai_response, filename="health_report.pdf"):
    """Generate the PDF health report and write it to *filename*.

    The document contains a patient-information table, a table of the
    submitted biomarker values, the AI analysis (split on ``###`` headings,
    with Markdown tables rendered as real tables) and a disclaimer.

    Args:
        patient_data: Mapping with keys ``age``, ``gender``, ``height`` (cm)
            and ``weight`` (kg).
        biomarkers: Mapping with keys ``albumin``, ``creatinine``,
            ``glucose``, ``crp``, ``mcv``, ``rdw``, ``alp``, ``wbc``,
            ``lymphocytes``, ``hb`` and ``pv``.
        ai_response: Markdown text produced by the model.
        filename: Output path of the PDF.

    Returns:
        The *filename* that was written.

    Raises:
        KeyError: If a required key is missing from either mapping.
    """
    doc = SimpleDocTemplate(filename, pagesize=letter,
                            topMargin=0.5*inch, bottomMargin=0.5*inch,
                            leftMargin=0.75*inch, rightMargin=0.75*inch)
    story = []
    styles = getSampleStyleSheet()
    # One timestamp for both the "Generated on" line and the report ID.
    now = datetime.now()

    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#1a5490'),
        spaceAfter=30,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=14,
        textColor=colors.HexColor('#2c5aa0'),
        spaceAfter=12,
        spaceBefore=12,
        fontName='Helvetica-Bold'
    )
    subheading_style = ParagraphStyle(
        'CustomSubHeading',
        parent=styles['Heading3'],
        fontSize=12,
        textColor=colors.HexColor('#444444'),
        spaceAfter=10,
        spaceBefore=10,
        fontName='Helvetica-Bold'
    )
    normal_style = ParagraphStyle(
        'CustomNormal',
        parent=styles['Normal'],
        fontSize=10,
        spaceAfter=8,
        leading=14
    )
    # Table cells are Paragraphs, so font and colour come from these styles
    # rather than from TableStyle commands.
    table_cell_style = ParagraphStyle(
        'AITableCell',
        parent=styles['Normal'],
        fontName='Helvetica',
        fontSize=8,
        leading=10
    )
    table_header_style = ParagraphStyle(
        'AITableHeader',
        parent=table_cell_style,
        fontName='Helvetica-Bold',
        fontSize=9,
        leading=11,
        textColor=colors.whitesmoke
    )

    # Title
    story.append(Paragraph("AI Health Assessment Report", title_style))
    story.append(Paragraph(f"Generated on: {now.strftime('%B %d, %Y at %I:%M %p')}",
                           ParagraphStyle('Date', parent=styles['Normal'], fontSize=9,
                                          textColor=colors.grey, alignment=TA_CENTER)))
    story.append(Spacer(1, 0.3*inch))

    # Patient Information Section
    story.append(Paragraph("Patient Information", heading_style))
    # A height of 0 would otherwise raise ZeroDivisionError.
    height_m = patient_data['height'] / 100
    bmi_text = f"{patient_data['weight'] / height_m ** 2:.1f}" if height_m > 0 else "N/A"
    patient_table_data = [
        ['Age', f"{patient_data['age']} years", 'Gender', patient_data['gender']],
        ['Height', f"{patient_data['height']} cm", 'Weight', f"{patient_data['weight']} kg"],
        ['BMI', bmi_text, 'Report ID', f"RPT-{now.strftime('%Y%m%d%H%M')}"]
    ]
    patient_table = Table(patient_table_data, colWidths=[1.2*inch, 1.8*inch, 1.2*inch, 1.8*inch])
    patient_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, -1), colors.HexColor('#f0f4f8')),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTNAME', (2, 0), (2, -1), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('TOPPADDING', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
    ]))
    story.append(patient_table)
    story.append(Spacer(1, 0.2*inch))

    # Biomarkers Input Section
    story.append(Paragraph("Laboratory Biomarkers - Input Values", heading_style))
    biomarker_table_data = [
        ['Biomarker', 'Value', 'Unit', 'Biomarker', 'Value', 'Unit']
    ]
    biomarker_list = [
        ('Albumin', biomarkers['albumin'], 'g/dL'),
        ('Creatinine', biomarkers['creatinine'], 'mg/dL'),
        ('Glucose', biomarkers['glucose'], 'mg/dL'),
        ('CRP', biomarkers['crp'], 'mg/L'),
        ('MCV', biomarkers['mcv'], 'fL'),
        ('RDW', biomarkers['rdw'], '%'),
        ('ALP', biomarkers['alp'], 'U/L'),
        ('WBC', biomarkers['wbc'], 'x10^3/uL'),
        ('Lymphocytes', biomarkers['lymphocytes'], '%'),
        ('Hemoglobin', biomarkers['hb'], 'g/dL'),
        ('Plasma (PV)', biomarkers['pv'], 'mL'),
    ]
    # Two biomarkers per row; the odd one out gets three blank cells.
    for i in range(0, len(biomarker_list), 2):
        row = list(biomarker_list[i])
        if i + 1 < len(biomarker_list):
            row.extend(biomarker_list[i + 1])
        else:
            row.extend(['', '', ''])
        biomarker_table_data.append(row)
    biomarker_table = Table(biomarker_table_data,
                            colWidths=[1.4*inch, 0.9*inch, 0.7*inch, 1.4*inch, 0.9*inch, 0.7*inch])
    biomarker_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c5aa0')),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 11),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 1), (-1, -1), 9),
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f8f9fa')]),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
    ]))
    story.append(biomarker_table)
    story.append(Spacer(1, 0.3*inch))

    # AI Analysis Section
    story.append(PageBreak())
    story.append(Paragraph("AI-Generated Health Analysis", heading_style))
    story.append(Spacer(1, 0.1*inch))
    for section in ai_response.split('###'):
        if not section.strip():
            continue
        lines = section.strip().split('\n')
        # A "####" heading leaves a stray "#" after the split; drop it.
        section_title = lines[0].strip().lstrip('#').strip()
        section_content = '\n'.join(lines[1:]).strip()
        if section_title:
            story.append(Paragraph(_markdown_to_markup(section_title), subheading_style))

        text_content = section_content
        if '|' in section_content:
            for table_data in parse_markdown_table(section_content):
                if len(table_data) > 1:
                    story.append(_build_ai_table(table_data, doc.width,
                                                 table_header_style, table_cell_style))
                    story.append(Spacer(1, 0.15*inch))
            # Remove the table source from the prose. "(?:\n|$)" also matches
            # a final row with no trailing newline, which would otherwise be
            # printed again as raw text under the table.
            text_content = re.sub(r'\|[^\n]+(?:\n|$)', '', section_content)
            text_content = re.sub(r'\n\s*\n', '\n', text_content).strip()

        for para in text_content.split('\n'):
            if para.strip():
                story.append(Paragraph(_markdown_to_markup(para.strip()), normal_style))
        story.append(Spacer(1, 0.1*inch))

    # Footer/Disclaimer
    story.append(Spacer(1, 0.2*inch))
    disclaimer_style = ParagraphStyle(
        'Disclaimer',
        parent=styles['Normal'],
        fontSize=8,
        textColor=colors.HexColor('#666666'),
        alignment=TA_CENTER,
        borderWidth=1,
        borderColor=colors.HexColor('#cccccc'),
        borderPadding=10,
        backColor=colors.HexColor('#fffef0')
    )
    story.append(Paragraph(
        "<b>IMPORTANT DISCLAIMER:</b> This report is generated by an AI system for informational purposes only. "
        "It is NOT a medical diagnosis and should NOT replace professional medical advice. "
        "Always consult with qualified healthcare professionals for medical decisions and treatment.",
        disclaimer_style
    ))
    doc.build(story)
    return filename
# ------------------------------------------------------
# 🔹 STEP 5: Define the model interaction function
# ------------------------------------------------------
def respond(albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymphocytes, hb, pv, age, gender, height, weight):
    """Validate the inputs, ask the model for a report and render it as a PDF.

    Args:
        albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymphocytes,
        hb, pv: Biomarker values as entered in the UI.
        age, height, weight: Patient age (years), height (cm), weight (kg).
        gender: Gender label; inserted into the prompt and the PDF as given.

    Returns:
        ``(markdown_text, pdf_path)`` on success. On a validation failure or
        any other error, ``(error_message, None)``; exceptions are caught
        here and are not propagated to the caller.
    """
    try:
        # Validate all inputs
        validations = [
            validate_numeric(albumin, "Albumin", 0, 10),
            validate_numeric(creatinine, "Creatinine", 0, 20),
            validate_numeric(glucose, "Glucose", 0, 1000),
            validate_numeric(crp, "CRP", 0, 500),
            validate_numeric(mcv, "MCV", 0, 200),
            validate_numeric(rdw, "RDW", 0, 50),
            validate_numeric(alp, "ALP", 0, 1000),
            validate_numeric(wbc, "WBC", 0, 100),
            validate_numeric(lymphocytes, "Lymphocytes", 0, 100),
            validate_numeric(hb, "Hemoglobin", 0, 25),
            validate_numeric(pv, "Plasma", 0, 10000),
            validate_numeric(age, "Age", 0, 150),
            validate_numeric(height, "Height", 0, 300),
            validate_numeric(weight, "Weight", 0, 500),
        ]
        for is_valid, result in validations:
            if not is_valid:
                return f"❌ Validation Error: {result}", None
        (albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc,
         lymphocytes, hb, pv, age, height, weight) = [value for _, value in validations]
        # The range checks are inclusive of 0, but the BMI shown in the PDF
        # divides by height squared.
        if height <= 0 or weight <= 0:
            return "❌ Validation Error: Height and Weight must be greater than 0", None

        system_message = (
            "You are an AI Health Assistant that analyzes laboratory biomarkers "
            "and generates structured, patient-friendly health summaries.\n\n"
            "Your task is to evaluate the provided biomarkers and generate an AI-driven medical report "
            "with insights, observations, and clear explanations.\n"
            "You must strictly follow this structured format:\n\n"
            "### Tabular Mapping\n"
            "- Always include a Markdown table with exactly five columns:\n"
            "| Biomarker | Value | Status (Low/Normal/High) | AI-Inferred Insight | Reference Range |\n"
            "- Include **all available biomarkers** below:\n"
            "Albumin, Creatinine, Glucose, CRP, MCV, RDW, ALP, WBC, Lymphocytes, Hemoglobin, Plasma (PV)\n"
            "- The first row after the header must begin directly with 'Albumin'.\n"
            "- Each biomarker must appear exactly once as a separate row.\n\n"
            "### Executive Summary\n"
            "- List Top 3 Health Priorities.\n"
            "- Highlight Key Strengths or normal biomarkers.\n\n"
            "### System-Specific Analysis\n"
            "- Summarize findings grouped by organ systems (Liver, Kidney, Immune, Blood, etc.).\n"
            "- Status: 'Optimal' | 'Monitor' | 'Needs Attention'.\n"
            "- Provide 2-3 sentences of explanation in plain, supportive language.\n\n"
            "### Personalized Action Plan\n"
            "- Provide categorized recommendations (Nutrition, Lifestyle, Testing, Medical Consultation).\n"
            "- Never recommend medication or treatment.\n\n"
            "### Interaction Alerts\n"
            "- Highlight potential relationships between markers (e.g., high CRP + low Albumin).\n\n"
            "### Constraints\n"
            "- Never give a diagnosis or prescribe medicine.\n"
            "- Never use data not present in the input.\n"
            "- Always recommend consulting a healthcare professional.\n"
            "- Always include normal reference ranges for each biomarker.\n"
            "- Use simple, clear, patient-friendly language.\n"
            "- Provide additional explanation instead of just writing direct points.\n"
            "- Be concise and avoid repetition."
        )
        user_message = (
            f"Patient Info:\n"
            f"- Age: {age} years\n"
            f"- Gender: {gender}\n"
            f"- Height: {height} cm\n"
            f"- Weight: {weight} kg\n\n"
            f"Biomarkers:\n"
            f"- Albumin: {albumin} g/dL\n"
            f"- Creatinine: {creatinine} mg/dL\n"
            f"- Glucose: {glucose} mg/dL\n"
            f"- CRP: {crp} mg/L\n"
            f"- MCV: {mcv} fL\n"
            f"- RDW: {rdw} %\n"
            f"- ALP: {alp} U/L\n"
            f"- WBC: {wbc} x10^3/μL\n"
            f"- Lymphocytes: {lymphocytes} %\n"
            f"- Hemoglobin: {hb} g/dL\n"
            f"- Plasma (PV): {pv} mL"
        )

        # add_generation_prompt opens the assistant turn so the model answers
        # instead of continuing the user message. return_dict also yields the
        # tokenizer's own attention mask, which stays correct when the pad
        # token is the same as the eos token.
        template_kwargs = dict(add_generation_prompt=True, return_tensors="pt", return_dict=True)
        try:
            encodings = tokenizer.apply_chat_template(
                [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message},
                ],
                **template_kwargs,
            )
        except Exception:
            # Some chat templates (Gemma 2 among them) reject a "system"
            # role. Fall back to putting the instructions in the user turn.
            # Any unrelated failure is raised again by this second call.
            encodings = tokenizer.apply_chat_template(
                [{"role": "user", "content": f"{system_message}\n\n{user_message}"}],
                **template_kwargs,
            )
        input_ids = encodings["input_ids"].to(model.device)
        attention_mask = encodings["attention_mask"].to(model.device)

        # Nothing is streamed to the UI (the full text is returned at once),
        # so generate() runs in this thread. With a background thread, an
        # exception inside generate() was lost and the streamer loop waited
        # forever.
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=2000,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
        # Keep only the newly generated tokens (drop the echoed prompt).
        new_tokens = output_ids[0, input_ids.shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)

        patient_data = {
            'age': age,
            'gender': gender,
            'height': height,
            'weight': weight
        }
        biomarkers = {
            'albumin': albumin,
            'creatinine': creatinine,
            'glucose': glucose,
            'crp': crp,
            'mcv': mcv,
            'rdw': rdw,
            'alp': alp,
            'wbc': wbc,
            'lymphocytes': lymphocytes,
            'hb': hb,
            'pv': pv
        }
        # The random suffix keeps two requests in the same second from
        # overwriting each other's PDF.
        pdf_filename = (
            f"health_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            f"_{uuid.uuid4().hex[:8]}.pdf"
        )
        pdf_path = create_pdf_report(patient_data, biomarkers, response, pdf_filename)
        return response, pdf_path
    except Exception as e:
        # Boundary handler: report the failure in the UI instead of raising.
        import traceback
        error_msg = f"❌ Error generating report: {str(e)}\n\n{traceback.format_exc()}"
        print(error_msg)
        return error_msg, None
# ------------------------------------------------------
# 🔹 STEP 6: Gradio UI
# ------------------------------------------------------
# Layout: biomarker inputs and patient details side by side, then the
# generate button, the Markdown report and the downloadable PDF.
with gr.Blocks(theme=gr.themes.Soft(), css="""
.output-markdown table { border-collapse: collapse; width: 100%; margin: 20px 0; }
.output-markdown th { background-color: #2c5aa0; color: white; padding: 12px; text-align: left; }
.output-markdown td { padding: 10px; border: 1px solid #ddd; }
.output-markdown tr:nth-child(even) { background-color: #f8f9fa; }
.output-markdown h3 { color: #2c5aa0; margin-top: 20px; }
""") as demo:
    gr.Markdown("# 🧪 AI Health Assistant with PDF Export")
    gr.Markdown("*Analyze biomarkers with AI-powered insights and download a professional PDF report.*")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📊 Biomarkers")
            # The minimum/maximum values mirror the ranges checked in respond().
            albumin = gr.Number(label="Albumin (g/dL)", value=4.5, minimum=0, maximum=10)
            creatinine = gr.Number(label="Creatinine (mg/dL)", value=1.5, minimum=0, maximum=20)
            glucose = gr.Number(label="Glucose (mg/dL, fasting)", value=160, minimum=0, maximum=1000)
            crp = gr.Number(label="CRP (mg/L)", value=2.5, minimum=0, maximum=500)
            mcv = gr.Number(label="MCV (fL)", value=90, minimum=0, maximum=200)
            rdw = gr.Number(label="RDW (%)", value=13, minimum=0, maximum=50)
            alp = gr.Number(label="ALP (U/L)", value=70, minimum=0, maximum=1000)
            wbc = gr.Number(label="WBC (10^3/μL)", value=7.5, minimum=0, maximum=100)
            lymphocytes = gr.Number(label="Lymphocytes (%)", value=30, minimum=0, maximum=100)
            hb = gr.Number(label="Hemoglobin (g/dL)", value=14.5, minimum=0, maximum=25)
            pv = gr.Number(label="Plasma (PV) (mL)", value=3000, minimum=0, maximum=10000)
        with gr.Column():
            gr.Markdown("### 👤 Patient Information")
            age = gr.Number(label="Age (years)", value=30, minimum=0, maximum=150)
            gender = gr.Dropdown(choices=["Male", "Female"], label="Gender", value="Male")
            height = gr.Number(label="Height (cm)", value=170, minimum=0, maximum=300)
            weight = gr.Number(label="Weight (kg)", value=70, minimum=0, maximum=500)
    btn = gr.Button("🔬 Generate Health Report & PDF", variant="primary", size="lg")
    with gr.Row():
        output = gr.Markdown(label="AI Health Report")
    with gr.Row():
        pdf_output = gr.File(label="📄 Download PDF Report")
    gr.Markdown("---")
    gr.Markdown("⚠️ **Disclaimer:** This tool provides informational insights only and is not a substitute for professional medical advice.")
    # The input order must match the positional parameters of respond().
    btn.click(
        respond,
        inputs=[albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymphocytes, hb, pv, age, gender, height, weight],
        outputs=[output, pdf_output]
    )
# ------------------------------------------------------
# 🔹 STEP 7: Launch for Hugging Face Spaces
# ------------------------------------------------------
if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and show errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)