Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from PIL import Image | |
| import pytesseract | |
| import os | |
| from langchain_huggingface import HuggingFaceEndpoint | |
| from langchain.chains import LLMChain | |
| from langchain_core.prompts import PromptTemplate | |
| import re | |
| import json | |
# Set up the Hugging Face API key.
# The token is read from the "HFBearer" environment variable and mirrored into
# HUGGINGFACEHUB_API_TOKEN. os.environ only accepts str values, so assigning
# None (when "HFBearer" is unset) would raise TypeError at start-up; in that
# case any pre-existing HUGGINGFACEHUB_API_TOKEN is left untouched.
api_key = os.environ.get("HFBearer")
if api_key:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

# URL of the Hugging Face inference endpoint
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
# OCR helper
def extract_text_from_image(image):
    """Return the result of ``pytesseract.image_to_string`` for ``image``.

    Args:
        image: The image object handed straight to pytesseract (the caller in
            this script passes a PIL image).

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the image.
    """
    ocr_text = pytesseract.image_to_string(image)
    return ocr_text
# Function to extract JSON from text
def extract_json(text):
    """Parse the first ``<JSON>...</JSON>`` section found in ``text``.

    Args:
        text: Raw model output. Anything that is not a ``str`` (for example
            ``None``) is treated as containing no JSON instead of raising.

    Returns:
        The decoded JSON value when the first tagged section is valid JSON,
        the string ``"Error decoding JSON"`` when that section cannot be
        decoded, or the string ``"No JSON found"`` when there is no tagged
        section (or ``text`` is not a string).
    """
    # Guard against None / non-text responses: re.search would raise TypeError.
    if not isinstance(text, str):
        return "No JSON found"

    # Non-greedy capture so only the first tagged section is considered;
    # DOTALL lets the payload span several lines.
    match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
    if match is None:
        return "No JSON found"

    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return "Error decoding JSON"
# Derive the document title from the uploaded file's name
def get_image_metadata(image):
    """Return the part of ``image.name`` that precedes the first dot.

    Args:
        image: Any object exposing a ``name`` string attribute.

    Returns:
        The text before the first ``'.'`` in the name; the whole name when it
        contains no dot.
    """
    stem, _, _ = image.name.partition('.')
    return stem
def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``."""
    words = text.split()
    return len(words)
# Mapping of image parameters to expected fields.
# Keys are document titles (the uploaded file name up to its first dot);
# values are comma-separated field names inserted into the LLM prompt.
# Field names are snake_case throughout so the same field is spelled the same
# way for every document type (e.g. "score_calcique").
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
# Streamlit app layout
st.title("Medical Patient Data Extractor")
st.write("This app extracts medical patient data from uploaded images.")

# User prompt template.
# {parameters} and {texte} are the two template variables; the doubled braces
# in the example are literal braces in the rendered prompt. The <JSON> tags
# are the markers the response is expected to wrap its answer in.
user_input = """
You will extract parameters from a text inside a JSON object, written between <JSON> and </JSON>.
List of parameters: {parameters}
Here is an example of a valid response:
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>
Here is the text from which you need to extract the parameters:
{texte}
"""
prompt = PromptTemplate.from_template(user_input)


@st.cache_resource
def _build_llm():
    """Create the Hugging Face endpoint client once per server process.

    Streamlit re-executes the whole script on every user interaction; caching
    the client as a resource avoids rebuilding it on each rerun.
    """
    return HuggingFaceEndpoint(endpoint_url=API_URL)


# Initialize Hugging Face LLM and pipe the rendered prompt into it
llm = _build_llm()
llm_chain = prompt | llm
# File uploader for multiple images
uploaded_images = st.file_uploader(
    "Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True
)

# Maximum number of whitespace-separated words of OCR text sent to the LLM.
# Loop-invariant, so it is defined once instead of on every iteration.
max_text_length = 500  # Adjust as needed

if st.button("Submit"):
    if not uploaded_images:
        st.warning("Please upload at least one image to extract text.")
    else:
        all_json_data = {}  # Dictionary to store JSON data for each image
        for index, uploaded_image in enumerate(uploaded_images):
            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                image = Image.open(uploaded_image)
                # Display the uploaded image
                # NOTE(review): newer Streamlit releases appear to deprecate
                # use_column_width in favour of use_container_width - confirm
                # against the deployed Streamlit version before switching.
                st.image(image, caption=f"Uploaded Image: {uploaded_image.name}", use_column_width=True)
                extracted_text = extract_text_from_image(image)
                # The upload index is part of the widget key so two uploads
                # with the same file name do not produce duplicate keys.
                st.text_area(
                    f"Extracted Text from {uploaded_image.name}",
                    value=extracted_text,
                    height=200,
                    key=f"{index}-{uploaded_image.name}",
                )

            # Truncate overly long OCR output to the first max_text_length words.
            if count_tokens(extracted_text) > max_text_length:
                extracted_text = " ".join(extracted_text.split()[:max_text_length])

            title = get_image_metadata(uploaded_image)
            parameters = image_params.get(title)
            if parameters is None:
                # Without a field list there is nothing meaningful to ask the
                # model for, so skip the request instead of sending a
                # placeholder string as the parameter list.
                st.warning(
                    f"No parameter list is defined for '{title}'; "
                    f"skipping {uploaded_image.name}."
                )
                continue

            try:
                with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
                    output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
            except Exception as exc:  # UI boundary: report and keep processing the batch
                st.error(f"API request failed for {uploaded_image.name}: {exc}")
                continue
            st.success(f"Response received for {uploaded_image.name}!")

            # Extract JSON from the API output
            json_data = extract_json(output)

            # Two uploads can share a title (e.g. ECG.png and ECG.jpg); make
            # the key unique so the later result does not overwrite the first.
            result_key = title if title not in all_json_data else f"{title} ({index + 1})"
            all_json_data[result_key] = json_data
            st.write(f"**{result_key} JSON Data:**")
            st.json(json_data)  # Display JSON nicely

        st.write("All extracted JSON Data:")
        st.json(all_json_data)  # Display all extracted JSON data together