Johnyquest7 committed
Commit e29a040 · verified · 1 Parent(s): 351ee26

Update app.py

Files changed (1):
  1. app.py +83 -23

app.py CHANGED
@@ -282,7 +282,7 @@ TRANSCRIPT: ''' + transcription
         outputs = loaded_llm_model.generate(
             **inputs,
             max_new_tokens=2048,
-            temperature=0.1,  # Lower temperature for more consistent medical notes
+            temperature=0.3,  # Lower temperature for more consistent medical notes
             do_sample=True,
             top_p=0.9,
             repetition_penalty=1.1,
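
Note: the sampling keywords in this hunk are standard Hugging Face transformers generate() arguments. The following is a rough, self-contained sketch of how they are typically used with a causal LM; the model name and prompt are placeholders (the model name is borrowed from the model list added further down), not the app's actual loading code.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"  # placeholder checkpoint for the sketch
tokenizer = AutoTokenizer.from_pretrained(model_name)
loaded_llm_model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "Write a structured medical note for the following transcript: ..."
inputs = tokenizer(prompt, return_tensors="pt")

# Same sampling settings as the hunk above: temperature=0.3 with top-p sampling
# stays close to deterministic while avoiding the repetition of pure greedy decoding.
outputs = loaded_llm_model.generate(
    **inputs,
    max_new_tokens=2048,
    temperature=0.3,
    do_sample=True,
    top_p=0.9,
    repetition_penalty=1.1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))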
@@ -323,32 +323,79 @@ TRANSCRIPT: ''' + transcription
     except Exception as e:
         return f"Error generating medical note: {str(e)}"
 
-def save_audio_file(audio_data):
-    """Save recorded audio to downloadable file"""
-    if audio_data is None:
+# Global variable to store original audio for download
+original_audio_data = None
+
+def store_original_audio(audio_data):
+    """Store the original audio data for download"""
+    global original_audio_data
+    original_audio_data = audio_data
+    return audio_data
+
+def save_original_audio():
+    """Save the stored original audio without any processing"""
+    global original_audio_data
+
+    if original_audio_data is None:
         return None
 
     try:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = f"recording_{timestamp}.wav"
 
-        sample_rate, audio_array = audio_data
-
-        # Ensure audio_array is in the right format
-        if isinstance(audio_array, np.ndarray):
-            if len(audio_array.shape) > 1:
-                audio_array = audio_array.mean(axis=1)  # Convert to mono
-            audio_tensor = torch.from_numpy(audio_array).float()
-        else:
-            audio_tensor = torch.tensor(audio_array).float()
-
-        # Create temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-            torchaudio.save(tmp_file.name, audio_tensor.unsqueeze(0), sample_rate)
-            return tmp_file.name
+        sample_rate, audio_array = original_audio_data
+
+        # Create temporary file with timestamp
+        temp_dir = tempfile.gettempdir()
+        filename = f"medical_recording_{timestamp}.wav"
+        filepath = os.path.join(temp_dir, filename)
+
+        # Method 1: Try using scipy.io.wavfile (preserves original format best)
+        try:
+            import scipy.io.wavfile as wavfile
+            wavfile.write(filepath, sample_rate, audio_array)
+            return filepath
+        except:
+            pass
+
+        # Method 2: Fallback to torchaudio with minimal processing
+        try:
+            if isinstance(audio_array, np.ndarray):
+                audio_tensor = torch.from_numpy(audio_array.copy())  # Copy to avoid modifications
+            else:
+                audio_tensor = torch.tensor(audio_array)
+
+            # Handle tensor dimensions
+            if len(audio_tensor.shape) == 1:
+                audio_tensor = audio_tensor.unsqueeze(0)  # Add channel dimension
+            elif len(audio_tensor.shape) == 2:
+                if audio_tensor.shape[0] > audio_tensor.shape[1]:
+                    audio_tensor = audio_tensor.T  # Transpose if needed
+
+            # Save with original data type preservation
+            torchaudio.save(
+                filepath,
+                audio_tensor,
+                sample_rate,
+                encoding="PCM_S",
+                bits_per_sample=16
+            )
+            return filepath
+
+        except Exception as e:
+            print(f"Torchaudio save failed: {e}")
+
+        # Method 3: Last resort - use soundfile
+        try:
+            import soundfile as sf
+            sf.write(filepath, audio_array, sample_rate)
+            return filepath
+        except Exception as e:
+            print(f"Soundfile save failed: {e}")
 
+        return None
+
     except Exception as e:
-        print(f"Error saving audio: {e}")
+        print(f"Error saving original audio: {e}")
         return None
 
 # Available models
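
Note: a quick way to exercise the two new helpers outside the Gradio app is to hand them a synthetic (sample_rate, numpy array) tuple, the same shape of value gr.Audio(type="numpy") passes in. A rough sketch, assuming store_original_audio and save_original_audio from the hunk above are in scope along with their imports (numpy, os, tempfile, datetime, torch/torchaudio):

import numpy as np

# One second of a 440 Hz tone as int16, mimicking a microphone recording
# in Gradio's (sample_rate, ndarray) numpy audio format.
sample_rate = 16000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
tone = (0.3 * np.sin(2 * np.pi * 440.0 * t) * 32767).astype(np.int16)
audio_data = (sample_rate, tone)

# Mirror what prepare_audio_download does further down: stash the raw tuple,
# then write it out untouched and get back a temp-file path.
store_original_audio(audio_data)
path = save_original_audio()
print("Saved WAV to:", path)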
@@ -364,7 +411,9 @@ WHISPER_MODELS = [
 MEDICAL_LLM_MODELS = [
     "OnDeviceMedNotes/JT_latest_model",
     "OnDeviceMedNotes/Medical_Summary_Notes",
-    "OnDeviceMedNotes/Struct_Med_Note_v01"
+    "Johnyquest7/combined_hpi",
+    "OnDeviceMedNotes/Struct_Med_Note_v01",
+    "meta-llama/Llama-3.2-1B-Instruct",
 ]
 
 # Create Gradio interface
@@ -412,7 +461,7 @@ with gr.Blocks(title="Medical Transcription & Note Generation", theme=gr.themes.
         # LLM model selection
         llm_model = gr.Dropdown(
             choices=MEDICAL_LLM_MODELS,
-            value="OnDeviceMedNotes/Struct_Med_Note_v01",
+            value="OnDeviceMedNotes/JT_latest_model",
            label="Select Medical LLM Model"
         )
 
@@ -474,11 +523,22 @@ with gr.Blocks(title="Medical Transcription & Note Generation", theme=gr.themes.
         if audio_data is None:
             return None, gr.update(visible=False)
 
-        file_path = save_audio_file(audio_data)
+        # Store the original audio first
+        store_original_audio(audio_data)
+
+        # Save the original audio without processing
+        file_path = save_original_audio()
         if file_path:
             return file_path, gr.update(visible=True)
         return None, gr.update(visible=False)
 
+    # Audio input change handler to store original audio
+    audio_input.change(
+        fn=store_original_audio,
+        inputs=audio_input,
+        outputs=None
+    )
+
     download_audio_btn.click(
         fn=prepare_audio_download,
         inputs=audio_input,
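
Note: for an end-to-end picture of how the pieces fit together, here is a stripped-down Blocks sketch that wires an audio component to the two helpers and a download button in the same pattern as the hunk above. Component names, the layout, and the gr.Row used for the visibility toggle are illustrative assumptions (the real outputs= list is not visible in this diff); it assumes store_original_audio and save_original_audio from the earlier hunk are defined in the same script.

import gradio as gr

def prepare_audio_download(audio_data):
    """Store the raw recording, write it to a temp WAV, and reveal it for download."""
    if audio_data is None:
        return None, gr.update(visible=False)
    store_original_audio(audio_data)   # keep the raw (sample_rate, array) tuple
    file_path = save_original_audio()  # write it out without preprocessing
    if file_path:
        return file_path, gr.update(visible=True)
    return None, gr.update(visible=False)

with gr.Blocks() as demo:
    audio_input = gr.Audio(type="numpy", label="Record or upload audio")
    download_audio_btn = gr.Button("Prepare recording for download")
    with gr.Row(visible=False) as download_row:
        download_file = gr.File(label="Recorded audio")

    # Stash the raw recording whenever it changes, so the download never
    # depends on whatever preprocessing the transcription path applies.
    audio_input.change(fn=store_original_audio, inputs=audio_input, outputs=None)

    download_audio_btn.click(
        fn=prepare_audio_download,
        inputs=audio_input,
        outputs=[download_file, download_row],
    )

demo.launch()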