Johnyquest7 committed
Commit e29a040 · verified · 1 Parent(s): 351ee26

Update app.py

Files changed (1):
  1. app.py +83 -23

app.py CHANGED
@@ -282,7 +282,7 @@ TRANSCRIPT: ''' + transcription
         outputs = loaded_llm_model.generate(
             **inputs,
             max_new_tokens=2048,
-            temperature=0.1,  # Lower temperature for more consistent medical notes
+            temperature=0.3,  # Lower temperature for more consistent medical notes
             do_sample=True,
             top_p=0.9,
             repetition_penalty=1.1,
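
Note: the sampling keywords in this hunk are standard Hugging Face transformers generate() arguments. The following is a rough, self-contained sketch of how they are typically used with a causal LM; the model name and prompt are placeholders (the model name is borrowed from the model list added further down), not the app's actual loading code.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"  # placeholder checkpoint for the sketch
tokenizer = AutoTokenizer.from_pretrained(model_name)
loaded_llm_model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "Write a structured medical note for the following transcript: ..."
inputs = tokenizer(prompt, return_tensors="pt")

# Same sampling settings as the hunk above: temperature=0.3 with top-p sampling
# stays close to deterministic while avoiding the repetition of pure greedy decoding.
outputs = loaded_llm_model.generate(
    **inputs,
    max_new_tokens=2048,
    temperature=0.3,
    do_sample=True,
    top_p=0.9,
    repetition_penalty=1.1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))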
@@ -323,32 +323,79 @@ TRANSCRIPT: ''' + transcription
     except Exception as e:
         return f"Error generating medical note: {str(e)}"
 
-def save_audio_file(audio_data):
-    """Save recorded audio to downloadable file"""
-    if audio_data is None:
+# Global variable to store original audio for download
+original_audio_data = None
+
+def store_original_audio(audio_data):
+    """Store the original audio data for download"""
+    global original_audio_data
+    original_audio_data = audio_data
+    return audio_data
+
+def save_original_audio():
+    """Save the stored original audio without any processing"""
+    global original_audio_data
+
+    if original_audio_data is None:
         return None
 
     try:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = f"recording_{timestamp}.wav"
 
-        sample_rate, audio_array = audio_data
-
-        # Ensure audio_array is in the right format
-        if isinstance(audio_array, np.ndarray):
-            if len(audio_array.shape) > 1:
-                audio_array = audio_array.mean(axis=1)  # Convert to mono
-            audio_tensor = torch.from_numpy(audio_array).float()
-        else:
-            audio_tensor = torch.tensor(audio_array).float()
-
-        # Create temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-            torchaudio.save(tmp_file.name, audio_tensor.unsqueeze(0), sample_rate)
-            return tmp_file.name
+        sample_rate, audio_array = original_audio_data
+
+        # Create temporary file with timestamp
+        temp_dir = tempfile.gettempdir()
+        filename = f"medical_recording_{timestamp}.wav"
+        filepath = os.path.join(temp_dir, filename)
+
+        # Method 1: Try using scipy.io.wavfile (preserves original format best)
+        try:
+            import scipy.io.wavfile as wavfile
+            wavfile.write(filepath, sample_rate, audio_array)
+            return filepath
+        except:
+            pass
+
+        # Method 2: Fallback to torchaudio with minimal processing
+        try:
+            if isinstance(audio_array, np.ndarray):
+                audio_tensor = torch.from_numpy(audio_array.copy())  # Copy to avoid modifications
+            else:
+                audio_tensor = torch.tensor(audio_array)
+
+            # Handle tensor dimensions
+            if len(audio_tensor.shape) == 1:
+                audio_tensor = audio_tensor.unsqueeze(0)  # Add channel dimension
+            elif len(audio_tensor.shape) == 2:
+                if audio_tensor.shape[0] > audio_tensor.shape[1]:
+                    audio_tensor = audio_tensor.T  # Transpose if needed
+
+            # Save with original data type preservation
+            torchaudio.save(
+                filepath,
+                audio_tensor,
+                sample_rate,
+                encoding="PCM_S",
+                bits_per_sample=16
+            )
+            return filepath
+
+        except Exception as e:
+            print(f"Torchaudio save failed: {e}")
+
+        # Method 3: Last resort - use soundfile
+        try:
+            import soundfile as sf
+            sf.write(filepath, audio_array, sample_rate)
+            return filepath
+        except Exception as e:
+            print(f"Soundfile save failed: {e}")
 
+        return None
+
     except Exception as e:
-        print(f"Error saving audio: {e}")
+        print(f"Error saving original audio: {e}")
         return None
 
 # Available models
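
Note: a quick way to exercise the two new helpers outside the Gradio app is to hand them a synthetic (sample_rate, numpy array) tuple, the same shape of value gr.Audio(type="numpy") passes in. A rough sketch, assuming store_original_audio and save_original_audio from the hunk above are in scope along with their imports (numpy, os, tempfile, datetime, torch/torchaudio):

import numpy as np

# One second of a 440 Hz tone as int16, mimicking a microphone recording
# in Gradio's (sample_rate, ndarray) numpy audio format.
sample_rate = 16000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
tone = (0.3 * np.sin(2 * np.pi * 440.0 * t) * 32767).astype(np.int16)
audio_data = (sample_rate, tone)

# Mirror what prepare_audio_download does further down: stash the raw tuple,
# then write it out untouched and get back a temp-file path.
store_original_audio(audio_data)
path = save_original_audio()
print("Saved WAV to:", path)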
@@ -364,7 +411,9 @@ WHISPER_MODELS = [
 MEDICAL_LLM_MODELS = [
     "OnDeviceMedNotes/JT_latest_model",
     "OnDeviceMedNotes/Medical_Summary_Notes",
-    "OnDeviceMedNotes/Struct_Med_Note_v01"
+    "Johnyquest7/combined_hpi",
+    "OnDeviceMedNotes/Struct_Med_Note_v01",
+    "meta-llama/Llama-3.2-1B-Instruct",
 ]
 
 # Create Gradio interface
@@ -412,7 +461,7 @@ with gr.Blocks(title="Medical Transcription & Note Generation", theme=gr.themes.
         # LLM model selection
         llm_model = gr.Dropdown(
             choices=MEDICAL_LLM_MODELS,
-            value="OnDeviceMedNotes/Struct_Med_Note_v01",
+            value="OnDeviceMedNotes/JT_latest_model",
            label="Select Medical LLM Model"
         )
 
@@ -474,11 +523,22 @@ with gr.Blocks(title="Medical Transcription & Note Generation", theme=gr.themes.
         if audio_data is None:
             return None, gr.update(visible=False)
 
-        file_path = save_audio_file(audio_data)
+        # Store the original audio first
+        store_original_audio(audio_data)
+
+        # Save the original audio without processing
+        file_path = save_original_audio()
         if file_path:
             return file_path, gr.update(visible=True)
         return None, gr.update(visible=False)
 
+    # Audio input change handler to store original audio
+    audio_input.change(
+        fn=store_original_audio,
+        inputs=audio_input,
+        outputs=None
+    )
+
     download_audio_btn.click(
         fn=prepare_audio_download,
         inputs=audio_input,
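
Note: for an end-to-end picture of how the pieces fit together, here is a stripped-down Blocks sketch that wires an audio component to the two helpers and a download button in the same pattern as the hunk above. Component names, the layout, and the gr.Row used for the visibility toggle are illustrative assumptions (the real outputs= list is not visible in this diff); it assumes store_original_audio and save_original_audio from the earlier hunk are defined in the same script.

import gradio as gr

def prepare_audio_download(audio_data):
    """Store the raw recording, write it to a temp WAV, and reveal it for download."""
    if audio_data is None:
        return None, gr.update(visible=False)
    store_original_audio(audio_data)   # keep the raw (sample_rate, array) tuple
    file_path = save_original_audio()  # write it out without preprocessing
    if file_path:
        return file_path, gr.update(visible=True)
    return None, gr.update(visible=False)

with gr.Blocks() as demo:
    audio_input = gr.Audio(type="numpy", label="Record or upload audio")
    download_audio_btn = gr.Button("Prepare recording for download")
    with gr.Row(visible=False) as download_row:
        download_file = gr.File(label="Recorded audio")

    # Stash the raw recording whenever it changes, so the download never
    # depends on whatever preprocessing the transcription path applies.
    audio_input.change(fn=store_original_audio, inputs=audio_input, outputs=None)

    download_audio_btn.click(
        fn=prepare_audio_download,
        inputs=audio_input,
        outputs=[download_file, download_row],
    )

demo.launch()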