Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -282,7 +282,7 @@ TRANSCRIPT: ''' + transcription
        outputs = loaded_llm_model.generate(
            **inputs,
            max_new_tokens=2048,
-           temperature=0.
+           temperature=0.3,  # Lower temperature for more consistent medical notes
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1,
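For context, a minimal sketch of how these sampling settings are usually passed to a Hugging Face transformers generate() call. The model id and prompt below are placeholders, and loading the checkpoint as a plain causal LM is an assumption, not something this diff shows.

# Illustrative sketch only; model id and prompt are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "OnDeviceMedNotes/JT_latest_model"  # assumption: loads as a standard causal LM
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

prompt = "TRANSCRIPT: patient reports two days of cough and low-grade fever ..."
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(
    **inputs,
    max_new_tokens=2048,
    temperature=0.3,        # lower temperature -> more consistent, less "creative" notes
    do_sample=True,         # sampling must stay enabled for temperature/top_p to take effect
    top_p=0.9,
    repetition_penalty=1.1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))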
@@ -323,32 +323,79 @@ TRANSCRIPT: ''' + transcription
    except Exception as e:
        return f"Error generating medical note: {str(e)}"

-
-
-
+# Global variable to store original audio for download
+original_audio_data = None
+
+def store_original_audio(audio_data):
+    """Store the original audio data for download"""
+    global original_audio_data
+    original_audio_data = audio_data
+    return audio_data
+
+def save_original_audio():
+    """Save the stored original audio without any processing"""
+    global original_audio_data
+
+    if original_audio_data is None:
        return None

    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-       filename = f"recording_{timestamp}.wav"

-       sample_rate, audio_array =
-
-       #
-
-
-
-
-
-
-
-
-
-
+       sample_rate, audio_array = original_audio_data
+
+       # Create temporary file with timestamp
+       temp_dir = tempfile.gettempdir()
+       filename = f"medical_recording_{timestamp}.wav"
+       filepath = os.path.join(temp_dir, filename)
+
+       # Method 1: Try using scipy.io.wavfile (preserves original format best)
+       try:
+           import scipy.io.wavfile as wavfile
+           wavfile.write(filepath, sample_rate, audio_array)
+           return filepath
+       except:
+           pass
+
+       # Method 2: Fallback to torchaudio with minimal processing
+       try:
+           if isinstance(audio_array, np.ndarray):
+               audio_tensor = torch.from_numpy(audio_array.copy())  # Copy to avoid modifications
+           else:
+               audio_tensor = torch.tensor(audio_array)
+
+           # Handle tensor dimensions
+           if len(audio_tensor.shape) == 1:
+               audio_tensor = audio_tensor.unsqueeze(0)  # Add channel dimension
+           elif len(audio_tensor.shape) == 2:
+               if audio_tensor.shape[0] > audio_tensor.shape[1]:
+                   audio_tensor = audio_tensor.T  # Transpose if needed
+
+           # Save with original data type preservation
+           torchaudio.save(
+               filepath,
+               audio_tensor,
+               sample_rate,
+               encoding="PCM_S",
+               bits_per_sample=16
+           )
+           return filepath
+
+       except Exception as e:
+           print(f"Torchaudio save failed: {e}")
+
+       # Method 3: Last resort - use soundfile
+       try:
+           import soundfile as sf
+           sf.write(filepath, audio_array, sample_rate)
+           return filepath
+       except Exception as e:
+           print(f"Soundfile save failed: {e}")

+       return None
+
    except Exception as e:
-       print(f"Error saving audio: {e}")
+       print(f"Error saving original audio: {e}")
        return None

# Available models
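A hedged, standalone usage sketch of the two helpers added above. It assumes store_original_audio() and save_original_audio() plus the app's imports (numpy, tempfile, os, datetime, and at least one of scipy/torchaudio/soundfile) are in scope, and that audio arrives as the (sample_rate, numpy_array) tuple a gr.Audio component with type="numpy" produces; the synthetic test tone is purely illustrative.

# Hypothetical usage of the helpers outside the Gradio UI.
import numpy as np

sample_rate = 16000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
audio_array = (0.3 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)  # 1-second test tone

store_original_audio((sample_rate, audio_array))  # what the audio_input.change handler does
wav_path = save_original_audio()                  # writes medical_recording_<timestamp>.wav to the temp dir
print("Saved original audio to:", wav_path)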
@@ -364,7 +411,9 @@ WHISPER_MODELS = [
MEDICAL_LLM_MODELS = [
    "OnDeviceMedNotes/JT_latest_model",
    "OnDeviceMedNotes/Medical_Summary_Notes",
-   "
+   "Johnyquest7/combined_hpi",
+   "OnDeviceMedNotes/Struct_Med_Note_v01",
+   "meta-llama/Llama-3.2-1B-Instruct",
]

# Create Gradio interface
@@ -412,7 +461,7 @@ with gr.Blocks(title="Medical Transcription & Note Generation", theme=gr.themes.
    # LLM model selection
    llm_model = gr.Dropdown(
        choices=MEDICAL_LLM_MODELS,
-       value="OnDeviceMedNotes/
+       value="OnDeviceMedNotes/JT_latest_model",
        label="Select Medical LLM Model"
    )
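Taken together, the two hunks above expand the model list and point the dropdown default at OnDeviceMedNotes/JT_latest_model. A minimal sketch of that wiring in isolation; the bare Blocks layout here is an assumption, not the Space's actual interface structure.

# Minimal sketch of the dropdown with the expanded model list and new default.
import gradio as gr

MEDICAL_LLM_MODELS = [
    "OnDeviceMedNotes/JT_latest_model",
    "OnDeviceMedNotes/Medical_Summary_Notes",
    "Johnyquest7/combined_hpi",
    "OnDeviceMedNotes/Struct_Med_Note_v01",
    "meta-llama/Llama-3.2-1B-Instruct",
]

with gr.Blocks() as demo:
    llm_model = gr.Dropdown(
        choices=MEDICAL_LLM_MODELS,
        value="OnDeviceMedNotes/JT_latest_model",  # default introduced in this commit
        label="Select Medical LLM Model",
    )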
@@ -474,11 +523,22 @@ with gr.Blocks(title="Medical Transcription & Note Generation", theme=gr.themes.
        if audio_data is None:
            return None, gr.update(visible=False)

-
+       # Store the original audio first
+       store_original_audio(audio_data)
+
+       # Save the original audio without processing
+       file_path = save_original_audio()
        if file_path:
            return file_path, gr.update(visible=True)
        return None, gr.update(visible=False)

+   # Audio input change handler to store original audio
+   audio_input.change(
+       fn=store_original_audio,
+       inputs=audio_input,
+       outputs=None
+   )
+
    download_audio_btn.click(
        fn=prepare_audio_download,
        inputs=audio_input,
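The click wiring is cut off at the end of this hunk (its outputs list is not shown). As a hedged sketch of how the whole download path could fit together: the prepare_audio_download body comes from the hunk above, but the def line, the download_row/download_file components, and the outputs list are assumptions, and the helpers from the earlier hunk are presumed to be defined.

# Hypothetical end-to-end sketch of the download flow; names not shown in the diff are assumed.
import gradio as gr

def prepare_audio_download(audio_data):
    if audio_data is None:
        return None, gr.update(visible=False)
    store_original_audio(audio_data)       # keep an untouched copy of the recording
    file_path = save_original_audio()      # write it to a temp .wav
    if file_path:
        return file_path, gr.update(visible=True)
    return None, gr.update(visible=False)

with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone", "upload"], type="numpy")
    download_audio_btn = gr.Button("Download Original Audio")
    with gr.Row(visible=False) as download_row:        # assumed container, toggled on success
        download_file = gr.File(label="Original recording")

    # Keep the stored copy in sync whenever the audio changes
    audio_input.change(fn=store_original_audio, inputs=audio_input, outputs=None)

    download_audio_btn.click(
        fn=prepare_audio_download,
        inputs=audio_input,
        outputs=[download_file, download_row],  # path -> File value, update -> Row visibility
    )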