# Source: Hugging Face Space (status when captured: sleeping)
from itertools import zip_longest

import spaces
import gradio as gr
import numpy as np
def get_mismatched_sentences(reference, hypothesis):
    """Return the positional token pairs on which two texts disagree.

    Both texts are split on whitespace and compared position by position.
    No alignment is performed, so a single inserted or deleted token shifts
    every later pair.

    Args:
        reference: Ground-truth text.
        hypothesis: Text to compare against the reference.

    Returns:
        A list of ``(reference_token, hypothesis_token)`` tuples, one for
        each position where the tokens differ. When one text has more
        tokens than the other, the missing side is reported as ``""``.
    """
    ref_tokens = reference.split()
    hyp_tokens = hypothesis.split()
    # zip() would stop at the shorter text and silently drop trailing
    # tokens. "" is a safe filler because split() never yields an empty token.
    return [
        (ref, hyp)
        for ref, hyp in zip_longest(ref_tokens, hyp_tokens, fillvalue="")
        if ref != hyp
    ]
def _edit_distance(ref_seq, hyp_seq):
    """Return the Levenshtein distance between two sequences.

    Insertions, deletions and substitutions each cost 1. Only two rows of
    the dynamic-programming table are kept at a time, so memory use grows
    with ``len(hyp_seq)`` rather than ``len(ref_seq) * len(hyp_seq)``.
    """
    # Row 0: turning an empty reference prefix into hyp_seq[:j] takes j insertions.
    previous = list(range(len(hyp_seq) + 1))
    for i, ref_item in enumerate(ref_seq, start=1):
        # Column 0: turning ref_seq[:i] into an empty hypothesis takes i deletions.
        current = [i]
        for j, hyp_item in enumerate(hyp_seq, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current.append(
                min(
                    previous[j] + 1,         # Deletion
                    current[j - 1] + 1,      # Insertion
                    previous[j - 1] + cost,  # Substitution or match
                )
            )
        previous = current
    return previous[-1]


def calculate_wer(reference, hypothesis):
    """Compute the word error rate (WER) of *hypothesis* against *reference*.

    Both texts are split on whitespace; the word-level edit distance is
    divided by the number of reference words.

    Args:
        reference: Ground-truth text.
        hypothesis: Text to score.

    Returns:
        The WER as a float. It can exceed 1.0 when the hypothesis contains
        many more words than the reference.

    Raises:
        ValueError: If the reference contains no words, because the rate
            would require dividing by zero.
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()
    if not reference_words:
        raise ValueError("Reference text contains no words; WER is undefined.")
    return _edit_distance(reference_words, hypothesis_words) / len(reference_words)


def calculate_cer(reference, hypothesis):
    """Compute the character error rate (CER) of *hypothesis* against *reference*.

    Space characters (``" "``) are removed from both texts before the
    character-level edit distance is computed; other whitespace such as
    newlines and tabs is kept and counted as characters. The distance is
    divided by the number of remaining reference characters.

    Args:
        reference: Ground-truth text.
        hypothesis: Text to score.

    Returns:
        The CER as a float. It can exceed 1.0 when the hypothesis is much
        longer than the reference.

    Raises:
        ValueError: If the reference has no characters left after spaces
            are removed, because the rate would require dividing by zero.
    """
    reference = reference.replace(" ", "")
    hypothesis = hypothesis.replace(" ", "")
    if not reference:
        raise ValueError("Reference text contains no characters; CER is undefined.")
    return _edit_distance(reference, hypothesis) / len(reference)
def process_files(reference_file, hypothesis_file):
    """Read two uploaded text files and compute ASR metrics for them.

    Args:
        reference_file: Uploaded reference transcript. Either a path string
            or an object whose ``name`` attribute is the path on disk.
        hypothesis_file: Uploaded hypothesis transcript, same form.

    Returns:
        On success, a dict with the keys ``"WER"``, ``"CER"`` and
        ``"Mismatched Sentences"``. On any failure (missing upload,
        unreadable file, metric error), a dict with a single ``"error"``
        key holding the message, so the UI can display it instead of
        crashing.
    """
    # Without this guard a missing upload surfaces as the unhelpful
    # "'NoneType' object has no attribute 'name'".
    if not reference_file or not hypothesis_file:
        return {"error": "Please upload both a reference file and a hypothesis file."}

    try:
        texts = []
        for uploaded in (reference_file, hypothesis_file):
            # Accept both file-like wrappers (with .name) and plain path strings.
            path = getattr(uploaded, "name", uploaded)
            # Explicit UTF-8 so results do not depend on the platform default.
            with open(path, "r", encoding="utf-8") as f:
                texts.append(f.read())
        reference_text, hypothesis_text = texts

        return {
            "WER": calculate_wer(reference_text, hypothesis_text),
            "CER": calculate_cer(reference_text, hypothesis_text),
            "Mismatched Sentences": get_mismatched_sentences(
                reference_text, hypothesis_text
            ),
        }
    except Exception as e:
        # UI boundary: report the problem in the JSON output rather than raising.
        return {"error": str(e)}
def _read_preview(uploaded, limit=200):
    """Return up to *limit* leading characters of an uploaded file ("" if none)."""
    if not uploaded:
        return ""
    # Accept both file-like wrappers (with .name) and plain path strings.
    path = getattr(uploaded, "name", uploaded)
    with open(path, "r", encoding="utf-8") as f:
        # Read only what is shown instead of loading the whole file.
        return f.read(limit)


def main():
    """Build the "ASR Metrics Calculator" Gradio interface and launch it.

    The UI offers two file uploads (reference and hypothesis), a short text
    preview of each, and a button that runs ``process_files`` and shows the
    result as JSON.
    """
    # NOTE(review): the original indentation was lost, so the nesting of
    # components inside each gr.Row() is reconstructed - confirm that
    # results_output belongs in the same row as the button.
    with gr.Blocks() as demo:
        gr.Markdown("# ASR Metrics Calculator")

        with gr.Row():
            reference_file = gr.File(label="Upload Reference File")
            hypothesis_file = gr.File(label="Upload Hypothesis File")

        with gr.Row():
            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

        with gr.Row():
            compute_button = gr.Button("Compute Metrics")
            results_output = gr.JSON(label="Results")

        # Update previews when files are uploaded
        def update_previews(ref_file, hyp_file):
            # Show the first 200 characters of each file
            return _read_preview(ref_file), _read_preview(hyp_file)

        # Either upload changing refreshes both previews.
        for file_input in (reference_file, hypothesis_file):
            file_input.change(
                fn=update_previews,
                inputs=[reference_file, hypothesis_file],
                outputs=[reference_preview, hypothesis_preview],
            )

        compute_button.click(
            fn=process_files,
            inputs=[reference_file, hypothesis_file],
            outputs=results_output,
        )

    demo.launch()
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()