Spaces:

Slamlab
/

asr_metrics

Sleeping

App Files Files Community

asr_metrics / app.py

akki2825

fix mismatch

3c4a41b verified 6 months ago

raw

history blame

4.31 kB

	import spaces
	import gradio as gr
	import numpy as np

	@spaces.GPU()
	def get_mismatched_sentences(reference, hypothesis):
	    """Return the token pairs that differ between reference and hypothesis.

	    Both texts are split on whitespace and compared position by position;
	    no alignment is performed. Despite the function's name, the unit of
	    comparison is therefore a whitespace-separated token, not a sentence.

	    Args:
	        reference: Ground-truth text.
	        hypothesis: Text to compare against the reference.

	    Returns:
	        A list of ``(reference_token, hypothesis_token)`` tuples, one per
	        position at which the two tokens differ. When one text has fewer
	        tokens than the other, the missing side is reported as ``""``.
	    """
	    # Local import keeps this function self-contained.
	    from itertools import zip_longest

	    reference_tokens = reference.split()
	    hypothesis_tokens = hypothesis.split()

	    # zip() would stop at the shorter sequence and silently drop the extra
	    # tokens of the longer one. str.split() never yields "", so the fill
	    # value cannot be confused with a real token.
	    return [
	        (ref, hyp)
	        for ref, hyp in zip_longest(
	            reference_tokens, hypothesis_tokens, fillvalue=""
	        )
	        if ref != hyp
	    ]

	@spaces.GPU()
	def calculate_wer(reference, hypothesis):
	    """Compute the word error rate (WER) of a hypothesis against a reference.

	    WER is the word-level Levenshtein distance (substitutions, deletions and
	    insertions each cost 1) divided by the number of reference words. Words
	    are obtained by splitting on any whitespace.

	    Args:
	        reference: Ground-truth text.
	        hypothesis: Text whose errors are measured.

	    Returns:
	        The WER as a float. It can exceed 1.0 when the hypothesis contains
	        many more words than the reference.

	    Raises:
	        ValueError: If the reference contains no words, because the ratio
	            is undefined (previously this produced NaN/inf via a division
	            by zero).
	    """
	    reference_words = reference.split()
	    hypothesis_words = hypothesis.split()

	    if not reference_words:
	        raise ValueError("Reference text contains no words; WER is undefined.")

	    # Only the previous DP row is ever read, so two rows are enough.
	    # previous[j] is the distance between the reference words seen so far
	    # and the first j hypothesis words.
	    previous = list(range(len(hypothesis_words) + 1))
	    for i, ref_word in enumerate(reference_words, start=1):
	        current = [i]  # i deletions turn i reference words into nothing
	        for j, hyp_word in enumerate(hypothesis_words, start=1):
	            cost = 0 if ref_word == hyp_word else 1
	            current.append(
	                min(
	                    previous[j] + 1,         # deletion
	                    current[j - 1] + 1,      # insertion
	                    previous[j - 1] + cost,  # substitution or match
	                )
	            )
	        previous = current

	    return previous[-1] / len(reference_words)

	@spaces.GPU()
	def calculate_cer(reference, hypothesis):
	    """Compute the character error rate (CER) of a hypothesis.

	    CER is the character-level Levenshtein distance (substitutions,
	    deletions and insertions each cost 1) divided by the number of
	    reference characters. All whitespace is removed from both texts before
	    comparison, so spacing and line breaks do not count as errors.

	    Args:
	        reference: Ground-truth text.
	        hypothesis: Text whose errors are measured.

	    Returns:
	        The CER as a float. It can exceed 1.0 when the hypothesis is much
	        longer than the reference.

	    Raises:
	        ValueError: If the reference has no non-whitespace characters,
	            because the ratio is undefined (previously this produced
	            NaN/inf via a division by zero).
	    """
	    # split()/join drops every kind of whitespace. Removing only " " left
	    # newlines and tabs from the input files to be scored as characters.
	    reference_chars = "".join(reference.split())
	    hypothesis_chars = "".join(hypothesis.split())

	    if not reference_chars:
	        raise ValueError(
	            "Reference text contains no characters; CER is undefined."
	        )

	    # Only the previous DP row is ever read, so two rows are enough.
	    previous = list(range(len(hypothesis_chars) + 1))
	    for i, ref_char in enumerate(reference_chars, start=1):
	        current = [i]  # i deletions turn i reference characters into nothing
	        for j, hyp_char in enumerate(hypothesis_chars, start=1):
	            cost = 0 if ref_char == hyp_char else 1
	            current.append(
	                min(
	                    previous[j] + 1,         # deletion
	                    current[j - 1] + 1,      # insertion
	                    previous[j - 1] + cost,  # substitution or match
	                )
	            )
	        previous = current

	    return previous[-1] / len(reference_chars)


	@spaces.GPU()
	def process_files(reference_file, hypothesis_file):
	    """Read the two uploaded files and compute the ASR metrics for them.

	    Args:
	        reference_file: Uploaded reference transcript, as passed in by the
	            Gradio file input. Either a path string or an object whose
	            ``name`` attribute holds the path is accepted.
	        hypothesis_file: Uploaded hypothesis transcript, same form.

	    Returns:
	        On success, a dict with the keys ``"WER"``, ``"CER"`` and
	        ``"Mismatched Sentences"``. On any failure (missing upload,
	        unreadable file, metric error), a dict with the single key
	        ``"error"`` holding a message, so the UI always has something to
	        display.
	    """
	    if reference_file is None or hypothesis_file is None:
	        # Without this check the user would only see a raw AttributeError
	        # message about NoneType.
	        return {
	            "error": "Please upload both a reference file and a hypothesis file."
	        }

	    try:
	        texts = []
	        for uploaded in (reference_file, hypothesis_file):
	            # Use .name when present, otherwise treat the value as a path.
	            path = getattr(uploaded, "name", uploaded)
	            # Explicit encoding avoids depending on the platform default;
	            # "utf-8-sig" also drops a leading BOM that would otherwise
	            # stick to the first word and count as an error.
	            with open(path, "r", encoding="utf-8-sig") as f:
	                texts.append(f.read())
	        reference_text, hypothesis_text = texts

	        wer_value = calculate_wer(reference_text, hypothesis_text)
	        cer_value = calculate_cer(reference_text, hypothesis_text)
	        mismatched_sentences = get_mismatched_sentences(
	            reference_text, hypothesis_text
	        )

	        return {
	            "WER": wer_value,
	            "CER": cer_value,
	            "Mismatched Sentences": mismatched_sentences,
	        }
	    except Exception as e:
	        # UI boundary: report the problem in the results panel instead of
	        # letting the request fail.
	        return {"error": str(e)}

	def main():
	    """Build the Gradio interface for the ASR metrics calculator and launch it.

	    The page has two file inputs, a short text preview for each file, and a
	    button that runs ``process_files`` and shows its result as JSON.
	    """
	    preview_chars = 200  # number of characters shown in each preview box

	    def read_preview(uploaded):
	        """Return the first ``preview_chars`` characters of an uploaded file."""
	        if not uploaded:
	            return ""
	        # Use .name when present, otherwise treat the value as a path.
	        path = getattr(uploaded, "name", uploaded)
	        # read(n) stops after n characters instead of loading the whole
	        # file; errors="replace" keeps the preview working on bytes that
	        # are not valid UTF-8.
	        with open(path, "r", encoding="utf-8-sig", errors="replace") as f:
	            return f.read(preview_chars)

	    def update_previews(ref_file, hyp_file):
	        """Refresh both preview boxes from the current uploads."""
	        return read_preview(ref_file), read_preview(hyp_file)

	    with gr.Blocks() as demo:
	        gr.Markdown("# ASR Metrics Calculator")

	        with gr.Row():
	            reference_file = gr.File(label="Upload Reference File")
	            hypothesis_file = gr.File(label="Upload Hypothesis File")

	        with gr.Row():
	            reference_preview = gr.Textbox(label="Reference Preview", lines=3)
	            hypothesis_preview = gr.Textbox(label="Hypothesis Preview", lines=3)

	        with gr.Row():
	            compute_button = gr.Button("Compute Metrics")
	            results_output = gr.JSON(label="Results")

	        # Either upload changing refreshes both previews.
	        for file_input in (reference_file, hypothesis_file):
	            file_input.change(
	                fn=update_previews,
	                inputs=[reference_file, hypothesis_file],
	                outputs=[reference_preview, hypothesis_preview],
	            )

	        compute_button.click(
	            fn=process_files,
	            inputs=[reference_file, hypothesis_file],
	            outputs=results_output,
	        )

	    demo.launch()

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    main()