Files changed (1) hide show
  1. app.py +88 -51
app.py CHANGED
@@ -1,51 +1,86 @@
1
  import torch
2
-
3
  import gradio as gr
4
  import pytube as pt
5
  from transformers import pipeline
6
  from huggingface_hub import model_info
7
- #from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
 
 
 
 
 
 
8
 
9
  MODEL_NAME = "ihanif/wav2vec2-xls-r-300m-pashto"
10
  lang = "ps"
11
 
12
- #load pre-trained model and tokenizer
13
- #processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
14
- #model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
15
-
16
  device = 0 if torch.cuda.is_available() else "cpu"
17
- pipe = pipeline(
18
- task="automatic-speech-recognition",
19
- model=MODEL_NAME,
20
- #chunk_length_s=30,
21
- device=device,
22
- )
23
 
24
- #pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def transcribe(microphone, file_upload):
 
 
 
27
  warn_output = ""
28
- # if (microphone is not None) and (file_upload is not None):
29
- # warn_output = (
30
- # "WARNING: You've uploaded an audio file and used the microphone. "
31
- # "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
32
- # )
33
-
34
- # elif (microphone is None) and (file_upload is None):
35
- # return "ERROR: You have to either use the microphone or upload an audio file"
36
-
37
  if (microphone is None) and (file_upload is None):
38
  return "ERROR: You have to either use the microphone or upload an audio file"
39
-
40
  file = microphone if microphone is not None else file_upload
41
-
42
- text = pipe(file)["text"]
43
- #transcription = wav2vec_model(audio)["text"]
44
-
45
- return warn_output + text
46
-
47
-
48
- def _return_yt_html_embed(yt_url):
49
  video_id = yt_url.split("?v=")[-1]
50
  HTML_str = (
51
  f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
@@ -53,28 +88,29 @@ def _return_yt_html_embed(yt_url):
53
  )
54
  return HTML_str
55
 
56
-
57
  def yt_transcribe(yt_url):
58
- yt = pt.YouTube(yt_url)
59
- html_embed_str = _return_yt_html_embed(yt_url)
60
- stream = yt.streams.filter(only_audio=True)[0]
61
- stream.download(filename="audio.mp3")
62
-
63
- text = pipe("audio.mp3")["text"]
64
-
65
- return html_embed_str, text
66
-
67
-
 
 
 
 
68
  demo = gr.Blocks()
69
-
70
- examples=[["example-1.wav","example-2.wav"]]
71
- # examples=["example-1.wav"]
72
 
73
  mf_transcribe = gr.Interface(
74
  fn=transcribe,
75
  inputs=[
76
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
77
- gr.inputs.Audio(source="upload", type="filepath", optional=True),
78
  ],
79
  outputs="text",
80
  layout="horizontal",
@@ -87,9 +123,9 @@ mf_transcribe = gr.Interface(
87
  examples=examples,
88
  )
89
 
90
- yt_transcribe = gr.Interface(
91
  fn=yt_transcribe,
92
- inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
93
  outputs=["html", "text"],
94
  layout="horizontal",
95
  theme="huggingface",
@@ -101,6 +137,7 @@ yt_transcribe = gr.Interface(
101
  )
102
 
103
  with demo:
104
- gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
105
 
106
- demo.launch(enable_queue=False)
 
 
1
import torch
import gradio as gr
import pytube as pt
from transformers import pipeline
from huggingface_hub import model_info
# NOTE(review): model_info is imported but not used anywhere visible in this
# file — confirm before removing (we may only be seeing part of the file).
import os
import time
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Set longer timeout for huggingface_hub
# huggingface_hub reads this env var when downloading model files; it must be
# set before the first download is triggered.
os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = '60'

# Hugging Face model id of the Pashto wav2vec2 ASR checkpoint to serve.
MODEL_NAME = "ihanif/wav2vec2-xls-r-300m-pashto"
# ISO 639-1 code for Pashto (currently informational; not passed to the pipeline).
lang = "ps"

# transformers pipelines take a CUDA device index (0) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
19
 
20
def create_pipeline_with_retry(model_name, max_retries=3, timeout=60):
    """Create an automatic-speech-recognition pipeline, retrying on failure.

    Model downloads from the Hugging Face Hub can fail transiently; this
    retries the load up to ``max_retries`` times with a linearly increasing
    wait between attempts.

    Args:
        model_name: Hugging Face model id to load.
        max_retries: Number of load attempts before giving up.
        timeout: Download timeout in seconds, exported via the
            HF_HUB_DOWNLOAD_TIMEOUT env var so huggingface_hub honors it.

    Returns:
        A transformers automatic-speech-recognition pipeline.

    Raises:
        Exception: re-raises the last load error if every attempt fails.
    """
    # Make the timeout parameter actually take effect. The previous version
    # built a requests.Session with a Retry adapter but never passed it to
    # pipeline(), so it was dead code; huggingface_hub reads this env var.
    os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = str(timeout)

    last_error = None
    for attempt in range(max_retries):
        try:
            print(f"Attempting to load model (attempt {attempt + 1}/{max_retries})...")
            pipe = pipeline(
                task="automatic-speech-recognition",
                model=model_name,
                device=device,
            )
            print("Model loaded successfully!")
            return pipe
        except Exception as e:
            last_error = e
            print(f"Attempt {attempt + 1} failed: {str(e)}")
            if attempt < max_retries - 1:
                # Linear backoff: 10s, 20s, 30s... (the old comment claimed
                # "exponential", which this never was).
                wait_time = (attempt + 1) * 10
                print(f"Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
    print("All attempts failed. Please check your internet connection.")
    raise last_error
57
+
58
# Initialize pipeline with retry mechanism
# Module-level load: the Gradio callbacks below read the global `pipe`.
try:
    pipe = create_pipeline_with_retry(MODEL_NAME)
except Exception as e:
    print(f"Failed to load model: {e}")
    # Fallback to a different model or handle gracefully
    # `pipe is None` is the sentinel each callback checks before transcribing,
    # so a failed load degrades to an error message instead of crashing the app.
    pipe = None
65
 
66
def transcribe(microphone, file_upload):
    """Transcribe audio from the microphone or an uploaded file.

    Args:
        microphone: Filepath of the recorded clip, or None if unused.
        file_upload: Filepath of the uploaded clip, or None if unused.

    Returns:
        The transcription text, or an "ERROR: ..." string when the model is
        unavailable, no input was given, or transcription fails.
    """
    if pipe is None:
        return "ERROR: Model not loaded. Please check your internet connection and restart the application."

    if (microphone is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"

    # Microphone input takes precedence when both are supplied.
    file = microphone if microphone is not None else file_upload

    try:
        # Dropped the always-empty `warn_output` prefix the old code
        # prepended — it was dead leftover from a removed warning branch.
        return pipe(file)["text"]
    except Exception as e:
        return f"ERROR: Transcription failed - {str(e)}"
82
+
83
def return_yt_html_embed(yt_url):
    """Return an HTML snippet that embeds the YouTube video at *yt_url*.

    Args:
        yt_url: A YouTube watch URL, e.g. "https://www.youtube.com/watch?v=ID".

    Returns:
        A centered <iframe> embed for the video as an HTML string.
    """
    # Take everything after "?v=", then drop any trailing query parameters
    # (&t=..., &list=...) — the old split("?v=")[-1] leaked them into the
    # embed URL and produced a broken iframe for such links.
    video_id = yt_url.split("?v=")[-1].split("&")[0]
    HTML_str = (
        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
        " </center>"
    )
    return HTML_str
90
 
 
91
def yt_transcribe(yt_url):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video to transcribe.

    Returns:
        A (html_embed, transcription) pair; on any failure the first element
        is an empty string and the second carries an error message.
    """
    if pipe is None:
        return "", "ERROR: Model not loaded. Please check your internet connection and restart the application."

    try:
        embed_html = return_yt_html_embed(yt_url)
        # Grab the first audio-only stream and save it to a local file that
        # the ASR pipeline can read back.
        video = pt.YouTube(yt_url)
        audio_stream = video.streams.filter(only_audio=True)[0]
        audio_stream.download(filename="audio.mp3")
        transcription = pipe("audio.mp3")["text"]
    except Exception as e:
        return "", f"ERROR: YouTube transcription failed - {str(e)}"
    return embed_html, transcription
104
+
105
# Create Gradio interface
demo = gr.Blocks()
# NOTE(review): this is ONE example row that fills both audio inputs
# (microphone=example-1.wav, upload=example-2.wav). If two independent
# examples were intended, use [["example-1.wav"], ["example-2.wav"]] — confirm.
examples = [["example-1.wav", "example-2.wav"]]
 
 
108
 
109
  mf_transcribe = gr.Interface(
110
  fn=transcribe,
111
  inputs=[
112
+ gr.Audio(source="microphone", type="filepath", label="Microphone"),
113
+ gr.Audio(source="upload", type="filepath", label="Upload Audio"),
114
  ],
115
  outputs="text",
116
  layout="horizontal",
 
123
  examples=examples,
124
  )
125
 
126
+ yt_transcribe_interface = gr.Interface(
127
  fn=yt_transcribe,
128
+ inputs=[gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
129
  outputs=["html", "text"],
130
  layout="horizontal",
131
  theme="huggingface",
 
137
  )
138
 
139
with demo:
    # Two tabs: direct audio transcription and YouTube-URL transcription.
    gr.TabbedInterface([mf_transcribe, yt_transcribe_interface], ["Transcribe Audio", "Transcribe YouTube"])

if __name__ == "__main__":
    # Queueing disabled: requests are handled synchronously.
    demo.launch(enable_queue=False)