Spaces:
Runtime error
Runtime error
To handle videos longer than one hour and to transcribe them in segments, we need to make several modifications to the yt_transcribe function.
#15
by
Illia56
- opened
app.py
CHANGED
|
@@ -71,21 +71,42 @@ def download_yt_audio(yt_url, filename):
|
|
| 71 |
raise gr.Error(str(err))
|
| 72 |
|
| 73 |
|
| 74 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
html_embed_str = _return_yt_html_embed(yt_url)
|
| 76 |
|
| 77 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
| 78 |
filepath = os.path.join(tmpdirname, "video.mp4")
|
| 79 |
download_yt_audio(yt_url, filepath)
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
return html_embed_str, text
|
| 89 |
|
| 90 |
|
| 91 |
demo = gr.Blocks()
|
|
|
|
| 71 |
raise gr.Error(str(err))
|
| 72 |
|
| 73 |
|
| 74 |
+
def ffmpeg_read(file_path, sampling_rate):
    """Decode an audio file into a mono float32 waveform using the ffmpeg CLI.

    Args:
        file_path: Path of the audio (or video) file to decode.
        sampling_rate: Target sampling rate in Hz; must be positive.

    Returns:
        A one-dimensional ``numpy.ndarray`` of little-endian ``float32``
        samples, resampled to ``sampling_rate`` and down-mixed to one channel.

    Raises:
        ValueError: If ``sampling_rate`` is not positive, the ``ffmpeg``
            executable cannot be found, ffmpeg fails to decode the file, or
            the decoded stream contains no audio samples.
    """
    # Validate before doing any work so bad arguments fail fast and cheaply.
    if sampling_rate <= 0:
        raise ValueError(
            f"sampling_rate must be a positive number of Hz, got {sampling_rate!r}"
        )

    # Imported locally so this function does not rely on module-level imports.
    import subprocess

    import numpy as np

    command = [
        "ffmpeg",
        "-nostdin",            # never wait on stdin when run from a server
        "-loglevel", "error",  # keep stderr limited to real failures
        "-i", str(file_path),
        "-vn",                 # ignore any video stream
        "-ac", "1",            # down-mix to mono
        "-ar", str(sampling_rate),
        "-f", "f32le",         # raw 32-bit little-endian float PCM
        "-",                   # write the decoded samples to stdout
    ]

    try:
        # Argument list (no shell) so the path is never interpreted by a shell.
        completed = subprocess.run(command, capture_output=True, check=False)
    except FileNotFoundError as err:
        raise ValueError(
            "ffmpeg was not found but is required to decode audio files"
        ) from err

    if completed.returncode != 0:
        details = completed.stderr.decode("utf-8", errors="replace").strip()
        raise ValueError(f"ffmpeg failed to decode {file_path!r}: {details}")

    # "<f4" matches the f32le output format requested above.
    audio = np.frombuffer(completed.stdout, dtype="<f4")
    if audio.size == 0:
        raise ValueError(f"ffmpeg produced no audio samples for {file_path!r}")
    return audio
|
| 79 |
+
|
| 80 |
+
def yt_transcribe(yt_url, task, max_filesize=75.0, segment_length=30*1000):
    """Download a YouTube video's audio and transcribe it segment by segment.

    Args:
        yt_url: URL of the YouTube video.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.
        max_filesize: Accepted for backward compatibility; not used by this
            function.
        segment_length: Length of each audio segment in milliseconds; must be
            positive.

    Returns:
        A ``(html_embed_str, full_transcription)`` tuple: the embed HTML from
        ``_return_yt_html_embed`` and the space-joined text of all segments.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    # Fail before downloading anything; a zero step would otherwise only
    # surface as a range() error after the download, and a negative one would
    # silently produce an empty transcript.
    if segment_length <= 0:
        raise ValueError(
            f"segment_length must be a positive number of milliseconds, got {segment_length!r}"
        )

    # Imported locally: the visible part of the file shows no module-level
    # pydub import, so this keeps the name in scope either way.
    from pydub import AudioSegment

    html_embed_str = _return_yt_html_embed(yt_url)
    # The sampling rate does not change between segments, so read it once.
    sampling_rate = pipe.feature_extractor.sampling_rate

    transcriptions = []
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)

        # Load the audio using pydub
        audio = AudioSegment.from_file(filepath, format="mp4")

        # One scratch file inside the temporary directory is reused for every
        # segment; it is removed together with the directory.
        segment_path = os.path.join(tmpdirname, "segment.wav")

        # Slice lazily so only one segment is held in addition to the full audio.
        for start in range(0, len(audio), segment_length):
            segment = audio[start:start + segment_length]
            # export() hands back the open output file; close it so the data
            # is fully written and the handle is not leaked.
            segment.export(segment_path, format="wav").close()

            # Convert the segment using ffmpeg
            segment_data = ffmpeg_read(segment_path, sampling_rate)
            inputs = {"array": segment_data, "sampling_rate": sampling_rate}

            transcription = pipe(
                inputs,
                batch_size=BATCH_SIZE,
                generate_kwargs={"task": task},
                return_timestamps=True,
            )["text"].strip()
            # Skip silent/empty segments so the join does not add stray spaces.
            if transcription:
                transcriptions.append(transcription)

    full_transcription = " ".join(transcriptions)

    return html_embed_str, full_transcription
|
| 109 |
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
demo = gr.Blocks()
|