# whisper-transcribe-new / requirements.txt
# 1) Do NOT pin torch/torchaudio here — use the CUDA builds that come with the image
transformers==4.48.0
# Removed flash-attention since faster-whisper handles this internally
# https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.0.8/flash_attn-2.7.4.post1+cu126torch2.4-cp310-cp310-linux_x86_64.whl
pydantic==2.10.6
# 2) Main whisper stack — WhisperX (with compatible faster-whisper/CTranslate2 versions for the cuDNN 8 era)
whisperx==3.7.0
faster-whisper==1.2.0
ctranslate2==4.5.0
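# NOTE: ctranslate2 4.5.0 builds against cuDNN 9; if the base image only ships cuDNN 8, pin ctranslate2<4.5.0 (e.g. 4.4.0) instead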
# (no 'torch' line — keep the image’s CUDA wheel)
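# Quick post-install sanity check that the image's CUDA torch wheel was not replaced, e.g.:
#   python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"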
# Optional: stable-ts for regroup/edit post-processing
stable-ts>=2.13.3
# 3) Extra libs your app really needs
gradio==5.0.1
spaces>=0.19.0
pyannote.audio>=3.3.1
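# pyannote's diarization models are gated on the Hugging Face Hub; accepting the model terms and providing an HF access token is usually required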
pandas>=1.5.0
numpy>=1.24.0
librosa>=0.10.0
soundfile>=0.12.0
ffmpeg-python>=0.2.0
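# ffmpeg-python only wraps the ffmpeg CLI; the ffmpeg binary itself must be present in the image (on Spaces, e.g. via packages.txt)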
requests>=2.28.0
webrtcvad>=2.0.10
boto3