speechbrain
/

asr-whisper-medium-commonvoice-hi

Automatic Speech Recognition

hf-asr-leaderboard

Model card Files Files and versions

poonehmousavi committed on Aug 16, 2023

Commit

a9884db

·

1 Parent(s): e765b4c

Upload 3 files

Files changed (3) hide show

config.json +3 -0
example-hi.mp3 +0 -0
hyperparams.yaml +77 -0

config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "speechbrain_interface": "WhisperASR"
+}

example-hi.mp3 ADDED Viewed

Binary file (29.9 kB). View file

hyperparams.yaml ADDED Viewed

	@@ -0,0 +1,77 @@

+# ################################
+# Model: Whisper (Encoder-Decoder) + NLL
+# Augmentation: TimeDomainSpecAugment
+# Authors: Pooneh Mousavi 2022
+# ################################
+# Hugging Face Hub identifier of the pretrained OpenAI Whisper model.
+whisper_hub: openai/whisper-large-v2
+# Normalize transcripts with
+# the same text normalization used in the Whisper paper. Refer to Appendix C of the paper for further information.
+normalized_transcripts: True
+language: hindi
+auto_mix_prec: False
+sample_rate: 16000
+# These values are only used for the searchers.
+# They need to be hardcoded and should not be changed with Whisper.
+# They are used as part of the searching process.
+# The bos token of the searcher will be timestamp_index
+# and will be concatenated with the bos, language and task tokens.
+timestamp_index: 50363
+eos_index: 50257
+bos_index: 50258
+# Decoding parameters
+min_decode_ratio: 0.0
+max_decode_ratio: 0.1
+test_beam_size: 8
+# Model parameters
+freeze_whisper: True
+freeze_encoder: True
+whisper: !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
+    source: !ref <whisper_hub>
+    freeze: !ref <freeze_whisper>
+    freeze_encoder: !ref <freeze_encoder>
+    save_path: whisper_checkpoints
+    encoder_only:  False
+decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
+    model: !ref <whisper>
+    bos_index: !ref <timestamp_index>
+    eos_index: !ref <eos_index>
+    min_decode_ratio: !ref <min_decode_ratio>
+    max_decode_ratio: !ref <max_decode_ratio>
+# test_beam_searcher: !new:speechbrain.decoders.seq2seq.S2SWhisperBeamSearch
+#     module: [!ref <whisper>]
+#     bos_index: !ref <timestamp_index>
+#     eos_index: !ref <eos_index>
+#     min_decode_ratio: !ref <min_decode_ratio>
+#     max_decode_ratio: !ref <max_decode_ratio>
+#     beam_size: !ref <test_beam_size>
+modules:
+    whisper: !ref <whisper>
+    decoder:  !ref <decoder>
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        whisper: !ref <whisper>