ZachNagengast committed on
Commit
76b2205
·
verified ·
1 Parent(s): a28b7cc

Update with backwards compatible tokenizer format

Browse files
parakeet-tdt_ctc-110m/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "nemo_model_type": "parakeet",
5
+ "pad_token_id": 0,
6
+ "vocab_size": 1024
7
+ }
parakeet-tdt_ctc-110m/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
parakeet-tdt_ctc-110m/tokenizer_config.json CHANGED
@@ -22,6 +22,6 @@
22
  "model_max_length": 1000000000000000019884624838656,
23
  "pad_token": "<pad>",
24
  "processor_class": "ParakeetProcessor",
25
- "tokenizer_class": "ParakeetTokenizerFast",
26
  "unk_token": "<unk>"
27
  }
 
22
  "model_max_length": 1000000000000000019884624838656,
23
  "pad_token": "<pad>",
24
  "processor_class": "ParakeetProcessor",
25
+ "tokenizer_class": "PreTrainedTokenizer",
26
  "unk_token": "<unk>"
27
  }