Commit
·
bc2cf52
1
Parent(s):
0264d8d
update model
Browse files- sherpa-onnx-asr.js +82 -6
- sherpa-onnx-wasm-main-asr.js +0 -0
- sherpa-onnx-wasm-main-asr.wasm +2 -2
sherpa-onnx-asr.js
CHANGED
|
@@ -31,6 +31,10 @@ function freeConfig(config, Module) {
|
|
| 31 |
freeConfig(config.nemoCtc, Module)
|
| 32 |
}
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
if ('whisper' in config) {
|
| 35 |
freeConfig(config.whisper, Module)
|
| 36 |
}
|
|
@@ -47,6 +51,10 @@ function freeConfig(config, Module) {
|
|
| 47 |
freeConfig(config.zipformerCtc, Module)
|
| 48 |
}
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
if ('moonshine' in config) {
|
| 51 |
freeConfig(config.moonshine, Module)
|
| 52 |
}
|
|
@@ -173,6 +181,22 @@ function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) {
|
|
| 173 |
}
|
| 174 |
}
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
function initSherpaOnnxOnlineModelConfig(config, Module) {
|
| 177 |
if (!('transducer' in config)) {
|
| 178 |
config.transducer = {
|
|
@@ -201,6 +225,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
|
|
| 201 |
};
|
| 202 |
}
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
if (!('tokensBuf' in config)) {
|
| 205 |
config.tokensBuf = '';
|
| 206 |
}
|
|
@@ -221,8 +251,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
|
|
| 221 |
const nemoCtc =
|
| 222 |
initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);
|
| 223 |
|
| 224 |
-
const
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
const ptr = Module._malloc(len);
|
| 228 |
|
|
@@ -308,9 +341,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
|
|
| 308 |
Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
|
| 309 |
offset += nemoCtc.len;
|
| 310 |
|
|
|
|
|
|
|
|
|
|
| 311 |
return {
|
| 312 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
| 313 |
-
paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc
|
|
|
|
| 314 |
}
|
| 315 |
}
|
| 316 |
|
|
@@ -519,6 +556,10 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
| 519 |
model: '',
|
| 520 |
};
|
| 521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
let type = 1;
|
| 523 |
|
| 524 |
switch (type) {
|
|
@@ -541,6 +582,10 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
| 541 |
// nemoCtc
|
| 542 |
onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
|
| 543 |
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
}
|
| 545 |
|
| 546 |
|
|
@@ -549,6 +594,7 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
| 549 |
paraformer: onlineParaformerModelConfig,
|
| 550 |
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
| 551 |
nemoCtc: onlineNemoCtcModelConfig,
|
|
|
|
| 552 |
tokens: './tokens.txt',
|
| 553 |
numThreads: 1,
|
| 554 |
provider: 'cpu',
|
|
@@ -559,8 +605,8 @@ function createOnlineRecognizer(Module, myConfig) {
|
|
| 559 |
};
|
| 560 |
|
| 561 |
const featureConfig = {
|
| 562 |
-
sampleRate: 16000,
|
| 563 |
-
featureDim: 80,
|
| 564 |
};
|
| 565 |
|
| 566 |
let recognizerConfig = {
|
|
@@ -691,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
|
|
| 691 |
}
|
| 692 |
}
|
| 693 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
| 695 |
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
| 696 |
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
|
@@ -955,6 +1018,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|
| 955 |
};
|
| 956 |
}
|
| 957 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 958 |
if (!('whisper' in config)) {
|
| 959 |
config.whisper = {
|
| 960 |
encoder: '',
|
|
@@ -1036,9 +1105,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|
| 1036 |
|
| 1037 |
const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
|
| 1038 |
|
|
|
|
|
|
|
|
|
|
| 1039 |
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
| 1040 |
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
| 1041 |
-
dolphin.len + zipformerCtc.len + canary.len;
|
| 1042 |
|
| 1043 |
const ptr = Module._malloc(len);
|
| 1044 |
|
|
@@ -1146,11 +1218,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|
| 1146 |
Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
|
| 1147 |
offset += canary.len;
|
| 1148 |
|
|
|
|
|
|
|
|
|
|
| 1149 |
return {
|
| 1150 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
| 1151 |
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
| 1152 |
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
| 1153 |
dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
|
|
|
|
| 1154 |
}
|
| 1155 |
}
|
| 1156 |
|
|
|
|
| 31 |
freeConfig(config.nemoCtc, Module)
|
| 32 |
}
|
| 33 |
|
| 34 |
+
if ('toneCtc' in config) {
|
| 35 |
+
freeConfig(config.toneCtc, Module)
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
if ('whisper' in config) {
|
| 39 |
freeConfig(config.whisper, Module)
|
| 40 |
}
|
|
|
|
| 51 |
freeConfig(config.zipformerCtc, Module)
|
| 52 |
}
|
| 53 |
|
| 54 |
+
if ('wenetCtc' in config) {
|
| 55 |
+
freeConfig(config.wenetCtc, Module)
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
if ('moonshine' in config) {
|
| 59 |
freeConfig(config.moonshine, Module)
|
| 60 |
}
|
|
|
|
| 181 |
}
|
| 182 |
}
|
| 183 |
|
| 184 |
+
function initSherpaOnnxOnlineToneCtcModelConfig(config, Module) {
|
| 185 |
+
const n = Module.lengthBytesUTF8(config.model || '') + 1;
|
| 186 |
+
const buffer = Module._malloc(n);
|
| 187 |
+
|
| 188 |
+
const len = 1 * 4; // 1 pointer
|
| 189 |
+
const ptr = Module._malloc(len);
|
| 190 |
+
|
| 191 |
+
Module.stringToUTF8(config.model || '', buffer, n);
|
| 192 |
+
|
| 193 |
+
Module.setValue(ptr, buffer, 'i8*');
|
| 194 |
+
|
| 195 |
+
return {
|
| 196 |
+
buffer: buffer, ptr: ptr, len: len,
|
| 197 |
+
}
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
function initSherpaOnnxOnlineModelConfig(config, Module) {
|
| 201 |
if (!('transducer' in config)) {
|
| 202 |
config.transducer = {
|
|
|
|
| 225 |
};
|
| 226 |
}
|
| 227 |
|
| 228 |
+
if (!('toneCtc' in config)) {
|
| 229 |
+
config.toneCtc = {
|
| 230 |
+
model: '',
|
| 231 |
+
};
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
if (!('tokensBuf' in config)) {
|
| 235 |
config.tokensBuf = '';
|
| 236 |
}
|
|
|
|
| 251 |
const nemoCtc =
|
| 252 |
initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);
|
| 253 |
|
| 254 |
+
const toneCtc =
|
| 255 |
+
initSherpaOnnxOnlineToneCtcModelConfig(config.toneCtc, Module);
|
| 256 |
+
|
| 257 |
+
const len = transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 +
|
| 258 |
+
nemoCtc.len + toneCtc.len;
|
| 259 |
|
| 260 |
const ptr = Module._malloc(len);
|
| 261 |
|
|
|
|
| 341 |
Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
|
| 342 |
offset += nemoCtc.len;
|
| 343 |
|
| 344 |
+
Module._CopyHeap(toneCtc.ptr, toneCtc.len, ptr + offset);
|
| 345 |
+
offset += toneCtc.len;
|
| 346 |
+
|
| 347 |
return {
|
| 348 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
| 349 |
+
paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc,
|
| 350 |
+
toneCtc: toneCtc,
|
| 351 |
}
|
| 352 |
}
|
| 353 |
|
|
|
|
| 556 |
model: '',
|
| 557 |
};
|
| 558 |
|
| 559 |
+
const onlineToneCtcModelConfig = {
|
| 560 |
+
model: '',
|
| 561 |
+
};
|
| 562 |
+
|
| 563 |
let type = 1;
|
| 564 |
|
| 565 |
switch (type) {
|
|
|
|
| 582 |
// nemoCtc
|
| 583 |
onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
|
| 584 |
break;
|
| 585 |
+
case 4:
|
| 586 |
+
// toneCtc
|
| 587 |
+
onlineToneCtcModelConfig.model = './tone-ctc.onnx';
|
| 588 |
+
break;
|
| 589 |
}
|
| 590 |
|
| 591 |
|
|
|
|
| 594 |
paraformer: onlineParaformerModelConfig,
|
| 595 |
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
| 596 |
nemoCtc: onlineNemoCtcModelConfig,
|
| 597 |
+
toneCtc: onlineToneCtcModelConfig,
|
| 598 |
tokens: './tokens.txt',
|
| 599 |
numThreads: 1,
|
| 600 |
provider: 'cpu',
|
|
|
|
| 605 |
};
|
| 606 |
|
| 607 |
const featureConfig = {
|
| 608 |
+
sampleRate: 16000, // it is ignored when toneCtc is used
|
| 609 |
+
featureDim: 80, // it is ignored when toneCtc is used
|
| 610 |
};
|
| 611 |
|
| 612 |
let recognizerConfig = {
|
|
|
|
| 737 |
}
|
| 738 |
}
|
| 739 |
|
| 740 |
+
function initSherpaOnnxOfflineWenetCtcModelConfig(config, Module) {
|
| 741 |
+
const n = Module.lengthBytesUTF8(config.model || '') + 1;
|
| 742 |
+
|
| 743 |
+
const buffer = Module._malloc(n);
|
| 744 |
+
|
| 745 |
+
const len = 1 * 4; // 1 pointer
|
| 746 |
+
const ptr = Module._malloc(len);
|
| 747 |
+
|
| 748 |
+
Module.stringToUTF8(config.model || '', buffer, n);
|
| 749 |
+
|
| 750 |
+
Module.setValue(ptr, buffer, 'i8*');
|
| 751 |
+
|
| 752 |
+
return {
|
| 753 |
+
buffer: buffer, ptr: ptr, len: len,
|
| 754 |
+
}
|
| 755 |
+
}
|
| 756 |
+
|
| 757 |
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
| 758 |
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
| 759 |
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
|
|
|
| 1018 |
};
|
| 1019 |
}
|
| 1020 |
|
| 1021 |
+
if (!('wenetCtc' in config)) {
|
| 1022 |
+
config.wenetCtc = {
|
| 1023 |
+
model: '',
|
| 1024 |
+
};
|
| 1025 |
+
}
|
| 1026 |
+
|
| 1027 |
if (!('whisper' in config)) {
|
| 1028 |
config.whisper = {
|
| 1029 |
encoder: '',
|
|
|
|
| 1105 |
|
| 1106 |
const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
|
| 1107 |
|
| 1108 |
+
const wenetCtc =
|
| 1109 |
+
initSherpaOnnxOfflineWenetCtcModelConfig(config.wenetCtc, Module);
|
| 1110 |
+
|
| 1111 |
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
| 1112 |
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
| 1113 |
+
dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len;
|
| 1114 |
|
| 1115 |
const ptr = Module._malloc(len);
|
| 1116 |
|
|
|
|
| 1218 |
Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
|
| 1219 |
offset += canary.len;
|
| 1220 |
|
| 1221 |
+
Module._CopyHeap(wenetCtc.ptr, wenetCtc.len, ptr + offset);
|
| 1222 |
+
offset += wenetCtc.len;
|
| 1223 |
+
|
| 1224 |
return {
|
| 1225 |
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
| 1226 |
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
| 1227 |
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
| 1228 |
dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
|
| 1229 |
+
wenetCtc: wenetCtc,
|
| 1230 |
}
|
| 1231 |
}
|
| 1232 |
|
sherpa-onnx-wasm-main-asr.js
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
sherpa-onnx-wasm-main-asr.wasm
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb70c5a72afee0f346493828bf33d85c85fbe04719eb51fd91cebc5e6ae1084f
|
| 3 |
+
size 11586465
|