Upload ChunkFormer Classification Model
Browse files
README.md
CHANGED
|
@@ -57,19 +57,27 @@ result = model.classify_audio(
|
|
| 57 |
audio_path="path/to/your/audio.wav",
|
| 58 |
chunk_size=-1, # -1 for full attention
|
| 59 |
left_context_size=-1,
|
| 60 |
-
right_context_size=-1,
|
| 61 |
-
return_probabilities=True
|
| 62 |
)
|
| 63 |
|
| 64 |
print(result)
|
| 65 |
# Output example:
|
| 66 |
# {
|
| 67 |
-
# 'gender':
|
| 68 |
-
#
|
| 69 |
-
#
|
| 70 |
-
#
|
| 71 |
-
#
|
| 72 |
-
# '
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# }
|
| 74 |
```
|
| 75 |
|
|
@@ -78,8 +86,7 @@ print(result)
|
|
| 78 |
```bash
|
| 79 |
chunkformer-decode \
|
| 80 |
--model_checkpoint khanhld/chunkformer-gender-emotion-dialect-age-classification \
|
| 81 |
-
--audio_file path/to/audio.wav \
|
| 82 |
-
--return_probabilities
|
| 83 |
```
|
| 84 |
|
| 85 |
## Training
|
|
|
|
| 57 |
audio_path="path/to/your/audio.wav",
|
| 58 |
chunk_size=-1, # -1 for full attention
|
| 59 |
left_context_size=-1,
|
| 60 |
+
right_context_size=-1
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
print(result)
|
| 64 |
# Output example:
|
| 65 |
# {
|
| 66 |
+
# 'gender': {
|
| 67 |
+
# 'label': 'female',
|
| 68 |
+
# 'label_id': 0,
|
| 69 |
+
# 'prob': 0.95
|
| 70 |
+
# },
|
| 71 |
+
# 'dialect': {
|
| 72 |
+
# 'label': 'northern dialect',
|
| 73 |
+
# 'label_id': 3,
|
| 74 |
+
# 'prob': 0.70
|
| 75 |
+
# },
|
| 76 |
+
# 'emotion': {
|
| 77 |
+
# 'label': 'neutral',
|
| 78 |
+
# 'label_id': 5,
|
| 79 |
+
# 'prob': 0.80
|
| 80 |
+
# }
|
| 81 |
# }
|
| 82 |
```
|
| 83 |
|
|
|
|
| 86 |
```bash
|
| 87 |
chunkformer-decode \
|
| 88 |
--model_checkpoint khanhld/chunkformer-gender-emotion-dialect-age-classification \
|
| 89 |
+
--audio_file path/to/audio.wav
|
|
|
|
| 90 |
```
|
| 91 |
|
| 92 |
## Training
|