# Residue from the hosting page this file was copied from (not configuration):
#   khanhld's picture
#   Upload ChunkFormer Classification Model
#   360da7f verified
---
# Top-level training scalars followed by the feature-normalisation section.
accum_grad: 1
cmvn: global_cmvn
# Options for the `cmvn` choice above. The two keys below are children of
# `cmvn_conf`; left at column 0 they would be stray top-level keys and
# `cmvn_conf` itself would be null.
cmvn_conf:
  # Relative path — presumably resolved against the training working
  # directory; TODO confirm.
  cmvn_file: data/train_hf/global_cmvn
  is_json_cmvn: true
# Dataset type and its data-pipeline options.
dataset: classification
# NOTE(review): nesting reconstructed from the alphabetical key order of the
# dump (each child run is sorted on its own and ends where the parent's
# ordering resumes). Verify against the consumer's schema.
dataset_conf:
  batch_conf:
    batch_size: 4
    batch_type: dynamic
    max_frames_in_batch: 80000
    pad_feat: true
  fbank_conf:
    dither: 1.0
    # presumably milliseconds — TODO confirm against the feature extractor
    frame_length: 25
    frame_shift: 10
    num_mel_bins: 80
  filter_conf:
    max_length: 40960
    min_length: 0
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1000
  sort: false
  sort_conf:
    sort_size: 500
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  speed_perturb: true
  # List of task names. It sits inside `dataset_conf` (it sorts after
  # `speed_perturb` and before the top-level `dtype` key); at column 0 it
  # would duplicate the other `tasks` key in this file.
  tasks:
    - gender
    - emotion
    - dialect
    - age
# Numeric precision selector and encoder choice, followed by encoder options.
dtype: fp16
encoder: chunkformer
# All keys below are children of `encoder_conf`; at column 0 the
# `dropout_rate` here would duplicate the other `dropout_rate` key in this
# file.
encoder_conf:
  activation_type: swish
  attention_dropout_rate: 0.1
  attention_heads: 4
  cnn_module_kernel: 15
  cnn_module_norm: layer_norm
  dropout_rate: 0.1
  # NOTE(review): -1 appears twice; presumably a sentinel (e.g. "no chunking")
  # that is deliberately over-represented — confirm against the encoder code.
  dynamic_chunk_sizes:
    - -1
    - -1
    - 64
    - 128
    - 256
  dynamic_conv: true
  dynamic_left_context_sizes:
    - 64
    - 128
    - 256
  dynamic_right_context_sizes:
    - 64
    - 128
    - 256
  input_layer: dw_striding
  linear_units: 2048
  normalize_before: true
  num_blocks: 12
  output_size: 512
  pos_enc_layer_type: chunk_rel_pos
  positional_dropout_rate: 0.1
  # NOTE(review): "seflattn" looks like a misspelling of "selfattn", but this
  # value is presumably matched literally by the model code — do not "fix" it
  # here without confirming the identifier the loader expects.
  selfattention_layer_type: chunk_rel_seflattn
  use_cnn_module: true
# Top-level training scalars followed by the model section.
grad_clip: 5.0
# Matches `num_mel_bins: 80` in the fbank settings of this file.
input_dim: 80
log_interval: 100
max_epoch: 100
model: classification
# Options for the `model` choice above. `dropout_rate` and `tasks` are children
# of `model_conf`; at column 0 they would duplicate keys of the same name
# elsewhere in this file.
model_conf:
  dropout_rate: 0.1
  label_smoothing: 0.2
  # Task name -> integer; presumably the number of classes per task (TODO
  # confirm). The names are the same four listed in the dataset `tasks` list.
  tasks:
    age: 5
    dialect: 5
    emotion: 8
    gender: 2
# Output directory (relative path) and optimizer selection.
model_dir: exp/v1
optim: adamw
# Options for the `optim` choice above; `lr` is a child of `optim_conf`, not a
# top-level key.
optim_conf:
  lr: 0.001
# Checkpoint content selector and learning-rate scheduler selection.
save_states: model_only
scheduler: warmuplr
# Options for the `scheduler` choice above; `warmup_steps` is a child of
# `scheduler_conf`, not a top-level key.
scheduler_conf:
  warmup_steps: 5000
# Training backend and mixed-precision switch.
train_engine: torch_ddp
use_amp: true