# Gradient accumulation setting and feature-normalization (CMVN) selection.
# NOTE(review): presumably the number of batches accumulated per optimizer
# step (1 = no accumulation) — confirm against the training loop.
accum_grad: 1
# Name of the CMVN mode; presumably paired with `cmvn_conf` below — confirm.
cmvn: global_cmvn
cmvn_conf:
  # Relative path to the CMVN statistics file; the base directory it is
  # resolved against is decided by the consumer, not by this file.
  cmvn_file: data/train_hf/global_cmvn
  # Per the key name, marks the statistics file as JSON — verify in the loader.
  is_json_cmvn: true
# Dataset type and the data-pipeline options (batching, features, filtering,
# augmentation, task list).
dataset: classification
dataset_conf:
  # Batching options.
  # NOTE(review): `batch_type: dynamic` is set together with both
  # `batch_size` and `max_frames_in_batch`; presumably only the frame budget
  # applies in dynamic mode and `batch_size` is unused — confirm.
  batch_conf:
    batch_size: 4
    batch_type: dynamic
    max_frames_in_batch: 80000
    pad_feat: true
  # Filterbank feature extraction options.
  # NOTE(review): frame_length/frame_shift are presumably milliseconds —
  # confirm against the feature extractor.
  fbank_conf:
    dither: 1.0
    frame_length: 25
    frame_shift: 10
    # Same value as the top-level `input_dim` (80); presumably the two must
    # stay in sync — confirm.
    num_mel_bins: 80
  # Length bounds used to filter samples.
  # NOTE(review): units (frames vs. samples) are not visible here — confirm.
  filter_conf:
    max_length: 40960
    min_length: 0
  resample_conf:
    # Target sample rate; presumably in Hz — confirm.
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1000
  # Sorting is switched off here, yet `sort_conf` is still present.
  # NOTE(review): `sort_conf` is presumably ignored while `sort` is false.
  sort: false
  sort_conf:
    sort_size: 500
  # Spectrogram augmentation switch and its mask parameters.
  # NOTE(review): presumably max_f/max_t are maximum mask widths along the
  # frequency/time axes and num_*_mask the mask counts — confirm.
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  speed_perturb: true
  # Task names; the same four names appear as keys under `model_conf.tasks`.
  tasks:
  - gender
  - emotion
  - dialect
  - age
# Numeric precision, encoder type, and encoder hyper-parameters.
dtype: fp16
encoder: chunkformer
encoder_conf:
  activation_type: swish
  attention_dropout_rate: 0.1
  attention_heads: 4
  cnn_module_kernel: 15
  cnn_module_norm: layer_norm
  dropout_rate: 0.1
  # Five entries, two of them -1; the left/right context lists below have
  # three entries each (64, 128, 256).
  # NOTE(review): -1 presumably means "no chunking / full context", and the
  # repeated -1 presumably weights that choice during sampling — confirm.
  dynamic_chunk_sizes:
  - -1
  - -1
  - 64
  - 128
  - 256
  dynamic_conv: true
  dynamic_left_context_sizes:
  - 64
  - 128
  - 256
  dynamic_right_context_sizes:
  - 64
  - 128
  - 256
  input_layer: dw_striding
  linear_units: 2048
  normalize_before: true
  num_blocks: 12
  output_size: 512
  pos_enc_layer_type: chunk_rel_pos
  positional_dropout_rate: 0.1
  # NOTE(review): "seflattn" looks like a misspelling of "selfattn", but this
  # string is matched at runtime by the consumer; do not change it without
  # checking the identifier the code actually registers.
  selfattention_layer_type: chunk_rel_seflattn
  use_cnn_module: true
# Training-loop scalars, model type, and classification-head settings.
# NOTE(review): presumably a gradient-norm clipping threshold — confirm.
grad_clip: 5.0
# Same value as `dataset_conf.fbank_conf.num_mel_bins` (80).
input_dim: 80
# NOTE(review): presumably the number of steps between log lines — confirm.
log_interval: 100
max_epoch: 100
model: classification
model_conf:
  # Set independently of `encoder_conf.dropout_rate`.
  dropout_rate: 0.1
  label_smoothing: 0.2
  # Maps each task name to an integer; the keys match the entries of
  # `dataset_conf.tasks`.
  # NOTE(review): the integers are presumably per-task class counts — confirm.
  tasks:
    age: 5
    dialect: 5
    emotion: 8
    gender: 2
# Output location, optimizer, LR scheduler, and training-engine settings.
# Relative path; presumably where checkpoints/logs are written — confirm.
model_dir: exp/v1
optim: adamw
optim_conf:
  # NOTE(review): with a warmup scheduler this is presumably the peak
  # learning rate reached after warmup — confirm.
  lr: 0.001
save_states: model_only
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 5000
train_engine: torch_ddp
# NOTE(review): set alongside top-level `dtype: fp16`; how the two interact
# is decided by the trainer — confirm they are not redundant or conflicting.
use_amp: true