khanhld/chunkformer-gender-emotion-dialect-age-classification

Audio Classification

speech-classification

speech-processing

Model card Files Files and versions

chunkformer-gender-emotion-dialect-age-classification / config.yaml

khanhld's picture

Upload ChunkFormer Classification Model

360da7f verified 20 days ago

history blame contribute delete

1.72 kB

	accum_grad: 1
	cmvn: global_cmvn
	cmvn_conf:
	cmvn_file: data/train_hf/global_cmvn
	is_json_cmvn: true
	dataset: classification
	dataset_conf:
	batch_conf:
	batch_size: 4
	batch_type: dynamic
	max_frames_in_batch: 80000
	pad_feat: true
	fbank_conf:
	dither: 1.0
	frame_length: 25
	frame_shift: 10
	num_mel_bins: 80
	filter_conf:
	max_length: 40960
	min_length: 0
	resample_conf:
	resample_rate: 16000
	shuffle: true
	shuffle_conf:
	shuffle_size: 1000
	sort: false
	sort_conf:
	sort_size: 500
	spec_aug: true
	spec_aug_conf:
	max_f: 10
	max_t: 50
	num_f_mask: 2
	num_t_mask: 2
	speed_perturb: true
	tasks:
	- gender
	- emotion
	- dialect
	- age
	dtype: fp16
	encoder: chunkformer
	encoder_conf:
	activation_type: swish
	attention_dropout_rate: 0.1
	attention_heads: 4
	cnn_module_kernel: 15
	cnn_module_norm: layer_norm
	dropout_rate: 0.1
	dynamic_chunk_sizes:
	- -1
	- -1
	- 64
	- 128
	- 256
	dynamic_conv: true
	dynamic_left_context_sizes:
	- 64
	- 128
	- 256
	dynamic_right_context_sizes:
	- 64
	- 128
	- 256
	input_layer: dw_striding
	linear_units: 2048
	normalize_before: true
	num_blocks: 12
	output_size: 512
	pos_enc_layer_type: chunk_rel_pos
	positional_dropout_rate: 0.1
	selfattention_layer_type: chunk_rel_seflattn
	use_cnn_module: true
	grad_clip: 5.0
	input_dim: 80
	log_interval: 100
	max_epoch: 100
	model: classification
	model_conf:
	dropout_rate: 0.1
	label_smoothing: 0.2
	tasks:
	age: 5
	dialect: 5
	emotion: 8
	gender: 2
	model_dir: exp/v1
	optim: adamw
	optim_conf:
	lr: 0.001
	save_states: model_only
	scheduler: warmuplr
	scheduler_conf:
	warmup_steps: 5000
	train_engine: torch_ddp
	use_amp: true