# Hugging Face Space: Thanh-Lam/vietnamese-speaker-profiling-v2 (Sleeping)
# Vietnamese Speaker Profiling with wav2vec2-base-vi-vlsp2020
# c3418e9, 9 days ago, 695 Bytes

	# Inference Configuration

	# Model
	model:
	checkpoint: "model/vulehuubinh"
	name: "nguyenvulebinh/wav2vec2-base-vi-vlsp2020"
	head_hidden_dim: 256

	# Audio Processing
	audio:
	sampling_rate: 16000
	max_duration: 5

	# Inference
	inference:
	batch_size: 1
	device: "cuda"

	# Input
	input:
	audio_path: null
	audio_dir: null

	# Output
	output:
	dir: "output/predictions"
	save_results: true
	format: "json"

	# Label Mappings
	# NOTE: Model was trained with Female=0, Male=1 (opposite of finetune.yaml order)
	# This is because pandas .map() may have processed labels in different order
	labels:
	gender:
	0: "Female"
	1: "Male"
	dialect:
	0: "North"
	1: "Central"
	2: "South"