---
# Inference Configuration

# Model
model:
  checkpoint: "model/vulehuubinh"
  name: "nguyenvulebinh/wav2vec2-base-vi-vlsp2020"
  head_hidden_dim: 256

# Audio Processing
audio:
  # NOTE(review): presumably Hz — confirm against the consumer.
  sampling_rate: 16000
  # NOTE(review): presumably seconds — confirm against the consumer.
  max_duration: 5

# Inference
inference:
  batch_size: 1
  device: "cuda"

# Input
# Both entries are explicitly null in this file.
input:
  audio_path: null
  audio_dir: null

# Output
output:
  dir: "output/predictions"
  save_results: true
  format: "json"

# Label Mappings
# NOTE: Model was trained with Female=0, Male=1 (opposite of finetune.yaml
# order). This is because pandas .map() may have processed labels in a
# different order.
# The keys below are left unquoted on purpose so they load as integers,
# not strings.
labels:
  gender:
    0: "Female"
    1: "Male"
  dialect:
    0: "North"
    1: "Central"
    2: "South"