---
# Configuration for an audio-capable LLaMA model (BEATs audio encoder +
# window-level Q-Former projecting audio tokens into the LLM).
# NOTE(review): original file was garbled by table extraction (pipe-wrapped,
# indentation lost). Structure below is reconstructed: `beats_cfg` is assumed
# to be a sub-section of `model` — confirm against the loading code.
model:
  # HuggingFace model id of the backbone LLM.
  llama_path: "meta-llama/Meta-Llama-3.1-8B-Instruct"

  # BEATs audio encoder: frozen (not fine-tuned).
  freeze_beats: true

  # Audio Q-Former settings.
  use_audio_Qformer: true
  max_pooling: false
  downsample_factor: 8
  freeze_audio_QFormer: false
  # Run the Q-Former per fixed-length audio window rather than whole-clip.
  window_level_Qformer: true
  num_audio_query_token: 1
  # Window length and stride in seconds (non-overlapping: stride == window).
  second_per_window: 0.333333
  second_stride: 0.333333

  # Optional path to a pretrained audio→LLM projection; empty = train from scratch.
  audio_llama_proj_model: ""
  freeze_audio_llama_proj: false

  # LoRA adaptation of the LLM.
  lora: true
  lora_rank: 32
  lora_alpha: 32
  lora_dropout: 0.1

  # Llama-3.1 chat template; `{}` is filled with the user prompt.
  prompt_template: "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
  max_txt_len: 160
  # Quoted to keep the special token a literal string.
  end_sym: "<|end_of_text|>"

  # BEATs encoder architecture (ViT-style patch transformer over spectrograms).
  beats_cfg:
    input_patch_size: 16
    embed_dim: 512
    conv_bias: false
    encoder_layers: 12
    encoder_embed_dim: 768
    encoder_ffn_embed_dim: 3072
    encoder_attention_heads: 12
    activation_fn: "gelu"
    layer_wise_gradient_decay_ratio: 0.6
    layer_norm_first: false
    deep_norm: true
    dropout: 0.0
    attention_dropout: 0.0
    activation_dropout: 0.0
    encoder_layerdrop: 0.05
    dropout_input: 0.0
    conv_pos: 128
    conv_pos_groups: 16
    relative_position_embedding: true
    num_buckets: 320
    max_distance: 800
    gru_rel_pos: true
    finetuned_model: true
    predictor_dropout: 0.0
    # Number of output classes of the pretrained predictor head (527 = AudioSet).
    predictor_class: 527

# Text generation (decoding) parameters.
generate:
  max_new_tokens: 300
  num_beams: 2
  do_sample: false
  min_length: 1
  # temperature has no effect while do_sample is false; kept for easy toggling.
  temperature: 0.1
  repetition_penalty: 1.0
  length_penalty: 1.0