{
  "architectures": [
    "VocosModel"
  ],
  "hidden_dim": 512,
  "hop_length": 256,
  "input_channels": 100,
  "intermediate_dim": 1536,
  "kernel_size": 7,
  "layer_norm_eps": 1e-06,
  "layer_scale_init_value": 0.125,
  "model_type": "vocos",
  "n_fft": 1024,
  "num_layers": 8,
  "padding": 3,
  "spec_padding": "center",
  "torch_dtype": "float32",
  "transformers_version": "4.56.0.dev0",
  "use_adaptive_norm": false
}