{
  "_class_name": "Transformer2DModel",
  "_diffusers_version": "0.27.2",
  "in_channels": 8,
  "num_layers": 24,
  "inner_dim": 2560,
  "attention_head_dim": 128,
  "num_attention_heads": 20,
  "mlp_ratio": 2.5,
  "out_channels": 8,
  "max_position": 32768,
  "rope_theta": 1000000.0,
  "speaker_embedding_dim": 512,
  "text_embedding_dim": 768,
  "ssl_encoder_depths": [8, 8],
  "ssl_names": ["mert", "m-hubert"],
  "ssl_latent_dims": [1024, 768],
  "patch_size": [16, 1],
  "max_height": 16,
  "max_width": 32768,
  "lyric_encoder_vocab_size": 6693,
  "lyric_hidden_size": 1024
}