{
  "_class_name": "ACEStepTransformer2DModel",
  "_diffusers_version": "0.32.2",
  "attention_head_dim": 128,
  "in_channels": 8,
  "inner_dim": 2560,
  "lyric_encoder_vocab_size": 6693,
  "lyric_hidden_size": 1024,
  "max_height": 16,
  "max_position": 32768,
  "max_width": 32768,
  "mlp_ratio": 2.5,
  "num_attention_heads": 20,
  "num_layers": 24,
  "out_channels": 8,
  "patch_size": [
    16,
    1
  ],
  "rope_theta": 1000000.0,
  "speaker_embedding_dim": 512,
  "ssl_encoder_depths": [
    8,
    8
  ],
  "ssl_latent_dims": [
    1024,
    768
  ],
  "ssl_names": [
    "mert",
    "m-hubert"
  ],
  "text_embedding_dim": 768
}