---
# Configuration for the model identified by `name` below.
#
# NOTE(review): this document was found collapsed onto a single line, which
# is not parseable YAML (a plain scalar may not contain ": "). The nesting
# here is reconstructed from the empty-valued section keys and from repeated
# key names (n_layers, activation, use_norm), which cannot live in the same
# mapping. Confirm the section boundaries against the consumer's schema.

# Top-level model settings.
name: mosaicfm
d_model: 2560
n_layers: 32
init_device: cpu
expansion_ratio: 4
standard_scale_outputs: false
transformer_activation: gelu
n_heads: 20
norm_scheme: pre
use_generative_training: false
use_cell_conditioned_generation: false
use_glu: false
cell_emb_style: cls

# Attention settings.
attn_config:
  attn_impl: flash
  use_attn_mask: false
  attn_type: grouped_query_attention
  kv_nheads: 20
  attn_pdrop: 0.0

# Normalization settings.
norm_config:
  norm_type: layernorm
  eps: 1.0e-05

# Expression encoder settings.
expression_encoder:
  input_emb_style: continuous
  dropout: 0.1
  max_value: 512
  activation: gelu
  use_norm: true

# Gene encoder settings.
gene_encoder:
  use_norm: true

mvc:
  # Quoted so the two-word value is unambiguously a single string.
  arch_style: 'inner product'
  query_activation: sigmoid
  scaled_dot_product: true

# Expression decoder settings (its n_layers is distinct from the top-level one).
expression_decoder:
  n_outputs: 1
  n_layers: 1
  activation: gelu

# Chemical encoder settings.
chemical_encoder:
  num_drugs: 378
  fp_dim: 2048
  activation: gelu
  padding_idx: 0
  freeze: false

# NOTE(review): the three keys below are assumed to be top-level rather than
# part of chemical_encoder — TODO confirm.
vocab_size: 62721
precision: amp_bf16
wandb_id: vevotx/vevo-MFM-v2/cdk744ih