science-keyword-classification / model_architecture.txt
SajilAwale's picture
Added INDUS-SDE-GKR
6b25024 verified
raw
history blame
2.14 kB
ModernBertForSequenceClassification(
(model): ModernBertModel(
(embeddings): ModernBertEmbeddings(
(tok_embeddings): Embedding(50368, 768, padding_idx=50283)
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(drop): Dropout(p=0.0, inplace=False)
)
(layers): ModuleList(
(0): ModernBertEncoderLayer(
(attn_norm): Identity()
(attn): ModernBertAttention(
(Wqkv): Linear(in_features=768, out_features=2304, bias=False)
(rotary_emb): ModernBertRotaryEmbedding()
(Wo): Linear(in_features=768, out_features=768, bias=False)
(out_drop): Identity()
)
(mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(mlp): ModernBertMLP(
(Wi): Linear(in_features=768, out_features=2304, bias=False)
(act): GELUActivation()
(drop): Dropout(p=0.0, inplace=False)
(Wo): Linear(in_features=1152, out_features=768, bias=False)
)
)
(1-21): 21 x ModernBertEncoderLayer(
(attn_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(attn): ModernBertAttention(
(Wqkv): Linear(in_features=768, out_features=2304, bias=False)
(rotary_emb): ModernBertRotaryEmbedding()
(Wo): Linear(in_features=768, out_features=768, bias=False)
(out_drop): Identity()
)
(mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(mlp): ModernBertMLP(
(Wi): Linear(in_features=768, out_features=2304, bias=False)
(act): GELUActivation()
(drop): Dropout(p=0.0, inplace=False)
(Wo): Linear(in_features=1152, out_features=768, bias=False)
)
)
)
(final_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
(head): ModernBertPredictionHead(
(dense): Linear(in_features=768, out_features=768, bias=False)
(act): GELUActivation()
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
(drop): Dropout(p=0.0, inplace=False)
(classifier): Linear(in_features=768, out_features=3240, bias=True)
)