SajilAwale committed on
Commit
6b25024
·
verified ·
1 Parent(s): 698d29f

Added INDUS-SDE-GKR

Browse files
Files changed (3) hide show
  1. config.json +0 -0
  2. model_architecture.txt +51 -0
  3. training_args.bin +3 -0
config.json ADDED
The diff for this file is too large to render. See raw diff
 
model_architecture.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ModernBertForSequenceClassification(
2
+ (model): ModernBertModel(
3
+ (embeddings): ModernBertEmbeddings(
4
+ (tok_embeddings): Embedding(50368, 768, padding_idx=50283)
5
+ (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
6
+ (drop): Dropout(p=0.0, inplace=False)
7
+ )
8
+ (layers): ModuleList(
9
+ (0): ModernBertEncoderLayer(
10
+ (attn_norm): Identity()
11
+ (attn): ModernBertAttention(
12
+ (Wqkv): Linear(in_features=768, out_features=2304, bias=False)
13
+ (rotary_emb): ModernBertRotaryEmbedding()
14
+ (Wo): Linear(in_features=768, out_features=768, bias=False)
15
+ (out_drop): Identity()
16
+ )
17
+ (mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
18
+ (mlp): ModernBertMLP(
19
+ (Wi): Linear(in_features=768, out_features=2304, bias=False)
20
+ (act): GELUActivation()
21
+ (drop): Dropout(p=0.0, inplace=False)
22
+ (Wo): Linear(in_features=1152, out_features=768, bias=False)
23
+ )
24
+ )
25
+ (1-21): 21 x ModernBertEncoderLayer(
26
+ (attn_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
27
+ (attn): ModernBertAttention(
28
+ (Wqkv): Linear(in_features=768, out_features=2304, bias=False)
29
+ (rotary_emb): ModernBertRotaryEmbedding()
30
+ (Wo): Linear(in_features=768, out_features=768, bias=False)
31
+ (out_drop): Identity()
32
+ )
33
+ (mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
34
+ (mlp): ModernBertMLP(
35
+ (Wi): Linear(in_features=768, out_features=2304, bias=False)
36
+ (act): GELUActivation()
37
+ (drop): Dropout(p=0.0, inplace=False)
38
+ (Wo): Linear(in_features=1152, out_features=768, bias=False)
39
+ )
40
+ )
41
+ )
42
+ (final_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
43
+ )
44
+ (head): ModernBertPredictionHead(
45
+ (dense): Linear(in_features=768, out_features=768, bias=False)
46
+ (act): GELUActivation()
47
+ (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
48
+ )
49
+ (drop): Dropout(p=0.0, inplace=False)
50
+ (classifier): Linear(in_features=768, out_features=3240, bias=True)
51
+ )
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e148719ab875ff00516bfe4758c0d886ccf1c4590ce8ab8c356272ac9057110
3
+ size 5432