DavidNguyen committed on
Commit
e53e0dc
·
verified ·
1 Parent(s): 80b10fe

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +204 -0
config.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft",
3
+ "architectures": [
4
+ "LlavaPhiForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_phi3.Phi3Config",
10
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
+ },
12
+ "bal_comp_loss_coef": 0.01,
13
+ "balance_loss_coef": 0.01,
14
+ "bos_token_id": 1,
15
+ "clip_smoe": true,
16
+ "diversity_loss_coef": 0.01,
17
+ "dropout": false,
18
+ "e_loss_coef": 0.001,
19
+ "embd_pdrop": 0.0,
20
+ "entropy_advance_loss": false,
21
+ "eos_token_id": 32000,
22
+ "freeze_backbone": false,
23
+ "freeze_mm_mlp_adapter": false,
24
+ "hidden_act": "silu",
25
+ "hidden_size": 3072,
26
+ "hybrid": false,
27
+ "image_aspect_ratio": "pad",
28
+ "init_weight": true,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8192,
31
+ "is_cosine": false,
32
+ "is_norm_weight": false,
33
+ "local_rank": 0,
34
+ "loss1": "balanceloss",
35
+ "loss2": "zloss",
36
+ "luna": false,
37
+ "max_compete_in_iter": 3,
38
+ "max_position_embeddings": 131072,
39
+ "mlp_norm": false,
40
+ "mlp_smoe": true,
41
+ "mm_hidden_size": 1152,
42
+ "mm_patch_merge_type": "flat",
43
+ "mm_projector_lr": null,
44
+ "mm_projector_type": "moe",
45
+ "mm_use_im_patch_token": false,
46
+ "mm_use_im_start_end": false,
47
+ "mm_vision_select_feature": "patch",
48
+ "mm_vision_select_layer": -2,
49
+ "mm_vision_tower": "google/siglip-so400m-patch14-224",
50
+ "model_name_or_path": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft",
51
+ "model_type": "llava_phi",
52
+ "moe_name": "smoe_share",
53
+ "moe_relu_l1_reg_coeff_multiplier": 1.2,
54
+ "mp_pixel_shuffle_factor": 1,
55
+ "norm_softmax": false,
56
+ "normalization": true,
57
+ "num_attention_heads": 32,
58
+ "num_experts": 6,
59
+ "num_hidden_layers": 32,
60
+ "num_key_value_heads": 32,
61
+ "num_layers": 3,
62
+ "num_selected": 3,
63
+ "number_of_previous_tokens": 2,
64
+ "original_max_position_embeddings": 4096,
65
+ "pad_token_id": 32000,
66
+ "pretrain_mm_mlp_adapter": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft/mm_projector.bin",
67
+ "rate_compete": 0.2,
68
+ "rate_flip": 0.05,
69
+ "resid_pdrop": 0.0,
70
+ "rms_norm_eps": 1e-05,
71
+ "rope_scaling": {
72
+ "long_factor": [
73
+ 1.0800000429153442,
74
+ 1.1100000143051147,
75
+ 1.1399999856948853,
76
+ 1.340000033378601,
77
+ 1.5899999141693115,
78
+ 1.600000023841858,
79
+ 1.6200000047683716,
80
+ 2.620000123977661,
81
+ 3.2300000190734863,
82
+ 3.2300000190734863,
83
+ 4.789999961853027,
84
+ 7.400000095367432,
85
+ 7.700000286102295,
86
+ 9.09000015258789,
87
+ 12.199999809265137,
88
+ 17.670000076293945,
89
+ 24.46000099182129,
90
+ 28.57000160217285,
91
+ 30.420001983642578,
92
+ 30.840002059936523,
93
+ 32.590003967285156,
94
+ 32.93000411987305,
95
+ 42.320003509521484,
96
+ 44.96000289916992,
97
+ 50.340003967285156,
98
+ 50.45000457763672,
99
+ 57.55000305175781,
100
+ 57.93000411987305,
101
+ 58.21000289916992,
102
+ 60.1400032043457,
103
+ 62.61000442504883,
104
+ 62.62000274658203,
105
+ 62.71000289916992,
106
+ 63.1400032043457,
107
+ 63.1400032043457,
108
+ 63.77000427246094,
109
+ 63.93000411987305,
110
+ 63.96000289916992,
111
+ 63.970001220703125,
112
+ 64.02999877929688,
113
+ 64.06999969482422,
114
+ 64.08000183105469,
115
+ 64.12000274658203,
116
+ 64.41000366210938,
117
+ 64.4800033569336,
118
+ 64.51000213623047,
119
+ 64.52999877929688,
120
+ 64.83999633789062
121
+ ],
122
+ "short_factor": [
123
+ 1.0,
124
+ 1.0199999809265137,
125
+ 1.0299999713897705,
126
+ 1.0299999713897705,
127
+ 1.0499999523162842,
128
+ 1.0499999523162842,
129
+ 1.0499999523162842,
130
+ 1.0499999523162842,
131
+ 1.0499999523162842,
132
+ 1.0699999332427979,
133
+ 1.0999999046325684,
134
+ 1.1099998950958252,
135
+ 1.1599998474121094,
136
+ 1.1599998474121094,
137
+ 1.1699998378753662,
138
+ 1.2899998426437378,
139
+ 1.339999794960022,
140
+ 1.679999828338623,
141
+ 1.7899998426437378,
142
+ 1.8199998140335083,
143
+ 1.8499997854232788,
144
+ 1.8799997568130493,
145
+ 1.9099997282028198,
146
+ 1.9399996995925903,
147
+ 1.9899996519088745,
148
+ 2.0199997425079346,
149
+ 2.0199997425079346,
150
+ 2.0199997425079346,
151
+ 2.0199997425079346,
152
+ 2.0199997425079346,
153
+ 2.0199997425079346,
154
+ 2.0299997329711914,
155
+ 2.0299997329711914,
156
+ 2.0299997329711914,
157
+ 2.0299997329711914,
158
+ 2.0299997329711914,
159
+ 2.0299997329711914,
160
+ 2.0299997329711914,
161
+ 2.0299997329711914,
162
+ 2.0299997329711914,
163
+ 2.0799996852874756,
164
+ 2.0899996757507324,
165
+ 2.189999580383301,
166
+ 2.2199995517730713,
167
+ 2.5899994373321533,
168
+ 2.729999542236328,
169
+ 2.749999523162842,
170
+ 2.8399994373321533
171
+ ],
172
+ "type": "longrope"
173
+ },
174
+ "rope_theta": 10000.0,
175
+ "router_loss_coef": 0.01,
176
+ "router_theta": 0.1,
177
+ "router_z_loss_coef": 0.001,
178
+ "scales": [
179
+ 1,
180
+ 3
181
+ ],
182
+ "sliding_window": 262144,
183
+ "sparse_upcycling": true,
184
+ "std_gate": 0.02,
185
+ "strategy_train": "base",
186
+ "tie_word_embeddings": false,
187
+ "tokenizer_model_max_length": 2048,
188
+ "tokenizer_padding_side": "right",
189
+ "topk_max": 2,
190
+ "topk_min": 1,
191
+ "torch_dtype": "bfloat16",
192
+ "training": true,
193
+ "transformers_version": "4.43.0",
194
+ "tune_mm_mlp_adapter": false,
195
+ "unit_test": true,
196
+ "use_cache": true,
197
+ "use_mm_proj": true,
198
+ "use_old": false,
199
+ "version": "phi35",
200
+ "vision_tower": "google/siglip-so400m-patch14-224",
201
+ "vision_tower_dir": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft/clip.bin",
202
+ "vocab_size": 32064,
203
+ "warm_up": 0.05
204
+ }