manaestras committed on
Commit
3aa00f1
·
verified ·
1 Parent(s): 6c238ab

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +15 -95
config.json CHANGED
@@ -1,8 +1,4 @@
1
  {
2
- "add_classification_head": false,
3
- "anyres_pooling_size": 2,
4
- "anyres_vit_max_image_size": 2048,
5
- "anyres_vit_two_views": false,
6
  "architectures": [
7
  "HunYuanVLForConditionalGeneration"
8
  ],
@@ -10,59 +6,29 @@
10
  "attention_dropout": 0.0,
11
  "attention_head_dim": 128,
12
  "bos_token_id": 120000,
13
- "cla_share_factor": 2,
14
- "class_num": 0,
15
- "dense_list": [
16
- 1024,
17
- 0
18
- ],
19
  "eod_token_id": 120020,
20
  "eos_token_id": 120020,
21
- "group_limited_greedy": false,
22
  "head_dim": 128,
23
  "hidden_act": "silu",
24
  "hidden_size": 1024,
25
- "im_end_id": 120119,
26
- "im_newline_id": 120121,
27
- "im_start_id": 120118,
28
  "image_token_id": 120120,
 
29
  "initializer_range": 0.02,
30
  "intermediate_size": 3584,
31
- "kv_lora_rank": null,
32
- "mask_init_id": 13,
33
  "max_position_embeddings": 32768,
34
- "tie_word_embeddings": true,
35
  "mlp_bias": false,
36
  "model_type": "hunyuan_vl",
37
- "moe_drop_tokens": false,
38
- "moe_intermediate_size": null,
39
- "moe_layer_num_skipped": 0,
40
- "moe_random_routing_dropped_token": false,
41
- "moe_topk": null,
42
- "mtp_loss_factor": 0.3,
43
- "mtp_moe_layer_num_skipped": 0,
44
- "mtp_no_bias": false,
45
- "mtp_type": "moe",
46
- "n_group": null,
47
- "norm_topk_prob": true,
48
  "norm_type": "rms",
49
  "num_attention_heads": 16,
50
  "num_experts": 1,
51
  "num_hidden_layers": 24,
52
  "num_key_value_heads": 8,
53
- "num_media_embeds": 257,
54
- "num_nextn_predict_layers": 1,
55
- "num_predictor_layers": 1,
56
- "num_shared_expert": null,
57
  "org_vocab_size": 120818,
58
  "pad_id": 120002,
59
- "pad_token_id": 120002,
60
- "pool_type": "last",
61
- "position_embedding_xdrope": true,
62
  "pretraining_tp": 1,
63
- "q_lora_rank": null,
64
- "qk_nope_head_dim": null,
65
- "qk_rope_head_dim": null,
66
  "rms_norm_eps": 1e-05,
67
  "rope_scaling": {
68
  "alpha": 1000.0,
@@ -71,90 +37,44 @@
71
  "factor": 1.0,
72
  "mscale": 1.0,
73
  "mscale_all_dim": 1.0,
74
- "type": "xdrope"
 
 
 
 
 
 
75
  },
76
  "rope_theta": 10000.0,
77
  "routed_scaling_factor": 1.0,
78
  "sep_token_id": 0,
79
- "skip_cls_token": true,
80
  "text_end_id": 8,
81
  "text_start_id": 7,
82
  "tie_word_embeddings": true,
83
- "topk_group": null,
84
- "torch_dtype": "bfloat16",
85
  "transformers_version": "4.49.0",
86
  "use_cache": true,
87
- "use_cla": false,
88
- "use_mixed_mlp_moe": false,
89
- "use_mla": false,
90
  "use_qk_norm": true,
91
- "use_rotary_pos_emb": true,
92
- "v_head_dim": null,
93
- "video_end_id": 120123,
94
- "video_start_id": 120122,
95
  "vision_config": {
96
- "adaptor_patch_size": 2,
97
  "add_patchemb_bias": true,
98
  "attention_dropout": 0.0,
99
  "cat_extra_token": 1,
100
- "fuse_vit_feature": 0,
101
  "hidden_act": "gelu",
102
  "hidden_dropout": 0.0,
103
  "hidden_size": 1152,
104
  "img_max_token_num": 4096,
105
  "intermediate_size": 4304,
106
  "interpolate_mode": "bilinear",
107
- "layer_norm_eps": 1e-05,
108
  "max_image_size": 2048,
109
  "max_vit_seq_len": 16384,
110
- "num_key_value_heads": 16,
111
  "num_attention_heads": 16,
112
  "num_channels": 3,
113
  "num_hidden_layers": 27,
114
  "out_hidden_size": 1024,
115
  "patch_size": 16,
116
- "use_qk_norm": false,
117
- "attention_bias": true,
118
  "rms_norm_eps": 1e-05,
119
- "xdrope_section": [
120
- 0.25,
121
- 0.25,
122
- 0.25,
123
- 0.25
124
- ],
125
- "use_rotary_pos_emb": false,
126
- "position_embedding_xdrope": false,
127
- "max_position_embeddings": 262144,
128
- "rope_theta": 10000.0,
129
- "mlp_bias": true,
130
- "norm_type": "torch_nn",
131
- "anyres_pooling_size": 2,
132
- "anyres_vit_max_image_size": 2048,
133
- "anyres_vit_two_views": false,
134
- "perceive_pre_norm": true,
135
- "perceive_post_norm": true,
136
- "poolmlp": 0,
137
- "remove_prenorm": true,
138
- "two_view": true,
139
- "use_normalize_pool": false,
140
- "use_fusion_block": false,
141
- "use_layernorm": true
142
  },
143
- "vit_add_patchemb_bias": true,
144
- "vit_input_resolution": 224,
145
- "vit_mapping_type": "simple_conv_mlp",
146
- "vit_norm_type": "fused",
147
- "vit_patch": 1,
148
- "vit_path": null,
149
- "vit_remove_prenorm": true,
150
- "vit_token": 58,
151
- "vit_type": "AnyResVit-tp",
152
- "vit_used_rms_norm": true,
153
- "vocab_size": 120818,
154
- "xdrope_section": [
155
- 0.25,
156
- 0.25,
157
- 0.25,
158
- 0.25
159
- ]
160
  }
 
1
  {
 
 
 
 
2
  "architectures": [
3
  "HunYuanVLForConditionalGeneration"
4
  ],
 
6
  "attention_dropout": 0.0,
7
  "attention_head_dim": 128,
8
  "bos_token_id": 120000,
 
 
 
 
 
 
9
  "eod_token_id": 120020,
10
  "eos_token_id": 120020,
 
11
  "head_dim": 128,
12
  "hidden_act": "silu",
13
  "hidden_size": 1024,
14
+ "image_start_token_id": 120118,
15
+ "image_end_token_id": 120119,
 
16
  "image_token_id": 120120,
17
+ "image_newline_token_id": 120121,
18
  "initializer_range": 0.02,
19
  "intermediate_size": 3584,
 
 
20
  "max_position_embeddings": 32768,
 
21
  "mlp_bias": false,
22
  "model_type": "hunyuan_vl",
 
 
 
 
 
 
 
 
 
 
 
23
  "norm_type": "rms",
24
  "num_attention_heads": 16,
25
  "num_experts": 1,
26
  "num_hidden_layers": 24,
27
  "num_key_value_heads": 8,
 
 
 
 
28
  "org_vocab_size": 120818,
29
  "pad_id": 120002,
30
+ "pad_token_id": -1,
 
 
31
  "pretraining_tp": 1,
 
 
 
32
  "rms_norm_eps": 1e-05,
33
  "rope_scaling": {
34
  "alpha": 1000.0,
 
37
  "factor": 1.0,
38
  "mscale": 1.0,
39
  "mscale_all_dim": 1.0,
40
+ "type": "xdrope",
41
+ "xdrope_section": [
42
+ 16,
43
+ 16,
44
+ 16,
45
+ 16
46
+ ]
47
  },
48
  "rope_theta": 10000.0,
49
  "routed_scaling_factor": 1.0,
50
  "sep_token_id": 0,
 
51
  "text_end_id": 8,
52
  "text_start_id": 7,
53
  "tie_word_embeddings": true,
54
+ "dtype": "bfloat16",
 
55
  "transformers_version": "4.49.0",
56
  "use_cache": true,
 
 
 
57
  "use_qk_norm": true,
58
+ "use_cla": false,
 
 
 
59
  "vision_config": {
 
60
  "add_patchemb_bias": true,
61
  "attention_dropout": 0.0,
62
  "cat_extra_token": 1,
 
63
  "hidden_act": "gelu",
64
  "hidden_dropout": 0.0,
65
  "hidden_size": 1152,
66
  "img_max_token_num": 4096,
67
  "intermediate_size": 4304,
68
  "interpolate_mode": "bilinear",
 
69
  "max_image_size": 2048,
70
  "max_vit_seq_len": 16384,
 
71
  "num_attention_heads": 16,
72
  "num_channels": 3,
73
  "num_hidden_layers": 27,
74
  "out_hidden_size": 1024,
75
  "patch_size": 16,
 
 
76
  "rms_norm_eps": 1e-05,
77
+ "spatial_merge_size": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  },
79
+ "vocab_size": 120818
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  }