zharer committed on
Commit
81a93df
·
verified ·
1 Parent(s): 97fdfb6

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -5,7 +5,7 @@
5
  "params": {
6
  "depth": 2,
7
  "input_dim": 1024,
8
- "n_embed": 4096,
9
  "projector_type": "mlp_gelu"
10
  }
11
  },
@@ -15,7 +15,7 @@
15
  "params": {
16
  "depth": 2,
17
  "input_dim": 8,
18
- "n_embed": 4096,
19
  "projector_type": "mlp_gelu"
20
  }
21
  },
@@ -23,9 +23,9 @@
23
  "cls": "vision_head",
24
  "model_type": "gen_head",
25
  "params": {
26
- "image_token_embed": 4096,
27
  "image_token_size": 16384,
28
- "n_embed": 4096
29
  }
30
  },
31
  "gen_vision_config": {
@@ -43,9 +43,27 @@
43
  "torch_dtype": "bfloat16",
44
  "vocab_size": 102400
45
  },
46
- "model_type": "janus",
47
  "torch_dtype": "float16",
48
- "transformers_version": "4.33.1",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  "vision_config": {
50
  "cls": "CLIPVisionTower",
51
  "model_type": "vision",
@@ -57,10 +75,8 @@
57
  }
58
  },
59
  "architectures": [
60
- "JanusForConditionalGeneration"
61
  ],
62
  "use_cache": true,
63
- "webgpu_compatible": true,
64
- "quantization": "q4f16",
65
- "pipeline_tag": "text-to-image"
66
  }
 
5
  "params": {
6
  "depth": 2,
7
  "input_dim": 1024,
8
+ "n_embed": 2048,
9
  "projector_type": "mlp_gelu"
10
  }
11
  },
 
15
  "params": {
16
  "depth": 2,
17
  "input_dim": 8,
18
+ "n_embed": 2048,
19
  "projector_type": "mlp_gelu"
20
  }
21
  },
 
23
  "cls": "vision_head",
24
  "model_type": "gen_head",
25
  "params": {
26
+ "image_token_embed": 2048,
27
  "image_token_size": 16384,
28
+ "n_embed": 2048
29
  }
30
  },
31
  "gen_vision_config": {
 
43
  "torch_dtype": "bfloat16",
44
  "vocab_size": 102400
45
  },
46
+ "model_type": "multi_modality",
47
  "torch_dtype": "float16",
48
+ "transformers_version": "4.36.0",
49
+ "transformers.js_config": {
50
+ "kv_cache_dtype": {
51
+ "fp16": "float16",
52
+ "q4f16": "float16"
53
+ },
54
+ "dtype": {
55
+ "prepare_inputs_embeds": "fp32",
56
+ "language_model": "q4",
57
+ "lm_head": "fp32",
58
+ "gen_head": "fp32",
59
+ "gen_img_embeds": "fp32",
60
+ "image_decode": "fp32"
61
+ },
62
+ "use_external_data_format": {
63
+ "language_model.onnx": true,
64
+ "language_model_fp16.onnx": true
65
+ }
66
+ },
67
  "vision_config": {
68
  "cls": "CLIPVisionTower",
69
  "model_type": "vision",
 
75
  }
76
  },
77
  "architectures": [
78
+ "MultiModalityCausalLM"
79
  ],
80
  "use_cache": true,
81
+ "_name_or_path": "janus-pro-7b-webgpu-working"
 
 
82
  }
generation_config.json CHANGED
@@ -1,12 +1,9 @@
1
  {
2
  "bos_token_id": 100000,
3
  "eos_token_id": 100001,
4
- "pad_token_id": 151643,
5
- "max_length": 2048,
6
- "max_new_tokens": 1024,
7
  "do_sample": true,
 
 
8
  "temperature": 0.7,
9
- "top_p": 0.9,
10
- "use_cache": true,
11
- "num_image_tokens": 576
12
  }
 
1
  {
2
  "bos_token_id": 100000,
3
  "eos_token_id": 100001,
 
 
 
4
  "do_sample": true,
5
+ "num_image_tokens": 576,
6
+ "pad_token_id": 100015,
7
  "temperature": 0.7,
8
+ "top_p": 0.95
 
 
9
  }
model_index.json CHANGED
@@ -1,20 +1,32 @@
1
  {
2
- "model_name": "Janus-Pro-7B",
3
- "model_type": "multimodal",
4
- "architecture": "janus",
5
  "format": "onnx",
6
- "quantization": "q4f16",
7
  "device": "webgpu",
8
- "total_size_mb": 4935.2,
9
- "components_count": 6,
10
- "status": "complete",
11
- "capabilities": [
12
- "text-to-image",
13
- "image-to-text",
14
- "multimodal-chat"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ],
16
- "transformers_js_compatible": true,
17
- "webgpu_optimized": true,
18
- "export_date": "2025-09-27",
19
- "notes": "All 6 components successfully exported for WebGPU deployment"
20
  }
 
1
  {
2
+ "model_type": "multi_modality",
3
+ "pipeline_tag": "image-to-text",
4
+ "framework": "transformers.js",
5
  "format": "onnx",
 
6
  "device": "webgpu",
7
+ "total_size_mb": 4743.1,
8
+ "onnx_files": [
9
+ {
10
+ "name": "decoder.onnx",
11
+ "size_mb": 320.1
12
+ },
13
+ {
14
+ "name": "embed_tokens.onnx",
15
+ "size_mb": 64.0
16
+ },
17
+ {
18
+ "name": "encoder.onnx",
19
+ "size_mb": 1159.0
20
+ },
21
+ {
22
+ "name": "lm_head.onnx",
23
+ "size_mb": 1600.0
24
+ },
25
+ {
26
+ "name": "prepare_inputs_embeds.onnx",
27
+ "size_mb": 1600.0
28
+ }
29
  ],
30
+ "based_on": "onnx-community/Janus-1.3B-ONNX (working structure)",
31
+ "scaled_to": "Janus-Pro-7B"
 
 
32
  }
onnx/decoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22eb336ab33e4d0b9ee5c09c2d3906682c4aeff408a189a2eb45077ff3f1de30
3
+ size 335628308
onnx/embed_tokens.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b94e5d5ccaf0a02a804dbcb35c7f0d0b57f22fd30b1f187c2ec797fb34df6c5b
3
- size 1677721872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:184a1b1eb70e501a8a5831e7ab5193364f973a4d9946dc12caf410cf85294f13
3
+ size 67125624
onnx/encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c17e3ed46cb81ac30d8cf1c9c6c723afc77108a52a63df4c6b74e7c1dd8431c
3
+ size 1215274800
processor_config.json CHANGED
@@ -2,6 +2,8 @@
2
  "add_special_token": false,
3
  "ignore_id": -100,
4
  "image_tag": "<image_placeholder>",
 
 
5
  "mask_prompt": true,
6
  "num_image_tokens": 576,
7
  "processor_class": "VLChatProcessor",
 
2
  "add_special_token": false,
3
  "ignore_id": -100,
4
  "image_tag": "<image_placeholder>",
5
+ "image_start_tag": "<begin_of_image>",
6
+ "image_end_tag": "<end_of_image>",
7
  "mask_prompt": true,
8
  "num_image_tokens": 576,
9
  "processor_class": "VLChatProcessor",