kunato committed on
Commit
6777f44
·
verified ·
1 Parent(s): 2625128

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ iter_0000001/.metadata filter=lfs diff=lfs merge=lfs -text
37
+ iter_0000001/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
38
+ iter_0000001/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
args.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "Qwen/Qwen3-4B-Instruct-2507",
3
+ "model_type": "qwen3_moe",
4
+ "model_revision": null,
5
+ "task_type": "causal_lm",
6
+ "torch_dtype": "bfloat16",
7
+ "attn_impl": null,
8
+ "new_special_tokens": [],
9
+ "num_labels": null,
10
+ "problem_type": null,
11
+ "rope_scaling": null,
12
+ "device_map": null,
13
+ "max_memory": {},
14
+ "max_model_len": null,
15
+ "local_repo_path": null,
16
+ "init_strategy": null,
17
+ "template": "qwen3",
18
+ "system": null,
19
+ "max_length": 2048,
20
+ "truncation_strategy": "delete",
21
+ "max_pixels": null,
22
+ "agent_template": null,
23
+ "norm_bbox": null,
24
+ "use_chat_template": true,
25
+ "padding_free": false,
26
+ "padding_side": "right",
27
+ "loss_scale": "default",
28
+ "sequence_parallel_size": 1,
29
+ "response_prefix": null,
30
+ "template_backend": "swift",
31
+ "dataset": [],
32
+ "val_dataset": [],
33
+ "split_dataset_ratio": 0.0,
34
+ "data_seed": 42,
35
+ "dataset_num_proc": 1,
36
+ "load_from_cache_file": true,
37
+ "dataset_shuffle": true,
38
+ "val_dataset_shuffle": false,
39
+ "streaming": false,
40
+ "interleave_prob": null,
41
+ "stopping_strategy": "first_exhausted",
42
+ "shuffle_buffer_size": 1000,
43
+ "download_mode": "reuse_dataset_if_exists",
44
+ "columns": {},
45
+ "strict": false,
46
+ "remove_unused_columns": true,
47
+ "model_name": null,
48
+ "model_author": null,
49
+ "custom_dataset_info": [],
50
+ "quant_method": null,
51
+ "quant_bits": null,
52
+ "hqq_axis": null,
53
+ "bnb_4bit_compute_dtype": "bfloat16",
54
+ "bnb_4bit_quant_type": "nf4",
55
+ "bnb_4bit_use_double_quant": true,
56
+ "bnb_4bit_quant_storage": null,
57
+ "max_new_tokens": null,
58
+ "temperature": null,
59
+ "top_k": null,
60
+ "top_p": null,
61
+ "repetition_penalty": null,
62
+ "num_beams": 1,
63
+ "stream": false,
64
+ "stop_words": [],
65
+ "logprobs": false,
66
+ "top_logprobs": null,
67
+ "ckpt_dir": null,
68
+ "lora_modules": [],
69
+ "tuner_backend": "peft",
70
+ "train_type": "lora",
71
+ "adapters": [],
72
+ "external_plugins": [],
73
+ "seed": 42,
74
+ "model_kwargs": {},
75
+ "load_args": true,
76
+ "load_data_args": false,
77
+ "packing": false,
78
+ "custom_register_path": [],
79
+ "use_hf": true,
80
+ "hub_token": null,
81
+ "ddp_timeout": 18000000,
82
+ "ddp_backend": null,
83
+ "ignore_args_error": false,
84
+ "use_swift_lora": false,
85
+ "merge_lora": false,
86
+ "safe_serialization": true,
87
+ "max_shard_size": "5GB",
88
+ "output_dir": "/data/workspace/kunato/ms-swift/Qwen3-4B-Instruct-2507-mcore",
89
+ "quant_n_samples": 256,
90
+ "quant_batch_size": 1,
91
+ "group_size": 128,
92
+ "to_ollama": false,
93
+ "to_mcore": true,
94
+ "to_hf": false,
95
+ "mcore_model": null,
96
+ "mcore_adapters": [],
97
+ "thread_count": 2,
98
+ "test_convert_precision": false,
99
+ "push_to_hub": false,
100
+ "hub_model_id": null,
101
+ "hub_private_repo": false,
102
+ "commit_message": "update files",
103
+ "to_peft_format": false,
104
+ "exist_ok": false,
105
+ "rank": 0,
106
+ "local_rank": 0,
107
+ "global_world_size": 1,
108
+ "local_world_size": 1,
109
+ "model_suffix": "Qwen3-4B-Instruct-2507",
110
+ "model_info": "ModelInfo(model_type='qwen3_moe', model_dir='/data/share/cache/huggingface/hub/models--Qwen--Qwen3-4B-Instruct-2507/snapshots/eb25fbe4f35f7147763bc24445679d1c00588d89', torch_dtype=torch.bfloat16, max_model_len=262144, quant_method=None, quant_bits=None, rope_scaling=None, is_moe_model=False, config=None, task_type='causal_lm', num_labels=None)",
111
+ "model_meta": "ModelMeta(model_type='qwen3_moe', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-30B-A3B-Base', hf_model_id='Qwen/Qwen3-30B-A3B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B', hf_model_id='Qwen/Qwen3-30B-A3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B', hf_model_id='Qwen/Qwen3-235B-A22B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B-FP8', hf_model_id='Qwen/Qwen3-30B-A3B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-FP8', hf_model_id='Qwen/Qwen3-235B-A22B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-30B-A3B-AWQ', hf_model_id='cognitivecomputations/Qwen3-30B-A3B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-235B-A22B-AWQ', hf_model_id='cognitivecomputations/Qwen3-235B-A22B-AWQ', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507', hf_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507-FP8', hf_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507', hf_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507-FP8', hf_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-235B-A22B-Instruct-2507-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), 
ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct', hf_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8', hf_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-Coder-480B-A35B-Instruct-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=['coding'])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f501cca3250>, model_arch=None, architectures=['Qwen3MoeForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
112
+ "model_dir": "/data/share/cache/huggingface/hub/models--Qwen--Qwen3-4B-Instruct-2507/snapshots/eb25fbe4f35f7147763bc24445679d1c00588d89",
113
+ "hub": "<class 'swift.hub.hub.HFHub'>"
114
+ }
iter_0000001/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18ad8769cc141b505dc5c8df1641276faaf8584d84af82643ccb781f650844c2
3
+ size 154431
iter_0000001/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:896790f566ca4999b5abd57db503f9c1fb5e8303a5d91f9c3589dd5f612f9b38
3
+ size 4013715547
iter_0000001/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b5b31afe54eb13137d7f0f995ac92290af786adec40ec5b73379cec05ef293c
3
+ size 4032124045
iter_0000001/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2f26779d54a5e0c9ed9d9de6bb337bde6bc1018b371d3b2012edbfe20c03fc
3
+ size 19239
iter_0000001/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 1
runs/events.out.tfevents.1757992794.fc4f4ac1-02.cloud.together.ai.1284059.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11310db2aa2b4e532663b1fe8f0d2e4a081ec87708cfa61f11f8eb8fcc363d3a
3
+ size 88