Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.gitattributes +3 -0
args.json +114 -0
iter_0000001/.metadata +3 -0
iter_0000001/__0_0.distcp +3 -0
iter_0000001/__0_1.distcp +3 -0
iter_0000001/common.pt +3 -0
iter_0000001/metadata.json +1 -0
latest_checkpointed_iteration.txt +1 -0
runs/events.out.tfevents.1757992794.fc4f4ac1-02.cloud.together.ai.1284059.0 +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+iter_0000001/.metadata filter=lfs diff=lfs merge=lfs -text
+iter_0000001/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
+iter_0000001/__0_1.distcp filter=lfs diff=lfs merge=lfs -text

args.json ADDED Viewed

	@@ -0,0 +1,114 @@

+{
+  "model": "Qwen/Qwen3-4B-Instruct-2507",
+  "model_type": "qwen3_moe",
+  "model_revision": null,
+  "task_type": "causal_lm",
+  "torch_dtype": "bfloat16",
+  "attn_impl": null,
+  "new_special_tokens": [],
+  "num_labels": null,
+  "problem_type": null,
+  "rope_scaling": null,
+  "device_map": null,
+  "max_memory": {},
+  "max_model_len": null,
+  "local_repo_path": null,
+  "init_strategy": null,
+  "template": "qwen3",
+  "system": null,
+  "max_length": 2048,
+  "truncation_strategy": "delete",
+  "max_pixels": null,
+  "agent_template": null,
+  "norm_bbox": null,
+  "use_chat_template": true,
+  "padding_free": false,
+  "padding_side": "right",
+  "loss_scale": "default",
+  "sequence_parallel_size": 1,
+  "response_prefix": null,
+  "template_backend": "swift",
+  "dataset": [],
+  "val_dataset": [],
+  "split_dataset_ratio": 0.0,
+  "data_seed": 42,
+  "dataset_num_proc": 1,
+  "load_from_cache_file": true,
+  "dataset_shuffle": true,
+  "val_dataset_shuffle": false,
+  "streaming": false,
+  "interleave_prob": null,
+  "stopping_strategy": "first_exhausted",
+  "shuffle_buffer_size": 1000,
+  "download_mode": "reuse_dataset_if_exists",
+  "columns": {},
+  "strict": false,
+  "remove_unused_columns": true,
+  "model_name": null,
+  "model_author": null,
+  "custom_dataset_info": [],
+  "quant_method": null,
+  "quant_bits": null,
+  "hqq_axis": null,
+  "bnb_4bit_compute_dtype": "bfloat16",
+  "bnb_4bit_quant_type": "nf4",
+  "bnb_4bit_use_double_quant": true,
+  "bnb_4bit_quant_storage": null,
+  "max_new_tokens": null,
+  "temperature": null,
+  "top_k": null,
+  "top_p": null,
+  "repetition_penalty": null,
+  "num_beams": 1,
+  "stream": false,
+  "stop_words": [],
+  "logprobs": false,
+  "top_logprobs": null,
+  "ckpt_dir": null,
+  "lora_modules": [],
+  "tuner_backend": "peft",
+  "train_type": "lora",
+  "adapters": [],
+  "external_plugins": [],
+  "seed": 42,
+  "model_kwargs": {},
+  "load_args": true,
+  "load_data_args": false,
+  "packing": false,
+  "custom_register_path": [],
+  "use_hf": true,
+  "hub_token": null,
+  "ddp_timeout": 18000000,
+  "ddp_backend": null,
+  "ignore_args_error": false,
+  "use_swift_lora": false,
+  "merge_lora": false,
+  "safe_serialization": true,
+  "max_shard_size": "5GB",
+  "output_dir": "/data/workspace/kunato/ms-swift/Qwen3-4B-Instruct-2507-mcore",
+  "quant_n_samples": 256,
+  "quant_batch_size": 1,
+  "group_size": 128,
+  "to_ollama": false,
+  "to_mcore": true,
+  "to_hf": false,
+  "mcore_model": null,
+  "mcore_adapters": [],
+  "thread_count": 2,
+  "test_convert_precision": false,
+  "push_to_hub": false,
+  "hub_model_id": null,
+  "hub_private_repo": false,
+  "commit_message": "update files",
+  "to_peft_format": false,
+  "exist_ok": false,
+  "rank": 0,
+  "local_rank": 0,
+  "global_world_size": 1,
+  "local_world_size": 1,
+  "model_suffix": "Qwen3-4B-Instruct-2507",
+  "model_info": "ModelInfo(model_type='qwen3_moe', model_dir='/data/share/cache/huggingface/hub/models--Qwen--Qwen3-4B-Instruct-2507/snapshots/eb25fbe4f35f7147763bc24445679d1c00588d89', torch_dtype=torch.bfloat16, max_model_len=262144, quant_method=None, quant_bits=None, rope_scaling=None, is_moe_model=False, config=None, task_type='causal_lm', num_labels=None)",
+  "model_meta": "ModelMeta(model_type='qwen3_moe', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-30B-A3B-Base', hf_model_id='Qwen/Qwen3-30B-A3B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B', hf_model_id='Qwen/Qwen3-30B-A3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B', hf_model_id='Qwen/Qwen3-235B-A22B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B-FP8', hf_model_id='Qwen/Qwen3-30B-A3B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-FP8', hf_model_id='Qwen/Qwen3-235B-A22B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-30B-A3B-AWQ', hf_model_id='cognitivecomputations/Qwen3-30B-A3B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-235B-A22B-AWQ', hf_model_id='cognitivecomputations/Qwen3-235B-A22B-AWQ', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507', hf_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507-FP8', hf_model_id='Qwen/Qwen3-30B-A3B-Instruct-2507-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507', hf_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507-FP8', hf_model_id='Qwen/Qwen3-235B-A22B-Instruct-2507-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-235B-A22B-Instruct-2507-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct', hf_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8', hf_model_id='Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-Coder-480B-A35B-Instruct-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=['coding'])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f501cca3250>, model_arch=None, architectures=['Qwen3MoeForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
+  "model_dir": "/data/share/cache/huggingface/hub/models--Qwen--Qwen3-4B-Instruct-2507/snapshots/eb25fbe4f35f7147763bc24445679d1c00588d89",
+  "hub": "<class 'swift.hub.hub.HFHub'>"
+}

iter_0000001/.metadata ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18ad8769cc141b505dc5c8df1641276faaf8584d84af82643ccb781f650844c2
+size 154431

iter_0000001/__0_0.distcp ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:896790f566ca4999b5abd57db503f9c1fb5e8303a5d91f9c3589dd5f612f9b38
+size 4013715547

iter_0000001/__0_1.distcp ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b5b31afe54eb13137d7f0f995ac92290af786adec40ec5b73379cec05ef293c
+size 4032124045

iter_0000001/common.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb2f26779d54a5e0c9ed9d9de6bb337bde6bc1018b371d3b2012edbfe20c03fc
+size 19239

iter_0000001/metadata.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}

latest_checkpointed_iteration.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ 1

runs/events.out.tfevents.1757992794.fc4f4ac1-02.cloud.together.ai.1284059.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11310db2aa2b4e532663b1fe8f0d2e4a081ec87708cfa61f11f8eb8fcc363d3a
+size 88