Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4)

README.md +4 -1
config.json +39 -80
generation_config.json +2 -1
pytorch_model.bin +1 -1

README.md (changed)

@@ -1,6 +1,9 @@
 ---
 library_name: transformers
-tags: []
 ---
 # Model Card for Model ID

 ---
 library_name: transformers
+tags:
+- unsloth
+- trl
+- sft
 ---
 # Model Card for Model ID

config.json (changed)

@@ -4,7 +4,6 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 151643,
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -55,92 +54,50 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 36,
   "num_key_value_heads": 8,
   "quantization_config": {
-    "include_input_output_embeddings": true,
-    "modules_to_not_convert": [],
     "quant_method": "torchao",
     "quant_type": {
       "default": {
         "_data": {
-          "module_fqn_to_config": {
-            "_default": {
-              "_data": {
-                "act_mapping_type": {
-                  "_data": "ASYMMETRIC",
-                  "_type": "MappingType"
-                },
-                "intx_choose_qparams_algorithm": {
-                  "_data": "AFFINE",
-                  "_type": "IntxChooseQParamsAlgorithm"
-                },
-                "intx_packing_format": {
-                  "_data": "UNPACKED_TO_INT8",
-                  "_type": "IntxPackingFormat"
-                },
-                "layout": {
-                  "_data": {},
-                  "_type": "QDQLayout",
-                  "_version": 1
-                },
-                "weight_dtype": {
-                  "_data": "int4",
-                  "_type": "torch.dtype"
-                },
-                "weight_granularity": {
-                  "_data": {
-                    "group_size": 32
-                  },
-                  "_type": "PerGroup",
-                  "_version": 1
-                },
-                "weight_mapping_type": {
-                  "_data": "SYMMETRIC",
-                  "_type": "MappingType"
-                },
-                "weight_scale_dtype": null
-              },
-              "_type": "Int8DynamicActivationIntxWeightConfig",
-              "_version": 2
             },
-            "model.embed_tokens": {
-              "_data": {
-                "granularity": {
-                  "_data": {
-                    "axis": 0
-                  },
-                  "_type": "PerAxis",
-                  "_version": 1
-                },
-                "intx_choose_qparams_algorithm": {
-                  "_data": "AFFINE",
-                  "_type": "IntxChooseQParamsAlgorithm"
-                },
-                "intx_packing_format": {
-                  "_data": "UNPACKED_TO_INT8",
-                  "_type": "IntxPackingFormat"
-                },
-                "layout": {
-                  "_data": {},
-                  "_type": "QDQLayout",
-                  "_version": 1
-                },
-                "mapping_type": {
-                  "_data": "SYMMETRIC",
-                  "_type": "MappingType"
-                },
-                "scale_dtype": null,
-                "weight_dtype": {
-                  "_data": "int8",
-                  "_type": "torch.dtype"
-                }
-              },
-              "_type": "IntxWeightOnlyConfig",
-              "_version": 2
-            }
-          }
         },
-        "_type": "ModuleFqnToConfig",
-        "_version": 1
       }
     },
     "quant_type_kwargs": {},
@@ -153,6 +110,8 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.55.4",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
   "num_attention_heads": 32,
   "num_hidden_layers": 36,
   "num_key_value_heads": 8,
+  "pad_token_id": 151654,
   "quantization_config": {
+    "include_input_output_embeddings": false,
+    "modules_to_not_convert": null,
     "quant_method": "torchao",
     "quant_type": {
       "default": {
         "_data": {
+          "act_mapping_type": {
+            "_data": "ASYMMETRIC",
+            "_type": "MappingType"
+          },
+          "intx_choose_qparams_algorithm": {
+            "_data": "AFFINE",
+            "_type": "IntxChooseQParamsAlgorithm"
+          },
+          "intx_packing_format": {
+            "_data": "UNPACKED_TO_INT8",
+            "_type": "IntxPackingFormat"
+          },
+          "layout": {
+            "_data": {},
+            "_type": "QDQLayout",
+            "_version": 1
+          },
+          "weight_dtype": {
+            "_data": "int4",
+            "_type": "torch.dtype"
+          },
+          "weight_granularity": {
+            "_data": {
+              "group_size": 32
             },
+            "_type": "PerGroup",
+            "_version": 1
+          },
+          "weight_mapping_type": {
+            "_data": "SYMMETRIC",
+            "_type": "MappingType"
+          },
+          "weight_scale_dtype": null
         },
+        "_type": "Int8DynamicActivationIntxWeightConfig",
+        "_version": 2
       }
     },
     "quant_type_kwargs": {},
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.55.4",
+  "unsloth_fixed": true,
+  "unsloth_version": "2025.9.7",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json (changed)

@@ -5,7 +5,8 @@
     151645,
     151643
   ],
-  "pad_token_id": 151643,
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,

     151645,
     151643
   ],
+  "max_length": 40960,
+  "pad_token_id": 151654,
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,

pytorch_model.bin (changed)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2af09e30fe6009b1daa82aa9694b346f65c51d5609ebea72c68dba1a4864b274
 size 4789478103

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e941a1e130f71b98627a59036b29d7b5d45416660d5955263d648c561350138
 size 4789478103