metascroy committed on
Commit
72dada8
·
verified ·
1 Parent(s): 4e3c935

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4) hide show
  1. README.md +4 -1
  2. config.json +39 -80
  3. generation_config.json +2 -1
  4. pytorch_model.bin +1 -1
README.md CHANGED
@@ -1,6 +1,9 @@
1
  ---
2
  library_name: transformers
3
- tags: []
 
 
 
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
2
  library_name: transformers
3
+ tags:
4
+ - unsloth
5
+ - trl
6
+ - sft
7
  ---
8
 
9
  # Model Card for Model ID
config.json CHANGED
@@ -4,7 +4,6 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
  "eos_token_id": 151645,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
@@ -55,92 +54,50 @@
55
  "num_attention_heads": 32,
56
  "num_hidden_layers": 36,
57
  "num_key_value_heads": 8,
 
58
  "quantization_config": {
59
- "include_input_output_embeddings": true,
60
- "modules_to_not_convert": [],
61
  "quant_method": "torchao",
62
  "quant_type": {
63
  "default": {
64
  "_data": {
65
- "module_fqn_to_config": {
66
- "_default": {
67
- "_data": {
68
- "act_mapping_type": {
69
- "_data": "ASYMMETRIC",
70
- "_type": "MappingType"
71
- },
72
- "intx_choose_qparams_algorithm": {
73
- "_data": "AFFINE",
74
- "_type": "IntxChooseQParamsAlgorithm"
75
- },
76
- "intx_packing_format": {
77
- "_data": "UNPACKED_TO_INT8",
78
- "_type": "IntxPackingFormat"
79
- },
80
- "layout": {
81
- "_data": {},
82
- "_type": "QDQLayout",
83
- "_version": 1
84
- },
85
- "weight_dtype": {
86
- "_data": "int4",
87
- "_type": "torch.dtype"
88
- },
89
- "weight_granularity": {
90
- "_data": {
91
- "group_size": 32
92
- },
93
- "_type": "PerGroup",
94
- "_version": 1
95
- },
96
- "weight_mapping_type": {
97
- "_data": "SYMMETRIC",
98
- "_type": "MappingType"
99
- },
100
- "weight_scale_dtype": null
101
- },
102
- "_type": "Int8DynamicActivationIntxWeightConfig",
103
- "_version": 2
104
  },
105
- "model.embed_tokens": {
106
- "_data": {
107
- "granularity": {
108
- "_data": {
109
- "axis": 0
110
- },
111
- "_type": "PerAxis",
112
- "_version": 1
113
- },
114
- "intx_choose_qparams_algorithm": {
115
- "_data": "AFFINE",
116
- "_type": "IntxChooseQParamsAlgorithm"
117
- },
118
- "intx_packing_format": {
119
- "_data": "UNPACKED_TO_INT8",
120
- "_type": "IntxPackingFormat"
121
- },
122
- "layout": {
123
- "_data": {},
124
- "_type": "QDQLayout",
125
- "_version": 1
126
- },
127
- "mapping_type": {
128
- "_data": "SYMMETRIC",
129
- "_type": "MappingType"
130
- },
131
- "scale_dtype": null,
132
- "weight_dtype": {
133
- "_data": "int8",
134
- "_type": "torch.dtype"
135
- }
136
- },
137
- "_type": "IntxWeightOnlyConfig",
138
- "_version": 2
139
- }
140
- }
141
  },
142
- "_type": "ModuleFqnToConfig",
143
- "_version": 1
144
  }
145
  },
146
  "quant_type_kwargs": {},
@@ -153,6 +110,8 @@
153
  "tie_word_embeddings": false,
154
  "torch_dtype": "bfloat16",
155
  "transformers_version": "4.55.4",
 
 
156
  "use_cache": true,
157
  "use_sliding_window": false,
158
  "vocab_size": 151936
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
 
7
  "eos_token_id": 151645,
8
  "head_dim": 128,
9
  "hidden_act": "silu",
 
54
  "num_attention_heads": 32,
55
  "num_hidden_layers": 36,
56
  "num_key_value_heads": 8,
57
+ "pad_token_id": 151654,
58
  "quantization_config": {
59
+ "include_input_output_embeddings": false,
60
+ "modules_to_not_convert": null,
61
  "quant_method": "torchao",
62
  "quant_type": {
63
  "default": {
64
  "_data": {
65
+ "act_mapping_type": {
66
+ "_data": "ASYMMETRIC",
67
+ "_type": "MappingType"
68
+ },
69
+ "intx_choose_qparams_algorithm": {
70
+ "_data": "AFFINE",
71
+ "_type": "IntxChooseQParamsAlgorithm"
72
+ },
73
+ "intx_packing_format": {
74
+ "_data": "UNPACKED_TO_INT8",
75
+ "_type": "IntxPackingFormat"
76
+ },
77
+ "layout": {
78
+ "_data": {},
79
+ "_type": "QDQLayout",
80
+ "_version": 1
81
+ },
82
+ "weight_dtype": {
83
+ "_data": "int4",
84
+ "_type": "torch.dtype"
85
+ },
86
+ "weight_granularity": {
87
+ "_data": {
88
+ "group_size": 32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  },
90
+ "_type": "PerGroup",
91
+ "_version": 1
92
+ },
93
+ "weight_mapping_type": {
94
+ "_data": "SYMMETRIC",
95
+ "_type": "MappingType"
96
+ },
97
+ "weight_scale_dtype": null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  },
99
+ "_type": "Int8DynamicActivationIntxWeightConfig",
100
+ "_version": 2
101
  }
102
  },
103
  "quant_type_kwargs": {},
 
110
  "tie_word_embeddings": false,
111
  "torch_dtype": "bfloat16",
112
  "transformers_version": "4.55.4",
113
+ "unsloth_fixed": true,
114
+ "unsloth_version": "2025.9.7",
115
  "use_cache": true,
116
  "use_sliding_window": false,
117
  "vocab_size": 151936
generation_config.json CHANGED
@@ -5,7 +5,8 @@
5
  151645,
6
  151643
7
  ],
8
- "pad_token_id": 151643,
 
9
  "temperature": 0.6,
10
  "top_k": 20,
11
  "top_p": 0.95,
 
5
  151645,
6
  151643
7
  ],
8
+ "max_length": 40960,
9
+ "pad_token_id": 151654,
10
  "temperature": 0.6,
11
  "top_k": 20,
12
  "top_p": 0.95,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2af09e30fe6009b1daa82aa9694b346f65c51d5609ebea72c68dba1a4864b274
3
  size 4789478103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e941a1e130f71b98627a59036b29d7b5d45416660d5955263d648c561350138
3
  size 4789478103