unsloth
/

Qwen2.5-1.5B-Instruct-bnb-4bit

Text Generation

text-generation-inference

4-bit precision

Model card · Files and versions

danielhanchen committed on Nov 12, 2024

Commit

05ad3fd

·

verified ·

1 Parent(s): c456147

Upload Qwen2ForCausalLM

Files changed (2) hide show

config.json +2 -0
generation_config.json +2 -1

config.json CHANGED Viewed

@@ -16,6 +16,7 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 28,
   "num_key_value_heads": 2,
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
@@ -37,6 +38,7 @@
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   "num_attention_heads": 12,
   "num_hidden_layers": 28,
   "num_key_value_heads": 2,
+  "pad_token_id": 151665,
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
+  "unsloth_fixed": true,
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json CHANGED Viewed

@@ -5,7 +5,8 @@
     151645,
     151643
   ],
-  "pad_token_id": 151643,
   "repetition_penalty": 1.1,
   "temperature": 0.7,
   "top_k": 20,

     151645,
     151643
   ],
+  "max_length": 32768,
+  "pad_token_id": 151665,
   "repetition_penalty": 1.1,
   "temperature": 0.7,
   "top_k": 20,