Update model

- README.md +5 -6
- config.json +2 -3
- model.safetensors +1 -1
- tokenizer.json +1 -6
README.md
CHANGED

@@ -73,7 +73,7 @@ widget:
 - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-reddit-instruct-curated)] [euclaise/reddit-instruct-curated](https://huggingface.co/datasets/euclaise/reddit-instruct-curated)
 - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-aya_dataset)] [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset)
 - [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
-- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-
+- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v4/resolve/main/license.txt)
 
 ## Recommended Prompt Format
 
@@ -91,7 +91,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 
-model_path = "Felladrin/TinyMistral-248M-Chat-
+model_path = "Felladrin/TinyMistral-248M-Chat-v4"
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
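The hunk above shows only the top of the README's usage snippet. A minimal sketch of how the updated `model_path` plugs into a full generation call could look like the following; the ChatML messages and the sampling settings are illustrative assumptions, not values taken from the README:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

model_path = "Felladrin/TinyMistral-248M-Chat-v4"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

# Build a ChatML prompt via the tokenizer's chat template (the model card
# recommends the ChatML format); the messages here are placeholders.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain what a tokenizer does."},
]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Stream tokens to stdout as they are generated; the sampling values are assumptions.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.generate(**inputs, streamer=streamer, max_new_tokens=256, do_sample=True, temperature=0.7)
```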
@@ -178,7 +178,6 @@ llamafactory-cli train \
 --preprocessing_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
 --dataloader_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
 --finetuning_type full \
---template default \
 --flash_attn auto \
 --enable_liger_kernel True \
 --dataset_dir data \
@@ -188,15 +187,15 @@
 --num_train_epochs 2.0 \
 --per_device_train_batch_size 4 \
 --gradient_accumulation_steps 4 \
---lr_scheduler_type
+--lr_scheduler_type linear \
 --max_grad_norm 1.0 \
 --logging_steps 10 \
 --save_steps 50 \
 --save_total_limit 1 \
 --warmup_ratio 0.1 \
 --packing False \
---report_to
+--report_to tensorboard \
---output_dir ~/TinyMistral-248M-Chat-
+--output_dir ~/TinyMistral-248M-Chat-v4 \
 --pure_bf16 True \
 --plot_loss True \
 --trust_remote_code True \
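Among the training-command changes, the scheduler is pinned to linear, logs go to TensorBoard, and the checkpoint directory now matches the v4 name. With `--per_device_train_batch_size 4` and `--gradient_accumulation_steps 4`, the effective batch size works out to 4 × 4 = 16 sequences per optimizer step per device. The two worker flags shell out to a Python one-liner; as a standalone sketch, that logic is just:

```python
import os

# Use every CPU core except two (left free for the trainer process and the OS),
# but never fewer than one worker.
num_workers = max(1, os.cpu_count() - 2)
print(num_workers)
```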
config.json
CHANGED

@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "Felladrin/TinyMistral-248M-Chat-v3",
   "architectures": ["MistralForCausalLM"],
   "attention_dropout": 0.0,
   "bos_token_id": 32000,
@@ -19,8 +18,8 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
-  "use_cache":
+  "transformers_version": "4.50.0",
+  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 32005
 }
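Dropping `"_name_or_path"` removes a leftover pointer to the v3 checkpoint this repository was initialized from, and `"use_cache": false` is a common setting for a training-time config; generation code can still enable the KV cache per call. A quick sketch for confirming the updated fields once the commit is live; the assertions mirror the values in this diff:

```python
from transformers import AutoConfig

# Load the updated config from the Hub and check the fields this commit touches.
config = AutoConfig.from_pretrained("Felladrin/TinyMistral-248M-Chat-v4")
assert config.use_cache is False       # set to false in this commit
assert config.vocab_size == 32005
print(config.transformers_version)     # "4.50.0" after this commit

# The KV cache can still be turned on at generation time, e.g.:
# model.generate(**inputs, use_cache=True)
```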
model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:da09172da13d6da1727beb0cef6c42e3fbc99bd3d9bdfedc0df8f5b2746c02a0
 size 496060688
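Since model.safetensors is tracked with Git LFS, the commit only swaps the pointer's SHA-256 digest; the declared size stays at 496,060,688 bytes. A minimal sketch for verifying a downloaded copy against the pointer (the local file path is an assumption):

```python
import hashlib

# Expected values from this commit's LFS pointer file.
EXPECTED_SHA256 = "da09172da13d6da1727beb0cef6c42e3fbc99bd3d9bdfedc0df8f5b2746c02a0"
EXPECTED_SIZE = 496060688

def verify(path: str) -> bool:
    """Hash the file in 1 MiB chunks and compare digest and size to the pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == EXPECTED_SHA256 and size == EXPECTED_SIZE

print(verify("model.safetensors"))  # True if the download matches this commit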
tokenizer.json
CHANGED

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1536,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
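The old tokenizer.json baked a truncation policy into the file (truncate on the right to 1536 tokens, LongestFirst strategy, stride 0), which mainly affects code that loads it with the `tokenizers` library directly; the `transformers` wrappers set truncation per call anyway. With `"truncation": null`, nothing is dropped implicitly. A sketch of the behavioral difference, assuming the commit is published on the Hub:

```python
from tokenizers import Tokenizer

tok = Tokenizer.from_pretrained("Felladrin/TinyMistral-248M-Chat-v4")

# After this commit there is no baked-in cap, so long inputs encode in full.
long_text = "word " * 5000
print(len(tok.encode(long_text).ids))  # well above 1536

# Callers who want the previous behavior can opt back in explicitly:
tok.enable_truncation(max_length=1536, strategy="longest_first", direction="right")
print(len(tok.encode(long_text).ids))  # 1536
```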