Training in progress, epoch 1
Browse files- config.json +6 -6
- model.safetensors +2 -2
- runs/Jan13_01-01-54_46046415ec16/events.out.tfevents.1736730115.46046415ec16.261.0 +3 -0
- tokenizer.json +41 -2
- tokenizer_config.json +1 -1
- training_args.bin +3 -0
config.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"architectures": [
|
| 4 |
"DeepseekV3ForCausalLM"
|
| 5 |
],
|
| 6 |
"attention_bias": false,
|
| 7 |
"attention_dropout": 0.0,
|
| 8 |
"auto_map": {
|
| 9 |
-
"AutoConfig": "configuration_deepseek.DeepseekV3Config",
|
| 10 |
-
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
| 11 |
-
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
| 12 |
},
|
| 13 |
"aux_loss_alpha": 0.001,
|
| 14 |
"bos_token_id": 0,
|
|
@@ -54,8 +54,8 @@
|
|
| 54 |
"tie_word_embeddings": false,
|
| 55 |
"topk_group": 4,
|
| 56 |
"topk_method": "noaux_tc",
|
| 57 |
-
"torch_dtype": "
|
| 58 |
-
"transformers_version": "4.
|
| 59 |
"use_cache": true,
|
| 60 |
"v_head_dim": 128,
|
| 61 |
"vocab_size": 129280
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "tonyshark/deepseek-v3-1b",
|
| 3 |
"architectures": [
|
| 4 |
"DeepseekV3ForCausalLM"
|
| 5 |
],
|
| 6 |
"attention_bias": false,
|
| 7 |
"attention_dropout": 0.0,
|
| 8 |
"auto_map": {
|
| 9 |
+
"AutoConfig": "tonyshark/deepseek-v3-1b--configuration_deepseek.DeepseekV3Config",
|
| 10 |
+
"AutoModel": "tonyshark/deepseek-v3-1b--modeling_deepseek.DeepseekV3Model",
|
| 11 |
+
"AutoModelForCausalLM": "tonyshark/deepseek-v3-1b--modeling_deepseek.DeepseekV3ForCausalLM"
|
| 12 |
},
|
| 13 |
"aux_loss_alpha": 0.001,
|
| 14 |
"bos_token_id": 0,
|
|
|
|
| 54 |
"tie_word_embeddings": false,
|
| 55 |
"topk_group": 4,
|
| 56 |
"topk_method": "noaux_tc",
|
| 57 |
+
"torch_dtype": "float32",
|
| 58 |
+
"transformers_version": "4.46.1",
|
| 59 |
"use_cache": true,
|
| 60 |
"v_head_dim": 128,
|
| 61 |
"vocab_size": 129280
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4d0893b4a6f7ad8282fe4255f087a31b6c0b1f6d95b956cb1d85a85e11fa7e3
|
| 3 |
+
size 4198331024
|
runs/Jan13_01-01-54_46046415ec16/events.out.tfevents.1736730115.46046415ec16.261.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906a12664fe166fc82b3a97f8ae7993546d54364ee2ab0b60419d4bdc75d9b7b
|
| 3 |
+
size 6719
|
tokenizer.json
CHANGED
|
@@ -1,7 +1,19 @@
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
-
"truncation":
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"added_tokens": [
|
| 6 |
{
|
| 7 |
"id": 0,
|
|
@@ -7419,6 +7431,12 @@
|
|
| 7419 |
"id": "A",
|
| 7420 |
"type_id": 0
|
| 7421 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7422 |
}
|
| 7423 |
],
|
| 7424 |
"pair": [
|
|
@@ -7434,6 +7452,12 @@
|
|
| 7434 |
"type_id": 0
|
| 7435 |
}
|
| 7436 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7437 |
{
|
| 7438 |
"SpecialToken": {
|
| 7439 |
"id": "<|begin▁of▁sentence|>",
|
|
@@ -7445,6 +7469,12 @@
|
|
| 7445 |
"id": "B",
|
| 7446 |
"type_id": 1
|
| 7447 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7448 |
}
|
| 7449 |
],
|
| 7450 |
"special_tokens": {
|
|
@@ -7456,6 +7486,15 @@
|
|
| 7456 |
"tokens": [
|
| 7457 |
"<|begin▁of▁sentence|>"
|
| 7458 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7459 |
}
|
| 7460 |
}
|
| 7461 |
},
|
|
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
+
"truncation": {
|
| 4 |
+
"direction": "Right",
|
| 5 |
+
"max_length": 128,
|
| 6 |
+
"strategy": "LongestFirst",
|
| 7 |
+
"stride": 0
|
| 8 |
+
},
|
| 9 |
+
"padding": {
|
| 10 |
+
"strategy": "BatchLongest",
|
| 11 |
+
"direction": "Left",
|
| 12 |
+
"pad_to_multiple_of": null,
|
| 13 |
+
"pad_id": 1,
|
| 14 |
+
"pad_type_id": 0,
|
| 15 |
+
"pad_token": "<|end▁of▁sentence|>"
|
| 16 |
+
},
|
| 17 |
"added_tokens": [
|
| 18 |
{
|
| 19 |
"id": 0,
|
|
|
|
| 7431 |
"id": "A",
|
| 7432 |
"type_id": 0
|
| 7433 |
}
|
| 7434 |
+
},
|
| 7435 |
+
{
|
| 7436 |
+
"SpecialToken": {
|
| 7437 |
+
"id": "<|end▁of▁sentence|>",
|
| 7438 |
+
"type_id": 0
|
| 7439 |
+
}
|
| 7440 |
}
|
| 7441 |
],
|
| 7442 |
"pair": [
|
|
|
|
| 7452 |
"type_id": 0
|
| 7453 |
}
|
| 7454 |
},
|
| 7455 |
+
{
|
| 7456 |
+
"SpecialToken": {
|
| 7457 |
+
"id": "<|end▁of▁sentence|>",
|
| 7458 |
+
"type_id": 0
|
| 7459 |
+
}
|
| 7460 |
+
},
|
| 7461 |
{
|
| 7462 |
"SpecialToken": {
|
| 7463 |
"id": "<|begin▁of▁sentence|>",
|
|
|
|
| 7469 |
"id": "B",
|
| 7470 |
"type_id": 1
|
| 7471 |
}
|
| 7472 |
+
},
|
| 7473 |
+
{
|
| 7474 |
+
"SpecialToken": {
|
| 7475 |
+
"id": "<|end▁of▁sentence|>",
|
| 7476 |
+
"type_id": 1
|
| 7477 |
+
}
|
| 7478 |
}
|
| 7479 |
],
|
| 7480 |
"special_tokens": {
|
|
|
|
| 7486 |
"tokens": [
|
| 7487 |
"<|begin▁of▁sentence|>"
|
| 7488 |
]
|
| 7489 |
+
},
|
| 7490 |
+
"<|end▁of▁sentence|>": {
|
| 7491 |
+
"id": "<|end▁of▁sentence|>",
|
| 7492 |
+
"ids": [
|
| 7493 |
+
1
|
| 7494 |
+
],
|
| 7495 |
+
"tokens": [
|
| 7496 |
+
"<|end▁of▁sentence|>"
|
| 7497 |
+
]
|
| 7498 |
}
|
| 7499 |
}
|
| 7500 |
},
|
tokenizer_config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
-
"add_eos_token":
|
| 4 |
"add_prefix_space": null,
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
|
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": true,
|
| 4 |
"add_prefix_space": null,
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64839e50b7edf20ea18785ae47789266b16e874589534193bc54617cdebab4c4
|
| 3 |
+
size 5560
|