Add files using upload-large-folder tool
Browse files- README.md +254 -0
- chat_template.jinja +111 -0
- config.json +65 -0
- model-00013-of-00270.safetensors +3 -0
- model-00014-of-00270.safetensors +3 -0
- model-00035-of-00270.safetensors +3 -0
- model-00037-of-00270.safetensors +3 -0
- model-00050-of-00270.safetensors +3 -0
- model-00051-of-00270.safetensors +3 -0
- model-00058-of-00270.safetensors +3 -0
- model-00063-of-00270.safetensors +3 -0
- model-00070-of-00270.safetensors +3 -0
- model-00076-of-00270.safetensors +3 -0
- model-00080-of-00270.safetensors +3 -0
- model-00086-of-00270.safetensors +3 -0
- model-00092-of-00270.safetensors +3 -0
- model-00097-of-00270.safetensors +3 -0
- model-00104-of-00270.safetensors +3 -0
- model-00107-of-00270.safetensors +3 -0
- model-00111-of-00270.safetensors +3 -0
- model-00114-of-00270.safetensors +3 -0
- model-00130-of-00270.safetensors +3 -0
- model-00133-of-00270.safetensors +3 -0
- model-00135-of-00270.safetensors +3 -0
- model-00139-of-00270.safetensors +3 -0
- model-00140-of-00270.safetensors +3 -0
- model-00142-of-00270.safetensors +3 -0
- model-00144-of-00270.safetensors +3 -0
- model-00146-of-00270.safetensors +3 -0
- model-00147-of-00270.safetensors +3 -0
- model-00148-of-00270.safetensors +3 -0
- model-00150-of-00270.safetensors +3 -0
- model-00151-of-00270.safetensors +3 -0
- model-00152-of-00270.safetensors +3 -0
- model-00153-of-00270.safetensors +3 -0
- model-00154-of-00270.safetensors +3 -0
- model-00155-of-00270.safetensors +3 -0
- model-00163-of-00270.safetensors +3 -0
- model-00164-of-00270.safetensors +3 -0
- model-00166-of-00270.safetensors +3 -0
- model-00167-of-00270.safetensors +3 -0
- model-00180-of-00270.safetensors +3 -0
- model-00183-of-00270.safetensors +3 -0
- model-00186-of-00270.safetensors +3 -0
- model-00192-of-00270.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
README.md
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: transformers
|
| 4 |
+
base_model:
|
| 5 |
+
- deepseek-ai/DeepSeek-V3-Base
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
<p align="center">
|
| 9 |
+
<img src="images/deep-cogito-logo.png" alt="Logo" width="40%">
|
| 10 |
+
</p>
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Cogito v2 preview - 671B MoE
|
| 14 |
+
|
| 15 |
+
[Blog Post](https://www.deepcogito.com/research/cogito-v2-preview)
|
| 16 |
+
|
| 17 |
+
The Cogito v2 LLMs are instruction tuned generative models. All models are released under an open license for commercial use.
|
| 18 |
+
|
| 19 |
+
- Cogito v2 models are hybrid reasoning models. Each model can answer directly (standard LLM), or self-reflect before answering (like reasoning models).
|
| 20 |
+
- The LLMs are trained using **Iterated Distillation and Amplification (IDA)** - a scalable and efficient alignment strategy for superintelligence using iterative self-improvement.
|
| 21 |
+
- The models have been optimized for coding, STEM, instruction following and general helpfulness, and have significantly higher multilingual, coding and tool calling capabilities than size equivalent counterparts.
|
| 22 |
+
- In both standard and reasoning modes, Cogito v2-preview models outperform their size equivalent counterparts on common industry benchmarks.
|
| 23 |
+
- This model is trained in over 30 languages and supports a context length of 128k.
|
| 24 |
+
|
| 25 |
+
# Evaluations
|
| 26 |
+
For detailed evaluations, please refer to the [Blog Post](https://www.deepcogito.com/research/cogito-v2-preview).
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# Usage
|
| 30 |
+
Here is a snippet below for usage with Transformers:
|
| 31 |
+
|
| 32 |
+
```python
|
| 33 |
+
import transformers
|
| 34 |
+
import torch
|
| 35 |
+
|
| 36 |
+
model_id = "deepcogito/cogito-v2-preview-llama-671B-MoE"
|
| 37 |
+
|
| 38 |
+
pipeline = transformers.pipeline(
|
| 39 |
+
"text-generation",
|
| 40 |
+
model=model_id,
|
| 41 |
+
model_kwargs={"torch_dtype": torch.bfloat16},
|
| 42 |
+
device_map="auto",
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
messages = [
|
| 46 |
+
{"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
|
| 47 |
+
{"role": "user", "content": "Give me a short introduction to LLMs."},
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
outputs = pipeline(
|
| 51 |
+
messages,
|
| 52 |
+
max_new_tokens=512,
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
print(outputs[0]["generated_text"][-1])
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
## Implementing extended thinking
|
| 61 |
+
- By default, the model will answer in the standard mode.
|
| 62 |
+
- To enable thinking, you can do any one of the two methods:
|
| 63 |
+
- Set `enable_thinking=True` while applying the chat template.
|
| 64 |
+
- Add a specific system prompt, along with prefilling the response with "\<think\>\n".
|
| 65 |
+
|
| 66 |
+
**NOTE: Unlike Cogito v1 models, we initiate the response with "\<think\>\n" at the beginning of every output when reasoning is enabled. This is because hybrid models can be brittle at times, and adding a "\<think\>\n" ensures that the model does indeed respect thinking.**
|
| 67 |
+
|
| 68 |
+
### Method 1 - Set enable_thinking=True in the tokenizer
|
| 69 |
+
If you are using Huggingface tokenizers, then you can simply add the argument `enable_thinking=True` to the tokenization (this option is added to the chat template).
|
| 70 |
+
|
| 71 |
+
Here is an example -
|
| 72 |
+
```python
|
| 73 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 74 |
+
|
| 75 |
+
model_name = "deepcogito/cogito-v2-preview-llama-671B-MoE"
|
| 76 |
+
|
| 77 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 78 |
+
model_name,
|
| 79 |
+
torch_dtype="auto",
|
| 80 |
+
device_map="auto"
|
| 81 |
+
)
|
| 82 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 83 |
+
|
| 84 |
+
prompt = "Give me a short introduction to LLMs."
|
| 85 |
+
messages = [
|
| 86 |
+
{"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
|
| 87 |
+
{"role": "user", "content": prompt}
|
| 88 |
+
]
|
| 89 |
+
|
| 90 |
+
text = tokenizer.apply_chat_template(
|
| 91 |
+
messages,
|
| 92 |
+
tokenize=False,
|
| 93 |
+
add_generation_prompt=True,
|
| 94 |
+
enable_thinking=True
|
| 95 |
+
)
|
| 96 |
+
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
| 97 |
+
|
| 98 |
+
generated_ids = model.generate(
|
| 99 |
+
**model_inputs,
|
| 100 |
+
max_new_tokens=512
|
| 101 |
+
)
|
| 102 |
+
generated_ids = [
|
| 103 |
+
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 104 |
+
]
|
| 105 |
+
|
| 106 |
+
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 107 |
+
print(response)
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
### Method 2 - Add a specific system prompt, along with prefilling the response with "\<think\>\n".
|
| 111 |
+
To enable thinking using this method, you need to follow two steps -
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
Step 1 - Simply use this in the system prompt `system_instruction = 'Enable deep thinking subroutine.'`
|
| 115 |
+
|
| 116 |
+
If you already have a system_instruction, then use `system_instruction = 'Enable deep thinking subroutine.' + '\n\n' + system_instruction`.
|
| 117 |
+
|
| 118 |
+
Step 2 - Prefill the response with the tokens `"<think>\n"`.
|
| 119 |
+
|
| 120 |
+
Here is an example -
|
| 121 |
+
|
| 122 |
+
```python
|
| 123 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 124 |
+
import torch
|
| 125 |
+
|
| 126 |
+
model_name = "deepcogito/cogito-v2-preview-llama-671B-MoE"
|
| 127 |
+
|
| 128 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 129 |
+
model_name,
|
| 130 |
+
torch_dtype="auto",
|
| 131 |
+
device_map="auto"
|
| 132 |
+
)
|
| 133 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 134 |
+
|
| 135 |
+
# Step 1 - Add deep thinking instruction.
|
| 136 |
+
DEEP_THINKING_INSTRUCTION = "Enable deep thinking subroutine."
|
| 137 |
+
|
| 138 |
+
messages = [
|
| 139 |
+
{"role": "system", "content": DEEP_THINKING_INSTRUCTION},
|
| 140 |
+
{"role": "user", "content": "Write a bash script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format."},
|
| 141 |
+
]
|
| 142 |
+
|
| 143 |
+
text = tokenizer.apply_chat_template(
|
| 144 |
+
messages,
|
| 145 |
+
tokenize=False,
|
| 146 |
+
add_generation_prompt=True
|
| 147 |
+
)
|
| 148 |
+
|
| 149 |
+
# Step 2 - Prefill response with "<think>\n".
|
| 150 |
+
text += "<think>\n"
|
| 151 |
+
|
| 152 |
+
# Now, continue as usual.
|
| 153 |
+
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
| 154 |
+
|
| 155 |
+
generated_ids = model.generate(
|
| 156 |
+
**model_inputs,
|
| 157 |
+
max_new_tokens=512
|
| 158 |
+
)
|
| 159 |
+
generated_ids = [
|
| 160 |
+
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 161 |
+
]
|
| 162 |
+
|
| 163 |
+
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 164 |
+
print(response)
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
Similarly, if you have a system prompt, you can append the `DEEP_THINKING_INSTRUCTION` to the beginning in this way -
|
| 169 |
+
|
| 170 |
+
```python
|
| 171 |
+
DEEP_THINKING_INSTRUCTION = "Enable deep thinking subroutine."
|
| 172 |
+
|
| 173 |
+
system_prompt = "Reply to each prompt with only the actual code - no explanations."
|
| 174 |
+
prompt = "Write a bash script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format."
|
| 175 |
+
|
| 176 |
+
messages = [
|
| 177 |
+
{"role": "system", "content": DEEP_THINKING_INSTRUCTION + '\n\n' + system_prompt},
|
| 178 |
+
{"role": "user", "content": prompt}
|
| 179 |
+
]
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
# Tool Calling
|
| 184 |
+
Cogito models support tool calling (single, parallel, multiple and parallel_multiple) both in standard and extended thinking mode.
|
| 185 |
+
|
| 186 |
+
Here is a snippet -
|
| 187 |
+
|
| 188 |
+
```python
|
| 189 |
+
# First, define a tool
|
| 190 |
+
def get_current_temperature(location: str) -> float:
|
| 191 |
+
"""
|
| 192 |
+
Get the current temperature at a location.
|
| 193 |
+
|
| 194 |
+
Args:
|
| 195 |
+
location: The location to get the temperature for, in the format "City, Country"
|
| 196 |
+
Returns:
|
| 197 |
+
The current temperature at the specified location in the specified units, as a float.
|
| 198 |
+
"""
|
| 199 |
+
return 22. # A real function should probably actually get the temperature!
|
| 200 |
+
|
| 201 |
+
# Next, create a chat and apply the chat template
|
| 202 |
+
messages = [
|
| 203 |
+
{"role": "user", "content": "Hey, what's the temperature in Paris right now?"}
|
| 204 |
+
]
|
| 205 |
+
|
| 206 |
+
model_inputs = tokenizer.apply_chat_template(messages, tools=[get_current_temperature], add_generation_prompt=True)
|
| 207 |
+
|
| 208 |
+
text = tokenizer.apply_chat_template(messages, tools=[get_current_temperature], add_generation_prompt=True, tokenize=False)
|
| 209 |
+
inputs = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(model.device)
|
| 210 |
+
outputs = model.generate(**inputs, max_new_tokens=512)
|
| 211 |
+
output_text = tokenizer.batch_decode(outputs)[0][len(text):]
|
| 212 |
+
print(output_text)
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
This will result in the output -
|
| 216 |
+
```
|
| 217 |
+
<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_temperature
|
| 218 |
+
```json
|
| 219 |
+
{"location":"Paris, France"}
|
| 220 |
+
```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
|
| 221 |
+
```
|
| 222 |
+
|
| 223 |
+
You can then generate text from this input as normal. If the model generates a tool call, you should add it to the chat like so:
|
| 224 |
+
|
| 225 |
+
```python
|
| 226 |
+
tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France"}}
|
| 227 |
+
messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]})
|
| 228 |
+
```
|
| 229 |
+
|
| 230 |
+
and then call the tool and append the result, with the `tool` role, like so:
|
| 231 |
+
|
| 232 |
+
```python
|
| 233 |
+
messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
|
| 234 |
+
```
|
| 235 |
+
|
| 236 |
+
After that, you can `generate()` again to let the model use the tool result in the chat:
|
| 237 |
+
|
| 238 |
+
```python
|
| 239 |
+
text = tokenizer.apply_chat_template(messages, tools=[get_current_temperature], add_generation_prompt=True, tokenize=False)
|
| 240 |
+
inputs = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(model.device)
|
| 241 |
+
outputs = model.generate(**inputs, max_new_tokens=512)
|
| 242 |
+
output_text = tokenizer.batch_decode(outputs)[0][len(text):]
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
This should result in the string -
|
| 246 |
+
```
|
| 247 |
+
'The current temperature in Paris is 22.0 degrees.<|end▁of▁sentence|>'
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
## License
|
| 251 |
+
This repository and the model weights are licensed under **MIT License**.
|
| 252 |
+
|
| 253 |
+
## Contact
|
| 254 |
+
If you would like to reach out to our team, send an email to [[email protected]](mailto:[email protected]).
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{# ==================================================================== #}
|
| 2 |
+
{# Deepseek v3 template with enable_thinking and tools support #}
|
| 3 |
+
{# ==================================================================== #}
|
| 4 |
+
{%- if not enable_thinking is defined %}{% set enable_thinking = false %}{% endif -%}
|
| 5 |
+
{%- if not tools is defined %}{% set tools = none %}{% endif -%}
|
| 6 |
+
{%- if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif -%}
|
| 7 |
+
|
| 8 |
+
{# --------------------------- Collect system prompt -------------------- #}
|
| 9 |
+
{%- set ns = namespace(system_prompt='', is_last_user=false, outputs_open=false, first_output=true) -%}
|
| 10 |
+
|
| 11 |
+
{%- if messages and messages[0].role == 'system' -%}
|
| 12 |
+
{%- set raw = messages[0].content -%}
|
| 13 |
+
{%- set ns.system_prompt = raw if raw is string else raw[0].text -%}
|
| 14 |
+
{%- set messages = messages[1:] -%}
|
| 15 |
+
{%- endif -%}
|
| 16 |
+
|
| 17 |
+
{# --------------------------- Inject deep thinking --------------------- #}
|
| 18 |
+
{%- if enable_thinking -%}
|
| 19 |
+
{%- set ns.system_prompt = ns.system_prompt and 'Enable deep thinking subroutine.
|
| 20 |
+
|
| 21 |
+
' ~ ns.system_prompt or 'Enable deep thinking subroutine.' -%}
|
| 22 |
+
{%- endif -%}
|
| 23 |
+
|
| 24 |
+
{# --------------------------- Append tools block ----------------------- #}
|
| 25 |
+
{%- if tools is not none -%}
|
| 26 |
+
{%- if ns.system_prompt -%}
|
| 27 |
+
{%- set ns.system_prompt = ns.system_prompt ~ '
|
| 28 |
+
|
| 29 |
+
You have the following functions available:
|
| 30 |
+
|
| 31 |
+
' -%}
|
| 32 |
+
{%- else -%}
|
| 33 |
+
{%- set ns.system_prompt = 'You have the following functions available:
|
| 34 |
+
|
| 35 |
+
' -%}
|
| 36 |
+
{%- endif -%}
|
| 37 |
+
{%- for t in tools -%}
|
| 38 |
+
{%- set ns.system_prompt = ns.system_prompt ~ "```json
|
| 39 |
+
" ~ (t | tojson(indent=4)) ~ "
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
" -%}
|
| 43 |
+
{%- endfor -%}
|
| 44 |
+
{%- endif -%}
|
| 45 |
+
|
| 46 |
+
{{- bos_token -}}{{- ns.system_prompt -}}
|
| 47 |
+
|
| 48 |
+
{# --------------------------- Iterate conversation --------------------- #}
|
| 49 |
+
{%- for m in messages -%}
|
| 50 |
+
{# --------------------------- USER ---------------------------------- #}
|
| 51 |
+
{%- if m.role == 'user' -%}
|
| 52 |
+
{%- set ns.is_last_user = true -%}
|
| 53 |
+
{%- set txt = m.content if m.content is string else m.content | selectattr('type','equalto','text') | map(attribute='text') | join('') -%}
|
| 54 |
+
{{- "<|User|>" -}}{{- txt -}}{{- "<|Assistant|>" -}}
|
| 55 |
+
{%- endif -%}
|
| 56 |
+
|
| 57 |
+
{# --------------------------- ASSISTANT with TOOL CALLS -------------- #}
|
| 58 |
+
{%- if m.role == 'assistant' and m.tool_calls is defined and m.tool_calls -%}
|
| 59 |
+
{%- set ns.is_last_user = false -%}
|
| 60 |
+
{%- set lead = m.content is string and m.content|trim or (m.content and m.content | selectattr('type','equalto','text') | map(attribute='text') | join('')) or '' -%}
|
| 61 |
+
{{- lead -}}{{- "<|tool▁calls▁begin|>" -}}
|
| 62 |
+
{%- for call in m.tool_calls -%}
|
| 63 |
+
{{- "<|tool▁call▁begin|>" -}}{{- call.type -}}{{- "<|tool▁sep|>" -}}{{- call.function.name -}}
|
| 64 |
+
{{- "
|
| 65 |
+
```json
|
| 66 |
+
" -}}{{- call.function.arguments -}}{{- "
|
| 67 |
+
```" -}}{{- "<|tool▁call▁end|>" -}}
|
| 68 |
+
{%- if not loop.last -%}{{- "
|
| 69 |
+
" -}}{%- endif -%}
|
| 70 |
+
{%- endfor -%}
|
| 71 |
+
{{- "<|tool▁calls▁end|>" -}}{{- "<|end▁of▁sentence|>" -}}
|
| 72 |
+
{%- endif -%}
|
| 73 |
+
|
| 74 |
+
{# --------------------------- ASSISTANT plain ------------------------ #}
|
| 75 |
+
{%- if m.role == 'assistant' and (m.tool_calls is not defined or not m.tool_calls) -%}
|
| 76 |
+
{%- set ns.is_last_user = false -%}
|
| 77 |
+
{%- set txt = m.content if m.content is string else m.content | selectattr('type','equalto','text') | map(attribute='text') | join('') -%}
|
| 78 |
+
{{- txt -}}{{- "<|end▁of▁sentence|>" -}}
|
| 79 |
+
{%- endif -%}
|
| 80 |
+
|
| 81 |
+
{# --------------------------- TOOL output ---------------------------- #}
|
| 82 |
+
{%- if m.role == 'tool' -%}
|
| 83 |
+
{%- set ns.is_last_user = false -%}
|
| 84 |
+
{%- set out_txt = m.content if m.content is string else m.content | selectattr('type','equalto','text') | map(attribute='text') | join('') -%}
|
| 85 |
+
{%- if not ns.outputs_open -%}
|
| 86 |
+
{{- "<|tool▁outputs▁begin|>" -}}
|
| 87 |
+
{%- set ns.outputs_open = true -%}
|
| 88 |
+
{%- endif -%}
|
| 89 |
+
{{- "<|tool▁output▁begin|>" -}}{{- out_txt -}}{{- "<|tool▁output▁end|>" -}}
|
| 90 |
+
{%- if loop.nextitem is defined and loop.nextitem.role == 'tool' -%}
|
| 91 |
+
{{- "
|
| 92 |
+
" -}}
|
| 93 |
+
{%- endif -%}
|
| 94 |
+
{%- if loop.nextitem is undefined or loop.nextitem.role != 'tool' -%}
|
| 95 |
+
{{- "<|tool▁outputs▁end|>" -}}
|
| 96 |
+
{%- set ns.outputs_open = false -%}
|
| 97 |
+
{%- endif -%}
|
| 98 |
+
{%- endif -%}
|
| 99 |
+
{%- endfor -%}
|
| 100 |
+
|
| 101 |
+
{%- if ns.outputs_open -%}
|
| 102 |
+
{{- "<|tool▁outputs▁end|>" -}}
|
| 103 |
+
{%- endif -%}
|
| 104 |
+
|
| 105 |
+
{%- if add_generation_prompt and not ns.is_last_user -%}
|
| 106 |
+
{{- "<|Assistant|>" -}}
|
| 107 |
+
{%- endif -%}
|
| 108 |
+
|
| 109 |
+
{%- if add_generation_prompt and enable_thinking -%}
|
| 110 |
+
{{- '<think>\n' -}}
|
| 111 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DeepseekV3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"auto_map": {
|
| 8 |
+
"AutoConfig": "configuration_deepseek.DeepseekV3Config",
|
| 9 |
+
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
| 10 |
+
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
| 11 |
+
},
|
| 12 |
+
"aux_loss_alpha": 0.001,
|
| 13 |
+
"bos_token_id": 0,
|
| 14 |
+
"eos_token_id": 1,
|
| 15 |
+
"ep_size": 1,
|
| 16 |
+
"first_k_dense_replace": 3,
|
| 17 |
+
"head_dim": 64,
|
| 18 |
+
"hidden_act": "silu",
|
| 19 |
+
"hidden_size": 7168,
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 18432,
|
| 22 |
+
"kv_lora_rank": 512,
|
| 23 |
+
"max_position_embeddings": 163840,
|
| 24 |
+
"model_type": "deepseek_v3",
|
| 25 |
+
"moe_intermediate_size": 2048,
|
| 26 |
+
"moe_layer_freq": 1,
|
| 27 |
+
"n_group": 8,
|
| 28 |
+
"n_routed_experts": 256,
|
| 29 |
+
"n_shared_experts": 1,
|
| 30 |
+
"norm_topk_prob": true,
|
| 31 |
+
"num_attention_heads": 128,
|
| 32 |
+
"num_experts_per_tok": 8,
|
| 33 |
+
"num_hidden_layers": 61,
|
| 34 |
+
"num_key_value_heads": 128,
|
| 35 |
+
"num_nextn_predict_layers": 1,
|
| 36 |
+
"pretraining_tp": 1,
|
| 37 |
+
"q_lora_rank": 1536,
|
| 38 |
+
"qk_head_dim": 192,
|
| 39 |
+
"qk_nope_head_dim": 128,
|
| 40 |
+
"qk_rope_head_dim": 64,
|
| 41 |
+
"rms_norm_eps": 1e-06,
|
| 42 |
+
"rope_interleave": true,
|
| 43 |
+
"rope_scaling": {
|
| 44 |
+
"beta_fast": 32.0,
|
| 45 |
+
"beta_slow": 1.0,
|
| 46 |
+
"factor": 40.0,
|
| 47 |
+
"mscale": 1.0,
|
| 48 |
+
"mscale_all_dim": 1.0,
|
| 49 |
+
"original_max_position_embeddings": 4096,
|
| 50 |
+
"rope_type": "yarn",
|
| 51 |
+
"type": "yarn"
|
| 52 |
+
},
|
| 53 |
+
"rope_theta": 10000,
|
| 54 |
+
"routed_scaling_factor": 2.5,
|
| 55 |
+
"scoring_func": "sigmoid",
|
| 56 |
+
"seq_aux": true,
|
| 57 |
+
"tie_word_embeddings": false,
|
| 58 |
+
"topk_group": 4,
|
| 59 |
+
"topk_method": "noaux_tc",
|
| 60 |
+
"torch_dtype": "bfloat16",
|
| 61 |
+
"transformers_version": "4.53.0",
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"v_head_dim": 128,
|
| 64 |
+
"vocab_size": 128815
|
| 65 |
+
}
|
model-00013-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:528eda684cf26182f1d3738825951a468f1f17160eedd968caa7775e22b16a4c
|
| 3 |
+
size 4991243176
|
model-00014-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2aeb89c810e9391ae05ebc866f1d352e142f838de49727edd7c8c0bfbf13f8eb
|
| 3 |
+
size 4991243104
|
model-00035-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c7d8094e37913851e467396e0f97a5b82cafcc043e894c46af1c7b8ff18b138
|
| 3 |
+
size 4991243184
|
model-00037-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0669c5c805d8bc5da20721e01ef43167ec0d4bab95b8e3e786019fecfea3ca24
|
| 3 |
+
size 4991243112
|
model-00050-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87b52c005ac938659d96e44ab72af5237ece079260f6f42efff2e4fad9664f64
|
| 3 |
+
size 4991243176
|
model-00051-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:600fa301dd312476ccae8301afcf2b6040a97b6e7a64566a4abd4ac18c5d7ca5
|
| 3 |
+
size 4991243128
|
model-00058-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b67cd0a329853534451799d89a41311064725ca9cc6efd1401055b12b7809fb
|
| 3 |
+
size 4991243184
|
model-00063-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05135e6b9a847a8362e096801bd822ff8963f5081f885d384f02782be9186ea6
|
| 3 |
+
size 4991243184
|
model-00070-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5db7dd3886cd1efe95c3e71992a040a8ebfdf006090e6589bb14274a145cd0b1
|
| 3 |
+
size 4991243040
|
model-00076-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:753d43eef73554281de8c014437fc52363b3b0d42f94b3dcbc543d4d4e73a5c0
|
| 3 |
+
size 4987475008
|
model-00080-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fe14a2912d344bfa02004b89c40f3b09f00390c9d29104838c2436852d315bc
|
| 3 |
+
size 4987474904
|
model-00086-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53dd70afa496482ac4c8464000cf9cb988226ce57fe99c6e2d27f7a4f1b904dc
|
| 3 |
+
size 4991243184
|
model-00092-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a193ea76e92fcc2859f74b3b48ec46e6c3bc63ea98ab7048f19b0942b6bdd259
|
| 3 |
+
size 4991243176
|
model-00097-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9597e27d6c49e1d22983f7399faf73a38e6094a06f03efea284c9768e6997bf3
|
| 3 |
+
size 4991242984
|
model-00104-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eb1682d7d6fcdaaaf38e7bbf5d6e0fc839e83a8412e4d4af48852e233d13ed0
|
| 3 |
+
size 4991243184
|
model-00107-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7de0ee73cc0666e04a959b5f9d68d149ca6aea1ce7f3aa60447e56d3820e4c7
|
| 3 |
+
size 4991243024
|
model-00111-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:908d0c2ac9ca0263139a54c62aa87caf1ddf10ca5ddb6146a7a53f04c08e26d6
|
| 3 |
+
size 4991243128
|
model-00114-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:774268dd3eed041c404e7f6c10598d3ec659e04e072f2598a9c22b130a3dde00
|
| 3 |
+
size 4991243184
|
model-00130-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c59dd63ab9fce3d08ea4d29b43257035a83a1e672eb1ae7d4b561c460a33a2f3
|
| 3 |
+
size 4991243040
|
model-00133-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab8cbe26258f01764979b6c7ca92dacf682875cc46327c52f17befa63969450
|
| 3 |
+
size 4991243176
|
model-00135-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aafefd5868c1ecc1075d62b9c753f05cc75f0efd402e0ef3dbd30f22b74715b7
|
| 3 |
+
size 4991243016
|
model-00139-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ead43e9715cf7cf366d4d75cfac72f604c9fff32369ce5c1e6d4029183a202d8
|
| 3 |
+
size 4991243080
|
model-00140-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0b901e7838d3b16d52c4acaf97602ba2cb33c09e80e200589d88985de6da52d
|
| 3 |
+
size 4987474904
|
model-00142-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e778511b53e53dc71e6f75f889f8249cb5c01a3e37efbb14bf4d601c06ab13a
|
| 3 |
+
size 4991243176
|
model-00144-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab0663d951e7150a5f079c4999e406b6e1c414534d621f8ef6827924cf1ccbfb
|
| 3 |
+
size 4991243008
|
model-00146-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dc010b65907b5b50c0720f380828a53a534d74b1b1039c02eef8f6e3156a354
|
| 3 |
+
size 4991243008
|
model-00147-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:edfff13fc58349e2de360af0f18d1d113be6a3e8e6005e5f00e7365c19c13e6d
|
| 3 |
+
size 4991243008
|
model-00148-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31460dd55cdd56927fe63cb92deaf146022584bcc894b6e229832f37d1c50aed
|
| 3 |
+
size 4991242944
|
model-00150-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b18c96326d1431ef2dfdab1a18c8f81987b62b58dde45c667f7da377c08f309
|
| 3 |
+
size 4987475024
|
model-00151-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de8a7704116259418eda5836b5eb900376d8c7d4b73d05858072d85374d94c6a
|
| 3 |
+
size 4991243184
|
model-00152-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2aa505dc1df319dfbe612162dec2ed4be4c611798b3bd1e8f93f004880c36d9
|
| 3 |
+
size 4991243176
|
model-00153-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9b18d129355e8174642a43c9d9b98d78befa854b29a9d3a68a5402b51d5be51
|
| 3 |
+
size 4991243048
|
model-00154-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dadd5d193c45e70b49fe68edbc1a00d06b7152b18f784cae3886d25b50da188
|
| 3 |
+
size 4987474936
|
model-00155-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:928cba68cfc129fa9347316fc69fc43f63e3d1f9489ba108f6076a9aa0c472e4
|
| 3 |
+
size 4991243184
|
model-00163-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:deb344fc6c3ac28f36611cf06fc5b22e742c8b989b5c07fdc15d1514ed6f743c
|
| 3 |
+
size 4987474896
|
model-00164-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc8c0b7cb3cfa641196d7f77ff6e99c8545790b52883a95d06cec3ec44dba383
|
| 3 |
+
size 4991243184
|
model-00166-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d937cd412ae7757b96f52dde0cc230d682ce01002d20c172f03c9e4f7a6adb90
|
| 3 |
+
size 4991243184
|
model-00167-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6c8d5c5b9567841903fb91224893a36c2280a381f724dbd4b3b970c02777afc
|
| 3 |
+
size 4991243024
|
model-00180-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:692398ead170ad838173620263cd324f395046cf3bc7c696a2b9b3fd137f8262
|
| 3 |
+
size 4991243160
|
model-00183-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25007c26dda0d2c8d0b3af47008eeacb14c1656a6744c0cda2c87f1b59aea72b
|
| 3 |
+
size 4991243184
|
model-00186-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:573516c323614dbee31ebe169452608b8e4ab12d9596829820dfd955a688eef7
|
| 3 |
+
size 4772499320
|
model-00192-of-00270.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49b6fdbee2091030bc94f8e63cef718918c84e228dadf824e5e2e1cbddb48deb
|
| 3 |
+
size 4991243184
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|begin▁of▁sentence|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|end▁of▁sentence|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "<|▁pad▁|>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
}
|
| 23 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|