LG-AI-EXAONE commited on Mar 18

Commit

20b9a76

0 Parent(s):

Initial commit

Browse files

Files changed (28) hide show

.gitattributes +35 -0
README.md +274 -0
assets/EXAONE_Symbol+BI_3d.png +0 -0
assets/exaone_deep_overall_performance.png +0 -0
config.json +40 -0
configuration_exaone.py +183 -0
generation_config.json +11 -0
merges.txt +0 -0
model-00001-of-00014.safetensors +3 -0
model-00002-of-00014.safetensors +3 -0
model-00003-of-00014.safetensors +3 -0
model-00004-of-00014.safetensors +3 -0
model-00005-of-00014.safetensors +3 -0
model-00006-of-00014.safetensors +3 -0
model-00007-of-00014.safetensors +3 -0
model-00008-of-00014.safetensors +3 -0
model-00009-of-00014.safetensors +3 -0
model-00010-of-00014.safetensors +3 -0
model-00011-of-00014.safetensors +3 -0
model-00012-of-00014.safetensors +3 -0
model-00013-of-00014.safetensors +3 -0
model-00014-of-00014.safetensors +3 -0
model.safetensors.index.json +586 -0
modeling_exaone.py +1394 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer_config.json +3221 -0
vocab.json +0 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,274 @@

+---
+base_model: LGAI-EXAONE/EXAONE-3.5-32B-Instruct
+base_model_relation: finetune
+license: other
+license_name: exaone
+license_link: LICENSE
+language:
+- en
+- ko
+tags:
+- lg-ai
+- exaone
+- exaone-deep
+pipeline_tag: text-generation
+library_name: transformers
+---
+<p align="center">
+<img src="assets/EXAONE_Symbol+BI_3d.png" width="300" style="margin: 40 auto;">
+<br>
+# EXAONE-Deep-32B
+## Introduction
+We introduce EXAONE Deep, a family of models ranging from 2.4B to 32B parameters, developed and released by LG AI Research, which exhibits superior capabilities in various reasoning tasks including math and coding benchmarks. Evaluation results show that 1) EXAONE Deep **2.4B** outperforms other models of comparable size, 2) EXAONE Deep **7.8B** outperforms not only open-weight models of comparable scale but also the proprietary reasoning model OpenAI o1-mini, and 3) EXAONE Deep **32B** demonstrates competitive performance against leading open-weight models.
+For more details, please refer to our [documentation](https://lgresearch.ai/data/upload/EXAONE_Deep__Model_Card.pdf), [blog](https://www.lgresearch.ai/news/view?seq=543) and [GitHub](https://github.com/LG-AI-EXAONE/EXAONE-Deep).
+<p align="center">
+<img src="assets/exaone_deep_overall_performance.png" width="100%" style="margin: 40 auto;">
+This repository contains the reasoning 32B language model with the following features:
+- Number of Parameters (without embeddings): 30.95B
+- Number of Layers: 64
+- Number of Attention Heads: GQA with 40 Q-heads and 8 KV-heads
+- Vocab Size: 102,400
+- Context Length: 32,768 tokens
+## Quickstart
+We recommend using `transformers` v4.43.1 or later.
+Here is the code snippet to run conversational inference with the model:
+```python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+model_name = "LGAI-EXAONE/EXAONE-Deep-32B"
+streaming = True    # choose the streaming option
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+messages = [
+    {"role": "user", "content": "How many golf balls can fit in a school bus?"}
+]
+input_ids = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt"
+)
+if streaming:
+    streamer = TextIteratorStreamer(tokenizer)
+    thread = Thread(target=model.generate, kwargs=dict(
+        input_ids=input_ids.to("cuda"),
+        eos_token_id=tokenizer.eos_token_id,
+        max_new_tokens=32768,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.95,
+        streamer=streamer
+    ))
+    thread.start()
+    for text in streamer:
+        print(text, end="", flush=True)
+else:
+    output = model.generate(
+        input_ids.to("cuda"),
+        eos_token_id=tokenizer.eos_token_id,
+        max_new_tokens=32768,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.95,
+    )
+    print(tokenizer.decode(output[0]))
+```
+> ### Note
+> The EXAONE Deep models are trained with an optimized configuration,
+> so we recommend following the [Usage Guideline](#usage-guideline) section to achieve optimal performance.
+## Evaluation
+The following table shows the evaluation results of reasoning tasks such as math and coding. The full evaluation results can be found in the [documentation](https://lgresearch.ai/data/upload/EXAONE_Deep__Model_Card.pdf).
+<table>
+    <tr>
+        <th>Models</th>
+        <th>MATH-500 (pass@1)</th>
+        <th>AIME 2024 (pass@1 / cons@64)</th>
+        <th>AIME 2025 (pass@1 / cons@64)</th>
+        <th>CSAT Math 2025 (pass@1)</th>
+        <th>GPQA Diamond (pass@1)</th>
+        <th>Live Code Bench (pass@1)</th>
+    </tr>
+    <tr>
+        <td>EXAONE Deep 32B</td>
+        <td>95.7</td>
+        <td>72.1 / <strong>90.0</strong></td>
+        <td>65.8 / <strong>80.0</strong></td>
+        <td><strong>94.5</strong></td>
+        <td>66.1</td>
+        <td>59.5</td>
+    </tr>
+    <tr>
+        <td>DeepSeek-R1-Distill-Qwen-32B</td>
+        <td>94.3</td>
+        <td>72.6 / 83.3</td>
+        <td>55.2 / 73.3</td>
+        <td>84.1</td>
+        <td>62.1</td>
+        <td>57.2</td>
+    </tr>
+    <tr>
+        <td>QwQ-32B</td>
+        <td>95.5</td>
+        <td><strong>79.5</strong> / 86.7</td>
+        <td><strong>67.1</strong> / 76.7</td>
+        <td>94.4</td>
+        <td>63.3</td>
+        <td>63.4</td>
+    </tr>
+    <tr>
+        <td>DeepSeek-R1-Distill-Llama-70B</td>
+        <td>94.5</td>
+        <td>70.0 / 86.7</td>
+        <td>53.9 / 66.7</td>
+        <td>88.8</td>
+        <td>65.2</td>
+        <td>57.5</td>
+    </tr>
+    <tr>
+        <td>DeepSeek-R1 (671B)</td>
+        <td><strong>97.3</strong></td>
+        <td>79.8 / 86.7</td>
+        <td>66.8 / <strong>80.0</strong></td>
+        <td>89.9</td>
+        <td><strong>71.5</strong></td>
+        <td><strong>65.9</strong></td>
+    </tr>
+    <tr>
+        <th colspan="7" height="30px"></th>
+    </tr>
+    <tr>
+        <td>EXAONE Deep 7.8B</td>
+        <td><strong>94.8</strong></td>
+        <td><strong>70.0</strong> / <strong>83.3</strong></td>
+        <td><strong>59.6</strong> / <strong>76.7</strong></td>
+        <td><strong>89.9</strong></td>
+        <td><strong>62.6</strong></td>
+        <td><strong>55.2</strong></td>
+    </tr>
+    <tr>
+        <td>DeepSeek-R1-Distill-Qwen-7B</td>
+        <td>92.8</td>
+        <td>55.5 / <strong>83.3</strong></td>
+        <td>38.5 / 56.7</td>
+        <td>79.7</td>
+        <td>49.1</td>
+        <td>37.6</td>
+    </tr>
+    <tr>
+        <td>DeepSeek-R1-Distill-Llama-8B</td>
+        <td>89.1</td>
+        <td>50.4 / 80.0</td>
+        <td>33.6 / 53.3</td>
+        <td>74.1</td>
+        <td>49.0</td>
+        <td>39.6</td>
+    </tr>
+    <tr>
+        <td>OpenAI o1-mini</td>
+        <td>90.0</td>
+        <td>63.6 / 80.0</td>
+        <td>54.8 / 66.7</td>
+        <td>84.4</td>
+        <td>60.0</td>
+        <td>53.8</td>
+    </tr>
+    <tr>
+        <th colspan="7" height="30px"></th>
+    </tr>
+    <tr>
+        <td>EXAONE Deep 2.4B</td>
+        <td><strong>92.3</strong></td>
+        <td><strong>52.5</strong> / <strong>76.7</strong></td>
+        <td><strong>47.9</strong> / <strong>73.3</strong></td>
+        <td><strong>79.2</strong></td>
+        <td><strong>54.3</strong></td>
+        <td><strong>46.6</strong></td>
+    </tr>
+    <tr>
+        <td>DeepSeek-R1-Distill-Qwen-1.5B</td>
+        <td>83.9</td>
+        <td>28.9 / 52.7</td>
+        <td>23.9 / 36.7</td>
+        <td>65.6</td>
+        <td>33.8</td>
+        <td>16.9</td>
+    </tr>
+</table>
+## Deployment
+EXAONE Deep models can be served for inference with various frameworks, such as:
+- `TensorRT-LLM`
+- `vLLM`
+- `SGLang`
+- `llama.cpp`
+- `Ollama`
+Please refer to our [EXAONE Deep GitHub](https://github.com/LG-AI-EXAONE/EXAONE-Deep) for more details about the inference frameworks.
+## Quantization
+We are working on quantized versions of EXAONE Deep models in both **AWQ** and **GGUF** formats. We will update this section with detailed instructions upon release.
+## Usage Guideline
+To achieve the expected performance, we recommend using the following configurations:
+1. Ensure the model starts with `<thought>\n` for reasoning steps. The model's output quality may be degraded when you omit it. You can easily apply this feature by using `tokenizer.apply_chat_template()` with `add_generation_prompt=True`. Please check the example code in the [Quickstart](#quickstart) section.
+2. The reasoning steps of EXAONE Deep models, enclosed by `<thought>\n...\n</thought>`, usually contain many tokens, so it may be necessary to remove previous reasoning steps in multi-turn conversations. The provided tokenizer handles this automatically.
+3. Avoid using a system prompt; build the instruction into the user prompt.
+4. When it comes to math problems, include **"Please reason step by step, and put your final answer within \boxed{}."** in your prompt.
+5. In our evaluation, we use `temperature=0.6` and `top_p=0.95` for generation.
+6. When evaluating the models, it is recommended to test multiple times to assess the expected performance accurately.
+## Limitation
+The EXAONE language model has certain limitations and may occasionally generate inappropriate responses. The language model generates responses based on the output probability of tokens, and it is determined during learning from training data. While we have made every effort to exclude personal, harmful, and biased information from the training data, some problematic content may still be included, potentially leading to undesirable responses. Please note that the text generated by the EXAONE language model does not reflect the views of LG AI Research.
+- Inappropriate answers may be generated, which contain personal, harmful or other inappropriate information.
+- Biased responses may be generated, which are associated with age, gender, race, and so on.
+- The generated responses rely heavily on statistics from the training data, which can result in the generation of
+semantically or syntactically incorrect sentences.
+- Since the model does not reflect the latest information, the responses may be false or contradictory.
+LG AI Research strives to reduce potential risks that may arise from EXAONE language models. Users are not allowed
+to engage in any malicious activities (e.g., keying in illegal information) that may induce the creation of inappropriate
+outputs violating LG AI’s ethical principles when using EXAONE language models.
+## License
+The model is licensed under [EXAONE AI Model License Agreement 1.1 - NC](./LICENSE)
+## Citation
+TBU
+## Contact
+LG AI Research Technical Support: [email protected]

assets/EXAONE_Symbol+BI_3d.png ADDED Viewed

assets/exaone_deep_overall_performance.png ADDED Viewed

config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "activation_function": "silu",
+  "architectures": [
+    "ExaoneForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_exaone.ExaoneConfig",
+    "AutoModelForCausalLM": "modeling_exaone.ExaoneForCausalLM",
+    "AutoModelForSequenceClassification": "modeling_exaone.ExaoneForSequenceClassification"
+  },
+  "bos_token_id": 1,
+  "embed_dropout": 0.0,
+  "eos_token_id": 361,
+  "head_dim": 128,
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 27392,
+  "layer_norm_epsilon": 1e-05,
+  "ln_no_scale": false,
+  "max_position_embeddings": 32768,
+  "model_type": "exaone",
+  "num_attention_heads": 40,
+  "num_key_value_heads": 8,
+  "num_layers": 64,
+  "pad_token_id": 0,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 1000000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.43.1",
+  "use_cache": true,
+  "vocab_size": 102400
+}

configuration_exaone.py ADDED Viewed

	@@ -0,0 +1,183 @@

+# coding=utf-8
+# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""EXAONE model configuration"""
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+class ExaoneConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`ExaoneModel`]. It is used to
+    instantiate a EXAONE model according to the specified arguments, defining the model architecture. Instantiating a
+    configuration with the defaults will yield a similar configuration to that of the EXAONE-3.0-7.8B-Instruct [LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
+    outputs. Read the documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 102400):
+            Vocabulary size of the EXAONE model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`ExaoneModel`]. Vocabulary size of the model.
+            Defines the different tokens that can be represented by the `inputs_ids` passed to the forward method of
+            [`ExaoneModel`].
+        max_position_embeddings (`int`, *optional*, defaults to 2048):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        hidden_size (`int`, *optional*, defaults to 2048):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_layers (`int`, *optional*, defaults to 32):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 32):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        num_key_value_heads (`int`, *optional*):
+            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+            `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
+            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+            by meanpooling all the original heads within that group. For more details checkout [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
+            `num_attention_heads`.
+        intermediate_size (`int`, *optional*, defaults to `hidden_size * 4`):
+            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+        activation_function (`str` or `function`, *optional*, defaults to `"silu"`):
+            The non-linear activation function (function or string) in the decoder.
+        rope_theta (`float`, *optional*, defaults to 10000.0):
+            The base period of the RoPE embeddings.
+        rope_scaling (`Dict`, *optional*):
+            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
+            and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
+            accordingly.
+            Expected contents:
+                `rope_type` (`str`):
+                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
+                    'llama3'], with 'default' being the original RoPE implementation.
+                `factor` (`float`, *optional*):
+                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
+                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
+                    original maximum pre-trained length.
+                `original_max_position_embeddings` (`int`, *optional*):
+                    Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
+                    pretraining.
+                `attention_factor` (`float`, *optional*):
+                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
+                    computation. If unspecified, it defaults to value recommended by the implementation, using the
+                    `factor` field to infer the suggested value.
+                `beta_fast` (`float`, *optional*):
+                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
+                    ramp function. If unspecified, it defaults to 32.
+                `beta_slow` (`float`, *optional*):
+                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
+                    ramp function. If unspecified, it defaults to 1.
+                `short_factor` (`List[float]`, *optional*):
+                    Only used with 'longrope'. The scaling factor to be applied to short contexts (<
+                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
+                    size divided by the number of attention heads divided by 2
+                `long_factor` (`List[float]`, *optional*):
+                    Only used with 'longrope'. The scaling factor to be applied to long contexts (<
+                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
+                    size divided by the number of attention heads divided by 2
+                `low_freq_factor` (`float`, *optional*):
+                    Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
+                `high_freq_factor` (`float`, *optional*):
+                    Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
+        embed_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
+            The epsilon used by the layer normalization layers.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models). Only
+            relevant if ``config.is_decoder=True``.
+        bos_token_id (`int`, *optional*, defaults to 0):
+            Beginning of stream token id.
+        eos_token_id (`int`, *optional*, defaults to 2):
+            End of stream token id.
+    Example:
+    ```python
+    >>> from transformers import EXAONEModel, ExaoneConfig
+    >>> # Initializing a EXAONE configuration
+    >>> configuration = ExaoneConfig()
+    >>> # Initializing a model from configuration
+    >>> model = EXAONEModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "exaone"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    attribute_map = {"num_hidden_layers": "num_layers"}
+    def __init__(
+        self,
+        vocab_size=102400,
+        max_position_embeddings=2048,
+        hidden_size=2048,
+        num_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=None,
+        intermediate_size=None,
+        activation_function="silu",
+        rope_theta=10000.0,
+        rope_scaling=None,
+        embed_dropout=0.0,
+        attention_dropout=0.0,
+        layer_norm_epsilon=1e-5,
+        initializer_range=0.02,
+        use_cache=True,
+        bos_token_id=0,
+        eos_token_id=2,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_layers = num_layers
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        if intermediate_size:
+            self.intermediate_size = intermediate_size
+        else:
+            self.intermediate_size = hidden_size * 4
+        self.activation_function = activation_function
+        self.embed_dropout = embed_dropout
+        self.attention_dropout = attention_dropout
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)

generation_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "do_sample": true,
+  "eos_token_id": 361,
+  "pad_token_id": 0,
+  "repetition_penalty": 1.0,
+  "temperature": 0.6,
+  "top_p": 0.95,
+  "transformers_version": "4.43.1"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08823c38117c2977d50d889c301875bf38e381133460b51702cac0b8a43b30e9
+size 4991318640

model-00002-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:265542d0a48061cff11f4574e787ea91c7198bd1d214fb1b9fc00e9c605d4b69
+size 4889103656

model-00003-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa790ae7b76d1b4a6369da563d7823b801f93ee8f70e3827dcf872192a3f0e4b
+size 4836664424

model-00004-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4cc020a639f9ae14e31b03bf456c422cf6a96e650f644995eba62290d189ea7b
+size 4836664440

model-00005-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2841184aab3081e9a852c4d166ee842bd4542e9e0cb2a6ef7c8dbd33913ef64
+size 4836664440

model-00006-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e953a4bd78bf48c97f66f995eee44f197b5f5e09a85968121fe7c5f369a6a60c
+size 4836664440

model-00007-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:938dfad5e7599db2d338dbbfc5d059908755176ca0db7f4c152ffa86dcaba265
+size 4836664440

model-00008-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:10959eb65e70de3bb70d15c5a730953dcd950494689a5f731d72848cef31665f
+size 4836664440

model-00009-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b823f2b4847891eb9de9c8ccca39ab2f4b83bcfd6c6dada938a3cfc24fefeed
+size 4836664440

model-00010-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ca67840a58b5456c43be9b8759acb7f84066ed9666f78933eecb1cd9f724d7b
+size 4836664440

model-00011-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f564364ec489bebc58c24cbd17073225c9fcabc56da3ab503f8c705d76f2067d
+size 4836664440

model-00012-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e099f26ee840a33db91c9fbea7bfe2349964e78870a4594cca9f8e5b964d5978
+size 4836664440

model-00013-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b3242a4d4c80fc0916c560550643edf8362440dfc84d9163a8b5c909e0fde8e
+size 4710824472

model-00014-of-00014.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8e7dce8b53662765bebbc30796fe086a02189b2691ab0dc7e6f48897b38987
+size 1048576128

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,586 @@

+{
+  "metadata": {
+    "total_size": 64006400000
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00014-of-00014.safetensors",
+    "transformer.h.0.attn.attention.k_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.attn.attention.out_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.attn.attention.q_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.attn.attention.v_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.ln_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.ln_2.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.mlp.c_fc_0.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.mlp.c_fc_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.attn.attention.k_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.attn.attention.out_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.attn.attention.q_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.attn.attention.v_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.ln_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.ln_2.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.mlp.c_fc_0.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.mlp.c_fc_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.10.attn.attention.k_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.attn.attention.out_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.attn.attention.q_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.attn.attention.v_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.ln_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.ln_2.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.mlp.c_fc_0.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.mlp.c_fc_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.10.mlp.c_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.attn.attention.k_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.attn.attention.out_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.attn.attention.q_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.attn.attention.v_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.ln_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.ln_2.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.mlp.c_fc_0.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.mlp.c_fc_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.11.mlp.c_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.attn.attention.k_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.attn.attention.out_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.attn.attention.q_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.attn.attention.v_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.ln_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.ln_2.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.mlp.c_fc_0.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.mlp.c_fc_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.12.mlp.c_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.attn.attention.k_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.attn.attention.out_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.attn.attention.q_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.attn.attention.v_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.ln_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.ln_2.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.mlp.c_fc_0.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.mlp.c_fc_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.13.mlp.c_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.attn.attention.k_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.attn.attention.out_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.attn.attention.q_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.attn.attention.v_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.ln_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.ln_2.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.14.mlp.c_fc_0.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.14.mlp.c_fc_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.14.mlp.c_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.attn.attention.k_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.attn.attention.out_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.attn.attention.q_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.attn.attention.v_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.ln_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.ln_2.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.mlp.c_fc_0.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.mlp.c_fc_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.15.mlp.c_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.attn.attention.k_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.attn.attention.out_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.attn.attention.q_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.attn.attention.v_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.ln_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.ln_2.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.mlp.c_fc_0.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.mlp.c_fc_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.16.mlp.c_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.attn.attention.k_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.attn.attention.out_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.attn.attention.q_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.attn.attention.v_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.ln_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.ln_2.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.mlp.c_fc_0.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.mlp.c_fc_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.17.mlp.c_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.attn.attention.k_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.attn.attention.out_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.attn.attention.q_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.attn.attention.v_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.ln_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.ln_2.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.mlp.c_fc_0.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.mlp.c_fc_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.18.mlp.c_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.attn.attention.k_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.attn.attention.out_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.attn.attention.q_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.attn.attention.v_proj.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.ln_1.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.ln_2.weight": "model-00004-of-00014.safetensors",
+    "transformer.h.19.mlp.c_fc_0.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.19.mlp.c_fc_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.19.mlp.c_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.2.attn.attention.k_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.attn.attention.out_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.attn.attention.q_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.attn.attention.v_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.ln_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.ln_2.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.mlp.c_fc_0.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.mlp.c_fc_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.2.mlp.c_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.20.attn.attention.k_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.attn.attention.out_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.attn.attention.q_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.attn.attention.v_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.ln_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.ln_2.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.mlp.c_fc_0.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.mlp.c_fc_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.20.mlp.c_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.attn.attention.k_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.attn.attention.out_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.attn.attention.q_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.attn.attention.v_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.ln_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.ln_2.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.mlp.c_fc_0.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.mlp.c_fc_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.21.mlp.c_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.attn.attention.k_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.attn.attention.out_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.attn.attention.q_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.attn.attention.v_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.ln_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.ln_2.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.mlp.c_fc_0.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.mlp.c_fc_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.22.mlp.c_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.attn.attention.k_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.attn.attention.out_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.attn.attention.q_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.attn.attention.v_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.ln_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.ln_2.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.mlp.c_fc_0.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.mlp.c_fc_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.23.mlp.c_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.attn.attention.k_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.attn.attention.out_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.attn.attention.q_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.attn.attention.v_proj.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.ln_1.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.ln_2.weight": "model-00005-of-00014.safetensors",
+    "transformer.h.24.mlp.c_fc_0.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.24.mlp.c_fc_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.24.mlp.c_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.attn.attention.k_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.attn.attention.out_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.attn.attention.q_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.attn.attention.v_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.ln_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.ln_2.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.mlp.c_fc_0.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.mlp.c_fc_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.25.mlp.c_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.attn.attention.k_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.attn.attention.out_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.attn.attention.q_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.attn.attention.v_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.ln_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.ln_2.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.mlp.c_fc_0.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.mlp.c_fc_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.26.mlp.c_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.attn.attention.k_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.attn.attention.out_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.attn.attention.q_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.attn.attention.v_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.ln_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.ln_2.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.mlp.c_fc_0.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.mlp.c_fc_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.27.mlp.c_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.attn.attention.k_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.attn.attention.out_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.attn.attention.q_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.attn.attention.v_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.ln_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.ln_2.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.mlp.c_fc_0.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.mlp.c_fc_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.28.mlp.c_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.attn.attention.k_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.attn.attention.out_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.attn.attention.q_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.attn.attention.v_proj.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.ln_1.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.ln_2.weight": "model-00006-of-00014.safetensors",
+    "transformer.h.29.mlp.c_fc_0.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.29.mlp.c_fc_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.29.mlp.c_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.3.attn.attention.k_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.attn.attention.out_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.attn.attention.q_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.attn.attention.v_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.ln_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.ln_2.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.mlp.c_fc_0.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.mlp.c_fc_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.3.mlp.c_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.30.attn.attention.k_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.attn.attention.out_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.attn.attention.q_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.attn.attention.v_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.ln_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.ln_2.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.mlp.c_fc_0.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.mlp.c_fc_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.30.mlp.c_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.attn.attention.k_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.attn.attention.out_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.attn.attention.q_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.attn.attention.v_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.ln_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.ln_2.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.mlp.c_fc_0.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.mlp.c_fc_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.31.mlp.c_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.attn.attention.k_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.attn.attention.out_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.attn.attention.q_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.attn.attention.v_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.ln_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.ln_2.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.mlp.c_fc_0.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.mlp.c_fc_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.32.mlp.c_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.attn.attention.k_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.attn.attention.out_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.attn.attention.q_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.attn.attention.v_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.ln_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.ln_2.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.mlp.c_fc_0.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.mlp.c_fc_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.33.mlp.c_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.attn.attention.k_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.attn.attention.out_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.attn.attention.q_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.attn.attention.v_proj.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.ln_1.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.ln_2.weight": "model-00007-of-00014.safetensors",
+    "transformer.h.34.mlp.c_fc_0.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.34.mlp.c_fc_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.34.mlp.c_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.attn.attention.k_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.attn.attention.out_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.attn.attention.q_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.attn.attention.v_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.ln_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.ln_2.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.mlp.c_fc_0.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.mlp.c_fc_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.35.mlp.c_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.attn.attention.k_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.attn.attention.out_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.attn.attention.q_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.attn.attention.v_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.ln_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.ln_2.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.mlp.c_fc_0.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.mlp.c_fc_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.36.mlp.c_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.attn.attention.k_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.attn.attention.out_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.attn.attention.q_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.attn.attention.v_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.ln_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.ln_2.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.mlp.c_fc_0.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.mlp.c_fc_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.37.mlp.c_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.attn.attention.k_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.attn.attention.out_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.attn.attention.q_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.attn.attention.v_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.ln_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.ln_2.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.mlp.c_fc_0.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.mlp.c_fc_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.38.mlp.c_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.attn.attention.k_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.attn.attention.out_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.attn.attention.q_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.attn.attention.v_proj.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.ln_1.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.ln_2.weight": "model-00008-of-00014.safetensors",
+    "transformer.h.39.mlp.c_fc_0.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.39.mlp.c_fc_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.39.mlp.c_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.4.attn.attention.k_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.4.attn.attention.out_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.4.attn.attention.q_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.4.attn.attention.v_proj.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.4.ln_1.weight": "model-00001-of-00014.safetensors",
+    "transformer.h.4.ln_2.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.4.mlp.c_fc_0.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.4.mlp.c_fc_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.4.mlp.c_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.40.attn.attention.k_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.attn.attention.out_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.attn.attention.q_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.attn.attention.v_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.ln_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.ln_2.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.mlp.c_fc_0.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.mlp.c_fc_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.40.mlp.c_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.attn.attention.k_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.attn.attention.out_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.attn.attention.q_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.attn.attention.v_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.ln_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.ln_2.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.mlp.c_fc_0.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.mlp.c_fc_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.41.mlp.c_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.attn.attention.k_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.attn.attention.out_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.attn.attention.q_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.attn.attention.v_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.ln_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.ln_2.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.mlp.c_fc_0.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.mlp.c_fc_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.42.mlp.c_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.attn.attention.k_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.attn.attention.out_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.attn.attention.q_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.attn.attention.v_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.ln_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.ln_2.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.mlp.c_fc_0.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.mlp.c_fc_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.43.mlp.c_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.attn.attention.k_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.attn.attention.out_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.attn.attention.q_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.attn.attention.v_proj.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.ln_1.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.ln_2.weight": "model-00009-of-00014.safetensors",
+    "transformer.h.44.mlp.c_fc_0.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.44.mlp.c_fc_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.44.mlp.c_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.attn.attention.k_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.attn.attention.out_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.attn.attention.q_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.attn.attention.v_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.ln_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.ln_2.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.mlp.c_fc_0.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.mlp.c_fc_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.45.mlp.c_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.attn.attention.k_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.attn.attention.out_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.attn.attention.q_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.attn.attention.v_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.ln_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.ln_2.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.mlp.c_fc_0.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.mlp.c_fc_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.46.mlp.c_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.attn.attention.k_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.attn.attention.out_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.attn.attention.q_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.attn.attention.v_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.ln_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.ln_2.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.mlp.c_fc_0.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.mlp.c_fc_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.47.mlp.c_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.attn.attention.k_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.attn.attention.out_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.attn.attention.q_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.attn.attention.v_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.ln_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.ln_2.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.mlp.c_fc_0.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.mlp.c_fc_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.48.mlp.c_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.attn.attention.k_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.attn.attention.out_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.attn.attention.q_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.attn.attention.v_proj.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.ln_1.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.ln_2.weight": "model-00010-of-00014.safetensors",
+    "transformer.h.49.mlp.c_fc_0.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.49.mlp.c_fc_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.49.mlp.c_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.5.attn.attention.k_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.attn.attention.out_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.attn.attention.q_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.attn.attention.v_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.ln_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.ln_2.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.mlp.c_fc_0.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.mlp.c_fc_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.5.mlp.c_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.50.attn.attention.k_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.attn.attention.out_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.attn.attention.q_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.attn.attention.v_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.ln_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.ln_2.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.mlp.c_fc_0.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.mlp.c_fc_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.50.mlp.c_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.attn.attention.k_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.attn.attention.out_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.attn.attention.q_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.attn.attention.v_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.ln_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.ln_2.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.mlp.c_fc_0.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.mlp.c_fc_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.51.mlp.c_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.attn.attention.k_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.attn.attention.out_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.attn.attention.q_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.attn.attention.v_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.ln_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.ln_2.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.mlp.c_fc_0.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.mlp.c_fc_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.52.mlp.c_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.attn.attention.k_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.attn.attention.out_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.attn.attention.q_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.attn.attention.v_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.ln_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.ln_2.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.mlp.c_fc_0.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.mlp.c_fc_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.53.mlp.c_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.attn.attention.k_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.attn.attention.out_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.attn.attention.q_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.attn.attention.v_proj.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.ln_1.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.ln_2.weight": "model-00011-of-00014.safetensors",
+    "transformer.h.54.mlp.c_fc_0.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.54.mlp.c_fc_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.54.mlp.c_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.attn.attention.k_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.attn.attention.out_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.attn.attention.q_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.attn.attention.v_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.ln_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.ln_2.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.mlp.c_fc_0.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.mlp.c_fc_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.55.mlp.c_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.attn.attention.k_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.attn.attention.out_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.attn.attention.q_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.attn.attention.v_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.ln_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.ln_2.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.mlp.c_fc_0.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.mlp.c_fc_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.56.mlp.c_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.attn.attention.k_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.attn.attention.out_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.attn.attention.q_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.attn.attention.v_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.ln_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.ln_2.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.mlp.c_fc_0.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.mlp.c_fc_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.57.mlp.c_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.attn.attention.k_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.attn.attention.out_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.attn.attention.q_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.attn.attention.v_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.ln_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.ln_2.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.mlp.c_fc_0.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.mlp.c_fc_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.58.mlp.c_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.attn.attention.k_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.attn.attention.out_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.attn.attention.q_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.attn.attention.v_proj.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.ln_1.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.ln_2.weight": "model-00012-of-00014.safetensors",
+    "transformer.h.59.mlp.c_fc_0.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.59.mlp.c_fc_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.59.mlp.c_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.6.attn.attention.k_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.attn.attention.out_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.attn.attention.q_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.attn.attention.v_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.ln_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.ln_2.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.mlp.c_fc_0.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.mlp.c_fc_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.6.mlp.c_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.60.attn.attention.k_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.attn.attention.out_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.attn.attention.q_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.attn.attention.v_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.ln_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.ln_2.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.mlp.c_fc_0.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.mlp.c_fc_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.60.mlp.c_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.attn.attention.k_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.attn.attention.out_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.attn.attention.q_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.attn.attention.v_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.ln_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.ln_2.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.mlp.c_fc_0.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.mlp.c_fc_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.61.mlp.c_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.attn.attention.k_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.attn.attention.out_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.attn.attention.q_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.attn.attention.v_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.ln_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.ln_2.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.mlp.c_fc_0.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.mlp.c_fc_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.62.mlp.c_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.attn.attention.k_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.attn.attention.out_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.attn.attention.q_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.attn.attention.v_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.ln_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.ln_2.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.mlp.c_fc_0.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.mlp.c_fc_1.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.63.mlp.c_proj.weight": "model-00013-of-00014.safetensors",
+    "transformer.h.7.attn.attention.k_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.attn.attention.out_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.attn.attention.q_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.attn.attention.v_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.ln_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.ln_2.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.mlp.c_fc_0.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.mlp.c_fc_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.7.mlp.c_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.attn.attention.k_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.attn.attention.out_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.attn.attention.q_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.attn.attention.v_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.ln_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.ln_2.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.mlp.c_fc_0.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.mlp.c_fc_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.8.mlp.c_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.attn.attention.k_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.attn.attention.out_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.attn.attention.q_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.attn.attention.v_proj.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.ln_1.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.ln_2.weight": "model-00002-of-00014.safetensors",
+    "transformer.h.9.mlp.c_fc_0.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.9.mlp.c_fc_1.weight": "model-00003-of-00014.safetensors",
+    "transformer.h.9.mlp.c_proj.weight": "model-00003-of-00014.safetensors",
+    "transformer.ln_f.weight": "model-00013-of-00014.safetensors",
+    "transformer.wte.weight": "model-00001-of-00014.safetensors"
+  }
+}

modeling_exaone.py ADDED Viewed

	@@ -0,0 +1,1394 @@

+# coding=utf-8
+# Copyright 2021 The LG AI Research EXAONE Lab.
+# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""LG AI Research EXAONE Lab"""
+import math
+from typing import Optional, Tuple, Union
+import torch
+import torch.utils.checkpoint
+from packaging import version
+from torch import nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from transformers.activations import ACT2FN
+from transformers.cache_utils import Cache, DynamicCache, StaticCache
+from transformers.generation import GenerationMixin
+from transformers.modeling_attn_mask_utils import AttentionMaskConverter
+from transformers.modeling_flash_attention_utils import _flash_attention_forward
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPast,
+    BaseModelOutputWithPastAndCrossAttentions,
+    CausalLMOutputWithPast,
+    QuestionAnsweringModelOutput,
+    SequenceClassifierOutputWithPast,
+)
+from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS
+from transformers.modeling_utils import PreTrainedModel
+from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
+from transformers.utils import (
+    add_code_sample_docstrings,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    is_flash_attn_2_available,
+    logging,
+)
+from .configuration_exaone import ExaoneConfig
+# Optional fused RMSNorm kernel: only attempted when transformers reports flash-attn 2 as available.
+if is_flash_attn_2_available():
+    try:
+        import flash_attn
+        # `rms_norm_fn` is imported from the `layer_norm` module for flash-attn versions
+        # above 2.4.2 and from the `layernorm` module otherwise.
+        if version.parse(flash_attn.__version__) > version.parse("2.4.2"):
+            from flash_attn.ops.triton.layer_norm import rms_norm_fn
+        else:
+            from flash_attn.ops.triton.layernorm import rms_norm_fn
+    except ImportError:
+        # Best effort: on failure `rms_norm_fn` stays undefined at module level, so any
+        # later call that references it would raise NameError.
+        pass
+# Module-level logger obtained from the transformers logging utilities.
+logger = logging.get_logger(__name__)
+# NOTE(review): presumably consumed by the docstring decorators imported above — confirm usage.
+_CHECKPOINT_FOR_DOC = "exaone"
+_CONFIG_FOR_DOC = "ExaoneConfig"
+EXAONE_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "exaone",
+]
+@torch.jit.script
+def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
+    """
+    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
+    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
+    """
+    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
+    if n_rep == 1:
+        return hidden_states
+    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
+    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
+def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
+    """Applies Rotary Position Embedding to the query and key tensors.
+    Args:
+        q (`torch.Tensor`): The query tensor.
+        k (`torch.Tensor`): The key tensor.
+        cos (`torch.Tensor`): The cosine part of the rotary embedding.
+        sin (`torch.Tensor`): The sine part of the rotary embedding.
+        unsqueeze_dim (`int`, *optional*, defaults to 1):
+            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
+            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
+            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
+            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
+            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
+            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
+    Returns:
+        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
+    """
+    cos = cos.unsqueeze(unsqueeze_dim)
+    sin = sin.unsqueeze(unsqueeze_dim)
+    q_embed = (q * cos) + (rotate_half(q) * sin)
+    k_embed = (k * cos) + (rotate_half(k) * sin)
+    return q_embed, k_embed
+def rotate_half(x):
+    """Rotates half the hidden dims of the input."""
+    x1 = x[..., : x.shape[-1] // 2]
+    x2 = x[..., x.shape[-1] // 2 :]
+    return torch.cat((-x2, x1), dim=-1)
+def _prepare_4d_causal_attention_mask_with_cache_position(
+    attention_mask: torch.Tensor,
+    sequence_length: int,
+    target_length: int,
+    dtype: torch.dtype,
+    device: torch.device,
+    min_dtype: float,
+    cache_position: torch.Tensor,
+    batch_size: int,
+):
+    """
+    Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
+    `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.
+    Args:
+        attention_mask (`torch.Tensor`):
+            A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape `(batch_size, 1, query_length, key_value_length)`.
+            May also be `None`, in which case only the causal structure is applied.
+        sequence_length (`int`):
+            The sequence length being processed.
+        target_length (`int`):
+            The target length: when generating with static cache, the mask should be as long as the static cache, to account for the 0 padding, the part of the cache that is not filled yet.
+        dtype (`torch.dtype`):
+            The dtype to use for the 4D attention mask.
+        device (`torch.device`):
+            The device to place the 4D attention mask on.
+        min_dtype (`float`):
+            The minimum value representable with the dtype `dtype`.
+        cache_position (`torch.Tensor`):
+            Indices depicting the position of the input sequence tokens in the sequence.
+        batch_size (`int`):
+            Batch size.
+    Returns:
+        `torch.Tensor`: A 4D `attention_mask` is returned unchanged; otherwise an additive mask holding
+        `min_dtype` at blocked positions and 0 at positions that may be attended to.
+    """
+    if attention_mask is not None and attention_mask.dim() == 4:
+        # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing.
+        causal_mask = attention_mask
+    else:
+        # Start fully blocked, then open up the allowed positions.
+        causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device)
+        if sequence_length != 1:
+            # Keep `min_dtype` strictly above the diagonal; the diagonal and below become 0.
+            causal_mask = torch.triu(causal_mask, diagonal=1)
+        # Keep `min_dtype` only at key indices strictly greater than each query's cache position;
+        # multiplying by the boolean comparison zeroes every other entry.
+        causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
+        # Add batch and head axes; `expand` yields a broadcast view rather than a copy.
+        causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
+        if attention_mask is not None:
+            causal_mask = causal_mask.clone()  # copy to contiguous memory for in-place edit
+            mask_length = attention_mask.shape[-1]
+            # The sum is exactly 0 where the causal mask allows attention (0) and `attention_mask` is 0
+            # (assumes a 0/1 padding mask — TODO confirm against callers); those entries get blocked.
+            padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
+            padding_mask = padding_mask == 0
+            causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
+                padding_mask, min_dtype
+            )
+    return causal_mask
+class ExaoneRMSNorm(torch.nn.Module):
+    def __init__(self, hidden_size, eps=1e-6):
+        super().__init__()
+        self.eps = eps
+        self.weight = torch.nn.Parameter(torch.ones(hidden_size))
+    def forward(self, hidden_states):
+        input_dtype = hidden_states.dtype
+        hidden_states = hidden_states.to(torch.float32)
+        variance = hidden_states.pow(2).mean(-1, keepdim=True)
+        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
+        return self.weight * hidden_states.to(input_dtype)
+class ExaoneTritonRMSNorm(torch.nn.Module):
+    """RMSNorm variant that delegates the computation to flash-attn's Triton `rms_norm_fn`.
+    NOTE(review): `rms_norm_fn` is only bound at module level when the optional flash-attn import
+    succeeds; confirm this class is only selected in that case.
+    """
+    def __init__(
+        self,
+        hidden_size: int = 0,
+        eps: float = 1e-5,
+    ):
+        super().__init__()
+        self.eps = eps
+        # No dropout module is attached here, so `forward` passes dropout_p=0.0 unless `drop` is set later.
+        self.drop = None
+        self.weight = torch.nn.Parameter(torch.empty(hidden_size))
+        # `bias` is registered as an explicit None parameter and handed to the kernel unchanged.
+        self.register_parameter("bias", None)
+        self.reset_parameters()
+    def reset_parameters(self):
+        """Fill the (initially uninitialised) scale weight with ones."""
+        torch.nn.init.ones_(self.weight)
+    def forward(self, x, residual=None, prenorm=False, residual_in_fp32=False):
+        """Call `rms_norm_fn` on `x`, forwarding `residual`, `prenorm` and `residual_in_fp32` unchanged."""
+        return rms_norm_fn(
+            x,
+            self.weight,
+            self.bias,
+            residual=residual,
+            eps=self.eps,
+            # Dropout is only requested when a `drop` module exists and the module is in training mode.
+            dropout_p=self.drop.p if self.drop is not None and self.training else 0.0,
+            prenorm=prenorm,
+            residual_in_fp32=residual_in_fp32,
+        )
+ALL_LAYERNORM_LAYERS.append(ExaoneRMSNorm)
+ALL_LAYERNORM_LAYERS.append(ExaoneTritonRMSNorm)
+class ExaoneRotaryEmbedding(nn.Module):
+    """Produces the cos/sin tensors for rotary position embeddings.
+    The inverse frequencies come from the `ROPE_INIT_FUNCTIONS` entry selected by the RoPE type in the config.
+    """
+    def __init__(self, config: ExaoneConfig, device=None):
+        super().__init__()
+        if config.rope_scaling is not None:
+            # Read the "rope_type" key, falling back to the "type" key.
+            self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
+        else:
+            self.rope_type = "default"
+        self.rope_theta = config.rope_theta
+        # `max_seq_len` can grow in `_update_freq`; `original_max_seq_len` stays at the configured value.
+        self.max_seq_len = config.max_position_embeddings
+        self.original_max_seq_len = config.max_position_embeddings
+        self.config = config
+        if self.rope_type not in ROPE_INIT_FUNCTIONS:
+            raise KeyError(f"The EXAONE model does not support RoPE type: {self.rope_type}")
+        self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
+        inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
+        # Registered with persistent=False, i.e. excluded from the module's state_dict.
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
+        # Reference to the initial frequencies, used by `_update_freq` when resetting.
+        self.original_inv_freq = self.inv_freq
+    def _update_freq(self, position_ids, device):
+        """
+        dynamic RoPE layers should recompute `inv_freq` in the following situations:
+        1 - growing beyond the cached sequence length (allow scaling)
+        2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
+        """
+        # Largest position index plus one; this is a tensor, not a Python int.
+        seq_len = torch.max(position_ids) + 1
+        if seq_len > self.max_seq_len:  # expand to seq_len
+            inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, seq_len=seq_len)
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
+            self.max_seq_len = seq_len
+        if seq_len < self.original_max_seq_len and self.max_seq_len > self.original_max_seq_len:  # reset to original
+            self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
+            self.max_seq_len = self.original_max_seq_len
+    @torch.no_grad()
+    def forward(self, x, position_ids):
+        """Return `(cos, sin)` for `position_ids`, cast to the dtype of `x`.
+        `x` is only read for its device and dtype.
+        """
+        if "dynamic" in self.rope_type:
+            self._update_freq(position_ids, device=x.device)
+        # Broadcast the frequencies over the batch: (position_ids.shape[0], len(inv_freq), 1).
+        inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
+        # assumes position_ids is 2D (batch, seq_len), giving (batch, 1, seq_len) — TODO confirm
+        position_ids_expanded = position_ids[:, None, :].float()
+        device_type = x.device.type
+        # "mps" (or a non-string device type) is replaced by "cpu" for the autocast context below.
+        device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu"
+        # Autocast is disabled so the float32 inputs above are not downcast during the matmul.
+        with torch.autocast(device_type=device_type, enabled=False):
+            freqs = (inv_freq_expanded @ position_ids_expanded).transpose(1, 2)
+            # The frequencies are duplicated along the last dimension before taking cos/sin.
+            emb = torch.cat((freqs, freqs), dim=-1)
+            cos, sin = emb.cos(), emb.sin()
+        # `attention_scaling` is the second value returned by the RoPE init function.
+        cos, sin = cos * self.attention_scaling, sin * self.attention_scaling
+        return cos.to(x.dtype), sin.to(x.dtype)
+class ExaoneSelfAttention(nn.Module):
+    """Eager attention with rotary position embeddings and grouped key/value heads.
+    Scores are computed with an explicit matmul and softmax, so the attention weights can be returned
+    when `output_attentions` is set.
+    """
+    def __init__(self, config: ExaoneConfig, layer_idx: Optional[int] = None):
+        super().__init__()
+        self.config = config
+        # Passed as the layer index to `past_key_value.update` in `forward`.
+        self.layer_idx = layer_idx
+        self.embed_dim = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_dim = self.embed_dim // self.num_heads
+        self.num_key_value_heads = config.num_key_value_heads
+        # How many query heads share each key/value head.
+        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.attention_dropout_rate = config.attention_dropout
+        if self.head_dim * self.num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+            )
+        # Fallback used by `forward` only when no precomputed `position_embeddings` are passed in.
+        self.rotary = ExaoneRotaryEmbedding(config)
+        self.k_proj = nn.Linear(self.embed_dim, self.num_key_value_heads * self.head_dim, bias=False)
+        self.v_proj = nn.Linear(self.embed_dim, self.num_key_value_heads * self.head_dim, bias=False)
+        self.q_proj = nn.Linear(self.embed_dim, self.num_heads * self.head_dim, bias=False)
+        self.out_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=False)
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        """Run attention over `hidden_states` of shape (batch, seq_len, hidden).
+        Returns `(attn_output, attn_weights, past_key_value)`; `attn_weights` is None unless
+        `output_attentions` is truthy, and `past_key_value` is the object that was passed in.
+        `use_cache` and `**kwargs` are accepted but not read by this method.
+        Raises:
+            ValueError: if the attention output does not have shape (batch, num_heads, seq_len, head_dim).
+        """
+        bsz, q_len, _ = hidden_states.size()
+        query_states = self.q_proj(hidden_states)
+        key_states = self.k_proj(hidden_states)
+        value_states = self.v_proj(hidden_states)
+        # Split the projections into heads: (bsz, heads, q_len, head_dim).
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        # Prefer caller-supplied cos/sin; otherwise compute them from `position_ids`.
+        if position_embeddings is None:
+            cos, sin = self.rotary(value_states, position_ids=position_ids)
+        else:
+            cos, sin = position_embeddings
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
+        if past_key_value is not None:
+            # sin and cos are specific to RoPE models; cache_position needed for the static cache
+            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
+            # The keys/values returned by the cache replace the freshly computed ones from here on.
+            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+        # Repeat each key/value head so the head count matches the query heads.
+        key_states = repeat_kv(key_states, self.num_key_value_groups)
+        value_states = repeat_kv(value_states, self.num_key_value_groups)
+        # Scaled dot-product scores: (bsz, heads, q_len, kv_len).
+        attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
+        if attention_mask is not None:
+            # The mask is additive and is sliced on its last axis to the current key length.
+            causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
+            attn_weights = attn_weights + causal_mask
+        # Softmax is computed in float32 and cast back to the query dtype.
+        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+        attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout_rate, training=self.training)
+        attn_output = torch.matmul(attn_weights, value_states)
+        if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+            raise ValueError(
+                f"Attention outputs should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+                f" {attn_output.size()}"
+            )
+        # Merge the heads back: (bsz, q_len, embed_dim).
+        attn_output = attn_output.transpose(1, 2).contiguous()
+        attn_output = attn_output.reshape(bsz, q_len, self.embed_dim).contiguous()
+        attn_output = self.out_proj(attn_output)
+        if not output_attentions:
+            attn_weights = None
+        return attn_output, attn_weights, past_key_value
+class ExaoneFlashAttention(ExaoneSelfAttention):
+    """Attention variant that computes the attention output through `_flash_attention_forward`.
+    Projections, rotary embedding and cache handling mirror the parent class; attention weights are never returned.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+        """Reshape a flat projection to (bsz, num_heads, seq_len, head_dim); not called in this class's visible code."""
+        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        """Run flash attention over `hidden_states` of shape (batch, seq_len, hidden).
+        Returns `(attn_output, None, past_key_value)`: `output_attentions` is overridden to False, so the
+        second element is always None. `use_cache` and `**kwargs` are accepted but not read.
+        Raises:
+            ValueError: if `past_key_value` is a `StaticCache`.
+        """
+        if isinstance(past_key_value, StaticCache):
+            raise ValueError(
+                "`static` cache implementation is not compatible with `attn_implementation==flash_attention_2` "
+                "make sure to use `sdpa` in the mean time, and open an issue at https://github.com/huggingface/transformers"
+            )
+        # The caller's value is ignored on this path.
+        output_attentions = False
+        bsz, q_len, h_size = hidden_states.size()
+        query_states = self.q_proj(hidden_states)
+        key_states = self.k_proj(hidden_states)
+        value_states = self.v_proj(hidden_states)
+        # Split into heads: (bsz, heads, q_len, head_dim), the layout used for RoPE and the cache update.
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        # Prefer caller-supplied cos/sin; otherwise compute them from `position_ids`.
+        if position_embeddings is None:
+            cos, sin = self.rotary(value_states, position_ids=position_ids)
+        else:
+            cos, sin = position_embeddings
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
+        if past_key_value is not None:
+            # sin and cos are specific to RoPE models; cache_position needed for the static cache
+            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
+            # Only update cache as shape of [bsz, n_head, q_len, head_dim]
+            # TODO: need to be fixed when transformers' KV cache layout is changed
+            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+        # Swap back to (bsz, seq_len, heads, head_dim) before handing the tensors to the flash-attention helper.
+        query_states = query_states.transpose(1, 2)
+        key_states = key_states.transpose(1, 2)
+        value_states = value_states.transpose(1, 2)
+        # In PEFT, usually we cast the layer norms in float32 for training stability reasons
+        # therefore the input hidden states gets silently casted in float32. Hence, we need
+        # cast them back in the correct dtype just to be sure everything works as expected.
+        input_dtype = query_states.dtype
+        if input_dtype == torch.float32:
+            # Target dtype priority: autocast dtype, then the pre-quantization dtype, then the q_proj weight dtype.
+            if torch.is_autocast_enabled():
+                target_dtype = torch.get_autocast_gpu_dtype()
+            # Handle the case where the model is quantized
+            elif hasattr(self.config, "_pre_quantization_dtype"):
+                target_dtype = self.config._pre_quantization_dtype
+            else:
+                target_dtype = self.q_proj.weight.dtype
+            logger.warning_once(
+                f"The input hidden states seems to be silently casted in float32, this might be related to"
+                f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in"
+                f" {target_dtype}."
+            )
+            query_states = query_states.to(target_dtype)
+            key_states = key_states.to(target_dtype)
+            value_states = value_states.to(target_dtype)
+        # Attention dropout is applied only in training mode.
+        dropout_rate = self.attention_dropout_rate if self.training else 0.0
+        attn_output = _flash_attention_forward(
+            query_states, key_states, value_states, attention_mask, q_len, dropout=dropout_rate, is_causal=True
+        )
+        # Merge the heads back: (bsz, q_len, embed_dim).
+        attn_output = attn_output.reshape(bsz, q_len, self.embed_dim).contiguous()
+        attn_output = self.out_proj(attn_output)
+        # Always true here because `output_attentions` was forced to False above.
+        if not output_attentions:
+            attn_weights = None
+        return attn_output, attn_weights, past_key_value
+class ExaoneSdpaAttention(ExaoneSelfAttention):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        if output_attentions:
+            logger.warning_once(
+                "ExaoneModel is using ExaoneSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to the manual attention implementation, "
+                'but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
+            )
+            return super().forward(
+                hidden_states=hidden_states,
+                attention_mask=attention_mask,
+                position_ids=position_ids,
+                past_key_value=past_key_value,
+                output_attentions=output_attentions,
+                use_cache=use_cache,
+                cache_position=cache_position,
+                position_embeddings=position_embeddings,
+                **kwargs,
+            )
+        bsz, q_len, _ = hidden_states.size()
+        query_states = self.q_proj(hidden_states)
+        key_states = self.k_proj(hidden_states)
+        value_states = self.v_proj(hidden_states)
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        if position_embeddings is None:
+            cos, sin = self.rotary(value_states, position_ids=position_ids)
+        else:
+            cos, sin = position_embeddings
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
+        if past_key_value is not None:
+            # sin and cos are specific to RoPE models; cache_position needed for the static cache
+            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
+            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+        key_states = repeat_kv(key_states, self.num_key_value_groups)
+        value_states = repeat_kv(value_states, self.num_key_value_groups)
+        causal_mask = attention_mask
+        if attention_mask is not None:
+            causal_mask = causal_mask[:, :, :, : key_states.shape[-2]]
+        # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask,
+        # Reference: https://github.com/pytorch/pytorch/issues/112577.
+        if query_states.device.type == "cuda" and causal_mask is not None:
+            query_states = query_states.contiguous()
+            key_states = key_states.contiguous()
+            value_states = value_states.contiguous()
+        # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment
+        # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling.
+        is_causal = True if causal_mask is None and q_len > 1 else False
+        attn_output = torch.nn.functional.scaled_dot_product_attention(
+            query_states,
+            key_states,
+            value_states,
+            attn_mask=causal_mask,
+            dropout_p=self.attention_dropout_rate if self.training else 0.0,
+            is_causal=is_causal,
+        )
+        attn_output = attn_output.transpose(1, 2).contiguous()
+        attn_output = attn_output.reshape(bsz, q_len, self.embed_dim).contiguous()
+        attn_output = self.out_proj(attn_output)
+        return attn_output, None, past_key_value
+class ExaoneAttention(nn.Module):
+    def __init__(self, config, layer_id=0):
+        super().__init__()
+        self.layer_id = layer_id
+        if "flash" in config._attn_implementation:
+            self.attention = ExaoneFlashAttention(config, self.layer_id)
+        elif "sdpa" in config._attn_implementation:
+            self.attention = ExaoneSdpaAttention(config, self.layer_id)
+        else:
+            self.attention = ExaoneSelfAttention(config, self.layer_id)
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        return self.attention(
+            hidden_states=hidden_states,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_value=past_key_value,
+            output_attentions=output_attentions,
+            use_cache=use_cache,
+            cache_position=cache_position,
+            position_embeddings=position_embeddings,
+            **kwargs,
+        )
+class ExaoneGatedMLP(nn.Module):
+    def __init__(self, intermediate_size, config):
+        super().__init__()
+        self.config = config
+        embed_dim = config.hidden_size
+        self.c_fc_0 = nn.Linear(embed_dim, intermediate_size, bias=False)
+        self.c_fc_1 = nn.Linear(embed_dim, intermediate_size, bias=False)
+        self.c_proj = nn.Linear(intermediate_size, embed_dim, bias=False)
+        self.act = ACT2FN[config.activation_function]
+    def forward(self, hidden_states):
+        output_proj = self.c_proj(self.act(self.c_fc_0(hidden_states)) * self.c_fc_1(hidden_states))
+        return output_proj
+class ExaoneBlock(nn.Module):
+    def __init__(self, config, layer_id):
+        super().__init__()
+        self.config = config
+        hidden_size = config.hidden_size
+        inner_dim = config.intermediate_size if config.intermediate_size is not None else 4 * hidden_size
+        self.ln_1 = ExaoneRMSNorm(hidden_size=hidden_size, eps=config.layer_norm_epsilon)
+        self.attn = ExaoneAttention(config, layer_id)
+        self.ln_2 = ExaoneRMSNorm(hidden_size=hidden_size, eps=config.layer_norm_epsilon)
+        self.mlp = ExaoneGatedMLP(inner_dim, config)
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        **kwargs,
+    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+        residual = hidden_states
+        hidden_states = self.ln_1(hidden_states)
+        hidden_states, self_attn_weights, present_key_value = self.attn(
+            hidden_states=hidden_states,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_value=past_key_value,
+            output_attentions=output_attentions,
+            use_cache=use_cache,
+            cache_position=cache_position,
+            position_embeddings=position_embeddings,
+            **kwargs,
+        )
+        # residual connection
+        hidden_states = residual + hidden_states
+        residual = hidden_states
+        hidden_states = self.ln_2(hidden_states)
+        hidden_states = self.mlp(hidden_states)
+        hidden_states = residual + hidden_states
+        outputs = (hidden_states,)
+        if output_attentions:
+            outputs += (self_attn_weights,)
+        if use_cache:
+            outputs += (present_key_value,)
+        return outputs
+class ExaonePreTrainedModel(PreTrainedModel):
+    """
+    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
+    models.
+    """
+    config_class = ExaoneConfig
+    base_model_prefix = "transformer"
+    supports_gradient_checkpointing = True
+    _no_split_modules = ["ExaoneBlock"]
+    _skip_keys_device_placement = "past_key_values"
+    _supports_flash_attn_2 = True
+    _supports_sdpa = True
+    _supports_cache_class = True
+    def __init__(self, *inputs, **kwargs):
+        super().__init__(*inputs, **kwargs)
+    def _init_weights(self, module):
+        """Initialize the weights."""
+        if isinstance(module, (nn.Linear,)):
+            # Slightly different from the TF version which uses truncated_normal for initialization
+            # cf https://github.com/pytorch/pytorch/pull/5617
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+        elif isinstance(module, ExaoneRMSNorm):
+            module.weight.data.fill_(1.0)
+# Shared class-level documentation text; passed to `add_start_docstrings` on the model classes in this file.
+EXAONE_START_DOCSTRING = r"""
+    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+    etc.)
+    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+    and behavior.
+    Parameters:
+        config ([`ExaoneConfig`]): Model configuration class with all the parameters of the model.
+            Initializing with a config file does not load the weights associated with the model, only the
+            configuration. Check out the `PreTrainedModel.from_pretrained` method to load the model weights.
+"""
+# Shared documentation of the `forward` arguments; passed to `add_start_docstrings_to_model_forward` on the
+# `forward` methods in this file.
+EXAONE_INPUTS_DOCSTRING = r"""
+    Args:
+        input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`, *optional*):
+            `input_ids_length` = `sequence_length` if `past_key_values` is `None` else
+            `past_key_values.get_seq_length()` (`sequence_length` of input past key value states). Indices of input
+            sequence tokens in the vocabulary.
+            If `past_key_values` is used, only `input_ids` that do not have their past calculated should be
+            passed as `input_ids`.
+            `What are input IDs? <../glossary.html#input-ids>`__
+        attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+            `What are attention masks? <../glossary.html#attention-mask>`__
+        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.max_position_embeddings - 1]`.
+            `What are position IDs? <../glossary.html#position-ids>`_
+        past_key_values (`Cache`, *optional*):
+            Contains precomputed hidden-states (key and values in the attention blocks) as computed by the model (see
+            `past_key_values` output below). Can be used to speed up sequential decoding. This typically consists
+            in the `past_key_values` returned by the model at a previous stage of decoding, when `use_cache=True` or
+            `config.use_cache=True`.
+        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+            This is useful if you want more control over how to convert `input_ids` indices into associated
+            vectors than the model's internal embedding lookup matrix.
+            If `past_key_values` is used, optionally only the last `inputs_embeds` have to be input (see
+            `past_key_values`).
+        use_cache (`bool`, *optional*):
+            If set to `True`, `past_key_values` key value states are returned and can be used to speed up
+            decoding (see `past_key_values`).
+        output_attentions (`bool`, *optional*):
+            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
+            tensors for more detail.
+        output_hidden_states (`bool`, *optional*):
+            Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
+            more detail.
+        return_dict (`bool`, *optional*):
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+        cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
+            Indices depicting the position of the input sequence tokens in the sequence. Contrarily to `position_ids`,
+            this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
+            the complete sequence length.
+"""
+@add_start_docstrings(
+    "The bare EXAONE Model transformer outputting raw hidden-states without any specific head on top.",
+    EXAONE_START_DOCSTRING,
+)
+class ExaoneModel(ExaonePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.config = config
+        self.embed_dim = config.hidden_size
+        self.wte = nn.Embedding(config.vocab_size, self.embed_dim, self.config.pad_token_id)
+        self.drop = nn.Dropout(float(config.embed_dropout))
+        self.h = nn.ModuleList([ExaoneBlock(config, layer_id=i) for i in range(config.num_layers)])
+        self.ln_f = ExaoneRMSNorm(hidden_size=self.embed_dim, eps=config.layer_norm_epsilon)
+        self.rotary = ExaoneRotaryEmbedding(config)
+        self.gradient_checkpointing = False
+        # Initialize weights and apply final processing
+        self.post_init()
+    def get_input_embeddings(self):
+        return self.wte
+    def set_input_embeddings(self, new_embeddings):
+        self.wte = new_embeddings
+    @add_start_docstrings_to_model_forward(EXAONE_INPUTS_DOCSTRING)
+    @add_code_sample_docstrings(
+        checkpoint=_CHECKPOINT_FOR_DOC,
+        output_type=BaseModelOutputWithPastAndCrossAttentions,
+        config_class=_CONFIG_FOR_DOC,
+    )
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        past_key_values: Optional[Cache] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        cache_position: Optional[torch.LongTensor] = None,
+    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPast]:
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        use_cache = use_cache if use_cache is not None else self.config.use_cache
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
+        elif input_ids is not None:
+            batch_size, seq_length = input_ids.shape[:2]
+        elif inputs_embeds is not None:
+            batch_size, seq_length = inputs_embeds.shape[:2]
+        else:
+            raise ValueError("You have to specify either input_ids or inputs_embeds")
+        return_legacy_cache = False
+        if (
+            use_cache and not isinstance(past_key_values, Cache) and not self.training
+        ):  # kept for BC (non `Cache` `past_key_values` inputs)
+            return_legacy_cache = True
+            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            logger.warning_once(
+                "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. "
+                "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)"
+            )
+        if inputs_embeds is None:
+            inputs_embeds = self.wte(input_ids)
+        if cache_position is None:
+            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
+            cache_position = torch.arange(
+                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
+            )
+        if position_ids is None:
+            position_ids = cache_position.unsqueeze(0)
+        causal_mask = self._update_causal_mask(
+            attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
+        )
+        hidden_states = inputs_embeds
+        hidden_states = self.drop(hidden_states)
+        position_embeddings = self.rotary(hidden_states, position_ids)
+        all_hidden_states = () if output_hidden_states else None
+        all_self_attns = () if output_attentions else None
+        next_decoder_cache = None
+        for block in self.h:
+            if output_hidden_states:
+                all_hidden_states = all_hidden_states + (hidden_states,)
+            if self.gradient_checkpointing and self.training:
+                outputs = self._gradient_checkpointing_func(
+                    block.__call__,
+                    hidden_states,
+                    causal_mask,
+                    position_ids,
+                    past_key_values,
+                    output_attentions,
+                    use_cache,
+                    cache_position,
+                    position_embeddings,
+                )
+            else:
+                outputs = block(
+                    hidden_states,
+                    attention_mask=causal_mask,
+                    position_ids=position_ids,
+                    past_key_value=past_key_values,
+                    output_attentions=output_attentions,
+                    use_cache=use_cache,
+                    cache_position=cache_position,
+                    position_embeddings=position_embeddings,
+                )
+            hidden_states = outputs[0]
+            if use_cache:
+                next_decoder_cache = outputs[2 if output_attentions else 1]
+            if output_attentions:
+                all_self_attns += (outputs[1],)
+        hidden_states = self.ln_f(hidden_states)
+        # Add last hidden state
+        if output_hidden_states:
+            all_hidden_states += (hidden_states,)
+        next_cache = None
+        if use_cache:
+            next_cache = next_decoder_cache.to_legacy_cache() if return_legacy_cache else next_decoder_cache
+        if not return_dict:
+            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+        return BaseModelOutputWithPast(
+            last_hidden_state=hidden_states,
+            past_key_values=next_cache,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attns,
+        )
+    def _update_causal_mask(
+        self,
+        attention_mask: torch.Tensor,
+        input_tensor: torch.Tensor,
+        cache_position: torch.Tensor,
+        past_key_values: Cache,
+        output_attentions: bool,
+    ):
+        # TODO: As of torch==2.2.0, the `attention_mask` passed to the model in `generate` is 2D and of dynamic length even when the static
+        # KV cache is used. This is an issue for torch.compile which then recaptures cudagraphs at each decode steps due to the dynamic shapes.
+        # (`recording cudagraph tree for symint key 13`, etc.), which is VERY slow. A workaround is `@torch.compiler.disable`, but this prevents using
+        # `fullgraph=True`. See more context in https://github.com/huggingface/transformers/pull/29114
+        if self.config._attn_implementation == "flash_attention_2":
+            if attention_mask is not None and 0.0 in attention_mask:
+                return attention_mask
+            return None
+        # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in
+        # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail
+        # to infer the attention mask.
+        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
+        using_static_cache = isinstance(past_key_values, StaticCache)
+        # When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward
+        if self.config._attn_implementation == "sdpa" and not using_static_cache and not output_attentions:
+            if AttentionMaskConverter._ignore_causal_mask_sdpa(
+                attention_mask,
+                inputs_embeds=input_tensor,
+                past_key_values_length=past_seen_tokens,
+                is_training=self.training,
+            ):
+                return None
+        dtype, device = input_tensor.dtype, input_tensor.device
+        min_dtype = torch.finfo(dtype).min
+        sequence_length = input_tensor.shape[1]
+        if using_static_cache:
+            target_length = past_key_values.get_max_length()
+        else:
+            target_length = (
+                attention_mask.shape[-1]
+                if isinstance(attention_mask, torch.Tensor)
+                else past_seen_tokens + sequence_length + 1
+            )
+        # In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
+        causal_mask = _prepare_4d_causal_attention_mask_with_cache_position(
+            attention_mask,
+            sequence_length=sequence_length,
+            target_length=target_length,
+            dtype=dtype,
+            device=device,
+            min_dtype=min_dtype,
+            cache_position=cache_position,
+            batch_size=input_tensor.shape[0],
+        )
+        if (
+            self.config._attn_implementation == "sdpa"
+            and attention_mask is not None
+            and attention_mask.device.type == "cuda"
+            and not output_attentions
+        ):
+            # Attend to all tokens in fully masked rows in the causal_mask, for example the relevant first rows when
+            # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path.
+            # Details: https://github.com/pytorch/pytorch/issues/110213
+            causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype)
+        return causal_mask
+@add_start_docstrings(
+    """
+    The EXAONE Model transformer with a language modeling head on top (linear layer with weights tied to the input
+    embeddings).
+    """,
+    EXAONE_START_DOCSTRING,
+)
+class ExaoneForCausalLM(ExaonePreTrainedModel, GenerationMixin):
+    _tied_weights_keys = ["lm_head.weight"]
+    def __init__(self, config):
+        super().__init__(config)
+        self.transformer = ExaoneModel(config)
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+        self.config = config
+        # Initialize weights and apply final processing
+        self.post_init()
+    def get_output_embeddings(self):
+        return self.lm_head
+    def set_output_embeddings(self, new_embeddings):
+        self.lm_head = new_embeddings
+    @add_start_docstrings_to_model_forward(EXAONE_INPUTS_DOCSTRING)
+    @add_code_sample_docstrings(
+        checkpoint=_CHECKPOINT_FOR_DOC,
+        output_type=BaseModelOutputWithPast,
+        config_class=_CONFIG_FOR_DOC,
+    )
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        past_key_values: Optional[Cache] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        cache_position: Optional[torch.LongTensor] = None,
+    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPast]:
+        r"""
+        Args:
+            labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+                Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
+                `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100`
+                are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]`
+        Example:
+        ```python
+        >>> from transformers import AutoModelForCausalLM, AutoTokenizer
+        >>> model = AutoModelForCausalLM.from_pretrained("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+                                                         trust_remote_code=True)
+        >>> tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct")
+        >>> prompt = "Explain how wonderful you are"
+        >>> messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt}
+        ]
+        >>> input_ids = tokenizer.apply_chat_template(
+            messages,
+            tokenize=True,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        )
+        >>> output = model.generate(input_ids, max_new_tokens=128)
+        >>> tokenizer.decode(output[0], skip_special_tokens=True)
+        "[|system|]You are a helpful assistant.\n[|user|]Explain how wonderful you are\n[|assistant|]Thank you for your kind words! I'm here to assist you with information, answer questions, and help you in any way I can. My goal is to provide accurate, helpful, and timely responses. Whether you need help with a specific task, want to learn something new, or just need someone to talk to, I'm here for you. How can I assist you today?"
+        ```
+        """
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        transformer_outputs = self.transformer(
+            input_ids,
+            attention_mask=attention_mask,
+            past_key_values=past_key_values,
+            position_ids=position_ids,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            cache_position=cache_position,
+        )
+        hidden_states = transformer_outputs[0]
+        lm_logits = self.lm_head(hidden_states)
+        lm_logits = lm_logits.float()
+        loss = None
+        if labels is not None:
+            lm_logits = lm_logits.to(torch.float32)
+            # Shift so that tokens < n predict n
+            shift_logits = lm_logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous()
+            # Flatten the tokens
+            loss_fct = CrossEntropyLoss()
+            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
+            lm_logits = lm_logits.to(hidden_states.dtype)
+            loss = loss.to(hidden_states.dtype)
+        if not return_dict:
+            output = (lm_logits,) + transformer_outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+        return CausalLMOutputWithPast(
+            loss=loss,
+            logits=lm_logits,
+            past_key_values=transformer_outputs.past_key_values,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
+    def prepare_inputs_for_generation(
+        self,
+        input_ids,
+        past_key_values=None,
+        attention_mask=None,
+        inputs_embeds=None,
+        cache_position=None,
+        position_ids=None,
+        use_cache=True,
+        **kwargs,
+    ):
+        # If we have cache: let's slice `input_ids` through `cache_position`, to keep only the unprocessed tokens
+        # Exception 1: when passing input_embeds, input_ids may be missing entries
+        # Exception 2: some generation methods do special slicing of input_ids, so we don't need to do it here
+        if past_key_values is not None:
+            if inputs_embeds is not None:  # Exception 1
+                input_ids = input_ids[:, -cache_position.shape[0] :]
+            elif input_ids.shape[1] != cache_position.shape[0]:  # Default case (the "else", a no op, is Exception 2)
+                input_ids = input_ids[:, cache_position]
+        if attention_mask is not None and position_ids is None:
+            # create position_ids on the fly for batch generation
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            if past_key_values:
+                position_ids = position_ids[:, -input_ids.shape[1] :]
+                # This `clone` call is needed to avoid recapturing cuda graphs with `torch.compile`'s  `mode="reduce-overhead`, as otherwise the input `position_ids` would have various stride during the decoding. Here, simply using `.contiguous()` is not sufficient as in the batch size = 1 case, `position_ids` is already contiguous but with varying stride which retriggers a capture.
+                position_ids = position_ids.clone(memory_format=torch.contiguous_format)
+        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+        if inputs_embeds is not None and cache_position[0] == 0:
+            model_inputs = {"inputs_embeds": inputs_embeds, "input_ids": None}
+        else:
+            model_inputs = {"input_ids": input_ids, "inputs_embeds": None}
+        if isinstance(past_key_values, StaticCache) and attention_mask.ndim == 2:
+            if inputs_embeds is not None:
+                batch_size, sequence_length, _ = inputs_embeds.shape
+                device = inputs_embeds.device
+            else:
+                batch_size, sequence_length = input_ids.shape
+                device = input_ids.device
+            dtype = self.lm_head.weight.dtype
+            min_dtype = torch.finfo(dtype).min
+            attention_mask = _prepare_4d_causal_attention_mask_with_cache_position(
+                attention_mask,
+                sequence_length=sequence_length,
+                target_length=past_key_values.get_max_length(),
+                dtype=dtype,
+                device=device,
+                min_dtype=min_dtype,
+                cache_position=cache_position,
+                batch_size=batch_size,
+            )
+        model_inputs.update(
+            {
+                "position_ids": position_ids,
+                "cache_position": cache_position,
+                "past_key_values": past_key_values,
+                "use_cache": use_cache,
+                "attention_mask": attention_mask,
+            }
+        )
+        return model_inputs
+@add_start_docstrings(
+    """
+    The EXAONE Model transformer with a sequence classification head on top (linear layer).
+    [`ExaoneForSequenceClassification`] uses the last token in order to do the classification, as
+    other causal models (e.g. GPT-1) do.
+    Since it does classification on the last token, it requires to know the position of the last token. If a
+    `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each
+    row. If no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot
+    guess the padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take
+    the last value in each row of the batch).
+    """,
+    EXAONE_START_DOCSTRING,
+)
+class ExaoneForSequenceClassification(ExaonePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.transformer = ExaoneModel(config)
+        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+        # Initialize weights and apply final processing
+        self.post_init()
+    @add_start_docstrings_to_model_forward(EXAONE_INPUTS_DOCSTRING)
+    @add_code_sample_docstrings(
+        checkpoint=_CHECKPOINT_FOR_DOC,
+        output_type=SequenceClassifierOutputWithPast,
+        config_class=_CONFIG_FOR_DOC,
+    )
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        past_key_values: Optional[Cache] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutputWithPast]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
+            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        transformer_outputs = self.transformer(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        hidden_states = transformer_outputs[0]
+        logits = self.score(hidden_states)
+        if input_ids is not None:
+            batch_size, sequence_length = input_ids.shape[:2]
+        else:
+            batch_size, sequence_length = inputs_embeds.shape[:2]
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
+        if self.config.pad_token_id is None:
+            sequence_lengths = -1
+        else:
+            if input_ids is not None:
+                # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
+                sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1
+                sequence_lengths = sequence_lengths % input_ids.shape[-1]
+                sequence_lengths = sequence_lengths.to(logits.device)
+            else:
+                sequence_lengths = -1
+                logger.warning(
+                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+                )
+        pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
+        loss = None
+        if labels is not None:
+            labels = labels.to(logits.device)
+            if self.config.problem_type is None:
+                if self.num_labels == 1:
+                    self.config.problem_type = "regression"
+                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+                    self.config.problem_type = "single_label_classification"
+                else:
+                    self.config.problem_type = "multi_label_classification"
+            if self.config.problem_type == "regression":
+                loss_fct = MSELoss()
+                if self.num_labels == 1:
+                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(pooled_logits, labels)
+            elif self.config.problem_type == "single_label_classification":
+                loss_fct = CrossEntropyLoss()
+                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
+            elif self.config.problem_type == "multi_label_classification":
+                loss_fct = BCEWithLogitsLoss()
+                loss = loss_fct(pooled_logits, labels)
+        if not return_dict:
+            output = (pooled_logits,) + transformer_outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=pooled_logits,
+            past_key_values=transformer_outputs.past_key_values,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
+@add_start_docstrings(
+    """
+    The EXAONE Model transformer with a span classification head on top for extractive question-answering tasks like
+    SQuAD (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
+    """,
+    EXAONE_START_DOCSTRING,
+)
+class ExaoneForQuestionAnswering(ExaonePreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.transformer = ExaoneModel(config)
+        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
+        # Model parallel
+        self.model_parallel = False
+        self.device_map = None
+        # Initialize weights and apply final processing
+        self.post_init()
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[Cache] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        start_positions: Optional[torch.LongTensor] = None,
+        end_positions: Optional[torch.LongTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], QuestionAnsweringModelOutput]:
+        r"""
+        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for position (index) of the start of the labelled span for computing the token classification loss.
+            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the
+            sequence are not taken into account for computing the loss.
+        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for position (index) of the end of the labelled span for computing the token classification loss.
+            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the
+            sequence are not taken into account for computing the loss.
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        outputs = self.transformer(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        sequence_output = outputs[0]
+        logits = self.qa_outputs(sequence_output)
+        start_logits, end_logits = logits.split(1, dim=-1)
+        start_logits = start_logits.squeeze(-1).contiguous()
+        end_logits = end_logits.squeeze(-1).contiguous()
+        total_loss = None
+        if start_positions is not None and end_positions is not None:
+            # If we are on multi-GPU, split add a dimension
+            if len(start_positions.size()) > 1:
+                start_positions = start_positions.squeeze(-1).to(start_logits.device)
+            if len(end_positions.size()) > 1:
+                end_positions = end_positions.squeeze(-1).to(end_logits.device)
+            # sometimes the start/end positions are outside our model inputs, we ignore these terms
+            ignored_index = start_logits.size(1)
+            start_positions = start_positions.clamp(0, ignored_index)
+            end_positions = end_positions.clamp(0, ignored_index)
+            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
+            start_loss = loss_fct(start_logits, start_positions)
+            end_loss = loss_fct(end_logits, end_positions)
+            total_loss = (start_loss + end_loss) / 2
+        if not return_dict:
+            output = (start_logits, end_logits) + outputs[2:]
+            return ((total_loss,) + output) if total_loss is not None else output
+        return QuestionAnsweringModelOutput(
+            loss=total_loss,
+            start_logits=start_logits,
+            end_logits=end_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "[BOS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[|endofturn|]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3221 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[BOS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[EOS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "                               ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "                              ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "                             ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "                            ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "                           ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "                          ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "                         ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "                        ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "                       ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "                      ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "                     ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "15": {
+      "content": "                    ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "16": {
+      "content": "                   ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "17": {
+      "content": "                  ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "18": {
+      "content": "                 ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "19": {
+      "content": "                ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "20": {
+      "content": "               ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "21": {
+      "content": "              ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "22": {
+      "content": "             ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "23": {
+      "content": "            ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "24": {
+      "content": "           ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "25": {
+      "content": "          ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "26": {
+      "content": "         ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "27": {
+      "content": "        ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "       ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "      ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "30": {
+      "content": "     ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "31": {
+      "content": "    ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32": {
+      "content": "   ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "33": {
+      "content": "  ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "34": {
+      "content": "\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "35": {
+      "content": "\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "36": {
+      "content": "\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "37": {
+      "content": "\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "38": {
+      "content": "\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "39": {
+      "content": "\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "40": {
+      "content": "\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "41": {
+      "content": "\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "42": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "43": {
+      "content": "<|c|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "44": {
+      "content": "<|c++|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "45": {
+      "content": "<|python|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "46": {
+      "content": "<|javascript|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "47": {
+      "content": "<|markdown|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "48": {
+      "content": "<|html|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49": {
+      "content": "<|css|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50": {
+      "content": "<|vue|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "51": {
+      "content": "<|java|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "52": {
+      "content": "PI:URL",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "53": {
+      "content": "PI:EMAIL",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "54": {
+      "content": "PI:ACCOUNT_NUM",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "55": {
+      "content": "PI:PHONE_NUM",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "56": {
+      "content": "PI:BUSINESS_NUM",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "57": {
+      "content": "PI:ANNON",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "58": {
+      "content": "PI:KEY",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "59": {
+      "content": "PI:ID",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "60": {
+      "content": "PI:IP_ADDRESS",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "61": {
+      "content": "PI:USER",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "62": {
+      "content": "[unused0]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "63": {
+      "content": "[unused1]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "64": {
+      "content": "[unused2]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "65": {
+      "content": "[unused3]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "66": {
+      "content": "[unused4]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "67": {
+      "content": "[unused5]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "68": {
+      "content": "[unused6]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "69": {
+      "content": "[unused7]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "70": {
+      "content": "[unused8]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "71": {
+      "content": "[unused9]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "72": {
+      "content": "[unused10]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "73": {
+      "content": "[unused11]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "74": {
+      "content": "[unused12]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75": {
+      "content": "[unused13]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "76": {
+      "content": "[unused14]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "77": {
+      "content": "[unused15]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "78": {
+      "content": "[unused16]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "[unused17]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "[unused18]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "81": {
+      "content": "[unused19]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "82": {
+      "content": "[unused20]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "83": {
+      "content": "[unused21]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "84": {
+      "content": "[unused22]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "85": {
+      "content": "[unused23]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "86": {
+      "content": "[unused24]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "87": {
+      "content": "[unused25]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "88": {
+      "content": "[unused26]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "89": {
+      "content": "[unused27]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "90": {
+      "content": "[unused28]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "91": {
+      "content": "[unused29]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "92": {
+      "content": "[unused30]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "93": {
+      "content": "[unused31]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "94": {
+      "content": "[unused32]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "95": {
+      "content": "[unused33]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "96": {
+      "content": "[unused34]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "97": {
+      "content": "[unused35]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "98": {
+      "content": "[unused36]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "99": {
+      "content": "[unused37]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[unused38]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[unused39]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[unused40]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[unused41]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "104": {
+      "content": "[unused42]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "105": {
+      "content": "[unused43]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "106": {
+      "content": "[unused44]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "[unused45]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "[unused46]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "109": {
+      "content": "[unused47]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "110": {
+      "content": "[unused48]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "111": {
+      "content": "[unused49]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "112": {
+      "content": "[unused50]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "113": {
+      "content": "[unused51]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "114": {
+      "content": "[unused52]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "115": {
+      "content": "[unused53]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "116": {
+      "content": "[unused54]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "117": {
+      "content": "[unused55]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "118": {
+      "content": "[unused56]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "119": {
+      "content": "[unused57]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "120": {
+      "content": "[unused58]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "121": {
+      "content": "[unused59]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "122": {
+      "content": "[unused60]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "123": {
+      "content": "[unused61]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "124": {
+      "content": "[unused62]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "125": {
+      "content": "[unused63]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "126": {
+      "content": "[unused64]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "127": {
+      "content": "[unused65]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128": {
+      "content": "[unused66]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "129": {
+      "content": "[unused67]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "130": {
+      "content": "[unused68]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131": {
+      "content": "[unused69]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "132": {
+      "content": "[unused70]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "133": {
+      "content": "[unused71]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "134": {
+      "content": "[unused72]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "135": {
+      "content": "[unused73]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "136": {
+      "content": "[unused74]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "137": {
+      "content": "[unused75]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "138": {
+      "content": "[unused76]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "139": {
+      "content": "[unused77]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "140": {
+      "content": "[unused78]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "141": {
+      "content": "[unused79]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "142": {
+      "content": "[unused80]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "143": {
+      "content": "[unused81]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "144": {
+      "content": "[unused82]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "145": {
+      "content": "[unused83]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "146": {
+      "content": "[unused84]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "147": {
+      "content": "[unused85]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "148": {
+      "content": "[unused86]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "149": {
+      "content": "[unused87]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "150": {
+      "content": "[unused88]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151": {
+      "content": "[unused89]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152": {
+      "content": "[unused90]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "153": {
+      "content": "[unused91]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "154": {
+      "content": "[unused92]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "155": {
+      "content": "[unused93]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "156": {
+      "content": "[unused94]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "157": {
+      "content": "[unused95]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "158": {
+      "content": "[unused96]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "159": {
+      "content": "[unused97]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "160": {
+      "content": "[unused98]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "161": {
+      "content": "[unused99]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "162": {
+      "content": "[extra_id_0]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "163": {
+      "content": "[extra_id_1]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "164": {
+      "content": "[extra_id_2]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "165": {
+      "content": "[extra_id_3]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "166": {
+      "content": "[extra_id_4]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "167": {
+      "content": "[extra_id_5]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "168": {
+      "content": "[extra_id_6]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "169": {
+      "content": "[extra_id_7]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "170": {
+      "content": "[extra_id_8]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "171": {
+      "content": "[extra_id_9]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "172": {
+      "content": "[extra_id_10]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "173": {
+      "content": "[extra_id_11]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "174": {
+      "content": "[extra_id_12]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "175": {
+      "content": "[extra_id_13]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "176": {
+      "content": "[extra_id_14]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "177": {
+      "content": "[extra_id_15]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "178": {
+      "content": "[extra_id_16]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "179": {
+      "content": "[extra_id_17]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "180": {
+      "content": "[extra_id_18]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "181": {
+      "content": "[extra_id_19]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "182": {
+      "content": "[extra_id_20]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "183": {
+      "content": "[extra_id_21]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "184": {
+      "content": "[extra_id_22]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "185": {
+      "content": "[extra_id_23]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "186": {
+      "content": "[extra_id_24]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "187": {
+      "content": "[extra_id_25]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "188": {
+      "content": "[extra_id_26]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "189": {
+      "content": "[extra_id_27]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "190": {
+      "content": "[extra_id_28]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "191": {
+      "content": "[extra_id_29]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "192": {
+      "content": "[extra_id_30]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "193": {
+      "content": "[extra_id_31]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "194": {
+      "content": "[extra_id_32]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "195": {
+      "content": "[extra_id_33]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "196": {
+      "content": "[extra_id_34]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "197": {
+      "content": "[extra_id_35]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "198": {
+      "content": "[extra_id_36]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "199": {
+      "content": "[extra_id_37]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "200": {
+      "content": "[extra_id_38]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "201": {
+      "content": "[extra_id_39]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "202": {
+      "content": "[extra_id_40]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "203": {
+      "content": "[extra_id_41]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "204": {
+      "content": "[extra_id_42]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "205": {
+      "content": "[extra_id_43]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "206": {
+      "content": "[extra_id_44]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "207": {
+      "content": "[extra_id_45]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "208": {
+      "content": "[extra_id_46]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "209": {
+      "content": "[extra_id_47]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "210": {
+      "content": "[extra_id_48]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "211": {
+      "content": "[extra_id_49]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "212": {
+      "content": "[extra_id_50]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "213": {
+      "content": "[extra_id_51]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "214": {
+      "content": "[extra_id_52]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "215": {
+      "content": "[extra_id_53]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "216": {
+      "content": "[extra_id_54]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "217": {
+      "content": "[extra_id_55]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "218": {
+      "content": "[extra_id_56]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "219": {
+      "content": "[extra_id_57]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "220": {
+      "content": "[extra_id_58]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "221": {
+      "content": "[extra_id_59]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "222": {
+      "content": "[extra_id_60]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "223": {
+      "content": "[extra_id_61]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "224": {
+      "content": "[extra_id_62]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "225": {
+      "content": "[extra_id_63]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "226": {
+      "content": "[extra_id_64]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "227": {
+      "content": "[extra_id_65]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "228": {
+      "content": "[extra_id_66]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "229": {
+      "content": "[extra_id_67]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "230": {
+      "content": "[extra_id_68]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "231": {
+      "content": "[extra_id_69]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "232": {
+      "content": "[extra_id_70]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "233": {
+      "content": "[extra_id_71]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "234": {
+      "content": "[extra_id_72]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "235": {
+      "content": "[extra_id_73]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "236": {
+      "content": "[extra_id_74]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "237": {
+      "content": "[extra_id_75]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "238": {
+      "content": "[extra_id_76]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "239": {
+      "content": "[extra_id_77]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "240": {
+      "content": "[extra_id_78]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "241": {
+      "content": "[extra_id_79]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "242": {
+      "content": "[extra_id_80]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "243": {
+      "content": "[extra_id_81]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "244": {
+      "content": "[extra_id_82]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "245": {
+      "content": "[extra_id_83]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "246": {
+      "content": "[extra_id_84]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "247": {
+      "content": "[extra_id_85]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "248": {
+      "content": "[extra_id_86]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "249": {
+      "content": "[extra_id_87]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250": {
+      "content": "[extra_id_88]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "251": {
+      "content": "[extra_id_89]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "252": {
+      "content": "[extra_id_90]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "253": {
+      "content": "[extra_id_91]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "254": {
+      "content": "[extra_id_92]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "255": {
+      "content": "[extra_id_93]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256": {
+      "content": "[extra_id_94]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "257": {
+      "content": "[extra_id_95]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "258": {
+      "content": "[extra_id_96]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "259": {
+      "content": "[extra_id_97]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "260": {
+      "content": "[extra_id_98]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "261": {
+      "content": "[extra_id_99]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "262": {
+      "content": "[extra_id_100]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "263": {
+      "content": "[extra_id_101]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "264": {
+      "content": "[extra_id_102]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "265": {
+      "content": "[extra_id_103]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "266": {
+      "content": "[extra_id_104]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "267": {
+      "content": "[extra_id_105]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "268": {
+      "content": "[extra_id_106]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "269": {
+      "content": "[extra_id_107]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "270": {
+      "content": "[extra_id_108]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "271": {
+      "content": "[extra_id_109]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "272": {
+      "content": "[extra_id_110]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "273": {
+      "content": "[extra_id_111]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "274": {
+      "content": "[extra_id_112]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "275": {
+      "content": "[extra_id_113]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "276": {
+      "content": "[extra_id_114]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "277": {
+      "content": "[extra_id_115]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "278": {
+      "content": "[extra_id_116]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "279": {
+      "content": "[extra_id_117]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "280": {
+      "content": "[extra_id_118]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "281": {
+      "content": "[extra_id_119]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "282": {
+      "content": "[extra_id_120]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "283": {
+      "content": "[extra_id_121]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "284": {
+      "content": "[extra_id_122]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "285": {
+      "content": "[extra_id_123]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "286": {
+      "content": "[extra_id_124]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "287": {
+      "content": "[extra_id_125]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "288": {
+      "content": "[extra_id_126]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "289": {
+      "content": "[extra_id_127]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "290": {
+      "content": "[extra_id_128]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "291": {
+      "content": "[extra_id_129]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "292": {
+      "content": "[extra_id_130]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "293": {
+      "content": "[extra_id_131]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "294": {
+      "content": "[extra_id_132]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "295": {
+      "content": "[extra_id_133]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "296": {
+      "content": "[extra_id_134]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "297": {
+      "content": "[extra_id_135]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "298": {
+      "content": "[extra_id_136]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "299": {
+      "content": "[extra_id_137]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "300": {
+      "content": "[extra_id_138]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "301": {
+      "content": "[extra_id_139]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "302": {
+      "content": "[extra_id_140]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "303": {
+      "content": "[extra_id_141]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "304": {
+      "content": "[extra_id_142]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "305": {
+      "content": "[extra_id_143]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "306": {
+      "content": "[extra_id_144]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "307": {
+      "content": "[extra_id_145]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "308": {
+      "content": "[extra_id_146]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "309": {
+      "content": "[extra_id_147]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "310": {
+      "content": "[extra_id_148]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "311": {
+      "content": "[extra_id_149]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "312": {
+      "content": "[extra_id_150]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "313": {
+      "content": "[extra_id_151]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "314": {
+      "content": "[extra_id_152]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "315": {
+      "content": "[extra_id_153]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "316": {
+      "content": "[extra_id_154]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "317": {
+      "content": "[extra_id_155]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "318": {
+      "content": "[extra_id_156]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "319": {
+      "content": "[extra_id_157]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "320": {
+      "content": "[extra_id_158]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "321": {
+      "content": "[extra_id_159]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "322": {
+      "content": "[extra_id_160]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "323": {
+      "content": "[extra_id_161]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "324": {
+      "content": "[extra_id_162]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "325": {
+      "content": "[extra_id_163]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "326": {
+      "content": "[extra_id_164]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "327": {
+      "content": "[extra_id_165]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "328": {
+      "content": "[extra_id_166]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "329": {
+      "content": "[extra_id_167]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "330": {
+      "content": "[extra_id_168]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "331": {
+      "content": "[extra_id_169]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "332": {
+      "content": "[extra_id_170]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "333": {
+      "content": "[extra_id_171]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "334": {
+      "content": "[extra_id_172]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "335": {
+      "content": "[extra_id_173]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "336": {
+      "content": "[extra_id_174]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "337": {
+      "content": "[extra_id_175]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "338": {
+      "content": "[extra_id_176]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "339": {
+      "content": "[extra_id_177]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "340": {
+      "content": "[extra_id_178]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "341": {
+      "content": "[extra_id_179]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "342": {
+      "content": "[extra_id_180]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "343": {
+      "content": "[extra_id_181]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "344": {
+      "content": "[extra_id_182]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "345": {
+      "content": "[extra_id_183]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "346": {
+      "content": "[extra_id_184]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "347": {
+      "content": "[extra_id_185]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "348": {
+      "content": "[extra_id_186]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "349": {
+      "content": "[extra_id_187]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "350": {
+      "content": "[extra_id_188]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "351": {
+      "content": "[extra_id_189]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "352": {
+      "content": "[extra_id_190]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "353": {
+      "content": "[extra_id_191]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "354": {
+      "content": "[extra_id_192]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "355": {
+      "content": "[extra_id_193]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "356": {
+      "content": "[extra_id_194]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "357": {
+      "content": "[extra_id_195]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "358": {
+      "content": "[extra_id_196]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "359": {
+      "content": "[extra_id_197]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "360": {
+      "content": "[extra_id_198]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "361": {
+      "content": "[|endofturn|]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_token": [
+    "[unused0]",
+    "[unused1]",
+    "[unused2]",
+    "[unused3]",
+    "[unused4]",
+    "[unused5]",
+    "[unused6]",
+    "[unused7]",
+    "[unused8]",
+    "[unused9]",
+    "[unused10]",
+    "[unused11]",
+    "[unused12]",
+    "[unused13]",
+    "[unused14]",
+    "[unused15]",
+    "[unused16]",
+    "[unused17]",
+    "[unused18]",
+    "[unused19]",
+    "[unused20]",
+    "[unused21]",
+    "[unused22]",
+    "[unused23]",
+    "[unused24]",
+    "[unused25]",
+    "[unused26]",
+    "[unused27]",
+    "[unused28]",
+    "[unused29]",
+    "[unused30]",
+    "[unused31]",
+    "[unused32]",
+    "[unused33]",
+    "[unused34]",
+    "[unused35]",
+    "[unused36]",
+    "[unused37]",
+    "[unused38]",
+    "[unused39]",
+    "[unused40]",
+    "[unused41]",
+    "[unused42]",
+    "[unused43]",
+    "[unused44]",
+    "[unused45]",
+    "[unused46]",
+    "[unused47]",
+    "[unused48]",
+    "[unused49]",
+    "[unused50]",
+    "[unused51]",
+    "[unused52]",
+    "[unused53]",
+    "[unused54]",
+    "[unused55]",
+    "[unused56]",
+    "[unused57]",
+    "[unused58]",
+    "[unused59]",
+    "[unused60]",
+    "[unused61]",
+    "[unused62]",
+    "[unused63]",
+    "[unused64]",
+    "[unused65]",
+    "[unused66]",
+    "[unused67]",
+    "[unused68]",
+    "[unused69]",
+    "[unused70]",
+    "[unused71]",
+    "[unused72]",
+    "[unused73]",
+    "[unused74]",
+    "[unused75]",
+    "[unused76]",
+    "[unused77]",
+    "[unused78]",
+    "[unused79]",
+    "[unused80]",
+    "[unused81]",
+    "[unused82]",
+    "[unused83]",
+    "[unused84]",
+    "[unused85]",
+    "[unused86]",
+    "[unused87]",
+    "[unused88]",
+    "[unused89]",
+    "[unused90]",
+    "[unused91]",
+    "[unused92]",
+    "[unused93]",
+    "[unused94]",
+    "[unused95]",
+    "[unused96]",
+    "[unused97]",
+    "[unused98]",
+    "[unused99]",
+    "[extra_id_0]",
+    "[extra_id_1]",
+    "[extra_id_2]",
+    "[extra_id_3]",
+    "[extra_id_4]",
+    "[extra_id_5]",
+    "[extra_id_6]",
+    "[extra_id_7]",
+    "[extra_id_8]",
+    "[extra_id_9]",
+    "[extra_id_10]",
+    "[extra_id_11]",
+    "[extra_id_12]",
+    "[extra_id_13]",
+    "[extra_id_14]",
+    "[extra_id_15]",
+    "[extra_id_16]",
+    "[extra_id_17]",
+    "[extra_id_18]",
+    "[extra_id_19]",
+    "[extra_id_20]",
+    "[extra_id_21]",
+    "[extra_id_22]",
+    "[extra_id_23]",
+    "[extra_id_24]",
+    "[extra_id_25]",
+    "[extra_id_26]",
+    "[extra_id_27]",
+    "[extra_id_28]",
+    "[extra_id_29]",
+    "[extra_id_30]",
+    "[extra_id_31]",
+    "[extra_id_32]",
+    "[extra_id_33]",
+    "[extra_id_34]",
+    "[extra_id_35]",
+    "[extra_id_36]",
+    "[extra_id_37]",
+    "[extra_id_38]",
+    "[extra_id_39]",
+    "[extra_id_40]",
+    "[extra_id_41]",
+    "[extra_id_42]",
+    "[extra_id_43]",
+    "[extra_id_44]",
+    "[extra_id_45]",
+    "[extra_id_46]",
+    "[extra_id_47]",
+    "[extra_id_48]",
+    "[extra_id_49]",
+    "[extra_id_50]",
+    "[extra_id_51]",
+    "[extra_id_52]",
+    "[extra_id_53]",
+    "[extra_id_54]",
+    "[extra_id_55]",
+    "[extra_id_56]",
+    "[extra_id_57]",
+    "[extra_id_58]",
+    "[extra_id_59]",
+    "[extra_id_60]",
+    "[extra_id_61]",
+    "[extra_id_62]",
+    "[extra_id_63]",
+    "[extra_id_64]",
+    "[extra_id_65]",
+    "[extra_id_66]",
+    "[extra_id_67]",
+    "[extra_id_68]",
+    "[extra_id_69]",
+    "[extra_id_70]",
+    "[extra_id_71]",
+    "[extra_id_72]",
+    "[extra_id_73]",
+    "[extra_id_74]",
+    "[extra_id_75]",
+    "[extra_id_76]",
+    "[extra_id_77]",
+    "[extra_id_78]",
+    "[extra_id_79]",
+    "[extra_id_80]",
+    "[extra_id_81]",
+    "[extra_id_82]",
+    "[extra_id_83]",
+    "[extra_id_84]",
+    "[extra_id_85]",
+    "[extra_id_86]",
+    "[extra_id_87]",
+    "[extra_id_88]",
+    "[extra_id_89]",
+    "[extra_id_90]",
+    "[extra_id_91]",
+    "[extra_id_92]",
+    "[extra_id_93]",
+    "[extra_id_94]",
+    "[extra_id_95]",
+    "[extra_id_96]",
+    "[extra_id_97]",
+    "[extra_id_98]",
+    "[extra_id_99]",
+    "[extra_id_100]",
+    "[extra_id_101]",
+    "[extra_id_102]",
+    "[extra_id_103]",
+    "[extra_id_104]",
+    "[extra_id_105]",
+    "[extra_id_106]",
+    "[extra_id_107]",
+    "[extra_id_108]",
+    "[extra_id_109]",
+    "[extra_id_110]",
+    "[extra_id_111]",
+    "[extra_id_112]",
+    "[extra_id_113]",
+    "[extra_id_114]",
+    "[extra_id_115]",
+    "[extra_id_116]",
+    "[extra_id_117]",
+    "[extra_id_118]",
+    "[extra_id_119]",
+    "[extra_id_120]",
+    "[extra_id_121]",
+    "[extra_id_122]",
+    "[extra_id_123]",
+    "[extra_id_124]",
+    "[extra_id_125]",
+    "[extra_id_126]",
+    "[extra_id_127]",
+    "[extra_id_128]",
+    "[extra_id_129]",
+    "[extra_id_130]",
+    "[extra_id_131]",
+    "[extra_id_132]",
+    "[extra_id_133]",
+    "[extra_id_134]",
+    "[extra_id_135]",
+    "[extra_id_136]",
+    "[extra_id_137]",
+    "[extra_id_138]",
+    "[extra_id_139]",
+    "[extra_id_140]",
+    "[extra_id_141]",
+    "[extra_id_142]",
+    "[extra_id_143]",
+    "[extra_id_144]",
+    "[extra_id_145]",
+    "[extra_id_146]",
+    "[extra_id_147]",
+    "[extra_id_148]",
+    "[extra_id_149]",
+    "[extra_id_150]",
+    "[extra_id_151]",
+    "[extra_id_152]",
+    "[extra_id_153]",
+    "[extra_id_154]",
+    "[extra_id_155]",
+    "[extra_id_156]",
+    "[extra_id_157]",
+    "[extra_id_158]",
+    "[extra_id_159]",
+    "[extra_id_160]",
+    "[extra_id_161]",
+    "[extra_id_162]",
+    "[extra_id_163]",
+    "[extra_id_164]",
+    "[extra_id_165]",
+    "[extra_id_166]",
+    "[extra_id_167]",
+    "[extra_id_168]",
+    "[extra_id_169]",
+    "[extra_id_170]",
+    "[extra_id_171]",
+    "[extra_id_172]",
+    "[extra_id_173]",
+    "[extra_id_174]",
+    "[extra_id_175]",
+    "[extra_id_176]",
+    "[extra_id_177]",
+    "[extra_id_178]",
+    "[extra_id_179]",
+    "[extra_id_180]",
+    "[extra_id_181]",
+    "[extra_id_182]",
+    "[extra_id_183]",
+    "[extra_id_184]",
+    "[extra_id_185]",
+    "[extra_id_186]",
+    "[extra_id_187]",
+    "[extra_id_188]",
+    "[extra_id_189]",
+    "[extra_id_190]",
+    "[extra_id_191]",
+    "[extra_id_192]",
+    "[extra_id_193]",
+    "[extra_id_194]",
+    "[extra_id_195]",
+    "[extra_id_196]",
+    "[extra_id_197]",
+    "[extra_id_198]",
+    "[|endofturn|]",
+    "PI:URL",
+    "PI:EMAIL",
+    "PI:ACCOUNT_NUM",
+    "PI:PHONE_NUM",
+    "PI:BUSINESS_NUM",
+    "PI:ANNON",
+    "PI:KEY",
+    "PI:ID",
+    "PI:IP_ADDRESS",
+    "PI:USER"
+  ],
+  "bos_token": "[BOS]",
+  "chat_template": "{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{{ '[|system|][|endofturn|]\n' }}{% endif %}{% set content = message['content'] %}{% if '</thought>' in content %}{% set content = content.split('</thought>')[-1].lstrip('\\n') %}{% endif %}{{ '[|' + message['role'] + '|]' + content }}{% if not message['role'] == 'user' %}{{ '[|endofturn|]' }}{% endif %}{% if not loop.last %}{{ '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '\n[|assistant|]<thought>\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "[|endofturn|]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "[UNK]"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff