Update README
README.md
CHANGED
@@ -39,22 +39,22 @@ LIME-1B is a 1B-parameter, decoder-only Transformer language model trained from

LIME-1B follows a modern GPT-style decoder-only Transformer with several quality-oriented design choices:

-| Component
-|
-| Architecture
-| Parameters
-| Layers (decoder blocks)
-| d_model
-| FFN dimension (d_ff)
-| Attention heads
-| Vocabulary size
-| Max sequence length
-| Positional encoding
-| Norm
-| FFN
-| Attention
-| Tying of embeddings
-| Precision (training)
+| Component               | Value                                      |
+|-------------------------|--------------------------------------------|
+| Architecture            | Decoder-only Transformer                   |
+| Parameters              | 1.0B                                       |
+| Layers (decoder blocks) | 32                                         |
+| d_model                 | 1536                                       |
+| FFN dimension (d_ff)    | 6144                                       |
+| Attention heads         | 24                                         |
+| Vocabulary size         | 50,000                                     |
+| Max sequence length     | 512 tokens                                 |
+| Positional encoding     | Sinusoidal                                 |
+| Norm                    | RMSNorm                                    |
+| FFN                     | SiLU MLP                                   |
+| Attention               | FlashAttention                             |
+| Tying of embeddings     | Output head tied to embedding              |
+| Precision (training)    | Mixed fp32/bf16 (autocast) + grad clipping |

## 2. Training data

@@ -121,24 +121,28 @@ After pretraining, the model is fine-tuned on a **unified instruction schema**:

## Usage
```python
+# Example usage
+# pip install -U ukraine
+
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

-model_name = "anarlavrenov/LIME-
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+model_name = "anarlavrenov/LIME-1b"
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
+    trust_remote_code=True
)

-def
+def build_prompt(context_, question_, tokenizer_):

    context_ids = tokenizer.encode(context) if context else []
    question_ids = tokenizer.encode(question)

-    uid = tokenizer.convert_tokens_to_ids("<
-    aid = tokenizer.convert_tokens_to_ids("<
+    uid = tokenizer.convert_tokens_to_ids("<user>")
+    aid = tokenizer.convert_tokens_to_ids("<assistant>")

    ids = []

@@ -150,18 +154,17 @@ def build_inference_prompt(context, question):

    return torch.tensor(ids, dtype=torch.long)

-# Example usage
context = "..."  # optional context
question = "Write five questions for a Data Scientist interview."
-prompt = build_prompt(context, question)
+prompt = build_prompt(context, question, tokenizer)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
-    top_p=
-    temperature=
+    top_p=None,
+    temperature=None,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)
```
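To see how the dimensions in the updated table fit together, here is a minimal, illustrative decoder block in PyTorch. It only mirrors what the table states (d_model 1536, 24 heads, d_ff 6144, RMSNorm, SiLU MLP); the pre-norm placement, the use of `nn.MultiheadAttention` as a stand-in for FlashAttention, and all names are assumptions, not LIME-1B's actual implementation.

```python
# Illustrative sketch only: shapes come from the table above; layer placement
# and naming are assumptions, not LIME-1B's real code.
import torch
import torch.nn as nn

D_MODEL, N_HEADS, D_FF = 1536, 24, 6144  # head_dim = 1536 / 24 = 64


class RMSNorm(nn.Module):
    """Root-mean-square norm: no mean subtraction, no bias."""
    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        rms = x.pow(2).mean(dim=-1, keepdim=True).add(self.eps).rsqrt()
        return x * rms * self.weight


class DecoderBlock(nn.Module):
    """One of the 32 decoder blocks; pre-norm placement is assumed."""
    def __init__(self):
        super().__init__()
        self.attn_norm = RMSNorm(D_MODEL)
        # Plain MultiheadAttention stands in for FlashAttention here.
        self.attn = nn.MultiheadAttention(D_MODEL, N_HEADS, batch_first=True)
        self.ffn_norm = RMSNorm(D_MODEL)
        # SiLU MLP: 1536 -> 6144 -> 1536
        self.ffn = nn.Sequential(
            nn.Linear(D_MODEL, D_FF), nn.SiLU(), nn.Linear(D_FF, D_MODEL)
        )

    def forward(self, x):
        seq_len = x.size(1)
        # Causal mask: True marks positions a token may NOT attend to.
        causal = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device), diagonal=1
        )
        h = self.attn_norm(x)
        attn_out, _ = self.attn(h, h, h, attn_mask=causal, need_weights=False)
        x = x + attn_out
        return x + self.ffn(self.ffn_norm(x))


x = torch.randn(1, 512, D_MODEL)   # max sequence length from the table
print(DecoderBlock()(x).shape)     # torch.Size([1, 512, 1536])
```

As a rough sanity check, 32 such blocks give about 32 · (4·1536² + 2·1536·6144) ≈ 0.91B weights, and the 50,000 × 1536 embedding matrix adds ≈ 0.08B more; with the output head tied to the embedding, the total of roughly 0.98B is consistent with the stated 1.0B parameters.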
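The diff shows only part of `build_prompt` (the middle of the function falls outside the hunks), and the new signature's parameter names (`context_`, `question_`, `tokenizer_`) differ from the names used in the visible body. A self-contained sketch consistent with the visible pieces could look like the following; the exact placement of the `<user>` and `<assistant>` tokens around the context and question is an assumption.

```python
# Sketch only: the token layout is an assumption; the README's real
# build_prompt is partially elided in this diff.
import torch

def build_prompt(context, question, tokenizer):
    context_ids = tokenizer.encode(context) if context else []
    question_ids = tokenizer.encode(question)

    uid = tokenizer.convert_tokens_to_ids("<user>")       # user-turn marker
    aid = tokenizer.convert_tokens_to_ids("<assistant>")  # assistant-turn marker

    ids = []
    ids.append(uid)            # open the user turn
    ids.extend(context_ids)    # optional context (assumed to precede the question)
    ids.extend(question_ids)   # the question itself
    ids.append(aid)            # cue the model to answer as <assistant>
    return torch.tensor(ids, dtype=torch.long)
```

Note that the snippet then passes the result of `build_prompt` back through `tokenizer(prompt, return_tensors="pt")`, which expects a string. If the function returns token ids as in the sketch above, you would instead feed them to the model directly, e.g. `model.generate(input_ids=prompt.unsqueeze(0).to(model.device), ...)`, or have `build_prompt` return decoded text; the elided part of the README's function may already handle this.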