Upload folder using huggingface_hub

- README.md +6 -32
- config.json +2 -11
- model.safetensors +2 -2
README.md
CHANGED

@@ -12,7 +12,7 @@ base_model:
 
 This tiny model is for debugging. It is randomly initialized with the config adapted from [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b).
 
-Note: This model
+Note: This model is in BF16; quantized MXFP4 FFN is not used.
 
 ### Example usage:
 
@@ -33,8 +33,8 @@ model_id = "yujiepan/gpt-oss-tiny-random"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype=
-    device_map="cuda"
+    torch_dtype=torch.bfloat16,
+    device_map="cuda"
 )
 
 messages = [
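For context, here is how the patched `pipeline(...)` call reads once `torch_dtype` is pinned to `torch.bfloat16`. This is a sketch assembled from the surrounding hunks; the prompt in `messages` is a made-up placeholder, not taken from the README.

```python
import torch
from transformers import pipeline

model_id = "yujiepan/gpt-oss-tiny-random"

pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,  # pinned by this commit; the old value is truncated in the diff
    device_map="cuda",
)

messages = [
    {"role": "user", "content": "Hello!"},  # placeholder prompt for illustration
]
outputs = pipe(messages, max_new_tokens=16)
print(outputs[0]["generated_text"][-1])
```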
@@ -53,7 +53,6 @@ print(outputs[0]["generated_text"][-1])
 ```python
 import json
 
-import safetensors
 import torch
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -94,7 +93,7 @@ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
 config = AutoConfig.from_pretrained(save_folder)
 print(config)
 torch.set_default_dtype(torch.bfloat16)
-model = AutoModelForCausalLM.from_config(config
+model = AutoModelForCausalLM.from_config(config)
 torch.set_default_dtype(torch.float32)
 model.generation_config = GenerationConfig.from_pretrained(
     source_model_id, trust_remote_code=True,
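The fixed line above closes a call that relies on a dtype trick worth spelling out: parameters are materialized under a temporarily changed default dtype. Below is a minimal, self-contained sketch of the pattern, using a small stand-in config rather than the repo's own:

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("gpt2")  # stand-in config for illustration

# Weights created while bf16 is the default dtype come out in bf16;
# restoring float32 afterwards keeps later allocations in fp32.
torch.set_default_dtype(torch.bfloat16)
model = AutoModelForCausalLM.from_config(config)
torch.set_default_dtype(torch.float32)

print(next(model.parameters()).dtype)  # torch.bfloat16
```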
@@ -107,32 +106,7 @@ with torch.no_grad():
 model.save_pretrained(save_folder)
 
 # mxfp4
-from unittest.mock import Mock
-
-from transformers.quantizers.auto import AutoHfQuantizer
 from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
-
-
-set_seed(42)
-bf16_state_dict = model.cuda().state_dict()
-hf_quantizer: Mxfp4HfQuantizer = AutoHfQuantizer.from_config(quantization_config)
-hf_quantizer.pre_quantized = False
-ffn_keys = ['model.layers.0.mlp.experts.down_proj', 'model.layers.0.mlp.experts.gate_up_proj',
-            'model.layers.1.mlp.experts.down_proj', 'model.layers.1.mlp.experts.gate_up_proj']
-for key in ffn_keys:
-    hf_quantizer.create_quantized_param(model, bf16_state_dict[key], key, "cuda", bf16_state_dict)
-state_dict = model.state_dict()
-del state_dict['lm_head.weight']
-for key in ffn_keys:
-    del state_dict[key]
-for k, v in state_dict.items():
-    if str(v.device) == 'meta':
-        print(k, v.device, v.shape)
-
-safetensors.torch.save_file(state_dict, f"{save_folder}/model.safetensors")
-with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
-    config = json.load(f)
-config['quantization_config'] = quantization_config
-with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
-    json.dump(config, f, indent=2)
+# model = AutoModelForCausalLM.from_pretrained(save_folder, trust_remote_code=True, torch_dtype=torch.bfloat16, quantization_config=quantization_config)
+# model.save_pretrained(save_folder, safe_serialization=True)
 ```
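The hunk above drops the manual MXFP4 path (`AutoHfQuantizer.create_quantized_param` over the expert FFN weights) and leaves the checkpoint in plain BF16, matching the new README note. A hedged sketch, not from the repo, for double-checking a downloaded checkpoint:

```python
import torch
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

path = hf_hub_download("yujiepan/gpt-oss-tiny-random", "model.safetensors")
state_dict = load_file(path)

# MXFP4 checkpoints typically store packed *_blocks/*_scales tensors for the
# expert weights; a plain BF16 checkpoint should have none of those keys.
assert not any(k.endswith(("_blocks", "_scales")) for k in state_dict)
for name, tensor in state_dict.items():
    if tensor.is_floating_point():
        assert tensor.dtype == torch.bfloat16, (name, tensor.dtype)
print(f"{len(state_dict)} tensors, floating-point ones all bf16")
```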
config.json
CHANGED

@@ -42,14 +42,5 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
   "use_cache": true,
-  "vocab_size": 201088,
-  "quantization_config": {
-    "modules_to_not_convert": [
-      "model.layers.*.self_attn",
-      "model.layers.*.mlp.router",
-      "model.embed_tokens",
-      "lm_head"
-    ],
-    "quant_method": "mxfp4"
-  }
-}
+  "vocab_size": 201088
+}
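The same cleanup is visible here: the `quantization_config` block that the removed README script used to inject (`config['quantization_config'] = quantization_config`) is gone, and `vocab_size` becomes the last key. A small sketch, with a hypothetical local path, to confirm the cleaned config:

```python
import json

with open("gpt-oss-tiny-random/config.json", encoding="utf-8") as f:  # hypothetical local path
    config = json.load(f)

assert "quantization_config" not in config  # removed by this commit
assert config["vocab_size"] == 201088
print(config["torch_dtype"], config["transformers_version"])  # bfloat16 4.56.0.dev0
```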
model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:aefe8b9c4b4969f6d13c5d778760f3dce4e25134324b33677934550d9df02a7c
+size 13710176
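The `model.safetensors` entry is a git-LFS pointer, not the weights themselves: three `key value` lines giving the spec version, the SHA-256 of the real file, and its size in bytes (13710176, roughly 13.7 MB, consistent with a tiny debug model). A self-contained sketch that parses such a pointer:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Split a git-LFS pointer file into a {key: value} dict."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:aefe8b9c4b4969f6d13c5d778760f3dce4e25134324b33677934550d9df02a7c
size 13710176"""

fields = parse_lfs_pointer(pointer)
assert fields["oid"].startswith("sha256:")
print(int(fields["size"]))  # 13710176 bytes, ~13.7 MB
```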