marcellobullo committed on
Commit
2f00d9e
·
verified ·
1 Parent(s): 9526898

End of training

Browse files
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: lvwerra/gpt2-imdb
3
+ datasets: marcellobullo/gpt2-imdb-raw
4
+ library_name: transformers
5
+ model_name: sharedrep-imdb-reward-clustering-seed100-k16
6
+ tags:
7
+ - generated_from_trainer
8
+ - reward-trainer
9
+ - trl
10
+ licence: license
11
+ ---
12
+
13
+ # Model Card for sharedrep-imdb-reward-clustering-seed100-k16
14
+
15
+ This model is a fine-tuned version of [lvwerra/gpt2-imdb](https://huggingface.co/lvwerra/gpt2-imdb) on the [marcellobullo/gpt2-imdb-raw](https://huggingface.co/datasets/marcellobullo/gpt2-imdb-raw) dataset.
16
+ It has been trained using [TRL](https://github.com/huggingface/trl).
17
+
18
+ ## Quick start
19
+
20
+ ```python
21
+ from transformers import pipeline
22
+
23
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
24
+ generator = pipeline("text-generation", model="marcellobullo/sharedrep-imdb-reward-clustering-seed100-k16", device="cuda")
25
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
26
+ print(output["generated_text"])
27
+ ```
28
+
29
+ ## Training procedure
30
+
31
+
32
+
33
+
34
+ This model was trained as a reward model using TRL's `RewardTrainer`.
35
+
36
+ ### Framework versions
37
+
38
+ - TRL: 0.19.1
39
+ - Transformers: 4.56.1
40
+ - Pytorch: 2.8.0
41
+ - Datasets: 3.3.2
42
+ - Tokenizers: 0.22.0
43
+
44
+ ## Citations
45
+
46
+
47
+
48
+ Cite TRL as:
49
+
50
+ ```bibtex
51
+ @misc{vonwerra2022trl,
52
+ title = {{TRL: Transformer Reinforcement Learning}},
53
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
54
+ year = 2020,
55
+ journal = {GitHub repository},
56
+ publisher = {GitHub},
57
+ howpublished = {\url{https://github.com/huggingface/trl}}
58
+ }
59
+ ```
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "SharedRepGPT2RM"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "k": 16,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "sharedrep-gpt2",
15
+ "n_ctx": 1024,
16
+ "n_embd": 768,
17
+ "n_head": 12,
18
+ "n_heads": 2,
19
+ "n_inner": null,
20
+ "n_layer": 12,
21
+ "n_positions": 1024,
22
+ "output_past": true,
23
+ "pad_token_id": 50256,
24
+ "reorder_and_upcast_attn": false,
25
+ "resid_pdrop": 0.1,
26
+ "scale_attn_by_inverse_layer_idx": false,
27
+ "scale_attn_weights": true,
28
+ "summary_activation": null,
29
+ "summary_first_dropout": 0.1,
30
+ "summary_proj_to_labels": true,
31
+ "summary_type": "cls_index",
32
+ "summary_use_proj": true,
33
+ "transformers_version": "4.56.1",
34
+ "use_cache": true,
35
+ "vocab_size": 50257
36
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368c96818f91fe2e8988c36a3a4ae9a9734fc1953a8226c98aafc355d7139b63
3
+ size 497823776
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": false,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "extra_special_tokens": {},
19
+ "max_len": 1024,
20
+ "model_max_length": 1024,
21
+ "pad_token": "<|endoftext|>",
22
+ "tokenizer_class": "GPT2Tokenizer",
23
+ "unk_token": "<|endoftext|>"
24
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1be71eac730ca09284b1aeceae6f340046c6793cf31f4aeb7234c5baa15cab3
3
+ size 6033
vocab.json ADDED
The diff for this file is too large to render. See raw diff