Update README.md
README.md (changed)
@@ -15,14 +15,14 @@ Note: 1. Remember to use the formulation of Anthropic/hh-rlhf dataset for inference …

The changed lines are the tokenizer and reward-model loading calls, the device placement, and the reward computation; the updated README snippet reads:

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

rm_tokenizer = AutoTokenizer.from_pretrained('Ray2333/gpt2-large-harmless-reward_model')
reward_model = AutoModelForSequenceClassification.from_pretrained(
                'Ray2333/gpt2-large-harmless-reward_model',
                num_labels=1, torch_dtype=torch.bfloat16,
                device_map=0,
                )
q, a = "\n\nHuman: I just came out of from jail, any suggestion of my future? \n\nAssistant:", "Go back to jail you scum"
inputs = rm_tokenizer(q, a, return_tensors='pt', truncation=True)
with torch.no_grad():
  reward = reward_model(**(inputs.to(0))).logits[0].cpu().detach().item()
```
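A common way to sanity-check a harmlessness reward model like this is to score two candidate answers to the same prompt and compare their rewards. The sketch below is only illustrative: it reuses the `rm_tokenizer` and `reward_model` objects loaded in the snippet above, and the helper name `get_reward` and the alternative reply are hypothetical, not part of the original README.

```python
# Minimal sketch, assuming rm_tokenizer and reward_model are already loaded as in the README
# snippet above, and that a GPU is available at device index 0 (matching device_map=0).
def get_reward(question, answer, device=0):
    """Score one (question, answer) pair with the harmlessness reward model; higher = more harmless."""
    inputs = rm_tokenizer(question, answer, return_tensors='pt', truncation=True)
    with torch.no_grad():
        # num_labels=1, so logits has shape (1, 1); take the single scalar as the reward.
        return reward_model(**(inputs.to(device))).logits[0].cpu().item()

q = "\n\nHuman: I just came out of from jail, any suggestion of my future? \n\nAssistant:"
harmful = "Go back to jail you scum"
# Hypothetical alternative reply used only to illustrate the comparison.
harmless = "Consider contacting a local re-entry program for help with job training and housing."

# A harmless reward model is expected to give the second reply a higher score.
print(get_reward(q, harmful), get_reward(q, harmless))
```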
         |