Adding model files
Browse files- config.json +88 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +16 -0
- trainer_state.json +241 -0
- vocab.json +0 -0
    	
        config.json
    ADDED
    
    | @@ -0,0 +1,88 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_name_or_path": "roberta-base",
         | 
| 3 | 
            +
              "architectures": [
         | 
| 4 | 
            +
                "RobertaForSequenceClassification"
         | 
| 5 | 
            +
              ],
         | 
| 6 | 
            +
              "attention_probs_dropout_prob": 0.1,
         | 
| 7 | 
            +
              "bos_token_id": 0,
         | 
| 8 | 
            +
              "classifier_dropout": null,
         | 
| 9 | 
            +
              "eos_token_id": 2,
         | 
| 10 | 
            +
              "hidden_act": "gelu",
         | 
| 11 | 
            +
              "hidden_dropout_prob": 0.1,
         | 
| 12 | 
            +
              "hidden_size": 768,
         | 
| 13 | 
            +
              "id2label": {
         | 
| 14 | 
            +
                "0": "admiration",
         | 
| 15 | 
            +
                "1": "amusement",
         | 
| 16 | 
            +
                "2": "anger",
         | 
| 17 | 
            +
                "3": "annoyance",
         | 
| 18 | 
            +
                "4": "approval",
         | 
| 19 | 
            +
                "5": "caring",
         | 
| 20 | 
            +
                "6": "confusion",
         | 
| 21 | 
            +
                "7": "curiosity",
         | 
| 22 | 
            +
                "8": "desire",
         | 
| 23 | 
            +
                "9": "disappointment",
         | 
| 24 | 
            +
                "10": "disapproval",
         | 
| 25 | 
            +
                "11": "disgust",
         | 
| 26 | 
            +
                "12": "embarrassment",
         | 
| 27 | 
            +
                "13": "excitement",
         | 
| 28 | 
            +
                "14": "fear",
         | 
| 29 | 
            +
                "15": "gratitude",
         | 
| 30 | 
            +
                "16": "grief",
         | 
| 31 | 
            +
                "17": "joy",
         | 
| 32 | 
            +
                "18": "love",
         | 
| 33 | 
            +
                "19": "nervousness",
         | 
| 34 | 
            +
                "20": "optimism",
         | 
| 35 | 
            +
                "21": "pride",
         | 
| 36 | 
            +
                "22": "realization",
         | 
| 37 | 
            +
                "23": "relief",
         | 
| 38 | 
            +
                "24": "remorse",
         | 
| 39 | 
            +
                "25": "sadness",
         | 
| 40 | 
            +
                "26": "surprise",
         | 
| 41 | 
            +
                "27": "neutral"
         | 
| 42 | 
            +
              },
         | 
| 43 | 
            +
              "initializer_range": 0.02,
         | 
| 44 | 
            +
              "intermediate_size": 3072,
         | 
| 45 | 
            +
              "label2id": {
         | 
| 46 | 
            +
                "admiration": 0,
         | 
| 47 | 
            +
                "amusement": 1,
         | 
| 48 | 
            +
                "anger": 2,
         | 
| 49 | 
            +
                "annoyance": 3,
         | 
| 50 | 
            +
                "approval": 4,
         | 
| 51 | 
            +
                "caring": 5,
         | 
| 52 | 
            +
                "confusion": 6,
         | 
| 53 | 
            +
                "curiosity": 7,
         | 
| 54 | 
            +
                "desire": 8,
         | 
| 55 | 
            +
                "disappointment": 9,
         | 
| 56 | 
            +
                "disapproval": 10,
         | 
| 57 | 
            +
                "disgust": 11,
         | 
| 58 | 
            +
                "embarrassment": 12,
         | 
| 59 | 
            +
                "excitement": 13,
         | 
| 60 | 
            +
                "fear": 14,
         | 
| 61 | 
            +
                "gratitude": 15,
         | 
| 62 | 
            +
                "grief": 16,
         | 
| 63 | 
            +
                "joy": 17,
         | 
| 64 | 
            +
                "love": 18,
         | 
| 65 | 
            +
                "nervousness": 19,
         | 
| 66 | 
            +
                "neutral": 27,
         | 
| 67 | 
            +
                "optimism": 20,
         | 
| 68 | 
            +
                "pride": 21,
         | 
| 69 | 
            +
                "realization": 22,
         | 
| 70 | 
            +
                "relief": 23,
         | 
| 71 | 
            +
                "remorse": 24,
         | 
| 72 | 
            +
                "sadness": 25,
         | 
| 73 | 
            +
                "surprise": 26
         | 
| 74 | 
            +
              },
         | 
| 75 | 
            +
              "layer_norm_eps": 1e-05,
         | 
| 76 | 
            +
              "max_position_embeddings": 514,
         | 
| 77 | 
            +
              "model_type": "roberta",
         | 
| 78 | 
            +
              "num_attention_heads": 12,
         | 
| 79 | 
            +
              "num_hidden_layers": 12,
         | 
| 80 | 
            +
              "pad_token_id": 1,
         | 
| 81 | 
            +
              "position_embedding_type": "absolute",
         | 
| 82 | 
            +
              "problem_type": "multi_label_classification",
         | 
| 83 | 
            +
              "torch_dtype": "float32",
         | 
| 84 | 
            +
              "transformers_version": "4.21.3",
         | 
| 85 | 
            +
              "type_vocab_size": 1,
         | 
| 86 | 
            +
              "use_cache": true,
         | 
| 87 | 
            +
              "vocab_size": 50265
         | 
| 88 | 
            +
            }
         | 
    	
        merges.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        pytorch_model.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:4fd088956d38ce7ca956815b0203caf6f29b492b04c22c50d67542b3e02c449d
         | 
| 3 | 
            +
            size 498740269
         | 
    	
        special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "bos_token": "<s>",
         | 
| 3 | 
            +
              "cls_token": "<s>",
         | 
| 4 | 
            +
              "eos_token": "</s>",
         | 
| 5 | 
            +
              "mask_token": {
         | 
| 6 | 
            +
                "content": "<mask>",
         | 
| 7 | 
            +
                "lstrip": true,
         | 
| 8 | 
            +
                "normalized": false,
         | 
| 9 | 
            +
                "rstrip": false,
         | 
| 10 | 
            +
                "single_word": false
         | 
| 11 | 
            +
              },
         | 
| 12 | 
            +
              "pad_token": "<pad>",
         | 
| 13 | 
            +
              "sep_token": "</s>",
         | 
| 14 | 
            +
              "unk_token": "<unk>"
         | 
| 15 | 
            +
            }
         | 
    	
        tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,16 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "add_prefix_space": false,
         | 
| 3 | 
            +
              "bos_token": "<s>",
         | 
| 4 | 
            +
              "cls_token": "<s>",
         | 
| 5 | 
            +
              "eos_token": "</s>",
         | 
| 6 | 
            +
              "errors": "replace",
         | 
| 7 | 
            +
              "mask_token": "<mask>",
         | 
| 8 | 
            +
              "model_max_length": 512,
         | 
| 9 | 
            +
              "name_or_path": "roberta-base",
         | 
| 10 | 
            +
              "pad_token": "<pad>",
         | 
| 11 | 
            +
              "sep_token": "</s>",
         | 
| 12 | 
            +
              "special_tokens_map_file": null,
         | 
| 13 | 
            +
              "tokenizer_class": "RobertaTokenizer",
         | 
| 14 | 
            +
              "trim_offsets": true,
         | 
| 15 | 
            +
              "unk_token": "<unk>"
         | 
| 16 | 
            +
            }
         | 
    	
        trainer_state.json
    ADDED
    
    | @@ -0,0 +1,241 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "best_metric": 0.5862595419847328,
         | 
| 3 | 
            +
              "best_model_checkpoint": "roberta-base-go_emotions/checkpoint-16281",
         | 
| 4 | 
            +
              "epoch": 3.0,
         | 
| 5 | 
            +
              "global_step": 16281,
         | 
| 6 | 
            +
              "is_hyper_param_search": false,
         | 
| 7 | 
            +
              "is_local_process_zero": true,
         | 
| 8 | 
            +
              "is_world_process_zero": true,
         | 
| 9 | 
            +
              "log_history": [
         | 
| 10 | 
            +
                {
         | 
| 11 | 
            +
                  "epoch": 0.09,
         | 
| 12 | 
            +
                  "learning_rate": 1.9815736134144095e-05,
         | 
| 13 | 
            +
                  "loss": 0.1826,
         | 
| 14 | 
            +
                  "step": 500
         | 
| 15 | 
            +
                },
         | 
| 16 | 
            +
                {
         | 
| 17 | 
            +
                  "epoch": 0.18,
         | 
| 18 | 
            +
                  "learning_rate": 1.963147226828819e-05,
         | 
| 19 | 
            +
                  "loss": 0.1317,
         | 
| 20 | 
            +
                  "step": 1000
         | 
| 21 | 
            +
                },
         | 
| 22 | 
            +
                {
         | 
| 23 | 
            +
                  "epoch": 0.28,
         | 
| 24 | 
            +
                  "learning_rate": 1.9447208402432286e-05,
         | 
| 25 | 
            +
                  "loss": 0.1146,
         | 
| 26 | 
            +
                  "step": 1500
         | 
| 27 | 
            +
                },
         | 
| 28 | 
            +
                {
         | 
| 29 | 
            +
                  "epoch": 0.37,
         | 
| 30 | 
            +
                  "learning_rate": 1.9262944536576377e-05,
         | 
| 31 | 
            +
                  "loss": 0.1078,
         | 
| 32 | 
            +
                  "step": 2000
         | 
| 33 | 
            +
                },
         | 
| 34 | 
            +
                {
         | 
| 35 | 
            +
                  "epoch": 0.46,
         | 
| 36 | 
            +
                  "learning_rate": 1.9078680670720474e-05,
         | 
| 37 | 
            +
                  "loss": 0.1006,
         | 
| 38 | 
            +
                  "step": 2500
         | 
| 39 | 
            +
                },
         | 
| 40 | 
            +
                {
         | 
| 41 | 
            +
                  "epoch": 0.55,
         | 
| 42 | 
            +
                  "learning_rate": 1.8894416804864568e-05,
         | 
| 43 | 
            +
                  "loss": 0.0976,
         | 
| 44 | 
            +
                  "step": 3000
         | 
| 45 | 
            +
                },
         | 
| 46 | 
            +
                {
         | 
| 47 | 
            +
                  "epoch": 0.64,
         | 
| 48 | 
            +
                  "learning_rate": 1.871015293900866e-05,
         | 
| 49 | 
            +
                  "loss": 0.096,
         | 
| 50 | 
            +
                  "step": 3500
         | 
| 51 | 
            +
                },
         | 
| 52 | 
            +
                {
         | 
| 53 | 
            +
                  "epoch": 0.74,
         | 
| 54 | 
            +
                  "learning_rate": 1.8525889073152755e-05,
         | 
| 55 | 
            +
                  "loss": 0.0925,
         | 
| 56 | 
            +
                  "step": 4000
         | 
| 57 | 
            +
                },
         | 
| 58 | 
            +
                {
         | 
| 59 | 
            +
                  "epoch": 0.83,
         | 
| 60 | 
            +
                  "learning_rate": 1.8341625207296852e-05,
         | 
| 61 | 
            +
                  "loss": 0.0921,
         | 
| 62 | 
            +
                  "step": 4500
         | 
| 63 | 
            +
                },
         | 
| 64 | 
            +
                {
         | 
| 65 | 
            +
                  "epoch": 0.92,
         | 
| 66 | 
            +
                  "learning_rate": 1.8157361341440943e-05,
         | 
| 67 | 
            +
                  "loss": 0.0911,
         | 
| 68 | 
            +
                  "step": 5000
         | 
| 69 | 
            +
                },
         | 
| 70 | 
            +
                {
         | 
| 71 | 
            +
                  "epoch": 1.0,
         | 
| 72 | 
            +
                  "eval_accuracy": 0.40213785477331365,
         | 
| 73 | 
            +
                  "eval_f1": 0.5346146303196705,
         | 
| 74 | 
            +
                  "eval_loss": 0.08816272765398026,
         | 
| 75 | 
            +
                  "eval_roc_auc": 0.7098850238721621,
         | 
| 76 | 
            +
                  "eval_runtime": 11.8306,
         | 
| 77 | 
            +
                  "eval_samples_per_second": 458.641,
         | 
| 78 | 
            +
                  "eval_steps_per_second": 57.394,
         | 
| 79 | 
            +
                  "step": 5427
         | 
| 80 | 
            +
                },
         | 
| 81 | 
            +
                {
         | 
| 82 | 
            +
                  "epoch": 1.01,
         | 
| 83 | 
            +
                  "learning_rate": 1.797309747558504e-05,
         | 
| 84 | 
            +
                  "loss": 0.0897,
         | 
| 85 | 
            +
                  "step": 5500
         | 
| 86 | 
            +
                },
         | 
| 87 | 
            +
                {
         | 
| 88 | 
            +
                  "epoch": 1.11,
         | 
| 89 | 
            +
                  "learning_rate": 1.7788833609729134e-05,
         | 
| 90 | 
            +
                  "loss": 0.0856,
         | 
| 91 | 
            +
                  "step": 6000
         | 
| 92 | 
            +
                },
         | 
| 93 | 
            +
                {
         | 
| 94 | 
            +
                  "epoch": 1.2,
         | 
| 95 | 
            +
                  "learning_rate": 1.7604569743873227e-05,
         | 
| 96 | 
            +
                  "loss": 0.0816,
         | 
| 97 | 
            +
                  "step": 6500
         | 
| 98 | 
            +
                },
         | 
| 99 | 
            +
                {
         | 
| 100 | 
            +
                  "epoch": 1.29,
         | 
| 101 | 
            +
                  "learning_rate": 1.742030587801732e-05,
         | 
| 102 | 
            +
                  "loss": 0.0853,
         | 
| 103 | 
            +
                  "step": 7000
         | 
| 104 | 
            +
                },
         | 
| 105 | 
            +
                {
         | 
| 106 | 
            +
                  "epoch": 1.38,
         | 
| 107 | 
            +
                  "learning_rate": 1.7236042012161415e-05,
         | 
| 108 | 
            +
                  "loss": 0.0846,
         | 
| 109 | 
            +
                  "step": 7500
         | 
| 110 | 
            +
                },
         | 
| 111 | 
            +
                {
         | 
| 112 | 
            +
                  "epoch": 1.47,
         | 
| 113 | 
            +
                  "learning_rate": 1.7051778146305512e-05,
         | 
| 114 | 
            +
                  "loss": 0.0843,
         | 
| 115 | 
            +
                  "step": 8000
         | 
| 116 | 
            +
                },
         | 
| 117 | 
            +
                {
         | 
| 118 | 
            +
                  "epoch": 1.57,
         | 
| 119 | 
            +
                  "learning_rate": 1.6867514280449606e-05,
         | 
| 120 | 
            +
                  "loss": 0.0807,
         | 
| 121 | 
            +
                  "step": 8500
         | 
| 122 | 
            +
                },
         | 
| 123 | 
            +
                {
         | 
| 124 | 
            +
                  "epoch": 1.66,
         | 
| 125 | 
            +
                  "learning_rate": 1.66832504145937e-05,
         | 
| 126 | 
            +
                  "loss": 0.0796,
         | 
| 127 | 
            +
                  "step": 9000
         | 
| 128 | 
            +
                },
         | 
| 129 | 
            +
                {
         | 
| 130 | 
            +
                  "epoch": 1.75,
         | 
| 131 | 
            +
                  "learning_rate": 1.6498986548737793e-05,
         | 
| 132 | 
            +
                  "loss": 0.081,
         | 
| 133 | 
            +
                  "step": 9500
         | 
| 134 | 
            +
                },
         | 
| 135 | 
            +
                {
         | 
| 136 | 
            +
                  "epoch": 1.84,
         | 
| 137 | 
            +
                  "learning_rate": 1.6314722682881887e-05,
         | 
| 138 | 
            +
                  "loss": 0.0798,
         | 
| 139 | 
            +
                  "step": 10000
         | 
| 140 | 
            +
                },
         | 
| 141 | 
            +
                {
         | 
| 142 | 
            +
                  "epoch": 1.93,
         | 
| 143 | 
            +
                  "learning_rate": 1.613045881702598e-05,
         | 
| 144 | 
            +
                  "loss": 0.0821,
         | 
| 145 | 
            +
                  "step": 10500
         | 
| 146 | 
            +
                },
         | 
| 147 | 
            +
                {
         | 
| 148 | 
            +
                  "epoch": 2.0,
         | 
| 149 | 
            +
                  "eval_accuracy": 0.44010320678216,
         | 
| 150 | 
            +
                  "eval_f1": 0.5612426312342098,
         | 
| 151 | 
            +
                  "eval_loss": 0.08432479202747345,
         | 
| 152 | 
            +
                  "eval_roc_auc": 0.7305379849481191,
         | 
| 153 | 
            +
                  "eval_runtime": 11.8056,
         | 
| 154 | 
            +
                  "eval_samples_per_second": 459.613,
         | 
| 155 | 
            +
                  "eval_steps_per_second": 57.515,
         | 
| 156 | 
            +
                  "step": 10854
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                {
         | 
| 159 | 
            +
                  "epoch": 2.03,
         | 
| 160 | 
            +
                  "learning_rate": 1.5946194951170078e-05,
         | 
| 161 | 
            +
                  "loss": 0.0776,
         | 
| 162 | 
            +
                  "step": 11000
         | 
| 163 | 
            +
                },
         | 
| 164 | 
            +
                {
         | 
| 165 | 
            +
                  "epoch": 2.12,
         | 
| 166 | 
            +
                  "learning_rate": 1.5761931085314172e-05,
         | 
| 167 | 
            +
                  "loss": 0.0726,
         | 
| 168 | 
            +
                  "step": 11500
         | 
| 169 | 
            +
                },
         | 
| 170 | 
            +
                {
         | 
| 171 | 
            +
                  "epoch": 2.21,
         | 
| 172 | 
            +
                  "learning_rate": 1.5577667219458266e-05,
         | 
| 173 | 
            +
                  "loss": 0.0718,
         | 
| 174 | 
            +
                  "step": 12000
         | 
| 175 | 
            +
                },
         | 
| 176 | 
            +
                {
         | 
| 177 | 
            +
                  "epoch": 2.3,
         | 
| 178 | 
            +
                  "learning_rate": 1.539340335360236e-05,
         | 
| 179 | 
            +
                  "loss": 0.0735,
         | 
| 180 | 
            +
                  "step": 12500
         | 
| 181 | 
            +
                },
         | 
| 182 | 
            +
                {
         | 
| 183 | 
            +
                  "epoch": 2.4,
         | 
| 184 | 
            +
                  "learning_rate": 1.5209139487746453e-05,
         | 
| 185 | 
            +
                  "loss": 0.0735,
         | 
| 186 | 
            +
                  "step": 13000
         | 
| 187 | 
            +
                },
         | 
| 188 | 
            +
                {
         | 
| 189 | 
            +
                  "epoch": 2.49,
         | 
| 190 | 
            +
                  "learning_rate": 1.5024875621890549e-05,
         | 
| 191 | 
            +
                  "loss": 0.0721,
         | 
| 192 | 
            +
                  "step": 13500
         | 
| 193 | 
            +
                },
         | 
| 194 | 
            +
                {
         | 
| 195 | 
            +
                  "epoch": 2.58,
         | 
| 196 | 
            +
                  "learning_rate": 1.4840611756034643e-05,
         | 
| 197 | 
            +
                  "loss": 0.0722,
         | 
| 198 | 
            +
                  "step": 14000
         | 
| 199 | 
            +
                },
         | 
| 200 | 
            +
                {
         | 
| 201 | 
            +
                  "epoch": 2.67,
         | 
| 202 | 
            +
                  "learning_rate": 1.4656347890178736e-05,
         | 
| 203 | 
            +
                  "loss": 0.0751,
         | 
| 204 | 
            +
                  "step": 14500
         | 
| 205 | 
            +
                },
         | 
| 206 | 
            +
                {
         | 
| 207 | 
            +
                  "epoch": 2.76,
         | 
| 208 | 
            +
                  "learning_rate": 1.4472084024322832e-05,
         | 
| 209 | 
            +
                  "loss": 0.0727,
         | 
| 210 | 
            +
                  "step": 15000
         | 
| 211 | 
            +
                },
         | 
| 212 | 
            +
                {
         | 
| 213 | 
            +
                  "epoch": 2.86,
         | 
| 214 | 
            +
                  "learning_rate": 1.4287820158466926e-05,
         | 
| 215 | 
            +
                  "loss": 0.0735,
         | 
| 216 | 
            +
                  "step": 15500
         | 
| 217 | 
            +
                },
         | 
| 218 | 
            +
                {
         | 
| 219 | 
            +
                  "epoch": 2.95,
         | 
| 220 | 
            +
                  "learning_rate": 1.4103556292611021e-05,
         | 
| 221 | 
            +
                  "loss": 0.0714,
         | 
| 222 | 
            +
                  "step": 16000
         | 
| 223 | 
            +
                },
         | 
| 224 | 
            +
                {
         | 
| 225 | 
            +
                  "epoch": 3.0,
         | 
| 226 | 
            +
                  "eval_accuracy": 0.47475119793586434,
         | 
| 227 | 
            +
                  "eval_f1": 0.5862595419847328,
         | 
| 228 | 
            +
                  "eval_loss": 0.0838962271809578,
         | 
| 229 | 
            +
                  "eval_roc_auc": 0.7506773514396311,
         | 
| 230 | 
            +
                  "eval_runtime": 11.8261,
         | 
| 231 | 
            +
                  "eval_samples_per_second": 458.814,
         | 
| 232 | 
            +
                  "eval_steps_per_second": 57.415,
         | 
| 233 | 
            +
                  "step": 16281
         | 
| 234 | 
            +
                }
         | 
| 235 | 
            +
              ],
         | 
| 236 | 
            +
              "max_steps": 54270,
         | 
| 237 | 
            +
              "num_train_epochs": 10,
         | 
| 238 | 
            +
              "total_flos": 8568237917583360.0,
         | 
| 239 | 
            +
              "trial_name": null,
         | 
| 240 | 
            +
              "trial_params": null
         | 
| 241 | 
            +
            }
         | 
    	
        vocab.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
