| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.971563981042654, |
| "eval_steps": 100, |
| "global_step": 104, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.018957345971563982, |
| "grad_norm": 66.08671668865084, |
| "learning_rate": 4.545454545454545e-08, |
| "logits/chosen": 117.53560638427734, |
| "logits/rejected": 126.8960952758789, |
| "logps/chosen": -335.40118408203125, |
| "logps/rejected": -439.16552734375, |
| "loss": 0.5, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.1895734597156398, |
| "grad_norm": 69.395273460518, |
| "learning_rate": 4.545454545454545e-07, |
| "logits/chosen": 135.0714569091797, |
| "logits/rejected": 138.4192657470703, |
| "logps/chosen": -394.4978332519531, |
| "logps/rejected": -438.8009338378906, |
| "loss": 0.4962, |
| "rewards/accuracies": 0.4652777910232544, |
| "rewards/chosen": 0.07493551820516586, |
| "rewards/margins": 0.048605356365442276, |
| "rewards/rejected": 0.026330159977078438, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3791469194312796, |
| "grad_norm": 29.977088974021598, |
| "learning_rate": 4.885348141000122e-07, |
| "logits/chosen": 122.71434020996094, |
| "logits/rejected": 126.32965087890625, |
| "logps/chosen": -353.5599060058594, |
| "logps/rejected": -406.46490478515625, |
| "loss": 0.411, |
| "rewards/accuracies": 0.581250011920929, |
| "rewards/chosen": 0.9426037073135376, |
| "rewards/margins": 0.2662777602672577, |
| "rewards/rejected": 0.676325798034668, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5687203791469194, |
| "grad_norm": 25.169253527598716, |
| "learning_rate": 4.5025027361734613e-07, |
| "logits/chosen": 144.8006134033203, |
| "logits/rejected": 138.1434326171875, |
| "logps/chosen": -380.2307434082031, |
| "logps/rejected": -436.6331481933594, |
| "loss": 0.3519, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.6117245554924011, |
| "rewards/margins": 1.4432713985443115, |
| "rewards/rejected": -0.8315467834472656, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7582938388625592, |
| "grad_norm": 24.67006837987384, |
| "learning_rate": 3.893311157806091e-07, |
| "logits/chosen": 136.48574829101562, |
| "logits/rejected": 125.43827819824219, |
| "logps/chosen": -322.3842468261719, |
| "logps/rejected": -364.0414733886719, |
| "loss": 0.3425, |
| "rewards/accuracies": 0.706250011920929, |
| "rewards/chosen": 1.5904960632324219, |
| "rewards/margins": 1.8988056182861328, |
| "rewards/rejected": -0.3083093464374542, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.9478672985781991, |
| "grad_norm": 25.00841040201808, |
| "learning_rate": 3.126631330646801e-07, |
| "logits/chosen": 154.9099884033203, |
| "logits/rejected": 158.31307983398438, |
| "logps/chosen": -383.2764892578125, |
| "logps/rejected": -484.3291015625, |
| "loss": 0.3108, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.793367624282837, |
| "rewards/margins": 2.193502902984619, |
| "rewards/rejected": -0.4001353681087494, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.1374407582938388, |
| "grad_norm": 20.059631761558116, |
| "learning_rate": 2.2891223348923882e-07, |
| "logits/chosen": 145.4669647216797, |
| "logits/rejected": 149.66366577148438, |
| "logps/chosen": -360.8850402832031, |
| "logps/rejected": -458.97705078125, |
| "loss": 0.2596, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": 1.8560327291488647, |
| "rewards/margins": 2.9866089820861816, |
| "rewards/rejected": -1.1305763721466064, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.3270142180094786, |
| "grad_norm": 19.000102691830318, |
| "learning_rate": 1.4754491880085317e-07, |
| "logits/chosen": 140.24868774414062, |
| "logits/rejected": 141.07412719726562, |
| "logps/chosen": -329.3720703125, |
| "logps/rejected": -430.41650390625, |
| "loss": 0.2358, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.8885892629623413, |
| "rewards/margins": 2.7337210178375244, |
| "rewards/rejected": -0.8451315760612488, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.5165876777251186, |
| "grad_norm": 20.284917774527994, |
| "learning_rate": 7.775827023107834e-08, |
| "logits/chosen": 127.765625, |
| "logits/rejected": 143.33151245117188, |
| "logps/chosen": -311.8875427246094, |
| "logps/rejected": -435.666015625, |
| "loss": 0.2087, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.0881741046905518, |
| "rewards/margins": 3.2095909118652344, |
| "rewards/rejected": -1.121416687965393, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.7061611374407581, |
| "grad_norm": 20.77837485504225, |
| "learning_rate": 2.7440387297912122e-08, |
| "logits/chosen": 128.07351684570312, |
| "logits/rejected": 140.38681030273438, |
| "logps/chosen": -341.97064208984375, |
| "logps/rejected": -461.2508850097656, |
| "loss": 0.2054, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 2.22301983833313, |
| "rewards/margins": 3.40147066116333, |
| "rewards/rejected": -1.1784509420394897, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.8957345971563981, |
| "grad_norm": 19.748881150000962, |
| "learning_rate": 2.27878296044029e-09, |
| "logits/chosen": 134.47386169433594, |
| "logits/rejected": 134.50311279296875, |
| "logps/chosen": -334.57818603515625, |
| "logps/rejected": -429.48236083984375, |
| "loss": 0.1946, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 2.3698461055755615, |
| "rewards/margins": 3.005765438079834, |
| "rewards/rejected": -0.6359192728996277, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.8957345971563981, |
| "eval_logits/chosen": 113.83429718017578, |
| "eval_logits/rejected": 108.64144134521484, |
| "eval_logps/chosen": -333.18048095703125, |
| "eval_logps/rejected": -365.2297668457031, |
| "eval_loss": 0.2941707372665405, |
| "eval_rewards/accuracies": 0.6458333134651184, |
| "eval_rewards/chosen": 1.5243864059448242, |
| "eval_rewards/margins": 1.6823266744613647, |
| "eval_rewards/rejected": -0.15794026851654053, |
| "eval_runtime": 116.4001, |
| "eval_samples_per_second": 6.443, |
| "eval_steps_per_second": 0.206, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.971563981042654, |
| "step": 104, |
| "total_flos": 0.0, |
| "train_loss": 0.2986852119748409, |
| "train_runtime": 2293.3212, |
| "train_samples_per_second": 5.887, |
| "train_steps_per_second": 0.045 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 104, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|