diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,29493 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.16, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "accuracy_reward": 0.2395833432674408, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.18410088121891022, + "adam_stats/lm_head/lr_effective_max": 4.743410954688443e-06, + "adam_stats/lm_head/lr_effective_mean": -4.643494247967561e-13, + "adam_stats/lm_head/lr_effective_min": -4.7434205043828115e-06, + "adam_stats/lm_head/lr_effective_std": 3.368802765635337e-07, + "adam_stats/lr_effective_max": 4.743410954688443e-06, + "adam_stats/lr_effective_mean": 1.944820693378091e-11, + "adam_stats/lr_effective_min": -4.7434205043828115e-06, + "adam_stats/m_t_max": 0.004091937094926834, + "adam_stats/m_t_mean": -4.856836807731746e-11, + "adam_stats/m_t_min": -0.005426740273833275, + "adam_stats/v_t_max": 2.944911784652504e-06, + "adam_stats/v_t_mean": 3.7637492948661144e-13, + "adam_stats/v_t_min": 0.0, + "advantages": -2.4835269396561444e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": -0.3534534275531769, + "advantages/p75": 0.0, + "advantages/var": 0.5891808867454529, + "all_logprobs": -0.3230817914009094, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.0034637451171875, + "all_logprobs/min": -10.625, + "all_logprobs/p1": -4.03125, + "all_logprobs/p10": -1.0625, + "all_logprobs/p25": -0.1943359375, + "all_logprobs/p5": -1.90625, + "all_logprobs/p75": -0.00014591217041015625, + "all_logprobs/var": 0.6337048411369324, + "clip_ratio": 0.0, + "completion_length": 534.1041870117188, + "completion_length/correct": 542.95654296875, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 525.0, + "completion_length/correct/min": 253.0, + "completion_length/correct/p25": 378.0, + "completion_length/correct/p75": 666.0, + "completion_length/correct/var": 47137.31640625, + "completion_length/incorrect": 531.3150634765625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 407.0, + "completion_length/incorrect/min": 8.0, + "completion_length/incorrect/p25": 299.0, + "completion_length/incorrect/p75": 714.0, + "completion_length/incorrect/var": 86581.1953125, + "completion_length/max": 1024.0, + "completion_length/median": 452.0, + "completion_length/min": 8.0, + "completion_length/p25": 316.75, + "completion_length/p75": 687.75, + "completion_length/var": 76560.3984375, + "epoch": 0.0016, + "feature_vector_variance/max_squared_error": 234783.609375, + "feature_vector_variance/metric": 46119.7734375, + "generated_tokens/total": 51274.0, + "global_fisher_curvature": 104960.0, + "global_fisher_curvature/max": 104960.0, + "global_fisher_curvature/median": 104960.0, + "global_fisher_curvature/min": 104960.0, + "global_fisher_curvature/p25": 104960.0, + "global_fisher_curvature/p75": 104960.0, + "global_fisher_curvature/p85": 104960.0, + "global_fisher_curvature/p90": 104960.0, + "global_fisher_curvature/p95": 104960.0, + "global_fisher_curvature/p99": 104960.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 4704.0, + "global_hessian_coeff/max": 4704.0, + "global_hessian_coeff/median": 4704.0, + "global_hessian_coeff/min": 4704.0, + "global_hessian_coeff/p25": 4704.0, + "global_hessian_coeff/p75": 4704.0, + "global_hessian_coeff/p99": 4704.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 4704.0, + "global_hessian_coeff_abs/max": 4704.0, + "global_hessian_coeff_abs/median": 4704.0, + "global_hessian_coeff_abs/min": 4704.0, + "global_hessian_coeff_abs/p25": 4704.0, + "global_hessian_coeff_abs/p75": 4704.0, + "global_hessian_coeff_abs/p99": 4704.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 1.1157127618789673, + "grouped_std_rewards": 0.2781156599521637, + "learning_rate": 1.5e-06, + "loss": 0.0, + "mean_logprobs": -0.3515625, + "mean_logprobs/var": 0.03955078125, + "num_completions/total": 96, + "per_sentence_gradient_norm": 175.46615600585938, + "per_sentence_gradient_norm/max": 1184.0, + "per_sentence_gradient_norm/median": 102.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 224.0, + "per_sentence_gradient_norm/var": 61929.81640625, + "per_token_feature_norm": 287.3249206542969, + "per_token_feature_norm/max": 432.0, + "per_token_feature_norm/median": 284.0, + "per_token_feature_norm/min": 115.5, + "per_token_feature_norm/p25": 242.0, + "per_token_feature_norm/p75": 334.0, + "per_token_feature_norm/var": 3518.2900390625, + "per_token_gradient_norm": 35.48508071899414, + "per_token_gradient_norm/max": 1408.0, + "per_token_gradient_norm/median": 0.01123046875, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 7.09375, + "per_token_gradient_norm/var": 12838.4501953125, + "per_token_policy_error_norm": 0.14946198463439941, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.05078125, + "per_token_policy_error_norm/var": 0.11736708134412766, + "policy_entropy": 0.36306920647621155, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 0.0262451171875, + "policy_entropy/min": 6.024492904543877e-09, + "policy_entropy/p25": 0.001617431640625, + "policy_entropy/p75": 0.56640625, + "policy_entropy/var": 0.3452393710613251, + "policy_loss": 1.862645149230957e-09, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.3534534275531769, + "policy_loss/var": 0.5891808867454529, + "policy_sharpness": 4.72580099105835, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 3.230664014816284, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.8007653951644897, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.39922523498535, + "reward": 0.2395833432674408, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.18410088121891022, + "rewards/accuracy_reward": 0.2395833432674408, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.18410088121891022, + "sentence_fisher_curvature": 18170732.0, + "sentence_fisher_curvature/max": 381681664.0, + "sentence_fisher_curvature/median": 1662976.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 9371648.0, + "sentence_fisher_curvature/p85": 24936448.0, + "sentence_fisher_curvature/p90": 36962304.0, + "sentence_fisher_curvature/p95": 90439680.0, + "sentence_fisher_curvature/p99": 361758784.0, + "sentence_fisher_curvature/var": 3160955269677056.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 91105.0859375, + "sentence_full_gradient_variance/metric": 91105.0859375, + "sentence_full_gradient_variance/p75": 91105.0859375, + "sentence_full_gradient_variance/p90": 91105.0859375, + "sentence_full_gradient_variance/p95": 91105.0859375, + "sentence_full_gradient_variance/p99": 91105.0859375, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 15993454.0, + "sentence_hessian_coeff/max": 784334848.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -180355072.0, + "sentence_hessian_coeff/p25": -708608.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 744489088.0, + "sentence_hessian_coeff/var": 1.29730292482048e+16, + "sentence_hessian_coeff_abs": 25788366.0, + "sentence_hessian_coeff_abs/max": 784334848.0, + "sentence_hessian_coeff_abs/median": 491520.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 4767744.0, + "sentence_hessian_coeff_abs/p99": 744489088.0, + "sentence_hessian_coeff_abs/var": 1.255947221598208e+16, + "step": 1, + "token_fisher_curvature": 1695171328.0, + "token_fisher_curvature/max": 644245094400.0, + "token_fisher_curvature/median": 5.098991096019745e-08, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 6656.0, + "token_fisher_curvature/p85": 21233664.0, + "token_fisher_curvature/p90": 343932928.0, + "token_fisher_curvature/p95": 2785017856.0, + "token_fisher_curvature/p99": 38799409152.0, + "token_fisher_curvature/var": 2.235324203689071e+20, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": -159364592.0, + "token_hessian_coeff/max": 1494648619008.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -1571958030336.0, + "token_hessian_coeff/p25": -2.0081643015146255e-08, + "token_hessian_coeff/p75": 0.0267333984375, + "token_hessian_coeff/p99": 13354663936.0, + "token_hessian_coeff/var": 1.0904892288725166e+21, + "token_hessian_coeff_abs": 2858351616.0, + "token_hessian_coeff_abs/max": 1571958030336.0, + "token_hessian_coeff_abs/median": 0.000278472900390625, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 115712.0, + "token_hessian_coeff_abs/p99": 46976204800.0, + "token_hessian_coeff_abs/var": 1.0823442578401846e+21 + }, + { + "accuracy_reward": 0.1666666716337204, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.14035086333751678, + "adam_stats/lm_head/lr_effective_max": 1.2765966857841704e-05, + "adam_stats/lm_head/lr_effective_mean": 8.830071396292993e-11, + "adam_stats/lm_head/lr_effective_min": -1.2765972314809915e-05, + "adam_stats/lm_head/lr_effective_std": 7.562684913864359e-07, + "adam_stats/lr_effective_max": 1.2765966857841704e-05, + "adam_stats/lr_effective_mean": 9.065007772202094e-11, + "adam_stats/lr_effective_min": -1.2765972314809915e-05, + "adam_stats/m_t_max": 0.006578319240361452, + "adam_stats/m_t_mean": -1.1842138381013e-10, + "adam_stats/m_t_min": -0.010800614953041077, + "adam_stats/v_t_max": 7.520056442444911e-06, + "adam_stats/v_t_mean": 7.258531341085284e-13, + "adam_stats/v_t_min": 0.0, + "advantages": 6.208817460162663e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -0.9352393746376038, + "advantages/p25": -0.5399450659751892, + "advantages/p75": 0.0, + "advantages/var": 0.5155580043792725, + "all_logprobs": -0.3400115966796875, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.004913330078125, + "all_logprobs/min": -10.5, + "all_logprobs/p1": -4.09375, + "all_logprobs/p10": -1.125, + "all_logprobs/p25": -0.2265625, + "all_logprobs/p5": -1.9765625, + "all_logprobs/p75": -0.0001506805419921875, + "all_logprobs/var": 0.6706479787826538, + "clip_ratio": 0.0, + "completion_length": 457.5520935058594, + "completion_length/correct": 369.375, + "completion_length/correct/max": 532.0, + "completion_length/correct/median": 398.0, + "completion_length/correct/min": 109.0, + "completion_length/correct/p25": 276.5, + "completion_length/correct/p75": 487.75, + "completion_length/correct/var": 20607.849609375, + "completion_length/incorrect": 475.1875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 381.0, + "completion_length/incorrect/min": 14.0, + "completion_length/incorrect/p25": 233.75, + "completion_length/incorrect/p75": 612.0, + "completion_length/incorrect/var": 88930.71875, + "completion_length/max": 1024.0, + "completion_length/median": 387.0, + "completion_length/min": 14.0, + "completion_length/p25": 233.75, + "completion_length/p75": 540.5, + "completion_length/var": 78778.1875, + "epoch": 0.0032, + "feature_vector_variance/max_squared_error": 209079.40625, + "feature_vector_variance/metric": 47083.49609375, + "generated_tokens/total": 95199.0, + "global_fisher_curvature": 101376.0, + "global_fisher_curvature/max": 101376.0, + "global_fisher_curvature/median": 101376.0, + "global_fisher_curvature/min": 101376.0, + "global_fisher_curvature/p25": 101376.0, + "global_fisher_curvature/p75": 101376.0, + "global_fisher_curvature/p85": 101376.0, + "global_fisher_curvature/p90": 101376.0, + "global_fisher_curvature/p95": 101376.0, + "global_fisher_curvature/p99": 101376.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.1408701539039612e-07, + "global_fisher_kl_divergence/max": 1.1408701539039612e-07, + "global_fisher_kl_divergence/median": 1.1408701539039612e-07, + "global_fisher_kl_divergence/min": 1.1408701539039612e-07, + "global_fisher_kl_divergence/p25": 1.1408701539039612e-07, + "global_fisher_kl_divergence/p75": 1.1408701539039612e-07, + "global_fisher_kl_divergence/p85": 1.1408701539039612e-07, + "global_fisher_kl_divergence/p90": 1.1408701539039612e-07, + "global_fisher_kl_divergence/p95": 1.1408701539039612e-07, + "global_fisher_kl_divergence/p99": 1.1408701539039612e-07, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.15625, + "global_full_update_term/max": 0.15625, + "global_full_update_term/median": 0.15625, + "global_full_update_term/min": 0.15625, + "global_full_update_term/p25": 0.15625, + "global_full_update_term/p75": 0.15625, + "global_full_update_term/p85": 0.15625, + "global_full_update_term/p90": 0.15625, + "global_full_update_term/p95": 0.15625, + "global_full_update_term/p99": 0.15625, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 12544.0, + "global_hessian_coeff/max": 12544.0, + "global_hessian_coeff/median": 12544.0, + "global_hessian_coeff/min": 12544.0, + "global_hessian_coeff/p25": 12544.0, + "global_hessian_coeff/p75": 12544.0, + "global_hessian_coeff/p99": 12544.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 12544.0, + "global_hessian_coeff_abs/max": 12544.0, + "global_hessian_coeff_abs/median": 12544.0, + "global_hessian_coeff_abs/min": 12544.0, + "global_hessian_coeff_abs/p25": 12544.0, + "global_hessian_coeff_abs/p75": 12544.0, + "global_hessian_coeff_abs/p99": 12544.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 1.166612982749939, + "grouped_std_rewards": 0.26687896251678467, + "learning_rate": 3e-06, + "loss": -0.0, + "mean_logprobs": -0.357421875, + "mean_logprobs/var": 0.0294189453125, + "num_completions/total": 192, + "per_sentence_gradient_norm": 155.1171875, + "per_sentence_gradient_norm/max": 764.0, + "per_sentence_gradient_norm/median": 92.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 218.0, + "per_sentence_gradient_norm/var": 39728.4453125, + "per_token_feature_norm": 294.16943359375, + "per_token_feature_norm/max": 430.0, + "per_token_feature_norm/median": 298.0, + "per_token_feature_norm/min": 117.5, + "per_token_feature_norm/p25": 249.0, + "per_token_feature_norm/p75": 338.0, + "per_token_feature_norm/var": 3393.75244140625, + "per_token_gradient_norm": 22.017518997192383, + "per_token_gradient_norm/max": 1072.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.302734375, + "per_token_gradient_norm/var": 5227.6533203125, + "per_token_policy_error_norm": 0.1560748964548111, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.06640625, + "per_token_policy_error_norm/var": 0.12134422361850739, + "policy_entropy": 0.3835253119468689, + "policy_entropy/max": 3.828125, + "policy_entropy/median": 0.035888671875, + "policy_entropy/min": 5.2386894822120667e-08, + "policy_entropy/p25": 0.0016632080078125, + "policy_entropy/p75": 0.609375, + "policy_entropy/var": 0.364763081073761, + "policy_loss": -1.614292521878724e-08, + "policy_loss/max": 0.9352394342422485, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.5399450063705444, + "policy_loss/var": 0.5155580043792725, + "policy_sharpness": 4.610587120056152, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 2.861328125, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.6869634389877319, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.700960159301758, + "reward": 0.1666666716337204, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.14035086333751678, + "rewards/accuracy_reward": 0.1666666716337204, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.14035086333751678, + "sentence_fisher_curvature": 19954582.0, + "sentence_fisher_curvature/max": 312475648.0, + "sentence_fisher_curvature/median": 2342912.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 17268736.0, + "sentence_fisher_curvature/p85": 28803072.0, + "sentence_fisher_curvature/p90": 58720256.0, + "sentence_fisher_curvature/p95": 92667904.0, + "sentence_fisher_curvature/p99": 209872816.0, + "sentence_fisher_curvature/var": 2163443343818752.0, + "sentence_fisher_kl_divergence": 2.2432923287851736e-05, + "sentence_fisher_kl_divergence/max": 0.0003509521484375, + "sentence_fisher_kl_divergence/median": 2.637505531311035e-06, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 1.9431114196777344e-05, + "sentence_fisher_kl_divergence/p85": 3.248453140258789e-05, + "sentence_fisher_kl_divergence/p90": 6.604194641113281e-05, + "sentence_fisher_kl_divergence/p95": 0.00010406970977783203, + "sentence_fisher_kl_divergence/p99": 0.00023589171178173274, + "sentence_fisher_kl_divergence/var": 2.7313251660388005e-09, + "sentence_full_gradient_variance/max_squared_error": 62667.94921875, + "sentence_full_gradient_variance/metric": 62667.94921875, + "sentence_full_gradient_variance/p75": 62667.94921875, + "sentence_full_gradient_variance/p90": 62667.94921875, + "sentence_full_gradient_variance/p95": 62667.94921875, + "sentence_full_gradient_variance/p99": 62667.94921875, + "sentence_full_update_term": 0.09275754541158676, + "sentence_full_update_term/max": 0.8671875, + "sentence_full_update_term/median": 0.0126953125, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.068115234375, + "sentence_full_update_term/p85": 0.18408203125, + "sentence_full_update_term/p90": 0.3388671875, + "sentence_full_update_term/p95": 0.578125, + "sentence_full_update_term/p99": 0.703906774520874, + "sentence_full_update_term/var": 0.03314317390322685, + "sentence_hessian_coeff": 14471702.0, + "sentence_hessian_coeff/max": 320864256.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -55050240.0, + "sentence_hessian_coeff/p25": -1959936.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 268068624.0, + "sentence_hessian_coeff/var": 3062666553720832.0, + "sentence_hessian_coeff_abs": 19661632.0, + "sentence_hessian_coeff_abs/max": 320864256.0, + "sentence_hessian_coeff_abs/median": 544768.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 7979008.0, + "sentence_hessian_coeff_abs/p99": 268068624.0, + "sentence_hessian_coeff_abs/var": 2883652316823552.0, + "step": 2, + "token_fisher_curvature": 612268416.0, + "token_fisher_curvature/max": 217969590272.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.009521484375, + "token_fisher_curvature/p85": 153600.0, + "token_fisher_curvature/p90": 38273024.0, + "token_fisher_curvature/p95": 1325400064.0, + "token_fisher_curvature/p99": 15502147584.0, + "token_fisher_curvature/var": 2.4442070917693047e+19, + "token_fisher_kl_divergence": 0.0006889690994285047, + "token_fisher_kl_divergence/max": 0.2451171875, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 1.071365218763276e-14, + "token_fisher_kl_divergence/p85": 1.73225998878479e-07, + "token_fisher_kl_divergence/p90": 4.315376281738281e-05, + "token_fisher_kl_divergence/p95": 0.00148773193359375, + "token_fisher_kl_divergence/p99": 0.0174560546875, + "token_fisher_kl_divergence/var": 3.0952898669056594e-05, + "token_full_update_term": 0.008964319713413715, + "token_full_update_term/max": 2.21875, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 1.3783574104309082e-07, + "token_full_update_term/p85": 0.00040435791015625, + "token_full_update_term/p90": 0.0062255859375, + "token_full_update_term/p95": 0.03662109375, + "token_full_update_term/p99": 0.20777511596679688, + "token_full_update_term/var": 0.0032429599668830633, + "token_hessian_coeff": 351424960.0, + "token_hessian_coeff/max": 459561500672.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -59861106688.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 4.857778549194336e-06, + "token_hessian_coeff/p99": 7701528576.0, + "token_hessian_coeff/var": 5.705219821385495e+19, + "token_hessian_coeff_abs": 604970880.0, + "token_hessian_coeff_abs/max": 459561500672.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 6.8125, + "token_hessian_coeff_abs/p99": 11846287360.0, + "token_hessian_coeff_abs/var": 5.680969432727919e+19 + }, + { + "accuracy_reward": 0.1979166716337204, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.16041667759418488, + "adam_stats/lm_head/lr_effective_max": 2.2339620045386255e-05, + "adam_stats/lm_head/lr_effective_mean": 8.774812820799838e-11, + "adam_stats/lm_head/lr_effective_min": -2.2354339307639748e-05, + "adam_stats/lm_head/lr_effective_std": 1.1202168934687506e-06, + "adam_stats/lr_effective_max": 2.235354986623861e-05, + "adam_stats/lr_effective_mean": -7.064243634502532e-11, + "adam_stats/lr_effective_min": -2.2354339307639748e-05, + "adam_stats/m_t_max": 0.007818096317350864, + "adam_stats/m_t_mean": -1.0030586083953352e-10, + "adam_stats/m_t_min": -0.011020811274647713, + "adam_stats/v_t_max": 8.848381185089238e-06, + "adam_stats/v_t_mean": 1.1200842770647812e-12, + "adam_stats/v_t_min": 0.0, + "advantages": -6.208817349140361e-10, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -1.2073814868927002, + "advantages/p25": -0.3534534275531769, + "advantages/p75": 0.0, + "advantages/var": 0.5155614018440247, + "all_logprobs": -0.4081622064113617, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.00897216796875, + "all_logprobs/min": -10.4375, + "all_logprobs/p1": -4.46875, + "all_logprobs/p10": -1.359375, + "all_logprobs/p25": -0.345703125, + "all_logprobs/p5": -2.265625, + "all_logprobs/p75": -0.00014209747314453125, + "all_logprobs/var": 0.8185628652572632, + "clip_ratio": 0.0, + "completion_length": 388.3958435058594, + "completion_length/correct": 299.52630615234375, + "completion_length/correct/max": 552.0, + "completion_length/correct/median": 254.0, + "completion_length/correct/min": 98.0, + "completion_length/correct/p25": 198.0, + "completion_length/correct/p75": 395.0, + "completion_length/correct/var": 18420.375, + "completion_length/incorrect": 410.3246765136719, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 345.0, + "completion_length/incorrect/min": 2.0, + "completion_length/incorrect/p25": 223.0, + "completion_length/incorrect/p75": 540.0, + "completion_length/incorrect/var": 69262.8828125, + "completion_length/max": 1024.0, + "completion_length/median": 325.0, + "completion_length/min": 2.0, + "completion_length/p25": 222.75, + "completion_length/p75": 512.25, + "completion_length/var": 60869.80078125, + "epoch": 0.0048, + "feature_vector_variance/max_squared_error": 221999.203125, + "feature_vector_variance/metric": 48507.8515625, + "generated_tokens/total": 132485.0, + "global_fisher_curvature": 847872.0, + "global_fisher_curvature/max": 847872.0, + "global_fisher_curvature/median": 847872.0, + "global_fisher_curvature/min": 847872.0, + "global_fisher_curvature/p25": 847872.0, + "global_fisher_curvature/p75": 847872.0, + "global_fisher_curvature/p85": 847872.0, + "global_fisher_curvature/p90": 847872.0, + "global_fisher_curvature/p95": 847872.0, + "global_fisher_curvature/p99": 847872.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 3.814697265625e-06, + "global_fisher_kl_divergence/max": 3.814697265625e-06, + "global_fisher_kl_divergence/median": 3.814697265625e-06, + "global_fisher_kl_divergence/min": 3.814697265625e-06, + "global_fisher_kl_divergence/p25": 3.814697265625e-06, + "global_fisher_kl_divergence/p75": 3.814697265625e-06, + "global_fisher_kl_divergence/p85": 3.814697265625e-06, + "global_fisher_kl_divergence/p90": 3.814697265625e-06, + "global_fisher_kl_divergence/p95": 3.814697265625e-06, + "global_fisher_kl_divergence/p99": 3.814697265625e-06, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 2.734375, + "global_full_update_term/max": 2.734375, + "global_full_update_term/median": 2.734375, + "global_full_update_term/min": 2.734375, + "global_full_update_term/p25": 2.734375, + "global_full_update_term/p75": 2.734375, + "global_full_update_term/p85": 2.734375, + "global_full_update_term/p90": 2.734375, + "global_full_update_term/p95": 2.734375, + "global_full_update_term/p99": 2.734375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -630784.0, + "global_hessian_coeff/max": -630784.0, + "global_hessian_coeff/median": -630784.0, + "global_hessian_coeff/min": -630784.0, + "global_hessian_coeff/p25": -630784.0, + "global_hessian_coeff/p75": -630784.0, + "global_hessian_coeff/p99": -630784.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 630784.0, + "global_hessian_coeff_abs/max": 630784.0, + "global_hessian_coeff_abs/median": 630784.0, + "global_hessian_coeff_abs/min": 630784.0, + "global_hessian_coeff_abs/p25": 630784.0, + "global_hessian_coeff_abs/p75": 630784.0, + "global_hessian_coeff_abs/p99": 630784.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 6.346400260925293, + "grouped_std_rewards": 0.2714322507381439, + "learning_rate": 4.5e-06, + "loss": -0.0, + "mean_logprobs": -0.443359375, + "mean_logprobs/var": 0.26171875, + "num_completions/total": 288, + "per_sentence_gradient_norm": 172.9296875, + "per_sentence_gradient_norm/max": 1208.0, + "per_sentence_gradient_norm/median": 101.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 294.5, + "per_sentence_gradient_norm/var": 44032.375, + "per_token_feature_norm": 294.0665283203125, + "per_token_feature_norm/max": 436.0, + "per_token_feature_norm/median": 296.0, + "per_token_feature_norm/min": 109.5, + "per_token_feature_norm/p25": 246.25, + "per_token_feature_norm/p75": 342.0, + "per_token_feature_norm/var": 3591.009765625, + "per_token_gradient_norm": 25.531064987182617, + "per_token_gradient_norm/max": 1144.0, + "per_token_gradient_norm/median": 0.000667572021484375, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 1.1640625, + "per_token_gradient_norm/var": 6130.5634765625, + "per_token_policy_error_norm": 0.18340852856636047, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.12890625, + "per_token_policy_error_norm/var": 0.1380734145641327, + "policy_entropy": 0.45374614000320435, + "policy_entropy/max": 3.671875, + "policy_entropy/median": 0.0595703125, + "policy_entropy/min": 8.475035429000854e-08, + "policy_entropy/p25": 0.00156402587890625, + "policy_entropy/p75": 0.734375, + "policy_entropy/var": 0.44493338465690613, + "policy_loss": -4.346172310931706e-09, + "policy_loss/max": 1.2073814868927002, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.35345345735549927, + "policy_loss/var": 0.5155614614486694, + "policy_sharpness": 4.379619121551514, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 2.512500047683716, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.5241957306861877, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 17.003019332885742, + "reward": 0.1979166716337204, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.16041667759418488, + "rewards/accuracy_reward": 0.1979166716337204, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.16041667759418488, + "sentence_fisher_curvature": 112750744.0, + "sentence_fisher_curvature/max": 8388608000.0, + "sentence_fisher_curvature/median": 3375104.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 27787264.0, + "sentence_fisher_curvature/p85": 62128128.0, + "sentence_fisher_curvature/p90": 85721088.0, + "sentence_fisher_curvature/p95": 118489088.0, + "sentence_fisher_curvature/p99": 710330048.0, + "sentence_fisher_curvature/var": 7.310046707297812e+17, + "sentence_fisher_kl_divergence": 0.0005083703435957432, + "sentence_fisher_kl_divergence/max": 0.037841796875, + "sentence_fisher_kl_divergence/median": 1.519918441772461e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0001251697540283203, + "sentence_fisher_kl_divergence/p85": 0.00027942657470703125, + "sentence_fisher_kl_divergence/p90": 0.00038623809814453125, + "sentence_fisher_kl_divergence/p95": 0.0005321502685546875, + "sentence_fisher_kl_divergence/p99": 0.0032040756195783615, + "sentence_fisher_kl_divergence/var": 1.4875973647576757e-05, + "sentence_full_gradient_variance/max_squared_error": 72626.3828125, + "sentence_full_gradient_variance/metric": 72626.3828125, + "sentence_full_gradient_variance/p75": 72626.3828125, + "sentence_full_gradient_variance/p90": 72626.3828125, + "sentence_full_gradient_variance/p95": 72626.3828125, + "sentence_full_gradient_variance/p99": 72626.3828125, + "sentence_full_update_term": 0.21272659301757812, + "sentence_full_update_term/max": 4.0, + "sentence_full_update_term/median": 0.0301513671875, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.255859375, + "sentence_full_update_term/p85": 0.443359375, + "sentence_full_update_term/p90": 0.603515625, + "sentence_full_update_term/p95": 0.763671875, + "sentence_full_update_term/p99": 1.6324294805526733, + "sentence_full_update_term/var": 0.22978171706199646, + "sentence_hessian_coeff": -79249552.0, + "sentence_hessian_coeff/max": 211812352.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -7683964928.0, + "sentence_hessian_coeff/p25": -1783808.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 153039856.0, + "sentence_hessian_coeff/var": 6.17796410913325e+17, + "sentence_hessian_coeff_abs": 100917048.0, + "sentence_hessian_coeff_abs/max": 7683964928.0, + "sentence_hessian_coeff_abs/median": 966656.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 17072128.0, + "sentence_hessian_coeff_abs/p99": 675095744.0, + "sentence_hessian_coeff_abs/var": 6.13851569351295e+17, + "step": 3, + "token_fisher_curvature": 712397184.0, + "token_fisher_curvature/max": 283467841536.0, + "token_fisher_curvature/median": 4.4941828036826337e-13, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 4.0625, + "token_fisher_curvature/p85": 1808384.0, + "token_fisher_curvature/p90": 125304832.0, + "token_fisher_curvature/p95": 1937768448.0, + "token_fisher_curvature/p99": 19730006016.0, + "token_fisher_curvature/var": 2.6981458992739385e+19, + "token_fisher_kl_divergence": 0.003206813707947731, + "token_fisher_kl_divergence/max": 1.2734375, + "token_fisher_kl_divergence/median": 2.016252743098005e-24, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 1.830358087318018e-11, + "token_fisher_kl_divergence/p85": 8.13603401184082e-06, + "token_fisher_kl_divergence/p90": 0.0005645751953125, + "token_fisher_kl_divergence/p95": 0.00872802734375, + "token_fisher_kl_divergence/p99": 0.0888671875, + "token_fisher_kl_divergence/var": 0.0005466605653055012, + "token_full_update_term": 0.02061985619366169, + "token_full_update_term/max": 6.9375, + "token_full_update_term/median": 1.3358203432289883e-12, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 4.0531158447265625e-06, + "token_full_update_term/p85": 0.002716064453125, + "token_full_update_term/p90": 0.0205078125, + "token_full_update_term/p95": 0.0838623046875, + "token_full_update_term/p99": 0.453125, + "token_full_update_term/var": 0.01765078492462635, + "token_hessian_coeff": 61220004.0, + "token_hessian_coeff/max": 661424963584.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -143881404416.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 7.236003875732422e-05, + "token_hessian_coeff/p99": 2837839872.0, + "token_hessian_coeff/var": 7.028832149916195e+19, + "token_hessian_coeff_abs": 712787008.0, + "token_hessian_coeff_abs/max": 661424963584.0, + "token_hessian_coeff_abs/median": 4.516914486885071e-08, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 500.0, + "token_hessian_coeff_abs/p99": 17870880768.0, + "token_hessian_coeff_abs/var": 6.9783993107687145e+19 + }, + { + "accuracy_reward": 0.1875, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.1539473831653595, + "adam_stats/lm_head/lr_effective_max": 3.284503327449784e-05, + "adam_stats/lm_head/lr_effective_mean": 1.5552928389617193e-10, + "adam_stats/lm_head/lr_effective_min": -3.280876626376994e-05, + "adam_stats/lm_head/lr_effective_std": 1.5815307961020153e-06, + "adam_stats/lr_effective_max": 3.284503327449784e-05, + "adam_stats/lr_effective_mean": -2.1299735586399748e-10, + "adam_stats/lr_effective_min": -3.284041304141283e-05, + "adam_stats/m_t_max": 0.009930035099387169, + "adam_stats/m_t_mean": -3.821267260950556e-11, + "adam_stats/m_t_min": -0.008541584014892578, + "adam_stats/v_t_max": 8.840019290801138e-06, + "adam_stats/v_t_mean": 1.5360395247410175e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 2.0489096641540527e-08, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -0.7244288325309753, + "advantages/p25": -0.7244288325309753, + "advantages/p75": 0.0, + "advantages/var": 0.5155736804008484, + "all_logprobs": -0.31797587871551514, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.0031280517578125, + "all_logprobs/min": -11.3125, + "all_logprobs/p1": -4.0, + "all_logprobs/p10": -1.0546875, + "all_logprobs/p25": -0.177734375, + "all_logprobs/p5": -1.875, + "all_logprobs/p75": -0.00013256072998046875, + "all_logprobs/var": 0.6444713473320007, + "clip_ratio": 0.0, + "completion_length": 426.38543701171875, + "completion_length/correct": 430.8333435058594, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 407.0, + "completion_length/correct/min": 92.0, + "completion_length/correct/p25": 236.0, + "completion_length/correct/p75": 559.0, + "completion_length/correct/var": 59180.1484375, + "completion_length/incorrect": 425.3589782714844, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 319.0, + "completion_length/incorrect/min": 60.0, + "completion_length/incorrect/p25": 218.0, + "completion_length/incorrect/p75": 541.25, + "completion_length/incorrect/var": 81218.8046875, + "completion_length/max": 1024.0, + "completion_length/median": 345.0, + "completion_length/min": 60.0, + "completion_length/p25": 216.75, + "completion_length/p75": 561.75, + "completion_length/var": 76424.71875, + "epoch": 0.0064, + "feature_vector_variance/max_squared_error": 208599.203125, + "feature_vector_variance/metric": 46142.8359375, + "generated_tokens/total": 173418.0, + "global_fisher_curvature": 1892352.0, + "global_fisher_curvature/max": 1892352.0, + "global_fisher_curvature/median": 1892352.0, + "global_fisher_curvature/min": 1892352.0, + "global_fisher_curvature/p25": 1892352.0, + "global_fisher_curvature/p75": 1892352.0, + "global_fisher_curvature/p85": 1892352.0, + "global_fisher_curvature/p90": 1892352.0, + "global_fisher_curvature/p95": 1892352.0, + "global_fisher_curvature/p99": 1892352.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.919269561767578e-05, + "global_fisher_kl_divergence/max": 1.919269561767578e-05, + "global_fisher_kl_divergence/median": 1.919269561767578e-05, + "global_fisher_kl_divergence/min": 1.919269561767578e-05, + "global_fisher_kl_divergence/p25": 1.919269561767578e-05, + "global_fisher_kl_divergence/p75": 1.919269561767578e-05, + "global_fisher_kl_divergence/p85": 1.919269561767578e-05, + "global_fisher_kl_divergence/p90": 1.919269561767578e-05, + "global_fisher_kl_divergence/p95": 1.919269561767578e-05, + "global_fisher_kl_divergence/p99": 1.919269561767578e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 2.34375, + "global_full_update_term/max": 2.34375, + "global_full_update_term/median": 2.34375, + "global_full_update_term/min": 2.34375, + "global_full_update_term/p25": 2.34375, + "global_full_update_term/p75": 2.34375, + "global_full_update_term/p85": 2.34375, + "global_full_update_term/p90": 2.34375, + "global_full_update_term/p95": 2.34375, + "global_full_update_term/p99": 2.34375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 1728512.0, + "global_hessian_coeff/max": 1728512.0, + "global_hessian_coeff/median": 1728512.0, + "global_hessian_coeff/min": 1728512.0, + "global_hessian_coeff/p25": 1728512.0, + "global_hessian_coeff/p75": 1728512.0, + "global_hessian_coeff/p99": 1728512.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 1728512.0, + "global_hessian_coeff_abs/max": 1728512.0, + "global_hessian_coeff_abs/median": 1728512.0, + "global_hessian_coeff_abs/min": 1728512.0, + "global_hessian_coeff_abs/p25": 1728512.0, + "global_hessian_coeff_abs/p75": 1728512.0, + "global_hessian_coeff_abs/p99": 1728512.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 1.4005130529403687, + "grouped_std_rewards": 0.283684104681015, + "learning_rate": 6e-06, + "loss": -0.0, + "mean_logprobs": -0.345703125, + "mean_logprobs/var": 0.031005859375, + "num_completions/total": 384, + "per_sentence_gradient_norm": 166.38021850585938, + "per_sentence_gradient_norm/max": 740.0, + "per_sentence_gradient_norm/median": 147.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 275.0, + "per_sentence_gradient_norm/var": 33811.9453125, + "per_token_feature_norm": 286.9299011230469, + "per_token_feature_norm/max": 440.0, + "per_token_feature_norm/median": 286.0, + "per_token_feature_norm/min": 113.5, + "per_token_feature_norm/p25": 238.0, + "per_token_feature_norm/p75": 336.0, + "per_token_feature_norm/var": 3567.454345703125, + "per_token_gradient_norm": 23.826942443847656, + "per_token_gradient_norm/max": 976.0, + "per_token_gradient_norm/median": 0.00144195556640625, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 1.1796875, + "per_token_gradient_norm/var": 5103.2294921875, + "per_token_policy_error_norm": 0.14572298526763916, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.046875, + "per_token_policy_error_norm/var": 0.11488145589828491, + "policy_entropy": 0.35733407735824585, + "policy_entropy/max": 3.671875, + "policy_entropy/median": 0.024169921875, + "policy_entropy/min": 2.3137545213103294e-09, + "policy_entropy/p25": 0.001495361328125, + "policy_entropy/p75": 0.53515625, + "policy_entropy/var": 0.35319751501083374, + "policy_loss": -1.8005570368018198e-08, + "policy_loss/max": 0.7244288921356201, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741740226745605, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.7244287729263306, + "policy_loss/var": 0.5155736804008484, + "policy_sharpness": 4.806330680847168, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 3.560546875, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.7814719676971436, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.540964126586914, + "reward": 0.1875, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.1539473831653595, + "rewards/accuracy_reward": 0.1875, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.1539473831653595, + "sentence_fisher_curvature": 19365120.0, + "sentence_fisher_curvature/max": 227540992.0, + "sentence_fisher_curvature/median": 6258688.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 18874368.0, + "sentence_fisher_curvature/p85": 37322752.0, + "sentence_fisher_curvature/p90": 52166656.0, + "sentence_fisher_curvature/p95": 70516736.0, + "sentence_fisher_curvature/p99": 201641248.0, + "sentence_fisher_curvature/var": 1534880114540544.0, + "sentence_fisher_kl_divergence": 0.00019604961562436074, + "sentence_fisher_kl_divergence/max": 0.0023040771484375, + "sentence_fisher_kl_divergence/median": 6.341934204101562e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.00019073486328125, + "sentence_fisher_kl_divergence/p85": 0.000377655029296875, + "sentence_fisher_kl_divergence/p90": 0.00052642822265625, + "sentence_fisher_kl_divergence/p95": 0.000713348388671875, + "sentence_fisher_kl_divergence/p99": 0.002043152693659067, + "sentence_fisher_kl_divergence/var": 1.5755091453684145e-07, + "sentence_full_gradient_variance/max_squared_error": 60506.1171875, + "sentence_full_gradient_variance/metric": 60506.1171875, + "sentence_full_gradient_variance/p75": 60506.1171875, + "sentence_full_gradient_variance/p90": 60506.1171875, + "sentence_full_gradient_variance/p95": 60506.1171875, + "sentence_full_gradient_variance/p99": 60506.1171875, + "sentence_full_update_term": 0.27026623487472534, + "sentence_full_update_term/max": 2.4375, + "sentence_full_update_term/median": 0.09375, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.33349609375, + "sentence_full_update_term/p85": 0.5146484375, + "sentence_full_update_term/p90": 0.615234375, + "sentence_full_update_term/p95": 1.341796875, + "sentence_full_update_term/p99": 2.2445318698883057, + "sentence_full_update_term/var": 0.2162838727235794, + "sentence_hessian_coeff": 10884352.0, + "sentence_hessian_coeff/max": 270532608.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -32899072.0, + "sentence_hessian_coeff/p25": -3805184.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 254594304.0, + "sentence_hessian_coeff/var": 2275716607508480.0, + "sentence_hessian_coeff_abs": 17417644.0, + "sentence_hessian_coeff_abs/max": 270532608.0, + "sentence_hessian_coeff_abs/median": 2056192.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 9568256.0, + "sentence_hessian_coeff_abs/p99": 254594304.0, + "sentence_hessian_coeff_abs/var": 2088864860602368.0, + "step": 4, + "token_fisher_curvature": 588713344.0, + "token_fisher_curvature/max": 161061273600.0, + "token_fisher_curvature/median": 6.650680006714538e-12, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 2.84375, + "token_fisher_curvature/p85": 540672.0, + "token_fisher_curvature/p90": 66060288.0, + "token_fisher_curvature/p95": 1761607680.0, + "token_fisher_curvature/p99": 15324938240.0, + "token_fisher_curvature/var": 1.6521672936310964e+19, + "token_fisher_kl_divergence": 0.005961586721241474, + "token_fisher_kl_divergence/max": 1.6328125, + "token_fisher_kl_divergence/median": 6.741521992307176e-23, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 2.8762769943568856e-11, + "token_fisher_kl_divergence/p85": 5.4836273193359375e-06, + "token_fisher_kl_divergence/p90": 0.000667572021484375, + "token_fisher_kl_divergence/p95": 0.017822265625, + "token_fisher_kl_divergence/p99": 0.15496063232421875, + "token_fisher_kl_divergence/var": 0.001694616163149476, + "token_full_update_term": 0.02777189016342163, + "token_full_update_term/max": 7.90625, + "token_full_update_term/median": 9.379164112033322e-12, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 6.22868537902832e-06, + "token_full_update_term/p85": 0.0026092529296875, + "token_full_update_term/p90": 0.0272216796875, + "token_full_update_term/p95": 0.13671875, + "token_full_update_term/p99": 0.631561279296875, + "token_full_update_term/var": 0.02669869363307953, + "token_hessian_coeff": 222416592.0, + "token_hessian_coeff/max": 358629769216.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -35433480192.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.00035858154296875, + "token_hessian_coeff/p99": 6811549696.0, + "token_hessian_coeff/var": 2.566697085150062e+19, + "token_hessian_coeff_abs": 503972800.0, + "token_hessian_coeff_abs/max": 358629769216.0, + "token_hessian_coeff_abs/median": 5.066394805908203e-07, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 544.0, + "token_hessian_coeff_abs/p99": 11252793344.0, + "token_hessian_coeff_abs/var": 2.546244409654824e+19 + }, + { + "accuracy_reward": 0.2395833432674408, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.18410088121891022, + "adam_stats/lm_head/lr_effective_max": 4.3708507291739807e-05, + "adam_stats/lm_head/lr_effective_mean": 2.582696001507401e-10, + "adam_stats/lm_head/lr_effective_min": -4.371169416117482e-05, + "adam_stats/lm_head/lr_effective_std": 1.92399511433905e-06, + "adam_stats/lr_effective_max": 4.3817286496050656e-05, + "adam_stats/lr_effective_mean": -2.5109556101021724e-10, + "adam_stats/lr_effective_min": -4.384257044875994e-05, + "adam_stats/m_t_max": 0.012550926767289639, + "adam_stats/m_t_mean": 6.570141652950667e-11, + "adam_stats/m_t_min": -0.009006421081721783, + "adam_stats/v_t_max": 1.0397794540040195e-05, + "adam_stats/v_t_mean": 2.046501640026732e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 1.30385160446167e-08, + "advantages/max": 2.4741740226745605, + "advantages/median": -0.3534534275531769, + "advantages/min": -0.9352393746376038, + "advantages/p25": -0.7244288325309753, + "advantages/p75": 0.0, + "advantages/var": 0.662872850894928, + "all_logprobs": -0.3058839440345764, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.002471923828125, + "all_logprobs/min": -10.25, + "all_logprobs/p1": -3.890625, + "all_logprobs/p10": -1.0078125, + "all_logprobs/p25": -0.1650390625, + "all_logprobs/p5": -1.8359375, + "all_logprobs/p75": -8.58306884765625e-05, + "all_logprobs/var": 0.5939487814903259, + "clip_ratio": 0.0, + "completion_length": 627.5833740234375, + "completion_length/correct": 560.0435180664062, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 552.0, + "completion_length/correct/min": 206.0, + "completion_length/correct/p25": 399.5, + "completion_length/correct/p75": 673.0, + "completion_length/correct/var": 51653.13671875, + "completion_length/incorrect": 648.863037109375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 674.0, + "completion_length/incorrect/min": 17.0, + "completion_length/incorrect/p25": 407.0, + "completion_length/incorrect/p75": 934.0, + "completion_length/incorrect/var": 85729.3984375, + "completion_length/max": 1024.0, + "completion_length/median": 609.0, + "completion_length/min": 17.0, + "completion_length/p25": 405.5, + "completion_length/p75": 868.25, + "completion_length/var": 78388.0, + "epoch": 0.008, + "feature_vector_variance/max_squared_error": 223275.109375, + "feature_vector_variance/metric": 46307.6015625, + "generated_tokens/total": 233666.0, + "global_fisher_curvature": 226304.0, + "global_fisher_curvature/max": 226304.0, + "global_fisher_curvature/median": 226304.0, + "global_fisher_curvature/min": 226304.0, + "global_fisher_curvature/p25": 226304.0, + "global_fisher_curvature/p75": 226304.0, + "global_fisher_curvature/p85": 226304.0, + "global_fisher_curvature/p90": 226304.0, + "global_fisher_curvature/p95": 226304.0, + "global_fisher_curvature/p99": 226304.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 4.082918167114258e-06, + "global_fisher_kl_divergence/max": 4.082918167114258e-06, + "global_fisher_kl_divergence/median": 4.082918167114258e-06, + "global_fisher_kl_divergence/min": 4.082918167114258e-06, + "global_fisher_kl_divergence/p25": 4.082918167114258e-06, + "global_fisher_kl_divergence/p75": 4.082918167114258e-06, + "global_fisher_kl_divergence/p85": 4.082918167114258e-06, + "global_fisher_kl_divergence/p90": 4.082918167114258e-06, + "global_fisher_kl_divergence/p95": 4.082918167114258e-06, + "global_fisher_kl_divergence/p99": 4.082918167114258e-06, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.376953125, + "global_full_update_term/max": 0.376953125, + "global_full_update_term/median": 0.376953125, + "global_full_update_term/min": 0.376953125, + "global_full_update_term/p25": 0.376953125, + "global_full_update_term/p75": 0.376953125, + "global_full_update_term/p85": 0.376953125, + "global_full_update_term/p90": 0.376953125, + "global_full_update_term/p95": 0.376953125, + "global_full_update_term/p99": 0.376953125, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -27264.0, + "global_hessian_coeff/max": -27264.0, + "global_hessian_coeff/median": -27264.0, + "global_hessian_coeff/min": -27264.0, + "global_hessian_coeff/p25": -27264.0, + "global_hessian_coeff/p75": -27264.0, + "global_hessian_coeff/p99": -27264.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 27264.0, + "global_hessian_coeff_abs/max": 27264.0, + "global_hessian_coeff_abs/median": 27264.0, + "global_hessian_coeff_abs/min": 27264.0, + "global_hessian_coeff_abs/p25": 27264.0, + "global_hessian_coeff_abs/p75": 27264.0, + "global_hessian_coeff_abs/p99": 27264.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 5.67471170425415, + "grouped_std_rewards": 0.3576904237270355, + "learning_rate": 7.5e-06, + "loss": -0.0, + "mean_logprobs": -0.330078125, + "mean_logprobs/var": 0.034423828125, + "num_completions/total": 480, + "per_sentence_gradient_norm": 205.984375, + "per_sentence_gradient_norm/max": 784.0, + "per_sentence_gradient_norm/median": 187.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 51.0, + "per_sentence_gradient_norm/p75": 319.0, + "per_sentence_gradient_norm/var": 32147.751953125, + "per_token_feature_norm": 284.9111633300781, + "per_token_feature_norm/max": 426.0, + "per_token_feature_norm/median": 282.0, + "per_token_feature_norm/min": 110.0, + "per_token_feature_norm/p25": 237.0, + "per_token_feature_norm/p75": 332.0, + "per_token_feature_norm/var": 3612.973876953125, + "per_token_gradient_norm": 33.56553649902344, + "per_token_gradient_norm/max": 1168.0, + "per_token_gradient_norm/median": 0.0191650390625, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 10.1875, + "per_token_gradient_norm/var": 7056.6982421875, + "per_token_policy_error_norm": 0.1418100744485855, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0390625, + "per_token_policy_error_norm/var": 0.11196411401033401, + "policy_entropy": 0.34624743461608887, + "policy_entropy/max": 3.75, + "policy_entropy/median": 0.0196533203125, + "policy_entropy/min": 1.955777406692505e-08, + "policy_entropy/p25": 0.00099945068359375, + "policy_entropy/p75": 0.51171875, + "policy_entropy/var": 0.34143200516700745, + "policy_loss": -2.1730860666480112e-08, + "policy_loss/max": 0.9352393746376038, + "policy_loss/median": 0.3534533977508545, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.7244287729263306, + "policy_loss/var": 0.662872850894928, + "policy_sharpness": 4.9966511726379395, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 3.943603515625, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.8952881097793579, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.807403564453125, + "reward": 0.2395833432674408, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.18410088121891022, + "rewards/accuracy_reward": 0.2395833432674408, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.18410088121891022, + "sentence_fisher_curvature": 19209644.0, + "sentence_fisher_curvature/max": 444596224.0, + "sentence_fisher_curvature/median": 5046272.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 313344.0, + "sentence_fisher_curvature/p75": 18513920.0, + "sentence_fisher_curvature/p85": 35454976.0, + "sentence_fisher_curvature/p90": 49020928.0, + "sentence_fisher_curvature/p95": 62980096.0, + "sentence_fisher_curvature/p99": 130312792.0, + "sentence_fisher_curvature/var": 2418905180012544.0, + "sentence_fisher_kl_divergence": 0.00034562809742055833, + "sentence_fisher_kl_divergence/max": 0.00799560546875, + "sentence_fisher_kl_divergence/median": 9.059906005859375e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 5.632638931274414e-06, + "sentence_fisher_kl_divergence/p75": 0.0003337860107421875, + "sentence_fisher_kl_divergence/p85": 0.0006380081176757812, + "sentence_fisher_kl_divergence/p90": 0.0008831024169921875, + "sentence_fisher_kl_divergence/p95": 0.001132965087890625, + "sentence_fisher_kl_divergence/p99": 0.002342242281883955, + "sentence_fisher_kl_divergence/var": 7.824200451977958e-07, + "sentence_full_gradient_variance/max_squared_error": 73514.453125, + "sentence_full_gradient_variance/metric": 73514.453125, + "sentence_full_gradient_variance/p75": 73514.453125, + "sentence_full_gradient_variance/p90": 73514.453125, + "sentence_full_gradient_variance/p95": 73514.453125, + "sentence_full_gradient_variance/p99": 73514.453125, + "sentence_full_update_term": 0.43184536695480347, + "sentence_full_update_term/max": 3.484375, + "sentence_full_update_term/median": 0.2041015625, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.019683837890625, + "sentence_full_update_term/p75": 0.595703125, + "sentence_full_update_term/p85": 0.810546875, + "sentence_full_update_term/p90": 0.99609375, + "sentence_full_update_term/p95": 1.814453125, + "sentence_full_update_term/p99": 2.816408395767212, + "sentence_full_update_term/var": 0.40632492303848267, + "sentence_hessian_coeff": 2033440.0, + "sentence_hessian_coeff/max": 112721920.0, + "sentence_hessian_coeff/median": -90624.0, + "sentence_hessian_coeff/min": -246415360.0, + "sentence_hessian_coeff/p25": -3252224.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 111227704.0, + "sentence_hessian_coeff/var": 1302724381507584.0, + "sentence_hessian_coeff_abs": 14574304.0, + "sentence_hessian_coeff_abs/max": 246415360.0, + "sentence_hessian_coeff_abs/median": 2441216.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 64512.0, + "sentence_hessian_coeff_abs/p75": 14303232.0, + "sentence_hessian_coeff_abs/p99": 119407000.0, + "sentence_hessian_coeff_abs/var": 1092256421380096.0, + "step": 5, + "token_fisher_curvature": 890725056.0, + "token_fisher_curvature/max": 300647710720.0, + "token_fisher_curvature/median": 2.4586915969848633e-07, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 21632.0, + "token_fisher_curvature/p85": 45088768.0, + "token_fisher_curvature/p90": 578813952.0, + "token_fisher_curvature/p95": 3674210304.0, + "token_fisher_curvature/p99": 22011707392.0, + "token_fisher_curvature/var": 2.7652548113775722e+19, + "token_fisher_kl_divergence": 0.016036363318562508, + "token_fisher_kl_divergence/max": 5.40625, + "token_fisher_kl_divergence/median": 4.4181238528784306e-18, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 3.8929283618927e-07, + "token_fisher_kl_divergence/p85": 0.000812530517578125, + "token_fisher_kl_divergence/p90": 0.01043701171875, + "token_fisher_kl_divergence/p95": 0.06591796875, + "token_fisher_kl_divergence/p99": 0.396484375, + "token_fisher_kl_divergence/var": 0.008962450549006462, + "token_full_update_term": 0.055675920099020004, + "token_full_update_term/max": 21.0, + "token_full_update_term/median": 2.1973391994833946e-09, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.000629425048828125, + "token_full_update_term/p85": 0.02685546875, + "token_full_update_term/p90": 0.09033203125, + "token_full_update_term/p95": 0.263671875, + "token_full_update_term/p99": 1.277587890625, + "token_full_update_term/var": 0.09561020880937576, + "token_hessian_coeff": 365783904.0, + "token_hessian_coeff/max": 708669603840.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -55029268480.0, + "token_hessian_coeff/p25": -7.239577826112509e-10, + "token_hessian_coeff/p75": 0.275390625, + "token_hessian_coeff/p99": 16378757120.0, + "token_hessian_coeff/var": 5.0903025503889785e+19, + "token_hessian_coeff_abs": 766861056.0, + "token_hessian_coeff_abs/max": 708669603840.0, + "token_hessian_coeff_abs/median": 0.001373291015625, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 339968.0, + "token_hessian_coeff_abs/p99": 19193135104.0, + "token_hessian_coeff_abs/var": 5.044874248366483e+19 + }, + { + "accuracy_reward": 0.34375, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.2279605269432068, + "adam_stats/lm_head/lr_effective_max": 5.449418677017093e-05, + "adam_stats/lm_head/lr_effective_mean": 1.209687350289812e-10, + "adam_stats/lm_head/lr_effective_min": -5.452635377878323e-05, + "adam_stats/lm_head/lr_effective_std": 2.3333261651714565e-06, + "adam_stats/lr_effective_max": 5.500761835719459e-05, + "adam_stats/lr_effective_mean": -7.771058241345941e-10, + "adam_stats/lr_effective_min": -5.505318404175341e-05, + "adam_stats/m_t_max": 0.009390580467879772, + "adam_stats/m_t_mean": 1.1945183557571237e-11, + "adam_stats/m_t_min": -0.009741873480379581, + "adam_stats/v_t_max": 1.1915883078472689e-05, + "adam_stats/v_t_mean": 2.5602642835659273e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 2.4835269396561444e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": -0.3534534275531769, + "advantages/min": -2.4741740226745605, + "advantages/p25": -0.5399450659751892, + "advantages/p75": 0.5399450659751892, + "advantages/var": 0.8101338148117065, + "all_logprobs": -0.310273140668869, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.0025787353515625, + "all_logprobs/min": -12.875, + "all_logprobs/p1": -4.15625, + "all_logprobs/p10": -1.0, + "all_logprobs/p25": -0.1474609375, + "all_logprobs/p5": -1.8671875, + "all_logprobs/p75": -0.0001125335693359375, + "all_logprobs/var": 0.6607945561408997, + "clip_ratio": 0.0, + "completion_length": 678.6771240234375, + "completion_length/correct": 458.06060791015625, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 396.0, + "completion_length/correct/min": 100.0, + "completion_length/correct/p25": 267.0, + "completion_length/correct/p75": 577.0, + "completion_length/correct/var": 64550.3125, + "completion_length/incorrect": 794.2381591796875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 135.0, + "completion_length/incorrect/p25": 551.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 93257.5390625, + "completion_length/max": 1024.0, + "completion_length/median": 715.0, + "completion_length/min": 100.0, + "completion_length/p25": 364.0, + "completion_length/p75": 1024.0, + "completion_length/var": 108369.09375, + "epoch": 0.0096, + "feature_vector_variance/max_squared_error": 221193.984375, + "feature_vector_variance/metric": 47644.125, + "generated_tokens/total": 298819.0, + "global_fisher_curvature": 700416.0, + "global_fisher_curvature/max": 700416.0, + "global_fisher_curvature/median": 700416.0, + "global_fisher_curvature/min": 700416.0, + "global_fisher_curvature/p25": 700416.0, + "global_fisher_curvature/p75": 700416.0, + "global_fisher_curvature/p85": 700416.0, + "global_fisher_curvature/p90": 700416.0, + "global_fisher_curvature/p95": 700416.0, + "global_fisher_curvature/p99": 700416.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.9669532775878906e-05, + "global_fisher_kl_divergence/max": 1.9669532775878906e-05, + "global_fisher_kl_divergence/median": 1.9669532775878906e-05, + "global_fisher_kl_divergence/min": 1.9669532775878906e-05, + "global_fisher_kl_divergence/p25": 1.9669532775878906e-05, + "global_fisher_kl_divergence/p75": 1.9669532775878906e-05, + "global_fisher_kl_divergence/p85": 1.9669532775878906e-05, + "global_fisher_kl_divergence/p90": 1.9669532775878906e-05, + "global_fisher_kl_divergence/p95": 1.9669532775878906e-05, + "global_fisher_kl_divergence/p99": 1.9669532775878906e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 3.453125, + "global_full_update_term/max": 3.453125, + "global_full_update_term/median": 3.453125, + "global_full_update_term/min": 3.453125, + "global_full_update_term/p25": 3.453125, + "global_full_update_term/p75": 3.453125, + "global_full_update_term/p85": 3.453125, + "global_full_update_term/p90": 3.453125, + "global_full_update_term/p95": 3.453125, + "global_full_update_term/p99": 3.453125, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -3776.0, + "global_hessian_coeff/max": -3776.0, + "global_hessian_coeff/median": -3776.0, + "global_hessian_coeff/min": -3776.0, + "global_hessian_coeff/p25": -3776.0, + "global_hessian_coeff/p75": -3776.0, + "global_hessian_coeff/p99": -3776.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 3776.0, + "global_hessian_coeff_abs/max": 3776.0, + "global_hessian_coeff_abs/median": 3776.0, + "global_hessian_coeff_abs/min": 3776.0, + "global_hessian_coeff_abs/p25": 3776.0, + "global_hessian_coeff_abs/p75": 3776.0, + "global_hessian_coeff_abs/p99": 3776.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 5.414507865905762, + "grouped_std_rewards": 0.387875497341156, + "learning_rate": 9e-06, + "loss": -0.0, + "mean_logprobs": -0.333984375, + "mean_logprobs/var": 0.03173828125, + "num_completions/total": 576, + "per_sentence_gradient_norm": 218.69271850585938, + "per_sentence_gradient_norm/max": 904.0, + "per_sentence_gradient_norm/median": 144.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 99.5, + "per_sentence_gradient_norm/p75": 276.0, + "per_sentence_gradient_norm/var": 38311.45703125, + "per_token_feature_norm": 279.63519287109375, + "per_token_feature_norm/max": 432.0, + "per_token_feature_norm/median": 272.0, + "per_token_feature_norm/min": 106.0, + "per_token_feature_norm/p25": 235.0, + "per_token_feature_norm/p75": 324.0, + "per_token_feature_norm/var": 3535.54345703125, + "per_token_gradient_norm": 33.23598098754883, + "per_token_gradient_norm/max": 1248.0, + "per_token_gradient_norm/median": 0.09423828125, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.002410888671875, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 15.5625, + "per_token_gradient_norm/var": 7802.81103515625, + "per_token_policy_error_norm": 0.13975562155246735, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.03125, + "per_token_policy_error_norm/var": 0.11284685879945755, + "policy_entropy": 0.34555792808532715, + "policy_entropy/max": 3.765625, + "policy_entropy/median": 0.0205078125, + "policy_entropy/min": 1.5425030142068863e-09, + "policy_entropy/p25": 0.00127410888671875, + "policy_entropy/p75": 0.4765625, + "policy_entropy/var": 0.36185935139656067, + "policy_loss": -1.862645149230957e-08, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.3534534275531769, + "policy_loss/min": -2.4741744995117188, + "policy_loss/p25": -0.5399450659751892, + "policy_loss/p75": 0.5399450659751892, + "policy_loss/var": 0.8101338148117065, + "policy_sharpness": 4.912707805633545, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 3.75, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.8187839984893799, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.85114860534668, + "reward": 0.34375, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.2279605269432068, + "rewards/accuracy_reward": 0.34375, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.2279605269432068, + "sentence_fisher_curvature": 17402412.0, + "sentence_fisher_curvature/max": 177209344.0, + "sentence_fisher_curvature/median": 3522560.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 937984.0, + "sentence_fisher_curvature/p75": 17563648.0, + "sentence_fisher_curvature/p85": 38207488.0, + "sentence_fisher_curvature/p90": 52166656.0, + "sentence_fisher_curvature/p95": 83886080.0, + "sentence_fisher_curvature/p99": 138359728.0, + "sentence_fisher_curvature/var": 949020134473728.0, + "sentence_fisher_kl_divergence": 0.0004892467404715717, + "sentence_fisher_kl_divergence/max": 0.004974365234375, + "sentence_fisher_kl_divergence/median": 9.918212890625e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 2.637505531311035e-05, + "sentence_fisher_kl_divergence/p75": 0.0004935264587402344, + "sentence_fisher_kl_divergence/p85": 0.0010738372802734375, + "sentence_fisher_kl_divergence/p90": 0.00146484375, + "sentence_fisher_kl_divergence/p95": 0.0023651123046875, + "sentence_fisher_kl_divergence/p99": 0.00388718000613153, + "sentence_fisher_kl_divergence/var": 7.495561931136763e-07, + "sentence_full_gradient_variance/max_squared_error": 84802.875, + "sentence_full_gradient_variance/metric": 84802.875, + "sentence_full_gradient_variance/p75": 84802.875, + "sentence_full_gradient_variance/p90": 84802.875, + "sentence_full_gradient_variance/p95": 84802.875, + "sentence_full_gradient_variance/p99": 84802.875, + "sentence_full_update_term": 0.6284942626953125, + "sentence_full_update_term/max": 6.09375, + "sentence_full_update_term/median": 0.150390625, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0711669921875, + "sentence_full_update_term/p75": 0.56640625, + "sentence_full_update_term/p85": 1.162109375, + "sentence_full_update_term/p90": 2.1796875, + "sentence_full_update_term/p95": 3.0390625, + "sentence_full_update_term/p99": 4.490630149841309, + "sentence_full_update_term/var": 1.2057324647903442, + "sentence_hessian_coeff": 5345899.0, + "sentence_hessian_coeff/max": 222298112.0, + "sentence_hessian_coeff/median": -218112.0, + "sentence_hessian_coeff/min": -173015040.0, + "sentence_hessian_coeff/p25": -1128448.0, + "sentence_hessian_coeff/p75": 643072.0, + "sentence_hessian_coeff/p99": 134139368.0, + "sentence_hessian_coeff/var": 1976529856757760.0, + "sentence_hessian_coeff_abs": 18148342.0, + "sentence_hessian_coeff_abs/max": 222298112.0, + "sentence_hessian_coeff_abs/median": 983040.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 238848.0, + "sentence_hessian_coeff_abs/p75": 8470528.0, + "sentence_hessian_coeff_abs/p99": 175479344.0, + "sentence_hessian_coeff_abs/var": 1672579987275776.0, + "step": 6, + "token_fisher_curvature": 918943872.0, + "token_fisher_curvature/max": 397284474880.0, + "token_fisher_curvature/median": 0.0002040863037109375, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 9.913492249324918e-11, + "token_fisher_curvature/p75": 185344.0, + "token_fisher_curvature/p85": 71516160.0, + "token_fisher_curvature/p90": 587202560.0, + "token_fisher_curvature/p95": 2801795072.0, + "token_fisher_curvature/p99": 20535312384.0, + "token_fisher_curvature/var": 4.968351757510482e+19, + "token_fisher_kl_divergence": 0.025836195796728134, + "token_fisher_kl_divergence/max": 11.1875, + "token_fisher_kl_divergence/median": 5.745404152435185e-15, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 2.7925617479790214e-21, + "token_fisher_kl_divergence/p75": 5.21540641784668e-06, + "token_fisher_kl_divergence/p85": 0.0020051002502441406, + "token_fisher_kl_divergence/p90": 0.0164794921875, + "token_fisher_kl_divergence/p95": 0.07861328125, + "token_fisher_kl_divergence/p99": 0.578125, + "token_fisher_kl_divergence/var": 0.03926755487918854, + "token_full_update_term": 0.06973042339086533, + "token_full_update_term/max": 39.0, + "token_full_update_term/median": 6.565824151039124e-08, + "token_full_update_term/min": -10.25, + "token_full_update_term/p25": 4.320099833421409e-11, + "token_full_update_term/p75": 0.0017852783203125, + "token_full_update_term/p85": 0.034912109375, + "token_full_update_term/p90": 0.087890625, + "token_full_update_term/p95": 0.2177734375, + "token_full_update_term/p99": 1.277191162109375, + "token_full_update_term/var": 0.33924543857574463, + "token_hessian_coeff": 103121888.0, + "token_hessian_coeff/max": 970662608896.0, + "token_hessian_coeff/median": 3.546476364135742e-06, + "token_hessian_coeff/min": -725849473024.0, + "token_hessian_coeff/p25": -8.940696716308594e-07, + "token_hessian_coeff/p75": 132.0, + "token_hessian_coeff/p99": 4646633472.0, + "token_hessian_coeff/var": 1.6843415107463335e+20, + "token_hessian_coeff_abs": 1071588544.0, + "token_hessian_coeff_abs/max": 970662608896.0, + "token_hessian_coeff_abs/median": 0.18359375, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 2.4139881134033203e-06, + "token_hessian_coeff_abs/p75": 1040384.0, + "token_hessian_coeff_abs/p99": 19595788288.0, + "token_hessian_coeff_abs/var": 1.6729646440314097e+20 + }, + { + "accuracy_reward": 0.375, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.236842080950737, + "adam_stats/lm_head/lr_effective_max": 6.609374395338818e-05, + "adam_stats/lm_head/lr_effective_mean": 8.01089056201576e-11, + "adam_stats/lm_head/lr_effective_min": -6.528822268592194e-05, + "adam_stats/lm_head/lr_effective_std": 2.963520500998129e-06, + "adam_stats/lr_effective_max": 6.657531048404053e-05, + "adam_stats/lr_effective_mean": -7.109357547108175e-10, + "adam_stats/lr_effective_min": -6.624061643378809e-05, + "adam_stats/m_t_max": 0.009267074055969715, + "adam_stats/m_t_mean": -6.351098466583771e-12, + "adam_stats/m_t_min": -0.012790011242032051, + "adam_stats/v_t_max": 2.3358268663287163e-05, + "adam_stats/v_t_mean": 3.0467729034144853e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 4.346172310931706e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": -0.7244288325309753, + "advantages/p75": 0.7771314382553101, + "advantages/var": 0.7365214228630066, + "all_logprobs": -0.35432255268096924, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.005157470703125, + "all_logprobs/min": -12.0, + "all_logprobs/p1": -4.3125, + "all_logprobs/p10": -1.1796875, + "all_logprobs/p25": -0.2216796875, + "all_logprobs/p5": -2.125, + "all_logprobs/p75": -0.0001544952392578125, + "all_logprobs/var": 0.728594958782196, + "clip_ratio": 0.0, + "completion_length": 782.4583740234375, + "completion_length/correct": 659.75, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 637.0, + "completion_length/correct/min": 118.0, + "completion_length/correct/p25": 471.5, + "completion_length/correct/p75": 952.0, + "completion_length/correct/var": 86263.7890625, + "completion_length/incorrect": 856.0833740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 48.0, + "completion_length/incorrect/p25": 811.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 86164.71875, + "completion_length/max": 1024.0, + "completion_length/median": 1018.0, + "completion_length/min": 48.0, + "completion_length/p25": 560.5, + "completion_length/p75": 1024.0, + "completion_length/var": 94423.7265625, + "epoch": 0.0112, + "feature_vector_variance/max_squared_error": 225481.34375, + "feature_vector_variance/metric": 45955.9140625, + "generated_tokens/total": 373935.0, + "global_fisher_curvature": 569344.0, + "global_fisher_curvature/max": 569344.0, + "global_fisher_curvature/median": 569344.0, + "global_fisher_curvature/min": 569344.0, + "global_fisher_curvature/p25": 569344.0, + "global_fisher_curvature/p75": 569344.0, + "global_fisher_curvature/p85": 569344.0, + "global_fisher_curvature/p90": 569344.0, + "global_fisher_curvature/p95": 569344.0, + "global_fisher_curvature/p99": 569344.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 2.300739288330078e-05, + "global_fisher_kl_divergence/max": 2.300739288330078e-05, + "global_fisher_kl_divergence/median": 2.300739288330078e-05, + "global_fisher_kl_divergence/min": 2.300739288330078e-05, + "global_fisher_kl_divergence/p25": 2.300739288330078e-05, + "global_fisher_kl_divergence/p75": 2.300739288330078e-05, + "global_fisher_kl_divergence/p85": 2.300739288330078e-05, + "global_fisher_kl_divergence/p90": 2.300739288330078e-05, + "global_fisher_kl_divergence/p95": 2.300739288330078e-05, + "global_fisher_kl_divergence/p99": 2.300739288330078e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 5.34375, + "global_full_update_term/max": 5.34375, + "global_full_update_term/median": 5.34375, + "global_full_update_term/min": 5.34375, + "global_full_update_term/p25": 5.34375, + "global_full_update_term/p75": 5.34375, + "global_full_update_term/p85": 5.34375, + "global_full_update_term/p90": 5.34375, + "global_full_update_term/p95": 5.34375, + "global_full_update_term/p99": 5.34375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 434176.0, + "global_hessian_coeff/max": 434176.0, + "global_hessian_coeff/median": 434176.0, + "global_hessian_coeff/min": 434176.0, + "global_hessian_coeff/p25": 434176.0, + "global_hessian_coeff/p75": 434176.0, + "global_hessian_coeff/p99": 434176.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 434176.0, + "global_hessian_coeff_abs/max": 434176.0, + "global_hessian_coeff_abs/median": 434176.0, + "global_hessian_coeff_abs/min": 434176.0, + "global_hessian_coeff_abs/p25": 434176.0, + "global_hessian_coeff_abs/p75": 434176.0, + "global_hessian_coeff_abs/p99": 434176.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 2.900984525680542, + "grouped_std_rewards": 0.3959498107433319, + "learning_rate": 1.05e-05, + "loss": -0.0, + "mean_logprobs": -0.353515625, + "mean_logprobs/var": 0.0289306640625, + "num_completions/total": 672, + "per_sentence_gradient_norm": 258.4609375, + "per_sentence_gradient_norm/max": 1600.0, + "per_sentence_gradient_norm/median": 216.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 96.5, + "per_sentence_gradient_norm/p75": 365.5, + "per_sentence_gradient_norm/var": 63448.80078125, + "per_token_feature_norm": 280.12603759765625, + "per_token_feature_norm/max": 430.0, + "per_token_feature_norm/median": 272.0, + "per_token_feature_norm/min": 111.0, + "per_token_feature_norm/p25": 235.0, + "per_token_feature_norm/p75": 326.0, + "per_token_feature_norm/var": 3640.689697265625, + "per_token_gradient_norm": 41.74949645996094, + "per_token_gradient_norm/max": 1224.0, + "per_token_gradient_norm/median": 0.1083984375, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.000629425048828125, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 24.125, + "per_token_gradient_norm/var": 9340.259765625, + "per_token_policy_error_norm": 0.1580170840024948, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.06640625, + "per_token_policy_error_norm/var": 0.12329376488924026, + "policy_entropy": 0.4012596607208252, + "policy_entropy/max": 3.78125, + "policy_entropy/median": 0.037353515625, + "policy_entropy/min": 6.239861249923706e-08, + "policy_entropy/p25": 0.00170135498046875, + "policy_entropy/p75": 0.60546875, + "policy_entropy/var": 0.42382556200027466, + "policy_loss": -1.862645149230957e-08, + "policy_loss/max": 2.4741740226745605, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": -0.7771314978599548, + "policy_loss/p75": 0.7244287729263306, + "policy_loss/var": 0.7365214824676514, + "policy_sharpness": 4.447429656982422, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 2.723828077316284, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.6247279047966003, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.591564178466797, + "reward": 0.375, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.236842080950737, + "rewards/accuracy_reward": 0.375, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.236842080950737, + "sentence_fisher_curvature": 18974720.0, + "sentence_fisher_curvature/max": 272629760.0, + "sentence_fisher_curvature/median": 5931008.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 1167360.0, + "sentence_fisher_curvature/p75": 18251776.0, + "sentence_fisher_curvature/p85": 31817728.0, + "sentence_fisher_curvature/p90": 47972352.0, + "sentence_fisher_curvature/p95": 85065728.0, + "sentence_fisher_curvature/p99": 139146464.0, + "sentence_fisher_curvature/var": 1356159948685312.0, + "sentence_fisher_kl_divergence": 0.0007683771545998752, + "sentence_fisher_kl_divergence/max": 0.01104736328125, + "sentence_fisher_kl_divergence/median": 0.000240325927734375, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 4.7266483306884766e-05, + "sentence_fisher_kl_divergence/p75": 0.0007381439208984375, + "sentence_fisher_kl_divergence/p85": 0.0012874603271484375, + "sentence_fisher_kl_divergence/p90": 0.00194549560546875, + "sentence_fisher_kl_divergence/p95": 0.00344085693359375, + "sentence_fisher_kl_divergence/p99": 0.005625932943075895, + "sentence_fisher_kl_divergence/var": 2.2239678401092533e-06, + "sentence_full_gradient_variance/max_squared_error": 128245.9296875, + "sentence_full_gradient_variance/metric": 128245.9296875, + "sentence_full_gradient_variance/p75": 128245.9296875, + "sentence_full_gradient_variance/p90": 128245.9296875, + "sentence_full_gradient_variance/p95": 128245.9296875, + "sentence_full_gradient_variance/p99": 128245.9296875, + "sentence_full_update_term": 1.1257070302963257, + "sentence_full_update_term/max": 22.75, + "sentence_full_update_term/median": 0.408203125, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.080078125, + "sentence_full_update_term/p75": 1.166015625, + "sentence_full_update_term/p85": 1.56640625, + "sentence_full_update_term/p90": 2.2578125, + "sentence_full_update_term/p95": 3.2265625, + "sentence_full_update_term/p99": 12.596907615661621, + "sentence_full_update_term/var": 7.5454583168029785, + "sentence_hessian_coeff": 8212699.0, + "sentence_hessian_coeff/max": 557842432.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -214958080.0, + "sentence_hessian_coeff/p25": -2789376.0, + "sentence_hessian_coeff/p75": 4767744.0, + "sentence_hessian_coeff/p99": 258003088.0, + "sentence_hessian_coeff/var": 4772843074093056.0, + "sentence_hessian_coeff_abs": 20749020.0, + "sentence_hessian_coeff_abs/max": 557842432.0, + "sentence_hessian_coeff_abs/median": 3866624.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 317952.0, + "sentence_hessian_coeff_abs/p75": 11616256.0, + "sentence_hessian_coeff_abs/p99": 258003088.0, + "sentence_hessian_coeff_abs/var": 4405948982493184.0, + "step": 7, + "token_fisher_curvature": 1198022016.0, + "token_fisher_curvature/max": 367219703808.0, + "token_fisher_curvature/median": 0.000232696533203125, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 3.4638958368304884e-13, + "token_fisher_curvature/p75": 651264.0, + "token_fisher_curvature/p85": 195035136.0, + "token_fisher_curvature/p90": 1207959552.0, + "token_fisher_curvature/p95": 5670699008.0, + "token_fisher_curvature/p99": 24830279680.0, + "token_fisher_curvature/var": 5.77332313180529e+19, + "token_fisher_kl_divergence": 0.04852347820997238, + "token_fisher_kl_divergence/max": 14.875, + "token_fisher_kl_divergence/median": 9.43689570931383e-15, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 1.406207041340147e-23, + "token_fisher_kl_divergence/p75": 2.6345252990722656e-05, + "token_fisher_kl_divergence/p85": 0.00787353515625, + "token_fisher_kl_divergence/p90": 0.048828125, + "token_fisher_kl_divergence/p95": 0.2294921875, + "token_fisher_kl_divergence/p99": 1.0078125, + "token_fisher_kl_divergence/var": 0.09472405165433884, + "token_full_update_term": 0.10488499701023102, + "token_full_update_term/max": 49.25, + "token_full_update_term/median": 1.0291114449501038e-07, + "token_full_update_term/min": -17.0, + "token_full_update_term/p25": 3.417710559006082e-12, + "token_full_update_term/p75": 0.005157470703125, + "token_full_update_term/p85": 0.0791015625, + "token_full_update_term/p90": 0.2041015625, + "token_full_update_term/p95": 0.45751953125, + "token_full_update_term/p99": 1.640625, + "token_full_update_term/var": 0.7254995703697205, + "token_hessian_coeff": 126859104.0, + "token_hessian_coeff/max": 880468295680.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -721554505728.0, + "token_hessian_coeff/p25": -0.00262451171875, + "token_hessian_coeff/p75": 34.0, + "token_hessian_coeff/p99": 10737418240.0, + "token_hessian_coeff/var": 2.122304963665325e+20, + "token_hessian_coeff_abs": 1255848448.0, + "token_hessian_coeff_abs/max": 880468295680.0, + "token_hessian_coeff_abs/median": 0.2734375, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 4.0512531995773315e-08, + "token_hessian_coeff_abs/p75": 3379200.0, + "token_hessian_coeff_abs/p99": 22260219904.0, + "token_hessian_coeff_abs/var": 2.1066938336913706e+20 + }, + { + "accuracy_reward": 0.3333333432674408, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.224561408162117, + "adam_stats/lm_head/lr_effective_max": 7.767600618535653e-05, + "adam_stats/lm_head/lr_effective_mean": 2.126029491344994e-10, + "adam_stats/lm_head/lr_effective_min": -7.635731890331954e-05, + "adam_stats/lm_head/lr_effective_std": 3.247618451496237e-06, + "adam_stats/lr_effective_max": 7.780094165354967e-05, + "adam_stats/lr_effective_mean": -9.31447030616539e-10, + "adam_stats/lr_effective_min": -7.756172999506816e-05, + "adam_stats/m_t_max": 0.009826255962252617, + "adam_stats/m_t_mean": -1.713791435153933e-11, + "adam_stats/m_t_min": -0.014291796833276749, + "adam_stats/v_t_max": 2.4108177967718802e-05, + "adam_stats/v_t_mean": 3.5640375199708085e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 2.4835269396561444e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": -0.3534534275531769, + "advantages/p75": 0.3534534275531769, + "advantages/var": 0.5891907215118408, + "all_logprobs": -0.3296404182910919, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.00335693359375, + "all_logprobs/min": -13.0625, + "all_logprobs/p1": -4.21875, + "all_logprobs/p10": -1.0859375, + "all_logprobs/p25": -0.1806640625, + "all_logprobs/p5": -1.96875, + "all_logprobs/p75": -6.4849853515625e-05, + "all_logprobs/var": 0.6903733611106873, + "clip_ratio": 0.0, + "completion_length": 793.3541870117188, + "completion_length/correct": 670.90625, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 586.0, + "completion_length/correct/min": 167.0, + "completion_length/correct/p25": 365.75, + "completion_length/correct/p75": 1024.0, + "completion_length/correct/var": 99666.796875, + "completion_length/incorrect": 854.578125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 149.0, + "completion_length/incorrect/p25": 734.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 62822.9765625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 149.0, + "completion_length/p25": 580.75, + "completion_length/p75": 1024.0, + "completion_length/var": 81760.0546875, + "epoch": 0.0128, + "feature_vector_variance/max_squared_error": 206634.109375, + "feature_vector_variance/metric": 45705.35546875, + "generated_tokens/total": 450097.0, + "global_fisher_curvature": 675840.0, + "global_fisher_curvature/max": 675840.0, + "global_fisher_curvature/median": 675840.0, + "global_fisher_curvature/min": 675840.0, + "global_fisher_curvature/p25": 675840.0, + "global_fisher_curvature/p75": 675840.0, + "global_fisher_curvature/p85": 675840.0, + "global_fisher_curvature/p90": 675840.0, + "global_fisher_curvature/p95": 675840.0, + "global_fisher_curvature/p99": 675840.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 3.719329833984375e-05, + "global_fisher_kl_divergence/max": 3.719329833984375e-05, + "global_fisher_kl_divergence/median": 3.719329833984375e-05, + "global_fisher_kl_divergence/min": 3.719329833984375e-05, + "global_fisher_kl_divergence/p25": 3.719329833984375e-05, + "global_fisher_kl_divergence/p75": 3.719329833984375e-05, + "global_fisher_kl_divergence/p85": 3.719329833984375e-05, + "global_fisher_kl_divergence/p90": 3.719329833984375e-05, + "global_fisher_kl_divergence/p95": 3.719329833984375e-05, + "global_fisher_kl_divergence/p99": 3.719329833984375e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 5.0625, + "global_full_update_term/max": 5.0625, + "global_full_update_term/median": 5.0625, + "global_full_update_term/min": 5.0625, + "global_full_update_term/p25": 5.0625, + "global_full_update_term/p75": 5.0625, + "global_full_update_term/p85": 5.0625, + "global_full_update_term/p90": 5.0625, + "global_full_update_term/p95": 5.0625, + "global_full_update_term/p99": 5.0625, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 438272.0, + "global_hessian_coeff/max": 438272.0, + "global_hessian_coeff/median": 438272.0, + "global_hessian_coeff/min": 438272.0, + "global_hessian_coeff/p25": 438272.0, + "global_hessian_coeff/p75": 438272.0, + "global_hessian_coeff/p99": 438272.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 438272.0, + "global_hessian_coeff_abs/max": 438272.0, + "global_hessian_coeff_abs/median": 438272.0, + "global_hessian_coeff_abs/min": 438272.0, + "global_hessian_coeff_abs/p25": 438272.0, + "global_hessian_coeff_abs/p75": 438272.0, + "global_hessian_coeff_abs/p99": 438272.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 5.548678398132324, + "grouped_std_rewards": 0.28722870349884033, + "learning_rate": 1.2e-05, + "loss": -0.0, + "mean_logprobs": -0.32421875, + "mean_logprobs/var": 0.0238037109375, + "num_completions/total": 768, + "per_sentence_gradient_norm": 192.0078125, + "per_sentence_gradient_norm/max": 1248.0, + "per_sentence_gradient_norm/median": 121.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 276.5, + "per_sentence_gradient_norm/var": 55462.8359375, + "per_token_feature_norm": 272.55255126953125, + "per_token_feature_norm/max": 432.0, + "per_token_feature_norm/median": 262.0, + "per_token_feature_norm/min": 109.0, + "per_token_feature_norm/p25": 227.0, + "per_token_feature_norm/p75": 314.0, + "per_token_feature_norm/var": 3565.177978515625, + "per_token_gradient_norm": 27.04145622253418, + "per_token_gradient_norm/max": 1224.0, + "per_token_gradient_norm/median": 0.004791259765625, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 4.34375, + "per_token_gradient_norm/var": 6296.42578125, + "per_token_policy_error_norm": 0.14765873551368713, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.046875, + "per_token_policy_error_norm/var": 0.11676546186208725, + "policy_entropy": 0.37114351987838745, + "policy_entropy/max": 3.78125, + "policy_entropy/median": 0.02587890625, + "policy_entropy/min": 1.076841726899147e-09, + "policy_entropy/p25": 0.00077056884765625, + "policy_entropy/p75": 0.5390625, + "policy_entropy/var": 0.3911415636539459, + "policy_loss": -1.2417634698280722e-09, + "policy_loss/max": 2.4741740226745605, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741740226745605, + "policy_loss/p25": -0.3534534275531769, + "policy_loss/p75": 0.3534534275531769, + "policy_loss/var": 0.589190661907196, + "policy_sharpness": 4.8581743240356445, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 3.373046875, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.7269828915596008, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 17.338909149169922, + "reward": 0.3333333432674408, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.224561408162117, + "rewards/accuracy_reward": 0.3333333432674408, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.224561408162117, + "sentence_fisher_curvature": 14877446.0, + "sentence_fisher_curvature/max": 379584512.0, + "sentence_fisher_curvature/median": 2244608.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 11796480.0, + "sentence_fisher_curvature/p85": 16990208.0, + "sentence_fisher_curvature/p90": 23068672.0, + "sentence_fisher_curvature/p95": 51314688.0, + "sentence_fisher_curvature/p99": 198286304.0, + "sentence_fisher_curvature/var": 2181423821750272.0, + "sentence_fisher_kl_divergence": 0.0008191289380192757, + "sentence_fisher_kl_divergence/max": 0.0208740234375, + "sentence_fisher_kl_divergence/median": 0.0001239776611328125, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0006513595581054688, + "sentence_fisher_kl_divergence/p85": 0.0009365081787109375, + "sentence_fisher_kl_divergence/p90": 0.001270294189453125, + "sentence_fisher_kl_divergence/p95": 0.0028228759765625, + "sentence_fisher_kl_divergence/p99": 0.010900910943746567, + "sentence_fisher_kl_divergence/var": 6.600628239539219e-06, + "sentence_full_gradient_variance/max_squared_error": 90780.0859375, + "sentence_full_gradient_variance/metric": 90780.0859375, + "sentence_full_gradient_variance/p75": 90780.0859375, + "sentence_full_gradient_variance/p90": 90780.0859375, + "sentence_full_gradient_variance/p95": 90780.0859375, + "sentence_full_gradient_variance/p99": 90780.0859375, + "sentence_full_update_term": 0.9322217702865601, + "sentence_full_update_term/max": 16.0, + "sentence_full_update_term/median": 0.1484375, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.796875, + "sentence_full_update_term/p85": 1.859375, + "sentence_full_update_term/p90": 2.2109375, + "sentence_full_update_term/p95": 4.1015625, + "sentence_full_update_term/p99": 11.250015258789062, + "sentence_full_update_term/var": 4.854344367980957, + "sentence_hessian_coeff": 3074234.75, + "sentence_hessian_coeff/max": 415236096.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -343932928.0, + "sentence_hessian_coeff/p25": -564224.0, + "sentence_hessian_coeff/p75": 660480.0, + "sentence_hessian_coeff/p99": 379374912.0, + "sentence_hessian_coeff/var": 4880142631436288.0, + "sentence_hessian_coeff_abs": 17538908.0, + "sentence_hessian_coeff_abs/max": 415236096.0, + "sentence_hessian_coeff_abs/median": 647168.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 7340032.0, + "sentence_hessian_coeff_abs/p99": 379374912.0, + "sentence_hessian_coeff_abs/var": 4578841548816384.0, + "step": 8, + "token_fisher_curvature": 738231104.0, + "token_fisher_curvature/max": 362924736512.0, + "token_fisher_curvature/median": 1.3096723705530167e-09, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 930.0, + "token_fisher_curvature/p85": 10420224.0, + "token_fisher_curvature/p90": 240123904.0, + "token_fisher_curvature/p95": 2122317824.0, + "token_fisher_curvature/p99": 16508780544.0, + "token_fisher_curvature/var": 3.863683420200213e+19, + "token_fisher_kl_divergence": 0.040698420256376266, + "token_fisher_kl_divergence/max": 20.0, + "token_fisher_kl_divergence/median": 7.199780051661553e-20, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 5.133915692567825e-08, + "token_fisher_kl_divergence/p85": 0.000576019287109375, + "token_fisher_kl_divergence/p90": 0.01324462890625, + "token_fisher_kl_divergence/p95": 0.1171875, + "token_fisher_kl_divergence/p99": 0.91015625, + "token_fisher_kl_divergence/var": 0.11750605702400208, + "token_full_update_term": 0.05165638402104378, + "token_full_update_term/max": 54.0, + "token_full_update_term/median": 2.191882231272757e-10, + "token_full_update_term/min": -31.875, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.00018024444580078125, + "token_full_update_term/p85": 0.0181884765625, + "token_full_update_term/p90": 0.08349609375, + "token_full_update_term/p95": 0.2734375, + "token_full_update_term/p99": 1.0, + "token_full_update_term/var": 0.37063443660736084, + "token_hessian_coeff": -401543232.0, + "token_hessian_coeff/max": 708669603840.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -863288426496.0, + "token_hessian_coeff/p25": -4.0605664253234863e-07, + "token_hessian_coeff/p75": 0.0009899139404296875, + "token_hessian_coeff/p99": 1761607680.0, + "token_hessian_coeff/var": 1.4424163191305011e+20, + "token_hessian_coeff_abs": 835846400.0, + "token_hessian_coeff_abs/max": 863288426496.0, + "token_hessian_coeff_abs/median": 1.9550323486328125e-05, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 26752.0, + "token_hessian_coeff_abs/p99": 12884901888.0, + "token_hessian_coeff_abs/var": 1.4370423460985831e+20 + }, + { + "accuracy_reward": 0.5, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.2526315748691559, + "adam_stats/lm_head/lr_effective_max": 8.420248195761815e-05, + "adam_stats/lm_head/lr_effective_mean": 2.5204374698439835e-10, + "adam_stats/lm_head/lr_effective_min": -8.372864249395207e-05, + "adam_stats/lm_head/lr_effective_std": 3.7841657558601582e-06, + "adam_stats/lr_effective_max": 8.790422725724056e-05, + "adam_stats/lr_effective_mean": -6.163590748897718e-10, + "adam_stats/lr_effective_min": -8.799870556686074e-05, + "adam_stats/m_t_max": 0.008594420738518238, + "adam_stats/m_t_mean": -1.8543014346228404e-11, + "adam_stats/m_t_min": -0.011300116777420044, + "adam_stats/v_t_max": 2.432820838293992e-05, + "adam_stats/v_t_mean": 3.6665050336115446e-12, + "adam_stats/v_t_min": 0.0, + "advantages": -6.208817349140361e-10, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": -0.3534534275531769, + "advantages/p75": 0.5399450659751892, + "advantages/var": 0.5891971588134766, + "all_logprobs": -0.24748706817626953, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.000911712646484375, + "all_logprobs/min": -10.25, + "all_logprobs/p1": -3.6875, + "all_logprobs/p10": -0.76171875, + "all_logprobs/p25": -0.07177734375, + "all_logprobs/p5": -1.5703125, + "all_logprobs/p75": -2.300739288330078e-05, + "all_logprobs/var": 0.5058194398880005, + "clip_ratio": 0.0, + "completion_length": 896.03125, + "completion_length/correct": 876.5208740234375, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 1024.0, + "completion_length/correct/min": 196.0, + "completion_length/correct/p25": 683.5, + "completion_length/correct/p75": 1024.0, + "completion_length/correct/var": 53277.4453125, + "completion_length/incorrect": 915.5416870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 85.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 54249.0625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 85.0, + "completion_length/p25": 831.0, + "completion_length/p75": 1024.0, + "completion_length/var": 53581.984375, + "epoch": 0.0144, + "feature_vector_variance/max_squared_error": 204814.8125, + "feature_vector_variance/metric": 46900.046875, + "generated_tokens/total": 536116.0, + "global_fisher_curvature": 43264.0, + "global_fisher_curvature/max": 43264.0, + "global_fisher_curvature/median": 43264.0, + "global_fisher_curvature/min": 43264.0, + "global_fisher_curvature/p25": 43264.0, + "global_fisher_curvature/p75": 43264.0, + "global_fisher_curvature/p85": 43264.0, + "global_fisher_curvature/p90": 43264.0, + "global_fisher_curvature/p95": 43264.0, + "global_fisher_curvature/p99": 43264.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 3.11434268951416e-06, + "global_fisher_kl_divergence/max": 3.11434268951416e-06, + "global_fisher_kl_divergence/median": 3.11434268951416e-06, + "global_fisher_kl_divergence/min": 3.11434268951416e-06, + "global_fisher_kl_divergence/p25": 3.11434268951416e-06, + "global_fisher_kl_divergence/p75": 3.11434268951416e-06, + "global_fisher_kl_divergence/p85": 3.11434268951416e-06, + "global_fisher_kl_divergence/p90": 3.11434268951416e-06, + "global_fisher_kl_divergence/p95": 3.11434268951416e-06, + "global_fisher_kl_divergence/p99": 3.11434268951416e-06, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.7109375, + "global_full_update_term/max": 0.7109375, + "global_full_update_term/median": 0.7109375, + "global_full_update_term/min": 0.7109375, + "global_full_update_term/p25": 0.7109375, + "global_full_update_term/p75": 0.7109375, + "global_full_update_term/p85": 0.7109375, + "global_full_update_term/p90": 0.7109375, + "global_full_update_term/p95": 0.7109375, + "global_full_update_term/p99": 0.7109375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -8576.0, + "global_hessian_coeff/max": -8576.0, + "global_hessian_coeff/median": -8576.0, + "global_hessian_coeff/min": -8576.0, + "global_hessian_coeff/p25": -8576.0, + "global_hessian_coeff/p75": -8576.0, + "global_hessian_coeff/p99": -8576.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 8576.0, + "global_hessian_coeff_abs/max": 8576.0, + "global_hessian_coeff_abs/median": 8576.0, + "global_hessian_coeff_abs/min": 8576.0, + "global_hessian_coeff_abs/p25": 8576.0, + "global_hessian_coeff_abs/p75": 8576.0, + "global_hessian_coeff_abs/p99": 8576.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.585943877696991, + "grouped_std_rewards": 0.2917885184288025, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0, + "mean_logprobs": -0.248046875, + "mean_logprobs/var": 0.0279541015625, + "num_completions/total": 864, + "per_sentence_gradient_norm": 144.703125, + "per_sentence_gradient_norm/max": 796.0, + "per_sentence_gradient_norm/median": 108.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 191.0, + "per_sentence_gradient_norm/var": 27937.46875, + "per_token_feature_norm": 263.7426452636719, + "per_token_feature_norm/max": 434.0, + "per_token_feature_norm/median": 254.0, + "per_token_feature_norm/min": 112.0, + "per_token_feature_norm/p25": 220.0, + "per_token_feature_norm/p75": 306.0, + "per_token_feature_norm/var": 3386.611083984375, + "per_token_gradient_norm": 19.774944305419922, + "per_token_gradient_norm/max": 1216.0, + "per_token_gradient_norm/median": 0.003448486328125, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 1.59375, + "per_token_gradient_norm/var": 4364.65966796875, + "per_token_policy_error_norm": 0.11406482756137848, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0078125, + "per_token_policy_error_norm/var": 0.09380858391523361, + "policy_entropy": 0.28225404024124146, + "policy_entropy/max": 3.640625, + "policy_entropy/median": 0.00823974609375, + "policy_entropy/min": 1.2980308383703232e-08, + "policy_entropy/p25": 0.000293731689453125, + "policy_entropy/p75": 0.296875, + "policy_entropy/var": 0.2968198359012604, + "policy_loss": -7.450580596923828e-09, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": -0.5399450063705444, + "policy_loss/p75": 0.3534534275531769, + "policy_loss/var": 0.5891971588134766, + "policy_sharpness": 5.583710670471191, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 4.99212646484375, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 1.2365460395812988, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 17.090564727783203, + "reward": 0.5, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.2526315748691559, + "rewards/accuracy_reward": 0.5, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.2526315748691559, + "sentence_fisher_curvature": 6717781.5, + "sentence_fisher_curvature/max": 121634816.0, + "sentence_fisher_curvature/median": 942080.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 4579328.0, + "sentence_fisher_curvature/p85": 8069120.0, + "sentence_fisher_curvature/p90": 15564800.0, + "sentence_fisher_curvature/p95": 26181632.0, + "sentence_fisher_curvature/p99": 114163736.0, + "sentence_fisher_curvature/var": 355362775498752.0, + "sentence_fisher_kl_divergence": 0.00048315589083358645, + "sentence_fisher_kl_divergence/max": 0.00872802734375, + "sentence_fisher_kl_divergence/median": 6.771087646484375e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0003299713134765625, + "sentence_fisher_kl_divergence/p85": 0.0005817413330078125, + "sentence_fisher_kl_divergence/p90": 0.00112152099609375, + "sentence_fisher_kl_divergence/p95": 0.00188446044921875, + "sentence_fisher_kl_divergence/p99": 0.008206178434193134, + "sentence_fisher_kl_divergence/var": 1.8338098470849218e-06, + "sentence_full_gradient_variance/max_squared_error": 48091.44921875, + "sentence_full_gradient_variance/metric": 48091.44921875, + "sentence_full_gradient_variance/p75": 48091.44921875, + "sentence_full_gradient_variance/p90": 48091.44921875, + "sentence_full_gradient_variance/p95": 48091.44921875, + "sentence_full_gradient_variance/p99": 48091.44921875, + "sentence_full_update_term": 0.5655670166015625, + "sentence_full_update_term/max": 7.4375, + "sentence_full_update_term/median": 0.1318359375, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.4189453125, + "sentence_full_update_term/p85": 0.91796875, + "sentence_full_update_term/p90": 1.4609375, + "sentence_full_update_term/p95": 3.4765625, + "sentence_full_update_term/p99": 5.032820224761963, + "sentence_full_update_term/var": 1.4264752864837646, + "sentence_hessian_coeff": -1638325.375, + "sentence_hessian_coeff/max": 122683392.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -149946368.0, + "sentence_hessian_coeff/p25": -330240.0, + "sentence_hessian_coeff/p75": 272384.0, + "sentence_hessian_coeff/p99": 39505368.0, + "sentence_hessian_coeff/var": 556022338945024.0, + "sentence_hessian_coeff_abs": 6594709.5, + "sentence_hessian_coeff_abs/max": 149946368.0, + "sentence_hessian_coeff_abs/median": 303104.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 1595392.0, + "sentence_hessian_coeff_abs/p99": 124046624.0, + "sentence_hessian_coeff_abs/var": 514786794143744.0, + "step": 9, + "token_fisher_curvature": 468063008.0, + "token_fisher_curvature/max": 362924736512.0, + "token_fisher_curvature/median": 3.3287506084889174e-10, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 15.625, + "token_fisher_curvature/p85": 712704.0, + "token_fisher_curvature/p90": 50593792.0, + "token_fisher_curvature/p95": 926941184.0, + "token_fisher_curvature/p99": 9462349824.0, + "token_fisher_curvature/var": 2.3691814760433582e+19, + "token_fisher_kl_divergence": 0.033709585666656494, + "token_fisher_kl_divergence/max": 26.125, + "token_fisher_kl_divergence/median": 2.3928680759933985e-20, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 1.127773430198431e-09, + "token_fisher_kl_divergence/p85": 5.125999450683594e-05, + "token_fisher_kl_divergence/p90": 0.0036468505859375, + "token_fisher_kl_divergence/p95": 0.06689453125, + "token_fisher_kl_divergence/p99": 0.6796875, + "token_fisher_kl_divergence/var": 0.1228402853012085, + "token_full_update_term": 0.053951188921928406, + "token_full_update_term/max": 77.5, + "token_full_update_term/median": 1.3369572116062045e-10, + "token_full_update_term/min": -32.75, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 2.8014183044433594e-05, + "token_full_update_term/p85": 0.00537109375, + "token_full_update_term/p90": 0.04248046875, + "token_full_update_term/p95": 0.16015625, + "token_full_update_term/p99": 0.925079345703125, + "token_full_update_term/var": 0.6635953187942505, + "token_hessian_coeff": -43253448.0, + "token_hessian_coeff/max": 828928688128.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -678604832768.0, + "token_hessian_coeff/p25": -2.2724270820617676e-06, + "token_hessian_coeff/p75": 2.485513687133789e-05, + "token_hessian_coeff/p99": 1652555776.0, + "token_hessian_coeff/var": 8.003833321744237e+19, + "token_hessian_coeff_abs": 508573664.0, + "token_hessian_coeff_abs/max": 828928688128.0, + "token_hessian_coeff_abs/median": 7.361173629760742e-06, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 1272.0, + "token_hessian_coeff_abs/p99": 5905580032.0, + "token_hessian_coeff_abs/var": 7.978155766993807e+19 + }, + { + "accuracy_reward": 0.53125, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 1.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.2516447603702545, + "adam_stats/lm_head/lr_effective_max": 9.466197661822662e-05, + "adam_stats/lm_head/lr_effective_mean": 1.6163725913287408e-10, + "adam_stats/lm_head/lr_effective_min": -8.864361006999388e-05, + "adam_stats/lm_head/lr_effective_std": 3.845002538582776e-06, + "adam_stats/lr_effective_max": 9.696212509879842e-05, + "adam_stats/lr_effective_mean": -8.596623413126281e-10, + "adam_stats/lr_effective_min": -9.750304889166728e-05, + "adam_stats/m_t_max": 0.007561949547380209, + "adam_stats/m_t_mean": -8.44153150159066e-11, + "adam_stats/m_t_min": -0.009238934144377708, + "adam_stats/v_t_max": 2.7438254619482905e-05, + "adam_stats/v_t_mean": 4.186150558138646e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 3.725290298461914e-09, + "advantages/max": 1.6198352575302124, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": 0.0, + "advantages/p75": 0.3534534275531769, + "advantages/var": 0.4418848156929016, + "all_logprobs": -0.17863407731056213, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.000171661376953125, + "all_logprobs/min": -12.5625, + "all_logprobs/p1": -3.015625, + "all_logprobs/p10": -0.474609375, + "all_logprobs/p25": -0.0263671875, + "all_logprobs/p5": -1.140625, + "all_logprobs/p75": -6.079673767089844e-06, + "all_logprobs/var": 0.3423314690589905, + "clip_ratio": 0.0, + "completion_length": 836.6354370117188, + "completion_length/correct": 738.11767578125, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 808.0, + "completion_length/correct/min": 291.0, + "completion_length/correct/p25": 445.5, + "completion_length/correct/p75": 1024.0, + "completion_length/correct/var": 80117.90625, + "completion_length/incorrect": 948.2889404296875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 168.0, + "completion_length/incorrect/p25": 975.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 26324.486328125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 168.0, + "completion_length/p25": 709.75, + "completion_length/p75": 1024.0, + "completion_length/var": 65475.3515625, + "epoch": 0.016, + "feature_vector_variance/max_squared_error": 207866.234375, + "feature_vector_variance/metric": 40596.5859375, + "generated_tokens/total": 616433.0, + "global_fisher_curvature": 171008.0, + "global_fisher_curvature/max": 171008.0, + "global_fisher_curvature/median": 171008.0, + "global_fisher_curvature/min": 171008.0, + "global_fisher_curvature/p25": 171008.0, + "global_fisher_curvature/p75": 171008.0, + "global_fisher_curvature/p85": 171008.0, + "global_fisher_curvature/p90": 171008.0, + "global_fisher_curvature/p95": 171008.0, + "global_fisher_curvature/p99": 171008.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.5616416931152344e-05, + "global_fisher_kl_divergence/max": 1.5616416931152344e-05, + "global_fisher_kl_divergence/median": 1.5616416931152344e-05, + "global_fisher_kl_divergence/min": 1.5616416931152344e-05, + "global_fisher_kl_divergence/p25": 1.5616416931152344e-05, + "global_fisher_kl_divergence/p75": 1.5616416931152344e-05, + "global_fisher_kl_divergence/p85": 1.5616416931152344e-05, + "global_fisher_kl_divergence/p90": 1.5616416931152344e-05, + "global_fisher_kl_divergence/p95": 1.5616416931152344e-05, + "global_fisher_kl_divergence/p99": 1.5616416931152344e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 3.890625, + "global_full_update_term/max": 3.890625, + "global_full_update_term/median": 3.890625, + "global_full_update_term/min": 3.890625, + "global_full_update_term/p25": 3.890625, + "global_full_update_term/p75": 3.890625, + "global_full_update_term/p85": 3.890625, + "global_full_update_term/p90": 3.890625, + "global_full_update_term/p95": 3.890625, + "global_full_update_term/p99": 3.890625, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -323584.0, + "global_hessian_coeff/max": -323584.0, + "global_hessian_coeff/median": -323584.0, + "global_hessian_coeff/min": -323584.0, + "global_hessian_coeff/p25": -323584.0, + "global_hessian_coeff/p75": -323584.0, + "global_hessian_coeff/p99": -323584.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 323584.0, + "global_hessian_coeff_abs/max": 323584.0, + "global_hessian_coeff_abs/median": 323584.0, + "global_hessian_coeff_abs/min": 323584.0, + "global_hessian_coeff_abs/p25": 323584.0, + "global_hessian_coeff_abs/p75": 323584.0, + "global_hessian_coeff_abs/p99": 323584.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 9.977411270141602, + "grouped_std_rewards": 0.2041158676147461, + "learning_rate": 1.5e-05, + "loss": 0.0, + "mean_logprobs": -0.1826171875, + "mean_logprobs/var": 0.01202392578125, + "num_completions/total": 960, + "per_sentence_gradient_norm": 86.59245300292969, + "per_sentence_gradient_norm/max": 992.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 110.25, + "per_sentence_gradient_norm/var": 25386.490234375, + "per_token_feature_norm": 250.4801483154297, + "per_token_feature_norm/max": 430.0, + "per_token_feature_norm/median": 242.0, + "per_token_feature_norm/min": 111.5, + "per_token_feature_norm/p25": 213.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2642.022705078125, + "per_token_gradient_norm": 11.348506927490234, + "per_token_gradient_norm/max": 1392.0, + "per_token_gradient_norm/median": 2.0742416381835938e-05, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.01043701171875, + "per_token_gradient_norm/var": 3260.010986328125, + "per_token_policy_error_norm": 0.08797167241573334, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0, + "per_token_policy_error_norm/var": 0.07608958333730698, + "policy_entropy": 0.19904950261116028, + "policy_entropy/max": 3.65625, + "policy_entropy/median": 0.00180816650390625, + "policy_entropy/min": 5.326000973582268e-09, + "policy_entropy/p25": 8.678436279296875e-05, + "policy_entropy/p75": 0.1337890625, + "policy_entropy/var": 0.17862914502620697, + "policy_loss": 1.2417634920325327e-08, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.0, + "policy_loss/min": -1.6198352575302124, + "policy_loss/p25": -0.3534534275531769, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.4418848156929016, + "policy_sharpness": 6.560134410858154, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 9.1875, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 2.2186522483825684, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 15.616608619689941, + "reward": 0.53125, + "reward/max": 1.0, + "reward/median": 1.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.2516447603702545, + "rewards/accuracy_reward": 0.53125, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 1.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.2516447603702545, + "sentence_fisher_curvature": 10953576.0, + "sentence_fisher_curvature/max": 868220928.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 1181696.0, + "sentence_fisher_curvature/p85": 1968128.0, + "sentence_fisher_curvature/p90": 3047424.0, + "sentence_fisher_curvature/p95": 10682368.0, + "sentence_fisher_curvature/p99": 103182336.0, + "sentence_fisher_curvature/var": 7867154946850816.0, + "sentence_fisher_kl_divergence": 0.000998005853034556, + "sentence_fisher_kl_divergence/max": 0.0791015625, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.00010776519775390625, + "sentence_fisher_kl_divergence/p85": 0.00017905235290527344, + "sentence_fisher_kl_divergence/p90": 0.00027751922607421875, + "sentence_fisher_kl_divergence/p95": 0.0009737014770507812, + "sentence_fisher_kl_divergence/p99": 0.009405741468071938, + "sentence_fisher_kl_divergence/var": 6.530254904646426e-05, + "sentence_full_gradient_variance/max_squared_error": 32278.296875, + "sentence_full_gradient_variance/metric": 32278.296875, + "sentence_full_gradient_variance/p75": 32278.296875, + "sentence_full_gradient_variance/p90": 32278.296875, + "sentence_full_gradient_variance/p95": 32278.296875, + "sentence_full_gradient_variance/p99": 32278.296875, + "sentence_full_update_term": 0.4264809489250183, + "sentence_full_update_term/max": 12.75, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.162353515625, + "sentence_full_update_term/p85": 0.28173828125, + "sentence_full_update_term/p90": 0.375, + "sentence_full_update_term/p95": 1.50390625, + "sentence_full_update_term/p99": 9.8406343460083, + "sentence_full_update_term/var": 2.841885805130005, + "sentence_hessian_coeff": -18188222.0, + "sentence_hessian_coeff/max": 26607616.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -1652555776.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 58368.0, + "sentence_hessian_coeff/p99": 16397140.0, + "sentence_hessian_coeff/var": 2.858210755923149e+16, + "sentence_hessian_coeff_abs": 19445672.0, + "sentence_hessian_coeff_abs/max": 1652555776.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 402944.0, + "sentence_hessian_coeff_abs/p99": 198185536.0, + "sentence_hessian_coeff_abs/var": 2.853428954084147e+16, + "step": 10, + "token_fisher_curvature": 356626208.0, + "token_fisher_curvature/max": 618475290624.0, + "token_fisher_curvature/median": 8.131516293641283e-19, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 3.14321368932724e-08, + "token_fisher_curvature/p85": 2.015625, + "token_fisher_curvature/p90": 30976.0, + "token_fisher_curvature/p95": 61865984.0, + "token_fisher_curvature/p99": 4630511616.0, + "token_fisher_curvature/var": 3.950038623641089e+19, + "token_fisher_kl_divergence": 0.03250093385577202, + "token_fisher_kl_divergence/max": 56.25, + "token_fisher_kl_divergence/median": 7.415292509077511e-29, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 2.859583229930518e-18, + "token_fisher_kl_divergence/p85": 1.837179297581315e-10, + "token_fisher_kl_divergence/p90": 2.816319465637207e-06, + "token_fisher_kl_divergence/p95": 0.005645751953125, + "token_fisher_kl_divergence/p99": 0.421875, + "token_fisher_kl_divergence/var": 0.32804644107818604, + "token_full_update_term": 0.02379104308784008, + "token_full_update_term/max": 34.0, + "token_full_update_term/median": 5.10702591327572e-15, + "token_full_update_term/min": -113.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 1.3606040738523006e-09, + "token_full_update_term/p85": 8.225440979003906e-06, + "token_full_update_term/p90": 0.001220703125, + "token_full_update_term/p95": 0.053466796875, + "token_full_update_term/p99": 0.42938232421875, + "token_full_update_term/var": 0.9432822465896606, + "token_hessian_coeff": -240660320.0, + "token_hessian_coeff/max": 270582939648.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -1520418422784.0, + "token_hessian_coeff/p25": -3.228706191293895e-11, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 471203840.0, + "token_hessian_coeff/var": 1.8032645124847252e+20, + "token_hessian_coeff_abs": 535312096.0, + "token_hessian_coeff_abs/max": 1520418422784.0, + "token_hessian_coeff_abs/median": 1.3571366253017914e-12, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.00021648406982421875, + "token_hessian_coeff_abs/p99": 2348810240.0, + "token_hessian_coeff_abs/var": 1.800977880142672e+20 + }, + { + "accuracy_reward": 0.5208333730697632, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 1.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.25219300389289856, + "adam_stats/lm_head/lr_effective_max": 9.621938079362735e-05, + "adam_stats/lm_head/lr_effective_mean": -1.6843670555544143e-11, + "adam_stats/lm_head/lr_effective_min": -9.105424396693707e-05, + "adam_stats/lm_head/lr_effective_std": 3.678399934869958e-06, + "adam_stats/lr_effective_max": 9.715316991787404e-05, + "adam_stats/lr_effective_mean": -9.4620877799656e-10, + "adam_stats/lr_effective_min": -9.930810483638197e-05, + "adam_stats/m_t_max": 0.01475722435861826, + "adam_stats/m_t_mean": -6.709036798335788e-11, + "adam_stats/m_t_min": -0.012040731497108936, + "adam_stats/v_t_max": 2.7413720090407878e-05, + "adam_stats/v_t_mean": 4.702983228549451e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 1.2073814868927002, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": 0.0, + "advantages/p75": 0.3534534275531769, + "advantages/var": 0.5155341029167175, + "all_logprobs": -0.16751527786254883, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.00011157989501953125, + "all_logprobs/min": -11.875, + "all_logprobs/p1": -2.875, + "all_logprobs/p10": -0.439453125, + "all_logprobs/p25": -0.02392578125, + "all_logprobs/p5": -1.046875, + "all_logprobs/p75": -3.5762786865234375e-06, + "all_logprobs/var": 0.30902099609375, + "clip_ratio": 0.0, + "completion_length": 876.1354370117188, + "completion_length/correct": 782.8800048828125, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 1024.0, + "completion_length/correct/min": 255.0, + "completion_length/correct/p25": 501.5, + "completion_length/correct/p75": 1024.0, + "completion_length/correct/var": 78312.8046875, + "completion_length/incorrect": 977.5, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 585.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 13113.1015625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 255.0, + "completion_length/p25": 741.75, + "completion_length/p75": 1024.0, + "completion_length/var": 56156.69140625, + "epoch": 0.0176, + "feature_vector_variance/max_squared_error": 193643.15625, + "feature_vector_variance/metric": 39689.328125, + "generated_tokens/total": 700542.0, + "global_fisher_curvature": 30080.0, + "global_fisher_curvature/max": 30080.0, + "global_fisher_curvature/median": 30080.0, + "global_fisher_curvature/min": 30080.0, + "global_fisher_curvature/p25": 30080.0, + "global_fisher_curvature/p75": 30080.0, + "global_fisher_curvature/p85": 30080.0, + "global_fisher_curvature/p90": 30080.0, + "global_fisher_curvature/p95": 30080.0, + "global_fisher_curvature/p99": 30080.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 3.382563591003418e-06, + "global_fisher_kl_divergence/max": 3.382563591003418e-06, + "global_fisher_kl_divergence/median": 3.382563591003418e-06, + "global_fisher_kl_divergence/min": 3.382563591003418e-06, + "global_fisher_kl_divergence/p25": 3.382563591003418e-06, + "global_fisher_kl_divergence/p75": 3.382563591003418e-06, + "global_fisher_kl_divergence/p85": 3.382563591003418e-06, + "global_fisher_kl_divergence/p90": 3.382563591003418e-06, + "global_fisher_kl_divergence/p95": 3.382563591003418e-06, + "global_fisher_kl_divergence/p99": 3.382563591003418e-06, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.671875, + "global_full_update_term/max": 0.671875, + "global_full_update_term/median": 0.671875, + "global_full_update_term/min": 0.671875, + "global_full_update_term/p25": 0.671875, + "global_full_update_term/p75": 0.671875, + "global_full_update_term/p85": 0.671875, + "global_full_update_term/p90": 0.671875, + "global_full_update_term/p95": 0.671875, + "global_full_update_term/p99": 0.671875, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -24704.0, + "global_hessian_coeff/max": -24704.0, + "global_hessian_coeff/median": -24704.0, + "global_hessian_coeff/min": -24704.0, + "global_hessian_coeff/p25": -24704.0, + "global_hessian_coeff/p75": -24704.0, + "global_hessian_coeff/p99": -24704.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 24704.0, + "global_hessian_coeff_abs/max": 24704.0, + "global_hessian_coeff_abs/median": 24704.0, + "global_hessian_coeff_abs/min": 24704.0, + "global_hessian_coeff_abs/p25": 24704.0, + "global_hessian_coeff_abs/p75": 24704.0, + "global_hessian_coeff_abs/p99": 24704.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 3.5106589794158936, + "grouped_std_rewards": 0.24268518388271332, + "learning_rate": 1.4995431202643219e-05, + "loss": 0.0, + "mean_logprobs": -0.1650390625, + "mean_logprobs/var": 0.0048828125, + "num_completions/total": 1056, + "per_sentence_gradient_norm": 117.98698425292969, + "per_sentence_gradient_norm/max": 1480.0, + "per_sentence_gradient_norm/median": 53.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 124.25, + "per_sentence_gradient_norm/var": 42991.1171875, + "per_token_feature_norm": 239.49180603027344, + "per_token_feature_norm/max": 422.0, + "per_token_feature_norm/median": 229.0, + "per_token_feature_norm/min": 114.0, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 268.0, + "per_token_feature_norm/var": 2556.952880859375, + "per_token_gradient_norm": 12.55235481262207, + "per_token_gradient_norm/max": 1208.0, + "per_token_gradient_norm/median": 8.761882781982422e-06, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0113525390625, + "per_token_gradient_norm/var": 3526.060791015625, + "per_token_policy_error_norm": 0.08286329358816147, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0, + "per_token_policy_error_norm/var": 0.06949115544557571, + "policy_entropy": 0.1915467083454132, + "policy_entropy/max": 3.796875, + "policy_entropy/median": 0.00122833251953125, + "policy_entropy/min": 2.1464074961841106e-10, + "policy_entropy/p25": 5.340576171875e-05, + "policy_entropy/p75": 0.1240234375, + "policy_entropy/var": 0.1720447540283203, + "policy_loss": 6.829699028543246e-09, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.0, + "policy_loss/min": -1.2073816061019897, + "policy_loss/p25": -0.3534534275531769, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.5155341029167175, + "policy_sharpness": 6.744584560394287, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 10.0, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 2.549999952316284, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 15.060028076171875, + "reward": 0.5208333730697632, + "reward/max": 1.0, + "reward/median": 1.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.25219300389289856, + "rewards/accuracy_reward": 0.5208333730697632, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 1.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.25219300389289856, + "sentence_fisher_curvature": 5821259.0, + "sentence_fisher_curvature/max": 183500800.0, + "sentence_fisher_curvature/median": 248832.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 1601536.0, + "sentence_fisher_curvature/p85": 7069696.0, + "sentence_fisher_curvature/p90": 15138816.0, + "sentence_fisher_curvature/p95": 30703616.0, + "sentence_fisher_curvature/p99": 61722196.0, + "sentence_fisher_curvature/var": 428460367413248.0, + "sentence_fisher_kl_divergence": 0.0006543382769450545, + "sentence_fisher_kl_divergence/max": 0.0206298828125, + "sentence_fisher_kl_divergence/median": 2.8014183044433594e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0001804828643798828, + "sentence_fisher_kl_divergence/p85": 0.00079345703125, + "sentence_fisher_kl_divergence/p90": 0.00170135498046875, + "sentence_fisher_kl_divergence/p95": 0.003452301025390625, + "sentence_fisher_kl_divergence/p99": 0.006945844739675522, + "sentence_fisher_kl_divergence/var": 5.415203304437455e-06, + "sentence_full_gradient_variance/max_squared_error": 55884.21875, + "sentence_full_gradient_variance/metric": 55884.21875, + "sentence_full_gradient_variance/p75": 55884.21875, + "sentence_full_gradient_variance/p90": 55884.21875, + "sentence_full_gradient_variance/p95": 55884.21875, + "sentence_full_gradient_variance/p99": 55884.21875, + "sentence_full_update_term": 0.824729323387146, + "sentence_full_update_term/max": 32.0, + "sentence_full_update_term/median": 0.041259765625, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.22802734375, + "sentence_full_update_term/p85": 0.9638671875, + "sentence_full_update_term/p90": 1.70703125, + "sentence_full_update_term/p95": 4.1015625, + "sentence_full_update_term/p99": 7.21101713180542, + "sentence_full_update_term/var": 11.720251083374023, + "sentence_hessian_coeff": -6071859.5, + "sentence_hessian_coeff/max": 36175872.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -381681664.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 171520.0, + "sentence_hessian_coeff/p99": 17435912.0, + "sentence_hessian_coeff/var": 1669349500780544.0, + "sentence_hessian_coeff_abs": 8089809.0, + "sentence_hessian_coeff_abs/max": 381681664.0, + "sentence_hessian_coeff_abs/median": 54272.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 560128.0, + "sentence_hessian_coeff_abs/p99": 89313400.0, + "sentence_hessian_coeff_abs/var": 1640470945988608.0, + "step": 11, + "token_fisher_curvature": 330663808.0, + "token_fisher_curvature/max": 347892350976.0, + "token_fisher_curvature/median": 2.064642808932357e-20, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 4.1676685214042664e-08, + "token_fisher_curvature/p85": 18.826171875, + "token_fisher_curvature/p90": 118272.0, + "token_fisher_curvature/p95": 79167488.0, + "token_fisher_curvature/p99": 5435817984.0, + "token_fisher_curvature/var": 2.3795571274700292e+19, + "token_fisher_kl_divergence": 0.03718935698270798, + "token_fisher_kl_divergence/max": 39.25, + "token_fisher_kl_divergence/median": 2.3172789090867222e-30, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 4.689174395999807e-18, + "token_fisher_kl_divergence/p85": 2.1188952814554796e-09, + "token_fisher_kl_divergence/p90": 1.329183578491211e-05, + "token_fisher_kl_divergence/p95": 0.0089111328125, + "token_fisher_kl_divergence/p99": 0.61328125, + "token_fisher_kl_divergence/var": 0.3010416626930237, + "token_full_update_term": 0.010893868282437325, + "token_full_update_term/max": 10.875, + "token_full_update_term/median": 8.81239525796218e-16, + "token_full_update_term/min": -71.5, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 1.6298145055770874e-09, + "token_full_update_term/p85": 3.266334533691406e-05, + "token_full_update_term/p90": 0.002410888671875, + "token_full_update_term/p95": 0.068359375, + "token_full_update_term/p99": 0.82781982421875, + "token_full_update_term/var": 0.5855955481529236, + "token_hessian_coeff": -394476224.0, + "token_hessian_coeff/max": 60666413056.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -833223655424.0, + "token_hessian_coeff/p25": -2.0372681319713593e-09, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 767557632.0, + "token_hessian_coeff/var": 1.0571869392154604e+20, + "token_hessian_coeff_abs": 483863072.0, + "token_hessian_coeff_abs/max": 833223655424.0, + "token_hessian_coeff_abs/median": 8.79296635503124e-14, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0002727508544921875, + "token_hessian_coeff_abs/p99": 3170893824.0, + "token_hessian_coeff_abs/var": 1.0564017999522981e+20 + }, + { + "accuracy_reward": 0.6458333730697632, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 1.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.2311403602361679, + "adam_stats/lm_head/lr_effective_max": 9.693973697721958e-05, + "adam_stats/lm_head/lr_effective_mean": 8.738937351537857e-11, + "adam_stats/lm_head/lr_effective_min": -9.110764221986756e-05, + "adam_stats/lm_head/lr_effective_std": 3.4421518648741767e-06, + "adam_stats/lr_effective_max": 9.863713785307482e-05, + "adam_stats/lr_effective_mean": -9.113290122542139e-10, + "adam_stats/lr_effective_min": -9.905218030326068e-05, + "adam_stats/m_t_max": 0.013224453665316105, + "adam_stats/m_t_mean": -7.095022486192093e-11, + "adam_stats/m_t_min": -0.010762864723801613, + "adam_stats/v_t_max": 2.759222661552485e-05, + "adam_stats/v_t_mean": 5.2214283244311765e-12, + "adam_stats/v_t_min": 0.0, + "advantages": -5.587935447692871e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": 0.0, + "advantages/p75": 0.3534534275531769, + "advantages/var": 0.44189244508743286, + "all_logprobs": -0.16318336129188538, + "all_logprobs/max": 0.0, + "all_logprobs/median": -8.630752563476562e-05, + "all_logprobs/min": -13.0625, + "all_logprobs/p1": -2.828125, + "all_logprobs/p10": -0.4296875, + "all_logprobs/p25": -0.0181884765625, + "all_logprobs/p5": -1.03125, + "all_logprobs/p75": -2.6226043701171875e-06, + "all_logprobs/var": 0.3076001703739166, + "clip_ratio": 0.0, + "completion_length": 723.4583740234375, + "completion_length/correct": 605.3064575195312, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 473.0, + "completion_length/correct/min": 207.0, + "completion_length/correct/p25": 351.5, + "completion_length/correct/p75": 905.5, + "completion_length/correct/var": 81702.2890625, + "completion_length/incorrect": 938.9117431640625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 348.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 34842.203125, + "completion_length/max": 1024.0, + "completion_length/median": 787.0, + "completion_length/min": 207.0, + "completion_length/p25": 412.5, + "completion_length/p75": 1024.0, + "completion_length/var": 90288.734375, + "epoch": 0.0192, + "feature_vector_variance/max_squared_error": 190744.34375, + "feature_vector_variance/metric": 40362.18359375, + "generated_tokens/total": 769994.0, + "global_fisher_curvature": 11392.0, + "global_fisher_curvature/max": 11392.0, + "global_fisher_curvature/median": 11392.0, + "global_fisher_curvature/min": 11392.0, + "global_fisher_curvature/p25": 11392.0, + "global_fisher_curvature/p75": 11392.0, + "global_fisher_curvature/p85": 11392.0, + "global_fisher_curvature/p90": 11392.0, + "global_fisher_curvature/p95": 11392.0, + "global_fisher_curvature/p99": 11392.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.2814998626708984e-06, + "global_fisher_kl_divergence/max": 1.2814998626708984e-06, + "global_fisher_kl_divergence/median": 1.2814998626708984e-06, + "global_fisher_kl_divergence/min": 1.2814998626708984e-06, + "global_fisher_kl_divergence/p25": 1.2814998626708984e-06, + "global_fisher_kl_divergence/p75": 1.2814998626708984e-06, + "global_fisher_kl_divergence/p85": 1.2814998626708984e-06, + "global_fisher_kl_divergence/p90": 1.2814998626708984e-06, + "global_fisher_kl_divergence/p95": 1.2814998626708984e-06, + "global_fisher_kl_divergence/p99": 1.2814998626708984e-06, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.1318359375, + "global_full_update_term/max": 0.1318359375, + "global_full_update_term/median": 0.1318359375, + "global_full_update_term/min": 0.1318359375, + "global_full_update_term/p25": 0.1318359375, + "global_full_update_term/p75": 0.1318359375, + "global_full_update_term/p85": 0.1318359375, + "global_full_update_term/p90": 0.1318359375, + "global_full_update_term/p95": 0.1318359375, + "global_full_update_term/p99": 0.1318359375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -5056.0, + "global_hessian_coeff/max": -5056.0, + "global_hessian_coeff/median": -5056.0, + "global_hessian_coeff/min": -5056.0, + "global_hessian_coeff/p25": -5056.0, + "global_hessian_coeff/p75": -5056.0, + "global_hessian_coeff/p99": -5056.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 5056.0, + "global_hessian_coeff_abs/max": 5056.0, + "global_hessian_coeff_abs/median": 5056.0, + "global_hessian_coeff_abs/min": 5056.0, + "global_hessian_coeff_abs/p25": 5056.0, + "global_hessian_coeff_abs/p75": 5056.0, + "global_hessian_coeff_abs/p99": 5056.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 5.376178741455078, + "grouped_std_rewards": 0.21463683247566223, + "learning_rate": 1.4981730376948682e-05, + "loss": -0.0, + "mean_logprobs": -0.1650390625, + "mean_logprobs/var": 0.00994873046875, + "num_completions/total": 1152, + "per_sentence_gradient_norm": 73.88542175292969, + "per_sentence_gradient_norm/max": 500.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 107.75, + "per_sentence_gradient_norm/var": 12466.3681640625, + "per_token_feature_norm": 239.12074279785156, + "per_token_feature_norm/max": 426.0, + "per_token_feature_norm/median": 229.0, + "per_token_feature_norm/min": 110.5, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 270.0, + "per_token_feature_norm/var": 2621.724365234375, + "per_token_gradient_norm": 12.782122611999512, + "per_token_gradient_norm/max": 1184.0, + "per_token_gradient_norm/median": 1.3470649719238281e-05, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0203857421875, + "per_token_gradient_norm/var": 3219.482666015625, + "per_token_policy_error_norm": 0.08155374228954315, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0, + "per_token_policy_error_norm/var": 0.06963551789522171, + "policy_entropy": 0.18107269704341888, + "policy_entropy/max": 3.8125, + "policy_entropy/median": 0.0009613037109375, + "policy_entropy/min": 1.3969838619232178e-09, + "policy_entropy/p25": 3.910064697265625e-05, + "policy_entropy/p75": 0.099609375, + "policy_entropy/var": 0.16148632764816284, + "policy_loss": -2.4835269396561444e-09, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741744995117188, + "policy_loss/p25": -0.3534534275531769, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.4418925344944, + "policy_sharpness": 6.831802845001221, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 10.0, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 2.6849608421325684, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 14.832135200500488, + "reward": 0.6458333730697632, + "reward/max": 1.0, + "reward/median": 1.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.2311403602361679, + "rewards/accuracy_reward": 0.6458333730697632, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 1.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.2311403602361679, + "sentence_fisher_curvature": 1656802.375, + "sentence_fisher_curvature/max": 18874368.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 839680.0, + "sentence_fisher_curvature/p85": 2643968.0, + "sentence_fisher_curvature/p90": 5029888.0, + "sentence_fisher_curvature/p95": 11583488.0, + "sentence_fisher_curvature/p99": 16882080.0, + "sentence_fisher_curvature/var": 15732103446528.0, + "sentence_fisher_kl_divergence": 0.0001863128854893148, + "sentence_fisher_kl_divergence/max": 0.0021209716796875, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 9.429454803466797e-05, + "sentence_fisher_kl_divergence/p85": 0.00029730796813964844, + "sentence_fisher_kl_divergence/p90": 0.0005645751953125, + "sentence_fisher_kl_divergence/p95": 0.001300811767578125, + "sentence_fisher_kl_divergence/p99": 0.0018962867325171828, + "sentence_fisher_kl_divergence/var": 1.9898938319329318e-07, + "sentence_full_gradient_variance/max_squared_error": 17604.56640625, + "sentence_full_gradient_variance/metric": 17604.56640625, + "sentence_full_gradient_variance/p75": 17604.56640625, + "sentence_full_gradient_variance/p90": 17604.56640625, + "sentence_full_gradient_variance/p95": 17604.56640625, + "sentence_full_gradient_variance/p99": 17604.56640625, + "sentence_full_update_term": 0.2587118148803711, + "sentence_full_update_term/max": 3.578125, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.171630859375, + "sentence_full_update_term/p85": 0.32666015625, + "sentence_full_update_term/p90": 0.806640625, + "sentence_full_update_term/p95": 1.52734375, + "sentence_full_update_term/p99": 2.8804709911346436, + "sentence_full_update_term/var": 0.38140061497688293, + "sentence_hessian_coeff": -507591.1875, + "sentence_hessian_coeff/max": 14942208.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -31588352.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 23264.0, + "sentence_hessian_coeff/p99": 10210524.0, + "sentence_hessian_coeff/var": 21390509998080.0, + "sentence_hessian_coeff_abs": 1469812.75, + "sentence_hessian_coeff_abs/max": 31588352.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 180992.0, + "sentence_hessian_coeff_abs/p99": 15774566.0, + "sentence_hessian_coeff_abs/var": 19467782324224.0, + "step": 12, + "token_fisher_curvature": 284487424.0, + "token_fisher_curvature/max": 324270030848.0, + "token_fisher_curvature/median": 1.6008922703106276e-19, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 4.153698682785034e-07, + "token_fisher_curvature/p85": 40.75, + "token_fisher_curvature/p90": 226096.0, + "token_fisher_curvature/p95": 110100480.0, + "token_fisher_curvature/p99": 5502926848.0, + "token_fisher_curvature/var": 1.443211375988546e+19, + "token_fisher_kl_divergence": 0.0319812074303627, + "token_fisher_kl_divergence/max": 36.5, + "token_fisher_kl_divergence/median": 1.8045193206930645e-29, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 4.662069341687669e-17, + "token_fisher_kl_divergence/p85": 4.5693013817071915e-09, + "token_fisher_kl_divergence/p90": 2.5379471480846405e-05, + "token_fisher_kl_divergence/p95": 0.01239013671875, + "token_fisher_kl_divergence/p99": 0.6171875, + "token_fisher_kl_divergence/var": 0.1823292076587677, + "token_full_update_term": 0.040531232953071594, + "token_full_update_term/max": 70.0, + "token_full_update_term/median": 2.4702462297909733e-15, + "token_full_update_term/min": -63.5, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 5.587935447692871e-09, + "token_full_update_term/p85": 5.2774325013160706e-05, + "token_full_update_term/p90": 0.00384521484375, + "token_full_update_term/p95": 0.08203125, + "token_full_update_term/p99": 0.849578857421875, + "token_full_update_term/var": 0.7978852391242981, + "token_hessian_coeff": -90756584.0, + "token_hessian_coeff/max": 468151435264.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -751619276800.0, + "token_hessian_coeff/p25": -1.3585577107733116e-11, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 576684032.0, + "token_hessian_coeff/var": 5.6050178083090596e+19, + "token_hessian_coeff_abs": 359995520.0, + "token_hessian_coeff_abs/max": 751619276800.0, + "token_hessian_coeff_abs/median": 3.4994229736184934e-13, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0016632080078125, + "token_hessian_coeff_abs/p99": 3346923520.0, + "token_hessian_coeff_abs/var": 5.592881398961668e+19 + }, + { + "accuracy_reward": 0.4791666865348816, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.25219297409057617, + "adam_stats/lm_head/lr_effective_max": 9.292331378674135e-05, + "adam_stats/lm_head/lr_effective_mean": 3.590329769598277e-11, + "adam_stats/lm_head/lr_effective_min": -9.436469554202631e-05, + "adam_stats/lm_head/lr_effective_std": 3.4784980016411282e-06, + "adam_stats/lr_effective_max": 9.768295421963558e-05, + "adam_stats/lr_effective_mean": -7.984914396352849e-10, + "adam_stats/lr_effective_min": -9.7835531050805e-05, + "adam_stats/m_t_max": 0.01184902060776949, + "adam_stats/m_t_mean": -4.3260745680173684e-11, + "adam_stats/m_t_min": -0.011217352002859116, + "adam_stats/v_t_max": 2.797168417600915e-05, + "adam_stats/v_t_mean": 5.73270814499538e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 1.2417634698280722e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -2.4741740226745605, + "advantages/p25": 0.0, + "advantages/p75": 0.3534534275531769, + "advantages/var": 0.4418881833553314, + "all_logprobs": -0.15096144378185272, + "all_logprobs/max": 0.0, + "all_logprobs/median": -9.775161743164062e-05, + "all_logprobs/min": -11.6875, + "all_logprobs/p1": -2.765625, + "all_logprobs/p10": -0.353515625, + "all_logprobs/p25": -0.01263427734375, + "all_logprobs/p5": -0.93359375, + "all_logprobs/p75": -2.6226043701171875e-06, + "all_logprobs/var": 0.28863316774368286, + "clip_ratio": 0.0, + "completion_length": 731.875, + "completion_length/correct": 587.5652465820312, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 505.0, + "completion_length/correct/min": 228.0, + "completion_length/correct/p25": 391.5, + "completion_length/correct/p75": 836.75, + "completion_length/correct/var": 70476.5625, + "completion_length/incorrect": 864.6399536132812, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 374.0, + "completion_length/incorrect/p25": 672.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 47017.171875, + "completion_length/max": 1024.0, + "completion_length/median": 680.0, + "completion_length/min": 228.0, + "completion_length/p25": 498.25, + "completion_length/p75": 1024.0, + "completion_length/var": 76995.5546875, + "epoch": 0.0208, + "feature_vector_variance/max_squared_error": 183202.9375, + "feature_vector_variance/metric": 41393.0234375, + "generated_tokens/total": 840254.0, + "global_fisher_curvature": 954368.0, + "global_fisher_curvature/max": 954368.0, + "global_fisher_curvature/median": 954368.0, + "global_fisher_curvature/min": 954368.0, + "global_fisher_curvature/p25": 954368.0, + "global_fisher_curvature/p75": 954368.0, + "global_fisher_curvature/p85": 954368.0, + "global_fisher_curvature/p90": 954368.0, + "global_fisher_curvature/p95": 954368.0, + "global_fisher_curvature/p99": 954368.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.00010728836059570312, + "global_fisher_kl_divergence/max": 0.00010728836059570312, + "global_fisher_kl_divergence/median": 0.00010728836059570312, + "global_fisher_kl_divergence/min": 0.00010728836059570312, + "global_fisher_kl_divergence/p25": 0.00010728836059570312, + "global_fisher_kl_divergence/p75": 0.00010728836059570312, + "global_fisher_kl_divergence/p85": 0.00010728836059570312, + "global_fisher_kl_divergence/p90": 0.00010728836059570312, + "global_fisher_kl_divergence/p95": 0.00010728836059570312, + "global_fisher_kl_divergence/p99": 0.00010728836059570312, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 8.25, + "global_full_update_term/max": 8.25, + "global_full_update_term/median": 8.25, + "global_full_update_term/min": 8.25, + "global_full_update_term/p25": 8.25, + "global_full_update_term/p75": 8.25, + "global_full_update_term/p85": 8.25, + "global_full_update_term/p90": 8.25, + "global_full_update_term/p95": 8.25, + "global_full_update_term/p99": 8.25, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 440320.0, + "global_hessian_coeff/max": 440320.0, + "global_hessian_coeff/median": 440320.0, + "global_hessian_coeff/min": 440320.0, + "global_hessian_coeff/p25": 440320.0, + "global_hessian_coeff/p75": 440320.0, + "global_hessian_coeff/p99": 440320.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 440320.0, + "global_hessian_coeff_abs/max": 440320.0, + "global_hessian_coeff_abs/median": 440320.0, + "global_hessian_coeff_abs/min": 440320.0, + "global_hessian_coeff_abs/p25": 440320.0, + "global_hessian_coeff_abs/p75": 440320.0, + "global_hessian_coeff_abs/p99": 440320.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 2.2749478816986084, + "grouped_std_rewards": 0.20866911113262177, + "learning_rate": 1.495891421526205e-05, + "loss": -0.0, + "mean_logprobs": -0.1484375, + "mean_logprobs/var": 0.00885009765625, + "num_completions/total": 1248, + "per_sentence_gradient_norm": 93.703125, + "per_sentence_gradient_norm/max": 1128.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 104.25, + "per_sentence_gradient_norm/var": 35831.2109375, + "per_token_feature_norm": 240.77284240722656, + "per_token_feature_norm/max": 430.0, + "per_token_feature_norm/median": 235.0, + "per_token_feature_norm/min": 106.5, + "per_token_feature_norm/p25": 207.0, + "per_token_feature_norm/p75": 270.0, + "per_token_feature_norm/var": 2160.388427734375, + "per_token_gradient_norm": 12.0921630859375, + "per_token_gradient_norm/max": 1080.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.00970458984375, + "per_token_gradient_norm/var": 3671.476318359375, + "per_token_policy_error_norm": 0.07454688102006912, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0, + "per_token_policy_error_norm/var": 0.06413715332746506, + "policy_entropy": 0.17062672972679138, + "policy_entropy/max": 3.78125, + "policy_entropy/median": 0.00109100341796875, + "policy_entropy/min": 1.9208528101444244e-09, + "policy_entropy/p25": 3.9577484130859375e-05, + "policy_entropy/p75": 0.0751953125, + "policy_entropy/var": 0.1565079391002655, + "policy_loss": 2.4835269396561444e-09, + "policy_loss/max": 2.4741742610931396, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741740226745605, + "policy_loss/p25": -0.3534534275531769, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.4418881833553314, + "policy_sharpness": 6.742232322692871, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 10.0, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 2.5738282203674316, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 14.99264144897461, + "reward": 0.4791666865348816, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.25219297409057617, + "rewards/accuracy_reward": 0.4791666865348816, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.25219297409057617, + "sentence_fisher_curvature": 6160349.5, + "sentence_fisher_curvature/max": 195035136.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 995328.0, + "sentence_fisher_curvature/p85": 3289088.0, + "sentence_fisher_curvature/p90": 5931008.0, + "sentence_fisher_curvature/p95": 10633216.0, + "sentence_fisher_curvature/p99": 135266496.0, + "sentence_fisher_curvature/var": 737433201147904.0, + "sentence_fisher_kl_divergence": 0.0006908751092851162, + "sentence_fisher_kl_divergence/max": 0.0218505859375, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.00011181831359863281, + "sentence_fisher_kl_divergence/p85": 0.0003695487976074219, + "sentence_fisher_kl_divergence/p90": 0.0006656646728515625, + "sentence_fisher_kl_divergence/p95": 0.001190185546875, + "sentence_fisher_kl_divergence/p99": 0.015182516537606716, + "sentence_fisher_kl_divergence/var": 9.270786904380657e-06, + "sentence_full_gradient_variance/max_squared_error": 43764.24609375, + "sentence_full_gradient_variance/metric": 43764.24609375, + "sentence_full_gradient_variance/p75": 43764.24609375, + "sentence_full_gradient_variance/p90": 43764.24609375, + "sentence_full_gradient_variance/p95": 43764.24609375, + "sentence_full_gradient_variance/p99": 43764.24609375, + "sentence_full_update_term": 0.6368573904037476, + "sentence_full_update_term/max": 18.125, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.160400390625, + "sentence_full_update_term/p85": 0.42626953125, + "sentence_full_update_term/p90": 0.615234375, + "sentence_full_update_term/p95": 1.537109375, + "sentence_full_update_term/p99": 12.603142738342285, + "sentence_full_update_term/var": 6.438117504119873, + "sentence_hessian_coeff": -28297.333984375, + "sentence_hessian_coeff/max": 415236096.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -203423744.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 106752.0, + "sentence_hessian_coeff/p99": 26553160.0, + "sentence_hessian_coeff/var": 2623492189061120.0, + "sentence_hessian_coeff_abs": 9492916.0, + "sentence_hessian_coeff_abs/max": 415236096.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 305664.0, + "sentence_hessian_coeff_abs/p99": 214015008.0, + "sentence_hessian_coeff_abs/var": 2532428950274048.0, + "step": 13, + "token_fisher_curvature": 371548224.0, + "token_fisher_curvature/max": 264140488704.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 1.9441358745098114e-08, + "token_fisher_curvature/p85": 5.71875, + "token_fisher_curvature/p90": 24320.0, + "token_fisher_curvature/p95": 42205184.0, + "token_fisher_curvature/p99": 5933367296.0, + "token_fisher_curvature/var": 2.4353483265843134e+19, + "token_fisher_kl_divergence": 0.04169995337724686, + "token_fisher_kl_divergence/max": 29.625, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 2.1819568721270777e-18, + "token_fisher_kl_divergence/p85": 6.402842700481415e-10, + "token_fisher_kl_divergence/p90": 2.726912498474121e-06, + "token_fisher_kl_divergence/p95": 0.004730224609375, + "token_fisher_kl_divergence/p99": 0.66729736328125, + "token_fisher_kl_divergence/var": 0.3067188858985901, + "token_full_update_term": 0.03977407142519951, + "token_full_update_term/max": 72.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": -38.75, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 1.127773430198431e-09, + "token_full_update_term/p85": 2.6959460228681564e-05, + "token_full_update_term/p90": 0.00122833251953125, + "token_full_update_term/p95": 0.047119140625, + "token_full_update_term/p99": 0.765625, + "token_full_update_term/var": 1.2576195001602173, + "token_hessian_coeff": -155288256.0, + "token_hessian_coeff/max": 493921239040.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -498216206336.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 463110144.0, + "token_hessian_coeff/var": 8.682152831244829e+19, + "token_hessian_coeff_abs": 534589760.0, + "token_hessian_coeff_abs/max": 498216206336.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.00017380714416503906, + "token_hessian_coeff_abs/p99": 3321888768.0, + "token_hessian_coeff_abs/var": 8.655986213722364e+19 + }, + { + "accuracy_reward": 0.40625, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 1.0, + "accuracy_reward/var": 0.2437499761581421, + "adam_stats/lm_head/lr_effective_max": 9.486650378676131e-05, + "adam_stats/lm_head/lr_effective_mean": -1.86928510864659e-11, + "adam_stats/lm_head/lr_effective_min": -9.634382149670273e-05, + "adam_stats/lm_head/lr_effective_std": 3.2582315725449007e-06, + "adam_stats/lr_effective_max": 9.760395187186077e-05, + "adam_stats/lr_effective_mean": -7.64507068762299e-10, + "adam_stats/lr_effective_min": -9.814113582251593e-05, + "adam_stats/m_t_max": 0.010586847551167011, + "adam_stats/m_t_mean": -1.1189466887773225e-11, + "adam_stats/m_t_min": -0.0099131278693676, + "adam_stats/v_t_max": 2.8844600819866173e-05, + "adam_stats/v_t_mean": 6.2520409856159365e-12, + "adam_stats/v_t_min": 0.0, + "advantages": -1.614292521878724e-08, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -1.6198352575302124, + "advantages/p25": -0.3534534275531769, + "advantages/p75": 0.5399450659751892, + "advantages/var": 0.5892137289047241, + "all_logprobs": -0.2070305198431015, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.000614166259765625, + "all_logprobs/min": -11.1875, + "all_logprobs/p1": -3.75, + "all_logprobs/p10": -0.51953125, + "all_logprobs/p25": -0.0240478515625, + "all_logprobs/p5": -1.3203125, + "all_logprobs/p75": -7.987022399902344e-06, + "all_logprobs/var": 0.4741431772708893, + "clip_ratio": 0.0, + "completion_length": 752.8854370117188, + "completion_length/correct": 547.2820434570312, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 495.0, + "completion_length/correct/min": 150.0, + "completion_length/correct/p25": 353.5, + "completion_length/correct/p75": 734.0, + "completion_length/correct/var": 60392.94921875, + "completion_length/incorrect": 893.5614013671875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 299.0, + "completion_length/incorrect/p25": 790.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 47901.10546875, + "completion_length/max": 1024.0, + "completion_length/median": 805.0, + "completion_length/min": 150.0, + "completion_length/p25": 485.25, + "completion_length/p75": 1024.0, + "completion_length/var": 81621.5234375, + "epoch": 0.0224, + "feature_vector_variance/max_squared_error": 227730.796875, + "feature_vector_variance/metric": 44957.3203125, + "generated_tokens/total": 912531.0, + "global_fisher_curvature": 278528.0, + "global_fisher_curvature/max": 278528.0, + "global_fisher_curvature/median": 278528.0, + "global_fisher_curvature/min": 278528.0, + "global_fisher_curvature/p25": 278528.0, + "global_fisher_curvature/p75": 278528.0, + "global_fisher_curvature/p85": 278528.0, + "global_fisher_curvature/p90": 278528.0, + "global_fisher_curvature/p95": 278528.0, + "global_fisher_curvature/p99": 278528.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 3.123283386230469e-05, + "global_fisher_kl_divergence/max": 3.123283386230469e-05, + "global_fisher_kl_divergence/median": 3.123283386230469e-05, + "global_fisher_kl_divergence/min": 3.123283386230469e-05, + "global_fisher_kl_divergence/p25": 3.123283386230469e-05, + "global_fisher_kl_divergence/p75": 3.123283386230469e-05, + "global_fisher_kl_divergence/p85": 3.123283386230469e-05, + "global_fisher_kl_divergence/p90": 3.123283386230469e-05, + "global_fisher_kl_divergence/p95": 3.123283386230469e-05, + "global_fisher_kl_divergence/p99": 3.123283386230469e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 3.375, + "global_full_update_term/max": 3.375, + "global_full_update_term/median": 3.375, + "global_full_update_term/min": 3.375, + "global_full_update_term/p25": 3.375, + "global_full_update_term/p75": 3.375, + "global_full_update_term/p85": 3.375, + "global_full_update_term/p90": 3.375, + "global_full_update_term/p95": 3.375, + "global_full_update_term/p99": 3.375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": -260096.0, + "global_hessian_coeff/max": -260096.0, + "global_hessian_coeff/median": -260096.0, + "global_hessian_coeff/min": -260096.0, + "global_hessian_coeff/p25": -260096.0, + "global_hessian_coeff/p75": -260096.0, + "global_hessian_coeff/p99": -260096.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 260096.0, + "global_hessian_coeff_abs/max": 260096.0, + "global_hessian_coeff_abs/median": 260096.0, + "global_hessian_coeff_abs/min": 260096.0, + "global_hessian_coeff_abs/p25": 260096.0, + "global_hessian_coeff_abs/p75": 260096.0, + "global_hessian_coeff_abs/p99": 260096.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 27.330108642578125, + "grouped_std_rewards": 0.31000807881355286, + "learning_rate": 1.4927010515561777e-05, + "loss": 0.0, + "mean_logprobs": -0.205078125, + "mean_logprobs/var": 0.038330078125, + "num_completions/total": 1344, + "per_sentence_gradient_norm": 137.03173828125, + "per_sentence_gradient_norm/max": 1152.0, + "per_sentence_gradient_norm/median": 77.5, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 223.5, + "per_sentence_gradient_norm/var": 32985.32421875, + "per_token_feature_norm": 252.37965393066406, + "per_token_feature_norm/max": 454.0, + "per_token_feature_norm/median": 246.0, + "per_token_feature_norm/min": 106.0, + "per_token_feature_norm/p25": 213.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2830.94482421875, + "per_token_gradient_norm": 21.40143394470215, + "per_token_gradient_norm/max": 1144.0, + "per_token_gradient_norm/median": 0.0004253387451171875, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.462890625, + "per_token_gradient_norm/var": 6236.60791015625, + "per_token_policy_error_norm": 0.0925130620598793, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.0078125, + "per_token_policy_error_norm/var": 0.07893026620149612, + "policy_entropy": 0.2299569696187973, + "policy_entropy/max": 3.796875, + "policy_entropy/median": 0.005096435546875, + "policy_entropy/min": 2.08092387765646e-09, + "policy_entropy/p25": 0.000110626220703125, + "policy_entropy/p75": 0.1279296875, + "policy_entropy/var": 0.27051523327827454, + "policy_loss": 9.934107758624577e-09, + "policy_loss/max": 1.6198352575302124, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741740226745605, + "policy_loss/p25": -0.5399450659751892, + "policy_loss/p75": 0.3534534275531769, + "policy_loss/var": 0.5892137289047241, + "policy_sharpness": 6.009340763092041, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 7.75, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 1.875, + "policy_sharpness/p75": 10.0, + "policy_sharpness/var": 16.58403205871582, + "reward": 0.40625, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 1.0, + "reward/var": 0.2437499761581421, + "rewards/accuracy_reward": 0.40625, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 1.0, + "rewards/accuracy_reward/var": 0.2437499761581421, + "sentence_fisher_curvature": 8555911.0, + "sentence_fisher_curvature/max": 149946368.0, + "sentence_fisher_curvature/median": 440320.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 6397952.0, + "sentence_fisher_curvature/p85": 12763136.0, + "sentence_fisher_curvature/p90": 19529728.0, + "sentence_fisher_curvature/p95": 39387136.0, + "sentence_fisher_curvature/p99": 149946368.0, + "sentence_fisher_curvature/var": 550044382003200.0, + "sentence_fisher_kl_divergence": 0.0009562339400872588, + "sentence_fisher_kl_divergence/max": 0.0167236328125, + "sentence_fisher_kl_divergence/median": 4.935264587402344e-05, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0007171630859375, + "sentence_fisher_kl_divergence/p85": 0.00142669677734375, + "sentence_fisher_kl_divergence/p90": 0.0021820068359375, + "sentence_fisher_kl_divergence/p95": 0.00440216064453125, + "sentence_fisher_kl_divergence/p99": 0.0167236328125, + "sentence_fisher_kl_divergence/var": 6.850349109299714e-06, + "sentence_full_gradient_variance/max_squared_error": 50843.421875, + "sentence_full_gradient_variance/metric": 50843.421875, + "sentence_full_gradient_variance/p75": 50843.421875, + "sentence_full_gradient_variance/p90": 50843.421875, + "sentence_full_gradient_variance/p95": 50843.421875, + "sentence_full_gradient_variance/p99": 50843.421875, + "sentence_full_update_term": 0.7374088168144226, + "sentence_full_update_term/max": 18.125, + "sentence_full_update_term/median": 0.08837890625, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.728515625, + "sentence_full_update_term/p85": 1.193359375, + "sentence_full_update_term/p90": 2.03515625, + "sentence_full_update_term/p95": 3.0, + "sentence_full_update_term/p99": 7.110972881317139, + "sentence_full_update_term/var": 4.320592403411865, + "sentence_hessian_coeff": -4233229.0, + "sentence_hessian_coeff/max": 69730304.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -197132288.0, + "sentence_hessian_coeff/p25": -57088.0, + "sentence_hessian_coeff/p75": 301056.0, + "sentence_hessian_coeff/p99": 36608516.0, + "sentence_hessian_coeff/var": 929408776929280.0, + "sentence_hessian_coeff_abs": 8403966.0, + "sentence_hessian_coeff_abs/max": 197132288.0, + "sentence_hessian_coeff_abs/median": 102912.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 3264512.0, + "sentence_hessian_coeff_abs/p99": 196136144.0, + "sentence_hessian_coeff_abs/var": 876147558580224.0, + "step": 14, + "token_fisher_curvature": 775418880.0, + "token_fisher_curvature/max": 283467841536.0, + "token_fisher_curvature/median": 7.416289804496046e-14, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.326171875, + "token_fisher_curvature/p85": 10112.0, + "token_fisher_curvature/p90": 7346432.0, + "token_fisher_curvature/p95": 926941184.0, + "token_fisher_curvature/p99": 20803747840.0, + "token_fisher_curvature/var": 4.3321536588606865e+19, + "token_fisher_kl_divergence": 0.08675261586904526, + "token_fisher_kl_divergence/max": 31.75, + "token_fisher_kl_divergence/median": 8.323504913814841e-24, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 3.660716174636036e-11, + "token_fisher_kl_divergence/p85": 1.1324882507324219e-06, + "token_fisher_kl_divergence/p90": 0.0008209049701690674, + "token_fisher_kl_divergence/p95": 0.103515625, + "token_fisher_kl_divergence/p99": 2.328125, + "token_fisher_kl_divergence/var": 0.5420700907707214, + "token_full_update_term": 0.0206716600805521, + "token_full_update_term/max": 94.5, + "token_full_update_term/median": 1.6271428648906294e-12, + "token_full_update_term/min": -34.25, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 2.6971101760864258e-06, + "token_full_update_term/p85": 0.000598907470703125, + "token_full_update_term/p90": 0.0167236328125, + "token_full_update_term/p95": 0.1806640625, + "token_full_update_term/p99": 1.125, + "token_full_update_term/var": 0.7003974318504333, + "token_hessian_coeff": -710063552.0, + "token_hessian_coeff/max": 670014898176.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -418759311360.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0029754638671875, + "token_hessian_coeff/p99": 1073741824.0, + "token_hessian_coeff/var": 9.648751975018057e+19, + "token_hessian_coeff_abs": 924013760.0, + "token_hessian_coeff_abs/max": 670014898176.0, + "token_hessian_coeff_abs/median": 1.257285475730896e-08, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 32.25, + "token_hessian_coeff_abs/p99": 20803747840.0, + "token_hessian_coeff_abs/var": 9.613790144082687e+19 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 8.51881704875268e-05, + "adam_stats/lm_head/lr_effective_mean": -1.677677441414005e-11, + "adam_stats/lm_head/lr_effective_min": -8.65147594595328e-05, + "adam_stats/lm_head/lr_effective_std": 2.9255077151901787e-06, + "adam_stats/lr_effective_max": 8.764524682192132e-05, + "adam_stats/lr_effective_mean": -6.864859791733124e-10, + "adam_stats/lr_effective_min": -8.812859596218914e-05, + "adam_stats/m_t_max": 0.009528162889182568, + "adam_stats/m_t_mean": -1.007056764368297e-11, + "adam_stats/m_t_min": -0.00892181508243084, + "adam_stats/v_t_max": 2.8815757104894146e-05, + "adam_stats/v_t_mean": 6.245789042208516e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.0835750102996826, + "all_logprobs/max": -3.5762786865234375e-07, + "all_logprobs/median": -0.1201171875, + "all_logprobs/min": -20.875, + "all_logprobs/p1": -5.9375, + "all_logprobs/p10": -3.0625, + "all_logprobs/p25": -2.296875, + "all_logprobs/p5": -4.125, + "all_logprobs/p75": -0.006256103515625, + "all_logprobs/var": 2.302117347717285, + "clip_ratio": 0.0, + "completion_length": 1002.9479370117188, + "completion_length/incorrect": 1002.9479370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 9.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 21049.630859375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 9.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 21049.630859375, + "epoch": 0.024, + "feature_vector_variance/max_squared_error": 205128.25, + "feature_vector_variance/metric": 44451.734375, + "generated_tokens/total": 1008814.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.488605814759156e-05, + "loss": 0.0, + "mean_logprobs": -1.15625, + "mean_logprobs/var": 1.1953125, + "num_completions/total": 1440, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 285.0216064453125, + "per_token_feature_norm/max": 438.0, + "per_token_feature_norm/median": 280.0, + "per_token_feature_norm/min": 118.0, + "per_token_feature_norm/p25": 251.0, + "per_token_feature_norm/p75": 320.0, + "per_token_feature_norm/var": 2475.5859375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.37660861015319824, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.0390625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.8984375, + "per_token_policy_error_norm/var": 0.21569594740867615, + "policy_entropy": 1.1075103282928467, + "policy_entropy/max": 3.921875, + "policy_entropy/median": 0.46875, + "policy_entropy/min": 8.52346420288086e-06, + "policy_entropy/p25": 0.041015625, + "policy_entropy/p75": 2.359375, + "policy_entropy/var": 1.4589391946792603, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 1.4865425825119019, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.22955277562141418, + "policy_sharpness/min": 0.003409090917557478, + "policy_sharpness/p25": 0.10053636878728867, + "policy_sharpness/p75": 1.0910993814468384, + "policy_sharpness/var": 7.547967910766602, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 15, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 7.645028381375596e-05, + "adam_stats/lm_head/lr_effective_mean": -1.5047822363167818e-11, + "adam_stats/lm_head/lr_effective_min": -7.76407978264615e-05, + "adam_stats/lm_head/lr_effective_std": 2.625149136292748e-06, + "adam_stats/lr_effective_max": 7.865433872211725e-05, + "adam_stats/lr_effective_mean": -6.160491561324477e-10, + "adam_stats/lr_effective_min": -7.908898260211572e-05, + "adam_stats/m_t_max": 0.008575346320867538, + "adam_stats/m_t_mean": -9.063477399151587e-12, + "adam_stats/m_t_min": -0.008029633201658726, + "adam_stats/v_t_max": 2.8786942493752576e-05, + "adam_stats/v_t_mean": 6.2395431703332616e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.2527068853378296, + "all_logprobs/max": -1.6570091247558594e-05, + "all_logprobs/median": -0.294921875, + "all_logprobs/min": -11.375, + "all_logprobs/p1": -6.000312805175781, + "all_logprobs/p10": -3.078125, + "all_logprobs/p25": -2.375, + "all_logprobs/p5": -3.984375, + "all_logprobs/p75": -0.0146484375, + "all_logprobs/var": 2.31557297706604, + "clip_ratio": 0.0, + "completion_length": 992.7083740234375, + "completion_length/incorrect": 992.7083740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 30678.65234375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 30678.65234375, + "epoch": 0.0256, + "feature_vector_variance/max_squared_error": 188368.71875, + "feature_vector_variance/metric": 34984.23046875, + "generated_tokens/total": 1104114.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4836107005503543e-05, + "loss": 0.0, + "mean_logprobs": -1.3359375, + "mean_logprobs/var": 1.2265625, + "num_completions/total": 1536, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 277.644775390625, + "per_token_feature_norm/max": 430.0, + "per_token_feature_norm/median": 274.0, + "per_token_feature_norm/min": 129.0, + "per_token_feature_norm/p25": 247.0, + "per_token_feature_norm/p75": 310.0, + "per_token_feature_norm/var": 2077.76318359375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.439473956823349, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.140625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0078125, + "per_token_policy_error_norm/p75": 0.9140625, + "per_token_policy_error_norm/var": 0.22007492184638977, + "policy_entropy": 1.2699778079986572, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 0.82421875, + "policy_entropy/min": 0.00022411346435546875, + "policy_entropy/p25": 0.0830078125, + "policy_entropy/p75": 2.390625, + "policy_entropy/var": 1.471320390701294, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.698192834854126, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.17392286658287048, + "policy_sharpness/min": 0.0178571417927742, + "policy_sharpness/p25": 0.10082481801509857, + "policy_sharpness/p75": 0.49687421321868896, + "policy_sharpness/var": 2.3443851470947266, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 16, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 6.856639811303467e-05, + "adam_stats/lm_head/lr_effective_mean": -1.3488826375307461e-11, + "adam_stats/lm_head/lr_effective_min": -6.963413761695847e-05, + "adam_stats/lm_head/lr_effective_std": 2.3541772407043027e-06, + "adam_stats/lr_effective_max": 7.054227899061516e-05, + "adam_stats/lr_effective_mean": -5.52499102024484e-10, + "adam_stats/lr_effective_min": -7.093289605109021e-05, + "adam_stats/m_t_max": 0.00771781150251627, + "adam_stats/m_t_mean": -8.157157241339696e-12, + "adam_stats/m_t_min": -0.007226669695228338, + "adam_stats/v_t_max": 2.875815516745206e-05, + "adam_stats/v_t_mean": 6.233303803671042e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.3409957885742188, + "all_logprobs/max": -5.4836273193359375e-06, + "all_logprobs/median": -0.498046875, + "all_logprobs/min": -18.625, + "all_logprobs/p1": -6.123750686645508, + "all_logprobs/p10": -3.1875, + "all_logprobs/p25": -2.421875, + "all_logprobs/p5": -4.25, + "all_logprobs/p75": -0.015869140625, + "all_logprobs/var": 2.4346866607666016, + "clip_ratio": 0.0, + "completion_length": 958.3854370117188, + "completion_length/incorrect": 958.3854370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 15.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 59016.95703125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 15.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 59016.95703125, + "epoch": 0.0272, + "feature_vector_variance/max_squared_error": 189933.546875, + "feature_vector_variance/metric": 36113.265625, + "generated_tokens/total": 1196119.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4777217947069972e-05, + "loss": 0.0, + "mean_logprobs": -1.5, + "mean_logprobs/var": 1.3359375, + "num_completions/total": 1632, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 274.6009521484375, + "per_token_feature_norm/max": 432.0, + "per_token_feature_norm/median": 272.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 241.0, + "per_token_feature_norm/p75": 306.0, + "per_token_feature_norm/var": 2090.13818359375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.4646243751049042, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.25, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0078125, + "per_token_policy_error_norm/p75": 0.921875, + "per_token_policy_error_norm/var": 0.2216111570596695, + "policy_entropy": 1.355431079864502, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 1.0703125, + "policy_entropy/min": 7.82012939453125e-05, + "policy_entropy/p25": 0.087890625, + "policy_entropy/p75": 2.453125, + "policy_entropy/var": 1.5166549682617188, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.837539553642273, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.15314719080924988, + "policy_sharpness/min": 0.020717287436127663, + "policy_sharpness/p25": 0.09867196530103683, + "policy_sharpness/p75": 0.49383223056793213, + "policy_sharpness/var": 3.337709903717041, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 17, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 6.145751831354573e-05, + "adam_stats/lm_head/lr_effective_mean": -1.2083783626493094e-11, + "adam_stats/lm_head/lr_effective_min": -6.241454684641212e-05, + "adam_stats/lm_head/lr_effective_std": 2.1098701381561114e-06, + "adam_stats/lr_effective_max": 6.322775152511895e-05, + "adam_stats/lr_effective_mean": -4.951982157663792e-10, + "adam_stats/lr_effective_min": -6.35785618214868e-05, + "adam_stats/m_t_max": 0.00694603007286787, + "adam_stats/m_t_mean": -7.341413935102459e-12, + "adam_stats/m_t_min": -0.006504002492874861, + "adam_stats/v_t_max": 2.8729396944982e-05, + "adam_stats/v_t_mean": 6.227070942221857e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -0.8785899877548218, + "all_logprobs/max": -8.344650268554688e-07, + "all_logprobs/median": -0.0341796875, + "all_logprobs/min": -11.0, + "all_logprobs/p1": -5.96875, + "all_logprobs/p10": -2.9375, + "all_logprobs/p25": -1.5546875, + "all_logprobs/p5": -3.90625, + "all_logprobs/p75": -0.005706787109375, + "all_logprobs/var": 2.108905792236328, + "clip_ratio": 0.0, + "completion_length": 1003.7604370117188, + "completion_length/incorrect": 1003.7604370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 33.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 19463.783203125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 33.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 19463.783203125, + "epoch": 0.0288, + "feature_vector_variance/max_squared_error": 211125.296875, + "feature_vector_variance/metric": 41662.94921875, + "generated_tokens/total": 1292480.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4709462719537392e-05, + "loss": 0.0, + "mean_logprobs": -0.9296875, + "mean_logprobs/var": 0.77734375, + "num_completions/total": 1728, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 281.8272705078125, + "per_token_feature_norm/max": 434.0, + "per_token_feature_norm/median": 280.0, + "per_token_feature_norm/min": 115.5, + "per_token_feature_norm/p25": 242.0, + "per_token_feature_norm/p75": 320.0, + "per_token_feature_norm/var": 2479.136474609375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.3063848614692688, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.015625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0, + "per_token_policy_error_norm/p75": 0.7734375, + "per_token_policy_error_norm/var": 0.19530028104782104, + "policy_entropy": 0.89394611120224, + "policy_entropy/max": 3.890625, + "policy_entropy/median": 0.171875, + "policy_entropy/min": 1.3470649719238281e-05, + "policy_entropy/p25": 0.034912109375, + "policy_entropy/p75": 1.9453125, + "policy_entropy/var": 1.310990571975708, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 1.3478492498397827, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.2924269139766693, + "policy_sharpness/min": 0.014141658321022987, + "policy_sharpness/p25": 0.09979591518640518, + "policy_sharpness/p75": 1.2027064561843872, + "policy_sharpness/var": 5.954662322998047, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 18, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 5.505145963979885e-05, + "adam_stats/lm_head/lr_effective_mean": -1.0818500609244275e-11, + "adam_stats/lm_head/lr_effective_min": -5.590872387983836e-05, + "adam_stats/lm_head/lr_effective_std": 1.8897418385677156e-06, + "adam_stats/lr_effective_max": 5.6636468798387796e-05, + "adam_stats/lr_effective_mean": -4.435647682488053e-10, + "adam_stats/lr_effective_min": -5.69513431400992e-05, + "adam_stats/m_t_max": 0.006251426879316568, + "adam_stats/m_t_mean": -6.6073149555812005e-12, + "adam_stats/m_t_min": -0.005853602197021246, + "adam_stats/v_t_max": 2.8700667826342396e-05, + "adam_stats/v_t_mean": 6.2208424175813626e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.5370652675628662, + "all_logprobs/max": -0.00012874603271484375, + "all_logprobs/median": -1.09375, + "all_logprobs/min": -12.625, + "all_logprobs/p1": -6.1875, + "all_logprobs/p10": -3.53125, + "all_logprobs/p25": -2.625, + "all_logprobs/p5": -4.65625, + "all_logprobs/p75": -0.02197265625, + "all_logprobs/var": 2.6935675144195557, + "clip_ratio": 0.0, + "completion_length": 996.15625, + "completion_length/incorrect": 996.15625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 41.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 24470.681640625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 41.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 24470.681640625, + "epoch": 0.0304, + "feature_vector_variance/max_squared_error": 180962.6875, + "feature_vector_variance/metric": 33304.35546875, + "generated_tokens/total": 1388111.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4632923872213653e-05, + "loss": 0.0, + "mean_logprobs": -1.5859375, + "mean_logprobs/var": 1.265625, + "num_completions/total": 1824, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 271.7148132324219, + "per_token_feature_norm/max": 430.0, + "per_token_feature_norm/median": 268.0, + "per_token_feature_norm/min": 132.0, + "per_token_feature_norm/p25": 239.0, + "per_token_feature_norm/p75": 304.0, + "per_token_feature_norm/var": 1879.9178466796875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5097320675849915, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.5625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0078125, + "per_token_policy_error_norm/p75": 0.9375, + "per_token_policy_error_norm/var": 0.2234601080417633, + "policy_entropy": 1.531424880027771, + "policy_entropy/max": 3.90625, + "policy_entropy/median": 1.6875, + "policy_entropy/min": 0.00138092041015625, + "policy_entropy/p25": 0.11669921875, + "policy_entropy/p75": 2.65625, + "policy_entropy/var": 1.6979436874389648, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.5927935838699341, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1371345967054367, + "policy_sharpness/min": 0.014513804577291012, + "policy_sharpness/p25": 0.08911474049091339, + "policy_sharpness/p75": 0.4084489643573761, + "policy_sharpness/var": 1.9475643634796143, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 19, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.928235284751281e-05, + "adam_stats/lm_head/lr_effective_mean": -9.679552298580418e-12, + "adam_stats/lm_head/lr_effective_min": -5.0049777200911194e-05, + "adam_stats/lm_head/lr_effective_std": 1.6915232663450297e-06, + "adam_stats/lr_effective_max": 5.070062616141513e-05, + "adam_stats/lr_effective_mean": -3.970665463093326e-10, + "adam_stats/lr_effective_min": -5.0983060646103695e-05, + "adam_stats/m_t_max": 0.005626284051686525, + "adam_stats/m_t_mean": -5.946563987752063e-12, + "adam_stats/m_t_min": -0.005268241744488478, + "adam_stats/v_t_max": 2.867196781153325e-05, + "adam_stats/v_t_mean": 6.214622132877379e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6397597789764404, + "all_logprobs/max": -1.6689300537109375e-06, + "all_logprobs/median": -2.0625, + "all_logprobs/min": -20.125, + "all_logprobs/p1": -6.25, + "all_logprobs/p10": -3.28125, + "all_logprobs/p25": -2.515625, + "all_logprobs/p5": -4.375, + "all_logprobs/p75": -0.044921875, + "all_logprobs/var": 2.3654098510742188, + "clip_ratio": 0.0, + "completion_length": 953.1979370117188, + "completion_length/incorrect": 953.1979370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 5.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 64593.36328125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 5.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 64593.36328125, + "epoch": 0.032, + "feature_vector_variance/max_squared_error": 193529.21875, + "feature_vector_variance/metric": 30730.400390625, + "generated_tokens/total": 1479618.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4547694655894313e-05, + "loss": 0.0, + "mean_logprobs": -1.8203125, + "mean_logprobs/var": 1.3671875, + "num_completions/total": 1920, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 267.9793701171875, + "per_token_feature_norm/max": 424.0, + "per_token_feature_norm/median": 266.0, + "per_token_feature_norm/min": 131.0, + "per_token_feature_norm/p25": 239.0, + "per_token_feature_norm/p75": 296.0, + "per_token_feature_norm/var": 1889.36474609375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5720800757408142, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.859375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.93359375, + "per_token_policy_error_norm/var": 0.20658697187900543, + "policy_entropy": 1.6234450340270996, + "policy_entropy/max": 3.828125, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 2.47955322265625e-05, + "policy_entropy/p25": 0.2119140625, + "policy_entropy/p75": 2.4375, + "policy_entropy/var": 1.404934048652649, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4389694035053253, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.17760415375232697, + "policy_sharpness/min": 0.02490171231329441, + "policy_sharpness/p25": 0.10200873762369156, + "policy_sharpness/p75": 0.4397580325603485, + "policy_sharpness/var": 1.0039888620376587, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 20, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.409011307870969e-05, + "adam_stats/lm_head/lr_effective_mean": -8.655095737331031e-12, + "adam_stats/lm_head/lr_effective_min": -4.4776679715141654e-05, + "adam_stats/lm_head/lr_effective_std": 1.5131453210415202e-06, + "adam_stats/lr_effective_max": 4.535838888841681e-05, + "adam_stats/lr_effective_mean": -3.5521977026498064e-10, + "adam_stats/lr_effective_min": -4.561157766147517e-05, + "adam_stats/m_t_max": 0.005063655320554972, + "adam_stats/m_t_mean": -5.351891672888964e-12, + "adam_stats/m_t_min": -0.004741417244076729, + "adam_stats/v_t_max": 2.864329690055456e-05, + "adam_stats/v_t_mean": 6.208407919705561e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.7984647750854492, + "all_logprobs/max": -0.00018596649169921875, + "all_logprobs/median": -2.1875, + "all_logprobs/min": -15.6875, + "all_logprobs/p1": -6.3125, + "all_logprobs/p10": -3.296875, + "all_logprobs/p25": -2.546875, + "all_logprobs/p5": -4.53125, + "all_logprobs/p75": -0.0908203125, + "all_logprobs/var": 2.2711100578308105, + "clip_ratio": 0.0, + "completion_length": 875.7708740234375, + "completion_length/incorrect": 875.7708740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 17.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 120704.0859375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 17.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 120704.0859375, + "epoch": 0.0336, + "feature_vector_variance/max_squared_error": 174555.890625, + "feature_vector_variance/metric": 26784.072265625, + "generated_tokens/total": 1563692.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4453878909250906e-05, + "loss": 0.0, + "mean_logprobs": -2.15625, + "mean_logprobs/var": 1.4765625, + "num_completions/total": 2016, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 269.62652587890625, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 270.0, + "per_token_feature_norm/min": 128.0, + "per_token_feature_norm/p25": 238.0, + "per_token_feature_norm/p75": 302.0, + "per_token_feature_norm/var": 2113.79931640625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6268874406814575, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.87890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.04296875, + "per_token_policy_error_norm/p75": 0.94140625, + "per_token_policy_error_norm/var": 0.19114771485328674, + "policy_entropy": 1.7528033256530762, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.00183868408203125, + "policy_entropy/p25": 0.388671875, + "policy_entropy/p75": 2.421875, + "policy_entropy/var": 1.2698321342468262, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4258725941181183, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.2503829896450043, + "policy_sharpness/min": 0.019626349210739136, + "policy_sharpness/p25": 0.11740949004888535, + "policy_sharpness/p75": 0.4792306423187256, + "policy_sharpness/var": 0.8109359741210938, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 21, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.941997783840634e-05, + "adam_stats/lm_head/lr_effective_mean": -7.734160534234036e-12, + "adam_stats/lm_head/lr_effective_min": -4.0033814002526924e-05, + "adam_stats/lm_head/lr_effective_std": 1.3527225064535742e-06, + "adam_stats/lr_effective_max": 4.05534083256498e-05, + "adam_stats/lr_effective_mean": -3.175822937961925e-10, + "adam_stats/lr_effective_min": -4.0780225390335545e-05, + "adam_stats/m_t_max": 0.004557289648801088, + "adam_stats/m_t_mean": -4.8167160364431805e-12, + "adam_stats/m_t_min": -0.004267275333404541, + "adam_stats/v_t_max": 2.8614653274416924e-05, + "adam_stats/v_t_mean": 6.20219934438504e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.4796854257583618, + "all_logprobs/max": -0.000640869140625, + "all_logprobs/median": -1.59375, + "all_logprobs/min": -17.875, + "all_logprobs/p1": -6.34375, + "all_logprobs/p10": -3.0, + "all_logprobs/p25": -2.375, + "all_logprobs/p5": -4.125, + "all_logprobs/p75": -0.0301513671875, + "all_logprobs/var": 2.3094544410705566, + "clip_ratio": 0.0, + "completion_length": 901.75, + "completion_length/incorrect": 901.75, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 100047.0078125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 100047.0078125, + "epoch": 0.0352, + "feature_vector_variance/max_squared_error": 167320.3125, + "feature_vector_variance/metric": 28806.443359375, + "generated_tokens/total": 1650260.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4351590932319506e-05, + "loss": 0.0, + "mean_logprobs": -1.796875, + "mean_logprobs/var": 1.7109375, + "num_completions/total": 2112, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 275.13897705078125, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 276.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 248.0, + "per_token_feature_norm/p75": 308.0, + "per_token_feature_norm/var": 1915.307373046875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5296664237976074, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.7578125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.01953125, + "per_token_policy_error_norm/p75": 0.9140625, + "per_token_policy_error_norm/var": 0.20973776280879974, + "policy_entropy": 1.44556725025177, + "policy_entropy/max": 3.828125, + "policy_entropy/median": 2.0, + "policy_entropy/min": 0.004302978515625, + "policy_entropy/p25": 0.134765625, + "policy_entropy/p75": 2.359375, + "policy_entropy/var": 1.326259970664978, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.42370906472206116, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.2543642222881317, + "policy_sharpness/min": 0.02311674691736698, + "policy_sharpness/p25": 0.11666363477706909, + "policy_sharpness/p75": 0.46980518102645874, + "policy_sharpness/var": 0.5546246767044067, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 22, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.522207771311514e-05, + "adam_stats/lm_head/lr_effective_mean": -6.906821468921631e-12, + "adam_stats/lm_head/lr_effective_min": -3.5770543036051095e-05, + "adam_stats/lm_head/lr_effective_std": 1.2085376965842443e-06, + "adam_stats/lr_effective_max": 3.6234352592146024e-05, + "adam_stats/lr_effective_mean": -2.8375191085672213e-10, + "adam_stats/lr_effective_min": -3.6437424569157884e-05, + "adam_stats/m_t_max": 0.004101560451090336, + "adam_stats/m_t_mean": -4.335038838315652e-12, + "adam_stats/m_t_min": -0.003840547753497958, + "adam_stats/v_t_max": 2.8586038752109744e-05, + "adam_stats/v_t_mean": 6.195997707958423e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6925466060638428, + "all_logprobs/max": -0.000579833984375, + "all_logprobs/median": -2.109375, + "all_logprobs/min": -14.8125, + "all_logprobs/p1": -6.363124847412109, + "all_logprobs/p10": -3.375, + "all_logprobs/p25": -2.515625, + "all_logprobs/p5": -4.5625, + "all_logprobs/p75": -0.0380859375, + "all_logprobs/var": 2.4367942810058594, + "clip_ratio": 0.0, + "completion_length": 832.6979370117188, + "completion_length/incorrect": 832.6979370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 3.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 149940.140625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 3.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 149940.140625, + "epoch": 0.0368, + "feature_vector_variance/max_squared_error": 120179.125, + "feature_vector_variance/metric": 29832.001953125, + "generated_tokens/total": 1730199.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4240955347243754e-05, + "loss": 0.0, + "mean_logprobs": -2.234375, + "mean_logprobs/var": 2.0625, + "num_completions/total": 2208, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 266.5396728515625, + "per_token_feature_norm/max": 380.0, + "per_token_feature_norm/median": 270.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 235.0, + "per_token_feature_norm/p75": 302.0, + "per_token_feature_norm/var": 2285.095458984375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5863168835639954, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.01953125, + "per_token_policy_error_norm/p75": 0.94140625, + "per_token_policy_error_norm/var": 0.20318154990673065, + "policy_entropy": 1.6269713640213013, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 0.00469970703125, + "policy_entropy/p25": 0.171875, + "policy_entropy/p75": 2.421875, + "policy_entropy/var": 1.3952224254608154, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.5735206604003906, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.2515025734901428, + "policy_sharpness/min": 0.021857744082808495, + "policy_sharpness/p25": 0.11253686249256134, + "policy_sharpness/p75": 0.49668505787849426, + "policy_sharpness/var": 1.2501083612442017, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 23, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.1451032555196434e-05, + "adam_stats/lm_head/lr_effective_mean": -6.164024152210956e-12, + "adam_stats/lm_head/lr_effective_min": -3.1940773624228314e-05, + "adam_stats/lm_head/lr_effective_std": 1.0790287205963978e-06, + "adam_stats/lr_effective_max": 3.2354520953958854e-05, + "adam_stats/lr_effective_mean": -2.533625809153506e-10, + "adam_stats/lr_effective_min": -3.253620525356382e-05, + "adam_stats/m_t_max": 0.0036914043594151735, + "adam_stats/m_t_mean": -3.9015127933916816e-12, + "adam_stats/m_t_min": -0.0034564929082989693, + "adam_stats/v_t_max": 2.855745333363302e-05, + "adam_stats/v_t_mean": 6.1898017093831026e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6147871017456055, + "all_logprobs/max": -0.00030517578125, + "all_logprobs/median": -1.9453125, + "all_logprobs/min": -16.625, + "all_logprobs/p1": -6.4375, + "all_logprobs/p10": -3.265625, + "all_logprobs/p25": -2.46875, + "all_logprobs/p5": -4.5, + "all_logprobs/p75": -0.04736328125, + "all_logprobs/var": 2.442214250564575, + "clip_ratio": 0.0, + "completion_length": 902.0208740234375, + "completion_length/incorrect": 902.0208740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 9.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 98221.1171875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 9.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 98221.1171875, + "epoch": 0.0384, + "feature_vector_variance/max_squared_error": 150709.015625, + "feature_vector_variance/metric": 24508.302734375, + "generated_tokens/total": 1816793.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.4122106946441953e-05, + "loss": 0.0, + "mean_logprobs": -1.921875, + "mean_logprobs/var": 1.5390625, + "num_completions/total": 2304, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 264.44525146484375, + "per_token_feature_norm/max": 418.0, + "per_token_feature_norm/median": 268.0, + "per_token_feature_norm/min": 129.0, + "per_token_feature_norm/p25": 232.0, + "per_token_feature_norm/p75": 300.0, + "per_token_feature_norm/var": 2341.734130859375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5667506456375122, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8359375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.02734375, + "per_token_policy_error_norm/p75": 0.93359375, + "per_token_policy_error_norm/var": 0.20872104167938232, + "policy_entropy": 1.5562694072723389, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.21875, + "policy_entropy/min": 0.0029449462890625, + "policy_entropy/p25": 0.2001953125, + "policy_entropy/p75": 2.390625, + "policy_entropy/var": 1.3184709548950195, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4084181785583496, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.22355489432811737, + "policy_sharpness/min": 0.019354742020368576, + "policy_sharpness/p25": 0.1128234714269638, + "policy_sharpness/p75": 0.45170390605926514, + "policy_sharpness/var": 0.5851565599441528, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 24, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.8065560400136746e-05, + "adam_stats/lm_head/lr_effective_mean": -5.497544260102405e-12, + "adam_stats/lm_head/lr_effective_min": -2.8502587156253867e-05, + "adam_stats/lm_head/lr_effective_std": 9.627748340790276e-07, + "adam_stats/lr_effective_max": 2.887143455154728e-05, + "adam_stats/lr_effective_mean": -2.2608169503168796e-10, + "adam_stats/lr_effective_min": -2.9033884857199155e-05, + "adam_stats/m_t_max": 0.0033222639467567205, + "adam_stats/m_t_mean": -3.511372399442325e-12, + "adam_stats/m_t_min": -0.003110843477770686, + "adam_stats/v_t_max": 2.8528897018986754e-05, + "adam_stats/v_t_mean": 6.183612216020817e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.586838960647583, + "all_logprobs/max": -0.00031280517578125, + "all_logprobs/median": -1.78125, + "all_logprobs/min": -15.0625, + "all_logprobs/p1": -6.46875, + "all_logprobs/p10": -3.421875, + "all_logprobs/p25": -2.453125, + "all_logprobs/p5": -4.8125, + "all_logprobs/p75": -0.0299072265625, + "all_logprobs/var": 2.6209418773651123, + "clip_ratio": 0.0, + "completion_length": 844.7083740234375, + "completion_length/incorrect": 844.7083740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 4.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 141407.375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 4.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 141407.375, + "epoch": 0.04, + "feature_vector_variance/max_squared_error": 175265.34375, + "feature_vector_variance/metric": 27399.98046875, + "generated_tokens/total": 1897885.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.3995190528383292e-05, + "loss": 0.0, + "mean_logprobs": -2.078125, + "mean_logprobs/var": 1.96875, + "num_completions/total": 2400, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 265.3020935058594, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 270.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 234.0, + "per_token_feature_norm/p75": 300.0, + "per_token_feature_norm/var": 2341.552734375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.546040952205658, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.796875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.01953125, + "per_token_policy_error_norm/p75": 0.93359375, + "per_token_policy_error_norm/var": 0.21246472001075745, + "policy_entropy": 1.5018173456192017, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.15625, + "policy_entropy/min": 0.002105712890625, + "policy_entropy/p25": 0.123046875, + "policy_entropy/p75": 2.375, + "policy_entropy/var": 1.4294644594192505, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.5589810013771057, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.24898460507392883, + "policy_sharpness/min": 0.022340185940265656, + "policy_sharpness/p25": 0.1107247918844223, + "policy_sharpness/p75": 0.4820491075515747, + "policy_sharpness/var": 1.406555414199829, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 25, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.03125, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.03059210442006588, + "adam_stats/lm_head/lr_effective_max": 4.559342414722778e-05, + "adam_stats/lm_head/lr_effective_mean": -1.1126289473084583e-10, + "adam_stats/lm_head/lr_effective_min": -4.641214036382735e-05, + "adam_stats/lm_head/lr_effective_std": 1.5176711940512178e-06, + "adam_stats/lr_effective_max": 4.721778168459423e-05, + "adam_stats/lr_effective_mean": -2.884773531164342e-10, + "adam_stats/lr_effective_min": -4.737763083539903e-05, + "adam_stats/m_t_max": 0.0029640975408256054, + "adam_stats/m_t_mean": -4.439384190119133e-12, + "adam_stats/m_t_min": -0.002834091428667307, + "adam_stats/v_t_max": 2.8500466214609332e-05, + "adam_stats/v_t_mean": 6.1785134300440525e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 3.1044087300813317e-09, + "advantages/max": 1.2073814868927002, + "advantages/median": 0.0, + "advantages/min": -0.7244288325309753, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.07365575432777405, + "all_logprobs": -1.4310084581375122, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.234375, + "all_logprobs/min": -16.875, + "all_logprobs/p1": -6.5625, + "all_logprobs/p10": -3.921875, + "all_logprobs/p25": -2.46875, + "all_logprobs/p5": -5.28125, + "all_logprobs/p75": -0.01483154296875, + "all_logprobs/var": 3.172882080078125, + "clip_ratio": 0.0, + "completion_length": 791.5729370117188, + "completion_length/correct": 778.6666870117188, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 661.0, + "completion_length/correct/min": 651.0, + "completion_length/correct/p25": 656.0, + "completion_length/correct/p75": 842.5, + "completion_length/correct/var": 45166.3359375, + "completion_length/incorrect": 791.9892578125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 632.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 146975.765625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 646.25, + "completion_length/p75": 1024.0, + "completion_length/var": 143290.71875, + "epoch": 0.0416, + "feature_vector_variance/max_squared_error": 177781.5625, + "feature_vector_variance/metric": 30884.884765625, + "generated_tokens/total": 1973876.0, + "global_fisher_curvature": 148480.0, + "global_fisher_curvature/max": 148480.0, + "global_fisher_curvature/median": 148480.0, + "global_fisher_curvature/min": 148480.0, + "global_fisher_curvature/p25": 148480.0, + "global_fisher_curvature/p75": 148480.0, + "global_fisher_curvature/p85": 148480.0, + "global_fisher_curvature/p90": 148480.0, + "global_fisher_curvature/p95": 148480.0, + "global_fisher_curvature/p99": 148480.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.4543533325195312e-05, + "global_fisher_kl_divergence/max": 1.4543533325195312e-05, + "global_fisher_kl_divergence/median": 1.4543533325195312e-05, + "global_fisher_kl_divergence/min": 1.4543533325195312e-05, + "global_fisher_kl_divergence/p25": 1.4543533325195312e-05, + "global_fisher_kl_divergence/p75": 1.4543533325195312e-05, + "global_fisher_kl_divergence/p85": 1.4543533325195312e-05, + "global_fisher_kl_divergence/p90": 1.4543533325195312e-05, + "global_fisher_kl_divergence/p95": 1.4543533325195312e-05, + "global_fisher_kl_divergence/p99": 1.4543533325195312e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 2.3125, + "global_full_update_term/max": 2.3125, + "global_full_update_term/median": 2.3125, + "global_full_update_term/min": 2.3125, + "global_full_update_term/p25": 2.3125, + "global_full_update_term/p75": 2.3125, + "global_full_update_term/p85": 2.3125, + "global_full_update_term/p90": 2.3125, + "global_full_update_term/p95": 2.3125, + "global_full_update_term/p99": 2.3125, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 145408.0, + "global_hessian_coeff/max": 145408.0, + "global_hessian_coeff/median": 145408.0, + "global_hessian_coeff/min": 145408.0, + "global_hessian_coeff/p25": 145408.0, + "global_hessian_coeff/p75": 145408.0, + "global_hessian_coeff/p99": 145408.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 145408.0, + "global_hessian_coeff_abs/max": 145408.0, + "global_hessian_coeff_abs/median": 145408.0, + "global_hessian_coeff_abs/min": 145408.0, + "global_hessian_coeff_abs/p25": 145408.0, + "global_hessian_coeff_abs/p75": 145408.0, + "global_hessian_coeff_abs/p99": 145408.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.15249939262866974, + "grouped_std_rewards": 0.043129097670316696, + "learning_rate": 1.3860360721173195e-05, + "loss": -0.0, + "mean_logprobs": -2.046875, + "mean_logprobs/var": 2.828125, + "num_completions/total": 2496, + "per_sentence_gradient_norm": 16.05208396911621, + "per_sentence_gradient_norm/max": 624.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 5187.14404296875, + "per_token_feature_norm": 255.04762268066406, + "per_token_feature_norm/max": 424.0, + "per_token_feature_norm/median": 254.0, + "per_token_feature_norm/min": 118.0, + "per_token_feature_norm/p25": 222.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2458.46875, + "per_token_gradient_norm": 3.0647668838500977, + "per_token_gradient_norm/max": 628.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 699.75732421875, + "per_token_policy_error_norm": 0.45565173029899597, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.13671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0078125, + "per_token_policy_error_norm/p75": 0.9375, + "per_token_policy_error_norm/var": 0.22560441493988037, + "policy_entropy": 1.3038935661315918, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 0.671875, + "policy_entropy/min": 2.0372681319713593e-08, + "policy_entropy/p25": 0.0703125, + "policy_entropy/p75": 2.375, + "policy_entropy/var": 1.6554737091064453, + "policy_loss": -6.208817349140361e-10, + "policy_loss/max": 0.7244288325309753, + "policy_loss/median": 0.0, + "policy_loss/min": -1.2073814868927002, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.07365573942661285, + "policy_sharpness": 0.8991132974624634, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.2859863340854645, + "policy_sharpness/min": 0.02221488580107689, + "policy_sharpness/p25": 0.10617081820964813, + "policy_sharpness/p75": 0.5295755863189697, + "policy_sharpness/var": 4.012679576873779, + "reward": 0.03125, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.03059210442006588, + "rewards/accuracy_reward": 0.03125, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.03059210442006588, + "sentence_fisher_curvature": 517077.34375, + "sentence_fisher_curvature/max": 34340864.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 1794048.0, + "sentence_fisher_curvature/p99": 6168666.5, + "sentence_fisher_curvature/var": 12600079286272.0, + "sentence_fisher_kl_divergence": 5.0569575250847265e-05, + "sentence_fisher_kl_divergence/max": 0.00335693359375, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.00017595291137695312, + "sentence_fisher_kl_divergence/p99": 0.0006045430200174451, + "sentence_fisher_kl_divergence/var": 1.2041773800319788e-07, + "sentence_full_gradient_variance/max_squared_error": 5335.53125, + "sentence_full_gradient_variance/metric": 5335.53125, + "sentence_full_gradient_variance/p75": 5335.53125, + "sentence_full_gradient_variance/p90": 5335.53125, + "sentence_full_gradient_variance/p95": 5335.53125, + "sentence_full_gradient_variance/p99": 5335.53125, + "sentence_full_update_term": 0.071136474609375, + "sentence_full_update_term/max": 5.09375, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.180419921875, + "sentence_full_update_term/p99": 0.724135160446167, + "sentence_full_update_term/var": 0.2737245261669159, + "sentence_hessian_coeff": 348928.0, + "sentence_hessian_coeff/max": 35913728.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -2179072.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 3500136.0, + "sentence_hessian_coeff/var": 13585783717888.0, + "sentence_hessian_coeff_abs": 461056.0, + "sentence_hessian_coeff_abs/max": 35913728.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 3865907.75, + "sentence_hessian_coeff_abs/var": 13494007103488.0, + "step": 26, + "token_fisher_curvature": 69924880.0, + "token_fisher_curvature/max": 106300440576.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 4.016328603029251e-09, + "token_fisher_curvature/p99": 264454144.0, + "token_fisher_curvature/var": 1.6546118928087122e+18, + "token_fisher_kl_divergence": 0.006846804171800613, + "token_fisher_kl_divergence/max": 10.4375, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 3.9387032047324966e-19, + "token_fisher_kl_divergence/p99": 0.025903701782226562, + "token_fisher_kl_divergence/var": 0.01586279273033142, + "token_full_update_term": 0.012503932230174541, + "token_full_update_term/max": 17.375, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": -0.53125, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 7.712515071034431e-10, + "token_full_update_term/p99": 0.171875, + "token_full_update_term/var": 0.05291706696152687, + "token_hessian_coeff": 26330140.0, + "token_hessian_coeff/max": 121332826112.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -24561844224.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 3032704.0, + "token_hessian_coeff/var": 1.7287793122213888e+18, + "token_hessian_coeff_abs": 57162792.0, + "token_hessian_coeff_abs/max": 121332826112.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 130547712.0, + "token_hessian_coeff_abs/var": 1.7262049431839048e+18 + }, + { + "accuracy_reward": 0.02083333395421505, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.020614033564925194, + "adam_stats/lm_head/lr_effective_max": 5.824910113005899e-05, + "adam_stats/lm_head/lr_effective_mean": -2.4545832033595616e-10, + "adam_stats/lm_head/lr_effective_min": -5.8388908655615523e-05, + "adam_stats/lm_head/lr_effective_std": 1.5846916312511894e-06, + "adam_stats/lr_effective_max": 5.963819785392843e-05, + "adam_stats/lr_effective_mean": -8.79371367390469e-11, + "adam_stats/lr_effective_min": -5.875889473827556e-05, + "adam_stats/m_t_max": 0.002670524874702096, + "adam_stats/m_t_mean": -7.683455651233406e-13, + "adam_stats/m_t_min": -0.002572998171672225, + "adam_stats/v_t_max": 2.8472006306401454e-05, + "adam_stats/v_t_mean": 6.173439797557689e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 2.4835269396561444e-09, + "advantages/max": 1.6198352575302124, + "advantages/median": 0.0, + "advantages/min": -0.5399450659751892, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.07365238666534424, + "all_logprobs": -1.2486512660980225, + "all_logprobs/max": 0.0, + "all_logprobs/median": -0.146484375, + "all_logprobs/min": -12.8125, + "all_logprobs/p1": -6.46875, + "all_logprobs/p10": -3.40625, + "all_logprobs/p25": -2.34375, + "all_logprobs/p5": -4.71875, + "all_logprobs/p75": -0.0133056640625, + "all_logprobs/var": 2.7421720027923584, + "clip_ratio": 0.0, + "completion_length": 867.7396240234375, + "completion_length/correct": 664.5, + "completion_length/correct/max": 704.0, + "completion_length/correct/median": 625.0, + "completion_length/correct/min": 625.0, + "completion_length/correct/p25": 644.75, + "completion_length/correct/p75": 684.25, + "completion_length/correct/var": 3120.5, + "completion_length/incorrect": 872.0637817382812, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 3.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 110191.1328125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 3.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 108792.25, + "epoch": 0.0432, + "feature_vector_variance/max_squared_error": 196989.75, + "feature_vector_variance/metric": 36917.10546875, + "generated_tokens/total": 2057179.0, + "global_fisher_curvature": 25984.0, + "global_fisher_curvature/max": 25984.0, + "global_fisher_curvature/median": 25984.0, + "global_fisher_curvature/min": 25984.0, + "global_fisher_curvature/p25": 25984.0, + "global_fisher_curvature/p75": 25984.0, + "global_fisher_curvature/p85": 25984.0, + "global_fisher_curvature/p90": 25984.0, + "global_fisher_curvature/p95": 25984.0, + "global_fisher_curvature/p99": 25984.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 2.4884939193725586e-06, + "global_fisher_kl_divergence/max": 2.4884939193725586e-06, + "global_fisher_kl_divergence/median": 2.4884939193725586e-06, + "global_fisher_kl_divergence/min": 2.4884939193725586e-06, + "global_fisher_kl_divergence/p25": 2.4884939193725586e-06, + "global_fisher_kl_divergence/p75": 2.4884939193725586e-06, + "global_fisher_kl_divergence/p85": 2.4884939193725586e-06, + "global_fisher_kl_divergence/p90": 2.4884939193725586e-06, + "global_fisher_kl_divergence/p95": 2.4884939193725586e-06, + "global_fisher_kl_divergence/p99": 2.4884939193725586e-06, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.26171875, + "global_full_update_term/max": 0.26171875, + "global_full_update_term/median": 0.26171875, + "global_full_update_term/min": 0.26171875, + "global_full_update_term/p25": 0.26171875, + "global_full_update_term/p75": 0.26171875, + "global_full_update_term/p85": 0.26171875, + "global_full_update_term/p90": 0.26171875, + "global_full_update_term/p95": 0.26171875, + "global_full_update_term/p99": 0.26171875, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 25856.0, + "global_hessian_coeff/max": 25856.0, + "global_hessian_coeff/median": 25856.0, + "global_hessian_coeff/min": 25856.0, + "global_hessian_coeff/p25": 25856.0, + "global_hessian_coeff/p75": 25856.0, + "global_hessian_coeff/p99": 25856.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 25856.0, + "global_hessian_coeff_abs/max": 25856.0, + "global_hessian_coeff_abs/median": 25856.0, + "global_hessian_coeff_abs/min": 25856.0, + "global_hessian_coeff_abs/p25": 25856.0, + "global_hessian_coeff_abs/p75": 25856.0, + "global_hessian_coeff_abs/p99": 25856.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.16821041703224182, + "grouped_std_rewards": 0.03857583925127983, + "learning_rate": 1.3717781794162813e-05, + "loss": -0.0, + "mean_logprobs": -1.5625, + "mean_logprobs/var": 1.8203125, + "num_completions/total": 2592, + "per_sentence_gradient_norm": 17.260417938232422, + "per_sentence_gradient_norm/max": 584.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 5886.69482421875, + "per_token_feature_norm": 264.0330505371094, + "per_token_feature_norm/max": 434.0, + "per_token_feature_norm/median": 266.0, + "per_token_feature_norm/min": 123.5, + "per_token_feature_norm/p25": 228.0, + "per_token_feature_norm/p75": 298.0, + "per_token_feature_norm/var": 2361.948974609375, + "per_token_gradient_norm": 2.01525616645813, + "per_token_gradient_norm/max": 784.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 537.433837890625, + "per_token_policy_error_norm": 0.42030370235443115, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.08203125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0078125, + "per_token_policy_error_norm/p75": 0.9140625, + "per_token_policy_error_norm/var": 0.22262750566005707, + "policy_entropy": 1.1750280857086182, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 0.49609375, + "policy_entropy/min": 4.400499165058136e-08, + "policy_entropy/p25": 0.0712890625, + "policy_entropy/p75": 2.34375, + "policy_entropy/var": 1.4554766416549683, + "policy_loss": -2.4835269396561444e-09, + "policy_loss/max": 0.5399450659751892, + "policy_loss/median": 0.0, + "policy_loss/min": -1.6198352575302124, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.07365238666534424, + "policy_sharpness": 1.069507122039795, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.24115954339504242, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.10595925152301788, + "policy_sharpness/p75": 0.5510541796684265, + "policy_sharpness/var": 5.152231693267822, + "reward": 0.02083333395421505, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.020614033564925194, + "rewards/accuracy_reward": 0.02083333395421505, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.020614033564925194, + "sentence_fisher_curvature": 739498.6875, + "sentence_fisher_curvature/max": 49545216.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 1337344.0, + "sentence_fisher_curvature/p99": 13870809.0, + "sentence_fisher_curvature/var": 27052308692992.0, + "sentence_fisher_kl_divergence": 7.105618715286255e-05, + "sentence_fisher_kl_divergence/max": 0.0047607421875, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0001285076141357422, + "sentence_fisher_kl_divergence/p99": 0.0013324847677722573, + "sentence_fisher_kl_divergence/var": 2.4976563395284757e-07, + "sentence_full_gradient_variance/max_squared_error": 6065.046875, + "sentence_full_gradient_variance/metric": 6065.046875, + "sentence_full_gradient_variance/p75": 6065.046875, + "sentence_full_gradient_variance/p90": 6065.046875, + "sentence_full_gradient_variance/p95": 6065.046875, + "sentence_full_gradient_variance/p99": 6065.046875, + "sentence_full_update_term": 0.0823618620634079, + "sentence_full_update_term/max": 4.53125, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.1383056640625, + "sentence_full_update_term/p99": 2.3343820571899414, + "sentence_full_update_term/var": 0.2647559642791748, + "sentence_hessian_coeff": 879242.6875, + "sentence_hessian_coeff/max": 71303168.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -1810432.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 19441420.0, + "sentence_hessian_coeff/var": 55718891749376.0, + "sentence_hessian_coeff_abs": 954400.0, + "sentence_hessian_coeff_abs/max": 71303168.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 19441420.0, + "sentence_hessian_coeff_abs/var": 55579628273664.0, + "step": 27, + "token_fisher_curvature": 52504896.0, + "token_fisher_curvature/max": 144955146240.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 4.722716900711077e-17, + "token_fisher_curvature/p99": 14745600.0, + "token_fisher_curvature/var": 2.1238999482179256e+18, + "token_fisher_kl_divergence": 0.005045217461884022, + "token_fisher_kl_divergence/max": 13.9375, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 4.5387112181890914e-27, + "token_fisher_kl_divergence/p99": 0.0014190673828125, + "token_fisher_kl_divergence/var": 0.01961742714047432, + "token_full_update_term": 0.011811111122369766, + "token_full_update_term/max": 31.25, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 7.056508155578456e-14, + "token_full_update_term/p99": 0.03759002685546875, + "token_full_update_term/var": 0.10942334681749344, + "token_hessian_coeff": 44840456.0, + "token_hessian_coeff/max": 236223201280.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -8724152320.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 56284.0, + "token_hessian_coeff/var": 4.2484505324415877e+18, + "token_hessian_coeff_abs": 56897696.0, + "token_hessian_coeff_abs/max": 236223201280.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 15988736.0, + "token_hessian_coeff_abs/var": 4.2472234774649897e+18 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 5.187563874642365e-05, + "adam_stats/lm_head/lr_effective_mean": -2.1859353216413524e-10, + "adam_stats/lm_head/lr_effective_min": -5.2000454161316156e-05, + "adam_stats/lm_head/lr_effective_std": 1.4112063126958674e-06, + "adam_stats/lr_effective_max": 5.31129480805248e-05, + "adam_stats/lr_effective_mean": -7.833134835211197e-11, + "adam_stats/lr_effective_min": -5.232985131442547e-05, + "adam_stats/m_t_max": 0.0024034723173826933, + "adam_stats/m_t_mean": -6.915198123326471e-13, + "adam_stats/m_t_min": -0.002315698191523552, + "adam_stats/v_t_max": 2.8443535484257154e-05, + "adam_stats/v_t_mean": 6.167265916706688e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8164012432098389, + "all_logprobs/max": -4.267692565917969e-05, + "all_logprobs/median": -2.171875, + "all_logprobs/min": -15.625, + "all_logprobs/p1": -6.5, + "all_logprobs/p10": -3.5625, + "all_logprobs/p25": -2.59375, + "all_logprobs/p5": -4.90625, + "all_logprobs/p75": -0.09765625, + "all_logprobs/var": 2.5308165550231934, + "clip_ratio": 0.0, + "completion_length": 833.9375, + "completion_length/incorrect": 833.9375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 2.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 142687.515625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 2.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 142687.515625, + "epoch": 0.0448, + "feature_vector_variance/max_squared_error": 141088.75, + "feature_vector_variance/metric": 28157.666015625, + "generated_tokens/total": 2137237.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.3567627457812107e-05, + "loss": 0.0, + "mean_logprobs": -2.328125, + "mean_logprobs/var": 1.90625, + "num_completions/total": 2688, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 264.87890625, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 266.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 230.0, + "per_token_feature_norm/p75": 300.0, + "per_token_feature_norm/var": 2366.580322265625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6225817203521729, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.046875, + "per_token_policy_error_norm/p75": 0.95703125, + "per_token_policy_error_norm/var": 0.1970088928937912, + "policy_entropy": 1.7244960069656372, + "policy_entropy/max": 3.828125, + "policy_entropy/median": 2.296875, + "policy_entropy/min": 0.0004215240478515625, + "policy_entropy/p25": 0.37890625, + "policy_entropy/p75": 2.4375, + "policy_entropy/var": 1.2779737710952759, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3792925477027893, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1597912758588791, + "policy_sharpness/min": 0.022608043625950813, + "policy_sharpness/p25": 0.10668536275625229, + "policy_sharpness/p75": 0.39865103363990784, + "policy_sharpness/var": 0.91048663854599, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 28, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.616843943949789e-05, + "adam_stats/lm_head/lr_effective_mean": -1.9453792743373555e-10, + "adam_stats/lm_head/lr_effective_min": -4.627979433280416e-05, + "adam_stats/lm_head/lr_effective_std": 1.2558672324303188e-06, + "adam_stats/lr_effective_max": 4.726980114355683e-05, + "adam_stats/lr_effective_mean": -6.972786931180863e-11, + "adam_stats/lr_effective_min": -4.657285535358824e-05, + "adam_stats/m_t_max": 0.002163124969229102, + "adam_stats/m_t_mean": -6.223744393116237e-13, + "adam_stats/m_t_min": -0.002084128325805068, + "adam_stats/v_t_max": 2.8415091946953908e-05, + "adam_stats/v_t_mean": 6.161098541068721e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6574817895889282, + "all_logprobs/max": -0.0004215240478515625, + "all_logprobs/median": -1.5390625, + "all_logprobs/min": -12.0, + "all_logprobs/p1": -6.65625, + "all_logprobs/p10": -3.921875, + "all_logprobs/p25": -2.640625, + "all_logprobs/p5": -5.25, + "all_logprobs/p75": -0.0262451171875, + "all_logprobs/var": 3.053356647491455, + "clip_ratio": 0.0, + "completion_length": 801.1979370117188, + "completion_length/incorrect": 801.1979370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 14.0, + "completion_length/incorrect/p25": 884.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 153637.734375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 14.0, + "completion_length/p25": 884.75, + "completion_length/p75": 1024.0, + "completion_length/var": 153637.734375, + "epoch": 0.0464, + "feature_vector_variance/max_squared_error": 153181.609375, + "feature_vector_variance/metric": 25707.736328125, + "generated_tokens/total": 2214152.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.3410080652050414e-05, + "loss": 0.0, + "mean_logprobs": -2.203125, + "mean_logprobs/var": 1.9921875, + "num_completions/total": 2784, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 248.56222534179688, + "per_token_feature_norm/max": 390.0, + "per_token_feature_norm/median": 250.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 214.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2556.31494140625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5418523550033569, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.734375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.015625, + "per_token_policy_error_norm/p75": 0.9609375, + "per_token_policy_error_norm/var": 0.2198379635810852, + "policy_entropy": 1.5294294357299805, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.015625, + "policy_entropy/min": 0.00396728515625, + "policy_entropy/p25": 0.11669921875, + "policy_entropy/p75": 2.46875, + "policy_entropy/var": 1.5637574195861816, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.39806538820266724, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.16622881591320038, + "policy_sharpness/min": 0.018811525776982307, + "policy_sharpness/p25": 0.10004688799381256, + "policy_sharpness/p75": 0.4306700825691223, + "policy_sharpness/var": 0.5393247604370117, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 29, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.106113192392513e-05, + "adam_stats/lm_head/lr_effective_mean": -1.7301153565352223e-10, + "adam_stats/lm_head/lr_effective_min": -4.116041600354947e-05, + "adam_stats/lm_head/lr_effective_std": 1.1168656328663928e-06, + "adam_stats/lr_effective_max": 4.204081051284447e-05, + "adam_stats/lr_effective_mean": -6.202703628499506e-11, + "adam_stats/lr_effective_min": -4.142096804571338e-05, + "adam_stats/m_t_max": 0.0019468123791739345, + "adam_stats/m_t_mean": -5.601351088686812e-13, + "adam_stats/m_t_min": -0.0018757154466584325, + "adam_stats/v_t_max": 2.8386677513481118e-05, + "adam_stats/v_t_mean": 6.15493767064379e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6603697538375854, + "all_logprobs/max": -0.0004863739013671875, + "all_logprobs/median": -1.703125, + "all_logprobs/min": -18.0, + "all_logprobs/p1": -6.53125, + "all_logprobs/p10": -3.796875, + "all_logprobs/p25": -2.53125, + "all_logprobs/p5": -5.21875, + "all_logprobs/p75": -0.041259765625, + "all_logprobs/var": 2.888789653778076, + "clip_ratio": 0.0, + "completion_length": 780.9479370117188, + "completion_length/incorrect": 780.9479370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 9.0, + "completion_length/incorrect/p25": 382.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 167192.109375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 9.0, + "completion_length/p25": 382.0, + "completion_length/p75": 1024.0, + "completion_length/var": 167192.109375, + "epoch": 0.048, + "feature_vector_variance/max_squared_error": 174680.28125, + "feature_vector_variance/metric": 24293.41015625, + "generated_tokens/total": 2289123.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.3245333323392335e-05, + "loss": 0.0, + "mean_logprobs": -2.3125, + "mean_logprobs/var": 2.21875, + "num_completions/total": 2880, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 258.5142517089844, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 262.0, + "per_token_feature_norm/min": 128.0, + "per_token_feature_norm/p25": 224.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 2468.549072265625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5576004981994629, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.78125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.953125, + "per_token_policy_error_norm/var": 0.2125345915555954, + "policy_entropy": 1.5371278524398804, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.0625, + "policy_entropy/min": 0.0038299560546875, + "policy_entropy/p25": 0.1669921875, + "policy_entropy/p75": 2.390625, + "policy_entropy/var": 1.440266728401184, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4431166648864746, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.25019776821136475, + "policy_sharpness/min": 0.021503599360585213, + "policy_sharpness/p25": 0.10896684229373932, + "policy_sharpness/p75": 0.4712221026420593, + "policy_sharpness/var": 0.7883291840553284, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 30, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.6493624065769836e-05, + "adam_stats/lm_head/lr_effective_mean": -1.5376115658494172e-10, + "adam_stats/lm_head/lr_effective_min": -3.658208515844308e-05, + "adam_stats/lm_head/lr_effective_std": 9.925638551067095e-07, + "adam_stats/lr_effective_max": 3.7364465242717415e-05, + "adam_stats/lr_effective_mean": -5.513864365092047e-11, + "adam_stats/lr_effective_min": -3.681357338791713e-05, + "adam_stats/m_t_max": 0.0017521311528980732, + "adam_stats/m_t_mean": -5.041135152546172e-13, + "adam_stats/m_t_min": -0.001688143820501864, + "adam_stats/v_t_max": 2.835829036484938e-05, + "adam_stats/v_t_mean": 6.148783305431893e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9244521856307983, + "all_logprobs/max": -0.00030517578125, + "all_logprobs/median": -2.171875, + "all_logprobs/min": -17.375, + "all_logprobs/p1": -6.5625, + "all_logprobs/p10": -4.15625, + "all_logprobs/p25": -2.734375, + "all_logprobs/p5": -5.4375, + "all_logprobs/p75": -0.0830078125, + "all_logprobs/var": 2.9366095066070557, + "clip_ratio": 0.0, + "completion_length": 846.90625, + "completion_length/incorrect": 846.90625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 31.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 123708.859375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 31.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 123708.859375, + "epoch": 0.0496, + "feature_vector_variance/max_squared_error": 189328.15625, + "feature_vector_variance/metric": 26375.05078125, + "generated_tokens/total": 2370426.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.3073586191080456e-05, + "loss": 0.0, + "mean_logprobs": -2.390625, + "mean_logprobs/var": 1.8984375, + "num_completions/total": 2976, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 256.7282409667969, + "per_token_feature_norm/max": 388.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 129.0, + "per_token_feature_norm/p25": 222.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 2471.752197265625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6291661262512207, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.04296875, + "per_token_policy_error_norm/p75": 0.9765625, + "per_token_policy_error_norm/var": 0.2004539519548416, + "policy_entropy": 1.7696256637573242, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.002593994140625, + "policy_entropy/p25": 0.328125, + "policy_entropy/p75": 2.515625, + "policy_entropy/var": 1.3966423273086548, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.39935505390167236, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.17988459765911102, + "policy_sharpness/min": 0.017947928979992867, + "policy_sharpness/p25": 0.10488070547580719, + "policy_sharpness/p75": 0.43918782472610474, + "policy_sharpness/var": 0.7750334143638611, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 31, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.2411524443887174e-05, + "adam_stats/lm_head/lr_effective_mean": -1.3655705732862344e-10, + "adam_stats/lm_head/lr_effective_min": -3.2490279409103096e-05, + "adam_stats/lm_head/lr_effective_std": 8.814798775347299e-07, + "adam_stats/lr_effective_max": 3.3185082429554313e-05, + "adam_stats/lr_effective_mean": -4.898089919769255e-11, + "adam_stats/lr_effective_min": -3.2695814297767356e-05, + "adam_stats/m_t_max": 0.0015769179444760084, + "adam_stats/m_t_mean": -4.537082786294083e-13, + "adam_stats/m_t_min": -0.0015193293802440166, + "adam_stats/v_t_max": 2.83299323200481e-05, + "adam_stats/v_t_mean": 6.142635011752162e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.7044925689697266, + "all_logprobs/max": -0.000431060791015625, + "all_logprobs/median": -1.8203125, + "all_logprobs/min": -17.75, + "all_logprobs/p1": -6.625, + "all_logprobs/p10": -3.921875, + "all_logprobs/p25": -2.59375, + "all_logprobs/p5": -5.28125, + "all_logprobs/p75": -0.05126953125, + "all_logprobs/var": 2.9818506240844727, + "clip_ratio": 0.0, + "completion_length": 814.4166870117188, + "completion_length/incorrect": 814.4166870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 38.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 149404.6875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 38.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 149404.6875, + "epoch": 0.0512, + "feature_vector_variance/max_squared_error": 127244.890625, + "feature_vector_variance/metric": 27139.1640625, + "generated_tokens/total": 2448610.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.2895048502539883e-05, + "loss": 0.0, + "mean_logprobs": -2.25, + "mean_logprobs/var": 2.03125, + "num_completions/total": 3072, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 261.66259765625, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 266.0, + "per_token_feature_norm/min": 129.0, + "per_token_feature_norm/p25": 226.0, + "per_token_feature_norm/p75": 300.0, + "per_token_feature_norm/var": 2474.680908203125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5668473839759827, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.95703125, + "per_token_policy_error_norm/var": 0.21367092430591583, + "policy_entropy": 1.5704575777053833, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.15625, + "policy_entropy/min": 0.003997802734375, + "policy_entropy/p25": 0.2109375, + "policy_entropy/p75": 2.421875, + "policy_entropy/var": 1.444701910018921, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.43567606806755066, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.19831494987010956, + "policy_sharpness/min": 0.019131150096654892, + "policy_sharpness/p25": 0.10443872958421707, + "policy_sharpness/p75": 0.43159377574920654, + "policy_sharpness/var": 0.8848931789398193, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 32, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.8765643946826458e-05, + "adam_stats/lm_head/lr_effective_mean": -1.2119204251259674e-10, + "adam_stats/lm_head/lr_effective_min": -2.8835711418651044e-05, + "adam_stats/lm_head/lr_effective_std": 7.822733323337161e-07, + "adam_stats/lr_effective_max": 2.9452296075760387e-05, + "adam_stats/lr_effective_mean": -4.348008411647619e-11, + "adam_stats/lr_effective_min": -2.9018065106356516e-05, + "adam_stats/m_t_max": 0.0014192260568961501, + "adam_stats/m_t_mean": -4.0833555612317107e-13, + "adam_stats/m_t_min": -0.001367396442219615, + "adam_stats/v_t_max": 2.8301603379077278e-05, + "adam_stats/v_t_mean": 6.136491054881121e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.7948099374771118, + "all_logprobs/max": -0.00083160400390625, + "all_logprobs/median": -1.9921875, + "all_logprobs/min": -18.75, + "all_logprobs/p1": -6.59375, + "all_logprobs/p10": -4.09375, + "all_logprobs/p25": -2.71875, + "all_logprobs/p5": -5.40625, + "all_logprobs/p75": -0.046142578125, + "all_logprobs/var": 3.087768077850342, + "clip_ratio": 0.0, + "completion_length": 841.2291870117188, + "completion_length/incorrect": 841.2291870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 8.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 132477.5625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 8.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 132477.5625, + "epoch": 0.0528, + "feature_vector_variance/max_squared_error": 130036.703125, + "feature_vector_variance/metric": 26523.44921875, + "generated_tokens/total": 2529368.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.270993777844248e-05, + "loss": 0.0, + "mean_logprobs": -2.25, + "mean_logprobs/var": 1.8359375, + "num_completions/total": 3168, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 259.54119873046875, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 262.0, + "per_token_feature_norm/min": 128.0, + "per_token_feature_norm/p25": 225.0, + "per_token_feature_norm/p75": 296.0, + "per_token_feature_norm/var": 2440.874755859375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5854425430297852, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.84375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.97265625, + "per_token_policy_error_norm/var": 0.20988313853740692, + "policy_entropy": 1.6425113677978516, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.25, + "policy_entropy/min": 0.004150390625, + "policy_entropy/p25": 0.173828125, + "policy_entropy/p75": 2.5, + "policy_entropy/var": 1.5130047798156738, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3647448420524597, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.14325480163097382, + "policy_sharpness/min": 0.020889103412628174, + "policy_sharpness/p25": 0.09732583910226822, + "policy_sharpness/p75": 0.3956434726715088, + "policy_sharpness/var": 0.4848918318748474, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 33, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.5511530111543834e-05, + "adam_stats/lm_head/lr_effective_mean": -1.0747852607906339e-10, + "adam_stats/lm_head/lr_effective_min": -2.5573823222657666e-05, + "adam_stats/lm_head/lr_effective_std": 6.937330567780009e-07, + "adam_stats/lr_effective_max": 2.6120604161405936e-05, + "adam_stats/lr_effective_mean": -3.8569283183909064e-11, + "adam_stats/lr_effective_min": -2.573549500084482e-05, + "adam_stats/m_t_max": 0.001277303439565003, + "adam_stats/m_t_mean": -3.6750328257992293e-13, + "adam_stats/m_t_min": -0.0012306567514315248, + "adam_stats/v_t_max": 2.8273301722947508e-05, + "adam_stats/v_t_mean": 6.130355337946591e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8700535297393799, + "all_logprobs/max": -0.00022792816162109375, + "all_logprobs/median": -2.046875, + "all_logprobs/min": -12.8125, + "all_logprobs/p1": -6.78125, + "all_logprobs/p10": -4.46875, + "all_logprobs/p25": -2.8125, + "all_logprobs/p5": -5.59375, + "all_logprobs/p75": -0.04168701171875, + "all_logprobs/var": 3.349846124649048, + "clip_ratio": 0.0, + "completion_length": 762.6666870117188, + "completion_length/incorrect": 762.6666870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 10.0, + "completion_length/incorrect/p25": 254.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 164842.015625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 10.0, + "completion_length/p25": 254.5, + "completion_length/p75": 1024.0, + "completion_length/var": 164842.015625, + "epoch": 0.0544, + "feature_vector_variance/max_squared_error": 166978.875, + "feature_vector_variance/metric": 25586.421875, + "generated_tokens/total": 2602584.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.2518479547691437e-05, + "loss": 0.0, + "mean_logprobs": -2.5, + "mean_logprobs/var": 2.03125, + "num_completions/total": 3264, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 255.70399475097656, + "per_token_feature_norm/max": 390.0, + "per_token_feature_norm/median": 260.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 219.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 2604.067626953125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5894595980644226, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.85546875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.98046875, + "per_token_policy_error_norm/var": 0.21244339644908905, + "policy_entropy": 1.683651089668274, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.265625, + "policy_entropy/min": 0.002227783203125, + "policy_entropy/p25": 0.1669921875, + "policy_entropy/p75": 2.578125, + "policy_entropy/var": 1.583604097366333, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4339846670627594, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.14450331032276154, + "policy_sharpness/min": 0.018361344933509827, + "policy_sharpness/p25": 0.09865203499794006, + "policy_sharpness/p75": 0.3951322138309479, + "policy_sharpness/var": 1.2972527742385864, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 34, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.2609023289987817e-05, + "adam_stats/lm_head/lr_effective_mean": -9.524720595566194e-11, + "adam_stats/lm_head/lr_effective_min": -2.26643660425907e-05, + "adam_stats/lm_head/lr_effective_std": 6.147650424281892e-07, + "adam_stats/lr_effective_max": 2.314889025001321e-05, + "adam_stats/lr_effective_mean": -3.4188218228647926e-11, + "adam_stats/lr_effective_min": -2.280759872519411e-05, + "adam_stats/m_t_max": 0.0011495730141177773, + "adam_stats/m_t_mean": -3.3075674360852347e-13, + "adam_stats/m_t_min": -0.0011075910879299045, + "adam_stats/v_t_max": 2.8245029170648195e-05, + "adam_stats/v_t_mean": 6.124224825182489e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9178978204727173, + "all_logprobs/max": 0.0, + "all_logprobs/median": -2.171875, + "all_logprobs/min": -18.75, + "all_logprobs/p1": -6.625, + "all_logprobs/p10": -4.2249908447265625, + "all_logprobs/p25": -2.75, + "all_logprobs/p5": -5.5, + "all_logprobs/p75": -0.06494140625, + "all_logprobs/var": 3.0508008003234863, + "clip_ratio": 0.0, + "completion_length": 774.15625, + "completion_length/incorrect": 774.15625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 1.0, + "completion_length/incorrect/p25": 262.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 157635.359375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 1.0, + "completion_length/p25": 262.25, + "completion_length/p75": 1024.0, + "completion_length/var": 157635.359375, + "epoch": 0.056, + "feature_vector_variance/max_squared_error": 193554.03125, + "feature_vector_variance/metric": 32007.51953125, + "generated_tokens/total": 2676903.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.2320907072649045e-05, + "loss": 0.0, + "mean_logprobs": -2.515625, + "mean_logprobs/var": 2.171875, + "num_completions/total": 3360, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 258.054931640625, + "per_token_feature_norm/max": 420.0, + "per_token_feature_norm/median": 260.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 220.0, + "per_token_feature_norm/p75": 296.0, + "per_token_feature_norm/var": 2648.35693359375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6198711395263672, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.9765625, + "per_token_policy_error_norm/var": 0.2034369260072708, + "policy_entropy": 1.7489417791366577, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 5.311449058353901e-10, + "policy_entropy/p25": 0.279296875, + "policy_entropy/p75": 2.546875, + "policy_entropy/var": 1.465030312538147, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.9200513362884521, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.15296143293380737, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.100252665579319, + "policy_sharpness/p75": 0.44051527976989746, + "policy_sharpness/var": 5.2437052726745605, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 35, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.0021885575260967e-05, + "adam_stats/lm_head/lr_effective_mean": -8.434523912637104e-11, + "adam_stats/lm_head/lr_effective_min": -2.0071014660061337e-05, + "adam_stats/lm_head/lr_effective_std": 5.443819759420876e-07, + "adam_stats/lr_effective_max": 2.0500048776739277e-05, + "adam_stats/lr_effective_mean": -3.028221076672999e-11, + "adam_stats/lr_effective_min": -2.0197814592393115e-05, + "adam_stats/m_t_max": 0.001034615677781403, + "adam_stats/m_t_mean": -2.976774642754476e-13, + "adam_stats/m_t_min": -0.000996831920929253, + "adam_stats/v_t_max": 2.8216783903189935e-05, + "adam_stats/v_t_mean": 6.118099950269684e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.071119785308838, + "all_logprobs/max": -0.00103759765625, + "all_logprobs/median": -2.203125, + "all_logprobs/min": -17.375, + "all_logprobs/p1": -6.9375, + "all_logprobs/p10": -4.875, + "all_logprobs/p25": -3.0, + "all_logprobs/p5": -5.875, + "all_logprobs/p75": -0.05126953125, + "all_logprobs/var": 3.5767006874084473, + "clip_ratio": 0.0, + "completion_length": 725.46875, + "completion_length/incorrect": 725.46875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 34.0, + "completion_length/incorrect/p25": 237.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 165571.21875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 34.0, + "completion_length/p25": 237.25, + "completion_length/p75": 1024.0, + "completion_length/var": 165571.21875, + "epoch": 0.0576, + "feature_vector_variance/max_squared_error": 148319.53125, + "feature_vector_variance/metric": 25967.892578125, + "generated_tokens/total": 2746548.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.2117461064942437e-05, + "loss": 0.0, + "mean_logprobs": -2.765625, + "mean_logprobs/var": 2.078125, + "num_completions/total": 3456, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 247.6849822998047, + "per_token_feature_norm/max": 378.0, + "per_token_feature_norm/median": 250.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 204.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2870.49951171875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.634103000164032, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.88671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0390625, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.2026059627532959, + "policy_entropy": 1.7994928359985352, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.296875, + "policy_entropy/min": 0.005035400390625, + "policy_entropy/p25": 0.17578125, + "policy_entropy/p75": 2.65625, + "policy_entropy/var": 1.5990798473358154, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4191845655441284, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.14172570407390594, + "policy_sharpness/min": 0.017906662076711655, + "policy_sharpness/p25": 0.09465710073709488, + "policy_sharpness/p75": 0.4044453799724579, + "policy_sharpness/var": 0.8810803294181824, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 36, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.7717426089802757e-05, + "adam_stats/lm_head/lr_effective_mean": -7.463481221936519e-11, + "adam_stats/lm_head/lr_effective_min": -1.7761007256922312e-05, + "adam_stats/lm_head/lr_effective_std": 4.816935756934981e-07, + "adam_stats/lr_effective_max": 1.8140621250495315e-05, + "adam_stats/lr_effective_mean": -2.6802343330856182e-11, + "adam_stats/lr_effective_min": -1.7873173419502564e-05, + "adam_stats/m_t_max": 0.000931154063437134, + "adam_stats/m_t_mean": -2.679122928280625e-13, + "adam_stats/m_t_min": -0.0008971486822701991, + "adam_stats/v_t_max": 2.818856773956213e-05, + "adam_stats/v_t_mean": 6.111982881612521e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0139920711517334, + "all_logprobs/max": -0.00133514404296875, + "all_logprobs/median": -2.125, + "all_logprobs/min": -22.75, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.9375, + "all_logprobs/p25": -3.078125, + "all_logprobs/p5": -5.875, + "all_logprobs/p75": -0.047607421875, + "all_logprobs/var": 3.7556819915771484, + "clip_ratio": 0.0, + "completion_length": 676.03125, + "completion_length/incorrect": 676.03125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 25.0, + "completion_length/incorrect/p25": 179.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 183561.578125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 25.0, + "completion_length/p25": 179.25, + "completion_length/p75": 1024.0, + "completion_length/var": 183561.578125, + "epoch": 0.0592, + "feature_vector_variance/max_squared_error": 156842.28125, + "feature_vector_variance/metric": 26961.359375, + "generated_tokens/total": 2811447.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.1908389392193549e-05, + "loss": 0.0, + "mean_logprobs": -2.84375, + "mean_logprobs/var": 2.265625, + "num_completions/total": 3552, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.23007202148438, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 245.0, + "per_token_feature_norm/min": 127.5, + "per_token_feature_norm/p25": 205.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2575.72607421875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.608795702457428, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87109375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03515625, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.21226200461387634, + "policy_entropy": 1.755354404449463, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 0.006011962890625, + "policy_entropy/p25": 0.1845703125, + "policy_entropy/p75": 2.765625, + "policy_entropy/var": 1.6785639524459839, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3332464098930359, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13552220165729523, + "policy_sharpness/min": 0.021075930446386337, + "policy_sharpness/p25": 0.08792345970869064, + "policy_sharpness/p75": 0.3980998992919922, + "policy_sharpness/var": 0.42611265182495117, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 37, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.5666171748307534e-05, + "adam_stats/lm_head/lr_effective_mean": -6.59916496448254e-11, + "adam_stats/lm_head/lr_effective_min": -1.5704801626270637e-05, + "adam_stats/lm_head/lr_effective_std": 4.2589695681272133e-07, + "adam_stats/lr_effective_max": 1.6040434275055304e-05, + "adam_stats/lr_effective_mean": -2.370411159025032e-11, + "adam_stats/lr_effective_min": -1.580394928168971e-05, + "adam_stats/m_t_max": 0.0008380386279895902, + "adam_stats/m_t_mean": -2.411185400646998e-13, + "adam_stats/m_t_min": -0.0008074337965808809, + "adam_stats/v_t_max": 2.816037886077538e-05, + "adam_stats/v_t_mean": 6.105871017125786e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9170125722885132, + "all_logprobs/max": -0.00029754638671875, + "all_logprobs/median": -2.046875, + "all_logprobs/min": -20.0, + "all_logprobs/p1": -6.75, + "all_logprobs/p10": -4.5625, + "all_logprobs/p25": -2.90625, + "all_logprobs/p5": -5.65625, + "all_logprobs/p75": -0.037109375, + "all_logprobs/var": 3.4179770946502686, + "clip_ratio": 0.0, + "completion_length": 742.625, + "completion_length/incorrect": 742.625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 23.0, + "completion_length/incorrect/p25": 205.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 169094.796875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 23.0, + "completion_length/p25": 205.5, + "completion_length/p75": 1024.0, + "completion_length/var": 169094.796875, + "epoch": 0.0608, + "feature_vector_variance/max_squared_error": 164046.828125, + "feature_vector_variance/metric": 32654.916015625, + "generated_tokens/total": 2882739.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.1693946776030601e-05, + "loss": 0.0, + "mean_logprobs": -2.609375, + "mean_logprobs/var": 2.328125, + "num_completions/total": 3648, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 253.2679443359375, + "per_token_feature_norm/max": 416.0, + "per_token_feature_norm/median": 253.0, + "per_token_feature_norm/min": 124.5, + "per_token_feature_norm/p25": 218.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2546.78466796875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5993814468383789, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.859375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.21308296918869019, + "policy_entropy": 1.7100895643234253, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.265625, + "policy_entropy/min": 0.0014190673828125, + "policy_entropy/p25": 0.1689453125, + "policy_entropy/p75": 2.640625, + "policy_entropy/var": 1.5989798307418823, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4488385319709778, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1353931576013565, + "policy_sharpness/min": 0.022545771673321724, + "policy_sharpness/p25": 0.09132920205593109, + "policy_sharpness/p75": 0.3954756259918213, + "policy_sharpness/var": 1.3452078104019165, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 38, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.384158167638816e-05, + "adam_stats/lm_head/lr_effective_mean": -5.830382704408166e-11, + "adam_stats/lm_head/lr_effective_min": -1.3875796867068857e-05, + "adam_stats/lm_head/lr_effective_std": 3.76269355228942e-07, + "adam_stats/lr_effective_max": 1.4172308510751463e-05, + "adam_stats/lr_effective_mean": -2.094764639526403e-11, + "adam_stats/lr_effective_min": -1.3963369383418467e-05, + "adam_stats/m_t_max": 0.0007542347302660346, + "adam_stats/m_t_mean": -2.170062767719097e-13, + "adam_stats/m_t_min": -0.0007266903994604945, + "adam_stats/v_t_max": 2.8132219085819088e-05, + "adam_stats/v_t_mean": 6.099764356809478e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0584752559661865, + "all_logprobs/max": -0.00026702880859375, + "all_logprobs/median": -2.21875, + "all_logprobs/min": -17.375, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.84375, + "all_logprobs/p25": -3.03125, + "all_logprobs/p5": -5.8125, + "all_logprobs/p75": -0.05322265625, + "all_logprobs/var": 3.5463974475860596, + "clip_ratio": 0.0, + "completion_length": 778.6041870117188, + "completion_length/incorrect": 778.6041870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 12.0, + "completion_length/incorrect/p25": 263.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 157828.3125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 12.0, + "completion_length/p25": 263.75, + "completion_length/p75": 1024.0, + "completion_length/var": 157828.3125, + "epoch": 0.0624, + "feature_vector_variance/max_squared_error": 141059.0625, + "feature_vector_variance/metric": 28676.548828125, + "generated_tokens/total": 2957485.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.1474394481749037e-05, + "loss": 0.0, + "mean_logprobs": -2.671875, + "mean_logprobs/var": 2.109375, + "num_completions/total": 3744, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 250.02076721191406, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 250.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 209.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2876.753173828125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6303678750991821, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.88671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0390625, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.2052295058965683, + "policy_entropy": 1.8044943809509277, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.0017852783203125, + "policy_entropy/p25": 0.19921875, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.6195836067199707, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.5850614905357361, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1305626630783081, + "policy_sharpness/min": 0.018774010241031647, + "policy_sharpness/p25": 0.09054344892501831, + "policy_sharpness/p75": 0.35701408982276917, + "policy_sharpness/var": 2.636289596557617, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 39, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.2219760719744954e-05, + "adam_stats/lm_head/lr_effective_mean": -5.1470609024883984e-11, + "adam_stats/lm_head/lr_effective_min": -1.225004052685108e-05, + "adam_stats/lm_head/lr_effective_std": 3.3215994221791334e-07, + "adam_stats/lr_effective_max": 1.2511784007074311e-05, + "adam_stats/lr_effective_mean": -1.849700427547507e-11, + "adam_stats/lr_effective_min": -1.2327324839134235e-05, + "adam_stats/m_t_max": 0.0006788112223148346, + "adam_stats/m_t_mean": -1.953094939466729e-13, + "adam_stats/m_t_min": -0.0006540213362313807, + "adam_stats/v_t_max": 2.8104086595703848e-05, + "adam_stats/v_t_mean": 6.093665069067944e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8680566549301147, + "all_logprobs/max": -0.00144195556640625, + "all_logprobs/median": -1.953125, + "all_logprobs/min": -18.875, + "all_logprobs/p1": -6.78125, + "all_logprobs/p10": -4.53125, + "all_logprobs/p25": -2.859375, + "all_logprobs/p5": -5.65625, + "all_logprobs/p75": -0.04833984375, + "all_logprobs/var": 3.4361140727996826, + "clip_ratio": 0.0, + "completion_length": 763.5208740234375, + "completion_length/incorrect": 763.5208740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 1.0, + "completion_length/incorrect/p25": 349.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 158025.015625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 1.0, + "completion_length/p25": 349.0, + "completion_length/p75": 1024.0, + "completion_length/var": 158025.015625, + "epoch": 0.064, + "feature_vector_variance/max_squared_error": 115623.1640625, + "feature_vector_variance/metric": 28832.25, + "generated_tokens/total": 3030783.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.125e-05, + "loss": 0.0, + "mean_logprobs": -2.546875, + "mean_logprobs/var": 2.5, + "num_completions/total": 3840, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 253.40536499023438, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 255.0, + "per_token_feature_norm/min": 124.0, + "per_token_feature_norm/p25": 215.0, + "per_token_feature_norm/p75": 292.0, + "per_token_feature_norm/var": 2571.2880859375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.587988555431366, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8359375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.98828125, + "per_token_policy_error_norm/var": 0.21470557153224945, + "policy_entropy": 1.6721690893173218, + "policy_entropy/max": 3.890625, + "policy_entropy/median": 2.234375, + "policy_entropy/min": 0.00726318359375, + "policy_entropy/p25": 0.193359375, + "policy_entropy/p75": 2.578125, + "policy_entropy/var": 1.560692548751831, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.331222265958786, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1353207528591156, + "policy_sharpness/min": 0.01627400889992714, + "policy_sharpness/p25": 0.08897119760513306, + "policy_sharpness/p75": 0.37828460335731506, + "policy_sharpness/var": 0.4052278399467468, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 40, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.010416666977107525, + "accuracy_reward/correct": 1.0, + "accuracy_reward/correct/max": 1.0, + "accuracy_reward/correct/median": 1.0, + "accuracy_reward/correct/min": 1.0, + "accuracy_reward/correct/p25": 1.0, + "accuracy_reward/correct/p75": 1.0, + "accuracy_reward/correct/var": NaN, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 1.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.01041666604578495, + "adam_stats/lm_head/lr_effective_max": 3.588590698200278e-05, + "adam_stats/lm_head/lr_effective_mean": -4.649147733459813e-11, + "adam_stats/lm_head/lr_effective_min": -3.5920438676839694e-05, + "adam_stats/lm_head/lr_effective_std": 9.643249541113619e-07, + "adam_stats/lr_effective_max": 3.621620999183506e-05, + "adam_stats/lr_effective_mean": -1.2071518784573243e-10, + "adam_stats/lr_effective_min": -3.631319123087451e-05, + "adam_stats/m_t_max": 0.0006887753843329847, + "adam_stats/m_t_mean": -3.798165341267756e-12, + "adam_stats/m_t_min": -0.0005506097222678363, + "adam_stats/v_t_max": 2.807776581903454e-05, + "adam_stats/v_t_mean": 6.089563749089866e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 1.2417634698280722e-09, + "advantages/max": 2.4741740226745605, + "advantages/median": 0.0, + "advantages/min": -0.3534534275531769, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0736425518989563, + "all_logprobs": -1.9394195079803467, + "all_logprobs/max": 0.0, + "all_logprobs/median": -2.09375, + "all_logprobs/min": -23.5, + "all_logprobs/p1": -6.75, + "all_logprobs/p10": -4.5, + "all_logprobs/p25": -2.875, + "all_logprobs/p5": -5.625, + "all_logprobs/p75": -0.049560546875, + "all_logprobs/var": 3.287757635116577, + "clip_ratio": 0.0, + "completion_length": 809.0208740234375, + "completion_length/correct": 1024.0, + "completion_length/correct/max": 1024.0, + "completion_length/correct/median": 1024.0, + "completion_length/correct/min": 1024.0, + "completion_length/correct/p25": 1024.0, + "completion_length/correct/p75": 1024.0, + "completion_length/correct/var": NaN, + "completion_length/incorrect": 806.7579345703125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 636.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 137922.265625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 677.0, + "completion_length/p75": 1024.0, + "completion_length/var": 136962.0625, + "epoch": 0.0656, + "feature_vector_variance/max_squared_error": 213472.765625, + "feature_vector_variance/metric": 30142.208984375, + "generated_tokens/total": 3108449.0, + "global_fisher_curvature": 176128.0, + "global_fisher_curvature/max": 176128.0, + "global_fisher_curvature/median": 176128.0, + "global_fisher_curvature/min": 176128.0, + "global_fisher_curvature/p25": 176128.0, + "global_fisher_curvature/p75": 176128.0, + "global_fisher_curvature/p85": 176128.0, + "global_fisher_curvature/p90": 176128.0, + "global_fisher_curvature/p95": 176128.0, + "global_fisher_curvature/p99": 176128.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 1.1146068572998047e-05, + "global_fisher_kl_divergence/max": 1.1146068572998047e-05, + "global_fisher_kl_divergence/median": 1.1146068572998047e-05, + "global_fisher_kl_divergence/min": 1.1146068572998047e-05, + "global_fisher_kl_divergence/p25": 1.1146068572998047e-05, + "global_fisher_kl_divergence/p75": 1.1146068572998047e-05, + "global_fisher_kl_divergence/p85": 1.1146068572998047e-05, + "global_fisher_kl_divergence/p90": 1.1146068572998047e-05, + "global_fisher_kl_divergence/p95": 1.1146068572998047e-05, + "global_fisher_kl_divergence/p99": 1.1146068572998047e-05, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 3.734375, + "global_full_update_term/max": 3.734375, + "global_full_update_term/median": 3.734375, + "global_full_update_term/min": 3.734375, + "global_full_update_term/p25": 3.734375, + "global_full_update_term/p75": 3.734375, + "global_full_update_term/p85": 3.734375, + "global_full_update_term/p90": 3.734375, + "global_full_update_term/p95": 3.734375, + "global_full_update_term/p99": 3.734375, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 222208.0, + "global_hessian_coeff/max": 222208.0, + "global_hessian_coeff/median": 222208.0, + "global_hessian_coeff/min": 222208.0, + "global_hessian_coeff/p25": 222208.0, + "global_hessian_coeff/p75": 222208.0, + "global_hessian_coeff/p99": 222208.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 222208.0, + "global_hessian_coeff_abs/max": 222208.0, + "global_hessian_coeff_abs/median": 222208.0, + "global_hessian_coeff_abs/min": 222208.0, + "global_hessian_coeff_abs/p25": 222208.0, + "global_hessian_coeff_abs/p75": 222208.0, + "global_hessian_coeff_abs/p99": 222208.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.16842985153198242, + "grouped_std_rewards": 0.02946278266608715, + "learning_rate": 1.1021036720894182e-05, + "loss": -0.0, + "mean_logprobs": -2.484375, + "mean_logprobs/var": 2.203125, + "num_completions/total": 3936, + "per_sentence_gradient_norm": 23.182292938232422, + "per_sentence_gradient_norm/max": 1256.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 17624.673828125, + "per_token_feature_norm": 247.39175415039062, + "per_token_feature_norm/max": 428.0, + "per_token_feature_norm/median": 249.0, + "per_token_feature_norm/min": 124.5, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2964.797119140625, + "per_token_gradient_norm": 3.7468647956848145, + "per_token_gradient_norm/max": 1224.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 1235.4332275390625, + "per_token_policy_error_norm": 0.6132293939590454, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.2061355710029602, + "policy_entropy": 1.7346417903900146, + "policy_entropy/max": 3.90625, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 4.6333298087120056e-08, + "policy_entropy/p25": 0.1953125, + "policy_entropy/p75": 2.578125, + "policy_entropy/var": 1.5461580753326416, + "policy_loss": -3.725290298461914e-09, + "policy_loss/max": 0.3534534275531769, + "policy_loss/median": 0.0, + "policy_loss/min": -2.4741742610931396, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0736425593495369, + "policy_sharpness": 0.7930785417556763, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1469372808933258, + "policy_sharpness/min": -0.0, + "policy_sharpness/p25": 0.10246755182743073, + "policy_sharpness/p75": 0.45138800144195557, + "policy_sharpness/var": 3.951462984085083, + "reward": 0.010416666977107525, + "reward/max": 1.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.01041666604578495, + "rewards/accuracy_reward": 0.010416666977107525, + "rewards/accuracy_reward/max": 1.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.01041666604578495, + "sentence_fisher_curvature": 1591680.0, + "sentence_fisher_curvature/max": 138412032.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 1710080.0, + "sentence_fisher_curvature/p99": 12306428.0, + "sentence_fisher_curvature/var": 199592280850432.0, + "sentence_fisher_kl_divergence": 0.00010103732347488403, + "sentence_fisher_kl_divergence/max": 0.0087890625, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.00010824203491210938, + "sentence_fisher_kl_divergence/p99": 0.0007801313186064363, + "sentence_fisher_kl_divergence/var": 8.047795745369513e-07, + "sentence_full_gradient_variance/max_squared_error": 17791.50390625, + "sentence_full_gradient_variance/metric": 17791.50390625, + "sentence_full_gradient_variance/p75": 17791.50390625, + "sentence_full_gradient_variance/p90": 17791.50390625, + "sentence_full_gradient_variance/p95": 17791.50390625, + "sentence_full_gradient_variance/p99": 17791.50390625, + "sentence_full_update_term": 0.1964569091796875, + "sentence_full_update_term/max": 17.25, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.173095703125, + "sentence_full_update_term/p99": 1.4451680183410645, + "sentence_full_update_term/var": 3.099125385284424, + "sentence_hessian_coeff": 2720736.0, + "sentence_hessian_coeff/max": 265289728.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": -1720320.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 13265296.0, + "sentence_hessian_coeff/var": 733389959200768.0, + "sentence_hessian_coeff_abs": 2806133.5, + "sentence_hessian_coeff_abs/max": 265289728.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 14899595.0, + "sentence_hessian_coeff_abs/var": 732913016504320.0, + "step": 41, + "token_fisher_curvature": 116938032.0, + "token_fisher_curvature/max": 367219703808.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.5947265625, + "token_fisher_curvature/p99": 717225984.0, + "token_fisher_curvature/var": 1.095870485069457e+19, + "token_fisher_kl_divergence": 0.007400303613394499, + "token_fisher_kl_divergence/max": 23.25, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 3.75734998669941e-11, + "token_fisher_kl_divergence/p99": 0.04541015625, + "token_fisher_kl_divergence/var": 0.04389025643467903, + "token_full_update_term": 0.025379793718457222, + "token_full_update_term/max": 71.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 3.3602118492126465e-06, + "token_full_update_term/p99": 0.10515213012695312, + "token_full_update_term/var": 0.5107356309890747, + "token_hessian_coeff": 179002944.0, + "token_hessian_coeff/max": 858993459200.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": -8589934592.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 3375104.0, + "token_hessian_coeff/var": 5.3537710450738725e+19, + "token_hessian_coeff_abs": 197141936.0, + "token_hessian_coeff_abs/max": 858993459200.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 220258304.0, + "token_hessian_coeff_abs/var": 5.353088468255349e+19 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.162934444844723e-05, + "adam_stats/lm_head/lr_effective_mean": -4.0975538934118205e-11, + "adam_stats/lm_head/lr_effective_min": -3.1659805244999006e-05, + "adam_stats/lm_head/lr_effective_std": 8.498558941028023e-07, + "adam_stats/lr_effective_max": 3.192069198121317e-05, + "adam_stats/lr_effective_mean": -1.0639179121030296e-10, + "adam_stats/lr_effective_min": -3.20061735692434e-05, + "adam_stats/m_t_max": 0.0006198978517204523, + "adam_stats/m_t_mean": -3.4183485035643724e-12, + "adam_stats/m_t_min": -0.0004955487092956901, + "adam_stats/v_t_max": 2.8049687898601405e-05, + "adam_stats/v_t_mean": 6.083474002327449e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6892399787902832, + "all_logprobs/max": -0.00021266937255859375, + "all_logprobs/median": -1.3984375, + "all_logprobs/min": -18.375, + "all_logprobs/p1": -6.65625, + "all_logprobs/p10": -4.1875, + "all_logprobs/p25": -2.671875, + "all_logprobs/p5": -5.40625, + "all_logprobs/p75": -0.049560546875, + "all_logprobs/var": 3.2084238529205322, + "clip_ratio": 0.0, + "completion_length": 825.40625, + "completion_length/incorrect": 825.40625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 28.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 131865.984375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 28.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 131865.984375, + "epoch": 0.0672, + "feature_vector_variance/max_squared_error": 173704.65625, + "feature_vector_variance/metric": 30431.880859375, + "generated_tokens/total": 3187688.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.078778360091808e-05, + "loss": 0.0, + "mean_logprobs": -2.1875, + "mean_logprobs/var": 2.203125, + "num_completions/total": 4032, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 256.3227844238281, + "per_token_feature_norm/max": 420.0, + "per_token_feature_norm/median": 256.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 223.0, + "per_token_feature_norm/p75": 292.0, + "per_token_feature_norm/var": 2450.20703125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5521449446678162, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.7109375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.96875, + "per_token_policy_error_norm/var": 0.2190038114786148, + "policy_entropy": 1.5374414920806885, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 1.8046875, + "policy_entropy/min": 0.00118255615234375, + "policy_entropy/p25": 0.2041015625, + "policy_entropy/p75": 2.5, + "policy_entropy/var": 1.5153738260269165, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.47193238139152527, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13598890602588654, + "policy_sharpness/min": 0.020663267001509666, + "policy_sharpness/p25": 0.09029558300971985, + "policy_sharpness/p75": 0.3786013126373291, + "policy_sharpness/var": 1.5384902954101562, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 42, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.7854062864207663e-05, + "adam_stats/lm_head/lr_effective_mean": -3.608345913730382e-11, + "adam_stats/lm_head/lr_effective_min": -2.788090569083579e-05, + "adam_stats/lm_head/lr_effective_std": 7.483403692276624e-07, + "adam_stats/lr_effective_max": 2.81108332274016e-05, + "adam_stats/lr_effective_mean": -9.368852221802726e-11, + "adam_stats/lr_effective_min": -2.818610482790973e-05, + "adam_stats/m_t_max": 0.0005579080316238105, + "adam_stats/m_t_mean": -3.076512352165328e-12, + "adam_stats/m_t_min": -0.0004459938209038228, + "adam_stats/v_t_max": 2.8021639081998728e-05, + "adam_stats/v_t_mean": 6.0773907607780675e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9539422988891602, + "all_logprobs/max": -0.00115203857421875, + "all_logprobs/median": -2.125, + "all_logprobs/min": -16.75, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.46875, + "all_logprobs/p25": -2.890625, + "all_logprobs/p5": -5.625, + "all_logprobs/p75": -0.06005859375, + "all_logprobs/var": 3.3271241188049316, + "clip_ratio": 0.0, + "completion_length": 765.4583740234375, + "completion_length/incorrect": 765.4583740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 20.0, + "completion_length/incorrect/p25": 333.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 153544.765625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 20.0, + "completion_length/p25": 333.5, + "completion_length/p75": 1024.0, + "completion_length/var": 153544.765625, + "epoch": 0.0688, + "feature_vector_variance/max_squared_error": 103689.6171875, + "feature_vector_variance/metric": 25316.810546875, + "generated_tokens/total": 3261172.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.0550524823068504e-05, + "loss": 0.0, + "mean_logprobs": -2.53125, + "mean_logprobs/var": 1.953125, + "num_completions/total": 4128, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 249.2487030029297, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 251.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 208.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2832.918701171875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6185621619224548, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87109375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0390625, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.20736482739448547, + "policy_entropy": 1.746405005455017, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 0.00836181640625, + "policy_entropy/p25": 0.2373046875, + "policy_entropy/p75": 2.609375, + "policy_entropy/var": 1.5193241834640503, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.28998711705207825, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13995498418807983, + "policy_sharpness/min": 0.019792916253209114, + "policy_sharpness/p25": 0.09566183388233185, + "policy_sharpness/p75": 0.3874478340148926, + "policy_sharpness/var": 0.2133277952671051, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 43, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.45081555476645e-05, + "adam_stats/lm_head/lr_effective_mean": -3.1747934142734024e-11, + "adam_stats/lm_head/lr_effective_min": -2.4531791495974176e-05, + "adam_stats/lm_head/lr_effective_std": 6.583803155990609e-07, + "adam_stats/lr_effective_max": 2.4734252292546444e-05, + "adam_stats/lr_effective_mean": -8.243057625367811e-11, + "adam_stats/lr_effective_min": -2.4800481696729548e-05, + "adam_stats/m_t_max": 0.0005021172109991312, + "adam_stats/m_t_mean": -2.76886196262649e-12, + "adam_stats/m_t_min": -0.00040139444172382355, + "adam_stats/v_t_max": 2.7993617550237104e-05, + "adam_stats/v_t_mean": 6.071313590760852e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.139479875564575, + "all_logprobs/max": -0.0013275146484375, + "all_logprobs/median": -2.25, + "all_logprobs/min": -24.125, + "all_logprobs/p1": -6.90625, + "all_logprobs/p10": -4.71875, + "all_logprobs/p25": -3.0, + "all_logprobs/p5": -5.78125, + "all_logprobs/p75": -0.1953125, + "all_logprobs/var": 3.2916245460510254, + "clip_ratio": 0.0, + "completion_length": 738.875, + "completion_length/incorrect": 738.875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 11.0, + "completion_length/incorrect/p25": 207.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 172576.34375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 11.0, + "completion_length/p25": 207.75, + "completion_length/p75": 1024.0, + "completion_length/var": 172576.34375, + "epoch": 0.0704, + "feature_vector_variance/max_squared_error": 130966.171875, + "feature_vector_variance/metric": 26125.796875, + "generated_tokens/total": 3332104.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.0309549450619342e-05, + "loss": 0.0, + "mean_logprobs": -2.8125, + "mean_logprobs/var": 2.015625, + "num_completions/total": 4224, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 247.6513214111328, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 249.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 202.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 3021.473388671875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6715888381004333, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.89453125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.1015625, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.19097420573234558, + "policy_entropy": 1.8934626579284668, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.00518798828125, + "policy_entropy/p25": 0.6015625, + "policy_entropy/p75": 2.671875, + "policy_entropy/var": 1.3850679397583008, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.32875755429267883, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13644957542419434, + "policy_sharpness/min": 0.019626349210739136, + "policy_sharpness/p25": 0.09542543441057205, + "policy_sharpness/p75": 0.37246736884117126, + "policy_sharpness/var": 0.5471135973930359, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 44, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.1545063646044582e-05, + "adam_stats/lm_head/lr_effective_mean": -2.7908586899827448e-11, + "adam_stats/lm_head/lr_effective_min": -2.1565856513916515e-05, + "adam_stats/lm_head/lr_effective_std": 5.78721369492996e-07, + "adam_stats/lr_effective_max": 2.174397923226934e-05, + "adam_stats/lr_effective_mean": -7.246116207060282e-11, + "adam_stats/lr_effective_min": -2.1802197807119228e-05, + "adam_stats/m_t_max": 0.0004519054782576859, + "adam_stats/m_t_mean": -2.4919751592106243e-12, + "adam_stats/m_t_min": -0.0003612549917306751, + "adam_stats/v_t_max": 2.7965625122305937e-05, + "adam_stats/v_t_mean": 6.065242058594933e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.2255959510803223, + "all_logprobs/max": -0.000766754150390625, + "all_logprobs/median": -2.296875, + "all_logprobs/min": -23.0, + "all_logprobs/p1": -6.78125, + "all_logprobs/p10": -4.96875, + "all_logprobs/p25": -3.109375, + "all_logprobs/p5": -5.875, + "all_logprobs/p75": -0.1640625, + "all_logprobs/var": 3.408902645111084, + "clip_ratio": 0.0, + "completion_length": 711.0625, + "completion_length/incorrect": 711.0625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 14.0, + "completion_length/incorrect/p25": 190.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 181510.734375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 14.0, + "completion_length/p25": 190.0, + "completion_length/p75": 1024.0, + "completion_length/var": 181510.734375, + "epoch": 0.072, + "feature_vector_variance/max_squared_error": 124482.0625, + "feature_vector_variance/metric": 28174.82421875, + "generated_tokens/total": 3400366.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.0065151074942516e-05, + "loss": 0.0, + "mean_logprobs": -2.96875, + "mean_logprobs/var": 2.0625, + "num_completions/total": 4320, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 247.3270263671875, + "per_token_feature_norm/max": 392.0, + "per_token_feature_norm/median": 248.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 209.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2581.37548828125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6817054152488708, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8984375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.10546875, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.18793106079101562, + "policy_entropy": 1.9487460851669312, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.005615234375, + "policy_entropy/p25": 0.50390625, + "policy_entropy/p75": 2.765625, + "policy_entropy/var": 1.4793001413345337, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.36899998784065247, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12759941816329956, + "policy_sharpness/min": 0.0177498497068882, + "policy_sharpness/p25": 0.08725139498710632, + "policy_sharpness/p75": 0.33469945192337036, + "policy_sharpness/var": 0.9657173156738281, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 45, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.8923023162642494e-05, + "adam_stats/lm_head/lr_effective_mean": -2.4511257606940617e-11, + "adam_stats/lm_head/lr_effective_min": -1.894129854917992e-05, + "adam_stats/lm_head/lr_effective_std": 5.08238827023888e-07, + "adam_stats/lr_effective_max": 1.909786442411132e-05, + "adam_stats/lr_effective_mean": -6.363968380052043e-11, + "adam_stats/lr_effective_min": -1.9148996216244996e-05, + "adam_stats/m_t_max": 0.00040671491296961904, + "adam_stats/m_t_mean": -2.242779529801342e-12, + "adam_stats/m_t_min": -0.00032512948382645845, + "adam_stats/v_t_max": 2.7937659979215823e-05, + "adam_stats/v_t_mean": 6.059177899003787e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1288280487060547, + "all_logprobs/max": -0.00072479248046875, + "all_logprobs/median": -2.265625, + "all_logprobs/min": -22.0, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.59375, + "all_logprobs/p25": -2.890625, + "all_logprobs/p5": -5.71875, + "all_logprobs/p75": -0.11767578125, + "all_logprobs/var": 3.1076114177703857, + "clip_ratio": 0.0, + "completion_length": 743.125, + "completion_length/incorrect": 743.125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 7.0, + "completion_length/incorrect/p25": 278.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 162673.015625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 7.0, + "completion_length/p25": 278.75, + "completion_length/p75": 1024.0, + "completion_length/var": 162673.015625, + "epoch": 0.0736, + "feature_vector_variance/max_squared_error": 116726.7421875, + "feature_vector_variance/metric": 27125.234375, + "generated_tokens/total": 3471706.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 9.817627457812105e-06, + "loss": 0.0, + "mean_logprobs": -2.75, + "mean_logprobs/var": 1.984375, + "num_completions/total": 4416, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 255.30067443847656, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 256.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 214.0, + "per_token_feature_norm/p75": 296.0, + "per_token_feature_norm/var": 2952.25927734375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6751828789710999, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.89453125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.08203125, + "per_token_policy_error_norm/p75": 0.98828125, + "per_token_policy_error_norm/var": 0.1831962615251541, + "policy_entropy": 1.9001189470291138, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.0037689208984375, + "policy_entropy/p25": 0.37109375, + "policy_entropy/p75": 2.625, + "policy_entropy/var": 1.4333473443984985, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4085821509361267, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13733641803264618, + "policy_sharpness/min": 0.02084633894264698, + "policy_sharpness/p25": 0.09715279936790466, + "policy_sharpness/p75": 0.3572207987308502, + "policy_sharpness/var": 1.0338869094848633, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 46, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.6604621123406105e-05, + "adam_stats/lm_head/lr_effective_mean": -2.150748676377212e-11, + "adam_stats/lm_head/lr_effective_min": -1.6620668247924186e-05, + "adam_stats/lm_head/lr_effective_std": 4.459249680621724e-07, + "adam_stats/lr_effective_max": 1.6758158380980603e-05, + "adam_stats/lr_effective_mean": -5.5840138069029877e-11, + "adam_stats/lr_effective_min": -1.6803023754619062e-05, + "adam_stats/m_t_max": 0.00036604341585189104, + "adam_stats/m_t_mean": -2.018501251560556e-12, + "adam_stats/m_t_min": -0.00029261651798151433, + "adam_stats/v_t_max": 2.7909722120966762e-05, + "adam_stats/v_t_mean": 6.053117642540462e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.156625270843506, + "all_logprobs/max": -0.00154876708984375, + "all_logprobs/median": -2.28125, + "all_logprobs/min": -15.5, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.5625, + "all_logprobs/p25": -2.984375, + "all_logprobs/p5": -5.65625, + "all_logprobs/p75": -0.2275390625, + "all_logprobs/var": 3.0877742767333984, + "clip_ratio": 0.0, + "completion_length": 802.1979370117188, + "completion_length/incorrect": 802.1979370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 18.0, + "completion_length/incorrect/p25": 671.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 145209.5625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 18.0, + "completion_length/p25": 671.25, + "completion_length/p75": 1024.0, + "completion_length/var": 145209.5625, + "epoch": 0.0752, + "feature_vector_variance/max_squared_error": 154154.171875, + "feature_vector_variance/metric": 28222.380859375, + "generated_tokens/total": 3548717.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 9.567280168627493e-06, + "loss": 0.0, + "mean_logprobs": -2.6875, + "mean_logprobs/var": 1.9140625, + "num_completions/total": 4512, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 255.69760131835938, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 217.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 2626.089599609375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6842857599258423, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8984375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.13671875, + "per_token_policy_error_norm/p75": 0.99609375, + "per_token_policy_error_norm/var": 0.18190722167491913, + "policy_entropy": 1.9446513652801514, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.00897216796875, + "policy_entropy/p25": 0.66015625, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.364518165588379, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.23248571157455444, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12425007671117783, + "policy_sharpness/min": 0.018251799046993256, + "policy_sharpness/p25": 0.09241727739572525, + "policy_sharpness/p75": 0.2876163423061371, + "policy_sharpness/var": 0.11655956506729126, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 47, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.4556350834027398e-05, + "adam_stats/lm_head/lr_effective_mean": -1.8853777913885317e-11, + "adam_stats/lm_head/lr_effective_min": -1.457042799302144e-05, + "adam_stats/lm_head/lr_effective_std": 3.908776875505282e-07, + "adam_stats/lr_effective_max": 1.4691050637338776e-05, + "adam_stats/lr_effective_mean": -4.8949684583465825e-11, + "adam_stats/lr_effective_min": -1.4730382645211648e-05, + "adam_stats/m_t_max": 0.00032943906262516975, + "adam_stats/m_t_mean": -1.8166509963002397e-12, + "adam_stats/m_t_min": -0.00026335485745221376, + "adam_stats/v_t_max": 2.7881813366548158e-05, + "adam_stats/v_t_mean": 6.047065192332779e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0761499404907227, + "all_logprobs/max": -0.00109100341796875, + "all_logprobs/median": -2.1875, + "all_logprobs/min": -17.625, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.84375, + "all_logprobs/p25": -3.015625, + "all_logprobs/p5": -5.8125, + "all_logprobs/p75": -0.1103515625, + "all_logprobs/var": 3.4714338779449463, + "clip_ratio": 0.0, + "completion_length": 683.5, + "completion_length/incorrect": 683.5, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 2.0, + "completion_length/incorrect/p25": 175.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 190291.953125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 2.0, + "completion_length/p25": 175.25, + "completion_length/p75": 1024.0, + "completion_length/var": 190291.953125, + "epoch": 0.0768, + "feature_vector_variance/max_squared_error": 172899.375, + "feature_vector_variance/metric": 29046.982421875, + "generated_tokens/total": 3614333.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 9.314414216997507e-06, + "loss": 0.0, + "mean_logprobs": -2.875, + "mean_logprobs/var": 2.203125, + "num_completions/total": 4608, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 246.25914001464844, + "per_token_feature_norm/max": 420.0, + "per_token_feature_norm/median": 244.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 209.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2626.945556640625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6434690952301025, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8828125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.05859375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.19951510429382324, + "policy_entropy": 1.8373903036117554, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.005218505859375, + "policy_entropy/p25": 0.375, + "policy_entropy/p75": 2.71875, + "policy_entropy/var": 1.5258394479751587, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3540326654911041, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1294296383857727, + "policy_sharpness/min": 0.01902911067008972, + "policy_sharpness/p25": 0.08777377009391785, + "policy_sharpness/p75": 0.3500259220600128, + "policy_sharpness/var": 0.841240406036377, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 48, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.2748228982673027e-05, + "adam_stats/lm_head/lr_effective_mean": -1.651128365975918e-11, + "adam_stats/lm_head/lr_effective_min": -1.2760569006786682e-05, + "adam_stats/lm_head/lr_effective_std": 3.4228963841087534e-07, + "adam_stats/lr_effective_max": 1.2866289580415469e-05, + "adam_stats/lr_effective_mean": -4.2867379784761184e-11, + "adam_stats/lr_effective_min": -1.290073214477161e-05, + "adam_stats/m_t_max": 0.0002964951563626528, + "adam_stats/m_t_mean": -1.6349843571031308e-12, + "adam_stats/m_t_min": -0.00023701936879660934, + "adam_stats/v_t_max": 2.7853931896970607e-05, + "adam_stats/v_t_mean": 6.041017946295524e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.7854183912277222, + "all_logprobs/max": -0.00058746337890625, + "all_logprobs/median": -1.8125, + "all_logprobs/min": -17.0, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.40625, + "all_logprobs/p25": -2.78125, + "all_logprobs/p5": -5.625, + "all_logprobs/p75": -0.0284423828125, + "all_logprobs/var": 3.412184715270996, + "clip_ratio": 0.0, + "completion_length": 799.8229370117188, + "completion_length/incorrect": 799.8229370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 17.0, + "completion_length/incorrect/p25": 646.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 146898.5, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 17.0, + "completion_length/p25": 646.0, + "completion_length/p75": 1024.0, + "completion_length/var": 146898.5, + "epoch": 0.0784, + "feature_vector_variance/max_squared_error": 137522.421875, + "feature_vector_variance/metric": 25133.56640625, + "generated_tokens/total": 3691116.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 9.059337681133194e-06, + "loss": 0.0, + "mean_logprobs": -2.34375, + "mean_logprobs/var": 2.0, + "num_completions/total": 4704, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.97865295410156, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 247.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 202.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2848.4716796875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5639284253120422, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.80859375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.98046875, + "per_token_policy_error_norm/var": 0.21905766427516937, + "policy_entropy": 1.5794572830200195, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.203125, + "policy_entropy/min": 0.00439453125, + "policy_entropy/p25": 0.1123046875, + "policy_entropy/p75": 2.546875, + "policy_entropy/var": 1.6512304544448853, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.5265014171600342, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.15734602510929108, + "policy_sharpness/min": 0.02041003853082657, + "policy_sharpness/p25": 0.10038527846336365, + "policy_sharpness/p75": 0.41921162605285645, + "policy_sharpness/var": 1.6636004447937012, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 49, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.1153448213008232e-05, + "adam_stats/lm_head/lr_effective_mean": -1.4445256622808156e-11, + "adam_stats/lm_head/lr_effective_min": -1.1164252100570593e-05, + "adam_stats/lm_head/lr_effective_std": 2.9943910817564756e-07, + "adam_stats/lr_effective_max": 1.1256816833338235e-05, + "adam_stats/lr_effective_mean": -3.750303192995297e-11, + "adam_stats/lr_effective_min": -1.1286949302302673e-05, + "adam_stats/m_t_max": 0.0002668456290848553, + "adam_stats/m_t_mean": -1.4714881873753582e-12, + "adam_stats/m_t_min": -0.00021331742755137384, + "adam_stats/v_t_max": 2.782607771223411e-05, + "adam_stats/v_t_mean": 6.0349772054713036e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.2247390747070312, + "all_logprobs/max": -0.0010833740234375, + "all_logprobs/median": -2.28125, + "all_logprobs/min": -20.75, + "all_logprobs/p1": -6.90625, + "all_logprobs/p10": -5.1875, + "all_logprobs/p25": -3.296875, + "all_logprobs/p5": -5.96875, + "all_logprobs/p75": -0.130859375, + "all_logprobs/var": 3.795949935913086, + "clip_ratio": 0.0, + "completion_length": 730.0208740234375, + "completion_length/incorrect": 730.0208740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 29.0, + "completion_length/incorrect/p25": 269.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 156406.953125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 29.0, + "completion_length/p25": 269.0, + "completion_length/p75": 1024.0, + "completion_length/var": 156406.953125, + "epoch": 0.08, + "feature_vector_variance/max_squared_error": 100524.703125, + "feature_vector_variance/metric": 28328.580078125, + "generated_tokens/total": 3761198.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 8.80236133250198e-06, + "loss": 0.0, + "mean_logprobs": -2.859375, + "mean_logprobs/var": 1.984375, + "num_completions/total": 4800, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.14431762695312, + "per_token_feature_norm/max": 380.0, + "per_token_feature_norm/median": 244.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 205.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2520.0703125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.665062427520752, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8984375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0859375, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.20253321528434753, + "policy_entropy": 1.9130555391311646, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.0057373046875, + "policy_entropy/p25": 0.4453125, + "policy_entropy/p75": 2.84375, + "policy_entropy/var": 1.5607959032058716, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.298290491104126, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11644859611988068, + "policy_sharpness/min": 0.019969986751675606, + "policy_sharpness/p25": 0.08461157977581024, + "policy_sharpness/p75": 0.27012956142425537, + "policy_sharpness/var": 0.5245119333267212, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 50, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 9.748042430146597e-06, + "adam_stats/lm_head/lr_effective_mean": -1.2624634844471405e-11, + "adam_stats/lm_head/lr_effective_min": -9.757491170603316e-06, + "adam_stats/lm_head/lr_effective_std": 2.616810945710313e-07, + "adam_stats/lr_effective_max": 9.838456207944546e-06, + "adam_stats/lr_effective_mean": -3.277588270234055e-11, + "adam_stats/lr_effective_min": -9.864790627034381e-06, + "adam_stats/m_t_max": 0.0002401610545348376, + "adam_stats/m_t_mean": -1.3243381001212806e-12, + "adam_stats/m_t_min": -0.00019198567315470427, + "adam_stats/v_t_max": 2.779825263132807e-05, + "adam_stats/v_t_mean": 6.02894210249838e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.3449032306671143, + "all_logprobs/max": -0.00102996826171875, + "all_logprobs/median": -2.328125, + "all_logprobs/min": -17.125, + "all_logprobs/p1": -6.90625, + "all_logprobs/p10": -5.125, + "all_logprobs/p25": -3.1875, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.46875, + "all_logprobs/var": 3.371119737625122, + "clip_ratio": 0.0, + "completion_length": 712.65625, + "completion_length/incorrect": 712.65625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 20.0, + "completion_length/incorrect/p25": 221.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 173946.921875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 20.0, + "completion_length/p25": 221.25, + "completion_length/p75": 1024.0, + "completion_length/var": 173946.921875, + "epoch": 0.0816, + "feature_vector_variance/max_squared_error": 108165.1875, + "feature_vector_variance/metric": 27615.98046875, + "generated_tokens/total": 3829613.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 8.543798257200491e-06, + "loss": 0.0, + "mean_logprobs": -3.078125, + "mean_logprobs/var": 2.046875, + "num_completions/total": 4896, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 248.23416137695312, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 250.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 204.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2974.957763671875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.7145427465438843, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.90625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.2890625, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.17219369113445282, + "policy_entropy": 2.0442843437194824, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.00518798828125, + "policy_entropy/p25": 0.998046875, + "policy_entropy/p75": 2.796875, + "policy_entropy/var": 1.3806636333465576, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3070424199104309, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12663941085338593, + "policy_sharpness/min": 0.01919267699122429, + "policy_sharpness/p25": 0.08953551203012466, + "policy_sharpness/p75": 0.3302586078643799, + "policy_sharpness/var": 0.5893167853355408, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 51, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 8.510617590218317e-06, + "adam_stats/lm_head/lr_effective_mean": -1.102167489219763e-11, + "adam_stats/lm_head/lr_effective_min": -8.51887307362631e-06, + "adam_stats/lm_head/lr_effective_std": 2.2843964586627408e-07, + "adam_stats/lr_effective_max": 8.589613571530208e-06, + "adam_stats/lr_effective_mean": -2.8613944547117853e-11, + "adam_stats/lr_effective_min": -8.612605597591028e-06, + "adam_stats/m_t_max": 0.00021614493743982166, + "adam_stats/m_t_mean": -1.1919042142150005e-12, + "adam_stats/m_t_min": -0.00017278709856327623, + "adam_stats/v_t_max": 2.777045483526308e-05, + "adam_stats/v_t_mean": 6.022913071057623e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.5781617164611816, + "all_logprobs/max": -0.000659942626953125, + "all_logprobs/median": -2.46875, + "all_logprobs/min": -21.75, + "all_logprobs/p1": -7.03125, + "all_logprobs/p10": -5.6875, + "all_logprobs/p25": -4.0, + "all_logprobs/p5": -6.21875, + "all_logprobs/p75": -0.22265625, + "all_logprobs/var": 4.3463616371154785, + "clip_ratio": 0.0, + "completion_length": 536.1771240234375, + "completion_length/incorrect": 536.1771240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 332.0, + "completion_length/incorrect/min": 10.0, + "completion_length/incorrect/p25": 138.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 178076.90625, + "completion_length/max": 1024.0, + "completion_length/median": 332.0, + "completion_length/min": 10.0, + "completion_length/p25": 138.25, + "completion_length/p75": 1024.0, + "completion_length/var": 178076.90625, + "epoch": 0.0832, + "feature_vector_variance/max_squared_error": 123786.3359375, + "feature_vector_variance/metric": 28080.857421875, + "generated_tokens/total": 3881086.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 8.283963474507402e-06, + "loss": 0.0, + "mean_logprobs": -3.546875, + "mean_logprobs/var": 1.96875, + "num_completions/total": 4992, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 235.71795654296875, + "per_token_feature_norm/max": 388.0, + "per_token_feature_norm/median": 232.0, + "per_token_feature_norm/min": 123.5, + "per_token_feature_norm/p25": 192.0, + "per_token_feature_norm/p75": 278.0, + "per_token_feature_norm/var": 2922.173828125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.707593560218811, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.9375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.14453125, + "per_token_policy_error_norm/p75": 1.0234375, + "per_token_policy_error_norm/var": 0.18934082984924316, + "policy_entropy": 2.1123273372650146, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.40625, + "policy_entropy/min": 0.002716064453125, + "policy_entropy/p25": 0.60546875, + "policy_entropy/p75": 3.21875, + "policy_entropy/var": 1.6854859590530396, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3689841032028198, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11329231411218643, + "policy_sharpness/min": 0.020408162847161293, + "policy_sharpness/p25": 0.06728203594684601, + "policy_sharpness/p75": 0.3229725956916809, + "policy_sharpness/var": 1.1476924419403076, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 52, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 7.422079306707019e-06, + "adam_stats/lm_head/lr_effective_mean": -9.611635272410712e-12, + "adam_stats/lm_head/lr_effective_min": -7.429283868987113e-06, + "adam_stats/lm_head/lr_effective_std": 1.9920099703085725e-07, + "adam_stats/lr_effective_max": 7.491024462069618e-06, + "adam_stats/lr_effective_mean": -2.4952965041458164e-11, + "adam_stats/lr_effective_min": -7.511074272770202e-06, + "adam_stats/m_t_max": 0.00019453043933026493, + "adam_stats/m_t_mean": -1.0727134241647618e-12, + "adam_stats/m_t_min": -0.00015550838725175709, + "adam_stats/v_t_max": 2.7742684324039146e-05, + "adam_stats/v_t_mean": 6.016889243787293e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9985952377319336, + "all_logprobs/max": -0.001556396484375, + "all_logprobs/median": -2.109375, + "all_logprobs/min": -17.5, + "all_logprobs/p1": -6.75, + "all_logprobs/p10": -4.78125, + "all_logprobs/p25": -3.015625, + "all_logprobs/p5": -5.78125, + "all_logprobs/p75": -0.07373046875, + "all_logprobs/var": 3.519143581390381, + "clip_ratio": 0.0, + "completion_length": 690.78125, + "completion_length/incorrect": 690.78125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 16.0, + "completion_length/incorrect/p25": 148.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 187605.59375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 16.0, + "completion_length/p25": 148.0, + "completion_length/p75": 1024.0, + "completion_length/var": 187605.59375, + "epoch": 0.0848, + "feature_vector_variance/max_squared_error": 167125.3125, + "feature_vector_variance/metric": 25647.8828125, + "generated_tokens/total": 3947401.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 8.02317355308094e-06, + "loss": 0.0, + "mean_logprobs": -2.796875, + "mean_logprobs/var": 2.34375, + "num_completions/total": 5088, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 246.61476135253906, + "per_token_feature_norm/max": 398.0, + "per_token_feature_norm/median": 249.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 286.0, + "per_token_feature_norm/var": 2776.1953125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6199074983596802, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.046875, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.20817019045352936, + "policy_entropy": 1.7707641124725342, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 0.0067138671875, + "policy_entropy/p25": 0.275390625, + "policy_entropy/p75": 2.6875, + "policy_entropy/var": 1.5651490688323975, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3190523684024811, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13269957900047302, + "policy_sharpness/min": 0.019015606492757797, + "policy_sharpness/p25": 0.09211228787899017, + "policy_sharpness/p75": 0.34665584564208984, + "policy_sharpness/var": 0.49001455307006836, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 53, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 6.465398200816708e-06, + "adam_stats/lm_head/lr_effective_mean": -8.37244423096406e-12, + "adam_stats/lm_head/lr_effective_min": -6.4716791712271515e-06, + "adam_stats/lm_head/lr_effective_std": 1.7350687642192497e-07, + "adam_stats/lr_effective_max": 6.5255026129307225e-06, + "adam_stats/lr_effective_mean": -2.1735609839756975e-11, + "adam_stats/lr_effective_min": -6.542967639688868e-06, + "adam_stats/m_t_max": 0.00017507739539723843, + "adam_stats/m_t_mean": -9.654426780594805e-13, + "adam_stats/m_t_min": -0.00013995754125062376, + "adam_stats/v_t_max": 2.7714941097656265e-05, + "adam_stats/v_t_mean": 6.010873656453475e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.011820077896118, + "all_logprobs/max": -0.00113677978515625, + "all_logprobs/median": -2.203125, + "all_logprobs/min": -20.5, + "all_logprobs/p1": -6.78125, + "all_logprobs/p10": -4.375, + "all_logprobs/p25": -2.828125, + "all_logprobs/p5": -5.59375, + "all_logprobs/p75": -0.08203125, + "all_logprobs/var": 3.107332229614258, + "clip_ratio": 0.0, + "completion_length": 806.7916870117188, + "completion_length/incorrect": 806.7916870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 16.0, + "completion_length/incorrect/p25": 962.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 148238.578125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 16.0, + "completion_length/p25": 962.75, + "completion_length/p75": 1024.0, + "completion_length/var": 148238.578125, + "epoch": 0.0864, + "feature_vector_variance/max_squared_error": 178057.671875, + "feature_vector_variance/metric": 27104.2578125, + "generated_tokens/total": 4024853.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 7.76174622526876e-06, + "loss": 0.0, + "mean_logprobs": -2.515625, + "mean_logprobs/var": 1.8359375, + "num_completions/total": 5184, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 258.5041809082031, + "per_token_feature_norm/max": 422.0, + "per_token_feature_norm/median": 262.0, + "per_token_feature_norm/min": 127.5, + "per_token_feature_norm/p25": 221.0, + "per_token_feature_norm/p75": 296.0, + "per_token_feature_norm/var": 2658.068115234375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6451503038406372, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8828125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0546875, + "per_token_policy_error_norm/p75": 0.984375, + "per_token_policy_error_norm/var": 0.19460168480873108, + "policy_entropy": 1.8173824548721313, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.004730224609375, + "policy_entropy/p25": 0.30078125, + "policy_entropy/p75": 2.578125, + "policy_entropy/var": 1.4403105974197388, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3838566541671753, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.14189259707927704, + "policy_sharpness/min": 0.019107893109321594, + "policy_sharpness/p25": 0.09745132923126221, + "policy_sharpness/p75": 0.3822689950466156, + "policy_sharpness/var": 0.6859133839607239, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 54, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 5.625402081932407e-06, + "adam_stats/lm_head/lr_effective_mean": -7.284438677257477e-12, + "adam_stats/lm_head/lr_effective_min": -5.630870418826817e-06, + "adam_stats/lm_head/lr_effective_std": 1.5094907723778306e-07, + "adam_stats/lr_effective_max": 5.677737590303877e-06, + "adam_stats/lr_effective_mean": -1.8910803478711102e-11, + "adam_stats/lr_effective_min": -5.6929334277810995e-06, + "adam_stats/m_t_max": 0.00015756965149194002, + "adam_stats/m_t_mean": -8.688984319375759e-13, + "adam_stats/m_t_min": -0.00012596178567036986, + "adam_stats/v_t_max": 2.768722697510384e-05, + "adam_stats/v_t_mean": 6.004862839609215e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1395456790924072, + "all_logprobs/max": -0.000457763671875, + "all_logprobs/median": -2.25, + "all_logprobs/min": -19.875, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.96875, + "all_logprobs/p25": -3.09375, + "all_logprobs/p5": -5.90625, + "all_logprobs/p75": -0.09814453125, + "all_logprobs/var": 3.5488011837005615, + "clip_ratio": 0.0, + "completion_length": 755.3229370117188, + "completion_length/incorrect": 755.3229370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 11.0, + "completion_length/incorrect/p25": 316.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 158737.671875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 11.0, + "completion_length/p25": 316.75, + "completion_length/p75": 1024.0, + "completion_length/var": 158737.671875, + "epoch": 0.088, + "feature_vector_variance/max_squared_error": 124868.0234375, + "feature_vector_variance/metric": 28337.859375, + "generated_tokens/total": 4097364.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 7.5e-06, + "loss": 0.0, + "mean_logprobs": -2.78125, + "mean_logprobs/var": 2.140625, + "num_completions/total": 5280, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 246.32113647460938, + "per_token_feature_norm/max": 388.0, + "per_token_feature_norm/median": 246.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 207.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2684.501953125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6521206498146057, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.89453125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0625, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.1973959356546402, + "policy_entropy": 1.882503628730774, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.00445556640625, + "policy_entropy/p25": 0.345703125, + "policy_entropy/p75": 2.765625, + "policy_entropy/var": 1.5596482753753662, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.35105711221694946, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12005027383565903, + "policy_sharpness/min": 0.017627550289034843, + "policy_sharpness/p25": 0.08742018043994904, + "policy_sharpness/p75": 0.2919597029685974, + "policy_sharpness/var": 0.8328458666801453, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 55, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.888578587269876e-06, + "adam_stats/lm_head/lr_effective_mean": -6.330094001055775e-12, + "adam_stats/lm_head/lr_effective_min": -4.893334335065447e-06, + "adam_stats/lm_head/lr_effective_std": 1.3116410002567136e-07, + "adam_stats/lr_effective_max": 4.9340937948727515e-06, + "adam_stats/lr_effective_mean": -1.6433073651844765e-11, + "adam_stats/lr_effective_min": -4.947298293700442e-06, + "adam_stats/m_t_max": 0.00014181267761159688, + "adam_stats/m_t_mean": -7.820081387999167e-13, + "adam_stats/m_t_min": -0.00011336560419294983, + "adam_stats/v_t_max": 2.765954013739247e-05, + "adam_stats/v_t_mean": 5.9988585279779905e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9483412504196167, + "all_logprobs/max": -0.001068115234375, + "all_logprobs/median": -1.984375, + "all_logprobs/min": -12.5, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -5.0, + "all_logprobs/p25": -3.046875, + "all_logprobs/p5": -5.90625, + "all_logprobs/p75": -0.03173828125, + "all_logprobs/var": 3.8772404193878174, + "clip_ratio": 0.0, + "completion_length": 671.8541870117188, + "completion_length/incorrect": 671.8541870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 8.0, + "completion_length/incorrect/p25": 151.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 191199.46875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 8.0, + "completion_length/p25": 151.0, + "completion_length/p75": 1024.0, + "completion_length/var": 191199.46875, + "epoch": 0.0896, + "feature_vector_variance/max_squared_error": 121370.921875, + "feature_vector_variance/metric": 26045.326171875, + "generated_tokens/total": 4161862.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 7.238253774731245e-06, + "loss": 0.0, + "mean_logprobs": -2.84375, + "mean_logprobs/var": 2.578125, + "num_completions/total": 5376, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 243.21795654296875, + "per_token_feature_norm/max": 392.0, + "per_token_feature_norm/median": 244.0, + "per_token_feature_norm/min": 127.5, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2607.655029296875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5812342762947083, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.84375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.22041119635105133, + "policy_entropy": 1.6746679544448853, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.265625, + "policy_entropy/min": 0.00482177734375, + "policy_entropy/p25": 0.123046875, + "policy_entropy/p75": 2.71875, + "policy_entropy/var": 1.7750180959701538, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.454285204410553, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13524310290813446, + "policy_sharpness/min": 0.021551618352532387, + "policy_sharpness/p25": 0.0883767157793045, + "policy_sharpness/p75": 0.3935920000076294, + "policy_sharpness/var": 1.2633923292160034, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 56, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.242904196871677e-06, + "adam_stats/lm_head/lr_effective_mean": -5.4938380233959805e-12, + "adam_stats/lm_head/lr_effective_min": -4.247034212312428e-06, + "adam_stats/lm_head/lr_effective_std": 1.1382851283769924e-07, + "adam_stats/lr_effective_max": 4.282438112568343e-06, + "adam_stats/lr_effective_mean": -1.4261965740336446e-11, + "adam_stats/lr_effective_min": -4.293898655305384e-06, + "adam_stats/m_t_max": 0.00012763139966409653, + "adam_stats/m_t_mean": -7.038075092342944e-13, + "adam_stats/m_t_min": -0.00010202904377365485, + "adam_stats/v_t_max": 2.763188058452215e-05, + "adam_stats/v_t_mean": 5.992858553155456e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1132400035858154, + "all_logprobs/max": -0.0012664794921875, + "all_logprobs/median": -2.203125, + "all_logprobs/min": -17.0, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -5.03125, + "all_logprobs/p25": -3.203125, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.06494140625, + "all_logprobs/var": 3.7741587162017822, + "clip_ratio": 0.0, + "completion_length": 595.6875, + "completion_length/incorrect": 595.6875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 746.0, + "completion_length/incorrect/min": 25.0, + "completion_length/incorrect/p25": 162.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 187138.890625, + "completion_length/max": 1024.0, + "completion_length/median": 746.0, + "completion_length/min": 25.0, + "completion_length/p25": 162.0, + "completion_length/p75": 1024.0, + "completion_length/var": 187138.890625, + "epoch": 0.0912, + "feature_vector_variance/max_squared_error": 114647.3203125, + "feature_vector_variance/metric": 26345.90625, + "generated_tokens/total": 4219048.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 6.976826446919061e-06, + "loss": 0.0, + "mean_logprobs": -3.046875, + "mean_logprobs/var": 2.140625, + "num_completions/total": 5472, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 243.78504943847656, + "per_token_feature_norm/max": 390.0, + "per_token_feature_norm/median": 243.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 201.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2807.937255859375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6337357759475708, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.88671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.04296875, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.20889292657375336, + "policy_entropy": 1.831646203994751, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.00445556640625, + "policy_entropy/p25": 0.2431640625, + "policy_entropy/p75": 2.8125, + "policy_entropy/var": 1.6397606134414673, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4334234893321991, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12621697783470154, + "policy_sharpness/min": 0.01754351705312729, + "policy_sharpness/p25": 0.08859619498252869, + "policy_sharpness/p75": 0.36208558082580566, + "policy_sharpness/var": 1.2462151050567627, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 57, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.67768780051847e-06, + "adam_stats/lm_head/lr_effective_mean": -4.7618172425989425e-12, + "adam_stats/lm_head/lr_effective_min": -3.681270527522429e-06, + "adam_stats/lm_head/lr_effective_std": 9.865478034498665e-08, + "adam_stats/lr_effective_max": 3.711980752996169e-06, + "adam_stats/lr_effective_mean": -1.2361488568868317e-11, + "adam_stats/lr_effective_min": -3.721914708876284e-06, + "adam_stats/m_t_max": 0.00011486825678730384, + "adam_stats/m_t_mean": -6.334266336276151e-13, + "adam_stats/m_t_min": -9.182613575831056e-05, + "adam_stats/v_t_max": 2.7604248316492885e-05, + "adam_stats/v_t_mean": 5.986866384588563e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1622347831726074, + "all_logprobs/max": -0.00106048583984375, + "all_logprobs/median": -2.28125, + "all_logprobs/min": -21.875, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -4.71875, + "all_logprobs/p25": -3.046875, + "all_logprobs/p5": -5.78125, + "all_logprobs/p75": -0.1064453125, + "all_logprobs/var": 3.2960851192474365, + "clip_ratio": 0.0, + "completion_length": 751.1354370117188, + "completion_length/incorrect": 751.1354370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 17.0, + "completion_length/incorrect/p25": 272.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 161013.828125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 17.0, + "completion_length/p25": 272.25, + "completion_length/p75": 1024.0, + "completion_length/var": 161013.828125, + "epoch": 0.0928, + "feature_vector_variance/max_squared_error": 169689.3125, + "feature_vector_variance/metric": 27572.826171875, + "generated_tokens/total": 4291157.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 6.7160365254926005e-06, + "loss": 0.0, + "mean_logprobs": -2.765625, + "mean_logprobs/var": 1.953125, + "num_completions/total": 5568, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 249.79367065429688, + "per_token_feature_norm/max": 400.0, + "per_token_feature_norm/median": 254.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 204.0, + "per_token_feature_norm/p75": 292.0, + "per_token_feature_norm/var": 3123.656982421875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6758105754852295, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8984375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.08984375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.18782228231430054, + "policy_entropy": 1.9102864265441895, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.004852294921875, + "policy_entropy/p25": 0.326171875, + "policy_entropy/p75": 2.71875, + "policy_entropy/var": 1.4972407817840576, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.35613059997558594, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12532563507556915, + "policy_sharpness/min": 0.018768757581710815, + "policy_sharpness/p25": 0.08808372914791107, + "policy_sharpness/p75": 0.31490558385849, + "policy_sharpness/var": 0.8437452912330627, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 58, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.1834301807975862e-06, + "adam_stats/lm_head/lr_effective_mean": -4.121722060879129e-12, + "adam_stats/lm_head/lr_effective_min": -3.1865340588410618e-06, + "adam_stats/lm_head/lr_effective_std": 8.538746243402784e-08, + "adam_stats/lr_effective_max": 3.213137233615271e-06, + "adam_stats/lr_effective_mean": -1.0699685928927671e-11, + "adam_stats/lr_effective_min": -3.2217358238995075e-06, + "adam_stats/m_t_max": 0.00010338142601540312, + "adam_stats/m_t_mean": -5.700838022135168e-13, + "adam_stats/m_t_min": -8.264352072728798e-05, + "adam_stats/v_t_max": 2.7576645152294077e-05, + "adam_stats/v_t_mean": 5.980879420192098e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.3290295600891113, + "all_logprobs/max": -0.00099945068359375, + "all_logprobs/median": -2.3125, + "all_logprobs/min": -21.25, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -4.96875, + "all_logprobs/p25": -3.078125, + "all_logprobs/p5": -5.90625, + "all_logprobs/p75": -0.671875, + "all_logprobs/var": 3.192063331604004, + "clip_ratio": 0.0, + "completion_length": 667.75, + "completion_length/incorrect": 667.75, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 7.0, + "completion_length/incorrect/p25": 161.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 184945.53125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 7.0, + "completion_length/p25": 161.75, + "completion_length/p75": 1024.0, + "completion_length/var": 184945.53125, + "epoch": 0.0944, + "feature_vector_variance/max_squared_error": 115898.3984375, + "feature_vector_variance/metric": 28937.005859375, + "generated_tokens/total": 4355261.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 6.456201742799511e-06, + "loss": 0.0, + "mean_logprobs": -3.09375, + "mean_logprobs/var": 1.9375, + "num_completions/total": 5664, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 255.31063842773438, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 215.0, + "per_token_feature_norm/p75": 296.0, + "per_token_feature_norm/var": 2867.278564453125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.721611738204956, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.90625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.16724500060081482, + "policy_entropy": 2.055471420288086, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.003875732421875, + "policy_entropy/p25": 1.328125, + "policy_entropy/p75": 2.75, + "policy_entropy/var": 1.3058768510818481, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.37101253867149353, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13026073575019836, + "policy_sharpness/min": 0.01813475415110588, + "policy_sharpness/p25": 0.08935776352882385, + "policy_sharpness/p75": 0.35122257471084595, + "policy_sharpness/var": 0.8998095989227295, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 59, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.751699639702565e-06, + "adam_stats/lm_head/lr_effective_mean": -3.562616998284418e-12, + "adam_stats/lm_head/lr_effective_min": -2.7543837859411724e-06, + "adam_stats/lm_head/lr_effective_std": 7.379979649613233e-08, + "adam_stats/lr_effective_max": 2.7773969577538082e-06, + "adam_stats/lr_effective_mean": -9.24818555070317e-12, + "adam_stats/lr_effective_min": -2.784829575830372e-06, + "adam_stats/m_t_max": 9.3043279775884e-05, + "adam_stats/m_t_mean": -5.130757472528169e-13, + "adam_stats/m_t_min": -7.437916792696342e-05, + "adam_stats/v_t_max": 2.7549069272936322e-05, + "adam_stats/v_t_mean": 5.97489809364693e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.445023775100708, + "all_logprobs/max": -0.000751495361328125, + "all_logprobs/median": -2.375, + "all_logprobs/min": -19.0, + "all_logprobs/p1": -7.03125, + "all_logprobs/p10": -5.5, + "all_logprobs/p25": -3.6875, + "all_logprobs/p5": -6.15625, + "all_logprobs/p75": -0.0986328125, + "all_logprobs/var": 4.146855354309082, + "clip_ratio": 0.0, + "completion_length": 571.03125, + "completion_length/incorrect": 571.03125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 345.0, + "completion_length/incorrect/min": 12.0, + "completion_length/incorrect/p25": 129.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 189297.109375, + "completion_length/max": 1024.0, + "completion_length/median": 345.0, + "completion_length/min": 12.0, + "completion_length/p25": 129.0, + "completion_length/p75": 1024.0, + "completion_length/var": 189297.109375, + "epoch": 0.096, + "feature_vector_variance/max_squared_error": 96458.6015625, + "feature_vector_variance/metric": 27611.423828125, + "generated_tokens/total": 4410080.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 6.197638667498023e-06, + "loss": 0.0, + "mean_logprobs": -3.421875, + "mean_logprobs/var": 2.15625, + "num_completions/total": 5760, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 237.63365173339844, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 234.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 193.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 3018.560302734375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6905180215835571, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.9140625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.078125, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.19162099063396454, + "policy_entropy": 2.0307464599609375, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.390625, + "policy_entropy/min": 0.003173828125, + "policy_entropy/p25": 0.3359375, + "policy_entropy/p75": 3.078125, + "policy_entropy/var": 1.71383535861969, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.44482821226119995, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11765006184577942, + "policy_sharpness/min": 0.01898709498345852, + "policy_sharpness/p25": 0.07504501938819885, + "policy_sharpness/p75": 0.31441909074783325, + "policy_sharpness/var": 1.7317765951156616, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 60, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.375013536948245e-06, + "adam_stats/lm_head/lr_effective_mean": -3.0748181778506023e-12, + "adam_stats/lm_head/lr_effective_min": -2.3773320663167397e-06, + "adam_stats/lm_head/lr_effective_std": 6.369064919908851e-08, + "adam_stats/lr_effective_max": 2.3972104372660397e-06, + "adam_stats/lr_effective_mean": -7.981813127111437e-12, + "adam_stats/lr_effective_min": -2.403625103397644e-06, + "adam_stats/m_t_max": 8.373895252589136e-05, + "adam_stats/m_t_mean": -4.617678310038509e-13, + "adam_stats/m_t_min": -6.694124749628827e-05, + "adam_stats/v_t_max": 2.752152067841962e-05, + "adam_stats/v_t_mean": 5.968924573357404e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.6073445081710815, + "all_logprobs/max": -0.0001277923583984375, + "all_logprobs/median": -0.69140625, + "all_logprobs/min": -17.5, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.3125, + "all_logprobs/p25": -2.6875, + "all_logprobs/p5": -5.5625, + "all_logprobs/p75": -0.02392578125, + "all_logprobs/var": 3.4872255325317383, + "clip_ratio": 0.0, + "completion_length": 832.9479370117188, + "completion_length/incorrect": 832.9479370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 13.0, + "completion_length/incorrect/p25": 1024.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 130044.6171875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 13.0, + "completion_length/p25": 1024.0, + "completion_length/p75": 1024.0, + "completion_length/var": 130044.6171875, + "epoch": 0.0976, + "feature_vector_variance/max_squared_error": 177277.359375, + "feature_vector_variance/metric": 28335.69140625, + "generated_tokens/total": 4490043.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 5.9406623188668065e-06, + "loss": 0.0, + "mean_logprobs": -2.109375, + "mean_logprobs/var": 2.09375, + "num_completions/total": 5856, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 247.77987670898438, + "per_token_feature_norm/max": 408.0, + "per_token_feature_norm/median": 251.0, + "per_token_feature_norm/min": 123.5, + "per_token_feature_norm/p25": 209.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2720.65478515625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5054638385772705, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.416015625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.015625, + "per_token_policy_error_norm/p75": 0.97265625, + "per_token_policy_error_norm/var": 0.2276507169008255, + "policy_entropy": 1.4279295206069946, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 1.28125, + "policy_entropy/min": 0.0011138916015625, + "policy_entropy/p25": 0.10595703125, + "policy_entropy/p75": 2.484375, + "policy_entropy/var": 1.6314263343811035, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.657354474067688, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1457803100347519, + "policy_sharpness/min": 0.0191341545432806, + "policy_sharpness/p25": 0.09250319004058838, + "policy_sharpness/p75": 0.4443665146827698, + "policy_sharpness/var": 2.4900400638580322, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 61, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.0467412014113506e-06, + "adam_stats/lm_head/lr_effective_mean": -2.649730478435086e-12, + "adam_stats/lm_head/lr_effective_min": -2.048740498139523e-06, + "adam_stats/lm_head/lr_effective_std": 5.488174537049417e-08, + "adam_stats/lr_effective_max": 2.0658842458942672e-06, + "adam_stats/lr_effective_mean": -6.878254910080983e-12, + "adam_stats/lr_effective_min": -2.0714119273179676e-06, + "adam_stats/m_t_max": 7.536505290772766e-05, + "adam_stats/m_t_mean": -4.155915791625303e-13, + "adam_stats/m_t_min": -6.024712274665944e-05, + "adam_stats/v_t_max": 2.749399936874397e-05, + "adam_stats/v_t_mean": 5.9629549561956985e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.162020683288574, + "all_logprobs/max": -0.0012054443359375, + "all_logprobs/median": -2.234375, + "all_logprobs/min": -18.0, + "all_logprobs/p1": -6.9375, + "all_logprobs/p10": -5.03125, + "all_logprobs/p25": -3.1875, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.0751953125, + "all_logprobs/var": 3.69824481010437, + "clip_ratio": 0.0, + "completion_length": 685.1041870117188, + "completion_length/incorrect": 685.1041870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 9.0, + "completion_length/incorrect/p25": 209.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 180743.984375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 9.0, + "completion_length/p25": 209.0, + "completion_length/p75": 1024.0, + "completion_length/var": 180743.984375, + "epoch": 0.0992, + "feature_vector_variance/max_squared_error": 117244.0703125, + "feature_vector_variance/metric": 28728.13671875, + "generated_tokens/total": 4555813.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 5.685585783002493e-06, + "loss": 0.0, + "mean_logprobs": -2.984375, + "mean_logprobs/var": 2.390625, + "num_completions/total": 5952, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 246.98052978515625, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 247.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 208.0, + "per_token_feature_norm/p75": 286.0, + "per_token_feature_norm/var": 2785.434326171875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6499583125114441, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0546875, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.199637308716774, + "policy_entropy": 1.8833259344100952, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.0045166015625, + "policy_entropy/p25": 0.259765625, + "policy_entropy/p75": 2.875, + "policy_entropy/var": 1.6391428709030151, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.38865968585014343, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1278342604637146, + "policy_sharpness/min": 0.015570227988064289, + "policy_sharpness/p25": 0.08330244570970535, + "policy_sharpness/p75": 0.35086795687675476, + "policy_sharpness/var": 0.9540440440177917, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 62, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.7610086615604814e-06, + "adam_stats/lm_head/lr_effective_mean": -2.2797406323826763e-12, + "adam_stats/lm_head/lr_effective_min": -1.7627299939704244e-06, + "adam_stats/lm_head/lr_effective_std": 4.721520951989078e-08, + "adam_stats/lr_effective_max": 1.777491888788063e-06, + "adam_stats/lr_effective_mean": -5.917748062411743e-12, + "adam_stats/lr_effective_min": -1.7822478639573092e-06, + "adam_stats/m_t_max": 6.782854325138032e-05, + "adam_stats/m_t_mean": -3.7403215832725045e-13, + "adam_stats/m_t_min": -5.4222407925408334e-05, + "adam_stats/v_t_max": 2.7466505343909375e-05, + "adam_stats/v_t_mean": 5.956992277927897e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.2744998931884766, + "all_logprobs/max": -0.000858306884765625, + "all_logprobs/median": -2.296875, + "all_logprobs/min": -19.875, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -5.0625, + "all_logprobs/p25": -3.140625, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.22265625, + "all_logprobs/var": 3.4125938415527344, + "clip_ratio": 0.0, + "completion_length": 672.0729370117188, + "completion_length/incorrect": 672.0729370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 24.0, + "completion_length/incorrect/p25": 178.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 179610.953125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 24.0, + "completion_length/p25": 178.0, + "completion_length/p75": 1024.0, + "completion_length/var": 179610.953125, + "epoch": 0.1008, + "feature_vector_variance/max_squared_error": 108457.1953125, + "feature_vector_variance/metric": 26989.7265625, + "generated_tokens/total": 4620332.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 5.432719831372507e-06, + "loss": 0.0, + "mean_logprobs": -3.015625, + "mean_logprobs/var": 1.9921875, + "num_completions/total": 6048, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 245.55479431152344, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 244.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 201.0, + "per_token_feature_norm/p75": 286.0, + "per_token_feature_norm/var": 2863.7060546875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6925484538078308, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.90234375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.13671875, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.17996510863304138, + "policy_entropy": 1.9876712560653687, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.0031585693359375, + "policy_entropy/p25": 0.654296875, + "policy_entropy/p75": 2.78125, + "policy_entropy/var": 1.4610695838928223, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.48289597034454346, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13224564492702484, + "policy_sharpness/min": 0.019416263327002525, + "policy_sharpness/p25": 0.09191514551639557, + "policy_sharpness/p75": 0.3747972249984741, + "policy_sharpness/var": 1.5733484029769897, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 63, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.512618155175005e-06, + "adam_stats/lm_head/lr_effective_mean": -1.9581159610426724e-12, + "adam_stats/lm_head/lr_effective_min": -1.5140976756811142e-06, + "adam_stats/lm_head/lr_effective_std": 4.0551334734573175e-08, + "adam_stats/lr_effective_max": 1.5267870594470878e-06, + "adam_stats/lr_effective_mean": -5.082812209317167e-12, + "adam_stats/lr_effective_min": -1.530872054900101e-06, + "adam_stats/m_t_max": 6.104568456066772e-05, + "adam_stats/m_t_mean": -3.366290536252481e-13, + "adam_stats/m_t_min": -4.8800164222484455e-05, + "adam_stats/v_t_max": 2.7439038603915833e-05, + "adam_stats/v_t_mean": 5.951036104873131e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9765669107437134, + "all_logprobs/max": -0.00102996826171875, + "all_logprobs/median": -2.09375, + "all_logprobs/min": -17.375, + "all_logprobs/p1": -6.90625, + "all_logprobs/p10": -5.0, + "all_logprobs/p25": -3.046875, + "all_logprobs/p5": -5.90625, + "all_logprobs/p75": -0.035400390625, + "all_logprobs/var": 3.8788747787475586, + "clip_ratio": 0.0, + "completion_length": 735.9583740234375, + "completion_length/incorrect": 735.9583740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 12.0, + "completion_length/incorrect/p25": 284.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 160447.96875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 12.0, + "completion_length/p25": 284.5, + "completion_length/p75": 1024.0, + "completion_length/var": 160447.96875, + "epoch": 0.1024, + "feature_vector_variance/max_squared_error": 113733.03125, + "feature_vector_variance/metric": 27434.041015625, + "generated_tokens/total": 4690984.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 5.182372542187895e-06, + "loss": 0.0, + "mean_logprobs": -2.671875, + "mean_logprobs/var": 2.34375, + "num_completions/total": 6144, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 240.55723571777344, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 241.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 200.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2663.851806640625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5906612873077393, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.2195919007062912, + "policy_entropy": 1.6902403831481934, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 0.00506591796875, + "policy_entropy/p25": 0.1357421875, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.7586359977722168, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.43582719564437866, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13243107497692108, + "policy_sharpness/min": 0.016023408621549606, + "policy_sharpness/p25": 0.08735716342926025, + "policy_sharpness/p75": 0.37583526968955994, + "policy_sharpness/var": 1.2878761291503906, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 64, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.296973209718999e-06, + "adam_stats/lm_head/lr_effective_mean": -1.678902459764653e-12, + "adam_stats/lm_head/lr_effective_min": -1.2982427506358363e-06, + "adam_stats/lm_head/lr_effective_std": 3.476660381807051e-08, + "adam_stats/lr_effective_max": 1.3091314485791372e-06, + "adam_stats/lr_effective_mean": -4.357982724689791e-12, + "adam_stats/lr_effective_min": -1.3126339126756648e-06, + "adam_stats/m_t_max": 5.494111610460095e-05, + "adam_stats/m_t_mean": -3.0296633799810346e-13, + "adam_stats/m_t_min": -4.392014670884237e-05, + "adam_stats/v_t_max": 2.7411599148763344e-05, + "adam_stats/v_t_mean": 5.9450842686270544e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9544150829315186, + "all_logprobs/max": -0.000823974609375, + "all_logprobs/median": -2.109375, + "all_logprobs/min": -16.5, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.75, + "all_logprobs/p25": -2.984375, + "all_logprobs/p5": -5.78125, + "all_logprobs/p75": -0.03369140625, + "all_logprobs/var": 3.594964027404785, + "clip_ratio": 0.0, + "completion_length": 711.1666870117188, + "completion_length/incorrect": 711.1666870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 18.0, + "completion_length/incorrect/p25": 193.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 177755.109375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 18.0, + "completion_length/p25": 193.5, + "completion_length/p75": 1024.0, + "completion_length/var": 177755.109375, + "epoch": 0.104, + "feature_vector_variance/max_squared_error": 150499.71875, + "feature_vector_variance/metric": 26709.564453125, + "generated_tokens/total": 4759256.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.934848925057485e-06, + "loss": 0.0, + "mean_logprobs": -2.671875, + "mean_logprobs/var": 2.25, + "num_completions/total": 6240, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 251.00634765625, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 255.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 211.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2747.9306640625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5989192724227905, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.86328125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.21446539461612701, + "policy_entropy": 1.7189302444458008, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.296875, + "policy_entropy/min": 0.0037078857421875, + "policy_entropy/p25": 0.134765625, + "policy_entropy/p75": 2.6875, + "policy_entropy/var": 1.6771247386932373, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4963602125644684, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13269957900047302, + "policy_sharpness/min": 0.017674067988991737, + "policy_sharpness/p25": 0.09255006164312363, + "policy_sharpness/p75": 0.3800223171710968, + "policy_sharpness/var": 1.4267221689224243, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 65, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.1100131587227224e-06, + "adam_stats/lm_head/lr_effective_mean": -1.4368375196235905e-12, + "adam_stats/lm_head/lr_effective_min": -1.111100459638692e-06, + "adam_stats/lm_head/lr_effective_std": 2.9751907604236294e-08, + "adam_stats/lr_effective_max": 1.1204267593711847e-06, + "adam_stats/lr_effective_mean": -3.72960169692238e-12, + "adam_stats/lr_effective_min": -1.1234241128477152e-06, + "adam_stats/m_t_max": 4.9447004130342975e-05, + "adam_stats/m_t_mean": -2.726697448558746e-13, + "adam_stats/m_t_min": -3.952813131036237e-05, + "adam_stats/v_t_max": 2.738418879744131e-05, + "adam_stats/v_t_mean": 5.939138937594013e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8981194496154785, + "all_logprobs/max": -0.00121307373046875, + "all_logprobs/median": -2.15625, + "all_logprobs/min": -17.25, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.3125, + "all_logprobs/p25": -2.765625, + "all_logprobs/p5": -5.5625, + "all_logprobs/p75": -0.040771484375, + "all_logprobs/var": 3.2035927772521973, + "clip_ratio": 0.0, + "completion_length": 811.2708740234375, + "completion_length/incorrect": 811.2708740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 32.0, + "completion_length/incorrect/p25": 880.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 143758.78125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 32.0, + "completion_length/p25": 880.5, + "completion_length/p75": 1024.0, + "completion_length/var": 143758.78125, + "epoch": 0.1056, + "feature_vector_variance/max_squared_error": 115330.03125, + "feature_vector_variance/metric": 25094.9375, + "generated_tokens/total": 4837138.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.6904505493806595e-06, + "loss": 0.0, + "mean_logprobs": -2.4375, + "mean_logprobs/var": 1.9609375, + "num_completions/total": 6336, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 252.4497528076172, + "per_token_feature_norm/max": 380.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 208.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 3107.172119140625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6094207167625427, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.98046875, + "per_token_policy_error_norm/var": 0.2068381905555725, + "policy_entropy": 1.6970446109771729, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.296875, + "policy_entropy/min": 0.005767822265625, + "policy_entropy/p25": 0.1513671875, + "policy_entropy/p75": 2.515625, + "policy_entropy/var": 1.5204123258590698, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3865937888622284, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1593504250049591, + "policy_sharpness/min": 0.0180222075432539, + "policy_sharpness/p25": 0.10171490907669067, + "policy_sharpness/p75": 0.4141674041748047, + "policy_sharpness/var": 0.5745503306388855, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 66, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 9.481539109401638e-07, + "adam_stats/lm_head/lr_effective_mean": -1.2272801047999438e-12, + "adam_stats/lm_head/lr_effective_min": -9.49083300838538e-07, + "adam_stats/lm_head/lr_effective_std": 2.5410948012449808e-08, + "adam_stats/lr_effective_max": 9.57055704020604e-07, + "adam_stats/lr_effective_mean": -3.185613231537765e-12, + "adam_stats/lr_effective_min": -9.596159316060948e-07, + "adam_stats/m_t_max": 4.45023033535108e-05, + "adam_stats/m_t_mean": -2.4540246679367883e-13, + "adam_stats/m_t_min": -3.557531817932613e-05, + "adam_stats/v_t_max": 2.7356805730960332e-05, + "adam_stats/v_t_mean": 5.933199678093137e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0227415561676025, + "all_logprobs/max": -0.0006866455078125, + "all_logprobs/median": -2.15625, + "all_logprobs/min": -17.125, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -4.71875, + "all_logprobs/p25": -2.96875, + "all_logprobs/p5": -5.75, + "all_logprobs/p75": -0.09912109375, + "all_logprobs/var": 3.4482803344726562, + "clip_ratio": 0.0, + "completion_length": 733.0208740234375, + "completion_length/incorrect": 733.0208740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 25.0, + "completion_length/incorrect/p25": 227.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 171625.40625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 25.0, + "completion_length/p25": 227.75, + "completion_length/p75": 1024.0, + "completion_length/var": 171625.40625, + "epoch": 0.1072, + "feature_vector_variance/max_squared_error": 177260.609375, + "feature_vector_variance/metric": 27395.310546875, + "generated_tokens/total": 4907508.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.4494751769315e-06, + "loss": 0.0, + "mean_logprobs": -2.71875, + "mean_logprobs/var": 2.21875, + "num_completions/total": 6432, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 253.79962158203125, + "per_token_feature_norm/max": 414.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 214.0, + "per_token_feature_norm/p75": 292.0, + "per_token_feature_norm/var": 2737.562255859375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6331211924552917, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0625, + "per_token_policy_error_norm/p75": 0.99609375, + "per_token_policy_error_norm/var": 0.2019282579421997, + "policy_entropy": 1.7985812425613403, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.004180908203125, + "policy_entropy/p25": 0.36328125, + "policy_entropy/p75": 2.671875, + "policy_entropy/var": 1.5063374042510986, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3328273594379425, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12707167863845825, + "policy_sharpness/min": 0.020656513050198555, + "policy_sharpness/p25": 0.08961804956197739, + "policy_sharpness/p75": 0.3233380913734436, + "policy_sharpness/var": 0.7352419495582581, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 67, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 8.082341196313791e-07, + "adam_stats/lm_head/lr_effective_mean": -1.0461332321243244e-12, + "adam_stats/lm_head/lr_effective_min": -8.090269147942308e-07, + "adam_stats/lm_head/lr_effective_std": 2.1658811633074038e-08, + "adam_stats/lr_effective_max": 8.158281161740888e-07, + "adam_stats/lr_effective_mean": -2.715382172585601e-12, + "adam_stats/lr_effective_min": -8.180105623978307e-07, + "adam_stats/m_t_max": 4.005207301815972e-05, + "adam_stats/m_t_mean": -2.2086230956099018e-13, + "adam_stats/m_t_min": -3.201778599759564e-05, + "adam_stats/v_t_max": 2.7329449949320406e-05, + "adam_stats/v_t_mean": 5.927267791167035e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.055643320083618, + "all_logprobs/max": -0.00160980224609375, + "all_logprobs/median": -2.234375, + "all_logprobs/min": -18.0, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.3125, + "all_logprobs/p25": -2.890625, + "all_logprobs/p5": -5.5625, + "all_logprobs/p75": -0.13671875, + "all_logprobs/var": 3.0209739208221436, + "clip_ratio": 0.0, + "completion_length": 812.3333740234375, + "completion_length/incorrect": 812.3333740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 40.0, + "completion_length/incorrect/p25": 869.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 139514.46875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 40.0, + "completion_length/p25": 869.75, + "completion_length/p75": 1024.0, + "completion_length/var": 139514.46875, + "epoch": 0.1088, + "feature_vector_variance/max_squared_error": 165455.671875, + "feature_vector_variance/metric": 27280.2890625, + "generated_tokens/total": 4985492.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.212216399081919e-06, + "loss": 0.0, + "mean_logprobs": -2.546875, + "mean_logprobs/var": 1.9375, + "num_completions/total": 6528, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 248.58639526367188, + "per_token_feature_norm/max": 390.0, + "per_token_feature_norm/median": 248.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 208.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2850.8681640625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6623904705047607, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.08203125, + "per_token_policy_error_norm/p75": 0.98828125, + "per_token_policy_error_norm/var": 0.19005446135997772, + "policy_entropy": 1.8692495822906494, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.005584716796875, + "policy_entropy/p25": 0.451171875, + "policy_entropy/p75": 2.625, + "policy_entropy/var": 1.3763669729232788, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3134031295776367, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12754125893115997, + "policy_sharpness/min": 0.020412666723132133, + "policy_sharpness/p25": 0.0955030769109726, + "policy_sharpness/p75": 0.3063346743583679, + "policy_sharpness/var": 0.48646780848503113, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 68, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 6.874686846458644e-07, + "adam_stats/lm_head/lr_effective_mean": -8.897913872721319e-13, + "adam_stats/lm_head/lr_effective_min": -6.881435865579988e-07, + "adam_stats/lm_head/lr_effective_std": 1.8420683645103963e-08, + "adam_stats/lr_effective_max": 6.939329182387155e-07, + "adam_stats/lr_effective_mean": -2.309545350104303e-12, + "adam_stats/lr_effective_min": -6.95789196925034e-07, + "adam_stats/m_t_max": 3.6046865716343746e-05, + "adam_stats/m_t_mean": -1.9877587802748925e-13, + "adam_stats/m_t_min": -2.8816006306442432e-05, + "adam_stats/v_t_max": 2.7302121452521533e-05, + "adam_stats/v_t_mean": 5.921340241049622e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1702377796173096, + "all_logprobs/max": -0.00124359130859375, + "all_logprobs/median": -2.265625, + "all_logprobs/min": -16.375, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -4.6875, + "all_logprobs/p25": -3.015625, + "all_logprobs/p5": -5.75, + "all_logprobs/p75": -0.2216796875, + "all_logprobs/var": 3.2034761905670166, + "clip_ratio": 0.0, + "completion_length": 730.40625, + "completion_length/incorrect": 730.40625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 10.0, + "completion_length/incorrect/p25": 198.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 173192.203125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 10.0, + "completion_length/p25": 198.25, + "completion_length/p75": 1024.0, + "completion_length/var": 173192.203125, + "epoch": 0.1104, + "feature_vector_variance/max_squared_error": 101408.078125, + "feature_vector_variance/metric": 24834.87109375, + "generated_tokens/total": 5055611.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 3.978963279105821e-06, + "loss": 0.0, + "mean_logprobs": -2.8125, + "mean_logprobs/var": 1.8671875, + "num_completions/total": 6624, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 249.9857635498047, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 254.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 3130.60302734375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6835070252418518, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8984375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.1328125, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.18452699482440948, + "policy_entropy": 1.9334489107131958, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.00518798828125, + "policy_entropy/p25": 0.66796875, + "policy_entropy/p75": 2.671875, + "policy_entropy/var": 1.3537994623184204, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.33959534764289856, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13102543354034424, + "policy_sharpness/min": 0.020139556378126144, + "policy_sharpness/p25": 0.09485293924808502, + "policy_sharpness/p75": 0.3427555561065674, + "policy_sharpness/var": 0.651888906955719, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 69, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 5.834057219544775e-07, + "adam_stats/lm_head/lr_effective_mean": -7.550768278859199e-13, + "adam_stats/lm_head/lr_effective_min": -5.839788173034322e-07, + "adam_stats/lm_head/lr_effective_std": 1.5630716276859857e-08, + "adam_stats/lr_effective_max": 5.888956025046355e-07, + "adam_stats/lr_effective_mean": -1.959855888689077e-12, + "adam_stats/lr_effective_min": -5.904707336412685e-07, + "adam_stats/m_t_max": 3.244217805331573e-05, + "adam_stats/m_t_mean": -1.7889826447493873e-13, + "adam_stats/m_t_min": -2.5934405130101368e-05, + "adam_stats/v_t_max": 2.7274820240563713e-05, + "adam_stats/v_t_mean": 5.915417895102637e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.045804738998413, + "all_logprobs/max": -0.00130462646484375, + "all_logprobs/median": -2.09375, + "all_logprobs/min": -19.0, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -5.15625, + "all_logprobs/p25": -3.25, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.0419921875, + "all_logprobs/var": 4.05867862701416, + "clip_ratio": 0.0, + "completion_length": 609.15625, + "completion_length/incorrect": 609.15625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 17.0, + "completion_length/incorrect/p25": 122.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 191125.3125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 17.0, + "completion_length/p25": 122.75, + "completion_length/p75": 1024.0, + "completion_length/var": 191125.3125, + "epoch": 0.112, + "feature_vector_variance/max_squared_error": 125425.1171875, + "feature_vector_variance/metric": 27832.732421875, + "generated_tokens/total": 5114090.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 3.750000000000002e-06, + "loss": 0.0, + "mean_logprobs": -3.0625, + "mean_logprobs/var": 2.640625, + "num_completions/total": 6720, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.86752319335938, + "per_token_feature_norm/max": 378.0, + "per_token_feature_norm/median": 244.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 207.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2453.1015625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5965331792831421, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.86328125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.21605484187602997, + "policy_entropy": 1.759526014328003, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.005462646484375, + "policy_entropy/p25": 0.1640625, + "policy_entropy/p75": 2.921875, + "policy_entropy/var": 1.8467947244644165, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4731158912181854, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11349490284919739, + "policy_sharpness/min": 0.01679171808063984, + "policy_sharpness/p25": 0.08001430332660675, + "policy_sharpness/p75": 0.3391696512699127, + "policy_sharpness/var": 1.5554829835891724, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 70, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.938892743666656e-07, + "adam_stats/lm_head/lr_effective_mean": -6.391979502119727e-13, + "adam_stats/lm_head/lr_effective_min": -4.943747740071558e-07, + "adam_stats/lm_head/lr_effective_std": 1.3231014506231986e-08, + "adam_stats/lr_effective_max": 4.985403165846947e-07, + "adam_stats/lr_effective_mean": -1.6590629694709924e-12, + "adam_stats/lr_effective_min": -4.998738063477504e-07, + "adam_stats/m_t_max": 2.9197959520388395e-05, + "adam_stats/m_t_mean": -1.6100865757838478e-13, + "adam_stats/m_t_min": -2.334096461709123e-05, + "adam_stats/v_t_max": 2.7247546313446946e-05, + "adam_stats/v_t_mean": 5.909502921730425e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.252526044845581, + "all_logprobs/max": -0.0011749267578125, + "all_logprobs/median": -2.3125, + "all_logprobs/min": -15.5625, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.65625, + "all_logprobs/p25": -3.03125, + "all_logprobs/p5": -5.71875, + "all_logprobs/p75": -0.5478515625, + "all_logprobs/var": 2.991483688354492, + "clip_ratio": 0.0, + "completion_length": 787.3958740234375, + "completion_length/incorrect": 787.3958740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 8.0, + "completion_length/incorrect/p25": 378.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 152204.5, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 8.0, + "completion_length/p25": 378.75, + "completion_length/p75": 1024.0, + "completion_length/var": 152204.5, + "epoch": 0.1136, + "feature_vector_variance/max_squared_error": 108632.5, + "feature_vector_variance/metric": 27411.837890625, + "generated_tokens/total": 5189680.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 3.525605518250964e-06, + "loss": 0.0, + "mean_logprobs": -2.78125, + "mean_logprobs/var": 1.7421875, + "num_completions/total": 6816, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 253.06723022460938, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 256.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 210.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 2954.529296875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.714370846748352, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.90234375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.328125, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.16741324961185455, + "policy_entropy": 2.0272903442382812, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.359375, + "policy_entropy/min": 0.005523681640625, + "policy_entropy/p25": 1.1640625, + "policy_entropy/p75": 2.734375, + "policy_entropy/var": 1.272252082824707, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.28567105531692505, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12433842569589615, + "policy_sharpness/min": 0.017697326838970184, + "policy_sharpness/p25": 0.09371126443147659, + "policy_sharpness/p75": 0.3266693949699402, + "policy_sharpness/var": 0.4809857904911041, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 71, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.170251202140207e-07, + "adam_stats/lm_head/lr_effective_mean": -5.397005000025434e-13, + "adam_stats/lm_head/lr_effective_min": -4.1743533074622974e-07, + "adam_stats/lm_head/lr_effective_std": 1.1170718572373062e-08, + "adam_stats/lr_effective_max": 4.20955274194057e-07, + "adam_stats/lr_effective_mean": -1.4007967962664791e-12, + "adam_stats/lr_effective_min": -4.22081171791433e-07, + "adam_stats/m_t_max": 2.6278163204551674e-05, + "adam_stats/m_t_mean": -1.4490767255830733e-13, + "adam_stats/m_t_min": -2.1006868337281048e-05, + "adam_stats/v_t_max": 2.7220299671171233e-05, + "adam_stats/v_t_mean": 5.903594019890379e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.163172960281372, + "all_logprobs/max": -0.00128173828125, + "all_logprobs/median": -2.234375, + "all_logprobs/min": -22.125, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -5.1875, + "all_logprobs/p25": -3.234375, + "all_logprobs/p5": -5.96875, + "all_logprobs/p75": -0.05517578125, + "all_logprobs/var": 3.837449312210083, + "clip_ratio": 0.0, + "completion_length": 744.7396240234375, + "completion_length/incorrect": 744.7396240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 21.0, + "completion_length/incorrect/p25": 269.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 154622.0, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 21.0, + "completion_length/p25": 269.5, + "completion_length/p75": 1024.0, + "completion_length/var": 154622.0, + "epoch": 0.1152, + "feature_vector_variance/max_squared_error": 98381.796875, + "feature_vector_variance/metric": 28225.7265625, + "generated_tokens/total": 5261175.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 3.3060532239694e-06, + "loss": 0.0, + "mean_logprobs": -2.828125, + "mean_logprobs/var": 2.3125, + "num_completions/total": 6912, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 247.47659301757812, + "per_token_feature_norm/max": 380.0, + "per_token_feature_norm/median": 249.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 207.0, + "per_token_feature_norm/p75": 286.0, + "per_token_feature_norm/var": 2649.4677734375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6408794522285461, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0390625, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.20479130744934082, + "policy_entropy": 1.8597126007080078, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.006011962890625, + "policy_entropy/p25": 0.1943359375, + "policy_entropy/p75": 2.875, + "policy_entropy/var": 1.6920573711395264, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.42862704396247864, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1275390088558197, + "policy_sharpness/min": 0.017469987273216248, + "policy_sharpness/p25": 0.08303925395011902, + "policy_sharpness/p75": 0.3827352523803711, + "policy_sharpness/var": 1.0722800493240356, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 72, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.5115073160341126e-07, + "adam_stats/lm_head/lr_effective_mean": -4.5443271541600883e-13, + "adam_stats/lm_head/lr_effective_min": -3.5149642485521326e-07, + "adam_stats/lm_head/lr_effective_std": 9.405195733336313e-09, + "adam_stats/lr_effective_max": 3.544625997164985e-07, + "adam_stats/lr_effective_mean": -1.1794686321370262e-12, + "adam_stats/lr_effective_min": -3.5541063425625907e-07, + "adam_stats/m_t_max": 2.3650345610803925e-05, + "adam_stats/m_t_mean": -1.3041691749975104e-13, + "adam_stats/m_t_min": -1.8906181139755063e-05, + "adam_stats/v_t_max": 2.7193080313736573e-05, + "adam_stats/v_t_mean": 5.897690322220761e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.071028470993042, + "all_logprobs/max": -0.00122833251953125, + "all_logprobs/median": -2.25, + "all_logprobs/min": -19.875, + "all_logprobs/p1": -6.78125, + "all_logprobs/p10": -4.59375, + "all_logprobs/p25": -2.9375, + "all_logprobs/p5": -5.6875, + "all_logprobs/p75": -0.099365234375, + "all_logprobs/var": 3.275822401046753, + "clip_ratio": 0.0, + "completion_length": 730.03125, + "completion_length/incorrect": 730.03125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 11.0, + "completion_length/incorrect/p25": 169.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 180138.4375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 11.0, + "completion_length/p25": 169.0, + "completion_length/p75": 1024.0, + "completion_length/var": 180138.4375, + "epoch": 0.1168, + "feature_vector_variance/max_squared_error": 130259.0234375, + "feature_vector_variance/metric": 27172.509765625, + "generated_tokens/total": 5331258.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 3.0916106078064522e-06, + "loss": 0.0, + "mean_logprobs": -2.765625, + "mean_logprobs/var": 2.125, + "num_completions/total": 7008, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 251.5552978515625, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 254.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 216.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2495.42578125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.650252103805542, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0625, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.19742989540100098, + "policy_entropy": 1.8497520685195923, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.0042724609375, + "policy_entropy/p25": 0.361328125, + "policy_entropy/p75": 2.6875, + "policy_entropy/var": 1.4876734018325806, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4074256122112274, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1308988630771637, + "policy_sharpness/min": 0.016734691336750984, + "policy_sharpness/p25": 0.09249277412891388, + "policy_sharpness/p75": 0.36800310015678406, + "policy_sharpness/var": 1.050363540649414, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 73, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.948088138055027e-07, + "adam_stats/lm_head/lr_effective_mean": -3.8150624152855084e-13, + "adam_stats/lm_head/lr_effective_min": -2.950992268324626e-07, + "adam_stats/lm_head/lr_effective_std": 7.89532528244763e-09, + "adam_stats/lr_effective_max": 2.9759138442386757e-07, + "adam_stats/lr_effective_mean": -9.901767990608268e-13, + "adam_stats/lr_effective_min": -2.9838727755304717e-07, + "adam_stats/m_t_max": 2.128531014022883e-05, + "adam_stats/m_t_mean": -1.1737519864472162e-13, + "adam_stats/m_t_min": -1.7015561752486974e-05, + "adam_stats/v_t_max": 2.7165888241142966e-05, + "adam_stats/v_t_mean": 5.891793563445047e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9998230934143066, + "all_logprobs/max": -0.00103759765625, + "all_logprobs/median": -1.828125, + "all_logprobs/min": -17.375, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -5.125, + "all_logprobs/p25": -3.296875, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.033203125, + "all_logprobs/var": 4.062986373901367, + "clip_ratio": 0.0, + "completion_length": 695.6771240234375, + "completion_length/incorrect": 695.6771240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 13.0, + "completion_length/incorrect/p25": 272.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 166519.171875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 13.0, + "completion_length/p25": 272.25, + "completion_length/p75": 1024.0, + "completion_length/var": 166519.171875, + "epoch": 0.1184, + "feature_vector_variance/max_squared_error": 179538.4375, + "feature_vector_variance/metric": 28539.828125, + "generated_tokens/total": 5398043.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.882538935057563e-06, + "loss": 0.0, + "mean_logprobs": -2.71875, + "mean_logprobs/var": 2.40625, + "num_completions/total": 7104, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 239.13412475585938, + "per_token_feature_norm/max": 396.0, + "per_token_feature_norm/median": 237.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 201.0, + "per_token_feature_norm/p75": 276.0, + "per_token_feature_norm/var": 2400.154296875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5851604342460632, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.2241719365119934, + "policy_entropy": 1.7152756452560425, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.1875, + "policy_entropy/min": 0.005401611328125, + "policy_entropy/p25": 0.134765625, + "policy_entropy/p75": 2.875, + "policy_entropy/var": 1.8022571802139282, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4167127311229706, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11562501639127731, + "policy_sharpness/min": 0.01682698167860508, + "policy_sharpness/p25": 0.07969687879085541, + "policy_sharpness/p75": 0.3552491068840027, + "policy_sharpness/var": 1.1956093311309814, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 74, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.4672291942806623e-07, + "adam_stats/lm_head/lr_effective_mean": -3.1926848317875844e-13, + "adam_stats/lm_head/lr_effective_min": -2.4696609557395277e-07, + "adam_stats/lm_head/lr_effective_std": 6.606849289880756e-09, + "adam_stats/lr_effective_max": 2.4905341433623107e-07, + "adam_stats/lr_effective_mean": -8.286318679828764e-13, + "adam_stats/lr_effective_min": -2.497194770967326e-07, + "adam_stats/m_t_max": 1.9156777852913365e-05, + "adam_stats/m_t_mean": -1.056378563183552e-13, + "adam_stats/m_t_min": -1.5314004485844634e-05, + "adam_stats/v_t_max": 2.7138723453390412e-05, + "adam_stats/v_t_mean": 5.885900707797154e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1656394004821777, + "all_logprobs/max": -0.000774383544921875, + "all_logprobs/median": -2.25, + "all_logprobs/min": -19.5, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -5.125, + "all_logprobs/p25": -3.25, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.06787109375, + "all_logprobs/var": 3.774149179458618, + "clip_ratio": 0.0, + "completion_length": 696.3333740234375, + "completion_length/incorrect": 696.3333740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 33.0, + "completion_length/incorrect/p25": 199.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 172404.53125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 33.0, + "completion_length/p25": 199.0, + "completion_length/p75": 1024.0, + "completion_length/var": 172404.53125, + "epoch": 0.12, + "feature_vector_variance/max_squared_error": 129333.390625, + "feature_vector_variance/metric": 27034.076171875, + "generated_tokens/total": 5464891.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.6790929273509547e-06, + "loss": 0.0, + "mean_logprobs": -2.9375, + "mean_logprobs/var": 2.296875, + "num_completions/total": 7200, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 241.3724365234375, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 239.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 199.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2753.0224609375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.643451988697052, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.89453125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.05078125, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.20284253358840942, + "policy_entropy": 1.8684720993041992, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.003448486328125, + "policy_entropy/p25": 0.240234375, + "policy_entropy/p75": 2.859375, + "policy_entropy/var": 1.6683162450790405, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4706818163394928, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12237507849931717, + "policy_sharpness/min": 0.01973814330995083, + "policy_sharpness/p25": 0.08636268973350525, + "policy_sharpness/p75": 0.32902684807777405, + "policy_sharpness/var": 1.671370267868042, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 75, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.057766153029661e-07, + "adam_stats/lm_head/lr_effective_mean": -2.6627346703424204e-13, + "adam_stats/lm_head/lr_effective_min": -2.0597958894086332e-07, + "adam_stats/lm_head/lr_effective_std": 5.50980550073632e-09, + "adam_stats/lr_effective_max": 2.0772179709638294e-07, + "adam_stats/lr_effective_mean": -6.910795720405094e-13, + "adam_stats/lr_effective_min": -2.082773136180549e-07, + "adam_stats/m_t_max": 1.7241100067622028e-05, + "adam_stats/m_t_mean": -9.50738985694248e-14, + "adam_stats/m_t_min": -1.378260367346229e-05, + "adam_stats/v_t_max": 2.711158595047891e-05, + "adam_stats/v_t_mean": 5.880015658404902e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.2154295444488525, + "all_logprobs/max": -0.001495361328125, + "all_logprobs/median": -2.25, + "all_logprobs/min": -16.5, + "all_logprobs/p1": -6.90625, + "all_logprobs/p10": -5.125, + "all_logprobs/p25": -3.28125, + "all_logprobs/p5": -5.96875, + "all_logprobs/p75": -0.134765625, + "all_logprobs/var": 3.7309329509735107, + "clip_ratio": 0.0, + "completion_length": 691.5416870117188, + "completion_length/incorrect": 691.5416870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 9.0, + "completion_length/incorrect/p25": 181.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 177109.9375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 9.0, + "completion_length/p25": 181.0, + "completion_length/p75": 1024.0, + "completion_length/var": 177109.9375, + "epoch": 0.1216, + "feature_vector_variance/max_squared_error": 157500.296875, + "feature_vector_variance/metric": 29075.15625, + "generated_tokens/total": 5531279.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.4815204523085656e-06, + "loss": 0.0, + "mean_logprobs": -2.984375, + "mean_logprobs/var": 2.25, + "num_completions/total": 7296, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.08119201660156, + "per_token_feature_norm/max": 402.0, + "per_token_feature_norm/median": 243.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 203.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2681.947509765625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6650264263153076, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.89453125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.08203125, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.19647763669490814, + "policy_entropy": 1.924115777015686, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.00555419921875, + "policy_entropy/p25": 0.439453125, + "policy_entropy/p75": 2.875, + "policy_entropy/var": 1.576267957687378, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.30907294154167175, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11347726732492447, + "policy_sharpness/min": 0.02056872844696045, + "policy_sharpness/p25": 0.08069449663162231, + "policy_sharpness/p75": 0.2962890863418579, + "policy_sharpness/var": 0.5602290034294128, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 76, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.7099439730827726e-07, + "adam_stats/lm_head/lr_effective_mean": -2.2125804335394739e-13, + "adam_stats/lm_head/lr_effective_min": -1.7116320805143914e-07, + "adam_stats/lm_head/lr_effective_std": 4.578017520628919e-09, + "adam_stats/lr_effective_max": 1.7261203311136342e-07, + "adam_stats/lr_effective_mean": -5.742400370316314e-13, + "adam_stats/lr_effective_min": -1.730736158833679e-07, + "adam_stats/m_t_max": 1.5516989151365124e-05, + "adam_stats/m_t_mean": -8.556651616637226e-14, + "adam_stats/m_t_min": -1.240434266946977e-05, + "adam_stats/v_t_max": 2.708447391341906e-05, + "adam_stats/v_t_mean": 5.87413537950221e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1222946643829346, + "all_logprobs/max": -0.00110626220703125, + "all_logprobs/median": -2.25, + "all_logprobs/min": -13.4375, + "all_logprobs/p1": -6.75, + "all_logprobs/p10": -4.75, + "all_logprobs/p25": -3.0, + "all_logprobs/p5": -5.75, + "all_logprobs/p75": -0.08740234375, + "all_logprobs/var": 3.3015289306640625, + "clip_ratio": 0.0, + "completion_length": 741.5416870117188, + "completion_length/incorrect": 741.5416870117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 5.0, + "completion_length/incorrect/p25": 216.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 170926.296875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 5.0, + "completion_length/p25": 216.0, + "completion_length/p75": 1024.0, + "completion_length/var": 170926.296875, + "epoch": 0.1232, + "feature_vector_variance/max_squared_error": 123110.4140625, + "feature_vector_variance/metric": 27787.21875, + "generated_tokens/total": 5602467.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.29006222155752e-06, + "loss": 0.0, + "mean_logprobs": -2.796875, + "mean_logprobs/var": 2.109375, + "num_completions/total": 7392, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 248.46429443359375, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 247.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 212.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2558.27392578125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6585030555725098, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.89453125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.05859375, + "per_token_policy_error_norm/p75": 0.99609375, + "per_token_policy_error_norm/var": 0.19377221167087555, + "policy_entropy": 1.8925281763076782, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.00537109375, + "policy_entropy/p25": 0.328125, + "policy_entropy/p75": 2.71875, + "policy_entropy/var": 1.5181007385253906, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.35770463943481445, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12914878129959106, + "policy_sharpness/min": 0.01984693855047226, + "policy_sharpness/p25": 0.09017932415008545, + "policy_sharpness/p75": 0.3477816581726074, + "policy_sharpness/var": 0.7220379114151001, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 77, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.415250068248497e-07, + "adam_stats/lm_head/lr_effective_mean": -1.8311978181636407e-13, + "adam_stats/lm_head/lr_effective_min": -1.4166482742439257e-07, + "adam_stats/lm_head/lr_effective_std": 3.788646285585173e-09, + "adam_stats/lr_effective_max": 1.4286487726167252e-07, + "adam_stats/lr_effective_mean": -4.752525413140307e-13, + "adam_stats/lr_effective_min": -1.4324689345812658e-07, + "adam_stats/m_t_max": 1.3965290236228611e-05, + "adam_stats/m_t_mean": -7.700976900441858e-14, + "adam_stats/m_t_min": -1.1163908311573323e-05, + "adam_stats/v_t_max": 2.7057389161200263e-05, + "adam_stats/v_t_mean": 5.8682616058125525e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.297010898590088, + "all_logprobs/max": -0.00145721435546875, + "all_logprobs/median": -2.3125, + "all_logprobs/min": -19.0, + "all_logprobs/p1": -6.9375, + "all_logprobs/p10": -5.125, + "all_logprobs/p25": -3.234375, + "all_logprobs/p5": -5.96875, + "all_logprobs/p75": -0.22265625, + "all_logprobs/var": 3.569291591644287, + "clip_ratio": 0.0, + "completion_length": 658.75, + "completion_length/incorrect": 658.75, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 2.0, + "completion_length/incorrect/p25": 162.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 188358.96875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 2.0, + "completion_length/p25": 162.5, + "completion_length/p75": 1024.0, + "completion_length/var": 188358.96875, + "epoch": 0.1248, + "feature_vector_variance/max_squared_error": 160199.0625, + "feature_vector_variance/metric": 27862.759765625, + "generated_tokens/total": 5665707.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.104951497460118e-06, + "loss": 0.0, + "mean_logprobs": -3.15625, + "mean_logprobs/var": 2.359375, + "num_completions/total": 7488, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 248.9510955810547, + "per_token_feature_norm/max": 390.0, + "per_token_feature_norm/median": 250.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 204.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 3057.7373046875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6913828253746033, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.90625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.1328125, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.18431898951530457, + "policy_entropy": 1.9911218881607056, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.359375, + "policy_entropy/min": 0.00640869140625, + "policy_entropy/p25": 0.66015625, + "policy_entropy/p75": 2.859375, + "policy_entropy/var": 1.4834113121032715, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.33240067958831787, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12410864233970642, + "policy_sharpness/min": 0.01923169009387493, + "policy_sharpness/p25": 0.08488211035728455, + "policy_sharpness/p75": 0.3097573220729828, + "policy_sharpness/var": 0.5777191519737244, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 78, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.1662645960086593e-07, + "adam_stats/lm_head/lr_effective_mean": -1.5089832324215874e-13, + "adam_stats/lm_head/lr_effective_min": -1.1674175937059772e-07, + "adam_stats/lm_head/lr_effective_std": 3.121787273130394e-09, + "adam_stats/lr_effective_max": 1.1773144592552853e-07, + "adam_stats/lr_effective_mean": -3.916227964747415e-13, + "adam_stats/lr_effective_min": -1.1804625188460705e-07, + "adam_stats/m_t_max": 1.256876112165628e-05, + "adam_stats/m_t_mean": -6.930893847127001e-14, + "adam_stats/m_t_min": -1.004751720756758e-05, + "adam_stats/v_t_max": 2.7030331693822518e-05, + "adam_stats/v_t_mean": 5.862392602612454e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.4960289001464844, + "all_logprobs/max": -0.00138092041015625, + "all_logprobs/median": -2.375, + "all_logprobs/min": -15.4375, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -5.09375, + "all_logprobs/p25": -3.21875, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -1.515625, + "all_logprobs/var": 3.0050208568573, + "clip_ratio": 0.0, + "completion_length": 621.1875, + "completion_length/incorrect": 621.1875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 115.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 196617.796875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 115.0, + "completion_length/p75": 1024.0, + "completion_length/var": 196617.796875, + "epoch": 0.1264, + "feature_vector_variance/max_squared_error": 144404.21875, + "feature_vector_variance/metric": 27506.740234375, + "generated_tokens/total": 5725341.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.9264138089195424e-06, + "loss": 0.0, + "mean_logprobs": -3.3125, + "mean_logprobs/var": 1.7734375, + "num_completions/total": 7584, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 248.49868774414062, + "per_token_feature_norm/max": 388.0, + "per_token_feature_norm/median": 249.0, + "per_token_feature_norm/min": 125.0, + "per_token_feature_norm/p25": 205.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2786.50927734375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.7679651379585266, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.91796875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.703125, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.1419449895620346, + "policy_entropy": 2.2006988525390625, + "policy_entropy/max": 3.890625, + "policy_entropy/median": 2.359375, + "policy_entropy/min": 0.005096435546875, + "policy_entropy/p25": 2.125, + "policy_entropy/p75": 2.828125, + "policy_entropy/var": 1.1166387796401978, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.30071181058883667, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1261119395494461, + "policy_sharpness/min": 0.01641356572508812, + "policy_sharpness/p25": 0.0887700691819191, + "policy_sharpness/p75": 0.32269906997680664, + "policy_sharpness/var": 0.572364330291748, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 79, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 9.56529930817851e-08, + "adam_stats/lm_head/lr_effective_mean": -1.2375740894316617e-13, + "adam_stats/lm_head/lr_effective_min": -9.574762316333363e-08, + "adam_stats/lm_head/lr_effective_std": 2.560118561589775e-09, + "adam_stats/lr_effective_max": 9.655995825141872e-08, + "adam_stats/lr_effective_mean": -3.211806634453168e-13, + "adam_stats/lr_effective_min": -9.681813395445715e-08, + "adam_stats/m_t_max": 1.1311884918541182e-05, + "adam_stats/m_t_mean": -6.237792536190404e-14, + "adam_stats/m_t_min": -9.042765668709762e-06, + "adam_stats/v_t_max": 2.7003301511285827e-05, + "adam_stats/v_t_mean": 5.856530971987128e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.4706101417541504, + "all_logprobs/max": -0.00096893310546875, + "all_logprobs/median": -2.390625, + "all_logprobs/min": -20.75, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -5.25, + "all_logprobs/p25": -3.421875, + "all_logprobs/p5": -6.0, + "all_logprobs/p75": -0.87890625, + "all_logprobs/var": 3.4267847537994385, + "clip_ratio": 0.0, + "completion_length": 642.3646240234375, + "completion_length/incorrect": 642.3646240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 22.0, + "completion_length/incorrect/p25": 184.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 177540.59375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 22.0, + "completion_length/p25": 184.25, + "completion_length/p75": 1024.0, + "completion_length/var": 177540.59375, + "epoch": 0.128, + "feature_vector_variance/max_squared_error": 96264.1875, + "feature_vector_variance/metric": 27272.06640625, + "generated_tokens/total": 5787008.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.7546666766076658e-06, + "loss": 0.0, + "mean_logprobs": -3.1875, + "mean_logprobs/var": 1.6953125, + "num_completions/total": 7680, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 240.78668212890625, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 237.0, + "per_token_feature_norm/min": 127.5, + "per_token_feature_norm/p25": 198.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2724.230712890625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.7383520007133484, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.91796875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.4765625, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.16302472352981567, + "policy_entropy": 2.1357691287994385, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.390625, + "policy_entropy/min": 0.004119873046875, + "policy_entropy/p25": 1.6796875, + "policy_entropy/p75": 2.9375, + "policy_entropy/var": 1.3678194284439087, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.34180861711502075, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11500261723995209, + "policy_sharpness/min": 0.01943577267229557, + "policy_sharpness/p25": 0.08088310062885284, + "policy_sharpness/p75": 0.27011221647262573, + "policy_sharpness/var": 0.9455800652503967, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 80, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 7.804326429550201e-08, + "adam_stats/lm_head/lr_effective_mean": -1.0097020333547924e-13, + "adam_stats/lm_head/lr_effective_min": -7.812052871258857e-08, + "adam_stats/lm_head/lr_effective_std": 2.088585526749398e-09, + "adam_stats/lr_effective_max": 7.878382746184798e-08, + "adam_stats/lr_effective_mean": -2.6203897992432834e-13, + "adam_stats/lr_effective_min": -7.89944607504367e-08, + "adam_stats/m_t_max": 1.0180696335737593e-05, + "adam_stats/m_t_mean": -5.614018839107497e-14, + "adam_stats/m_t_min": -8.138488738040905e-06, + "adam_stats/v_t_max": 2.697629861359019e-05, + "adam_stats/v_t_mean": 5.850674545532231e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.3503482341766357, + "all_logprobs/max": -0.00113677978515625, + "all_logprobs/median": -2.34375, + "all_logprobs/min": -13.0625, + "all_logprobs/p1": -6.90625, + "all_logprobs/p10": -5.28125, + "all_logprobs/p25": -3.4375, + "all_logprobs/p5": -6.03125, + "all_logprobs/p75": -0.28515625, + "all_logprobs/var": 3.7344510555267334, + "clip_ratio": 0.0, + "completion_length": 640.71875, + "completion_length/incorrect": 640.71875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 39.0, + "completion_length/incorrect/p25": 186.0, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 181963.40625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 39.0, + "completion_length/p25": 186.0, + "completion_length/p75": 1024.0, + "completion_length/var": 181963.40625, + "epoch": 0.1296, + "feature_vector_variance/max_squared_error": 111497.4765625, + "feature_vector_variance/metric": 29068.23046875, + "generated_tokens/total": 5848517.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.5899193479495858e-06, + "loss": 0.0, + "mean_logprobs": -3.203125, + "mean_logprobs/var": 2.203125, + "num_completions/total": 7776, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 240.15711975097656, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 238.0, + "per_token_feature_norm/min": 124.5, + "per_token_feature_norm/p25": 200.0, + "per_token_feature_norm/p75": 278.0, + "per_token_feature_norm/var": 2654.74853515625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6953192353248596, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.9140625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.177734375, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.18930795788764954, + "policy_entropy": 2.0271849632263184, + "policy_entropy/max": 3.890625, + "policy_entropy/median": 2.375, + "policy_entropy/min": 0.004730224609375, + "policy_entropy/p25": 0.80859375, + "policy_entropy/p75": 2.921875, + "policy_entropy/var": 1.493227243423462, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.43876326084136963, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11130478978157043, + "policy_sharpness/min": 0.01596488617360592, + "policy_sharpness/p25": 0.07840786129236221, + "policy_sharpness/p75": 0.2400078922510147, + "policy_sharpness/var": 1.626269817352295, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 81, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 6.331006829896069e-08, + "adam_stats/lm_head/lr_effective_mean": -8.190594470020018e-14, + "adam_stats/lm_head/lr_effective_min": -6.337279501167359e-08, + "adam_stats/lm_head/lr_effective_std": 1.69412284201087e-09, + "adam_stats/lr_effective_max": 6.391127271854202e-08, + "adam_stats/lr_effective_mean": -2.1256076326164886e-13, + "adam_stats/lr_effective_min": -6.408215114106497e-08, + "adam_stats/m_t_max": 9.162626156467013e-06, + "adam_stats/m_t_mean": -5.052622342326292e-14, + "adam_stats/m_t_min": -7.3246396823378745e-06, + "adam_stats/v_t_max": 2.6949323000735603e-05, + "adam_stats/v_t_mean": 5.844823323247761e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.304459810256958, + "all_logprobs/max": -0.00131988525390625, + "all_logprobs/median": -2.296875, + "all_logprobs/min": -14.4375, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -5.125, + "all_logprobs/p25": -3.203125, + "all_logprobs/p5": -5.9375, + "all_logprobs/p75": -0.30859375, + "all_logprobs/var": 3.434603452682495, + "clip_ratio": 0.0, + "completion_length": 693.7396240234375, + "completion_length/incorrect": 693.7396240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 44.0, + "completion_length/incorrect/p25": 239.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 172087.3125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 44.0, + "completion_length/p25": 239.25, + "completion_length/p75": 1024.0, + "completion_length/var": 172087.3125, + "epoch": 0.1312, + "feature_vector_variance/max_squared_error": 141912.03125, + "feature_vector_variance/metric": 28408.837890625, + "generated_tokens/total": 5915116.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.432372542187895e-06, + "loss": 0.0, + "mean_logprobs": -3.046875, + "mean_logprobs/var": 2.03125, + "num_completions/total": 7872, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 243.01754760742188, + "per_token_feature_norm/max": 402.0, + "per_token_feature_norm/median": 242.0, + "per_token_feature_norm/min": 127.5, + "per_token_feature_norm/p25": 201.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2702.264404296875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.7021322250366211, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.90234375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.18359375, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.17939844727516174, + "policy_entropy": 2.017322301864624, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.0072021484375, + "policy_entropy/p25": 0.78515625, + "policy_entropy/p75": 2.828125, + "policy_entropy/var": 1.3951327800750732, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.23890233039855957, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11760503798723221, + "policy_sharpness/min": 0.018288563936948776, + "policy_sharpness/p25": 0.08238201588392258, + "policy_sharpness/p75": 0.3098375201225281, + "policy_sharpness/var": 0.15418344736099243, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 82, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 5.103112954429889e-08, + "adam_stats/lm_head/lr_effective_mean": -6.601815448481485e-14, + "adam_stats/lm_head/lr_effective_min": -5.108172729251237e-08, + "adam_stats/lm_head/lr_effective_std": 1.3654084529690635e-09, + "adam_stats/lr_effective_max": 5.151609983045091e-08, + "adam_stats/lr_effective_mean": -1.7132665728822327e-13, + "adam_stats/lr_effective_min": -5.165382788163697e-08, + "adam_stats/m_t_max": 8.24636299512349e-06, + "adam_stats/m_t_mean": -4.547357024893735e-14, + "adam_stats/m_t_min": -6.5921753957809415e-06, + "adam_stats/v_t_max": 2.692237467272207e-05, + "adam_stats/v_t_mean": 5.838978172495457e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9966681003570557, + "all_logprobs/max": -0.0013580322265625, + "all_logprobs/median": -2.171875, + "all_logprobs/min": -15.625, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.53125, + "all_logprobs/p25": -2.90625, + "all_logprobs/p5": -5.6875, + "all_logprobs/p75": -0.0654296875, + "all_logprobs/var": 3.3167128562927246, + "clip_ratio": 0.0, + "completion_length": 736.15625, + "completion_length/incorrect": 736.15625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 25.0, + "completion_length/incorrect/p25": 182.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 180099.453125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 25.0, + "completion_length/p25": 182.25, + "completion_length/p75": 1024.0, + "completion_length/var": 180099.453125, + "epoch": 0.1328, + "feature_vector_variance/max_squared_error": 125246.9140625, + "feature_vector_variance/metric": 26046.009765625, + "generated_tokens/total": 5985787.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.282218205837188e-06, + "loss": 0.0, + "mean_logprobs": -2.6875, + "mean_logprobs/var": 2.109375, + "num_completions/total": 7968, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 250.64114379882812, + "per_token_feature_norm/max": 378.0, + "per_token_feature_norm/median": 252.0, + "per_token_feature_norm/min": 124.0, + "per_token_feature_norm/p25": 210.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2723.963134765625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6307916045188904, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.046875, + "per_token_policy_error_norm/p75": 0.99609375, + "per_token_policy_error_norm/var": 0.20145824551582336, + "policy_entropy": 1.7859550714492798, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.0064697265625, + "policy_entropy/p25": 0.248046875, + "policy_entropy/p75": 2.609375, + "policy_entropy/var": 1.4973567724227905, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.33637410402297974, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13248799741268158, + "policy_sharpness/min": 0.019825931638479233, + "policy_sharpness/p25": 0.09443332999944687, + "policy_sharpness/p75": 0.35089951753616333, + "policy_sharpness/var": 0.5841836333274841, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 83, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.084106564050671e-08, + "adam_stats/lm_head/lr_effective_mean": -5.283362926301015e-14, + "adam_stats/lm_head/lr_effective_min": -4.088159144544079e-08, + "adam_stats/lm_head/lr_effective_std": 1.0926466398331058e-09, + "adam_stats/lr_effective_max": 4.122949093243733e-08, + "adam_stats/lr_effective_mean": -1.3710920961952644e-13, + "adam_stats/lr_effective_min": -4.1339713874322115e-08, + "adam_stats/m_t_max": 7.421726422762731e-06, + "adam_stats/m_t_mean": -4.092618239204433e-14, + "adam_stats/m_t_min": -5.932957719778642e-06, + "adam_stats/v_t_max": 2.689545181056019e-05, + "adam_stats/v_t_mean": 5.833139093275319e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.096583843231201, + "all_logprobs/max": -0.00106048583984375, + "all_logprobs/median": -2.1875, + "all_logprobs/min": -15.5, + "all_logprobs/p1": -6.96875, + "all_logprobs/p10": -5.15625, + "all_logprobs/p25": -3.21875, + "all_logprobs/p5": -6.0, + "all_logprobs/p75": -0.03173828125, + "all_logprobs/var": 3.9769084453582764, + "clip_ratio": 0.0, + "completion_length": 680.78125, + "completion_length/incorrect": 680.78125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 5.0, + "completion_length/incorrect/p25": 198.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 179524.71875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 5.0, + "completion_length/p25": 198.75, + "completion_length/p75": 1024.0, + "completion_length/var": 179524.71875, + "epoch": 0.1344, + "feature_vector_variance/max_squared_error": 155577.90625, + "feature_vector_variance/metric": 25405.0390625, + "generated_tokens/total": 6051142.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.1396392788268054e-06, + "loss": 0.0, + "mean_logprobs": -2.953125, + "mean_logprobs/var": 2.546875, + "num_completions/total": 8064, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 240.3625946044922, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 240.0, + "per_token_feature_norm/min": 121.5, + "per_token_feature_norm/p25": 197.0, + "per_token_feature_norm/p75": 278.0, + "per_token_feature_norm/var": 2831.714111328125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.614615261554718, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8828125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.02734375, + "per_token_policy_error_norm/p75": 1.0078125, + "per_token_policy_error_norm/var": 0.21329425275325775, + "policy_entropy": 1.7765159606933594, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.296875, + "policy_entropy/min": 0.004180908203125, + "policy_entropy/p25": 0.1171875, + "policy_entropy/p75": 2.8125, + "policy_entropy/var": 1.7833400964736938, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.5015226602554321, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13613446056842804, + "policy_sharpness/min": 0.018431872129440308, + "policy_sharpness/p25": 0.08742797374725342, + "policy_sharpness/p75": 0.4043581485748291, + "policy_sharpness/var": 1.6137957572937012, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 84, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.2424246398932155e-08, + "adam_stats/lm_head/lr_effective_mean": -4.1943861984984274e-14, + "adam_stats/lm_head/lr_effective_min": -3.2456437537575766e-08, + "adam_stats/lm_head/lr_effective_std": 8.673767815103872e-10, + "adam_stats/lr_effective_max": 3.273285287264116e-08, + "adam_stats/lr_effective_mean": -1.0884765717878928e-13, + "adam_stats/lr_effective_min": -3.282035265783634e-08, + "adam_stats/m_t_max": 6.679553735011723e-06, + "adam_stats/m_t_mean": -3.683358007705931e-14, + "adam_stats/m_t_min": -5.339661584002897e-06, + "adam_stats/v_t_max": 2.686855623323936e-05, + "adam_stats/v_t_mean": 5.827307386629954e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.4179813861846924, + "all_logprobs/max": -0.00125885009765625, + "all_logprobs/median": -2.375, + "all_logprobs/min": -19.375, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -5.3125, + "all_logprobs/p25": -3.453125, + "all_logprobs/p5": -6.03125, + "all_logprobs/p75": -0.330078125, + "all_logprobs/var": 3.6770424842834473, + "clip_ratio": 0.0, + "completion_length": 621.3021240234375, + "completion_length/incorrect": 621.3021240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 964.0, + "completion_length/incorrect/min": 18.0, + "completion_length/incorrect/p25": 182.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 181370.65625, + "completion_length/max": 1024.0, + "completion_length/median": 964.0, + "completion_length/min": 18.0, + "completion_length/p25": 182.25, + "completion_length/p75": 1024.0, + "completion_length/var": 181370.65625, + "epoch": 0.136, + "feature_vector_variance/max_squared_error": 163760.359375, + "feature_vector_variance/metric": 27189.974609375, + "generated_tokens/total": 6110787.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.0048094716167097e-06, + "loss": 0.0, + "mean_logprobs": -3.21875, + "mean_logprobs/var": 1.890625, + "num_completions/total": 8160, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 241.12388610839844, + "per_token_feature_norm/max": 380.0, + "per_token_feature_norm/median": 236.0, + "per_token_feature_norm/min": 126.5, + "per_token_feature_norm/p25": 197.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2998.496337890625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.7079335451126099, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.91796875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.203125, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.18101729452610016, + "policy_entropy": 2.073378801345825, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.390625, + "policy_entropy/min": 0.0048828125, + "policy_entropy/p25": 0.87109375, + "policy_entropy/p75": 2.96875, + "policy_entropy/var": 1.5304676294326782, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.40392041206359863, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11775847524404526, + "policy_sharpness/min": 0.019461285322904587, + "policy_sharpness/p25": 0.08125676214694977, + "policy_sharpness/p75": 0.3212592303752899, + "policy_sharpness/var": 1.2613458633422852, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 85, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.5508455792078166e-08, + "adam_stats/lm_head/lr_effective_mean": -3.299650727347017e-14, + "adam_stats/lm_head/lr_effective_min": -2.553380085146273e-08, + "adam_stats/lm_head/lr_effective_std": 6.823031029057347e-10, + "adam_stats/lr_effective_max": 2.5751425880571333e-08, + "adam_stats/lr_effective_mean": -8.56274618809931e-14, + "adam_stats/lr_effective_min": -2.582026148445493e-08, + "adam_stats/m_t_max": 6.01159808866214e-06, + "adam_stats/m_t_mean": -3.315019293141999e-14, + "adam_stats/m_t_min": -4.8056954256026074e-06, + "adam_stats/v_t_max": 2.6841687940759584e-05, + "adam_stats/v_t_mean": 5.821480450474148e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.1903817653656006, + "all_logprobs/max": -0.00099945068359375, + "all_logprobs/median": -2.234375, + "all_logprobs/min": -17.5, + "all_logprobs/p1": -6.9375, + "all_logprobs/p10": -5.21875, + "all_logprobs/p25": -3.328125, + "all_logprobs/p5": -6.0, + "all_logprobs/p75": -0.07373046875, + "all_logprobs/var": 3.9210872650146484, + "clip_ratio": 0.0, + "completion_length": 688.8229370117188, + "completion_length/incorrect": 688.8229370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 35.0, + "completion_length/incorrect/p25": 231.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 172770.296875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 35.0, + "completion_length/p25": 231.25, + "completion_length/p75": 1024.0, + "completion_length/var": 172770.296875, + "epoch": 0.1376, + "feature_vector_variance/max_squared_error": 114569.3203125, + "feature_vector_variance/metric": 27671.4453125, + "generated_tokens/total": 6176914.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 8.778930535580476e-07, + "loss": 0.0, + "mean_logprobs": -2.96875, + "mean_logprobs/var": 2.296875, + "num_completions/total": 8256, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 242.138671875, + "per_token_feature_norm/max": 382.0, + "per_token_feature_norm/median": 241.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 202.0, + "per_token_feature_norm/p75": 278.0, + "per_token_feature_norm/var": 2626.72216796875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6461805105209351, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.046875, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.20508792996406555, + "policy_entropy": 1.8847016096115112, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.004638671875, + "policy_entropy/p25": 0.27734375, + "policy_entropy/p75": 2.90625, + "policy_entropy/var": 1.6626659631729126, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.41002920269966125, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11705132573843002, + "policy_sharpness/min": 0.01555597223341465, + "policy_sharpness/p25": 0.07896458357572556, + "policy_sharpness/p75": 0.3247525095939636, + "policy_sharpness/var": 1.1382019519805908, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 86, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.985940834003941e-08, + "adam_stats/lm_head/lr_effective_mean": -2.5688312086757752e-14, + "adam_stats/lm_head/lr_effective_min": -1.987915609902302e-08, + "adam_stats/lm_head/lr_effective_std": 5.311470152591369e-10, + "adam_stats/lr_effective_max": 2.0048714688414293e-08, + "adam_stats/lr_effective_mean": -6.666147939638628e-14, + "adam_stats/lr_effective_min": -2.010230204518848e-08, + "adam_stats/m_t_max": 5.4104380069475155e-06, + "adam_stats/m_t_mean": -2.9835200743332305e-14, + "adam_stats/m_t_min": -4.325125701143406e-06, + "adam_stats/v_t_max": 2.681484693312086e-05, + "adam_stats/v_t_mean": 5.81565871848877e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8448392152786255, + "all_logprobs/max": -0.001068115234375, + "all_logprobs/median": -1.78125, + "all_logprobs/min": -15.5, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.75, + "all_logprobs/p25": -2.90625, + "all_logprobs/p5": -5.8125, + "all_logprobs/p75": -0.0294189453125, + "all_logprobs/var": 3.707240104675293, + "clip_ratio": 0.0, + "completion_length": 664.0, + "completion_length/incorrect": 664.0, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 5.0, + "completion_length/incorrect/p25": 134.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 194942.484375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 5.0, + "completion_length/p25": 134.25, + "completion_length/p75": 1024.0, + "completion_length/var": 194942.484375, + "epoch": 0.1392, + "feature_vector_variance/max_squared_error": 116346.6328125, + "feature_vector_variance/metric": 25172.8671875, + "generated_tokens/total": 6240658.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 7.59044652756249e-07, + "loss": 0.0, + "mean_logprobs": -2.796875, + "mean_logprobs/var": 2.796875, + "num_completions/total": 8352, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 250.03131103515625, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 256.0, + "per_token_feature_norm/min": 124.5, + "per_token_feature_norm/p25": 207.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2801.790771484375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5615822076797485, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.80078125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.22202271223068237, + "policy_entropy": 1.6074650287628174, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.15625, + "policy_entropy/min": 0.006134033203125, + "policy_entropy/p25": 0.1220703125, + "policy_entropy/p75": 2.59375, + "policy_entropy/var": 1.7284274101257324, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3895367980003357, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.14387455582618713, + "policy_sharpness/min": 0.017697328701615334, + "policy_sharpness/p25": 0.09439844638109207, + "policy_sharpness/p75": 0.3911431133747101, + "policy_sharpness/var": 0.9087679982185364, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 87, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.527581972027292e-08, + "adam_stats/lm_head/lr_effective_mean": -1.9758703679283174e-14, + "adam_stats/lm_head/lr_effective_min": -1.529102000574767e-08, + "adam_stats/lm_head/lr_effective_std": 4.085151661392672e-10, + "adam_stats/lr_effective_max": 1.542154137723628e-08, + "adam_stats/lr_effective_mean": -5.1273469113067086e-14, + "adam_stats/lr_effective_min": -1.5462761737694564e-08, + "adam_stats/m_t_max": 4.869394160778029e-06, + "adam_stats/m_t_mean": -2.6851662711900592e-14, + "adam_stats/m_t_min": -3.892613221978536e-06, + "adam_stats/v_t_max": 2.6788033210323192e-05, + "adam_stats/v_t_mean": 5.8098421906738196e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8883107900619507, + "all_logprobs/max": 0.0, + "all_logprobs/median": -2.109375, + "all_logprobs/min": -17.0, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.5, + "all_logprobs/p25": -2.84375, + "all_logprobs/p5": -5.65625, + "all_logprobs/p75": -0.0230712890625, + "all_logprobs/var": 3.4306721687316895, + "clip_ratio": 0.0, + "completion_length": 775.75, + "completion_length/incorrect": 775.75, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 26.0, + "completion_length/incorrect/p25": 390.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 148220.28125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 26.0, + "completion_length/p25": 390.75, + "completion_length/p75": 1024.0, + "completion_length/var": 148220.28125, + "epoch": 0.1408, + "feature_vector_variance/max_squared_error": 202908.40625, + "feature_vector_variance/metric": 36821.1015625, + "generated_tokens/total": 6315130.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 6.484090676804927e-07, + "loss": 0.0, + "mean_logprobs": -2.453125, + "mean_logprobs/var": 2.390625, + "num_completions/total": 8448, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 254.51625061035156, + "per_token_feature_norm/max": 434.0, + "per_token_feature_norm/median": 256.0, + "per_token_feature_norm/min": 127.0, + "per_token_feature_norm/p25": 217.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2753.582275390625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5884768962860107, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.015625, + "per_token_policy_error_norm/p75": 0.984375, + "per_token_policy_error_norm/var": 0.21571677923202515, + "policy_entropy": 1.6738399267196655, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 1.0477378964424133e-09, + "policy_entropy/p25": 0.10791015625, + "policy_entropy/p75": 2.625, + "policy_entropy/var": 1.6601442098617554, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.9287739396095276, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13831032812595367, + "policy_sharpness/min": 0.017602790147066116, + "policy_sharpness/p25": 0.09232836961746216, + "policy_sharpness/p75": 0.4839470386505127, + "policy_sharpness/var": 4.753167152404785, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 88, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.1585126635793586e-08, + "adam_stats/lm_head/lr_effective_mean": -1.498441771867736e-14, + "adam_stats/lm_head/lr_effective_min": -1.159666318528707e-08, + "adam_stats/lm_head/lr_effective_std": 3.0978453136043527e-10, + "adam_stats/lr_effective_max": 1.1695727053506744e-08, + "adam_stats/lr_effective_mean": -3.888379960896582e-14, + "adam_stats/lr_effective_min": -1.172698649298809e-08, + "adam_stats/m_t_max": 4.382454790174961e-06, + "adam_stats/m_t_mean": -2.4166524223391203e-14, + "adam_stats/m_t_min": -3.503351763356477e-06, + "adam_stats/v_t_max": 2.6761244953377172e-05, + "adam_stats/v_t_mean": 5.804033035433642e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0528643131256104, + "all_logprobs/max": -0.0003185272216796875, + "all_logprobs/median": -2.203125, + "all_logprobs/min": -20.375, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.65625, + "all_logprobs/p25": -2.96875, + "all_logprobs/p5": -5.71875, + "all_logprobs/p75": -0.08642578125, + "all_logprobs/var": 3.3629860877990723, + "clip_ratio": 0.0, + "completion_length": 776.9271240234375, + "completion_length/incorrect": 776.9271240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 8.0, + "completion_length/incorrect/p25": 389.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 157741.4375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 8.0, + "completion_length/p25": 389.75, + "completion_length/p75": 1024.0, + "completion_length/var": 157741.4375, + "epoch": 0.1424, + "feature_vector_variance/max_squared_error": 116464.765625, + "feature_vector_variance/metric": 29373.609375, + "generated_tokens/total": 6389715.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 5.461210907490952e-07, + "loss": 0.0, + "mean_logprobs": -2.640625, + "mean_logprobs/var": 2.140625, + "num_completions/total": 8544, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 252.4384765625, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 255.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 212.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 2710.69091796875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6422688364982605, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8828125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0625, + "per_token_policy_error_norm/p75": 0.99609375, + "per_token_policy_error_norm/var": 0.19816996157169342, + "policy_entropy": 1.8378427028656006, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.0032501220703125, + "policy_entropy/p25": 0.3125, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.5145580768585205, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.2740511894226074, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12134602665901184, + "policy_sharpness/min": 0.02060324139893055, + "policy_sharpness/p25": 0.08493622392416, + "policy_sharpness/p75": 0.27715468406677246, + "policy_sharpness/var": 0.40959230065345764, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 89, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 8.639727155923538e-09, + "adam_stats/lm_head/lr_effective_mean": -1.1174409311818893e-14, + "adam_stats/lm_head/lr_effective_min": -8.648335381167271e-09, + "adam_stats/lm_head/lr_effective_std": 2.3100116264274106e-10, + "adam_stats/lr_effective_max": 8.722270017358369e-09, + "adam_stats/lr_effective_mean": -2.899663812555055e-14, + "adam_stats/lr_effective_min": -8.745582036340238e-09, + "adam_stats/m_t_max": 3.944209311157465e-06, + "adam_stats/m_t_mean": -2.1749874850370693e-14, + "adam_stats/m_t_min": -3.153016450596624e-06, + "adam_stats/v_t_max": 2.6734483981272206e-05, + "adam_stats/v_t_mean": 5.798229084363893e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.2301957607269287, + "all_logprobs/max": -0.0013275146484375, + "all_logprobs/median": -2.265625, + "all_logprobs/min": -18.125, + "all_logprobs/p1": -6.9375, + "all_logprobs/p10": -5.1875, + "all_logprobs/p25": -3.28125, + "all_logprobs/p5": -6.03125, + "all_logprobs/p75": -0.11474609375, + "all_logprobs/var": 3.798102855682373, + "clip_ratio": 0.0, + "completion_length": 668.6771240234375, + "completion_length/incorrect": 668.6771240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 199.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 177248.828125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 199.5, + "completion_length/p75": 1024.0, + "completion_length/var": 177248.828125, + "epoch": 0.144, + "feature_vector_variance/max_squared_error": 151026.3125, + "feature_vector_variance/metric": 27506.626953125, + "generated_tokens/total": 6453908.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.5230534410568764e-07, + "loss": 0.0, + "mean_logprobs": -3.015625, + "mean_logprobs/var": 2.171875, + "num_completions/total": 8640, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 242.06361389160156, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 241.0, + "per_token_feature_norm/min": 127.5, + "per_token_feature_norm/p25": 201.0, + "per_token_feature_norm/p75": 280.0, + "per_token_feature_norm/var": 2595.206298828125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6629259586334229, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8984375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0703125, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.20054800808429718, + "policy_entropy": 1.9201328754425049, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.34375, + "policy_entropy/min": 0.00616455078125, + "policy_entropy/p25": 0.404296875, + "policy_entropy/p75": 2.890625, + "policy_entropy/var": 1.5793768167495728, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.2968829870223999, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11802614480257034, + "policy_sharpness/min": 0.017537515610456467, + "policy_sharpness/p25": 0.08188174664974213, + "policy_sharpness/p75": 0.30675479769706726, + "policy_sharpness/var": 0.5062239766120911, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 90, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 6.313654932199597e-09, + "adam_stats/lm_head/lr_effective_mean": -8.16564918031679e-15, + "adam_stats/lm_head/lr_effective_min": -6.319950340838432e-09, + "adam_stats/lm_head/lr_effective_std": 1.6879135589231709e-10, + "adam_stats/lr_effective_max": 6.374020422583726e-09, + "adam_stats/lr_effective_mean": -2.1188886623470064e-14, + "adam_stats/lr_effective_min": -6.391056128762784e-09, + "adam_stats/m_t_max": 3.5497882890922483e-06, + "adam_stats/m_t_mean": -1.957488228313594e-14, + "adam_stats/m_t_min": -2.837714646375389e-06, + "adam_stats/v_t_max": 2.6707750294008292e-05, + "adam_stats/v_t_mean": 5.792431204826309e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8676749467849731, + "all_logprobs/max": -0.00054168701171875, + "all_logprobs/median": -1.9921875, + "all_logprobs/min": -20.25, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.5, + "all_logprobs/p25": -2.875, + "all_logprobs/p5": -5.65625, + "all_logprobs/p75": -0.036376953125, + "all_logprobs/var": 3.441495895385742, + "clip_ratio": 0.0, + "completion_length": 818.71875, + "completion_length/incorrect": 818.71875, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 3.0, + "completion_length/incorrect/p25": 751.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 133246.125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 3.0, + "completion_length/p25": 751.75, + "completion_length/p75": 1024.0, + "completion_length/var": 133246.125, + "epoch": 0.1456, + "feature_vector_variance/max_squared_error": 173055.09375, + "feature_vector_variance/metric": 27216.8046875, + "generated_tokens/total": 6532505.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 3.6707612778634855e-07, + "loss": 0.0, + "mean_logprobs": -2.390625, + "mean_logprobs/var": 1.984375, + "num_completions/total": 8736, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 246.58558654785156, + "per_token_feature_norm/max": 414.0, + "per_token_feature_norm/median": 247.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 201.0, + "per_token_feature_norm/p75": 288.0, + "per_token_feature_norm/var": 2898.61572265625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5844165682792664, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.84375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.98828125, + "per_token_policy_error_norm/var": 0.21676453948020935, + "policy_entropy": 1.6633297204971313, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.265625, + "policy_entropy/min": 0.00390625, + "policy_entropy/p25": 0.1533203125, + "policy_entropy/p75": 2.609375, + "policy_entropy/var": 1.6162270307540894, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4904923141002655, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.13721787929534912, + "policy_sharpness/min": 0.015922868624329567, + "policy_sharpness/p25": 0.09474977105855942, + "policy_sharpness/p75": 0.3929089605808258, + "policy_sharpness/var": 1.648516058921814, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 91, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 4.499694394866083e-09, + "adam_stats/lm_head/lr_effective_mean": -5.819394597960216e-15, + "adam_stats/lm_head/lr_effective_min": -4.504183692688457e-09, + "adam_stats/lm_head/lr_effective_std": 1.202839217118168e-10, + "adam_stats/lr_effective_max": 4.542748843761046e-09, + "adam_stats/lr_effective_mean": -1.510044598585815e-14, + "adam_stats/lr_effective_min": -4.554889354579927e-09, + "adam_stats/m_t_max": 3.194809323758818e-06, + "adam_stats/m_t_mean": -1.7617367119174623e-14, + "adam_stats/m_t_min": -2.5539432044752175e-06, + "adam_stats/v_t_max": 2.668104207259603e-05, + "adam_stats/v_t_mean": 5.786638095778285e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.515962600708008, + "all_logprobs/max": -0.00106048583984375, + "all_logprobs/median": -2.40625, + "all_logprobs/min": -20.625, + "all_logprobs/p1": -6.9375, + "all_logprobs/p10": -5.375, + "all_logprobs/p25": -3.484375, + "all_logprobs/p5": -6.09375, + "all_logprobs/p75": -0.931640625, + "all_logprobs/var": 3.5413594245910645, + "clip_ratio": 0.0, + "completion_length": 606.03125, + "completion_length/incorrect": 606.03125, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 690.0, + "completion_length/incorrect/min": 28.0, + "completion_length/incorrect/p25": 133.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 186210.765625, + "completion_length/max": 1024.0, + "completion_length/median": 690.0, + "completion_length/min": 28.0, + "completion_length/p25": 133.25, + "completion_length/p75": 1024.0, + "completion_length/var": 186210.765625, + "epoch": 0.1472, + "feature_vector_variance/max_squared_error": 99967.3359375, + "feature_vector_variance/metric": 27750.642578125, + "generated_tokens/total": 6590684.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.905372804626083e-07, + "loss": 0.0, + "mean_logprobs": -3.375, + "mean_logprobs/var": 1.8515625, + "num_completions/total": 8832, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 241.23414611816406, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 237.0, + "per_token_feature_norm/min": 125.5, + "per_token_feature_norm/p25": 197.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2922.050537109375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.7416924238204956, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.921875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.5234375, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.16156743466854095, + "policy_entropy": 2.15815806388855, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.40625, + "policy_entropy/min": 0.006195068359375, + "policy_entropy/p25": 1.65625, + "policy_entropy/p75": 3.015625, + "policy_entropy/var": 1.3996087312698364, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.2692645788192749, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11590336263179779, + "policy_sharpness/min": 0.019624849781394005, + "policy_sharpness/p25": 0.08011291176080704, + "policy_sharpness/p75": 0.26148730516433716, + "policy_sharpness/var": 0.5178601741790771, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 92, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.1068316808102736e-09, + "adam_stats/lm_head/lr_effective_mean": -4.017887656290091e-15, + "adam_stats/lm_head/lr_effective_min": -3.1099338659856812e-09, + "adam_stats/lm_head/lr_effective_std": 8.304194831776357e-11, + "adam_stats/lr_effective_max": 3.1365814390227342e-09, + "adam_stats/lr_effective_mean": -1.0425676232159349e-14, + "adam_stats/lr_effective_min": -3.144963622858654e-09, + "adam_stats/m_t_max": 2.8753283913829364e-06, + "adam_stats/m_t_mean": -1.5855641926905244e-14, + "adam_stats/m_t_min": -2.2985489067650633e-06, + "adam_stats/v_t_max": 2.6654361136024818e-05, + "adam_stats/v_t_mean": 5.780851925624164e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8434178829193115, + "all_logprobs/max": -8.916854858398438e-05, + "all_logprobs/median": -1.8125, + "all_logprobs/min": -25.125, + "all_logprobs/p1": -6.78125, + "all_logprobs/p10": -4.625, + "all_logprobs/p25": -2.9375, + "all_logprobs/p5": -5.6875, + "all_logprobs/p75": -0.03125, + "all_logprobs/var": 3.611633777618408, + "clip_ratio": 0.0, + "completion_length": 738.8021240234375, + "completion_length/incorrect": 738.8021240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 5.0, + "completion_length/incorrect/p25": 236.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 169580.15625, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 5.0, + "completion_length/p25": 236.25, + "completion_length/p75": 1024.0, + "completion_length/var": 169580.15625, + "epoch": 0.1488, + "feature_vector_variance/max_squared_error": 181541.328125, + "feature_vector_variance/metric": 31647.908203125, + "generated_tokens/total": 6661609.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 2.2278205293002645e-07, + "loss": 0.0, + "mean_logprobs": -2.578125, + "mean_logprobs/var": 2.390625, + "num_completions/total": 8928, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 255.5701904296875, + "per_token_feature_norm/max": 416.0, + "per_token_feature_norm/median": 256.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 217.0, + "per_token_feature_norm/p75": 294.0, + "per_token_feature_norm/var": 2832.611083984375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5664218068122864, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.8046875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.2231808751821518, + "policy_entropy": 1.6306153535842896, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.203125, + "policy_entropy/min": 0.000774383544921875, + "policy_entropy/p25": 0.11962890625, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.7033041715621948, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.583400309085846, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12819701433181763, + "policy_sharpness/min": 0.019115395843982697, + "policy_sharpness/p25": 0.08828012645244598, + "policy_sharpness/p75": 0.3848299980163574, + "policy_sharpness/var": 2.120398998260498, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 93, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 2.058042625208145e-09, + "adam_stats/lm_head/lr_effective_mean": -2.6614584119194745e-15, + "adam_stats/lm_head/lr_effective_min": -2.060098980294356e-09, + "adam_stats/lm_head/lr_effective_std": 5.5003376853157704e-11, + "adam_stats/lr_effective_max": 2.0777644049729815e-09, + "adam_stats/lr_effective_mean": -6.905901123578726e-15, + "adam_stats/lr_effective_min": -2.083316630319132e-09, + "adam_stats/m_t_max": 2.5877955067699077e-06, + "adam_stats/m_t_mean": -1.4270067400412763e-14, + "adam_stats/m_t_min": -2.0686939024017192e-06, + "adam_stats/v_t_max": 2.662770748429466e-05, + "adam_stats/v_t_mean": 5.7750713933213405e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.5947189331054688, + "all_logprobs/max": -0.00127410888671875, + "all_logprobs/median": -2.40625, + "all_logprobs/min": -20.125, + "all_logprobs/p1": -6.875, + "all_logprobs/p10": -5.46875, + "all_logprobs/p25": -3.640625, + "all_logprobs/p5": -6.09375, + "all_logprobs/p75": -1.28125, + "all_logprobs/var": 3.5453286170959473, + "clip_ratio": 0.0, + "completion_length": 605.7708740234375, + "completion_length/incorrect": 605.7708740234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 713.0, + "completion_length/incorrect/min": 15.0, + "completion_length/incorrect/p25": 183.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 177703.828125, + "completion_length/max": 1024.0, + "completion_length/median": 713.0, + "completion_length/min": 15.0, + "completion_length/p25": 183.75, + "completion_length/p75": 1024.0, + "completion_length/var": 177703.828125, + "epoch": 0.1504, + "feature_vector_variance/max_squared_error": 124664.8984375, + "feature_vector_variance/metric": 29676.458984375, + "generated_tokens/total": 6719763.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.6389299449645734e-07, + "loss": 0.0, + "mean_logprobs": -3.421875, + "mean_logprobs/var": 1.953125, + "num_completions/total": 9024, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 246.5509796142578, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 239.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 200.0, + "per_token_feature_norm/p75": 292.0, + "per_token_feature_norm/var": 3281.706787109375, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.755558431148529, + "per_token_policy_error_norm/max": 1.9921875, + "per_token_policy_error_norm/median": 0.921875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.625, + "per_token_policy_error_norm/p75": 1.015625, + "per_token_policy_error_norm/var": 0.15531785786151886, + "policy_entropy": 2.2141120433807373, + "policy_entropy/max": 3.890625, + "policy_entropy/median": 2.390625, + "policy_entropy/min": 0.007049560546875, + "policy_entropy/p25": 1.921875, + "policy_entropy/p75": 3.0625, + "policy_entropy/var": 1.33583402633667, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.26664796471595764, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11950480192899704, + "policy_sharpness/min": 0.01953481324017048, + "policy_sharpness/p25": 0.07810333371162415, + "policy_sharpness/p75": 0.30946364998817444, + "policy_sharpness/var": 0.42640364170074463, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 94, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.288349427142066e-09, + "adam_stats/lm_head/lr_effective_mean": -1.6660338685148116e-15, + "adam_stats/lm_head/lr_effective_min": -1.289637507895236e-09, + "adam_stats/lm_head/lr_effective_std": 3.4428956213750084e-11, + "adam_stats/lr_effective_max": 1.3007047661162119e-09, + "adam_stats/lr_effective_mean": -4.3229397959801494e-15, + "adam_stats/lr_effective_min": -1.304180319294801e-09, + "adam_stats/m_t_max": 2.329015842406079e-06, + "adam_stats/m_t_mean": -1.284307184120639e-14, + "adam_stats/m_t_min": -1.8618244439494447e-06, + "adam_stats/v_t_max": 2.6601079298416153e-05, + "adam_stats/v_t_mean": 5.769296932550683e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0405709743499756, + "all_logprobs/max": -0.00089263916015625, + "all_logprobs/median": -2.21875, + "all_logprobs/min": -17.625, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.65625, + "all_logprobs/p25": -2.890625, + "all_logprobs/p5": -5.75, + "all_logprobs/p75": -0.0478515625, + "all_logprobs/var": 3.331998348236084, + "clip_ratio": 0.0, + "completion_length": 728.6146240234375, + "completion_length/incorrect": 728.6146240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 28.0, + "completion_length/incorrect/p25": 237.75, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 173525.921875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 28.0, + "completion_length/p25": 237.75, + "completion_length/p75": 1024.0, + "completion_length/var": 173525.921875, + "epoch": 0.152, + "feature_vector_variance/max_squared_error": 110523.0078125, + "feature_vector_variance/metric": 25474.30859375, + "generated_tokens/total": 6789710.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.1394185240843985e-07, + "loss": 0.0, + "mean_logprobs": -2.71875, + "mean_logprobs/var": 2.125, + "num_completions/total": 9120, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 247.4848175048828, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 251.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 202.0, + "per_token_feature_norm/p75": 290.0, + "per_token_feature_norm/var": 3035.49560546875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.64007568359375, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.88671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.046875, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.19674938917160034, + "policy_entropy": 1.797654390335083, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.004425048828125, + "policy_entropy/p25": 0.15625, + "policy_entropy/p75": 2.609375, + "policy_entropy/var": 1.5786373615264893, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4234839379787445, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1400570124387741, + "policy_sharpness/min": 0.019078630954027176, + "policy_sharpness/p25": 0.09516337513923645, + "policy_sharpness/p75": 0.37452784180641174, + "policy_sharpness/var": 1.1683604717254639, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 95, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 7.431339432706352e-10, + "adam_stats/lm_head/lr_effective_mean": -9.609544317370306e-16, + "adam_stats/lm_head/lr_effective_min": -7.438775151413779e-10, + "adam_stats/lm_head/lr_effective_std": 1.9856947683360993e-11, + "adam_stats/lr_effective_max": 7.502660714919784e-10, + "adam_stats/lr_effective_mean": -2.4934034752941954e-15, + "adam_stats/lr_effective_min": -7.522706346740904e-10, + "adam_stats/m_t_max": 2.0961142581654713e-06, + "adam_stats/m_t_mean": -1.1558765758228583e-14, + "adam_stats/m_t_min": -1.675642010923184e-06, + "adam_stats/v_t_max": 2.6574478397378698e-05, + "adam_stats/v_t_mean": 5.763527675950453e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.9163860082626343, + "all_logprobs/max": -0.00118255615234375, + "all_logprobs/median": -1.7734375, + "all_logprobs/min": -17.875, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.9375, + "all_logprobs/p25": -3.09375, + "all_logprobs/p5": -5.84375, + "all_logprobs/p75": -0.0291748046875, + "all_logprobs/var": 3.8813881874084473, + "clip_ratio": 0.0, + "completion_length": 679.4271240234375, + "completion_length/incorrect": 679.4271240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 6.0, + "completion_length/incorrect/p25": 183.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 183864.59375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 6.0, + "completion_length/p25": 183.25, + "completion_length/p75": 1024.0, + "completion_length/var": 183864.59375, + "epoch": 0.1536, + "feature_vector_variance/max_squared_error": 105742.71875, + "feature_vector_variance/metric": 27283.01953125, + "generated_tokens/total": 6854935.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 7.298948443822229e-08, + "loss": 0.0, + "mean_logprobs": -2.765625, + "mean_logprobs/var": 2.46875, + "num_completions/total": 9216, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.6126708984375, + "per_token_feature_norm/max": 378.0, + "per_token_feature_norm/median": 244.0, + "per_token_feature_norm/min": 128.0, + "per_token_feature_norm/p25": 206.0, + "per_token_feature_norm/p75": 282.0, + "per_token_feature_norm/var": 2559.42236328125, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.5741140246391296, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.80078125, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0234375, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.22170376777648926, + "policy_entropy": 1.662736415863037, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.1875, + "policy_entropy/min": 0.005828857421875, + "policy_entropy/p25": 0.1142578125, + "policy_entropy/p75": 2.765625, + "policy_entropy/var": 1.7746602296829224, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.4437409043312073, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.11896130442619324, + "policy_sharpness/min": 0.01779336668550968, + "policy_sharpness/p25": 0.08397246152162552, + "policy_sharpness/p75": 0.339492529630661, + "policy_sharpness/var": 1.366356611251831, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 96, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.766645328973084e-10, + "adam_stats/lm_head/lr_effective_mean": -4.870523091215768e-16, + "adam_stats/lm_head/lr_effective_min": -3.770417311699248e-10, + "adam_stats/lm_head/lr_effective_std": 1.0063646097013823e-11, + "adam_stats/lr_effective_max": 3.802823056453519e-10, + "adam_stats/lr_effective_mean": -1.2637456287326303e-15, + "adam_stats/lr_effective_min": -3.8129832624633764e-10, + "adam_stats/m_t_max": 1.886502786874189e-06, + "adam_stats/m_t_mean": -1.0402885116647578e-14, + "adam_stats/m_t_min": -1.5080777302500792e-06, + "adam_stats/v_t_max": 2.6547904781182297e-05, + "adam_stats/v_t_mean": 5.757762322478044e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.029766798019409, + "all_logprobs/max": -0.000637054443359375, + "all_logprobs/median": -2.171875, + "all_logprobs/min": -16.0, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.8125, + "all_logprobs/p25": -3.03125, + "all_logprobs/p5": -5.84375, + "all_logprobs/p75": -0.07421875, + "all_logprobs/var": 3.54787278175354, + "clip_ratio": 0.0, + "completion_length": 667.0521240234375, + "completion_length/incorrect": 667.0521240234375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 16.0, + "completion_length/incorrect/p25": 136.5, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 185361.859375, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 16.0, + "completion_length/p25": 136.5, + "completion_length/p75": 1024.0, + "completion_length/var": 185361.859375, + "epoch": 0.1552, + "feature_vector_variance/max_squared_error": 149344.75, + "feature_vector_variance/metric": 26810.56640625, + "generated_tokens/total": 6918972.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.108578473795033e-08, + "loss": 0.0, + "mean_logprobs": -2.78125, + "mean_logprobs/var": 2.09375, + "num_completions/total": 9312, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 244.3739013671875, + "per_token_feature_norm/max": 386.0, + "per_token_feature_norm/median": 245.0, + "per_token_feature_norm/min": 124.5, + "per_token_feature_norm/p25": 202.0, + "per_token_feature_norm/p75": 284.0, + "per_token_feature_norm/var": 2710.35546875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.625590980052948, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.046875, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.20711956918239594, + "policy_entropy": 1.7841613292694092, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.3125, + "policy_entropy/min": 0.005523681640625, + "policy_entropy/p25": 0.296875, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.5699673891067505, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.33850449323654175, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12657563388347626, + "policy_sharpness/min": 0.01939900778234005, + "policy_sharpness/p25": 0.08795017749071121, + "policy_sharpness/p75": 0.33730316162109375, + "policy_sharpness/var": 0.630307137966156, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 97, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 1.5081660920124307e-10, + "adam_stats/lm_head/lr_effective_mean": -1.9500929848214417e-16, + "adam_stats/lm_head/lr_effective_min": -1.5096773831047017e-10, + "adam_stats/lm_head/lr_effective_std": 4.029071347388946e-12, + "adam_stats/lr_effective_max": 1.5226624128228394e-10, + "adam_stats/lr_effective_mean": -5.059804249982766e-16, + "adam_stats/lr_effective_min": -1.5267305475408222e-10, + "adam_stats/m_t_max": 1.6978524399746675e-06, + "adam_stats/m_t_mean": -9.362608378740787e-15, + "adam_stats/m_t_min": -1.357269866275601e-06, + "adam_stats/v_t_max": 2.6521356630837545e-05, + "adam_stats/v_t_mean": 5.752005642623015e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.988638997077942, + "all_logprobs/max": -0.000736236572265625, + "all_logprobs/median": -2.09375, + "all_logprobs/min": -18.875, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.625, + "all_logprobs/p25": -3.015625, + "all_logprobs/p5": -5.65625, + "all_logprobs/p75": -0.08154296875, + "all_logprobs/var": 3.4085168838500977, + "clip_ratio": 0.0, + "completion_length": 702.625, + "completion_length/incorrect": 702.625, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 15.0, + "completion_length/incorrect/p25": 177.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 187357.703125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 15.0, + "completion_length/p25": 177.25, + "completion_length/p75": 1024.0, + "completion_length/var": 187357.703125, + "epoch": 0.1568, + "feature_vector_variance/max_squared_error": 178072.921875, + "feature_vector_variance/metric": 33894.75390625, + "generated_tokens/total": 6986424.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 1.8269623051318517e-08, + "loss": 0.0, + "mean_logprobs": -2.71875, + "mean_logprobs/var": 2.015625, + "num_completions/total": 9408, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 256.8287353515625, + "per_token_feature_norm/max": 426.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 214.0, + "per_token_feature_norm/p75": 298.0, + "per_token_feature_norm/var": 3372.24560546875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6250008940696716, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.87109375, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.05078125, + "per_token_policy_error_norm/p75": 1.0, + "per_token_policy_error_norm/var": 0.20523451268672943, + "policy_entropy": 1.7744699716567993, + "policy_entropy/max": 3.875, + "policy_entropy/median": 2.28125, + "policy_entropy/min": 0.00494384765625, + "policy_entropy/p25": 0.30078125, + "policy_entropy/p75": 2.703125, + "policy_entropy/var": 1.5136524438858032, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3684443235397339, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.12543104588985443, + "policy_sharpness/min": 0.018212785944342613, + "policy_sharpness/p25": 0.08940060436725616, + "policy_sharpness/p75": 0.35800933837890625, + "policy_sharpness/var": 0.8859682083129883, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 98, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 3.396079598094737e-11, + "adam_stats/lm_head/lr_effective_mean": -4.3910585032356955e-17, + "adam_stats/lm_head/lr_effective_min": -3.3994855541674696e-11, + "adam_stats/lm_head/lr_effective_std": 9.071703349454452e-13, + "adam_stats/lr_effective_max": 3.4287472167049415e-11, + "adam_stats/lr_effective_mean": -1.1393101360338692e-16, + "adam_stats/lr_effective_min": -3.4379075974921847e-11, + "adam_stats/m_t_max": 1.5280671732398332e-06, + "adam_stats/m_t_mean": -8.426335428295562e-15, + "adam_stats/m_t_min": -1.2215429023854085e-06, + "adam_stats/v_t_max": 2.6494835765333846e-05, + "adam_stats/v_t_mean": 5.746254166938414e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -2.0631330013275146, + "all_logprobs/max": -0.00075531005859375, + "all_logprobs/median": -2.234375, + "all_logprobs/min": -21.0, + "all_logprobs/p1": -6.84375, + "all_logprobs/p10": -4.59375, + "all_logprobs/p25": -2.921875, + "all_logprobs/p5": -5.71875, + "all_logprobs/p75": -0.09033203125, + "all_logprobs/var": 3.255335569381714, + "clip_ratio": 0.0, + "completion_length": 717.375, + "completion_length/incorrect": 717.375, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 21.0, + "completion_length/incorrect/p25": 186.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 181335.28125, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 21.0, + "completion_length/p25": 186.25, + "completion_length/p75": 1024.0, + "completion_length/var": 181335.28125, + "epoch": 0.1584, + "feature_vector_variance/max_squared_error": 142633.6875, + "feature_vector_variance/metric": 26816.427734375, + "generated_tokens/total": 7055292.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 4.568797356781784e-09, + "loss": 0.0, + "mean_logprobs": -2.8125, + "mean_logprobs/var": 2.15625, + "num_completions/total": 9504, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 254.97080993652344, + "per_token_feature_norm/max": 384.0, + "per_token_feature_norm/median": 258.0, + "per_token_feature_norm/min": 124.5, + "per_token_feature_norm/p25": 217.0, + "per_token_feature_norm/p75": 292.0, + "per_token_feature_norm/var": 2709.477294921875, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6507272124290466, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.890625, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.0703125, + "per_token_policy_error_norm/p75": 0.9921875, + "per_token_policy_error_norm/var": 0.19167210161685944, + "policy_entropy": 1.8419439792633057, + "policy_entropy/max": 3.859375, + "policy_entropy/median": 2.328125, + "policy_entropy/min": 0.0067138671875, + "policy_entropy/p25": 0.30078125, + "policy_entropy/p75": 2.640625, + "policy_entropy/var": 1.4885120391845703, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.31643909215927124, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1353166103363037, + "policy_sharpness/min": 0.018412362784147263, + "policy_sharpness/p25": 0.09415183961391449, + "policy_sharpness/p75": 0.3219400644302368, + "policy_sharpness/var": 0.5786287784576416, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 99, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "accuracy_reward": 0.0, + "accuracy_reward/incorrect": 0.0, + "accuracy_reward/incorrect/max": 0.0, + "accuracy_reward/incorrect/median": 0.0, + "accuracy_reward/incorrect/min": 0.0, + "accuracy_reward/incorrect/p25": 0.0, + "accuracy_reward/incorrect/p75": 0.0, + "accuracy_reward/incorrect/var": 0.0, + "accuracy_reward/max": 0.0, + "accuracy_reward/median": 0.0, + "accuracy_reward/min": 0.0, + "accuracy_reward/p25": 0.0, + "accuracy_reward/p75": 0.0, + "accuracy_reward/var": 0.0, + "adam_stats/lm_head/lr_effective_max": 0.0, + "adam_stats/lm_head/lr_effective_mean": 0.0, + "adam_stats/lm_head/lr_effective_min": 0.0, + "adam_stats/lm_head/lr_effective_std": 0.0, + "adam_stats/lr_effective_max": 0.0, + "adam_stats/lr_effective_mean": 0.0, + "adam_stats/lr_effective_min": 0.0, + "adam_stats/m_t_max": 1.3752604672845337e-06, + "adam_stats/m_t_mean": -7.583707899399932e-15, + "adam_stats/m_t_min": -1.0993885553034488e-06, + "adam_stats/v_t_max": 2.6468340365681797e-05, + "adam_stats/v_t_mean": 5.7405078954242406e-12, + "adam_stats/v_t_min": 0.0, + "advantages": 0.0, + "advantages/max": 0.0, + "advantages/median": 0.0, + "advantages/min": 0.0, + "advantages/p25": 0.0, + "advantages/p75": 0.0, + "advantages/var": 0.0, + "all_logprobs": -1.8924311399459839, + "all_logprobs/max": -0.001220703125, + "all_logprobs/median": -2.109375, + "all_logprobs/min": -19.5, + "all_logprobs/p1": -6.8125, + "all_logprobs/p10": -4.375, + "all_logprobs/p25": -2.84375, + "all_logprobs/p5": -5.59375, + "all_logprobs/p75": -0.04052734375, + "all_logprobs/var": 3.3056678771972656, + "clip_ratio": 0.0, + "completion_length": 802.1354370117188, + "completion_length/incorrect": 802.1354370117188, + "completion_length/incorrect/max": 1024.0, + "completion_length/incorrect/median": 1024.0, + "completion_length/incorrect/min": 4.0, + "completion_length/incorrect/p25": 835.25, + "completion_length/incorrect/p75": 1024.0, + "completion_length/incorrect/var": 151314.796875, + "completion_length/max": 1024.0, + "completion_length/median": 1024.0, + "completion_length/min": 4.0, + "completion_length/p25": 835.25, + "completion_length/p75": 1024.0, + "completion_length/var": 151314.796875, + "epoch": 0.16, + "feature_vector_variance/max_squared_error": 173819.859375, + "feature_vector_variance/metric": 27491.94140625, + "generated_tokens/total": 7132297.0, + "global_fisher_curvature": 0.0, + "global_fisher_curvature/max": 0.0, + "global_fisher_curvature/median": 0.0, + "global_fisher_curvature/min": 0.0, + "global_fisher_curvature/p25": 0.0, + "global_fisher_curvature/p75": 0.0, + "global_fisher_curvature/p85": 0.0, + "global_fisher_curvature/p90": 0.0, + "global_fisher_curvature/p95": 0.0, + "global_fisher_curvature/p99": 0.0, + "global_fisher_curvature/var": NaN, + "global_fisher_kl_divergence": 0.0, + "global_fisher_kl_divergence/max": 0.0, + "global_fisher_kl_divergence/median": 0.0, + "global_fisher_kl_divergence/min": 0.0, + "global_fisher_kl_divergence/p25": 0.0, + "global_fisher_kl_divergence/p75": 0.0, + "global_fisher_kl_divergence/p85": 0.0, + "global_fisher_kl_divergence/p90": 0.0, + "global_fisher_kl_divergence/p95": 0.0, + "global_fisher_kl_divergence/p99": 0.0, + "global_fisher_kl_divergence/var": NaN, + "global_full_update_term": 0.0, + "global_full_update_term/max": 0.0, + "global_full_update_term/median": 0.0, + "global_full_update_term/min": 0.0, + "global_full_update_term/p25": 0.0, + "global_full_update_term/p75": 0.0, + "global_full_update_term/p85": 0.0, + "global_full_update_term/p90": 0.0, + "global_full_update_term/p95": 0.0, + "global_full_update_term/p99": 0.0, + "global_full_update_term/var": NaN, + "global_hessian_coeff": 0.0, + "global_hessian_coeff/max": 0.0, + "global_hessian_coeff/median": 0.0, + "global_hessian_coeff/min": 0.0, + "global_hessian_coeff/p25": 0.0, + "global_hessian_coeff/p75": 0.0, + "global_hessian_coeff/p99": 0.0, + "global_hessian_coeff/var": NaN, + "global_hessian_coeff_abs": 0.0, + "global_hessian_coeff_abs/max": 0.0, + "global_hessian_coeff_abs/median": 0.0, + "global_hessian_coeff_abs/min": 0.0, + "global_hessian_coeff_abs/p25": 0.0, + "global_hessian_coeff_abs/p75": 0.0, + "global_hessian_coeff_abs/p99": 0.0, + "global_hessian_coeff_abs/var": NaN, + "grad_norm": 0.0, + "grouped_std_rewards": 0.0, + "learning_rate": 0.0, + "loss": 0.0, + "mean_logprobs": -2.46875, + "mean_logprobs/var": 2.125, + "num_completions/total": 9600, + "per_sentence_gradient_norm": 0.0, + "per_sentence_gradient_norm/max": 0.0, + "per_sentence_gradient_norm/median": 0.0, + "per_sentence_gradient_norm/min": 0.0, + "per_sentence_gradient_norm/p25": 0.0, + "per_sentence_gradient_norm/p75": 0.0, + "per_sentence_gradient_norm/var": 0.0, + "per_token_feature_norm": 249.1795196533203, + "per_token_feature_norm/max": 400.0, + "per_token_feature_norm/median": 252.0, + "per_token_feature_norm/min": 126.0, + "per_token_feature_norm/p25": 211.0, + "per_token_feature_norm/p75": 286.0, + "per_token_feature_norm/var": 2691.851806640625, + "per_token_gradient_norm": 0.0, + "per_token_gradient_norm/max": 0.0, + "per_token_gradient_norm/median": 0.0, + "per_token_gradient_norm/min": 0.0, + "per_token_gradient_norm/p1": 0.0, + "per_token_gradient_norm/p10": 0.0, + "per_token_gradient_norm/p25": 0.0, + "per_token_gradient_norm/p5": 0.0, + "per_token_gradient_norm/p75": 0.0, + "per_token_gradient_norm/var": 0.0, + "per_token_policy_error_norm": 0.6008676290512085, + "per_token_policy_error_norm/max": 2.0, + "per_token_policy_error_norm/median": 0.8671875, + "per_token_policy_error_norm/min": 0.0, + "per_token_policy_error_norm/p25": 0.03125, + "per_token_policy_error_norm/p75": 0.984375, + "per_token_policy_error_norm/var": 0.21107588708400726, + "policy_entropy": 1.6934446096420288, + "policy_entropy/max": 3.84375, + "policy_entropy/median": 2.296875, + "policy_entropy/min": 0.0054931640625, + "policy_entropy/p25": 0.16796875, + "policy_entropy/p75": 2.59375, + "policy_entropy/var": 1.5732873678207397, + "policy_loss": 0.0, + "policy_loss/max": 0.0, + "policy_loss/median": 0.0, + "policy_loss/min": 0.0, + "policy_loss/p25": 0.0, + "policy_loss/p75": 0.0, + "policy_loss/var": 0.0, + "policy_sharpness": 0.3807973265647888, + "policy_sharpness/max": 10.0, + "policy_sharpness/median": 0.1355387270450592, + "policy_sharpness/min": 0.019281210377812386, + "policy_sharpness/p25": 0.09476514160633087, + "policy_sharpness/p75": 0.3637179434299469, + "policy_sharpness/var": 0.8657097220420837, + "reward": 0.0, + "reward/max": 0.0, + "reward/median": 0.0, + "reward/min": 0.0, + "reward/p25": 0.0, + "reward/p75": 0.0, + "reward/var": 0.0, + "rewards/accuracy_reward": 0.0, + "rewards/accuracy_reward/max": 0.0, + "rewards/accuracy_reward/median": 0.0, + "rewards/accuracy_reward/min": 0.0, + "rewards/accuracy_reward/p25": 0.0, + "rewards/accuracy_reward/p75": 0.0, + "rewards/accuracy_reward/var": 0.0, + "sentence_fisher_curvature": 0.0, + "sentence_fisher_curvature/max": 0.0, + "sentence_fisher_curvature/median": 0.0, + "sentence_fisher_curvature/min": 0.0, + "sentence_fisher_curvature/p25": 0.0, + "sentence_fisher_curvature/p75": 0.0, + "sentence_fisher_curvature/p85": 0.0, + "sentence_fisher_curvature/p90": 0.0, + "sentence_fisher_curvature/p95": 0.0, + "sentence_fisher_curvature/p99": 0.0, + "sentence_fisher_curvature/var": 0.0, + "sentence_fisher_kl_divergence": 0.0, + "sentence_fisher_kl_divergence/max": 0.0, + "sentence_fisher_kl_divergence/median": 0.0, + "sentence_fisher_kl_divergence/min": 0.0, + "sentence_fisher_kl_divergence/p25": 0.0, + "sentence_fisher_kl_divergence/p75": 0.0, + "sentence_fisher_kl_divergence/p85": 0.0, + "sentence_fisher_kl_divergence/p90": 0.0, + "sentence_fisher_kl_divergence/p95": 0.0, + "sentence_fisher_kl_divergence/p99": 0.0, + "sentence_fisher_kl_divergence/var": 0.0, + "sentence_full_gradient_variance/max_squared_error": 0.0, + "sentence_full_gradient_variance/metric": 0.0, + "sentence_full_gradient_variance/p75": 0.0, + "sentence_full_gradient_variance/p90": 0.0, + "sentence_full_gradient_variance/p95": 0.0, + "sentence_full_gradient_variance/p99": 0.0, + "sentence_full_update_term": 0.0, + "sentence_full_update_term/max": 0.0, + "sentence_full_update_term/median": 0.0, + "sentence_full_update_term/min": 0.0, + "sentence_full_update_term/p25": 0.0, + "sentence_full_update_term/p75": 0.0, + "sentence_full_update_term/p85": 0.0, + "sentence_full_update_term/p90": 0.0, + "sentence_full_update_term/p95": 0.0, + "sentence_full_update_term/p99": 0.0, + "sentence_full_update_term/var": 0.0, + "sentence_hessian_coeff": 0.0, + "sentence_hessian_coeff/max": 0.0, + "sentence_hessian_coeff/median": 0.0, + "sentence_hessian_coeff/min": 0.0, + "sentence_hessian_coeff/p25": 0.0, + "sentence_hessian_coeff/p75": 0.0, + "sentence_hessian_coeff/p99": 0.0, + "sentence_hessian_coeff/var": 0.0, + "sentence_hessian_coeff_abs": 0.0, + "sentence_hessian_coeff_abs/max": 0.0, + "sentence_hessian_coeff_abs/median": 0.0, + "sentence_hessian_coeff_abs/min": 0.0, + "sentence_hessian_coeff_abs/p25": 0.0, + "sentence_hessian_coeff_abs/p75": 0.0, + "sentence_hessian_coeff_abs/p99": 0.0, + "sentence_hessian_coeff_abs/var": 0.0, + "step": 100, + "token_fisher_curvature": 0.0, + "token_fisher_curvature/max": 0.0, + "token_fisher_curvature/median": 0.0, + "token_fisher_curvature/min": 0.0, + "token_fisher_curvature/p25": 0.0, + "token_fisher_curvature/p75": 0.0, + "token_fisher_curvature/p85": 0.0, + "token_fisher_curvature/p90": 0.0, + "token_fisher_curvature/p95": 0.0, + "token_fisher_curvature/p99": 0.0, + "token_fisher_curvature/var": 0.0, + "token_fisher_kl_divergence": 0.0, + "token_fisher_kl_divergence/max": 0.0, + "token_fisher_kl_divergence/median": 0.0, + "token_fisher_kl_divergence/min": 0.0, + "token_fisher_kl_divergence/p25": 0.0, + "token_fisher_kl_divergence/p75": 0.0, + "token_fisher_kl_divergence/p85": 0.0, + "token_fisher_kl_divergence/p90": 0.0, + "token_fisher_kl_divergence/p95": 0.0, + "token_fisher_kl_divergence/p99": 0.0, + "token_fisher_kl_divergence/var": 0.0, + "token_full_update_term": 0.0, + "token_full_update_term/max": 0.0, + "token_full_update_term/median": 0.0, + "token_full_update_term/min": 0.0, + "token_full_update_term/p25": 0.0, + "token_full_update_term/p75": 0.0, + "token_full_update_term/p85": 0.0, + "token_full_update_term/p90": 0.0, + "token_full_update_term/p95": 0.0, + "token_full_update_term/p99": 0.0, + "token_full_update_term/var": 0.0, + "token_hessian_coeff": 0.0, + "token_hessian_coeff/max": 0.0, + "token_hessian_coeff/median": 0.0, + "token_hessian_coeff/min": 0.0, + "token_hessian_coeff/p25": 0.0, + "token_hessian_coeff/p75": 0.0, + "token_hessian_coeff/p99": 0.0, + "token_hessian_coeff/var": 0.0, + "token_hessian_coeff_abs": 0.0, + "token_hessian_coeff_abs/max": 0.0, + "token_hessian_coeff_abs/median": 0.0, + "token_hessian_coeff_abs/min": 0.0, + "token_hessian_coeff_abs/p25": 0.0, + "token_hessian_coeff_abs/p75": 0.0, + "token_hessian_coeff_abs/p99": 0.0, + "token_hessian_coeff_abs/var": 0.0 + }, + { + "adam_stats/lm_head/lr_effective_max": 0.0, + "adam_stats/lm_head/lr_effective_mean": 0.0, + "adam_stats/lm_head/lr_effective_min": 0.0, + "adam_stats/lm_head/lr_effective_std": 0.0, + "adam_stats/lr_effective_max": 0.0, + "adam_stats/lr_effective_mean": 0.0, + "adam_stats/lr_effective_min": 0.0, + "adam_stats/m_t_max": 1.3752604672845337e-06, + "adam_stats/m_t_mean": -7.583707899399932e-15, + "adam_stats/m_t_min": -1.0993885553034488e-06, + "adam_stats/v_t_max": 2.6468340365681797e-05, + "adam_stats/v_t_mean": 5.7405078954242406e-12, + "adam_stats/v_t_min": 0.0, + "epoch": 0.16, + "step": 100, + "total_flos": 0.0, + "train_loss": -8.506079812731216e-10, + "train_runtime": 13270.9339, + "train_samples_per_second": 0.723, + "train_steps_per_second": 0.008 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": null +}