| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.6666666666666665, | |
| "eval_steps": 500, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 642.14, | |
| "completions/max_terminated_length": 570.34, | |
| "completions/mean_length": 420.1125, | |
| "completions/mean_terminated_length": 398.3580334472656, | |
| "completions/min_length": 250.16, | |
| "completions/min_terminated_length": 250.16, | |
| "epoch": 0.03333333333333333, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.710774621900829, | |
| "learning_rate": 9.918333333333334e-07, | |
| "loss": 0.0113, | |
| "num_tokens": 461361.0, | |
| "reward": 6.184720268249512, | |
| "reward_std": 1.6460176765918733, | |
| "rewards/accuracy_reward/mean": 0.31, | |
| "rewards/accuracy_reward/std": 0.393804452419281, | |
| "rewards/chart_type_reward/mean": 0.83, | |
| "rewards/chart_type_reward/std": 0.20554168462753297, | |
| "rewards/format_reward/mean": 1.31, | |
| "rewards/format_reward/std": 0.8051172530651093, | |
| "rewards/length_think_reward/mean": 1.116875, | |
| "rewards/length_think_reward/std": 0.35811117276549337, | |
| "rewards/num_token_reward/mean": 0.645, | |
| "rewards/num_token_reward/std": 0.4071964037418365, | |
| "rewards/process_style_reward/mean": 0.7380022585391999, | |
| "rewards/process_style_reward/std": 0.20786408737301826, | |
| "rewards/table_style_reward/mean": 1.2348430597782134, | |
| "rewards/table_style_reward/std": 0.6372630500793457, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.055, | |
| "completions/max_length": 627.98, | |
| "completions/max_terminated_length": 567.12, | |
| "completions/mean_length": 419.4975, | |
| "completions/mean_terminated_length": 399.8742425537109, | |
| "completions/min_length": 271.46, | |
| "completions/min_terminated_length": 271.46, | |
| "epoch": 0.06666666666666667, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.617454327420967, | |
| "learning_rate": 9.835e-07, | |
| "loss": 0.033, | |
| "num_tokens": 922132.0, | |
| "reward": 7.684444198608398, | |
| "reward_std": 1.0128395134210586, | |
| "rewards/accuracy_reward/mean": 0.3675, | |
| "rewards/accuracy_reward/std": 0.39272902250289915, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.10311741709709167, | |
| "rewards/format_reward/mean": 1.81, | |
| "rewards/format_reward/std": 0.39947535157203673, | |
| "rewards/length_think_reward/mean": 1.36, | |
| "rewards/length_think_reward/std": 0.18411283910274506, | |
| "rewards/num_token_reward/mean": 0.8975, | |
| "rewards/num_token_reward/std": 0.21008866012096405, | |
| "rewards/process_style_reward/mean": 0.8522701609134674, | |
| "rewards/process_style_reward/std": 0.2383432410657406, | |
| "rewards/table_style_reward/mean": 1.4771740126609803, | |
| "rewards/table_style_reward/std": 0.538226346373558, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.055, | |
| "completions/max_length": 663.8, | |
| "completions/max_terminated_length": 622.22, | |
| "completions/mean_length": 444.205, | |
| "completions/mean_terminated_length": 427.1011260986328, | |
| "completions/min_length": 294.06, | |
| "completions/min_terminated_length": 294.06, | |
| "epoch": 0.1, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 2.298847294243072, | |
| "learning_rate": 9.751666666666666e-07, | |
| "loss": 0.0272, | |
| "num_tokens": 1393874.0, | |
| "reward": 8.196055917739868, | |
| "reward_std": 0.8822205343842506, | |
| "rewards/accuracy_reward/mean": 0.405, | |
| "rewards/accuracy_reward/std": 0.4406168383359909, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.10892393231391907, | |
| "rewards/format_reward/mean": 1.85, | |
| "rewards/format_reward/std": 0.3395550119876862, | |
| "rewards/length_think_reward/mean": 1.48125, | |
| "rewards/length_think_reward/std": 0.04037815436720848, | |
| "rewards/num_token_reward/mean": 0.9275, | |
| "rewards/num_token_reward/std": 0.16868472278118132, | |
| "rewards/process_style_reward/mean": 1.0046203970909118, | |
| "rewards/process_style_reward/std": 0.28071307986974714, | |
| "rewards/table_style_reward/mean": 1.617685569524765, | |
| "rewards/table_style_reward/std": 0.43435441348701714, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0375, | |
| "completions/max_length": 590.62, | |
| "completions/max_terminated_length": 560.86, | |
| "completions/mean_length": 419.71, | |
| "completions/mean_terminated_length": 407.2615026855469, | |
| "completions/min_length": 300.28, | |
| "completions/min_terminated_length": 300.28, | |
| "epoch": 0.13333333333333333, | |
| "frac_reward_zero_std": 0.04, | |
| "grad_norm": 5.01614642751583, | |
| "learning_rate": 9.668333333333332e-07, | |
| "loss": 0.018, | |
| "num_tokens": 1854582.0, | |
| "reward": 8.454336786270142, | |
| "reward_std": 0.7186576825380325, | |
| "rewards/accuracy_reward/mean": 0.445, | |
| "rewards/accuracy_reward/std": 0.37084252953529356, | |
| "rewards/chart_type_reward/mean": 0.8825, | |
| "rewards/chart_type_reward/std": 0.13139761447906495, | |
| "rewards/format_reward/mean": 1.875, | |
| "rewards/format_reward/std": 0.25387449383735655, | |
| "rewards/length_think_reward/mean": 1.491875, | |
| "rewards/length_think_reward/std": 0.016481903940439226, | |
| "rewards/num_token_reward/mean": 0.935, | |
| "rewards/num_token_reward/std": 0.12912438035011292, | |
| "rewards/process_style_reward/mean": 1.1697046744823456, | |
| "rewards/process_style_reward/std": 0.3359800568223, | |
| "rewards/table_style_reward/mean": 1.6552570796012878, | |
| "rewards/table_style_reward/std": 0.4633850826323032, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0125, | |
| "completions/max_length": 520.38, | |
| "completions/max_terminated_length": 496.48, | |
| "completions/mean_length": 381.645, | |
| "completions/mean_terminated_length": 376.8010729980469, | |
| "completions/min_length": 291.64, | |
| "completions/min_terminated_length": 291.64, | |
| "epoch": 0.16666666666666666, | |
| "frac_reward_zero_std": 0.03, | |
| "grad_norm": 3.8768085436240014, | |
| "learning_rate": 9.585e-07, | |
| "loss": 0.0049, | |
| "num_tokens": 2300628.0, | |
| "reward": 8.882754230499268, | |
| "reward_std": 0.7584551328420639, | |
| "rewards/accuracy_reward/mean": 0.6175, | |
| "rewards/accuracy_reward/std": 0.41537604093551633, | |
| "rewards/chart_type_reward/mean": 0.905, | |
| "rewards/chart_type_reward/std": 0.11220384895801544, | |
| "rewards/format_reward/mean": 1.93, | |
| "rewards/format_reward/std": 0.17845415830612182, | |
| "rewards/length_think_reward/mean": 1.49375, | |
| "rewards/length_think_reward/std": 0.01767767071723938, | |
| "rewards/num_token_reward/mean": 0.965, | |
| "rewards/num_token_reward/std": 0.08922707915306091, | |
| "rewards/process_style_reward/mean": 1.2509180808067322, | |
| "rewards/process_style_reward/std": 0.35428043991327285, | |
| "rewards/table_style_reward/mean": 1.7205862057209016, | |
| "rewards/table_style_reward/std": 0.3823927499353886, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 476.06, | |
| "completions/max_terminated_length": 464.94, | |
| "completions/mean_length": 359.98, | |
| "completions/mean_terminated_length": 357.9764294433594, | |
| "completions/min_length": 277.3, | |
| "completions/min_terminated_length": 277.3, | |
| "epoch": 0.2, | |
| "frac_reward_zero_std": 0.03, | |
| "grad_norm": 4.04444430042266, | |
| "learning_rate": 9.501666666666667e-07, | |
| "loss": -0.0013, | |
| "num_tokens": 2738260.0, | |
| "reward": 9.100077533721924, | |
| "reward_std": 0.5911612424254418, | |
| "rewards/accuracy_reward/mean": 0.64, | |
| "rewards/accuracy_reward/std": 0.38784352123737337, | |
| "rewards/chart_type_reward/mean": 0.9475, | |
| "rewards/chart_type_reward/std": 0.06052331507205963, | |
| "rewards/format_reward/mean": 1.96, | |
| "rewards/format_reward/std": 0.10336921453475952, | |
| "rewards/length_think_reward/mean": 1.489375, | |
| "rewards/length_think_reward/std": 0.026885675489902495, | |
| "rewards/num_token_reward/mean": 0.98, | |
| "rewards/num_token_reward/std": 0.05168460726737976, | |
| "rewards/process_style_reward/mean": 1.3212374341487885, | |
| "rewards/process_style_reward/std": 0.343754044175148, | |
| "rewards/table_style_reward/mean": 1.7619650173187256, | |
| "rewards/table_style_reward/std": 0.41162539228796957, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0025, | |
| "completions/max_length": 433.68, | |
| "completions/max_terminated_length": 429.78, | |
| "completions/mean_length": 335.8, | |
| "completions/mean_terminated_length": 334.99607177734373, | |
| "completions/min_length": 257.7, | |
| "completions/min_terminated_length": 257.7, | |
| "epoch": 0.23333333333333334, | |
| "frac_reward_zero_std": 0.07, | |
| "grad_norm": 1.820258234118004, | |
| "learning_rate": 9.418333333333332e-07, | |
| "loss": -0.0019, | |
| "num_tokens": 3165744.0, | |
| "reward": 9.284861507415771, | |
| "reward_std": 0.4822974817454815, | |
| "rewards/accuracy_reward/mean": 0.715, | |
| "rewards/accuracy_reward/std": 0.3768920677900314, | |
| "rewards/chart_type_reward/mean": 0.955, | |
| "rewards/chart_type_reward/std": 0.05690393328666687, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.028284270763397217, | |
| "rewards/length_think_reward/mean": 1.49125, | |
| "rewards/length_think_reward/std": 0.01931762829422951, | |
| "rewards/num_token_reward/mean": 0.9925, | |
| "rewards/num_token_reward/std": 0.021213203072547912, | |
| "rewards/process_style_reward/mean": 1.3195704579353333, | |
| "rewards/process_style_reward/std": 0.3678068408370018, | |
| "rewards/table_style_reward/mean": 1.821540994644165, | |
| "rewards/table_style_reward/std": 0.3932980696856976, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 433.7, | |
| "completions/max_terminated_length": 419.2, | |
| "completions/mean_length": 331.6575, | |
| "completions/mean_terminated_length": 329.5703582763672, | |
| "completions/min_length": 261.88, | |
| "completions/min_terminated_length": 261.88, | |
| "epoch": 0.26666666666666666, | |
| "frac_reward_zero_std": 0.04, | |
| "grad_norm": 3.996548131664497, | |
| "learning_rate": 9.334999999999999e-07, | |
| "loss": 0.0016, | |
| "num_tokens": 3591319.0, | |
| "reward": 9.078651866912843, | |
| "reward_std": 0.5775595012307168, | |
| "rewards/accuracy_reward/mean": 0.6775, | |
| "rewards/accuracy_reward/std": 0.360657674074173, | |
| "rewards/chart_type_reward/mean": 0.8875, | |
| "rewards/chart_type_reward/std": 0.10328511297702789, | |
| "rewards/format_reward/mean": 1.95, | |
| "rewards/format_reward/std": 0.13165348529815674, | |
| "rewards/length_think_reward/mean": 1.48875, | |
| "rewards/length_think_reward/std": 0.021778347939252853, | |
| "rewards/num_token_reward/mean": 0.975, | |
| "rewards/num_token_reward/std": 0.06582674264907837, | |
| "rewards/process_style_reward/mean": 1.3236911845207215, | |
| "rewards/process_style_reward/std": 0.39317745611071586, | |
| "rewards/table_style_reward/mean": 1.776210721731186, | |
| "rewards/table_style_reward/std": 0.3345204618573189, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 460.6, | |
| "completions/max_terminated_length": 458.46, | |
| "completions/mean_length": 363.545, | |
| "completions/mean_terminated_length": 361.9766668701172, | |
| "completions/min_length": 284.26, | |
| "completions/min_terminated_length": 284.26, | |
| "epoch": 0.3, | |
| "frac_reward_zero_std": 0.08, | |
| "grad_norm": 7.628999451616129, | |
| "learning_rate": 9.251666666666666e-07, | |
| "loss": 0.01, | |
| "num_tokens": 4030253.0, | |
| "reward": 9.122887582778931, | |
| "reward_std": 0.406713061761111, | |
| "rewards/accuracy_reward/mean": 0.74, | |
| "rewards/accuracy_reward/std": 0.33945011138916015, | |
| "rewards/chart_type_reward/mean": 0.865, | |
| "rewards/chart_type_reward/std": 0.13173707962036132, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.49375, | |
| "rewards/length_think_reward/std": 0.01767766922712326, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.2878882658481599, | |
| "rewards/process_style_reward/std": 0.34421144127845765, | |
| "rewards/table_style_reward/mean": 1.751249282360077, | |
| "rewards/table_style_reward/std": 0.3605493099242449, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 431.84, | |
| "completions/max_terminated_length": 431.84, | |
| "completions/mean_length": 340.835, | |
| "completions/mean_terminated_length": 340.835, | |
| "completions/min_length": 259.66, | |
| "completions/min_terminated_length": 259.66, | |
| "epoch": 0.3333333333333333, | |
| "frac_reward_zero_std": 0.03, | |
| "grad_norm": 3.667965991041236, | |
| "learning_rate": 9.168333333333333e-07, | |
| "loss": 0.0008, | |
| "num_tokens": 4459843.0, | |
| "reward": 9.339048709869385, | |
| "reward_std": 0.4699262708425522, | |
| "rewards/accuracy_reward/mean": 0.6875, | |
| "rewards/accuracy_reward/std": 0.3332241028547287, | |
| "rewards/chart_type_reward/mean": 0.925, | |
| "rewards/chart_type_reward/std": 0.09570688426494599, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.3655757594108582, | |
| "rewards/process_style_reward/std": 0.40603267412632704, | |
| "rewards/table_style_reward/mean": 1.860972990989685, | |
| "rewards/table_style_reward/std": 0.4130638699233532, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 476.36, | |
| "eval_completions/max_terminated_length": 476.36, | |
| "eval_completions/mean_length": 301.6075, | |
| "eval_completions/mean_terminated_length": 301.6075, | |
| "eval_completions/min_length": 198.44, | |
| "eval_completions/min_terminated_length": 198.44, | |
| "eval_frac_reward_zero_std": 0.495, | |
| "eval_loss": 0.0033517335541546345, | |
| "eval_num_tokens": 4459843.0, | |
| "eval_reward": 7.484729690551758, | |
| "eval_reward_std": 0.2337803066149354, | |
| "eval_rewards/accuracy_reward/mean": 0.80875, | |
| "eval_rewards/accuracy_reward/std": 0.3655660229921341, | |
| "eval_rewards/chart_type_reward/mean": 0.6275, | |
| "eval_rewards/chart_type_reward/std": 0.46189448714256287, | |
| "eval_rewards/format_reward/mean": 1.9775, | |
| "eval_rewards/format_reward/std": 0.09993488192558289, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 0.98875, | |
| "eval_rewards/num_token_reward/std": 0.049967440962791446, | |
| "eval_rewards/process_style_reward/mean": 0.8432698702812195, | |
| "eval_rewards/process_style_reward/std": 0.27860497415065766, | |
| "eval_rewards/table_style_reward/mean": 0.7389598202705383, | |
| "eval_rewards/table_style_reward/std": 0.0863262665271759, | |
| "eval_runtime": 357.3984, | |
| "eval_samples_per_second": 0.56, | |
| "eval_steps_per_second": 0.02, | |
| "step": 500 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 381.68, | |
| "completions/max_terminated_length": 381.68, | |
| "completions/mean_length": 302.93, | |
| "completions/mean_terminated_length": 302.93, | |
| "completions/min_length": 236.18, | |
| "completions/min_terminated_length": 236.18, | |
| "epoch": 0.36666666666666664, | |
| "frac_reward_zero_std": 0.04, | |
| "grad_norm": 4.242303838921506, | |
| "learning_rate": 9.085e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 4874635.0, | |
| "reward": 9.339553604125976, | |
| "reward_std": 0.3770983973145485, | |
| "rewards/accuracy_reward/mean": 0.74, | |
| "rewards/accuracy_reward/std": 0.3310369694232941, | |
| "rewards/chart_type_reward/mean": 0.9175, | |
| "rewards/chart_type_reward/std": 0.09259466350078582, | |
| "rewards/format_reward/mean": 1.995, | |
| "rewards/format_reward/std": 0.014142135381698609, | |
| "rewards/length_think_reward/mean": 1.49875, | |
| "rewards/length_think_reward/std": 0.003535533845424652, | |
| "rewards/num_token_reward/mean": 0.9975, | |
| "rewards/num_token_reward/std": 0.007071067690849304, | |
| "rewards/process_style_reward/mean": 1.3991042220592498, | |
| "rewards/process_style_reward/std": 0.37223585724830627, | |
| "rewards/table_style_reward/mean": 1.791699321269989, | |
| "rewards/table_style_reward/std": 0.3688546184077859, | |
| "step": 550 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 382.36, | |
| "completions/max_terminated_length": 382.36, | |
| "completions/mean_length": 313.265, | |
| "completions/mean_terminated_length": 313.265, | |
| "completions/min_length": 249.0, | |
| "completions/min_terminated_length": 249.0, | |
| "epoch": 0.4, | |
| "frac_reward_zero_std": 0.06, | |
| "grad_norm": 2.6386198633761295, | |
| "learning_rate": 9.001666666666667e-07, | |
| "loss": -0.0002, | |
| "num_tokens": 5292769.0, | |
| "reward": 9.358322076797485, | |
| "reward_std": 0.45063421681523325, | |
| "rewards/accuracy_reward/mean": 0.7475, | |
| "rewards/accuracy_reward/std": 0.32666426956653594, | |
| "rewards/chart_type_reward/mean": 0.9275, | |
| "rewards/chart_type_reward/std": 0.08190421402454376, | |
| "rewards/format_reward/mean": 1.985, | |
| "rewards/format_reward/std": 0.03265853762626648, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9925, | |
| "rewards/num_token_reward/std": 0.01632926881313324, | |
| "rewards/process_style_reward/mean": 1.4022967970371247, | |
| "rewards/process_style_reward/std": 0.3624314972758293, | |
| "rewards/table_style_reward/mean": 1.8035252857208253, | |
| "rewards/table_style_reward/std": 0.3688652907311916, | |
| "step": 600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0475, | |
| "completions/max_length": 502.48, | |
| "completions/max_terminated_length": 479.38, | |
| "completions/mean_length": 390.065, | |
| "completions/mean_terminated_length": 376.88559692382813, | |
| "completions/min_length": 295.92, | |
| "completions/min_terminated_length": 295.92, | |
| "epoch": 0.43333333333333335, | |
| "frac_reward_zero_std": 0.08, | |
| "grad_norm": 1.6276070180776299, | |
| "learning_rate": 8.918333333333333e-07, | |
| "loss": 0.0067, | |
| "num_tokens": 5742743.0, | |
| "reward": 8.926296434402467, | |
| "reward_std": 0.6193729147315026, | |
| "rewards/accuracy_reward/mean": 0.7025, | |
| "rewards/accuracy_reward/std": 0.33726297795772553, | |
| "rewards/chart_type_reward/mean": 0.87, | |
| "rewards/chart_type_reward/std": 0.13173707962036132, | |
| "rewards/format_reward/mean": 1.88, | |
| "rewards/format_reward/std": 0.18836578965187073, | |
| "rewards/length_think_reward/mean": 1.476875, | |
| "rewards/length_think_reward/std": 0.04876347452402115, | |
| "rewards/num_token_reward/mean": 0.9325, | |
| "rewards/num_token_reward/std": 0.09855559468269348, | |
| "rewards/process_style_reward/mean": 1.3070782232284546, | |
| "rewards/process_style_reward/std": 0.3663827758282423, | |
| "rewards/table_style_reward/mean": 1.7573432433605194, | |
| "rewards/table_style_reward/std": 0.41278132781386373, | |
| "step": 650 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04, | |
| "completions/max_length": 582.46, | |
| "completions/max_terminated_length": 561.88, | |
| "completions/mean_length": 474.75, | |
| "completions/mean_terminated_length": 462.6513354492188, | |
| "completions/min_length": 382.0, | |
| "completions/min_terminated_length": 382.0, | |
| "epoch": 0.4666666666666667, | |
| "frac_reward_zero_std": 0.03, | |
| "grad_norm": 3.113389277396801, | |
| "learning_rate": 8.834999999999999e-07, | |
| "loss": 0.0051, | |
| "num_tokens": 6226015.0, | |
| "reward": 9.087711658477783, | |
| "reward_std": 0.6245314812660218, | |
| "rewards/accuracy_reward/mean": 0.7075, | |
| "rewards/accuracy_reward/std": 0.3379362678527832, | |
| "rewards/chart_type_reward/mean": 0.875, | |
| "rewards/chart_type_reward/std": 0.1228942984342575, | |
| "rewards/format_reward/mean": 1.915, | |
| "rewards/format_reward/std": 0.14443274736404418, | |
| "rewards/length_think_reward/mean": 1.434375, | |
| "rewards/length_think_reward/std": 0.12082115039229394, | |
| "rewards/num_token_reward/mean": 0.9425, | |
| "rewards/num_token_reward/std": 0.11464277982711792, | |
| "rewards/process_style_reward/mean": 1.4090502178668975, | |
| "rewards/process_style_reward/std": 0.4188565620034933, | |
| "rewards/table_style_reward/mean": 1.8042864727973937, | |
| "rewards/table_style_reward/std": 0.367302486859262, | |
| "step": 700 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08, | |
| "completions/max_length": 669.88, | |
| "completions/max_terminated_length": 636.52, | |
| "completions/mean_length": 543.3925, | |
| "completions/mean_terminated_length": 523.3314343261719, | |
| "completions/min_length": 434.66, | |
| "completions/min_terminated_length": 434.66, | |
| "epoch": 0.5, | |
| "frac_reward_zero_std": 0.07, | |
| "grad_norm": 2.705954822615204, | |
| "learning_rate": 8.751666666666666e-07, | |
| "loss": 0.0099, | |
| "num_tokens": 6735800.0, | |
| "reward": 9.080023832321167, | |
| "reward_std": 0.6411656188964844, | |
| "rewards/accuracy_reward/mean": 0.695, | |
| "rewards/accuracy_reward/std": 0.3173846417665482, | |
| "rewards/chart_type_reward/mean": 0.9, | |
| "rewards/chart_type_reward/std": 0.10690449476242066, | |
| "rewards/format_reward/mean": 1.84, | |
| "rewards/format_reward/std": 0.2742329239845276, | |
| "rewards/length_think_reward/mean": 1.445625, | |
| "rewards/length_think_reward/std": 0.09453705742955208, | |
| "rewards/num_token_reward/mean": 0.9175, | |
| "rewards/num_token_reward/std": 0.1441875296831131, | |
| "rewards/process_style_reward/mean": 1.434592843055725, | |
| "rewards/process_style_reward/std": 0.4128647920489311, | |
| "rewards/table_style_reward/mean": 1.8473060631752014, | |
| "rewards/table_style_reward/std": 0.384682634845376, | |
| "step": 750 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0425, | |
| "completions/max_length": 555.72, | |
| "completions/max_terminated_length": 536.3, | |
| "completions/mean_length": 459.7675, | |
| "completions/mean_terminated_length": 450.3732165527344, | |
| "completions/min_length": 376.74, | |
| "completions/min_terminated_length": 376.74, | |
| "epoch": 0.5333333333333333, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 2.744641896369063, | |
| "learning_rate": 8.668333333333333e-07, | |
| "loss": 0.0041, | |
| "num_tokens": 7212935.0, | |
| "reward": 9.342824554443359, | |
| "reward_std": 0.41833467945456504, | |
| "rewards/accuracy_reward/mean": 0.7575, | |
| "rewards/accuracy_reward/std": 0.27658932030200956, | |
| "rewards/chart_type_reward/mean": 0.9025, | |
| "rewards/chart_type_reward/std": 0.10656502962112427, | |
| "rewards/format_reward/mean": 1.92, | |
| "rewards/format_reward/std": 0.11177420973777771, | |
| "rewards/length_think_reward/mean": 1.48875, | |
| "rewards/length_think_reward/std": 0.018290950953960418, | |
| "rewards/num_token_reward/mean": 0.9575, | |
| "rewards/num_token_reward/std": 0.06295817255973817, | |
| "rewards/process_style_reward/mean": 1.4902155125141143, | |
| "rewards/process_style_reward/std": 0.3751195715367794, | |
| "rewards/table_style_reward/mean": 1.8263590598106385, | |
| "rewards/table_style_reward/std": 0.3030733197927475, | |
| "step": 800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02, | |
| "completions/max_length": 507.02, | |
| "completions/max_terminated_length": 496.5, | |
| "completions/mean_length": 406.635, | |
| "completions/mean_terminated_length": 400.53512084960937, | |
| "completions/min_length": 317.5, | |
| "completions/min_terminated_length": 317.5, | |
| "epoch": 0.5666666666666667, | |
| "frac_reward_zero_std": 0.07, | |
| "grad_norm": 4.8860738140378, | |
| "learning_rate": 8.585e-07, | |
| "loss": 0.0018, | |
| "num_tokens": 7668721.0, | |
| "reward": 9.412762422561645, | |
| "reward_std": 0.4385050618648529, | |
| "rewards/accuracy_reward/mean": 0.705, | |
| "rewards/accuracy_reward/std": 0.333148148059845, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.09823348283767701, | |
| "rewards/format_reward/mean": 1.955, | |
| "rewards/format_reward/std": 0.08232370734214783, | |
| "rewards/length_think_reward/mean": 1.490625, | |
| "rewards/length_think_reward/std": 0.018341146558523178, | |
| "rewards/num_token_reward/mean": 0.9775, | |
| "rewards/num_token_reward/std": 0.04116185367107392, | |
| "rewards/process_style_reward/mean": 1.4954944217205048, | |
| "rewards/process_style_reward/std": 0.4515664022415876, | |
| "rewards/table_style_reward/mean": 1.8691429781913758, | |
| "rewards/table_style_reward/std": 0.2963829467073083, | |
| "step": 850 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 464.54, | |
| "completions/max_terminated_length": 464.54, | |
| "completions/mean_length": 374.935, | |
| "completions/mean_terminated_length": 374.935, | |
| "completions/min_length": 296.92, | |
| "completions/min_terminated_length": 296.92, | |
| "epoch": 0.6, | |
| "frac_reward_zero_std": 0.09, | |
| "grad_norm": 1.8355684342913006, | |
| "learning_rate": 8.501666666666666e-07, | |
| "loss": 0.0025, | |
| "num_tokens": 8111331.0, | |
| "reward": 9.84747314453125, | |
| "reward_std": 0.36430649772286416, | |
| "rewards/accuracy_reward/mean": 0.7225, | |
| "rewards/accuracy_reward/std": 0.2612554532289505, | |
| "rewards/chart_type_reward/mean": 0.9475, | |
| "rewards/chart_type_reward/std": 0.0391424161195755, | |
| "rewards/format_reward/mean": 1.995, | |
| "rewards/format_reward/std": 0.014142135381698609, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9975, | |
| "rewards/num_token_reward/std": 0.007071067690849304, | |
| "rewards/process_style_reward/mean": 1.6478522050380706, | |
| "rewards/process_style_reward/std": 0.39793143898248673, | |
| "rewards/table_style_reward/mean": 2.0371209740638734, | |
| "rewards/table_style_reward/std": 0.3056399393081665, | |
| "step": 900 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0075, | |
| "completions/max_length": 482.28, | |
| "completions/max_terminated_length": 480.06, | |
| "completions/mean_length": 378.5375, | |
| "completions/mean_terminated_length": 376.3790002441406, | |
| "completions/min_length": 300.54, | |
| "completions/min_terminated_length": 300.54, | |
| "epoch": 0.6333333333333333, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 1.3347305735332344, | |
| "learning_rate": 8.418333333333333e-07, | |
| "loss": 0.008, | |
| "num_tokens": 8555522.0, | |
| "reward": 9.62767692565918, | |
| "reward_std": 0.3843289668299258, | |
| "rewards/accuracy_reward/mean": 0.7425, | |
| "rewards/accuracy_reward/std": 0.31665275037288665, | |
| "rewards/chart_type_reward/mean": 0.93, | |
| "rewards/chart_type_reward/std": 0.07483314633369446, | |
| "rewards/format_reward/mean": 1.985, | |
| "rewards/format_reward/std": 0.02070196866989136, | |
| "rewards/length_think_reward/mean": 1.496875, | |
| "rewards/length_think_reward/std": 0.004580627083778381, | |
| "rewards/num_token_reward/mean": 0.9925, | |
| "rewards/num_token_reward/std": 0.01035098433494568, | |
| "rewards/process_style_reward/mean": 1.6256941604614257, | |
| "rewards/process_style_reward/std": 0.46362122789025306, | |
| "rewards/table_style_reward/mean": 1.8551077818870545, | |
| "rewards/table_style_reward/std": 0.3706050312891602, | |
| "step": 950 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 462.0, | |
| "completions/max_terminated_length": 455.9, | |
| "completions/mean_length": 378.345, | |
| "completions/mean_terminated_length": 376.565, | |
| "completions/min_length": 300.4, | |
| "completions/min_terminated_length": 300.4, | |
| "epoch": 0.6666666666666666, | |
| "frac_reward_zero_std": 0.06, | |
| "grad_norm": 2.6667420842164096, | |
| "learning_rate": 8.334999999999999e-07, | |
| "loss": 0.0022, | |
| "num_tokens": 9000848.0, | |
| "reward": 9.415374546051025, | |
| "reward_std": 0.4214027213305235, | |
| "rewards/accuracy_reward/mean": 0.7225, | |
| "rewards/accuracy_reward/std": 0.2530916023254395, | |
| "rewards/chart_type_reward/mean": 0.895, | |
| "rewards/chart_type_reward/std": 0.12104663014411926, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.49375, | |
| "rewards/length_think_reward/std": 0.011572750806808472, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.488259848356247, | |
| "rewards/process_style_reward/std": 0.3761888966709375, | |
| "rewards/table_style_reward/mean": 1.8308646750450135, | |
| "rewards/table_style_reward/std": 0.3701925078779459, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.005, | |
| "eval_completions/max_length": 588.16, | |
| "eval_completions/max_terminated_length": 586.0, | |
| "eval_completions/mean_length": 377.64875, | |
| "eval_completions/mean_terminated_length": 375.7753918457031, | |
| "eval_completions/min_length": 247.76, | |
| "eval_completions/min_terminated_length": 247.76, | |
| "eval_frac_reward_zero_std": 0.645, | |
| "eval_loss": 0.001064616721123457, | |
| "eval_num_tokens": 9000848.0, | |
| "eval_reward": 7.492321758270264, | |
| "eval_reward_std": 0.13835911433212458, | |
| "eval_rewards/accuracy_reward/mean": 0.79625, | |
| "eval_rewards/accuracy_reward/std": 0.3533613955974579, | |
| "eval_rewards/chart_type_reward/mean": 0.61375, | |
| "eval_rewards/chart_type_reward/std": 0.46422410249710083, | |
| "eval_rewards/format_reward/mean": 1.9875, | |
| "eval_rewards/format_reward/std": 0.05197583675384521, | |
| "eval_rewards/length_think_reward/mean": 1.49625, | |
| "eval_rewards/length_think_reward/std": 0.019564387649297715, | |
| "eval_rewards/num_token_reward/mean": 0.9925, | |
| "eval_rewards/num_token_reward/std": 0.03305898606777191, | |
| "eval_rewards/process_style_reward/mean": 0.8522592496871948, | |
| "eval_rewards/process_style_reward/std": 0.25637542963027954, | |
| "eval_rewards/table_style_reward/mean": 0.7538124990463256, | |
| "eval_rewards/table_style_reward/std": 0.035973691046237946, | |
| "eval_runtime": 425.3173, | |
| "eval_samples_per_second": 0.47, | |
| "eval_steps_per_second": 0.016, | |
| "step": 1000 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01, | |
| "completions/max_length": 489.34, | |
| "completions/max_terminated_length": 481.02, | |
| "completions/mean_length": 393.5375, | |
| "completions/mean_terminated_length": 389.2625, | |
| "completions/min_length": 313.12, | |
| "completions/min_terminated_length": 313.12, | |
| "epoch": 0.7, | |
| "frac_reward_zero_std": 0.06, | |
| "grad_norm": 2.333315545051143, | |
| "learning_rate": 8.251666666666667e-07, | |
| "loss": 0.0042, | |
| "num_tokens": 9451267.0, | |
| "reward": 9.452465152740478, | |
| "reward_std": 0.3454449198395014, | |
| "rewards/accuracy_reward/mean": 0.695, | |
| "rewards/accuracy_reward/std": 0.2591600608825684, | |
| "rewards/chart_type_reward/mean": 0.8875, | |
| "rewards/chart_type_reward/std": 0.08190421402454376, | |
| "rewards/format_reward/mean": 1.98, | |
| "rewards/format_reward/std": 0.021380898952484132, | |
| "rewards/length_think_reward/mean": 1.49125, | |
| "rewards/length_think_reward/std": 0.010938137769699097, | |
| "rewards/num_token_reward/mean": 0.99, | |
| "rewards/num_token_reward/std": 0.010690449476242066, | |
| "rewards/process_style_reward/mean": 1.5581933534145356, | |
| "rewards/process_style_reward/std": 0.46272379651665685, | |
| "rewards/table_style_reward/mean": 1.8505217921733856, | |
| "rewards/table_style_reward/std": 0.37696966528892517, | |
| "step": 1050 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 496.34, | |
| "completions/max_terminated_length": 496.34, | |
| "completions/mean_length": 406.19, | |
| "completions/mean_terminated_length": 406.19, | |
| "completions/min_length": 335.7, | |
| "completions/min_terminated_length": 335.7, | |
| "epoch": 0.7333333333333333, | |
| "frac_reward_zero_std": 0.09, | |
| "grad_norm": 3.6502729861830896, | |
| "learning_rate": 8.168333333333333e-07, | |
| "loss": 0.0033, | |
| "num_tokens": 9907007.0, | |
| "reward": 9.746276054382324, | |
| "reward_std": 0.30796022541588175, | |
| "rewards/accuracy_reward/mean": 0.795, | |
| "rewards/accuracy_reward/std": 0.25512682616710664, | |
| "rewards/chart_type_reward/mean": 0.94, | |
| "rewards/chart_type_reward/std": 0.06127820014953613, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5490337193012238, | |
| "rewards/process_style_reward/std": 0.4507550221681595, | |
| "rewards/table_style_reward/mean": 1.962242330312729, | |
| "rewards/table_style_reward/std": 0.3748661072552204, | |
| "step": 1100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 463.02, | |
| "completions/max_terminated_length": 463.02, | |
| "completions/mean_length": 380.8375, | |
| "completions/mean_terminated_length": 380.8375, | |
| "completions/min_length": 315.86, | |
| "completions/min_terminated_length": 315.86, | |
| "epoch": 0.7666666666666667, | |
| "frac_reward_zero_std": 0.08, | |
| "grad_norm": 1.6467571445573885, | |
| "learning_rate": 8.085e-07, | |
| "loss": 0.0081, | |
| "num_tokens": 10352734.0, | |
| "reward": 9.6218558883667, | |
| "reward_std": 0.378871104568243, | |
| "rewards/accuracy_reward/mean": 0.75, | |
| "rewards/accuracy_reward/std": 0.27910871148109434, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.496875, | |
| "rewards/length_think_reward/std": 0.00883883535861969, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.014142135381698609, | |
| "rewards/process_style_reward/mean": 1.5185503327846528, | |
| "rewards/process_style_reward/std": 0.408857840411365, | |
| "rewards/table_style_reward/mean": 1.9414305424690246, | |
| "rewards/table_style_reward/std": 0.4009686389937997, | |
| "step": 1150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015, | |
| "completions/max_length": 441.24, | |
| "completions/max_terminated_length": 433.9, | |
| "completions/mean_length": 362.795, | |
| "completions/mean_terminated_length": 357.56416687011716, | |
| "completions/min_length": 293.56, | |
| "completions/min_terminated_length": 293.56, | |
| "epoch": 0.8, | |
| "frac_reward_zero_std": 0.06, | |
| "grad_norm": 3.0683170001047517, | |
| "learning_rate": 8.001666666666667e-07, | |
| "loss": -0.0007, | |
| "num_tokens": 10790708.0, | |
| "reward": 9.64956069946289, | |
| "reward_std": 0.394139921143651, | |
| "rewards/accuracy_reward/mean": 0.72, | |
| "rewards/accuracy_reward/std": 0.3199311000108719, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.09427463591098785, | |
| "rewards/format_reward/mean": 1.97, | |
| "rewards/format_reward/std": 0.039897301197052, | |
| "rewards/length_think_reward/mean": 1.4975, | |
| "rewards/length_think_reward/std": 0.004629100561141968, | |
| "rewards/num_token_reward/mean": 0.985, | |
| "rewards/num_token_reward/std": 0.019948650598526, | |
| "rewards/process_style_reward/mean": 1.6004662060737609, | |
| "rewards/process_style_reward/std": 0.36032520439475774, | |
| "rewards/table_style_reward/mean": 1.966594467163086, | |
| "rewards/table_style_reward/std": 0.3408663283288479, | |
| "step": 1200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0275, | |
| "completions/max_length": 510.16, | |
| "completions/max_terminated_length": 501.4, | |
| "completions/mean_length": 409.38, | |
| "completions/mean_terminated_length": 402.9113104248047, | |
| "completions/min_length": 333.52, | |
| "completions/min_terminated_length": 333.52, | |
| "epoch": 0.8333333333333334, | |
| "frac_reward_zero_std": 0.13, | |
| "grad_norm": 4.893266426475079, | |
| "learning_rate": 7.918333333333333e-07, | |
| "loss": -0.0018, | |
| "num_tokens": 11247124.0, | |
| "reward": 9.399707975387573, | |
| "reward_std": 0.4224707083776593, | |
| "rewards/accuracy_reward/mean": 0.705, | |
| "rewards/accuracy_reward/std": 0.3079745310544968, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.09621404528617859, | |
| "rewards/format_reward/mean": 1.945, | |
| "rewards/format_reward/std": 0.09107224106788635, | |
| "rewards/length_think_reward/mean": 1.486875, | |
| "rewards/length_think_reward/std": 0.025552249252796172, | |
| "rewards/num_token_reward/mean": 0.9725, | |
| "rewards/num_token_reward/std": 0.045536120533943174, | |
| "rewards/process_style_reward/mean": 1.527708315849304, | |
| "rewards/process_style_reward/std": 0.4601225584745407, | |
| "rewards/table_style_reward/mean": 1.8526247000694276, | |
| "rewards/table_style_reward/std": 0.363852179646492, | |
| "step": 1250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0325, | |
| "completions/max_length": 504.7, | |
| "completions/max_terminated_length": 489.56, | |
| "completions/mean_length": 413.09, | |
| "completions/mean_terminated_length": 403.423857421875, | |
| "completions/min_length": 329.26, | |
| "completions/min_terminated_length": 329.26, | |
| "epoch": 0.8666666666666667, | |
| "frac_reward_zero_std": 0.08, | |
| "grad_norm": 4.064899633242494, | |
| "learning_rate": 7.834999999999999e-07, | |
| "loss": 0.0022, | |
| "num_tokens": 11706160.0, | |
| "reward": 9.2975998878479, | |
| "reward_std": 0.42783591762185097, | |
| "rewards/accuracy_reward/mean": 0.68, | |
| "rewards/accuracy_reward/std": 0.3520024484395981, | |
| "rewards/chart_type_reward/mean": 0.9075, | |
| "rewards/chart_type_reward/std": 0.10328511297702789, | |
| "rewards/format_reward/mean": 1.935, | |
| "rewards/format_reward/std": 0.09544337391853333, | |
| "rewards/length_think_reward/mean": 1.488125, | |
| "rewards/length_think_reward/std": 0.019788713902235033, | |
| "rewards/num_token_reward/mean": 0.9675, | |
| "rewards/num_token_reward/std": 0.047721686959266665, | |
| "rewards/process_style_reward/mean": 1.474567185640335, | |
| "rewards/process_style_reward/std": 0.4061433684825897, | |
| "rewards/table_style_reward/mean": 1.8449076747894286, | |
| "rewards/table_style_reward/std": 0.3341277042776346, | |
| "step": 1300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 530.7, | |
| "completions/max_terminated_length": 514.46, | |
| "completions/mean_length": 428.025, | |
| "completions/mean_terminated_length": 420.09643005371095, | |
| "completions/min_length": 341.4, | |
| "completions/min_terminated_length": 341.4, | |
| "epoch": 0.9, | |
| "frac_reward_zero_std": 0.07, | |
| "grad_norm": 2.5562933049105356, | |
| "learning_rate": 7.751666666666666e-07, | |
| "loss": 0.0079, | |
| "num_tokens": 12170214.0, | |
| "reward": 9.505103206634521, | |
| "reward_std": 0.33932688400149347, | |
| "rewards/accuracy_reward/mean": 0.7425, | |
| "rewards/accuracy_reward/std": 0.28146197378635407, | |
| "rewards/chart_type_reward/mean": 0.895, | |
| "rewards/chart_type_reward/std": 0.11616269588470458, | |
| "rewards/format_reward/mean": 1.945, | |
| "rewards/format_reward/std": 0.1008401095867157, | |
| "rewards/length_think_reward/mean": 1.495, | |
| "rewards/length_think_reward/std": 0.007967560291290284, | |
| "rewards/num_token_reward/mean": 0.9725, | |
| "rewards/num_token_reward/std": 0.05042005479335785, | |
| "rewards/process_style_reward/mean": 1.5487450003623962, | |
| "rewards/process_style_reward/std": 0.460056491792202, | |
| "rewards/table_style_reward/mean": 1.9063581895828248, | |
| "rewards/table_style_reward/std": 0.34742515232414006, | |
| "step": 1350 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0075, | |
| "completions/max_length": 482.48, | |
| "completions/max_terminated_length": 480.16, | |
| "completions/mean_length": 397.7925, | |
| "completions/mean_terminated_length": 396.51416748046876, | |
| "completions/min_length": 326.3, | |
| "completions/min_terminated_length": 326.3, | |
| "epoch": 0.9333333333333333, | |
| "frac_reward_zero_std": 0.14, | |
| "grad_norm": 2.5170689556650028, | |
| "learning_rate": 7.668333333333333e-07, | |
| "loss": 0.0022, | |
| "num_tokens": 12621947.0, | |
| "reward": 9.625234622955322, | |
| "reward_std": 0.38073745464906095, | |
| "rewards/accuracy_reward/mean": 0.775, | |
| "rewards/accuracy_reward/std": 0.24745278298854828, | |
| "rewards/chart_type_reward/mean": 0.9275, | |
| "rewards/chart_type_reward/std": 0.08190421402454376, | |
| "rewards/format_reward/mean": 1.975, | |
| "rewards/format_reward/std": 0.0609428083896637, | |
| "rewards/length_think_reward/mean": 1.499375, | |
| "rewards/length_think_reward/std": 0.001767766922712326, | |
| "rewards/num_token_reward/mean": 0.9875, | |
| "rewards/num_token_reward/std": 0.03047140419483185, | |
| "rewards/process_style_reward/mean": 1.5226418220996856, | |
| "rewards/process_style_reward/std": 0.409429362565279, | |
| "rewards/table_style_reward/mean": 1.9382178807258605, | |
| "rewards/table_style_reward/std": 0.3078057858347893, | |
| "step": 1400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01, | |
| "completions/max_length": 460.66, | |
| "completions/max_terminated_length": 455.88, | |
| "completions/mean_length": 374.6425, | |
| "completions/mean_terminated_length": 372.0279779052734, | |
| "completions/min_length": 303.96, | |
| "completions/min_terminated_length": 303.96, | |
| "epoch": 0.9666666666666667, | |
| "frac_reward_zero_std": 0.11, | |
| "grad_norm": 3.2253024096985468, | |
| "learning_rate": 7.584999999999999e-07, | |
| "loss": 0.0018, | |
| "num_tokens": 13064872.0, | |
| "reward": 9.732238264083863, | |
| "reward_std": 0.35529854300431907, | |
| "rewards/accuracy_reward/mean": 0.71, | |
| "rewards/accuracy_reward/std": 0.2705294406414032, | |
| "rewards/chart_type_reward/mean": 0.945, | |
| "rewards/chart_type_reward/std": 0.06271044850349426, | |
| "rewards/format_reward/mean": 1.97, | |
| "rewards/format_reward/std": 0.07508494377136231, | |
| "rewards/length_think_reward/mean": 1.495, | |
| "rewards/length_think_reward/std": 0.014142136126756667, | |
| "rewards/num_token_reward/mean": 0.985, | |
| "rewards/num_token_reward/std": 0.037542471885681154, | |
| "rewards/process_style_reward/mean": 1.6616894614696502, | |
| "rewards/process_style_reward/std": 0.4416278822161257, | |
| "rewards/table_style_reward/mean": 1.9655487489700318, | |
| "rewards/table_style_reward/std": 0.31115186443552373, | |
| "step": 1450 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01, | |
| "completions/max_length": 430.54, | |
| "completions/max_terminated_length": 430.2, | |
| "completions/mean_length": 355.42, | |
| "completions/mean_terminated_length": 352.83666748046875, | |
| "completions/min_length": 287.88, | |
| "completions/min_terminated_length": 287.88, | |
| "epoch": 1.0, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 2.660840181002644, | |
| "learning_rate": 7.501666666666666e-07, | |
| "loss": 0.0016, | |
| "num_tokens": 13500408.0, | |
| "reward": 9.479499158859253, | |
| "reward_std": 0.3713982145488262, | |
| "rewards/accuracy_reward/mean": 0.76, | |
| "rewards/accuracy_reward/std": 0.2704376995563507, | |
| "rewards/chart_type_reward/mean": 0.905, | |
| "rewards/chart_type_reward/std": 0.10294564783573151, | |
| "rewards/format_reward/mean": 1.98, | |
| "rewards/format_reward/std": 0.03703280448913574, | |
| "rewards/length_think_reward/mean": 1.4975, | |
| "rewards/length_think_reward/std": 0.007071067690849304, | |
| "rewards/num_token_reward/mean": 0.99, | |
| "rewards/num_token_reward/std": 0.01851640224456787, | |
| "rewards/process_style_reward/mean": 1.5098418951034547, | |
| "rewards/process_style_reward/std": 0.43727574720978735, | |
| "rewards/table_style_reward/mean": 1.8371572828292846, | |
| "rewards/table_style_reward/std": 0.34460590325295926, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 529.84, | |
| "eval_completions/max_terminated_length": 529.84, | |
| "eval_completions/mean_length": 342.17125, | |
| "eval_completions/mean_terminated_length": 342.17125, | |
| "eval_completions/min_length": 239.0, | |
| "eval_completions/min_terminated_length": 239.0, | |
| "eval_frac_reward_zero_std": 0.655, | |
| "eval_loss": 0.0062618558295071125, | |
| "eval_num_tokens": 13500408.0, | |
| "eval_reward": 7.520802612304688, | |
| "eval_reward_std": 0.08726703974418343, | |
| "eval_rewards/accuracy_reward/mean": 0.795, | |
| "eval_rewards/accuracy_reward/std": 0.36195871770381927, | |
| "eval_rewards/chart_type_reward/mean": 0.605, | |
| "eval_rewards/chart_type_reward/std": 0.4631432008743286, | |
| "eval_rewards/format_reward/mean": 2.0, | |
| "eval_rewards/format_reward/std": 0.0, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 1.0, | |
| "eval_rewards/num_token_reward/std": 0.0, | |
| "eval_rewards/process_style_reward/mean": 0.8638026165962219, | |
| "eval_rewards/process_style_reward/std": 0.26793761402368543, | |
| "eval_rewards/table_style_reward/mean": 0.7570000004768371, | |
| "eval_rewards/table_style_reward/std": 0.022176709175109863, | |
| "eval_runtime": 388.0338, | |
| "eval_samples_per_second": 0.515, | |
| "eval_steps_per_second": 0.018, | |
| "step": 1500 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01, | |
| "completions/max_length": 406.2, | |
| "completions/max_terminated_length": 398.56, | |
| "completions/mean_length": 341.1325, | |
| "completions/mean_terminated_length": 337.2075, | |
| "completions/min_length": 283.22, | |
| "completions/min_terminated_length": 283.22, | |
| "epoch": 1.0333333333333334, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 3.8348164536534792, | |
| "learning_rate": 7.418333333333333e-07, | |
| "loss": -0.002, | |
| "num_tokens": 13929829.0, | |
| "reward": 9.53626503944397, | |
| "reward_std": 0.3728026695176959, | |
| "rewards/accuracy_reward/mean": 0.7, | |
| "rewards/accuracy_reward/std": 0.3113518291711807, | |
| "rewards/chart_type_reward/mean": 0.8775, | |
| "rewards/chart_type_reward/std": 0.13535646140575408, | |
| "rewards/format_reward/mean": 1.975, | |
| "rewards/format_reward/std": 0.03552303433418274, | |
| "rewards/length_think_reward/mean": 1.495625, | |
| "rewards/length_think_reward/std": 0.0061997933685779575, | |
| "rewards/num_token_reward/mean": 0.9875, | |
| "rewards/num_token_reward/std": 0.01776151716709137, | |
| "rewards/process_style_reward/mean": 1.5947026538848876, | |
| "rewards/process_style_reward/std": 0.43727443397045135, | |
| "rewards/table_style_reward/mean": 1.905937442779541, | |
| "rewards/table_style_reward/std": 0.3321183892339468, | |
| "step": 1550 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.86, | |
| "completions/max_terminated_length": 399.86, | |
| "completions/mean_length": 318.575, | |
| "completions/mean_terminated_length": 318.575, | |
| "completions/min_length": 249.92, | |
| "completions/min_terminated_length": 249.92, | |
| "epoch": 1.0666666666666667, | |
| "frac_reward_zero_std": 0.08, | |
| "grad_norm": 7.001967911074153, | |
| "learning_rate": 7.335e-07, | |
| "loss": -0.0028, | |
| "num_tokens": 14350287.0, | |
| "reward": 9.724173564910888, | |
| "reward_std": 0.34998391315340993, | |
| "rewards/accuracy_reward/mean": 0.8025, | |
| "rewards/accuracy_reward/std": 0.2594077849388123, | |
| "rewards/chart_type_reward/mean": 0.93, | |
| "rewards/chart_type_reward/std": 0.07483314633369446, | |
| "rewards/format_reward/mean": 1.995, | |
| "rewards/format_reward/std": 0.014142135381698609, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9975, | |
| "rewards/num_token_reward/std": 0.007071067690849304, | |
| "rewards/process_style_reward/mean": 1.4950222504138946, | |
| "rewards/process_style_reward/std": 0.4426997843384743, | |
| "rewards/table_style_reward/mean": 2.004151337146759, | |
| "rewards/table_style_reward/std": 0.32994183532893656, | |
| "step": 1600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0025, | |
| "completions/max_length": 388.28, | |
| "completions/max_terminated_length": 388.08, | |
| "completions/mean_length": 315.705, | |
| "completions/mean_terminated_length": 314.9003576660156, | |
| "completions/min_length": 250.5, | |
| "completions/min_terminated_length": 250.5, | |
| "epoch": 1.1, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 9.088691176190808, | |
| "learning_rate": 7.251666666666665e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 14770033.0, | |
| "reward": 9.393507385253907, | |
| "reward_std": 0.37204572021961213, | |
| "rewards/accuracy_reward/mean": 0.6925, | |
| "rewards/accuracy_reward/std": 0.31723429918289187, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.09621404528617859, | |
| "rewards/format_reward/mean": 1.995, | |
| "rewards/format_reward/std": 0.014142135381698609, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9975, | |
| "rewards/num_token_reward/std": 0.007071067690849304, | |
| "rewards/process_style_reward/mean": 1.388883638381958, | |
| "rewards/process_style_reward/std": 0.37191372729837896, | |
| "rewards/table_style_reward/mean": 1.909623656272888, | |
| "rewards/table_style_reward/std": 0.4070011004060507, | |
| "step": 1650 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.28, | |
| "completions/max_terminated_length": 399.28, | |
| "completions/mean_length": 331.9925, | |
| "completions/mean_terminated_length": 331.9925, | |
| "completions/min_length": 274.02, | |
| "completions/min_terminated_length": 274.02, | |
| "epoch": 1.1333333333333333, | |
| "frac_reward_zero_std": 0.09, | |
| "grad_norm": 2.910463682760087, | |
| "learning_rate": 7.168333333333333e-07, | |
| "loss": -0.0014, | |
| "num_tokens": 15195754.0, | |
| "reward": 9.631321449279785, | |
| "reward_std": 0.3087148568034172, | |
| "rewards/accuracy_reward/mean": 0.75, | |
| "rewards/accuracy_reward/std": 0.22186374604701997, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.09621404528617859, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5510436356067658, | |
| "rewards/process_style_reward/std": 0.4377661471068859, | |
| "rewards/table_style_reward/mean": 1.9202778291702272, | |
| "rewards/table_style_reward/std": 0.3158441584557295, | |
| "step": 1700 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 403.14, | |
| "completions/max_terminated_length": 403.14, | |
| "completions/mean_length": 334.7425, | |
| "completions/mean_terminated_length": 334.7425, | |
| "completions/min_length": 271.28, | |
| "completions/min_terminated_length": 271.28, | |
| "epoch": 1.1666666666666667, | |
| "frac_reward_zero_std": 0.08, | |
| "grad_norm": 2.309058895100409, | |
| "learning_rate": 7.085e-07, | |
| "loss": -0.002, | |
| "num_tokens": 15622895.0, | |
| "reward": 9.747770833969117, | |
| "reward_std": 0.3141957564931363, | |
| "rewards/accuracy_reward/mean": 0.835, | |
| "rewards/accuracy_reward/std": 0.1784460115432739, | |
| "rewards/chart_type_reward/mean": 0.93, | |
| "rewards/chart_type_reward/std": 0.07483314633369446, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5280090761184693, | |
| "rewards/process_style_reward/std": 0.40883125707507134, | |
| "rewards/table_style_reward/mean": 1.954761769771576, | |
| "rewards/table_style_reward/std": 0.2725959676504135, | |
| "step": 1750 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 411.5, | |
| "completions/max_terminated_length": 407.24, | |
| "completions/mean_length": 346.24, | |
| "completions/mean_terminated_length": 344.6991668701172, | |
| "completions/min_length": 293.46, | |
| "completions/min_terminated_length": 293.46, | |
| "epoch": 1.2, | |
| "frac_reward_zero_std": 0.15, | |
| "grad_norm": 2.902060665970371, | |
| "learning_rate": 7.001666666666667e-07, | |
| "loss": 0.0016, | |
| "num_tokens": 16054907.0, | |
| "reward": 9.750760135650635, | |
| "reward_std": 0.31334371890872714, | |
| "rewards/accuracy_reward/mean": 0.79, | |
| "rewards/accuracy_reward/std": 0.23819708824157715, | |
| "rewards/chart_type_reward/mean": 0.95, | |
| "rewards/chart_type_reward/std": 0.05345224738121033, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.49875, | |
| "rewards/length_think_reward/std": 0.002314550280570984, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.5837038934230805, | |
| "rewards/process_style_reward/std": 0.40160174869000914, | |
| "rewards/table_style_reward/mean": 1.9433062982559204, | |
| "rewards/table_style_reward/std": 0.311205018684268, | |
| "step": 1800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 413.98, | |
| "completions/max_terminated_length": 413.98, | |
| "completions/mean_length": 346.11, | |
| "completions/mean_terminated_length": 346.11, | |
| "completions/min_length": 287.76, | |
| "completions/min_terminated_length": 287.76, | |
| "epoch": 1.2333333333333334, | |
| "frac_reward_zero_std": 0.06, | |
| "grad_norm": 3.4920562160852544, | |
| "learning_rate": 6.918333333333333e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 16486455.0, | |
| "reward": 9.697437267303467, | |
| "reward_std": 0.3302338109910488, | |
| "rewards/accuracy_reward/mean": 0.775, | |
| "rewards/accuracy_reward/std": 0.26244561791419985, | |
| "rewards/chart_type_reward/mean": 0.89, | |
| "rewards/chart_type_reward/std": 0.12432654678821564, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6095642995834352, | |
| "rewards/process_style_reward/std": 0.4244466606155038, | |
| "rewards/table_style_reward/mean": 1.922872955799103, | |
| "rewards/table_style_reward/std": 0.3481115462630987, | |
| "step": 1850 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 435.06, | |
| "completions/max_terminated_length": 435.04, | |
| "completions/mean_length": 364.25, | |
| "completions/mean_terminated_length": 363.41583374023435, | |
| "completions/min_length": 306.08, | |
| "completions/min_terminated_length": 306.08, | |
| "epoch": 1.2666666666666666, | |
| "frac_reward_zero_std": 0.12, | |
| "grad_norm": 3.4554706037204777, | |
| "learning_rate": 6.835e-07, | |
| "loss": -0.0042, | |
| "num_tokens": 16925067.0, | |
| "reward": 9.758917856216431, | |
| "reward_std": 0.3286038258485496, | |
| "rewards/accuracy_reward/mean": 0.7975, | |
| "rewards/accuracy_reward/std": 0.2225467497110367, | |
| "rewards/chart_type_reward/mean": 0.8925, | |
| "rewards/chart_type_reward/std": 0.08190421402454376, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.499375, | |
| "rewards/length_think_reward/std": 0.001767766922712326, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.619047586917877, | |
| "rewards/process_style_reward/std": 0.4610636526346207, | |
| "rewards/table_style_reward/mean": 1.9654952633380889, | |
| "rewards/table_style_reward/std": 0.279680118188262, | |
| "step": 1900 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 470.18, | |
| "completions/max_terminated_length": 470.18, | |
| "completions/mean_length": 388.0075, | |
| "completions/mean_terminated_length": 388.0075, | |
| "completions/min_length": 316.4, | |
| "completions/min_terminated_length": 316.4, | |
| "epoch": 1.3, | |
| "frac_reward_zero_std": 0.15, | |
| "grad_norm": 2.4363619566126182, | |
| "learning_rate": 6.751666666666667e-07, | |
| "loss": 0.0043, | |
| "num_tokens": 17373038.0, | |
| "reward": 9.906033611297607, | |
| "reward_std": 0.30710218355059626, | |
| "rewards/accuracy_reward/mean": 0.81, | |
| "rewards/accuracy_reward/std": 0.2447742748260498, | |
| "rewards/chart_type_reward/mean": 0.935, | |
| "rewards/chart_type_reward/std": 0.07340089797973633, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6697776758670806, | |
| "rewards/process_style_reward/std": 0.4667322512716055, | |
| "rewards/table_style_reward/mean": 1.9912559032440185, | |
| "rewards/table_style_reward/std": 0.33009951261803505, | |
| "step": 1950 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 484.88, | |
| "completions/max_terminated_length": 484.88, | |
| "completions/mean_length": 407.5, | |
| "completions/mean_terminated_length": 407.5, | |
| "completions/min_length": 341.34, | |
| "completions/min_terminated_length": 341.34, | |
| "epoch": 1.3333333333333333, | |
| "frac_reward_zero_std": 0.26, | |
| "grad_norm": 4.715118687872951, | |
| "learning_rate": 6.668333333333332e-07, | |
| "loss": 0.0029, | |
| "num_tokens": 17829218.0, | |
| "reward": 9.806926441192626, | |
| "reward_std": 0.23523858685046434, | |
| "rewards/accuracy_reward/mean": 0.805, | |
| "rewards/accuracy_reward/std": 0.2573123925924301, | |
| "rewards/chart_type_reward/mean": 0.965, | |
| "rewards/chart_type_reward/std": 0.0462134838104248, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.586816476583481, | |
| "rewards/process_style_reward/std": 0.42196598000824453, | |
| "rewards/table_style_reward/mean": 1.9501098704338073, | |
| "rewards/table_style_reward/std": 0.2974155292659998, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.00125, | |
| "eval_completions/max_length": 584.64, | |
| "eval_completions/max_terminated_length": 584.32, | |
| "eval_completions/mean_length": 393.495, | |
| "eval_completions/mean_terminated_length": 393.06229736328123, | |
| "eval_completions/min_length": 277.64, | |
| "eval_completions/min_terminated_length": 277.64, | |
| "eval_frac_reward_zero_std": 0.65, | |
| "eval_loss": 0.005326578393578529, | |
| "eval_num_tokens": 17829218.0, | |
| "eval_reward": 7.547686309814453, | |
| "eval_reward_std": 0.10336805308703333, | |
| "eval_rewards/accuracy_reward/mean": 0.83125, | |
| "eval_rewards/accuracy_reward/std": 0.3227571386098862, | |
| "eval_rewards/chart_type_reward/mean": 0.61625, | |
| "eval_rewards/chart_type_reward/std": 0.46573135137557986, | |
| "eval_rewards/format_reward/mean": 1.995, | |
| "eval_rewards/format_reward/std": 0.019674774408340454, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 0.9975, | |
| "eval_rewards/num_token_reward/std": 0.009837387204170227, | |
| "eval_rewards/process_style_reward/mean": 0.8522800421714782, | |
| "eval_rewards/process_style_reward/std": 0.25847422659397123, | |
| "eval_rewards/table_style_reward/mean": 0.7554062509536743, | |
| "eval_rewards/table_style_reward/std": 0.032950252890586854, | |
| "eval_runtime": 422.8962, | |
| "eval_samples_per_second": 0.473, | |
| "eval_steps_per_second": 0.017, | |
| "step": 2000 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 453.4, | |
| "completions/max_terminated_length": 453.4, | |
| "completions/mean_length": 380.1175, | |
| "completions/mean_terminated_length": 380.1175, | |
| "completions/min_length": 314.92, | |
| "completions/min_terminated_length": 314.92, | |
| "epoch": 1.3666666666666667, | |
| "frac_reward_zero_std": 0.14, | |
| "grad_norm": 1.3071829991089465, | |
| "learning_rate": 6.584999999999999e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 18274577.0, | |
| "reward": 9.592507076263427, | |
| "reward_std": 0.2941449248045683, | |
| "rewards/accuracy_reward/mean": 0.71, | |
| "rewards/accuracy_reward/std": 0.34862515032291413, | |
| "rewards/chart_type_reward/mean": 0.95, | |
| "rewards/chart_type_reward/std": 0.05345224738121033, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5190071046352387, | |
| "rewards/process_style_reward/std": 0.406856027841568, | |
| "rewards/table_style_reward/mean": 1.9135000681877137, | |
| "rewards/table_style_reward/std": 0.33413831643760206, | |
| "step": 2050 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0025, | |
| "completions/max_length": 449.9, | |
| "completions/max_terminated_length": 449.66, | |
| "completions/mean_length": 374.58, | |
| "completions/mean_terminated_length": 373.85250061035157, | |
| "completions/min_length": 307.08, | |
| "completions/min_terminated_length": 307.08, | |
| "epoch": 1.4, | |
| "frac_reward_zero_std": 0.12, | |
| "grad_norm": 17.23901734959147, | |
| "learning_rate": 6.501666666666666e-07, | |
| "loss": 0.0013, | |
| "num_tokens": 18717565.0, | |
| "reward": 9.62658073425293, | |
| "reward_std": 0.41098684968426824, | |
| "rewards/accuracy_reward/mean": 0.7425, | |
| "rewards/accuracy_reward/std": 0.3153866308927536, | |
| "rewards/chart_type_reward/mean": 0.915, | |
| "rewards/chart_type_reward/std": 0.09478179693222046, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.028284270763397217, | |
| "rewards/length_think_reward/mean": 1.499375, | |
| "rewards/length_think_reward/std": 0.001767766922712326, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.014142135381698609, | |
| "rewards/process_style_reward/mean": 1.5444307351112365, | |
| "rewards/process_style_reward/std": 0.40891373321413993, | |
| "rewards/table_style_reward/mean": 1.9402749633789063, | |
| "rewards/table_style_reward/std": 0.3305619989708066, | |
| "step": 2100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 408.34, | |
| "completions/max_terminated_length": 408.34, | |
| "completions/mean_length": 335.9, | |
| "completions/mean_terminated_length": 335.9, | |
| "completions/min_length": 273.7, | |
| "completions/min_terminated_length": 273.7, | |
| "epoch": 1.4333333333333333, | |
| "frac_reward_zero_std": 0.11, | |
| "grad_norm": 3.329962314867653, | |
| "learning_rate": 6.418333333333333e-07, | |
| "loss": -0.0013, | |
| "num_tokens": 19145669.0, | |
| "reward": 9.794802322387696, | |
| "reward_std": 0.2854239001870155, | |
| "rewards/accuracy_reward/mean": 0.7575, | |
| "rewards/accuracy_reward/std": 0.28735102355480197, | |
| "rewards/chart_type_reward/mean": 0.95, | |
| "rewards/chart_type_reward/std": 0.05345224738121033, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5570359444618225, | |
| "rewards/process_style_reward/std": 0.418922475874424, | |
| "rewards/table_style_reward/mean": 2.0302664685249328, | |
| "rewards/table_style_reward/std": 0.30267744371667504, | |
| "step": 2150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 395.74, | |
| "completions/max_terminated_length": 395.74, | |
| "completions/mean_length": 329.2375, | |
| "completions/mean_terminated_length": 329.2375, | |
| "completions/min_length": 270.2, | |
| "completions/min_terminated_length": 270.2, | |
| "epoch": 1.4666666666666668, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 6.069260839474777, | |
| "learning_rate": 6.335e-07, | |
| "loss": -0.002, | |
| "num_tokens": 19570032.0, | |
| "reward": 9.73668830871582, | |
| "reward_std": 0.34456304393708703, | |
| "rewards/accuracy_reward/mean": 0.765, | |
| "rewards/accuracy_reward/std": 0.1955270314216614, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.028284270763397217, | |
| "rewards/length_think_reward/mean": 1.49625, | |
| "rewards/length_think_reward/std": 0.010606602281332016, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.014142135381698609, | |
| "rewards/process_style_reward/mean": 1.5674074435234069, | |
| "rewards/process_style_reward/std": 0.415806692391634, | |
| "rewards/table_style_reward/mean": 2.00303085565567, | |
| "rewards/table_style_reward/std": 0.2655815637484193, | |
| "step": 2200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 425.58, | |
| "completions/max_terminated_length": 425.58, | |
| "completions/mean_length": 349.7875, | |
| "completions/mean_terminated_length": 349.7875, | |
| "completions/min_length": 279.34, | |
| "completions/min_terminated_length": 279.34, | |
| "epoch": 1.5, | |
| "frac_reward_zero_std": 0.13, | |
| "grad_norm": 2.1846728325533844, | |
| "learning_rate": 6.251666666666667e-07, | |
| "loss": 0.0, | |
| "num_tokens": 20002643.0, | |
| "reward": 9.64163824081421, | |
| "reward_std": 0.32703839337453244, | |
| "rewards/accuracy_reward/mean": 0.74, | |
| "rewards/accuracy_reward/std": 0.3088252305984497, | |
| "rewards/chart_type_reward/mean": 0.93, | |
| "rewards/chart_type_reward/std": 0.07483314633369446, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.584349147081375, | |
| "rewards/process_style_reward/std": 0.3927363380789757, | |
| "rewards/table_style_reward/mean": 1.887289083003998, | |
| "rewards/table_style_reward/std": 0.3619528949260712, | |
| "step": 2250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 402.0, | |
| "completions/max_terminated_length": 402.0, | |
| "completions/mean_length": 343.5925, | |
| "completions/mean_terminated_length": 343.5925, | |
| "completions/min_length": 286.94, | |
| "completions/min_terminated_length": 286.94, | |
| "epoch": 1.5333333333333332, | |
| "frac_reward_zero_std": 0.12, | |
| "grad_norm": 6.91609600549848, | |
| "learning_rate": 6.168333333333333e-07, | |
| "loss": -0.0033, | |
| "num_tokens": 20433580.0, | |
| "reward": 9.802964372634888, | |
| "reward_std": 0.2991816225461662, | |
| "rewards/accuracy_reward/mean": 0.7675, | |
| "rewards/accuracy_reward/std": 0.261347194314003, | |
| "rewards/chart_type_reward/mean": 0.94, | |
| "rewards/chart_type_reward/std": 0.06414269685745239, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6691261100769044, | |
| "rewards/process_style_reward/std": 0.37101606719195845, | |
| "rewards/table_style_reward/mean": 1.9263382363319397, | |
| "rewards/table_style_reward/std": 0.32372167307883504, | |
| "step": 2300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01, | |
| "completions/max_length": 431.12, | |
| "completions/max_terminated_length": 424.64, | |
| "completions/mean_length": 358.315, | |
| "completions/mean_terminated_length": 354.0025, | |
| "completions/min_length": 294.78, | |
| "completions/min_terminated_length": 294.78, | |
| "epoch": 1.5666666666666667, | |
| "frac_reward_zero_std": 0.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.085e-07, | |
| "loss": 0.0007, | |
| "num_tokens": 20870814.0, | |
| "reward": 9.584474143981934, | |
| "reward_std": 0.24904431821312756, | |
| "rewards/accuracy_reward/mean": 0.705, | |
| "rewards/accuracy_reward/std": 0.27204171717166903, | |
| "rewards/chart_type_reward/mean": 0.94, | |
| "rewards/chart_type_reward/std": 0.06414269685745239, | |
| "rewards/format_reward/mean": 1.98, | |
| "rewards/format_reward/std": 0.021380898952484132, | |
| "rewards/length_think_reward/mean": 1.499375, | |
| "rewards/length_think_reward/std": 0.001767766922712326, | |
| "rewards/num_token_reward/mean": 0.99, | |
| "rewards/num_token_reward/std": 0.010690449476242066, | |
| "rewards/process_style_reward/mean": 1.5493565130233764, | |
| "rewards/process_style_reward/std": 0.4589155162498355, | |
| "rewards/table_style_reward/mean": 1.920742678642273, | |
| "rewards/table_style_reward/std": 0.3032746136933565, | |
| "step": 2350 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 391.84, | |
| "completions/max_terminated_length": 391.84, | |
| "completions/mean_length": 330.47, | |
| "completions/mean_terminated_length": 330.47, | |
| "completions/min_length": 274.26, | |
| "completions/min_terminated_length": 274.26, | |
| "epoch": 1.6, | |
| "frac_reward_zero_std": 0.18, | |
| "grad_norm": 2.026677468691953, | |
| "learning_rate": 6.001666666666666e-07, | |
| "loss": -0.0019, | |
| "num_tokens": 21297010.0, | |
| "reward": 9.688363914489747, | |
| "reward_std": 0.27519391929730774, | |
| "rewards/accuracy_reward/mean": 0.76, | |
| "rewards/accuracy_reward/std": 0.2853331530094147, | |
| "rewards/chart_type_reward/mean": 0.94, | |
| "rewards/chart_type_reward/std": 0.06414269685745239, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6027523398399353, | |
| "rewards/process_style_reward/std": 0.3893206799030304, | |
| "rewards/table_style_reward/mean": 1.8856116580963134, | |
| "rewards/table_style_reward/std": 0.3420239106938243, | |
| "step": 2400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 380.06, | |
| "completions/max_terminated_length": 380.06, | |
| "completions/mean_length": 313.7925, | |
| "completions/mean_terminated_length": 313.7925, | |
| "completions/min_length": 256.48, | |
| "completions/min_terminated_length": 256.48, | |
| "epoch": 1.6333333333333333, | |
| "frac_reward_zero_std": 0.13, | |
| "grad_norm": 5.555080099940961, | |
| "learning_rate": 5.918333333333333e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 21715431.0, | |
| "reward": 9.615154209136962, | |
| "reward_std": 0.2913367236009799, | |
| "rewards/accuracy_reward/mean": 0.765, | |
| "rewards/accuracy_reward/std": 0.2847459638118744, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5297248589992523, | |
| "rewards/process_style_reward/std": 0.3912577797472477, | |
| "rewards/table_style_reward/mean": 1.9004293370246887, | |
| "rewards/table_style_reward/std": 0.3435662076622248, | |
| "step": 2450 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 381.04, | |
| "completions/max_terminated_length": 381.04, | |
| "completions/mean_length": 315.7675, | |
| "completions/mean_terminated_length": 315.7675, | |
| "completions/min_length": 256.08, | |
| "completions/min_terminated_length": 256.08, | |
| "epoch": 1.6666666666666665, | |
| "frac_reward_zero_std": 0.15, | |
| "grad_norm": 15.55412501444908, | |
| "learning_rate": 5.835e-07, | |
| "loss": -0.0012, | |
| "num_tokens": 22134838.0, | |
| "reward": 9.620207805633544, | |
| "reward_std": 0.3084538695216179, | |
| "rewards/accuracy_reward/mean": 0.75, | |
| "rewards/accuracy_reward/std": 0.26951261222362516, | |
| "rewards/chart_type_reward/mean": 0.8775, | |
| "rewards/chart_type_reward/std": 0.1279459285736084, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.5722642850875854, | |
| "rewards/process_style_reward/std": 0.4096125695109367, | |
| "rewards/table_style_reward/mean": 1.9354435563087464, | |
| "rewards/table_style_reward/std": 0.25229784632101654, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 444.2, | |
| "eval_completions/max_terminated_length": 444.2, | |
| "eval_completions/mean_length": 306.56375, | |
| "eval_completions/mean_terminated_length": 306.56375, | |
| "eval_completions/min_length": 212.04, | |
| "eval_completions/min_terminated_length": 212.04, | |
| "eval_frac_reward_zero_std": 0.69, | |
| "eval_loss": 0.0007887376705184579, | |
| "eval_num_tokens": 22134838.0, | |
| "eval_reward": 7.574729671478272, | |
| "eval_reward_std": 0.06697057218523696, | |
| "eval_rewards/accuracy_reward/mean": 0.84625, | |
| "eval_rewards/accuracy_reward/std": 0.3025382542610168, | |
| "eval_rewards/chart_type_reward/mean": 0.625, | |
| "eval_rewards/chart_type_reward/std": 0.463611079454422, | |
| "eval_rewards/format_reward/mean": 2.0, | |
| "eval_rewards/format_reward/std": 0.0, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 1.0, | |
| "eval_rewards/num_token_reward/std": 0.0, | |
| "eval_rewards/process_style_reward/mean": 0.8455421495437622, | |
| "eval_rewards/process_style_reward/std": 0.2721589285135269, | |
| "eval_rewards/table_style_reward/mean": 0.7579375004768372, | |
| "eval_rewards/table_style_reward/std": 0.020825568437576294, | |
| "eval_runtime": 332.6249, | |
| "eval_samples_per_second": 0.601, | |
| "eval_steps_per_second": 0.021, | |
| "step": 2500 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 370.08, | |
| "completions/max_terminated_length": 370.08, | |
| "completions/mean_length": 310.395, | |
| "completions/mean_terminated_length": 310.395, | |
| "completions/min_length": 252.64, | |
| "completions/min_terminated_length": 252.64, | |
| "epoch": 1.7, | |
| "frac_reward_zero_std": 0.18, | |
| "grad_norm": 1.9862931479259052, | |
| "learning_rate": 5.751666666666667e-07, | |
| "loss": -0.0015, | |
| "num_tokens": 22551672.0, | |
| "reward": 9.761021976470948, | |
| "reward_std": 0.2495887253805995, | |
| "rewards/accuracy_reward/mean": 0.715, | |
| "rewards/accuracy_reward/std": 0.2910652804374695, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6147488832473755, | |
| "rewards/process_style_reward/std": 0.4376735435426235, | |
| "rewards/table_style_reward/mean": 2.0112730717658995, | |
| "rewards/table_style_reward/std": 0.25577211238443853, | |
| "step": 2550 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 378.76, | |
| "completions/max_terminated_length": 378.76, | |
| "completions/mean_length": 305.165, | |
| "completions/mean_terminated_length": 305.165, | |
| "completions/min_length": 235.9, | |
| "completions/min_terminated_length": 235.9, | |
| "epoch": 1.7333333333333334, | |
| "frac_reward_zero_std": 0.18, | |
| "grad_norm": 3.8761139220743552, | |
| "learning_rate": 5.668333333333333e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 22966918.0, | |
| "reward": 9.719212608337402, | |
| "reward_std": 0.2860183835402131, | |
| "rewards/accuracy_reward/mean": 0.72, | |
| "rewards/accuracy_reward/std": 0.27398112654685974, | |
| "rewards/chart_type_reward/mean": 0.875, | |
| "rewards/chart_type_reward/std": 0.09478179693222046, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5869954061508178, | |
| "rewards/process_style_reward/std": 0.3870758730173111, | |
| "rewards/table_style_reward/mean": 2.037217149734497, | |
| "rewards/table_style_reward/std": 0.29504469502717257, | |
| "step": 2600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 404.16, | |
| "completions/max_terminated_length": 404.16, | |
| "completions/mean_length": 327.3175, | |
| "completions/mean_terminated_length": 327.3175, | |
| "completions/min_length": 258.78, | |
| "completions/min_terminated_length": 258.78, | |
| "epoch": 1.7666666666666666, | |
| "frac_reward_zero_std": 0.12, | |
| "grad_norm": 4.5948173272275, | |
| "learning_rate": 5.584999999999999e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 23391041.0, | |
| "reward": 9.734212398529053, | |
| "reward_std": 0.3367015665024519, | |
| "rewards/accuracy_reward/mean": 0.7325, | |
| "rewards/accuracy_reward/std": 0.3094827342033386, | |
| "rewards/chart_type_reward/mean": 0.9, | |
| "rewards/chart_type_reward/std": 0.10690449476242066, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.649822280406952, | |
| "rewards/process_style_reward/std": 0.43425638109445575, | |
| "rewards/table_style_reward/mean": 1.9518901491165161, | |
| "rewards/table_style_reward/std": 0.2947008777409792, | |
| "step": 2650 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.46, | |
| "completions/max_terminated_length": 373.46, | |
| "completions/mean_length": 316.095, | |
| "completions/mean_terminated_length": 316.095, | |
| "completions/min_length": 264.2, | |
| "completions/min_terminated_length": 264.2, | |
| "epoch": 1.8, | |
| "frac_reward_zero_std": 0.12, | |
| "grad_norm": 2.8524341866405294, | |
| "learning_rate": 5.501666666666666e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 23809963.0, | |
| "reward": 9.839047288894653, | |
| "reward_std": 0.2964446726441383, | |
| "rewards/accuracy_reward/mean": 0.7825, | |
| "rewards/accuracy_reward/std": 0.22414826095104218, | |
| "rewards/chart_type_reward/mean": 0.88, | |
| "rewards/chart_type_reward/std": 0.12828539371490477, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6252618777751922, | |
| "rewards/process_style_reward/std": 0.43019559178501365, | |
| "rewards/table_style_reward/mean": 2.0512854528427122, | |
| "rewards/table_style_reward/std": 0.3453987674787641, | |
| "step": 2700 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 405.08, | |
| "completions/max_terminated_length": 405.02, | |
| "completions/mean_length": 330.52, | |
| "completions/mean_terminated_length": 328.8041668701172, | |
| "completions/min_length": 260.76, | |
| "completions/min_terminated_length": 260.76, | |
| "epoch": 1.8333333333333335, | |
| "frac_reward_zero_std": 0.21, | |
| "grad_norm": 3.086957169930822, | |
| "learning_rate": 5.418333333333332e-07, | |
| "loss": 0.0012, | |
| "num_tokens": 24235339.0, | |
| "reward": 9.4709245967865, | |
| "reward_std": 0.3313306954503059, | |
| "rewards/accuracy_reward/mean": 0.6475, | |
| "rewards/accuracy_reward/std": 0.315896298289299, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 1.985, | |
| "rewards/format_reward/std": 0.03265853762626648, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9925, | |
| "rewards/num_token_reward/std": 0.01632926881313324, | |
| "rewards/process_style_reward/mean": 1.5009058022499084, | |
| "rewards/process_style_reward/std": 0.41441123938187957, | |
| "rewards/table_style_reward/mean": 1.9250187492370605, | |
| "rewards/table_style_reward/std": 0.3126064923405647, | |
| "step": 2750 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 386.7, | |
| "completions/max_terminated_length": 386.7, | |
| "completions/mean_length": 320.0475, | |
| "completions/mean_terminated_length": 320.0475, | |
| "completions/min_length": 260.54, | |
| "completions/min_terminated_length": 260.54, | |
| "epoch": 1.8666666666666667, | |
| "frac_reward_zero_std": 0.17, | |
| "grad_norm": 12.10983335214368, | |
| "learning_rate": 5.335e-07, | |
| "loss": 0.0026, | |
| "num_tokens": 24656314.0, | |
| "reward": 9.687625350952148, | |
| "reward_std": 0.2753653322113678, | |
| "rewards/accuracy_reward/mean": 0.7375, | |
| "rewards/accuracy_reward/std": 0.27269922077655795, | |
| "rewards/chart_type_reward/mean": 0.9, | |
| "rewards/chart_type_reward/std": 0.10690449476242066, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6192821896076202, | |
| "rewards/process_style_reward/std": 0.4189461704902351, | |
| "rewards/table_style_reward/mean": 1.9308432340621948, | |
| "rewards/table_style_reward/std": 0.31184935322031376, | |
| "step": 2800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 415.46, | |
| "completions/max_terminated_length": 415.46, | |
| "completions/mean_length": 346.0825, | |
| "completions/mean_terminated_length": 346.0825, | |
| "completions/min_length": 281.58, | |
| "completions/min_terminated_length": 281.58, | |
| "epoch": 1.9, | |
| "frac_reward_zero_std": 0.14, | |
| "grad_norm": 1.9872501325277399, | |
| "learning_rate": 5.251666666666667e-07, | |
| "loss": 0.0008, | |
| "num_tokens": 25088451.0, | |
| "reward": 9.661721420288085, | |
| "reward_std": 0.27916239865124226, | |
| "rewards/accuracy_reward/mean": 0.75, | |
| "rewards/accuracy_reward/std": 0.2755652844905853, | |
| "rewards/chart_type_reward/mean": 0.8975, | |
| "rewards/chart_type_reward/std": 0.11397556245326995, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5982041406631469, | |
| "rewards/process_style_reward/std": 0.3970548979192972, | |
| "rewards/table_style_reward/mean": 1.9160172486305236, | |
| "rewards/table_style_reward/std": 0.2998810928501189, | |
| "step": 2850 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 395.2, | |
| "completions/max_terminated_length": 395.2, | |
| "completions/mean_length": 320.93, | |
| "completions/mean_terminated_length": 320.93, | |
| "completions/min_length": 261.28, | |
| "completions/min_terminated_length": 261.28, | |
| "epoch": 1.9333333333333333, | |
| "frac_reward_zero_std": 0.14, | |
| "grad_norm": 2.9893443101936286, | |
| "learning_rate": 5.168333333333334e-07, | |
| "loss": 0.0022, | |
| "num_tokens": 25510639.0, | |
| "reward": 9.695433053970337, | |
| "reward_std": 0.26316976999863984, | |
| "rewards/accuracy_reward/mean": 0.78, | |
| "rewards/accuracy_reward/std": 0.30066137969493867, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.05345224738121033, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5607620286941528, | |
| "rewards/process_style_reward/std": 0.42447459913790225, | |
| "rewards/table_style_reward/mean": 1.9446710109710694, | |
| "rewards/table_style_reward/std": 0.30673831250518563, | |
| "step": 2900 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 391.78, | |
| "completions/max_terminated_length": 391.78, | |
| "completions/mean_length": 321.075, | |
| "completions/mean_terminated_length": 321.075, | |
| "completions/min_length": 255.9, | |
| "completions/min_terminated_length": 255.9, | |
| "epoch": 1.9666666666666668, | |
| "frac_reward_zero_std": 0.14, | |
| "grad_norm": 1.9919505127604515, | |
| "learning_rate": 5.085e-07, | |
| "loss": -0.0031, | |
| "num_tokens": 25932085.0, | |
| "reward": 9.60394895553589, | |
| "reward_std": 0.3279293935373426, | |
| "rewards/accuracy_reward/mean": 0.69, | |
| "rewards/accuracy_reward/std": 0.31488103687763214, | |
| "rewards/chart_type_reward/mean": 0.9, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5492809236049652, | |
| "rewards/process_style_reward/std": 0.43841719649732114, | |
| "rewards/table_style_reward/mean": 1.9646680784225463, | |
| "rewards/table_style_reward/std": 0.2879634938389063, | |
| "step": 2950 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 395.38, | |
| "completions/max_terminated_length": 395.38, | |
| "completions/mean_length": 328.125, | |
| "completions/mean_terminated_length": 328.125, | |
| "completions/min_length": 269.92, | |
| "completions/min_terminated_length": 269.92, | |
| "epoch": 2.0, | |
| "frac_reward_zero_std": 0.15, | |
| "grad_norm": 2.558583506193219, | |
| "learning_rate": 5.001666666666666e-07, | |
| "loss": 0.0015, | |
| "num_tokens": 26356915.0, | |
| "reward": 9.858510990142822, | |
| "reward_std": 0.2589301385357976, | |
| "rewards/accuracy_reward/mean": 0.8175, | |
| "rewards/accuracy_reward/std": 0.23172805666923524, | |
| "rewards/chart_type_reward/mean": 0.8875, | |
| "rewards/chart_type_reward/std": 0.12651368021965026, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6852520942687987, | |
| "rewards/process_style_reward/std": 0.3970717826485634, | |
| "rewards/table_style_reward/mean": 1.968258823156357, | |
| "rewards/table_style_reward/std": 0.29191130749881267, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 442.64, | |
| "eval_completions/max_terminated_length": 442.64, | |
| "eval_completions/mean_length": 303.75625, | |
| "eval_completions/mean_terminated_length": 303.75625, | |
| "eval_completions/min_length": 209.52, | |
| "eval_completions/min_terminated_length": 209.52, | |
| "eval_frac_reward_zero_std": 0.695, | |
| "eval_loss": 0.0015344952698796988, | |
| "eval_num_tokens": 26356915.0, | |
| "eval_reward": 7.581516437530517, | |
| "eval_reward_std": 0.062049417817033825, | |
| "eval_rewards/accuracy_reward/mean": 0.85375, | |
| "eval_rewards/accuracy_reward/std": 0.3139296269416809, | |
| "eval_rewards/chart_type_reward/mean": 0.62875, | |
| "eval_rewards/chart_type_reward/std": 0.4629242014884949, | |
| "eval_rewards/format_reward/mean": 2.0, | |
| "eval_rewards/format_reward/std": 0.0, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 1.0, | |
| "eval_rewards/num_token_reward/std": 0.0, | |
| "eval_rewards/process_style_reward/mean": 0.840136501789093, | |
| "eval_rewards/process_style_reward/std": 0.2791509646177292, | |
| "eval_rewards/table_style_reward/mean": 0.7588799333572388, | |
| "eval_rewards/table_style_reward/std": 0.02718144789338112, | |
| "eval_runtime": 333.1256, | |
| "eval_samples_per_second": 0.6, | |
| "eval_steps_per_second": 0.021, | |
| "step": 3000 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 375.1, | |
| "completions/max_terminated_length": 375.1, | |
| "completions/mean_length": 312.9625, | |
| "completions/mean_terminated_length": 312.9625, | |
| "completions/min_length": 254.74, | |
| "completions/min_terminated_length": 254.74, | |
| "epoch": 2.033333333333333, | |
| "frac_reward_zero_std": 0.21, | |
| "grad_norm": 2.2475136992847617, | |
| "learning_rate": 4.918333333333333e-07, | |
| "loss": -0.0057, | |
| "num_tokens": 26775592.0, | |
| "reward": 9.69863618850708, | |
| "reward_std": 0.28390467911958694, | |
| "rewards/accuracy_reward/mean": 0.7375, | |
| "rewards/accuracy_reward/std": 0.2776748961210251, | |
| "rewards/chart_type_reward/mean": 0.8975, | |
| "rewards/chart_type_reward/std": 0.10656502962112427, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6072617268562317, | |
| "rewards/process_style_reward/std": 0.3515561890602112, | |
| "rewards/table_style_reward/mean": 1.956374499797821, | |
| "rewards/table_style_reward/std": 0.3190466545522213, | |
| "step": 3050 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 388.64, | |
| "completions/max_terminated_length": 388.64, | |
| "completions/mean_length": 331.075, | |
| "completions/mean_terminated_length": 331.075, | |
| "completions/min_length": 278.64, | |
| "completions/min_terminated_length": 278.64, | |
| "epoch": 2.066666666666667, | |
| "frac_reward_zero_std": 0.21, | |
| "grad_norm": 1.6861060018846619, | |
| "learning_rate": 4.835e-07, | |
| "loss": 0.0028, | |
| "num_tokens": 27200726.0, | |
| "reward": 9.909198265075684, | |
| "reward_std": 0.26314487379044293, | |
| "rewards/accuracy_reward/mean": 0.7575, | |
| "rewards/accuracy_reward/std": 0.24443480968475342, | |
| "rewards/chart_type_reward/mean": 0.92, | |
| "rewards/chart_type_reward/std": 0.08552359580993653, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.714371042251587, | |
| "rewards/process_style_reward/std": 0.40186877727508546, | |
| "rewards/table_style_reward/mean": 2.017327206134796, | |
| "rewards/table_style_reward/std": 0.3402495227381587, | |
| "step": 3100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0025, | |
| "completions/max_length": 411.52, | |
| "completions/max_terminated_length": 411.02, | |
| "completions/mean_length": 335.7075, | |
| "completions/mean_terminated_length": 335.0121435546875, | |
| "completions/min_length": 269.54, | |
| "completions/min_terminated_length": 269.54, | |
| "epoch": 2.1, | |
| "frac_reward_zero_std": 0.18, | |
| "grad_norm": 1.7582515763042907, | |
| "learning_rate": 4.7516666666666667e-07, | |
| "loss": -0.0049, | |
| "num_tokens": 27628089.0, | |
| "reward": 9.542483215332032, | |
| "reward_std": 0.29263645596802235, | |
| "rewards/accuracy_reward/mean": 0.7425, | |
| "rewards/accuracy_reward/std": 0.29206632256507875, | |
| "rewards/chart_type_reward/mean": 0.88, | |
| "rewards/chart_type_reward/std": 0.10690449476242066, | |
| "rewards/format_reward/mean": 1.995, | |
| "rewards/format_reward/std": 0.014142135381698609, | |
| "rewards/length_think_reward/mean": 1.498125, | |
| "rewards/length_think_reward/std": 0.005303300619125366, | |
| "rewards/num_token_reward/mean": 0.9975, | |
| "rewards/num_token_reward/std": 0.007071067690849304, | |
| "rewards/process_style_reward/mean": 1.4842811024188995, | |
| "rewards/process_style_reward/std": 0.40244101256132125, | |
| "rewards/table_style_reward/mean": 1.9450771474838258, | |
| "rewards/table_style_reward/std": 0.3524085796624422, | |
| "step": 3150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 389.78, | |
| "completions/max_terminated_length": 389.78, | |
| "completions/mean_length": 324.99, | |
| "completions/mean_terminated_length": 324.99, | |
| "completions/min_length": 267.42, | |
| "completions/min_terminated_length": 267.42, | |
| "epoch": 2.1333333333333333, | |
| "frac_reward_zero_std": 0.23, | |
| "grad_norm": 5.2003103180671655, | |
| "learning_rate": 4.668333333333333e-07, | |
| "loss": -0.0014, | |
| "num_tokens": 28050929.0, | |
| "reward": 9.795648279190063, | |
| "reward_std": 0.27396415136754515, | |
| "rewards/accuracy_reward/mean": 0.745, | |
| "rewards/accuracy_reward/std": 0.26900545120239255, | |
| "rewards/chart_type_reward/mean": 0.96, | |
| "rewards/chart_type_reward/std": 0.042761797904968264, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6426741647720338, | |
| "rewards/process_style_reward/std": 0.4128600428253412, | |
| "rewards/table_style_reward/mean": 1.947974135875702, | |
| "rewards/table_style_reward/std": 0.27850259508937597, | |
| "step": 3200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0075, | |
| "completions/max_length": 378.94, | |
| "completions/max_terminated_length": 378.38, | |
| "completions/mean_length": 312.8675, | |
| "completions/mean_terminated_length": 310.1270001220703, | |
| "completions/min_length": 253.22, | |
| "completions/min_terminated_length": 253.22, | |
| "epoch": 2.1666666666666665, | |
| "frac_reward_zero_std": 0.27, | |
| "grad_norm": 8.03823522353211, | |
| "learning_rate": 4.585e-07, | |
| "loss": 0.003, | |
| "num_tokens": 28468656.0, | |
| "reward": 9.798268947601319, | |
| "reward_std": 0.23171548346057536, | |
| "rewards/accuracy_reward/mean": 0.8225, | |
| "rewards/accuracy_reward/std": 0.21219169199466706, | |
| "rewards/chart_type_reward/mean": 0.895, | |
| "rewards/chart_type_reward/std": 0.11616269588470458, | |
| "rewards/format_reward/mean": 1.985, | |
| "rewards/format_reward/std": 0.02070196866989136, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9925, | |
| "rewards/num_token_reward/std": 0.01035098433494568, | |
| "rewards/process_style_reward/mean": 1.56619358420372, | |
| "rewards/process_style_reward/std": 0.4133440139889717, | |
| "rewards/table_style_reward/mean": 2.037075364589691, | |
| "rewards/table_style_reward/std": 0.29497910317033527, | |
| "step": 3250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 354.38, | |
| "completions/max_terminated_length": 354.38, | |
| "completions/mean_length": 292.085, | |
| "completions/mean_terminated_length": 292.085, | |
| "completions/min_length": 233.4, | |
| "completions/min_terminated_length": 233.4, | |
| "epoch": 2.2, | |
| "frac_reward_zero_std": 0.26, | |
| "grad_norm": 3.7658961156032484, | |
| "learning_rate": 4.5016666666666664e-07, | |
| "loss": -0.0009, | |
| "num_tokens": 28878814.0, | |
| "reward": 9.694805870056152, | |
| "reward_std": 0.183432172909379, | |
| "rewards/accuracy_reward/mean": 0.85, | |
| "rewards/accuracy_reward/std": 0.13963742017745973, | |
| "rewards/chart_type_reward/mean": 0.9125, | |
| "rewards/chart_type_reward/std": 0.07306143283843994, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5138389551639557, | |
| "rewards/process_style_reward/std": 0.44110236927866936, | |
| "rewards/table_style_reward/mean": 1.9184669041633606, | |
| "rewards/table_style_reward/std": 0.3392397094517946, | |
| "step": 3300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 384.02, | |
| "completions/max_terminated_length": 384.02, | |
| "completions/mean_length": 319.64, | |
| "completions/mean_terminated_length": 319.64, | |
| "completions/min_length": 260.52, | |
| "completions/min_terminated_length": 260.52, | |
| "epoch": 2.2333333333333334, | |
| "frac_reward_zero_std": 0.26, | |
| "grad_norm": 2.7631543444157165, | |
| "learning_rate": 4.4183333333333335e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 29299838.0, | |
| "reward": 9.804728012084961, | |
| "reward_std": 0.2311769995908253, | |
| "rewards/accuracy_reward/mean": 0.8175, | |
| "rewards/accuracy_reward/std": 0.22684662878513337, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.09621404528617859, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.630505404472351, | |
| "rewards/process_style_reward/std": 0.40631200328469275, | |
| "rewards/table_style_reward/mean": 1.9467226195335388, | |
| "rewards/table_style_reward/std": 0.25668422447517514, | |
| "step": 3350 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 390.78, | |
| "completions/max_terminated_length": 390.78, | |
| "completions/mean_length": 311.04, | |
| "completions/mean_terminated_length": 311.04, | |
| "completions/min_length": 244.4, | |
| "completions/min_terminated_length": 244.4, | |
| "epoch": 2.2666666666666666, | |
| "frac_reward_zero_std": 0.12, | |
| "grad_norm": 3.7039274908559348, | |
| "learning_rate": 4.3349999999999996e-07, | |
| "loss": -0.0003, | |
| "num_tokens": 29716978.0, | |
| "reward": 9.710132846832275, | |
| "reward_std": 0.27550872176885605, | |
| "rewards/accuracy_reward/mean": 0.78, | |
| "rewards/accuracy_reward/std": 0.258406742811203, | |
| "rewards/chart_type_reward/mean": 0.95, | |
| "rewards/chart_type_reward/std": 0.05345224738121033, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5488644635677338, | |
| "rewards/process_style_reward/std": 0.4340578323602676, | |
| "rewards/table_style_reward/mean": 1.9312683844566345, | |
| "rewards/table_style_reward/std": 0.29763940557837487, | |
| "step": 3400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 365.52, | |
| "completions/max_terminated_length": 365.52, | |
| "completions/mean_length": 307.235, | |
| "completions/mean_terminated_length": 307.235, | |
| "completions/min_length": 259.8, | |
| "completions/min_terminated_length": 259.8, | |
| "epoch": 2.3, | |
| "frac_reward_zero_std": 0.19, | |
| "grad_norm": 3.2607533668304947, | |
| "learning_rate": 4.2516666666666667e-07, | |
| "loss": -0.0013, | |
| "num_tokens": 30133056.0, | |
| "reward": 9.773553447723389, | |
| "reward_std": 0.30064396366477014, | |
| "rewards/accuracy_reward/mean": 0.7575, | |
| "rewards/accuracy_reward/std": 0.2782620853185654, | |
| "rewards/chart_type_reward/mean": 0.9225, | |
| "rewards/chart_type_reward/std": 0.08518413066864014, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6084586906433105, | |
| "rewards/process_style_reward/std": 0.41134398311376574, | |
| "rewards/table_style_reward/mean": 1.9850947809219361, | |
| "rewards/table_style_reward/std": 0.31916536355391145, | |
| "step": 3450 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 375.18, | |
| "completions/max_terminated_length": 374.2, | |
| "completions/mean_length": 316.03, | |
| "completions/mean_terminated_length": 313.87, | |
| "completions/min_length": 264.38, | |
| "completions/min_terminated_length": 264.38, | |
| "epoch": 2.3333333333333335, | |
| "frac_reward_zero_std": 0.19, | |
| "grad_norm": 1.990920818414865, | |
| "learning_rate": 4.1683333333333333e-07, | |
| "loss": 0.0034, | |
| "num_tokens": 30552900.0, | |
| "reward": 9.88123031616211, | |
| "reward_std": 0.2634401721076574, | |
| "rewards/accuracy_reward/mean": 0.78, | |
| "rewards/accuracy_reward/std": 0.2654762434959412, | |
| "rewards/chart_type_reward/mean": 0.9525, | |
| "rewards/chart_type_reward/std": 0.05311278223991394, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.6290400648117065, | |
| "rewards/process_style_reward/std": 0.41983593456447127, | |
| "rewards/table_style_reward/mean": 2.0346901965141297, | |
| "rewards/table_style_reward/std": 0.28429963132366537, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 434.36, | |
| "eval_completions/max_terminated_length": 434.36, | |
| "eval_completions/mean_length": 297.61625, | |
| "eval_completions/mean_terminated_length": 297.61625, | |
| "eval_completions/min_length": 204.8, | |
| "eval_completions/min_terminated_length": 204.8, | |
| "eval_frac_reward_zero_std": 0.665, | |
| "eval_loss": 0.0004025402304250747, | |
| "eval_num_tokens": 30552900.0, | |
| "eval_reward": 7.552074928283691, | |
| "eval_reward_std": 0.0732093141740188, | |
| "eval_rewards/accuracy_reward/mean": 0.855, | |
| "eval_rewards/accuracy_reward/std": 0.3025389724969864, | |
| "eval_rewards/chart_type_reward/mean": 0.6025, | |
| "eval_rewards/chart_type_reward/std": 0.4713040769100189, | |
| "eval_rewards/format_reward/mean": 2.0, | |
| "eval_rewards/format_reward/std": 0.0, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 1.0, | |
| "eval_rewards/num_token_reward/std": 0.0, | |
| "eval_rewards/process_style_reward/mean": 0.8350749087333679, | |
| "eval_rewards/process_style_reward/std": 0.28666666328907014, | |
| "eval_rewards/table_style_reward/mean": 0.7595000004768372, | |
| "eval_rewards/table_style_reward/std": 0.023632641285657882, | |
| "eval_runtime": 327.7189, | |
| "eval_samples_per_second": 0.61, | |
| "eval_steps_per_second": 0.021, | |
| "step": 3500 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 368.26, | |
| "completions/max_terminated_length": 368.26, | |
| "completions/mean_length": 310.085, | |
| "completions/mean_terminated_length": 310.085, | |
| "completions/min_length": 255.86, | |
| "completions/min_terminated_length": 255.86, | |
| "epoch": 2.3666666666666667, | |
| "frac_reward_zero_std": 0.15, | |
| "grad_norm": 2.559054191104846, | |
| "learning_rate": 4.0849999999999993e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 30970094.0, | |
| "reward": 9.84461862564087, | |
| "reward_std": 0.31480611886829135, | |
| "rewards/accuracy_reward/mean": 0.7875, | |
| "rewards/accuracy_reward/std": 0.24949051320552826, | |
| "rewards/chart_type_reward/mean": 0.9325, | |
| "rewards/chart_type_reward/std": 0.08047196567058564, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6563871276378632, | |
| "rewards/process_style_reward/std": 0.47597916625440123, | |
| "rewards/table_style_reward/mean": 1.9682315516471862, | |
| "rewards/table_style_reward/std": 0.31802774131298067, | |
| "step": 3550 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 392.88, | |
| "completions/max_terminated_length": 392.72, | |
| "completions/mean_length": 328.53, | |
| "completions/mean_terminated_length": 327.8175, | |
| "completions/min_length": 269.42, | |
| "completions/min_terminated_length": 269.42, | |
| "epoch": 2.4, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 1.3441577493975225, | |
| "learning_rate": 4.0016666666666664e-07, | |
| "loss": 0.0021, | |
| "num_tokens": 31394678.0, | |
| "reward": 9.768132333755494, | |
| "reward_std": 0.2865133846178651, | |
| "rewards/accuracy_reward/mean": 0.7325, | |
| "rewards/accuracy_reward/std": 0.3207777261734009, | |
| "rewards/chart_type_reward/mean": 0.8875, | |
| "rewards/chart_type_reward/std": 0.11254331409931183, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.6195254147052764, | |
| "rewards/process_style_reward/std": 0.4645379837602377, | |
| "rewards/table_style_reward/mean": 2.0436069059371946, | |
| "rewards/table_style_reward/std": 0.28304173408076166, | |
| "step": 3600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 357.54, | |
| "completions/max_terminated_length": 357.54, | |
| "completions/mean_length": 299.01, | |
| "completions/mean_terminated_length": 299.01, | |
| "completions/min_length": 244.34, | |
| "completions/min_terminated_length": 244.34, | |
| "epoch": 2.4333333333333336, | |
| "frac_reward_zero_std": 0.21, | |
| "grad_norm": 2.849518244313829, | |
| "learning_rate": 3.918333333333333e-07, | |
| "loss": 0.0003, | |
| "num_tokens": 31807526.0, | |
| "reward": 9.800979099273682, | |
| "reward_std": 0.25777424886822703, | |
| "rewards/accuracy_reward/mean": 0.7925, | |
| "rewards/accuracy_reward/std": 0.24965977609157564, | |
| "rewards/chart_type_reward/mean": 0.89, | |
| "rewards/chart_type_reward/std": 0.11759494423866272, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5430667841434478, | |
| "rewards/process_style_reward/std": 0.4256218123435974, | |
| "rewards/table_style_reward/mean": 2.075412368774414, | |
| "rewards/table_style_reward/std": 0.3282872153446078, | |
| "step": 3650 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 370.08, | |
| "completions/max_terminated_length": 370.08, | |
| "completions/mean_length": 309.9025, | |
| "completions/mean_terminated_length": 309.9025, | |
| "completions/min_length": 254.94, | |
| "completions/min_terminated_length": 254.94, | |
| "epoch": 2.466666666666667, | |
| "frac_reward_zero_std": 0.22, | |
| "grad_norm": 2.1631571281627955, | |
| "learning_rate": 3.835e-07, | |
| "loss": -0.0024, | |
| "num_tokens": 32224583.0, | |
| "reward": 9.830465087890625, | |
| "reward_std": 0.23616183903533966, | |
| "rewards/accuracy_reward/mean": 0.755, | |
| "rewards/accuracy_reward/std": 0.27062118172645566, | |
| "rewards/chart_type_reward/mean": 0.925, | |
| "rewards/chart_type_reward/std": 0.08409134745597839, | |
| "rewards/format_reward/mean": 1.995, | |
| "rewards/format_reward/std": 0.014142135381698609, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 0.9975, | |
| "rewards/num_token_reward/std": 0.007071067690849304, | |
| "rewards/process_style_reward/mean": 1.6658795142173768, | |
| "rewards/process_style_reward/std": 0.4328185883164406, | |
| "rewards/table_style_reward/mean": 1.992085530757904, | |
| "rewards/table_style_reward/std": 0.30545895665884015, | |
| "step": 3700 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005, | |
| "completions/max_length": 374.04, | |
| "completions/max_terminated_length": 374.02, | |
| "completions/mean_length": 316.38, | |
| "completions/mean_terminated_length": 315.13416748046876, | |
| "completions/min_length": 261.26, | |
| "completions/min_terminated_length": 261.26, | |
| "epoch": 2.5, | |
| "frac_reward_zero_std": 0.18, | |
| "grad_norm": 3.743160685463559, | |
| "learning_rate": 3.751666666666666e-07, | |
| "loss": -0.0015, | |
| "num_tokens": 32644799.0, | |
| "reward": 9.782908029556275, | |
| "reward_std": 0.2735483956709504, | |
| "rewards/accuracy_reward/mean": 0.825, | |
| "rewards/accuracy_reward/std": 0.2005802285671234, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.07483314633369446, | |
| "rewards/format_reward/mean": 1.99, | |
| "rewards/format_reward/std": 0.01851640224456787, | |
| "rewards/length_think_reward/mean": 1.499375, | |
| "rewards/length_think_reward/std": 0.001767766922712326, | |
| "rewards/num_token_reward/mean": 0.995, | |
| "rewards/num_token_reward/std": 0.009258201122283935, | |
| "rewards/process_style_reward/mean": 1.5454747760295868, | |
| "rewards/process_style_reward/std": 0.431058616489172, | |
| "rewards/table_style_reward/mean": 2.0180582451820372, | |
| "rewards/table_style_reward/std": 0.3305081824213266, | |
| "step": 3750 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 333.855, | |
| "completions/mean_terminated_length": 333.855, | |
| "completions/min_length": 273.8, | |
| "completions/min_terminated_length": 273.8, | |
| "epoch": 2.533333333333333, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 5.8673809839930975, | |
| "learning_rate": 3.6683333333333333e-07, | |
| "loss": -0.0002, | |
| "num_tokens": 33072297.0, | |
| "reward": 9.738230361938477, | |
| "reward_std": 0.2538567354902625, | |
| "rewards/accuracy_reward/mean": 0.7525, | |
| "rewards/accuracy_reward/std": 0.27658367991447447, | |
| "rewards/chart_type_reward/mean": 0.9, | |
| "rewards/chart_type_reward/std": 0.10690449476242066, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.6415416550636293, | |
| "rewards/process_style_reward/std": 0.433953458070755, | |
| "rewards/table_style_reward/mean": 1.9441887497901917, | |
| "rewards/table_style_reward/std": 0.32067165344953535, | |
| "step": 3800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 385.32, | |
| "completions/max_terminated_length": 385.32, | |
| "completions/mean_length": 327.8975, | |
| "completions/mean_terminated_length": 327.8975, | |
| "completions/min_length": 274.0, | |
| "completions/min_terminated_length": 274.0, | |
| "epoch": 2.5666666666666664, | |
| "frac_reward_zero_std": 0.19, | |
| "grad_norm": 3.653425622305845, | |
| "learning_rate": 3.585e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 33496748.0, | |
| "reward": 9.902756881713866, | |
| "reward_std": 0.27108980235410857, | |
| "rewards/accuracy_reward/mean": 0.8125, | |
| "rewards/accuracy_reward/std": 0.20225769460201262, | |
| "rewards/chart_type_reward/mean": 0.89, | |
| "rewards/chart_type_reward/std": 0.11759494423866272, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.700443229675293, | |
| "rewards/process_style_reward/std": 0.37070401668548586, | |
| "rewards/table_style_reward/mean": 1.9998136401176452, | |
| "rewards/table_style_reward/std": 0.26890223439782857, | |
| "step": 3850 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 405.18, | |
| "completions/max_terminated_length": 405.18, | |
| "completions/mean_length": 334.3775, | |
| "completions/mean_terminated_length": 334.3775, | |
| "completions/min_length": 270.76, | |
| "completions/min_terminated_length": 270.76, | |
| "epoch": 2.6, | |
| "frac_reward_zero_std": 0.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.5016666666666665e-07, | |
| "loss": 0.0018, | |
| "num_tokens": 33923707.0, | |
| "reward": 9.698110904693603, | |
| "reward_std": 0.2213594539882615, | |
| "rewards/accuracy_reward/mean": 0.7375, | |
| "rewards/accuracy_reward/std": 0.27844556748867033, | |
| "rewards/chart_type_reward/mean": 0.91, | |
| "rewards/chart_type_reward/std": 0.1040399980545044, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.4740183496475219, | |
| "rewards/process_style_reward/std": 0.416292352899909, | |
| "rewards/table_style_reward/mean": 2.076592493057251, | |
| "rewards/table_style_reward/std": 0.3206081053614616, | |
| "step": 3900 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 381.28, | |
| "completions/max_terminated_length": 381.28, | |
| "completions/mean_length": 318.2575, | |
| "completions/mean_terminated_length": 318.2575, | |
| "completions/min_length": 256.24, | |
| "completions/min_terminated_length": 256.24, | |
| "epoch": 2.6333333333333333, | |
| "frac_reward_zero_std": 0.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.418333333333333e-07, | |
| "loss": -0.0002, | |
| "num_tokens": 34343654.0, | |
| "reward": 9.784586238861085, | |
| "reward_std": 0.21556221422739327, | |
| "rewards/accuracy_reward/mean": 0.77, | |
| "rewards/accuracy_reward/std": 0.24662194311618804, | |
| "rewards/chart_type_reward/mean": 0.95, | |
| "rewards/chart_type_reward/std": 0.03207134842872619, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.499375, | |
| "rewards/length_think_reward/std": 0.001767766922712326, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5625994729995727, | |
| "rewards/process_style_reward/std": 0.4333411505073309, | |
| "rewards/table_style_reward/mean": 2.002611801624298, | |
| "rewards/table_style_reward/std": 0.2887386105395853, | |
| "step": 3950 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 385.1, | |
| "completions/max_terminated_length": 385.1, | |
| "completions/mean_length": 332.6775, | |
| "completions/mean_terminated_length": 332.6775, | |
| "completions/min_length": 280.72, | |
| "completions/min_terminated_length": 280.72, | |
| "epoch": 2.6666666666666665, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.284695127062566, | |
| "learning_rate": 3.335e-07, | |
| "loss": -0.0007, | |
| "num_tokens": 34769621.0, | |
| "reward": 9.657586326599121, | |
| "reward_std": 0.25254386219428854, | |
| "rewards/accuracy_reward/mean": 0.73, | |
| "rewards/accuracy_reward/std": 0.30427919447422025, | |
| "rewards/chart_type_reward/mean": 0.93, | |
| "rewards/chart_type_reward/std": 0.07483314633369446, | |
| "rewards/format_reward/mean": 2.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/length_think_reward/mean": 1.5, | |
| "rewards/length_think_reward/std": 0.0, | |
| "rewards/num_token_reward/mean": 1.0, | |
| "rewards/num_token_reward/std": 0.0, | |
| "rewards/process_style_reward/mean": 1.5347757422924042, | |
| "rewards/process_style_reward/std": 0.3837696108222008, | |
| "rewards/table_style_reward/mean": 1.9628105711936952, | |
| "rewards/table_style_reward/std": 0.36233584862202406, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 440.6, | |
| "eval_completions/max_terminated_length": 440.6, | |
| "eval_completions/mean_length": 307.24125, | |
| "eval_completions/mean_terminated_length": 307.24125, | |
| "eval_completions/min_length": 217.76, | |
| "eval_completions/min_terminated_length": 217.76, | |
| "eval_frac_reward_zero_std": 0.71, | |
| "eval_loss": -0.0005738374311476946, | |
| "eval_num_tokens": 34769621.0, | |
| "eval_reward": 7.5656030654907225, | |
| "eval_reward_std": 0.0531126305134967, | |
| "eval_rewards/accuracy_reward/mean": 0.8525, | |
| "eval_rewards/accuracy_reward/std": 0.2918598711490631, | |
| "eval_rewards/chart_type_reward/mean": 0.61375, | |
| "eval_rewards/chart_type_reward/std": 0.466957231760025, | |
| "eval_rewards/format_reward/mean": 2.0, | |
| "eval_rewards/format_reward/std": 0.0, | |
| "eval_rewards/length_think_reward/mean": 1.5, | |
| "eval_rewards/length_think_reward/std": 0.0, | |
| "eval_rewards/num_token_reward/mean": 1.0, | |
| "eval_rewards/num_token_reward/std": 0.0, | |
| "eval_rewards/process_style_reward/mean": 0.8417280244827271, | |
| "eval_rewards/process_style_reward/std": 0.2765642327070236, | |
| "eval_rewards/table_style_reward/mean": 0.7576250004768371, | |
| "eval_rewards/table_style_reward/std": 0.030703708827495575, | |
| "eval_runtime": 331.3654, | |
| "eval_samples_per_second": 0.604, | |
| "eval_steps_per_second": 0.021, | |
| "step": 4000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 6000, | |
| "num_input_tokens_seen": 34769621, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |