File size: 52,458 Bytes
7bb727d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.285714285714286,
  "eval_steps": 500,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "advantages_interactive_phase": -7.450580596923828e-09,
      "clip_ratio": 0.009601324272807688,
      "clue_civilian_adjusted_reward_mean": 0.04645636805253976,
      "clue_civilian_advantage_adjustment": -0.0340991875030158,
      "clue_civilian_baseline": 0.0340991875030158,
      "clue_civilian_raw_reward_mean": 0.08055555555555557,
      "clue_civilian_votes_avg": 0.5833333333333333,
      "clue_invalid_votes": 2.875,
      "clue_na_votes": 0.375,
      "clue_spy_adjusted_reward": -0.13936911300005017,
      "clue_spy_advantage_adjustment": 0.10229755366661651,
      "clue_spy_baseline": -0.10229755366661651,
      "clue_spy_raw_reward": -0.24166666666666672,
      "clue_spy_votes_received": 3.0,
      "clue_suspicion_potential_psi": 2.4166666666666665,
      "clue_total_valid_votes": 4.75,
      "completion_length": 468.671875,
      "epoch": 0.14285714285714285,
      "grad_norm": 1.5805621147155762,
      "kl": 0.0014362335205078125,
      "learning_rate": 3.5714285714285716e-07,
      "loss": 0.0009,
      "loss_interactive_phase": 0.0008710725232958794,
      "reward": 0.04593749572205778,
      "reward_original_clue_max": 0.13422530971612442,
      "reward_original_clue_mean": -2.210607728011897e-09,
      "reward_original_clue_min": -0.2361905828296588,
      "reward_original_clue_std": 0.15096539176990595,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 0.18375000000000005,
      "reward_original_decision_min": -1.5,
      "reward_original_decision_std": 1.5282191941515817,
      "reward_original_overall_mean": 0.09187499889469616,
      "reward_original_overall_std": 1.1872952864471322,
      "reward_std": 1.1281701095850887,
      "step": 1
    },
    {
      "advantages_interactive_phase": 1.1699739843606949e-08,
      "clip_ratio": 0.01009189459728077,
      "clue_civilian_adjusted_reward_mean": 0.032244541979207814,
      "clue_civilian_advantage_adjustment": -0.04969990246523662,
      "clue_civilian_baseline": 0.04969990246523663,
      "clue_civilian_raw_reward_mean": 0.08194444444444444,
      "clue_civilian_votes_avg": 0.29166666666666663,
      "clue_invalid_votes": 3.375,
      "clue_na_votes": 1.0,
      "clue_spy_adjusted_reward": -0.09673363023500019,
      "clue_spy_advantage_adjustment": 0.14909970309833318,
      "clue_spy_baseline": -0.14909970309833318,
      "clue_spy_raw_reward": -0.24583333333333335,
      "clue_spy_votes_received": 2.75,
      "clue_suspicion_potential_psi": 2.458333333333333,
      "clue_total_valid_votes": 3.625,
      "completion_length": 346.546875,
      "epoch": 0.2857142857142857,
      "grad_norm": 0.9062501192092896,
      "kl": 0.0015811920166015625,
      "learning_rate": 7.142857142857143e-07,
      "loss": 0.0006,
      "loss_interactive_phase": 0.000601769017521292,
      "reward": 0.02179688058128391,
      "reward_original_clue_max": 0.12385744329827358,
      "reward_original_clue_mean": -1.0743441833859987e-09,
      "reward_original_clue_min": -0.2033983952724114,
      "reward_original_clue_std": 0.13295147035012433,
      "reward_original_decision_max": 1.3687500000000001,
      "reward_original_decision_mean": 0.08718750000000001,
      "reward_original_decision_min": -1.5,
      "reward_original_decision_std": 1.2333472361619537,
      "reward_original_overall_mean": 0.04359374946282798,
      "reward_original_overall_std": 1.0888861639122744,
      "reward_std": 0.9135217889060865,
      "step": 2
    },
    {
      "advantages_interactive_phase": -3.91155481338501e-08,
      "clip_ratio": 0.010360982327256352,
      "clue_civilian_adjusted_reward_mean": -0.012963048645367207,
      "clue_civilian_advantage_adjustment": -0.06712971531203388,
      "clue_civilian_baseline": 0.06712971531203388,
      "clue_civilian_raw_reward_mean": 0.05416666666666666,
      "clue_civilian_votes_avg": 0.5,
      "clue_invalid_votes": 4.125,
      "clue_na_votes": 0.25,
      "clue_spy_adjusted_reward": 0.03888914454732575,
      "clue_spy_advantage_adjustment": 0.20138914454732576,
      "clue_spy_baseline": -0.20138914454732576,
      "clue_spy_raw_reward": -0.16249999999999998,
      "clue_spy_votes_received": 2.125,
      "clue_suspicion_potential_psi": 1.6249999999999998,
      "clue_total_valid_votes": 3.625,
      "completion_length": 469.40625,
      "epoch": 0.42857142857142855,
      "grad_norm": 1.7866086959838867,
      "kl": 0.0020046234130859375,
      "learning_rate": 1.0714285714285714e-06,
      "loss": 0.001,
      "loss_interactive_phase": 0.0009609556873328984,
      "reward": -0.07253908214457255,
      "reward_original_clue_max": 0.17909297508117578,
      "reward_original_clue_mean": -3.4719396715260364e-10,
      "reward_original_clue_min": -0.1906668791792172,
      "reward_original_clue_std": 0.148119477615202,
      "reward_original_decision_max": 0.8700000000000001,
      "reward_original_decision_mean": -0.29015625,
      "reward_original_decision_min": -1.5,
      "reward_original_decision_std": 1.012053904965003,
      "reward_original_overall_mean": -0.14507812517359697,
      "reward_original_overall_std": 1.0090200646515894,
      "reward_std": 1.0390325135884786,
      "step": 3
    },
    {
      "advantages_interactive_phase": 1.618172973394394e-08,
      "clip_ratio": 0.011920451768673956,
      "clue_civilian_adjusted_reward_mean": -0.016897605204000857,
      "clue_civilian_advantage_adjustment": -0.05578649409288976,
      "clue_civilian_baseline": 0.05578649409288975,
      "clue_civilian_raw_reward_mean": 0.038888888888888896,
      "clue_civilian_votes_avg": 0.5833333333333333,
      "clue_invalid_votes": 3.75,
      "clue_na_votes": 0.75,
      "clue_spy_adjusted_reward": 0.05069280585926233,
      "clue_spy_advantage_adjustment": 0.16735947252592903,
      "clue_spy_baseline": -0.16735947252592903,
      "clue_spy_raw_reward": -0.11666666666666668,
      "clue_spy_votes_received": 1.75,
      "clue_suspicion_potential_psi": 1.1666666666666665,
      "clue_total_valid_votes": 3.5,
      "completion_length": 349.9765625,
      "epoch": 0.5714285714285714,
      "grad_norm": 0.8659637570381165,
      "kl": 0.0020847320556640625,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.0006,
      "loss_interactive_phase": 0.0005855783383594826,
      "reward": -0.09585936751868138,
      "reward_original_clue_max": 0.12609553713896215,
      "reward_original_clue_mean": -2.438185060966991e-09,
      "reward_original_clue_min": -0.1539659667175763,
      "reward_original_clue_std": 0.11102248687112745,
      "reward_original_decision_max": 1.7175,
      "reward_original_decision_mean": -0.3834375,
      "reward_original_decision_min": -1.5,
      "reward_original_decision_std": 1.318500391473726,
      "reward_original_overall_mean": -0.1917187512190925,
      "reward_original_overall_std": 1.0483758882247372,
      "reward_std": 0.8920968279522142,
      "step": 4
    },
    {
      "advantages_interactive_phase": -9.313225746154785e-09,
      "clip_ratio": 0.011668159277178347,
      "clue_civilian_adjusted_reward_mean": 0.012703904980288123,
      "clue_civilian_advantage_adjustment": -0.06090720613082299,
      "clue_civilian_baseline": 0.060907206130823004,
      "clue_civilian_raw_reward_mean": 0.07361111111111111,
      "clue_civilian_votes_avg": 0.6666666666666666,
      "clue_invalid_votes": 3.125,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": -0.038111721525748175,
      "clue_spy_advantage_adjustment": 0.18272161180758517,
      "clue_spy_baseline": -0.18272161180758517,
      "clue_spy_raw_reward": -0.22083333333333333,
      "clue_spy_votes_received": 2.875,
      "clue_suspicion_potential_psi": 2.208333333333334,
      "clue_total_valid_votes": 4.875,
      "completion_length": 454.71875,
      "epoch": 0.7142857142857143,
      "grad_norm": 1.5932543277740479,
      "kl": 0.0032215118408203125,
      "learning_rate": 1.7857142857142859e-06,
      "loss": 0.0011,
      "loss_interactive_phase": 0.0011206967756152153,
      "reward": 0.020976557431831903,
      "reward_original_clue_max": 0.14661833481616776,
      "reward_original_clue_mean": -1.646220952741459e-09,
      "reward_original_clue_min": -0.24318362303306112,
      "reward_original_clue_std": 0.15305170150846256,
      "reward_original_decision_max": 1.6425,
      "reward_original_decision_mean": 0.08390625000000004,
      "reward_original_decision_min": -1.5,
      "reward_original_decision_std": 1.4370029970655516,
      "reward_original_overall_mean": 0.04195312417688955,
      "reward_original_overall_std": 1.1718515588438811,
      "reward_std": 1.1204482532340438,
      "step": 5
    },
    {
      "advantages_interactive_phase": 1.3998942449688911e-08,
      "clip_ratio": 0.010447208071127534,
      "clue_civilian_adjusted_reward_mean": 0.019708324802807216,
      "clue_civilian_advantage_adjustment": -0.06084723075274834,
      "clue_civilian_baseline": 0.06084723075274835,
      "clue_civilian_raw_reward_mean": 0.08055555555555557,
      "clue_civilian_votes_avg": 0.5833333333333333,
      "clue_invalid_votes": 1.875,
      "clue_na_votes": 1.375,
      "clue_spy_adjusted_reward": -0.05912497942333426,
      "clue_spy_advantage_adjustment": 0.18254168724333242,
      "clue_spy_baseline": -0.18254168724333242,
      "clue_spy_raw_reward": -0.2416666666666667,
      "clue_spy_votes_received": 3.0,
      "clue_suspicion_potential_psi": 2.416666666666667,
      "clue_total_valid_votes": 4.75,
      "completion_length": 334.2265625,
      "epoch": 0.8571428571428571,
      "grad_norm": 0.8248350024223328,
      "kl": 0.0023393630981445312,
      "learning_rate": 2.1428571428571427e-06,
      "loss": 0.0006,
      "loss_interactive_phase": 0.0005622203752864152,
      "reward": 0.0834375066860392,
      "reward_original_clue_max": 0.1274395314956929,
      "reward_original_clue_mean": -1.253728150484891e-09,
      "reward_original_clue_min": -0.188569575974057,
      "reward_original_clue_std": 0.12904785603086397,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 0.33375000000000005,
      "reward_original_decision_min": -1.425,
      "reward_original_decision_std": 1.405177486682367,
      "reward_original_overall_mean": 0.16687499937313596,
      "reward_original_overall_std": 1.1144284606169126,
      "reward_std": 0.9262521229778956,
      "step": 6
    },
    {
      "advantages_interactive_phase": 2.421438694000244e-08,
      "clip_ratio": 0.011843746062368155,
      "clue_civilian_adjusted_reward_mean": 0.06142050400483314,
      "clue_civilian_advantage_adjustment": -0.09830171821738909,
      "clue_civilian_baseline": 0.0983017182173891,
      "clue_civilian_raw_reward_mean": 0.15972222222222224,
      "clue_civilian_votes_avg": 0.5833333333333333,
      "clue_invalid_votes": 0.625,
      "clue_na_votes": 0.25,
      "clue_spy_adjusted_reward": -0.18426152068943952,
      "clue_spy_advantage_adjustment": 0.2949051459772272,
      "clue_spy_baseline": -0.2949051459772272,
      "clue_spy_raw_reward": -0.47916666666666674,
      "clue_spy_votes_received": 5.375,
      "clue_suspicion_potential_psi": 4.791666666666667,
      "clue_total_valid_votes": 7.125,
      "completion_length": 403.921875,
      "epoch": 1.0,
      "grad_norm": 1.83256995677948,
      "kl": 0.008373260498046875,
      "learning_rate": 2.5e-06,
      "loss": 0.0082,
      "loss_interactive_phase": 0.008202007971704006,
      "reward": 0.33738282406500975,
      "reward_original_clue_max": 0.16317140174540612,
      "reward_original_clue_mean": -2.1687350280430798e-09,
      "reward_original_clue_min": -0.2651790850966792,
      "reward_original_clue_std": 0.1737400336164854,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 1.34953125,
      "reward_original_decision_min": -1.05,
      "reward_original_decision_std": 1.2417606105114989,
      "reward_original_overall_mean": 0.6747656239156326,
      "reward_original_overall_std": 1.2304992480944565,
      "reward_std": 1.149772112760493,
      "step": 7
    },
    {
      "advantages_interactive_phase": 7.101334631443024e-09,
      "clip_ratio": 0.010323125752620399,
      "clue_civilian_adjusted_reward_mean": 0.014969077351818358,
      "clue_civilian_advantage_adjustment": -0.13919758931484832,
      "clue_civilian_baseline": 0.13919758931484832,
      "clue_civilian_raw_reward_mean": 0.1541666666666667,
      "clue_civilian_votes_avg": 0.6249999999999999,
      "clue_invalid_votes": 0.5,
      "clue_na_votes": 0.375,
      "clue_spy_adjusted_reward": -0.0449072601736762,
      "clue_spy_advantage_adjustment": 0.41759273982632383,
      "clue_spy_baseline": -0.41759273982632383,
      "clue_spy_raw_reward": -0.4625,
      "clue_spy_votes_received": 5.25,
      "clue_suspicion_potential_psi": 4.625,
      "clue_total_valid_votes": 7.125,
      "completion_length": 296.3125,
      "epoch": 1.1428571428571428,
      "grad_norm": 0.8603692054748535,
      "kl": 0.00751495361328125,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.0006,
      "loss_interactive_phase": 0.0005805297987535596,
      "reward": 0.32882812679327855,
      "reward_original_clue_max": 0.14714505206208714,
      "reward_original_clue_mean": -7.029555287255823e-09,
      "reward_original_clue_min": -0.25208323488394496,
      "reward_original_clue_std": 0.1584445317000752,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.3153125,
      "reward_original_decision_min": -1.05,
      "reward_original_decision_std": 1.3998854722184924,
      "reward_original_overall_mean": 0.6576562464852225,
      "reward_original_overall_std": 1.2712911404891047,
      "reward_std": 1.006177807237865,
      "step": 8
    },
    {
      "advantages_interactive_phase": 8.754432201385498e-08,
      "clip_ratio": 0.00764932727906853,
      "clue_civilian_adjusted_reward_mean": -0.04392835440414705,
      "clue_civilian_advantage_adjustment": -0.12309502107081373,
      "clue_civilian_baseline": 0.12309502107081373,
      "clue_civilian_raw_reward_mean": 0.07916666666666668,
      "clue_civilian_votes_avg": 1.25,
      "clue_invalid_votes": 0.375,
      "clue_na_votes": 0.25,
      "clue_spy_adjusted_reward": 0.13178503837842287,
      "clue_spy_advantage_adjustment": 0.36928503837842286,
      "clue_spy_baseline": -0.36928503837842286,
      "clue_spy_raw_reward": -0.23750000000000002,
      "clue_spy_votes_received": 3.625,
      "clue_suspicion_potential_psi": 2.375,
      "clue_total_valid_votes": 7.375,
      "completion_length": 383.28125,
      "epoch": 1.2857142857142856,
      "grad_norm": 1.2030161619186401,
      "kl": 0.021331787109375,
      "learning_rate": 3.2142857142857147e-06,
      "loss": 0.0013,
      "loss_interactive_phase": 0.0013305692409630865,
      "reward": 0.15667972972003488,
      "reward_original_clue_max": 0.31233062085321145,
      "reward_original_clue_mean": -6.208504575678717e-09,
      "reward_original_clue_min": -0.36730352417905254,
      "reward_original_clue_std": 0.2591782484917472,
      "reward_original_decision_max": 1.3687500000000001,
      "reward_original_decision_mean": 0.6267187500000002,
      "reward_original_decision_min": -0.7012499999999999,
      "reward_original_decision_std": 0.7808322541858488,
      "reward_original_overall_mean": 0.31335937189574775,
      "reward_original_overall_std": 0.9976451254390301,
      "reward_std": 0.8997144106280117,
      "step": 9
    },
    {
      "advantages_interactive_phase": -5.820766091346741e-10,
      "clip_ratio": 0.009187435440253466,
      "clue_civilian_adjusted_reward_mean": 0.03822758988085955,
      "clue_civilian_advantage_adjustment": -0.11732796567469601,
      "clue_civilian_baseline": 0.11732796567469601,
      "clue_civilian_raw_reward_mean": 0.15555555555555556,
      "clue_civilian_votes_avg": 0.7083333333333333,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 0.375,
      "clue_spy_adjusted_reward": -0.11468278722393674,
      "clue_spy_advantage_adjustment": 0.35198387944273,
      "clue_spy_baseline": -0.35198387944273,
      "clue_spy_raw_reward": -0.4666666666666667,
      "clue_spy_votes_received": 5.375,
      "clue_suspicion_potential_psi": 4.666666666666667,
      "clue_total_valid_votes": 7.5,
      "completion_length": 267.59375,
      "epoch": 1.4285714285714286,
      "grad_norm": 1.3427386283874512,
      "kl": 0.0283355712890625,
      "learning_rate": 3.5714285714285718e-06,
      "loss": 0.002,
      "loss_interactive_phase": 0.0019903209613403305,
      "reward": 0.34910156111012686,
      "reward_original_clue_max": 0.21486971111570505,
      "reward_original_clue_mean": -4.395339522032804e-09,
      "reward_original_clue_min": -0.3329915751254489,
      "reward_original_clue_std": 0.2213489408298035,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 1.39640625,
      "reward_original_decision_min": -0.052499999999999936,
      "reward_original_decision_std": 0.777539948847112,
      "reward_original_overall_mean": 0.6982031228023303,
      "reward_original_overall_std": 1.1362766329525933,
      "reward_std": 0.8716088880016916,
      "step": 10
    },
    {
      "advantages_interactive_phase": 7.450580596923828e-09,
      "clip_ratio": 0.007512057200074196,
      "clue_civilian_adjusted_reward_mean": 0.029820853200277782,
      "clue_civilian_advantage_adjustment": -0.14240136902194445,
      "clue_civilian_baseline": 0.14240136902194445,
      "clue_civilian_raw_reward_mean": 0.17222222222222225,
      "clue_civilian_votes_avg": 0.45833333333333337,
      "clue_invalid_votes": 0.375,
      "clue_na_votes": 0.625,
      "clue_spy_adjusted_reward": -0.08946257515700651,
      "clue_spy_advantage_adjustment": 0.42720409150966016,
      "clue_spy_baseline": -0.42720409150966016,
      "clue_spy_raw_reward": -0.5166666666666667,
      "clue_spy_votes_received": 5.625,
      "clue_suspicion_potential_psi": 5.166666666666667,
      "clue_total_valid_votes": 7.0,
      "completion_length": 345.90625,
      "epoch": 1.5714285714285714,
      "grad_norm": 1.4723293781280518,
      "kl": 0.03619384765625,
      "learning_rate": 3.928571428571429e-06,
      "loss": 0.0017,
      "loss_interactive_phase": 0.0016628647717880085,
      "reward": 0.37558594025302955,
      "reward_original_clue_max": 0.16687582015889904,
      "reward_original_clue_mean": -3.8890432892298155e-09,
      "reward_original_clue_min": -0.2556842087822944,
      "reward_original_clue_std": 0.1723200812438524,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.50234375,
      "reward_original_decision_min": -0.1274999999999999,
      "reward_original_decision_std": 0.9362325016572549,
      "reward_original_overall_mean": 0.7511718730554785,
      "reward_original_overall_std": 1.2049933860634257,
      "reward_std": 1.0033885483907903,
      "step": 11
    },
    {
      "advantages_interactive_phase": 4.249159246683121e-09,
      "clip_ratio": 0.00809995060262736,
      "clue_civilian_adjusted_reward_mean": 0.022188266754369242,
      "clue_civilian_advantage_adjustment": -0.187533955467853,
      "clue_civilian_baseline": 0.187533955467853,
      "clue_civilian_raw_reward_mean": 0.20972222222222225,
      "clue_civilian_votes_avg": 0.33333333333333337,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 0.25,
      "clue_spy_adjusted_reward": -0.0665648172282207,
      "clue_spy_advantage_adjustment": 0.5626018494384459,
      "clue_spy_baseline": -0.5626018494384459,
      "clue_spy_raw_reward": -0.6291666666666668,
      "clue_spy_votes_received": 6.625,
      "clue_suspicion_potential_psi": 6.291666666666667,
      "clue_total_valid_votes": 7.625,
      "completion_length": 252.8515625,
      "epoch": 1.7142857142857144,
      "grad_norm": 0.6337663531303406,
      "kl": 0.0308074951171875,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.0016,
      "loss_interactive_phase": 0.0016085498355096206,
      "reward": 0.4791796885642602,
      "reward_original_clue_max": 0.13644402464982577,
      "reward_original_clue_mean": -4.2412782478781774e-09,
      "reward_original_clue_min": -0.2099872417903439,
      "reward_original_clue_std": 0.13869593786155765,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.91671875,
      "reward_original_decision_min": 0.2962500000000001,
      "reward_original_decision_std": 0.7221503219446256,
      "reward_original_overall_mean": 0.9583593728793611,
      "reward_original_overall_std": 1.2329093618898852,
      "reward_std": 0.8671444384671382,
      "step": 12
    },
    {
      "advantages_interactive_phase": 1.862645149230957e-08,
      "clip_ratio": 0.0037154321908019483,
      "clue_civilian_adjusted_reward_mean": -0.0063334161387741394,
      "clue_civilian_advantage_adjustment": -0.17300008280544082,
      "clue_civilian_baseline": 0.1730000828054408,
      "clue_civilian_raw_reward_mean": 0.16666666666666663,
      "clue_civilian_votes_avg": 0.75,
      "clue_invalid_votes": 0.0,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": 0.019000245031720445,
      "clue_spy_advantage_adjustment": 0.5190002450317205,
      "clue_spy_baseline": -0.5190002450317205,
      "clue_spy_raw_reward": -0.5,
      "clue_spy_votes_received": 5.75,
      "clue_suspicion_potential_psi": 5.000000000000001,
      "clue_total_valid_votes": 8.0,
      "completion_length": 337.15625,
      "epoch": 1.8571428571428572,
      "grad_norm": 0.6522341370582581,
      "kl": 0.060089111328125,
      "learning_rate": 4.642857142857144e-06,
      "loss": 0.0026,
      "loss_interactive_phase": 0.002614069191622548,
      "reward": 0.3841406341016882,
      "reward_original_clue_max": 0.2507983958494952,
      "reward_original_clue_mean": -8.461504942894327e-10,
      "reward_original_clue_min": -0.3617781823290228,
      "reward_original_clue_std": 0.23592987163167928,
      "reward_original_decision_max": 1.6425,
      "reward_original_decision_mean": 1.5365625,
      "reward_original_decision_min": 1.21875,
      "reward_original_decision_std": 0.183489132426828,
      "reward_original_overall_mean": 0.7682812495769249,
      "reward_original_overall_std": 1.0923944210890442,
      "reward_std": 0.6130125272216156,
      "step": 13
    },
    {
      "advantages_interactive_phase": 1.6996636986732483e-08,
      "clip_ratio": 0.00847951346077025,
      "clue_civilian_adjusted_reward_mean": -0.008037120541629665,
      "clue_civilian_advantage_adjustment": -0.1774815649860741,
      "clue_civilian_baseline": 0.1774815649860741,
      "clue_civilian_raw_reward_mean": 0.16944444444444443,
      "clue_civilian_votes_avg": 0.6666666666666667,
      "clue_invalid_votes": 0.0,
      "clue_na_votes": 0.25,
      "clue_spy_adjusted_reward": 0.024111363006878495,
      "clue_spy_advantage_adjustment": 0.5324446963402119,
      "clue_spy_baseline": -0.5324446963402119,
      "clue_spy_raw_reward": -0.5083333333333333,
      "clue_spy_votes_received": 5.75,
      "clue_suspicion_potential_psi": 5.083333333333334,
      "clue_total_valid_votes": 7.75,
      "completion_length": 263.8359375,
      "epoch": 2.0,
      "grad_norm": 0.6224793195724487,
      "kl": 0.05419921875,
      "learning_rate": 5e-06,
      "loss": 0.0029,
      "loss_interactive_phase": 0.0029011927836108953,
      "reward": 0.388828133584693,
      "reward_original_clue_max": 0.2085423028111688,
      "reward_original_clue_mean": 3.4549737831963967e-10,
      "reward_original_clue_min": -0.2378369221284988,
      "reward_original_clue_std": 0.17641129318052418,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 1.5553125000000003,
      "reward_original_decision_min": 0.3712500000000002,
      "reward_original_decision_std": 0.668291458945668,
      "reward_original_overall_mean": 0.777656250172749,
      "reward_original_overall_std": 1.157252940433482,
      "reward_std": 0.8331386712800759,
      "step": 14
    },
    {
      "advantages_interactive_phase": 1.6763806343078613e-08,
      "clip_ratio": 0.003465894202236086,
      "clue_civilian_adjusted_reward_mean": 0.023268842590345305,
      "clue_civilian_advantage_adjustment": -0.18367560185409915,
      "clue_civilian_baseline": 0.18367560185409915,
      "clue_civilian_raw_reward_mean": 0.20694444444444446,
      "clue_civilian_votes_avg": 0.41666666666666663,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": -0.06980651791005336,
      "clue_spy_advantage_adjustment": 0.55102681542328,
      "clue_spy_baseline": -0.55102681542328,
      "clue_spy_raw_reward": -0.6208333333333333,
      "clue_spy_votes_received": 6.625,
      "clue_suspicion_potential_psi": 6.208333333333333,
      "clue_total_valid_votes": 7.875,
      "completion_length": 355.0625,
      "epoch": 2.142857142857143,
      "grad_norm": 0.7302604913711548,
      "kl": 0.08294677734375,
      "learning_rate": 5.357142857142857e-06,
      "loss": 0.0034,
      "loss_interactive_phase": 0.0033593956904951483,
      "reward": 0.4744921964982146,
      "reward_original_clue_max": 0.15940368854312534,
      "reward_original_clue_mean": 2.465245637836566e-09,
      "reward_original_clue_min": -0.24915193754399267,
      "reward_original_clue_std": 0.16536223128526092,
      "reward_original_decision_max": 2.06625,
      "reward_original_decision_mean": 1.8979687500000002,
      "reward_original_decision_min": 1.14375,
      "reward_original_decision_std": 0.34843519072601103,
      "reward_original_overall_mean": 0.9489843762326229,
      "reward_original_overall_std": 1.1879908640849528,
      "reward_std": 0.7276260579460828,
      "step": 15
    },
    {
      "advantages_interactive_phase": -5.820766091346741e-09,
      "clip_ratio": 0.006389049158315174,
      "clue_civilian_adjusted_reward_mean": -0.046890086079656224,
      "clue_civilian_advantage_adjustment": -0.1635567527463229,
      "clue_civilian_baseline": 0.1635567527463229,
      "clue_civilian_raw_reward_mean": 0.11666666666666668,
      "clue_civilian_votes_avg": 0.625,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 1.875,
      "clue_spy_adjusted_reward": 0.14067026040217548,
      "clue_spy_advantage_adjustment": 0.49067026040217543,
      "clue_spy_baseline": -0.49067026040217543,
      "clue_spy_raw_reward": -0.35000000000000003,
      "clue_spy_votes_received": 4.125,
      "clue_suspicion_potential_psi": 3.5,
      "clue_total_valid_votes": 6.0,
      "completion_length": 298.203125,
      "epoch": 2.2857142857142856,
      "grad_norm": 18.787372589111328,
      "kl": 0.0643310546875,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.0227,
      "loss_interactive_phase": 0.02273198706097901,
      "reward": 0.2448046847248174,
      "reward_original_clue_max": 0.31508776732274035,
      "reward_original_clue_mean": 5.408017063682213e-10,
      "reward_original_clue_min": -0.296307593000221,
      "reward_original_clue_std": 0.24153634073282915,
      "reward_original_decision_max": 1.7925,
      "reward_original_decision_mean": 0.97921875,
      "reward_original_decision_min": 0.37125000000000014,
      "reward_original_decision_std": 0.4794102853203757,
      "reward_original_overall_mean": 0.4896093752704009,
      "reward_original_overall_std": 0.942394237518095,
      "reward_std": 0.732489835712951,
      "step": 16
    },
    {
      "advantages_interactive_phase": 3.91155481338501e-08,
      "clip_ratio": 0.003673899220302701,
      "clue_civilian_adjusted_reward_mean": 0.04772389224517483,
      "clue_civilian_advantage_adjustment": -0.16199832997704738,
      "clue_civilian_baseline": 0.16199832997704738,
      "clue_civilian_raw_reward_mean": 0.2097222222222222,
      "clue_civilian_votes_avg": 0.20833333333333331,
      "clue_invalid_votes": 0.375,
      "clue_na_votes": 0.5,
      "clue_spy_adjusted_reward": -0.1431716761674981,
      "clue_spy_advantage_adjustment": 0.4859949904991686,
      "clue_spy_baseline": -0.4859949904991686,
      "clue_spy_raw_reward": -0.6291666666666668,
      "clue_spy_votes_received": 6.5,
      "clue_suspicion_potential_psi": 6.291666666666667,
      "clue_total_valid_votes": 7.125,
      "completion_length": 363.796875,
      "epoch": 2.4285714285714284,
      "grad_norm": 1.1137752532958984,
      "kl": 0.099365234375,
      "learning_rate": 6.071428571428571e-06,
      "loss": 0.0042,
      "loss_interactive_phase": 0.0041947553982026875,
      "reward": 0.4659375195932758,
      "reward_original_clue_max": 0.13827404245616234,
      "reward_original_clue_mean": 1.420066000554221e-10,
      "reward_original_clue_min": -0.25038849304515226,
      "reward_original_clue_std": 0.16193395395069143,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 1.86375,
      "reward_original_decision_min": 0.7200000000000001,
      "reward_original_decision_std": 0.50161616427093,
      "reward_original_overall_mean": 0.9318750000710034,
      "reward_original_overall_std": 1.1870721598954623,
      "reward_std": 0.8607973168537828,
      "step": 17
    },
    {
      "advantages_interactive_phase": 2.837623469531536e-09,
      "clip_ratio": 0.00735139346215874,
      "clue_civilian_adjusted_reward_mean": -0.0017000992579862158,
      "clue_civilian_advantage_adjustment": -0.19475565481354176,
      "clue_civilian_baseline": 0.19475565481354176,
      "clue_civilian_raw_reward_mean": 0.19305555555555554,
      "clue_civilian_votes_avg": 0.20833333333333331,
      "clue_invalid_votes": 0.5,
      "clue_na_votes": 0.875,
      "clue_spy_adjusted_reward": 0.0051002900798029355,
      "clue_spy_advantage_adjustment": 0.5842669567464696,
      "clue_spy_baseline": -0.5842669567464696,
      "clue_spy_raw_reward": -0.5791666666666666,
      "clue_spy_votes_received": 6.0,
      "clue_suspicion_potential_psi": 5.791666666666667,
      "clue_total_valid_votes": 6.625,
      "completion_length": 296.6875,
      "epoch": 2.571428571428571,
      "grad_norm": 0.6697026491165161,
      "kl": 0.083984375,
      "learning_rate": 6.4285714285714295e-06,
      "loss": 0.0038,
      "loss_interactive_phase": 0.003771388641325757,
      "reward": 0.417656250937927,
      "reward_original_clue_max": 0.112484931462635,
      "reward_original_clue_mean": -1.923538928399332e-09,
      "reward_original_clue_min": -0.12320458815006033,
      "reward_original_clue_std": 0.09749921717912478,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.6706250000000002,
      "reward_original_decision_min": -0.27749999999999997,
      "reward_original_decision_std": 1.025463516531797,
      "reward_original_overall_mean": 0.8353124990382306,
      "reward_original_overall_std": 1.2562141688103865,
      "reward_std": 0.9142232616213656,
      "step": 18
    },
    {
      "advantages_interactive_phase": 2.421438694000244e-08,
      "clip_ratio": 0.00690445041982457,
      "clue_civilian_adjusted_reward_mean": -0.02088481382776897,
      "clue_civilian_advantage_adjustment": -0.19588481382776896,
      "clue_civilian_baseline": 0.19588481382776896,
      "clue_civilian_raw_reward_mean": 0.17500000000000002,
      "clue_civilian_votes_avg": 0.375,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 1.125,
      "clue_spy_adjusted_reward": 0.06265442930658618,
      "clue_spy_advantage_adjustment": 0.5876544293065862,
      "clue_spy_baseline": -0.5876544293065862,
      "clue_spy_raw_reward": -0.525,
      "clue_spy_votes_received": 5.625,
      "clue_suspicion_potential_psi": 5.249999999999999,
      "clue_total_valid_votes": 6.75,
      "completion_length": 345.75,
      "epoch": 2.7142857142857144,
      "grad_norm": 1.3586255311965942,
      "kl": 0.135986328125,
      "learning_rate": 6.785714285714287e-06,
      "loss": 0.006,
      "loss_interactive_phase": 0.00598463078495115,
      "reward": 0.38964844884614847,
      "reward_original_clue_max": 0.19217094683455577,
      "reward_original_clue_mean": -3.044180179635758e-09,
      "reward_original_clue_min": -0.18790133135573855,
      "reward_original_clue_std": 0.15165388026680496,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 1.55859375,
      "reward_original_decision_min": 0.3712500000000002,
      "reward_original_decision_std": 0.6276860023276215,
      "reward_original_overall_mean": 0.77929687347791,
      "reward_original_overall_std": 1.1151671426286367,
      "reward_std": 0.8916601099963023,
      "step": 19
    },
    {
      "advantages_interactive_phase": 1.1408701539039612e-08,
      "clip_ratio": 0.0066332001879345626,
      "clue_civilian_adjusted_reward_mean": -0.014852289498390544,
      "clue_civilian_advantage_adjustment": -0.15790784505394606,
      "clue_civilian_baseline": 0.1579078450539461,
      "clue_civilian_raw_reward_mean": 0.14305555555555555,
      "clue_civilian_votes_avg": 0.7083333333333334,
      "clue_invalid_votes": 0.0,
      "clue_na_votes": 0.875,
      "clue_spy_adjusted_reward": 0.04455685833978199,
      "clue_spy_advantage_adjustment": 0.4737235250064487,
      "clue_spy_baseline": -0.47372352500644876,
      "clue_spy_raw_reward": -0.4291666666666667,
      "clue_spy_votes_received": 5.0,
      "clue_suspicion_potential_psi": 4.291666666666667,
      "clue_total_valid_votes": 7.125,
      "completion_length": 291.84375,
      "epoch": 2.857142857142857,
      "grad_norm": 0.7186980247497559,
      "kl": 0.120361328125,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.0053,
      "loss_interactive_phase": 0.005272059410344809,
      "reward": 0.321093755069639,
      "reward_original_clue_max": 0.23781143176053088,
      "reward_original_clue_mean": -2.538847402276667e-09,
      "reward_original_clue_min": -0.3372735295858061,
      "reward_original_clue_std": 0.22314844267377926,
      "reward_original_decision_max": 2.1412500000000003,
      "reward_original_decision_mean": 1.2843750000000003,
      "reward_original_decision_min": 0.44625000000000015,
      "reward_original_decision_std": 0.682584747416193,
      "reward_original_overall_mean": 0.6421874987305765,
      "reward_original_overall_std": 1.0984288921667873,
      "reward_std": 0.8115738822191327,
      "step": 20
    },
    {
      "advantages_interactive_phase": 1.862645149230957e-09,
      "clip_ratio": 0.011862117098644376,
      "clue_civilian_adjusted_reward_mean": -0.011171407247594729,
      "clue_civilian_advantage_adjustment": -0.1500602961364836,
      "clue_civilian_baseline": 0.1500602961364836,
      "clue_civilian_raw_reward_mean": 0.1388888888888889,
      "clue_civilian_votes_avg": 0.4583333333333333,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 1.875,
      "clue_spy_adjusted_reward": 0.03351421315180729,
      "clue_spy_advantage_adjustment": 0.450180879818474,
      "clue_spy_baseline": -0.450180879818474,
      "clue_spy_raw_reward": -0.41666666666666674,
      "clue_spy_votes_received": 4.625,
      "clue_suspicion_potential_psi": 4.166666666666667,
      "clue_total_valid_votes": 6.0,
      "completion_length": 341.484375,
      "epoch": 3.0,
      "grad_norm": 2.092695713043213,
      "kl": 0.1983642578125,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.0115,
      "loss_interactive_phase": 0.011498338542878628,
      "reward": 0.29777343789438654,
      "reward_original_clue_max": 0.180558073555015,
      "reward_original_clue_mean": -2.147744222486675e-09,
      "reward_original_clue_min": -0.16238193431746906,
      "reward_original_clue_std": 0.138092222998997,
      "reward_original_decision_max": 1.7925,
      "reward_original_decision_mean": 1.19109375,
      "reward_original_decision_min": -0.8249999999999998,
      "reward_original_decision_std": 1.0596252090888885,
      "reward_original_overall_mean": 0.5955468739261279,
      "reward_original_overall_std": 1.0734861999424956,
      "reward_std": 1.0712655663327704,
      "step": 21
    },
    {
      "advantages_interactive_phase": 2.3166649043560028e-08,
      "clip_ratio": 0.007374117994913831,
      "clue_civilian_adjusted_reward_mean": -0.013323967362128385,
      "clue_civilian_advantage_adjustment": -0.1397128562510173,
      "clue_civilian_baseline": 0.1397128562510173,
      "clue_civilian_raw_reward_mean": 0.1263888888888889,
      "clue_civilian_votes_avg": 0.8333333333333333,
      "clue_invalid_votes": 0.25,
      "clue_na_votes": 0.625,
      "clue_spy_adjusted_reward": 0.03997188807820764,
      "clue_spy_advantage_adjustment": 0.4191385547448744,
      "clue_spy_baseline": -0.4191385547448744,
      "clue_spy_raw_reward": -0.3791666666666667,
      "clue_spy_votes_received": 4.625,
      "clue_suspicion_potential_psi": 3.7916666666666665,
      "clue_total_valid_votes": 7.125,
      "completion_length": 314.3828125,
      "epoch": 3.142857142857143,
      "grad_norm": 0.7247025966644287,
      "kl": 0.14990234375,
      "learning_rate": 7.857142857142858e-06,
      "loss": 0.0065,
      "loss_interactive_phase": 0.006495335896033794,
      "reward": 0.27199219820781345,
      "reward_original_clue_max": 0.20973680630488367,
      "reward_original_clue_mean": -3.5020443879887897e-09,
      "reward_original_clue_min": -0.25500620188084894,
      "reward_original_clue_std": 0.17895936766287218,
      "reward_original_decision_max": 2.06625,
      "reward_original_decision_mean": 1.0879687500000002,
      "reward_original_decision_min": -0.55125,
      "reward_original_decision_std": 1.0594716886601967,
      "reward_original_overall_mean": 0.5439843732489779,
      "reward_original_overall_std": 1.143192035744574,
      "reward_std": 0.8779618775670658,
      "step": 22
    },
    {
      "advantages_interactive_phase": 1.210719347000122e-08,
      "clip_ratio": 0.009816105710342526,
      "clue_civilian_adjusted_reward_mean": 0.012905466579270795,
      "clue_civilian_advantage_adjustment": -0.13431675564295142,
      "clue_civilian_baseline": 0.13431675564295142,
      "clue_civilian_raw_reward_mean": 0.14722222222222223,
      "clue_civilian_votes_avg": 0.8333333333333334,
      "clue_invalid_votes": 0.25,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": -0.03871641069401783,
      "clue_spy_advantage_adjustment": 0.4029502559726489,
      "clue_spy_baseline": -0.4029502559726489,
      "clue_spy_raw_reward": -0.44166666666666665,
      "clue_spy_votes_received": 5.25,
      "clue_suspicion_potential_psi": 4.416666666666667,
      "clue_total_valid_votes": 7.75,
      "completion_length": 349.46875,
      "epoch": 3.2857142857142856,
      "grad_norm": 1.495042085647583,
      "kl": 0.20458984375,
      "learning_rate": 8.214285714285714e-06,
      "loss": 0.0098,
      "loss_interactive_phase": 0.009842151892371476,
      "reward": 0.3264843803688339,
      "reward_original_clue_max": 0.18625860761540888,
      "reward_original_clue_mean": -2.7390513624575563e-09,
      "reward_original_clue_min": -0.25603524557359103,
      "reward_original_clue_std": 0.16967942428353516,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.3059375000000002,
      "reward_original_decision_min": -0.62625,
      "reward_original_decision_std": 1.2001143050462124,
      "reward_original_overall_mean": 0.6529687486304744,
      "reward_original_overall_std": 1.2300136329570874,
      "reward_std": 1.0827139959652952,
      "step": 23
    },
    {
      "advantages_interactive_phase": 2.0721927285194397e-08,
      "clip_ratio": 0.007756809587590396,
      "clue_civilian_adjusted_reward_mean": -0.04694887868266013,
      "clue_civilian_advantage_adjustment": -0.11500443423821569,
      "clue_civilian_baseline": 0.11500443423821569,
      "clue_civilian_raw_reward_mean": 0.06805555555555555,
      "clue_civilian_votes_avg": 1.3333333333333333,
      "clue_invalid_votes": 0.5,
      "clue_na_votes": 0.125,
      "clue_spy_adjusted_reward": 0.14084662848473525,
      "clue_spy_advantage_adjustment": 0.345013295151402,
      "clue_spy_baseline": -0.345013295151402,
      "clue_spy_raw_reward": -0.20416666666666672,
      "clue_spy_votes_received": 3.375,
      "clue_suspicion_potential_psi": 2.041666666666667,
      "clue_total_valid_votes": 7.375,
      "completion_length": 362.4375,
      "epoch": 3.4285714285714284,
      "grad_norm": 0.721821129322052,
      "kl": 0.15087890625,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.0064,
      "loss_interactive_phase": 0.006402550439815968,
      "reward": 0.12550782238826086,
      "reward_original_clue_max": 0.2615399846222246,
      "reward_original_clue_mean": -1.8908112909589436e-09,
      "reward_original_clue_min": -0.29680578730801793,
      "reward_original_clue_std": 0.21245343365808805,
      "reward_original_decision_max": 2.06625,
      "reward_original_decision_mean": 0.50203125,
      "reward_original_decision_min": -1.125,
      "reward_original_decision_std": 1.2899370730874167,
      "reward_original_overall_mean": 0.25101562405459443,
      "reward_original_overall_std": 1.1078002049178977,
      "reward_std": 0.8967364060953804,
      "step": 24
    },
    {
      "advantages_interactive_phase": 1.1175870895385742e-08,
      "clip_ratio": 0.0030952056986279786,
      "clue_civilian_adjusted_reward_mean": 0.05641245992546336,
      "clue_civilian_advantage_adjustment": -0.1352542067412033,
      "clue_civilian_baseline": 0.1352542067412033,
      "clue_civilian_raw_reward_mean": 0.19166666666666668,
      "clue_civilian_votes_avg": 0.5,
      "clue_invalid_votes": 0.25,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": -0.16923738346885245,
      "clue_spy_advantage_adjustment": 0.40576261653114754,
      "clue_spy_baseline": -0.40576261653114754,
      "clue_spy_raw_reward": -0.575,
      "clue_spy_votes_received": 6.25,
      "clue_suspicion_potential_psi": 5.75,
      "clue_total_valid_votes": 7.75,
      "completion_length": 378.28125,
      "epoch": 3.571428571428571,
      "grad_norm": 1.2147719860076904,
      "kl": 0.2017822265625,
      "learning_rate": 8.92857142857143e-06,
      "loss": 0.0086,
      "loss_interactive_phase": 0.008552196552045643,
      "reward": 0.4324218803571566,
      "reward_original_clue_max": 0.17644798867648273,
      "reward_original_clue_mean": -9.231155991516748e-10,
      "reward_original_clue_min": -0.3267729122198718,
      "reward_original_clue_std": 0.20414503627659822,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.7296875000000003,
      "reward_original_decision_min": 0.645,
      "reward_original_decision_std": 0.6397808895425074,
      "reward_original_overall_mean": 0.8648437495384423,
      "reward_original_overall_std": 1.219408722443206,
      "reward_std": 0.876965590677074,
      "step": 25
    },
    {
      "advantages_interactive_phase": 2.421438694000244e-08,
      "clip_ratio": 0.005824881722219288,
      "clue_civilian_adjusted_reward_mean": -0.007093781877094507,
      "clue_civilian_advantage_adjustment": -0.13209378187709453,
      "clue_civilian_baseline": 0.13209378187709453,
      "clue_civilian_raw_reward_mean": 0.125,
      "clue_civilian_votes_avg": 1.0,
      "clue_invalid_votes": 0.25,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": 0.021281343083350023,
      "clue_spy_advantage_adjustment": 0.39628134308335006,
      "clue_spy_baseline": -0.39628134308335006,
      "clue_spy_raw_reward": -0.375,
      "clue_spy_votes_received": 4.75,
      "clue_suspicion_potential_psi": 3.7499999999999996,
      "clue_total_valid_votes": 7.75,
      "completion_length": 333.84375,
      "epoch": 3.7142857142857144,
      "grad_norm": 0.8488478660583496,
      "kl": 0.18359375,
      "learning_rate": 9.285714285714288e-06,
      "loss": 0.0078,
      "loss_interactive_phase": 0.007767571427393705,
      "reward": 0.27351563694794767,
      "reward_original_clue_max": 0.25575890463160234,
      "reward_original_clue_mean": -6.369833735996033e-10,
      "reward_original_clue_min": -0.3290713434253468,
      "reward_original_clue_std": 0.22778135711245912,
      "reward_original_decision_max": 2.06625,
      "reward_original_decision_mean": 1.0940625000000002,
      "reward_original_decision_min": -0.1274999999999999,
      "reward_original_decision_std": 0.8632276294383053,
      "reward_original_overall_mean": 0.5470312496815084,
      "reward_original_overall_std": 1.1264277840598063,
      "reward_std": 0.8660217167160729,
      "step": 26
    },
    {
      "advantages_interactive_phase": -7.450580596923828e-09,
      "clip_ratio": 0.0031035091960802674,
      "clue_civilian_adjusted_reward_mean": 0.03245208673288981,
      "clue_civilian_advantage_adjustment": -0.1689368021559991,
      "clue_civilian_baseline": 0.1689368021559991,
      "clue_civilian_raw_reward_mean": 0.2013888888888889,
      "clue_civilian_votes_avg": 0.4583333333333333,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": -0.09735625958223877,
      "clue_spy_advantage_adjustment": 0.5068104070844279,
      "clue_spy_baseline": -0.5068104070844279,
      "clue_spy_raw_reward": -0.6041666666666666,
      "clue_spy_votes_received": 6.5,
      "clue_suspicion_potential_psi": 6.041666666666667,
      "clue_total_valid_votes": 7.875,
      "completion_length": 306.40625,
      "epoch": 3.857142857142857,
      "grad_norm": 1.9208674430847168,
      "kl": 0.3026123046875,
      "learning_rate": 9.642857142857144e-06,
      "loss": 0.0125,
      "loss_interactive_phase": 0.01246593298856169,
      "reward": 0.46124999631323677,
      "reward_original_clue_max": 0.1545202636139022,
      "reward_original_clue_mean": 1.5410765849196983e-10,
      "reward_original_clue_min": -0.24859110312991783,
      "reward_original_clue_std": 0.16558966332718913,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.845,
      "reward_original_decision_min": 0.7200000000000002,
      "reward_original_decision_std": 0.7004523304839636,
      "reward_original_overall_mean": 0.9225000000770541,
      "reward_original_overall_std": 1.2500439582236307,
      "reward_std": 0.8922832234684476,
      "step": 27
    },
    {
      "advantages_interactive_phase": 9.66247171163559e-09,
      "clip_ratio": 0.004918134000035934,
      "clue_civilian_adjusted_reward_mean": -0.03922444833702034,
      "clue_civilian_advantage_adjustment": -0.168391115003687,
      "clue_civilian_baseline": 0.168391115003687,
      "clue_civilian_raw_reward_mean": 0.12916666666666665,
      "clue_civilian_votes_avg": 1.0,
      "clue_invalid_votes": 0.125,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": 0.11767334568750121,
      "clue_spy_advantage_adjustment": 0.5051733456875013,
      "clue_spy_baseline": -0.5051733456875013,
      "clue_spy_raw_reward": -0.3875000000000001,
      "clue_spy_votes_received": 4.875,
      "clue_suspicion_potential_psi": 3.8750000000000004,
      "clue_total_valid_votes": 7.875,
      "completion_length": 290.8359375,
      "epoch": 4.0,
      "grad_norm": 0.778830885887146,
      "kl": 0.2080078125,
      "learning_rate": 1e-05,
      "loss": 0.0085,
      "loss_interactive_phase": 0.008535244385711849,
      "reward": 0.29144531737351337,
      "reward_original_clue_max": 0.2624036269472061,
      "reward_original_clue_mean": 1.691100571209192e-10,
      "reward_original_clue_min": -0.2589547295967252,
      "reward_original_clue_std": 0.20231139453953567,
      "reward_original_decision_max": 2.49,
      "reward_original_decision_mean": 1.16578125,
      "reward_original_decision_min": -0.05249999999999988,
      "reward_original_decision_std": 0.9492047780780623,
      "reward_original_overall_mean": 0.582890625084555,
      "reward_original_overall_std": 1.1522968480359421,
      "reward_std": 0.8734021281821482,
      "step": 28
    },
    {
      "advantages_interactive_phase": 2.60770320892334e-08,
      "clip_ratio": 0.004597238206770271,
      "clue_civilian_adjusted_reward_mean": -0.017886474267241367,
      "clue_civilian_advantage_adjustment": -0.13455314093390802,
      "clue_civilian_baseline": 0.13455314093390805,
      "clue_civilian_raw_reward_mean": 0.11666666666666667,
      "clue_civilian_votes_avg": 1.125,
      "clue_invalid_votes": 0.0,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": 0.05365942077661086,
      "clue_spy_advantage_adjustment": 0.4036594207766109,
      "clue_spy_baseline": -0.4036594207766109,
      "clue_spy_raw_reward": -0.35,
      "clue_spy_votes_received": 4.625,
      "clue_suspicion_potential_psi": 3.5,
      "clue_total_valid_votes": 8.0,
      "completion_length": 304.515625,
      "epoch": 4.142857142857143,
      "grad_norm": 1.3750234842300415,
      "kl": 0.3173828125,
      "learning_rate": 9.999611462404874e-06,
      "loss": 0.0125,
      "loss_interactive_phase": 0.012478827498853207,
      "reward": 0.26496095041194656,
      "reward_original_clue_max": 0.26774659185022603,
      "reward_original_clue_mean": -5.062783100737156e-10,
      "reward_original_clue_min": -0.31947364534085654,
      "reward_original_clue_std": 0.23106213560541738,
      "reward_original_decision_max": 2.06625,
      "reward_original_decision_mean": 1.05984375,
      "reward_original_decision_min": -0.05249999999999988,
      "reward_original_decision_std": 0.8307205925127058,
      "reward_original_overall_mean": 0.529921874746861,
      "reward_original_overall_std": 1.1095292288061827,
      "reward_std": 0.8888411754368172,
      "step": 29
    },
    {
      "advantages_interactive_phase": 3.259629011154175e-09,
      "clip_ratio": 0.004411640649777837,
      "clue_civilian_adjusted_reward_mean": -0.019376439787606455,
      "clue_civilian_advantage_adjustment": -0.10826532867649533,
      "clue_civilian_baseline": 0.10826532867649533,
      "clue_civilian_raw_reward_mean": 0.08888888888888888,
      "clue_civilian_votes_avg": 1.3333333333333333,
      "clue_invalid_votes": 0.0,
      "clue_na_votes": 0.0,
      "clue_spy_adjusted_reward": 0.058129320628341516,
      "clue_spy_advantage_adjustment": 0.3247959872950082,
      "clue_spy_baseline": -0.3247959872950082,
      "clue_spy_raw_reward": -0.2666666666666667,
      "clue_spy_votes_received": 4.0,
      "clue_suspicion_potential_psi": 2.6666666666666665,
      "clue_total_valid_votes": 8.0,
      "completion_length": 261.4453125,
      "epoch": 4.285714285714286,
      "grad_norm": 0.8253775238990784,
      "kl": 0.2550048828125,
      "learning_rate": 9.998445910004082e-06,
      "loss": 0.0103,
      "loss_interactive_phase": 0.010343844187445939,
      "reward": 0.19875000170890966,
      "reward_original_clue_max": 0.2470865782057527,
      "reward_original_clue_mean": 3.1638053678631484e-10,
      "reward_original_clue_min": -0.258379067343329,
      "reward_original_clue_std": 0.19545089761013185,
      "reward_original_decision_max": 2.06625,
      "reward_original_decision_mean": 0.7949999999999999,
      "reward_original_decision_min": -0.47625000000000006,
      "reward_original_decision_std": 1.1225616644698808,
      "reward_original_overall_mean": 0.3975000001581903,
      "reward_original_overall_std": 1.1179099397348429,
      "reward_std": 0.8648673956089749,
      "step": 30
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 280,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}