broadfield-dev commited on
Commit
d793c11
·
verified ·
1 Parent(s): 315ff10

Checkpoint for SFT at step 150

Browse files
Files changed (4) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. scheduler.pt +1 -1
  4. training_state.json +251 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6f3f829ccadbb2f93e07f1978674e13b21252372b106df92a46105f5f49d74c
3
  size 22080496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0290d1dc2193a03b39037ef2a3eb7b6afabee7bd240a3581decb36e85260d239
3
  size 22080496
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e462ea37e7728bdf1a1f2654d61e26d70bc67039dcbf555f35fc710a214316db
3
  size 44193018
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4e2d6fe8d19f03b32584724d889d3ce5aeb6e1d9d3509a6586092621adf2a45
3
  size 44193018
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e83a7ddf03610f6f8d003650d6b2e8d89dcab55806551915ca784fea2779fe
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7fd64a788bd82addddc94b5c58f0cc158d2b4edf51ac5c62f232f520c28b37
3
  size 1000
training_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "phase": "SFT",
3
  "epoch": 0,
4
- "total_sft_steps": 100,
5
  "metrics": [
6
  {
7
  "Step": 0,
@@ -507,6 +507,256 @@
507
  "Step": 100,
508
  "Metric": "SFT Loss",
509
  "Value": 8.235538482666016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
  }
511
  ],
512
  "sft_complete": false
 
1
  {
2
  "phase": "SFT",
3
  "epoch": 0,
4
+ "total_sft_steps": 150,
5
  "metrics": [
6
  {
7
  "Step": 0,
 
507
  "Step": 100,
508
  "Metric": "SFT Loss",
509
  "Value": 8.235538482666016
510
+ },
511
+ {
512
+ "Step": 101,
513
+ "Metric": "SFT Loss",
514
+ "Value": 8.270655632019043
515
+ },
516
+ {
517
+ "Step": 102,
518
+ "Metric": "SFT Loss",
519
+ "Value": 8.269576072692871
520
+ },
521
+ {
522
+ "Step": 103,
523
+ "Metric": "SFT Loss",
524
+ "Value": 8.274468421936035
525
+ },
526
+ {
527
+ "Step": 104,
528
+ "Metric": "SFT Loss",
529
+ "Value": 8.309279441833496
530
+ },
531
+ {
532
+ "Step": 105,
533
+ "Metric": "SFT Loss",
534
+ "Value": 8.239542961120605
535
+ },
536
+ {
537
+ "Step": 106,
538
+ "Metric": "SFT Loss",
539
+ "Value": 8.188124656677246
540
+ },
541
+ {
542
+ "Step": 107,
543
+ "Metric": "SFT Loss",
544
+ "Value": 8.163637161254883
545
+ },
546
+ {
547
+ "Step": 108,
548
+ "Metric": "SFT Loss",
549
+ "Value": 8.162602424621582
550
+ },
551
+ {
552
+ "Step": 109,
553
+ "Metric": "SFT Loss",
554
+ "Value": 8.176586151123047
555
+ },
556
+ {
557
+ "Step": 110,
558
+ "Metric": "SFT Loss",
559
+ "Value": 8.174212455749512
560
+ },
561
+ {
562
+ "Step": 111,
563
+ "Metric": "SFT Loss",
564
+ "Value": 8.19734001159668
565
+ },
566
+ {
567
+ "Step": 112,
568
+ "Metric": "SFT Loss",
569
+ "Value": 8.076298713684082
570
+ },
571
+ {
572
+ "Step": 113,
573
+ "Metric": "SFT Loss",
574
+ "Value": 8.18896770477295
575
+ },
576
+ {
577
+ "Step": 114,
578
+ "Metric": "SFT Loss",
579
+ "Value": 8.258679389953613
580
+ },
581
+ {
582
+ "Step": 115,
583
+ "Metric": "SFT Loss",
584
+ "Value": 8.18835735321045
585
+ },
586
+ {
587
+ "Step": 116,
588
+ "Metric": "SFT Loss",
589
+ "Value": 8.127300262451172
590
+ },
591
+ {
592
+ "Step": 117,
593
+ "Metric": "SFT Loss",
594
+ "Value": 8.145550727844238
595
+ },
596
+ {
597
+ "Step": 118,
598
+ "Metric": "SFT Loss",
599
+ "Value": 8.09005069732666
600
+ },
601
+ {
602
+ "Step": 119,
603
+ "Metric": "SFT Loss",
604
+ "Value": 7.936030387878418
605
+ },
606
+ {
607
+ "Step": 120,
608
+ "Metric": "SFT Loss",
609
+ "Value": 8.104690551757812
610
+ },
611
+ {
612
+ "Step": 121,
613
+ "Metric": "SFT Loss",
614
+ "Value": 8.058869361877441
615
+ },
616
+ {
617
+ "Step": 122,
618
+ "Metric": "SFT Loss",
619
+ "Value": 8.058721542358398
620
+ },
621
+ {
622
+ "Step": 123,
623
+ "Metric": "SFT Loss",
624
+ "Value": 7.885481357574463
625
+ },
626
+ {
627
+ "Step": 124,
628
+ "Metric": "SFT Loss",
629
+ "Value": 8.024746894836426
630
+ },
631
+ {
632
+ "Step": 125,
633
+ "Metric": "SFT Loss",
634
+ "Value": 8.017570495605469
635
+ },
636
+ {
637
+ "Step": 126,
638
+ "Metric": "SFT Loss",
639
+ "Value": 8.01369571685791
640
+ },
641
+ {
642
+ "Step": 127,
643
+ "Metric": "SFT Loss",
644
+ "Value": 7.956777572631836
645
+ },
646
+ {
647
+ "Step": 128,
648
+ "Metric": "SFT Loss",
649
+ "Value": 8.059258460998535
650
+ },
651
+ {
652
+ "Step": 129,
653
+ "Metric": "SFT Loss",
654
+ "Value": 8.099721908569336
655
+ },
656
+ {
657
+ "Step": 130,
658
+ "Metric": "SFT Loss",
659
+ "Value": 7.995508193969727
660
+ },
661
+ {
662
+ "Step": 131,
663
+ "Metric": "SFT Loss",
664
+ "Value": 7.869641304016113
665
+ },
666
+ {
667
+ "Step": 132,
668
+ "Metric": "SFT Loss",
669
+ "Value": 7.967761993408203
670
+ },
671
+ {
672
+ "Step": 133,
673
+ "Metric": "SFT Loss",
674
+ "Value": 8.04328441619873
675
+ },
676
+ {
677
+ "Step": 134,
678
+ "Metric": "SFT Loss",
679
+ "Value": 7.801205158233643
680
+ },
681
+ {
682
+ "Step": 135,
683
+ "Metric": "SFT Loss",
684
+ "Value": 7.971508979797363
685
+ },
686
+ {
687
+ "Step": 136,
688
+ "Metric": "SFT Loss",
689
+ "Value": 7.872110843658447
690
+ },
691
+ {
692
+ "Step": 137,
693
+ "Metric": "SFT Loss",
694
+ "Value": 8.073846817016602
695
+ },
696
+ {
697
+ "Step": 138,
698
+ "Metric": "SFT Loss",
699
+ "Value": 7.676604270935059
700
+ },
701
+ {
702
+ "Step": 139,
703
+ "Metric": "SFT Loss",
704
+ "Value": 7.913880825042725
705
+ },
706
+ {
707
+ "Step": 140,
708
+ "Metric": "SFT Loss",
709
+ "Value": 8.005086898803711
710
+ },
711
+ {
712
+ "Step": 141,
713
+ "Metric": "SFT Loss",
714
+ "Value": 7.983025074005127
715
+ },
716
+ {
717
+ "Step": 142,
718
+ "Metric": "SFT Loss",
719
+ "Value": 7.844381809234619
720
+ },
721
+ {
722
+ "Step": 143,
723
+ "Metric": "SFT Loss",
724
+ "Value": 7.966534614562988
725
+ },
726
+ {
727
+ "Step": 144,
728
+ "Metric": "SFT Loss",
729
+ "Value": 7.799246311187744
730
+ },
731
+ {
732
+ "Step": 145,
733
+ "Metric": "SFT Loss",
734
+ "Value": 7.769224166870117
735
+ },
736
+ {
737
+ "Step": 146,
738
+ "Metric": "SFT Loss",
739
+ "Value": 7.8659234046936035
740
+ },
741
+ {
742
+ "Step": 147,
743
+ "Metric": "SFT Loss",
744
+ "Value": 7.893615245819092
745
+ },
746
+ {
747
+ "Step": 148,
748
+ "Metric": "SFT Loss",
749
+ "Value": 7.57839822769165
750
+ },
751
+ {
752
+ "Step": 149,
753
+ "Metric": "SFT Loss",
754
+ "Value": 7.796144485473633
755
+ },
756
+ {
757
+ "Step": 150,
758
+ "Metric": "SFT Loss",
759
+ "Value": 7.760519504547119
760
  }
761
  ],
762
  "sft_complete": false