File size: 3,510 Bytes
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
f7a50a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800b2b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{
    "average_CPS": 61.436353666666676,
    "config": {
        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
        "num_fewshot": "10",
        "batch_size": 1,
        "LANG": "SL",
        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
        "base_model": "Qwen2ForCausalLM",
        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
        "submitted_time": "2025-01-23 13:23:24+00:00",
        "num_params_billion": 14.770033664,
        "language": "en"
    },
    "tasks": {
        "NER": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 64.67,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 61.78,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 64.67,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 63.70666666666667,
            "best_prompt": 64.67,
            "prompt_id": "p1",
            "CPS": 64.04701233333334,
            "is_dummy": false,
            "std_accuracy": 1.6685422779580188
        },
        "RE": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 59.489999999999995,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 57.82000000000001,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 57.809999999999995,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 58.373333333333335,
            "best_prompt": 59.489999999999995,
            "prompt_id": "p1",
            "CPS": 58.825695,
            "is_dummy": false,
            "std_accuracy": 0.967074626558533
        },
        "RML": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "HIS": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "DIA": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        }
    }
}