File size: 3,491 Bytes
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
f7a50a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800b2b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{
    "average_CPS": 37.45460366666667,
    "config": {
        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
        "num_fewshot": "0",
        "batch_size": 1,
        "LANG": "SL",
        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
        "base_model": "Qwen2ForCausalLM",
        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
        "submitted_time": "2025-01-23 13:23:24+00:00",
        "num_params_billion": 14.770033664,
        "language": "en"
    },
    "tasks": {
        "NER": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 39.1,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 23.75,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 39.1,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 33.983333333333334,
            "best_prompt": 39.1,
            "prompt_id": "p1",
            "CPS": 37.099383333333336,
            "is_dummy": false,
            "std_accuracy": 8.862326632060757
        },
        "RE": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 37.75,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 37.830000000000005,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 37.75,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 37.77666666666667,
            "best_prompt": 37.830000000000005,
            "prompt_id": "p2",
            "CPS": 37.809824000000006,
            "is_dummy": false,
            "std_accuracy": 0.04618802153517318
        },
        "RML": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "HIS": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "DIA": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        }
    }
}