File size: 3,479 Bytes
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
f7a50a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800b2b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{
    "average_CPS": 59.57754466666667,
    "config": {
        "model_name": "Qwen/Qwen2.5-32B-Instruct",
        "num_fewshot": "10",
        "batch_size": 1,
        "LANG": "PL",
        "model": "Qwen/Qwen2.5-32B-Instruct",
        "base_model": "Qwen2ForCausalLM",
        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
        "submitted_time": "2024-09-17 04:17:55+00:00",
        "num_params_billion": 32.763876352,
        "language": "en"
    },
    "tasks": {
        "NER": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 60.08,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 60.040000000000006,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 60.08,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 60.06666666666666,
            "best_prompt": 60.08,
            "prompt_id": "p1",
            "CPS": 60.071989333333335,
            "is_dummy": false,
            "std_accuracy": 0.023094010767580435
        },
        "RE": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 58.58,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 58.68,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 59.38,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 58.879999999999995,
            "best_prompt": 59.38,
            "prompt_id": "p3",
            "CPS": 59.083099999999995,
            "is_dummy": false,
            "std_accuracy": 0.43588989435406944
        },
        "RML": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "HIS": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "DIA": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        }
    }
}