File size: 3,493 Bytes
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
800b2b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c897f3
f7a50a0
2c897f3
f7a50a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800b2b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{
    "average_CPS": 62.042391166666675,
    "config": {
        "model_name": "Qwen/Qwen2.5-32B-Instruct",
        "num_fewshot": "10",
        "batch_size": 1,
        "LANG": "SK",
        "model": "Qwen/Qwen2.5-32B-Instruct",
        "base_model": "Qwen2ForCausalLM",
        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
        "submitted_time": "2024-09-17 04:17:55+00:00",
        "num_params_billion": 32.763876352,
        "language": "en"
    },
    "tasks": {
        "NER": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 67.43,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 66.73,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 67.43,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 67.19666666666667,
            "best_prompt": 67.43,
            "prompt_id": "p1",
            "CPS": 67.27266333333334,
            "is_dummy": false,
            "std_accuracy": 0.40414518843273967
        },
        "RE": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 57.330000000000005,
                    "stderr": 0.0
                },
                {
                    "prompt": "p2",
                    "metric": "f1",
                    "value": 55.86,
                    "stderr": 0.0
                },
                {
                    "prompt": "p3",
                    "metric": "f1",
                    "value": 56.089999999999996,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": 56.42666666666667,
            "best_prompt": 57.330000000000005,
            "prompt_id": "p1",
            "CPS": 56.812119,
            "is_dummy": false,
            "std_accuracy": 0.7907169742286678
        },
        "RML": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "HIS": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        },
        "DIA": {
            "prompts": [
                {
                    "prompt": "p1",
                    "metric": "f1",
                    "value": 0.0,
                    "stderr": 0.0
                }
            ],
            "average_accuracy": null,
            "std_accuracy": null,
            "best_prompt": null,
            "prompt_id": null,
            "CPS": null,
            "is_dummy": true
        }
    }
}