Spaces · Runtime error

Andrea Seveso committed · 9df8442 · Parent(s): 6aa8d26

Remove columns from eval

Files changed:
- .gitignore +1 -0
- src/display/utils.py +36 -36

.gitignore CHANGED

```diff
@@ -11,3 +11,4 @@ eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
+results/*
```

src/display/utils.py CHANGED

```diff
@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
```

```diff
@@ -20,52 +21,59 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False
 
-## Leaderboard columns
+
+# Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("model", "markdown", True, never_hidden=True)])
-#Scores
+auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent(
+    "T", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent(
+    "Model", "markdown", True, never_hidden=True)])
+# Scores
 # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+    auto_eval_column_dict.append(
+        [task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(
+    ["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(
+    ["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+auto_eval_column_dict.append(
+    ["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_dict.append(
+    ["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_dict.append(
+    ["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
-AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
+AutoEvalColumn = make_dataclass(
+    "AutoEvalColumn", auto_eval_column_dict, frozen=True)
+
+# For the queue columns in the submission tab
+
 
-## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
     model = ColumnContent("model", "markdown", True)
     revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
-## All the model information that we might need
+# All the model information that we might need
+
+
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = ""
+    symbol: str = ""  # emoji
 
 
 class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    OP = ModelDetails(name="pretrained", symbol="🟢")
+    CL = ModelDetails(name="instruction-tuned", symbol="⭕")
     Unknown = ModelDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
```
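
Outside diff markup, the column plumbing in this hunk is easier to follow. The sketch below reproduces the same `make_dataclass` pattern with stand-ins: the two-task `Tasks` enum and its `Task` payload are hypothetical (the real enum lives in `src/about.py`, which this commit does not touch), and `ColumnContent` is declared frozen here so its instances remain valid hashable defaults on Python 3.12+, whereas the file's own `ColumnContent` is a plain dataclass.

```python
from dataclasses import dataclass, make_dataclass
from enum import Enum


@dataclass(frozen=True)
class ColumnContent:
    name: str          # display name of the column
    type: str          # cell dtype ("str", "number", "markdown", "bool", ...)
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


@dataclass(frozen=True)
class Task:
    benchmark: str
    metric: str
    col_name: str


class Tasks(Enum):
    # Hypothetical stand-ins for the real Tasks enum in src/about.py.
    task0 = Task("arc_it", "acc_norm", "ARC-it")
    task1 = Task("hellaswag_it", "acc_norm", "HellaSwag-it")


# Each entry is [attribute_name, annotation, default]; make_dataclass stores
# the third element as the class-level default for that field.
auto_eval_column_dict = []
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent(
    "Model", "markdown", True, never_hidden=True)])
for task in Tasks:
    auto_eval_column_dict.append(
        [task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])

AutoEvalColumn = make_dataclass(
    "AutoEvalColumn", auto_eval_column_dict, frozen=True)


def fields(raw_class):
    # Same helper as in the diff: collect the non-dunder class attributes,
    # i.e. the ColumnContent defaults stored by make_dataclass.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


print([c.name for c in fields(AutoEvalColumn)])
# ['Model', 'ARC-it', 'HellaSwag-it']
```

Because `COLS` at the bottom of the file is derived from `fields(AutoEvalColumn)`, deleting an append line is enough to drop a column everywhere downstream, which is how this commit removes columns from the leaderboard.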

```diff
@@ -73,20 +81,12 @@ class ModelType(Enum):
 
     @staticmethod
     def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
+        if "open" in type or "🟢" in type:
+            return ModelType.OP
+        if "closed" in type or "⭕" in type:
+            return ModelType.CL
         return ModelType.Unknown
 
-class WeightType(Enum):
-    Adapter = ModelDetails("Adapter")
-    Original = ModelDetails("Original")
-    Delta = ModelDetails("Delta")
 
 class Precision(Enum):
     float16 = ModelDetails("float16")
```
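
The rewritten `from_str` collapses the four upstream branches into two. Note that it matches on the substrings "open"/"closed" and on the emoji, not on the members' `name` fields ("pretrained", "instruction-tuned"). A self-contained sketch of the behavior after this commit; the `to_str` body is not shown in this hunk, so the one-liner below is an assumption taken from the stock leaderboard template:

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class ModelDetails:
    name: str
    display_name: str = ""
    symbol: str = ""  # emoji


class ModelType(Enum):
    OP = ModelDetails(name="pretrained", symbol="🟢")
    CL = ModelDetails(name="instruction-tuned", symbol="⭕")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        # Assumed template implementation; not part of this diff.
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        # Matches a substring ("open"/"closed") or the emoji itself, so both
        # dropdown labels like "🟢 pretrained" and bare symbols resolve.
        if "open" in type or "🟢" in type:
            return ModelType.OP
        if "closed" in type or "⭕" in type:
            return ModelType.CL
        return ModelType.Unknown


print(ModelType.from_str("🟢 pretrained"))  # ModelType.OP (emoji branch)
print(ModelType.from_str("open model"))     # ModelType.OP
print(ModelType.from_str("pretrained"))     # ModelType.Unknown: the member's
                                            # own name matches neither branch
```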

```diff
@@ -100,6 +100,7 @@ class Precision(Enum):
             return Precision.bfloat16
         return Precision.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
```

```diff
@@ -107,4 +108,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
-
```
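
The queue-side lists at the end of the file come from the same `fields()` helper, applied to plain class attributes rather than to a generated dataclass. A short sketch of what the extraction yields once `weight_type` is gone, reusing the frozen `ColumnContent` stand-in from the first sketch:

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


def fields(raw_class):
    # Collect the non-dunder class attributes in definition order.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column, as left by this commit
    model = ColumnContent("model", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    private = ColumnContent("private", "bool", True)
    precision = ColumnContent("precision", "str", True)
    status = ColumnContent("status", "str", True)


EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]

print(EVAL_COLS)   # ['model', 'revision', 'private', 'precision', 'status']
print(EVAL_TYPES)  # ['markdown', 'str', 'bool', 'str', 'str']
```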