exclude mock models
Browse files- data_loader.py +28 -0
data_loader.py
CHANGED
|
@@ -304,6 +304,28 @@ def add_first_timer_score(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 304 |
return df
|
| 305 |
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
|
| 308 |
"""Identify first-timer-friendly models based on popularity and performance, grouped by task.
|
| 309 |
|
|
@@ -326,6 +348,9 @@ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -
|
|
| 326 |
# Filter only successful benchmarks
|
| 327 |
filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
|
| 328 |
|
|
|
|
|
|
|
|
|
|
| 329 |
if filtered.empty:
|
| 330 |
return pd.DataFrame()
|
| 331 |
|
|
@@ -446,6 +471,9 @@ def get_webgpu_beginner_friendly_models(
|
|
| 446 |
|
| 447 |
filtered = df[webgpu_filter].copy()
|
| 448 |
|
|
|
|
|
|
|
|
|
|
| 449 |
if filtered.empty:
|
| 450 |
logger.warning("No successful WebGPU benchmarks found")
|
| 451 |
return pd.DataFrame()
|
|
|
|
| 304 |
return df
|
| 305 |
|
| 306 |
|
| 307 |
+
def filter_excluded_models(
    df: pd.DataFrame,
    excluded_patterns: tuple = ("tiny-random",),
) -> pd.DataFrame:
    """Filter out models that should be excluded from recommendations.

    Rows whose 'modelId' contains any of the excluded substrings
    (case-insensitive, literal match) are dropped. By default this removes
    the "tiny-random" test models (e.g. Xenova/tiny-random-RoFormerForMaskedLM),
    which are small test models not meant for production use.

    Args:
        df: DataFrame containing model data with a 'modelId' column.
        excluded_patterns: Literal substrings that mark a model as excluded.
            A single string is accepted and treated as one pattern. Empty
            patterns are ignored because they would match every row.

    Returns:
        The input DataFrame unchanged when it is empty or has no 'modelId'
        column; otherwise a new DataFrame with the excluded models removed.
    """
    if df.empty or "modelId" not in df.columns:
        return df

    # A bare string would otherwise be iterated character by character.
    if isinstance(excluded_patterns, str):
        excluded_patterns = (excluded_patterns,)

    # Normalise to pandas' string dtype so the `.str` accessor is always
    # available, even when the column is numeric or entirely missing
    # (e.g. an all-NaN float column). Missing ids stay missing and are kept.
    model_ids = df["modelId"].astype("string")

    excluded = pd.Series(False, index=df.index)
    for pattern in excluded_patterns:
        if not pattern:
            continue
        matches = model_ids.str.contains(pattern, case=False, na=False, regex=False)
        excluded = excluded | matches.astype(bool)

    # Return an explicit copy so callers can add or modify columns on the
    # result without touching (or being warned about) the source frame.
    return df[~excluded].copy()
|
| 327 |
+
|
| 328 |
+
|
| 329 |
def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
|
| 330 |
"""Identify first-timer-friendly models based on popularity and performance, grouped by task.
|
| 331 |
|
|
|
|
| 348 |
# Filter only successful benchmarks
|
| 349 |
filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
|
| 350 |
|
| 351 |
+
# Exclude test models and other non-production models
|
| 352 |
+
filtered = filter_excluded_models(filtered)
|
| 353 |
+
|
| 354 |
if filtered.empty:
|
| 355 |
return pd.DataFrame()
|
| 356 |
|
|
|
|
| 471 |
|
| 472 |
filtered = df[webgpu_filter].copy()
|
| 473 |
|
| 474 |
+
# Exclude test models and other non-production models
|
| 475 |
+
filtered = filter_excluded_models(filtered)
|
| 476 |
+
|
| 477 |
if filtered.empty:
|
| 478 |
logger.warning("No successful WebGPU benchmarks found")
|
| 479 |
return pd.DataFrame()
|