whitphx HF Staff committed on
Commit
955b6a7
·
1 Parent(s): 953bdc5

exclude mock models

Browse files
Files changed (1) hide show
  1. data_loader.py +28 -0
data_loader.py CHANGED
@@ -304,6 +304,28 @@ def add_first_timer_score(df: pd.DataFrame) -> pd.DataFrame:
304
  return df
305
 
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
308
  """Identify first-timer-friendly models based on popularity and performance, grouped by task.
309
 
@@ -326,6 +348,9 @@ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -
326
  # Filter only successful benchmarks
327
  filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
328
 
 
 
 
329
  if filtered.empty:
330
  return pd.DataFrame()
331
 
@@ -446,6 +471,9 @@ def get_webgpu_beginner_friendly_models(
446
 
447
  filtered = df[webgpu_filter].copy()
448
 
 
 
 
449
  if filtered.empty:
450
  logger.warning("No successful WebGPU benchmarks found")
451
  return pd.DataFrame()
 
304
  return df
305
 
306
 
307
def filter_excluded_models(df: pd.DataFrame) -> pd.DataFrame:
    """Filter out models that should be excluded from recommendations.

    Rows whose ``modelId`` contains ``"tiny-random"`` (case-insensitive,
    e.g. ``Xenova/tiny-random-RoFormerForMaskedLM``) are dropped. These are
    small test models that are not meant for production use and should not
    be recommended to users.

    Args:
        df: DataFrame containing model data with a 'modelId' column.

    Returns:
        ``df`` itself, unchanged, when it is empty or has no 'modelId'
        column. Otherwise a new, independent DataFrame holding the rows
        that were kept (original index labels preserved). Rows with a
        missing ``modelId`` are kept.
    """
    if df.empty or "modelId" not in df.columns:
        return df

    # Normalise to pandas' string dtype first: the `.str` accessor raises
    # AttributeError on non-string columns (e.g. an all-NaN float column),
    # while missing values stay missing after the cast.
    model_ids = df["modelId"].astype("string")

    # The pattern is a literal, so skip regex interpretation; na=False
    # treats rows without a model id as "not a test model".
    is_test_model = model_ids.str.contains(
        "tiny-random", case=False, na=False, regex=False
    )
    keep = ~is_test_model.astype(bool)

    # Return an explicit copy so callers can add or modify columns on the
    # result without pandas treating it as a slice of the input frame.
    return df[keep].copy()
327
+
328
+
329
  def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
330
  """Identify first-timer-friendly models based on popularity and performance, grouped by task.
331
 
 
348
  # Filter only successful benchmarks
349
  filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
350
 
351
+ # Exclude test models and other non-production models
352
+ filtered = filter_excluded_models(filtered)
353
+
354
  if filtered.empty:
355
  return pd.DataFrame()
356
 
 
471
 
472
  filtered = df[webgpu_filter].copy()
473
 
474
+ # Exclude test models and other non-production models
475
+ filtered = filter_excluded_models(filtered)
476
+
477
  if filtered.empty:
478
  logger.warning("No successful WebGPU benchmarks found")
479
  return pd.DataFrame()