Spaces:

whitphx
/

transformersjs-bench-leaderboard

Sleeping

App Files Files Community

whitphx HF Staff committed on 8 days ago

Commit

955b6a7

1 Parent(s): 953bdc5

exclude mock models

Browse files

Files changed (1) hide show

data_loader.py +28 -0

data_loader.py CHANGED Viewed

@@ -304,6 +304,28 @@ def add_first_timer_score(df: pd.DataFrame) -> pd.DataFrame:
     return df
 def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
     """Identify first-timer-friendly models based on popularity and performance, grouped by task.
@@ -326,6 +348,9 @@ def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -
     # Filter only successful benchmarks
     filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
     if filtered.empty:
         return pd.DataFrame()
@@ -446,6 +471,9 @@ def get_webgpu_beginner_friendly_models(
     filtered = df[webgpu_filter].copy()
     if filtered.empty:
         logger.warning("No successful WebGPU benchmarks found")
         return pd.DataFrame()

     return df
def filter_excluded_models(df: pd.DataFrame) -> pd.DataFrame:
    """Filter out models that should be excluded from recommendations.

    Rows whose ``modelId`` contains the substring ``tiny-random`` (matched
    case-insensitively, e.g. ``Xenova/tiny-random-RoFormerForMaskedLM``) are
    dropped. These are small test models not meant for production use.

    Args:
        df: DataFrame containing model data with a 'modelId' column.

    Returns:
        ``df`` itself, unchanged, when it is empty or has no 'modelId'
        column; otherwise a new DataFrame with the excluded rows removed.
        Rows with a missing 'modelId' are kept.
    """
    if df.empty or "modelId" not in df.columns:
        return df

    # Coerce to str first: the ``.str`` accessor raises AttributeError on
    # non-string columns (e.g. an all-NaN float column or numeric ids).
    # Missing values become "nan"/"None", which never match the pattern.
    model_ids = df["modelId"].astype(str)

    # regex=False: the pattern is a literal substring, not a regular expression.
    is_test_model = model_ids.str.contains(
        "tiny-random", case=False, na=False, regex=False
    )

    # Explicit copy so the result is an independent frame; the visible callers
    # copy their input before filtering, presumably to mutate it afterwards.
    return df[~is_test_model].copy()
 def get_first_timer_friendly_models(df: pd.DataFrame, limit_per_task: int = 3) -> pd.DataFrame:
     """Identify first-timer-friendly models based on popularity and performance, grouped by task.
     # Filter only successful benchmarks
     filtered = df[df["status"] == "completed"].copy() if "status" in df.columns else df.copy()
+    # Exclude test models and other non-production models
+    filtered = filter_excluded_models(filtered)
     if filtered.empty:
         return pd.DataFrame()
     filtered = df[webgpu_filter].copy()
+    # Exclude test models and other non-production models
+    filtered = filter_excluded_models(filtered)
     if filtered.empty:
         logger.warning("No successful WebGPU benchmarks found")
         return pd.DataFrame()