Commit
·
723fdc8
1
Parent(s):
071222d
Improve Tokenizer Overhead UX
Browse files
- Apply overhead to all token types (prompt, completion, cache_read, cache_creation)
- Rename 'Thinking Overhead' to 'Tokenizer Overhead' for clarity
- Hide Tokenizer Overhead field when 'Metadata' mode is selected
- Show Tokenizer Overhead only when 'Calculated' mode is active
app.py
CHANGED
|
@@ -132,14 +132,16 @@ def calculate_tokens_from_trajectory(traj_path: Path, model_name: str) -> dict:
|
|
| 132 |
|
| 133 |
|
| 134 |
def apply_thinking_overhead(df: pd.DataFrame, overhead: float) -> pd.DataFrame:
|
| 135 |
-
"""Apply
|
| 136 |
if df.empty or overhead == 1.0:
|
| 137 |
return df
|
| 138 |
|
| 139 |
df = df.copy()
|
|
|
|
| 140 |
df["completion_tokens"] = (df["completion_tokens"] * overhead).astype(int)
|
| 141 |
-
df["
|
| 142 |
df["cache_creation_tokens"] = (df["cache_creation_tokens"] * overhead).astype(int)
|
|
|
|
| 143 |
return df
|
| 144 |
|
| 145 |
|
|
@@ -927,12 +929,22 @@ def build_app():
|
|
| 927 |
value="Metadata",
|
| 928 |
)
|
| 929 |
thinking_overhead = gr.Number(
|
| 930 |
-
label="
|
| 931 |
value=1.21,
|
| 932 |
precision=2,
|
| 933 |
-
info="
|
|
|
|
| 934 |
)
|
| 935 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 936 |
leaderboard_table.select(
|
| 937 |
fn=on_row_select,
|
| 938 |
inputs=[leaderboard_table],
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
def apply_thinking_overhead(df: pd.DataFrame, overhead: float) -> pd.DataFrame:
|
| 135 |
+
"""Apply tokenizer overhead multiplier to all token counts"""
|
| 136 |
if df.empty or overhead == 1.0:
|
| 137 |
return df
|
| 138 |
|
| 139 |
df = df.copy()
|
| 140 |
+
df["prompt_tokens"] = (df["prompt_tokens"] * overhead).astype(int)
|
| 141 |
df["completion_tokens"] = (df["completion_tokens"] * overhead).astype(int)
|
| 142 |
+
df["cache_read_tokens"] = (df["cache_read_tokens"] * overhead).astype(int)
|
| 143 |
df["cache_creation_tokens"] = (df["cache_creation_tokens"] * overhead).astype(int)
|
| 144 |
+
df["total_tokens"] = df["prompt_tokens"] + df["completion_tokens"]
|
| 145 |
return df
|
| 146 |
|
| 147 |
|
|
|
|
| 929 |
value="Metadata",
|
| 930 |
)
|
| 931 |
thinking_overhead = gr.Number(
|
| 932 |
+
label="🔢 Tokenizer Overhead",
|
| 933 |
value=1.21,
|
| 934 |
precision=2,
|
| 935 |
+
info="Multiplier for Calculated tokens (tiktoken → native)",
|
| 936 |
+
visible=False,
|
| 937 |
)
|
| 938 |
|
| 939 |
+
def update_overhead_visibility(source):
|
| 940 |
+
return gr.update(visible=(source == "Calculated"))
|
| 941 |
+
|
| 942 |
+
token_source.change(
|
| 943 |
+
fn=update_overhead_visibility,
|
| 944 |
+
inputs=[token_source],
|
| 945 |
+
outputs=[thinking_overhead],
|
| 946 |
+
)
|
| 947 |
+
|
| 948 |
leaderboard_table.select(
|
| 949 |
fn=on_row_select,
|
| 950 |
inputs=[leaderboard_table],
|