IgorSlinko committed
Commit 723fdc8 · 1 Parent(s): 071222d

Improve Tokenizer Overhead UX


- Apply overhead to all token types (prompt, completion, cache_read, cache_creation); see the worked example after this list
- Rename 'Thinking Overhead' to 'Tokenizer Overhead' for clarity
- Hide Tokenizer Overhead field when 'Metadata' mode is selected
- Show Tokenizer Overhead only when 'Calculated' mode is active
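
The effect of the first change is easiest to see with numbers. Below is a minimal sketch of the updated helper, condensed from the diff that follows; the DataFrame values are made up purely for illustration, and the column names are the ones used in app.py.

    # Condensed sketch: every token column is scaled by the overhead factor,
    # then the total is recomputed from the scaled prompt/completion counts.
    import pandas as pd

    def apply_thinking_overhead(df: pd.DataFrame, overhead: float) -> pd.DataFrame:
        if df.empty or overhead == 1.0:
            return df
        df = df.copy()
        for col in ("prompt_tokens", "completion_tokens",
                    "cache_read_tokens", "cache_creation_tokens"):
            df[col] = (df[col] * overhead).astype(int)
        df["total_tokens"] = df["prompt_tokens"] + df["completion_tokens"]
        return df

    # Illustrative numbers only (not real leaderboard data):
    df = pd.DataFrame({
        "prompt_tokens": [1000],
        "completion_tokens": [200],
        "cache_read_tokens": [500],
        "cache_creation_tokens": [100],
        "total_tokens": [1200],
    })
    print(apply_thinking_overhead(df, 1.21))
    # prompt 1210, completion 242, cache_read 605, cache_creation 121, total 1452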

Files changed (1)
  1. app.py  +16 -4
app.py
@@ -132,14 +132,16 @@ def calculate_tokens_from_trajectory(traj_path: Path, model_name: str) -> dict:
 
 
 def apply_thinking_overhead(df: pd.DataFrame, overhead: float) -> pd.DataFrame:
-    """Apply thinking overhead multiplier to completion tokens"""
+    """Apply tokenizer overhead multiplier to all token counts"""
     if df.empty or overhead == 1.0:
         return df
 
     df = df.copy()
+    df["prompt_tokens"] = (df["prompt_tokens"] * overhead).astype(int)
     df["completion_tokens"] = (df["completion_tokens"] * overhead).astype(int)
-    df["total_tokens"] = df["prompt_tokens"] + df["completion_tokens"]
+    df["cache_read_tokens"] = (df["cache_read_tokens"] * overhead).astype(int)
     df["cache_creation_tokens"] = (df["cache_creation_tokens"] * overhead).astype(int)
+    df["total_tokens"] = df["prompt_tokens"] + df["completion_tokens"]
     return df
 
 
@@ -927,12 +929,22 @@ def build_app():
             value="Metadata",
         )
         thinking_overhead = gr.Number(
-            label="🧠 Thinking Overhead (multiplier)",
+            label="🔢 Tokenizer Overhead",
             value=1.21,
             precision=2,
-            info="Applies to Calculated tokens",
+            info="Multiplier for Calculated tokens (tiktoken → native)",
+            visible=False,
         )
 
+        def update_overhead_visibility(source):
+            return gr.update(visible=(source == "Calculated"))
+
+        token_source.change(
+            fn=update_overhead_visibility,
+            inputs=[token_source],
+            outputs=[thinking_overhead],
+        )
+
         leaderboard_table.select(
             fn=on_row_select,
             inputs=[leaderboard_table],
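
For readers unfamiliar with the show/hide wiring in the second hunk: it is the standard Gradio pattern of returning gr.update(visible=...) from a change handler. The following is a self-contained sketch of just that interaction; token_source is assumed to be a gr.Radio with "Metadata"/"Calculated" choices, since only its default value appears in the diff context.

    import gradio as gr

    with gr.Blocks() as demo:
        # Assumed definition of token_source; only value="Metadata" is visible in the diff.
        token_source = gr.Radio(
            choices=["Metadata", "Calculated"],
            value="Metadata",
            label="Token Source",
        )
        thinking_overhead = gr.Number(
            label="🔢 Tokenizer Overhead",
            value=1.21,
            precision=2,
            info="Multiplier for Calculated tokens (tiktoken → native)",
            visible=False,  # hidden until "Calculated" is selected
        )

        def update_overhead_visibility(source):
            # Toggle the existing field in place rather than recreating it
            return gr.update(visible=(source == "Calculated"))

        token_source.change(
            fn=update_overhead_visibility,
            inputs=[token_source],
            outputs=[thinking_overhead],
        )

    demo.launch()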