IgorSlinko committed on
Commit
bb3fde6
·
1 Parent(s): 843e0a2

Add Grep routing strategy (v0.3.17)

Browse files

- Add 'Grep' strategy matching words in assistant messages
- Support | (OR) and & (AND) operators (cannot mix)
- Use word boundaries (\b) for whole word matching
- Default: M1='ls|find', M2='cat|echo|printf|tee', M3='python&.py'
- First model has priority on overlaps
- Store assistant message content in step data for grep matching

Files changed (1) hide show
  1. app.py +61 -5
app.py CHANGED
@@ -188,6 +188,7 @@ def parse_trajectory_to_steps(traj_path: Path, model_name: str) -> list[dict]:
188
  "system_user": system_user_tokens if not steps else 0,
189
  "completion": tokens,
190
  "observation": None,
 
191
  }
192
  steps.append(step)
193
  system_user_tokens = 0
@@ -224,6 +225,7 @@ def _parse_trajectory_format_to_steps(trajectory_data: list, model_name: str) ->
224
  "system_user": system_user_tokens,
225
  "completion": completion_tokens,
226
  "observation": observation_tokens,
 
227
  }
228
  steps.append(step)
229
 
@@ -1321,7 +1323,7 @@ def build_app():
1321
  """)
1322
  trajectories_state = gr.State(None)
1323
 
1324
- gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.16`")
1325
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1326
 
1327
  with gr.Row():
@@ -1489,7 +1491,7 @@ def build_app():
1489
  gr.Markdown("### 🎯 Router Strategy")
1490
 
1491
  selected_strategy = gr.Radio(
1492
- choices=["Random router", "Every k-th step", "Python list slices", "Replace part of trajectory"],
1493
  value="Random router",
1494
  label="",
1495
  interactive=True,
@@ -1515,6 +1517,12 @@ def build_app():
1515
  slice_model_2 = gr.Textbox(label="M2 slice", value="[1::3]", interactive=True, visible=False)
1516
  slice_model_3 = gr.Textbox(label="M3 slice", value="[2::3]", interactive=True, visible=False)
1517
 
 
 
 
 
 
 
1518
  with gr.Column(visible=False) as part_block:
1519
  part_hint = gr.Markdown("*Ranges must not overlap*")
1520
  part_mode = gr.Radio(
@@ -1547,6 +1555,7 @@ def build_app():
1547
  show_random = strategy == "Random router"
1548
  show_every_k = strategy == "Every k-th step"
1549
  show_slice = strategy == "Python list slices"
 
1550
  show_part = strategy == "Replace part of trajectory"
1551
  has_m2 = num_models >= 2
1552
  has_m3 = num_models >= 3
@@ -1554,6 +1563,7 @@ def build_app():
1554
  gr.update(visible=show_random), # random_block
1555
  gr.update(visible=show_every_k), # every_k_block
1556
  gr.update(visible=show_slice), # slice_block
 
1557
  gr.update(visible=show_part), # part_block
1558
  gr.update(visible=show_random), # random_hint
1559
  gr.update(visible=show_random), # weight_base
@@ -1568,6 +1578,10 @@ def build_app():
1568
  gr.update(visible=show_slice), # slice_model_1
1569
  gr.update(visible=show_slice and has_m2), # slice_model_2
1570
  gr.update(visible=show_slice and has_m3), # slice_model_3
 
 
 
 
1571
  gr.update(visible=show_part), # part_hint
1572
  gr.update(visible=show_part), # part_mode
1573
  gr.update(visible=show_part), # start_1
@@ -1582,10 +1596,11 @@ def build_app():
1582
  fn=on_strategy_change,
1583
  inputs=[selected_strategy, num_routing_models],
1584
  outputs=[
1585
- random_block, every_k_block, slice_block, part_block,
1586
  random_hint, weight_base, weight_model_1, weight_model_2, weight_model_3,
1587
  every_k_hint, k_model_1, k_model_2, k_model_3,
1588
  slice_hint, slice_model_1, slice_model_2, slice_model_3,
 
1589
  part_hint, part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
1590
  ],
1591
  )
@@ -1678,6 +1693,7 @@ def build_app():
1678
  is_random = strategy == "Random router"
1679
  is_every_k = strategy == "Every k-th step"
1680
  is_slice = strategy == "Python list slices"
 
1681
  is_part = strategy == "Replace part of trajectory"
1682
  return (
1683
  gr.update(visible=True), # show block 2
@@ -1685,6 +1701,7 @@ def build_app():
1685
  gr.update(visible=is_random), # weight2
1686
  gr.update(visible=is_every_k), # k2
1687
  gr.update(visible=is_slice), # slice2
 
1688
  gr.update(visible=is_part), # start2
1689
  gr.update(visible=is_part), # end2
1690
  2,
@@ -1693,7 +1710,7 @@ def build_app():
1693
  add_model_2_btn.click(
1694
  fn=show_model_2,
1695
  inputs=[selected_strategy],
1696
- outputs=[routing_block_2, add_model_2_btn, weight_model_2, k_model_2, slice_model_2, start_2, end_2, num_routing_models],
1697
  )
1698
 
1699
  routing_model_2.change(
@@ -1706,6 +1723,7 @@ def build_app():
1706
  is_random = strategy == "Random router"
1707
  is_every_k = strategy == "Every k-th step"
1708
  is_slice = strategy == "Python list slices"
 
1709
  is_part = strategy == "Replace part of trajectory"
1710
  return (
1711
  gr.update(visible=True), # show block 3
@@ -1713,6 +1731,7 @@ def build_app():
1713
  gr.update(visible=is_random), # weight3
1714
  gr.update(visible=is_every_k), # k3
1715
  gr.update(visible=is_slice), # slice3
 
1716
  gr.update(visible=is_part), # start3
1717
  gr.update(visible=is_part), # end3
1718
  3,
@@ -1721,7 +1740,7 @@ def build_app():
1721
  add_model_3_btn.click(
1722
  fn=show_model_3,
1723
  inputs=[selected_strategy],
1724
- outputs=[routing_block_3, add_model_3_btn, weight_model_3, k_model_3, slice_model_3, start_3, end_3, num_routing_models],
1725
  )
1726
 
1727
  routing_model_3.change(
@@ -1740,6 +1759,7 @@ def build_app():
1740
  weight_base_val, weight_1_val, weight_2_val, weight_3_val,
1741
  k_1_val, k_2_val, k_3_val,
1742
  slice_1_val, slice_2_val, slice_3_val,
 
1743
  part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
1744
  overhead, with_cache
1745
  ):
@@ -1841,8 +1861,35 @@ def build_app():
1841
 
1842
  k_values = [k_1_val, k_2_val, k_3_val][:len(routing_models)]
1843
  slice_values = [slice_1_val, slice_2_val, slice_3_val][:len(routing_models)]
 
1844
  part_ranges = [(start_1_val, end_1_val), (start_2_val, end_2_val), (start_3_val, end_3_val)][:len(routing_models)]
1845
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1846
  def parse_slice(slice_str, length):
1847
  """Parse Python slice notation like [0::3] and return list of indices"""
1848
  slice_str = slice_str.strip()
@@ -1899,6 +1946,14 @@ def build_app():
1899
  except Exception:
1900
  pass
1901
 
 
 
 
 
 
 
 
 
1902
  elif strategy_val == "Replace part of trajectory":
1903
  for j, (start_val, end_val) in enumerate(part_ranges):
1904
  if part_mode_val == "Percentages":
@@ -2025,6 +2080,7 @@ def build_app():
2025
  weight_base, weight_model_1, weight_model_2, weight_model_3,
2026
  k_model_1, k_model_2, k_model_3,
2027
  slice_model_1, slice_model_2, slice_model_3,
 
2028
  part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
2029
  thinking_overhead, use_cache,
2030
  ],
 
188
  "system_user": system_user_tokens if not steps else 0,
189
  "completion": tokens,
190
  "observation": None,
191
+ "content": str(content),
192
  }
193
  steps.append(step)
194
  system_user_tokens = 0
 
225
  "system_user": system_user_tokens,
226
  "completion": completion_tokens,
227
  "observation": observation_tokens,
228
+ "content": str(response_text) if response_text else "",
229
  }
230
  steps.append(step)
231
 
 
1323
  """)
1324
  trajectories_state = gr.State(None)
1325
 
1326
+ gr.Markdown("# 🧮 SWE-bench Bash-Only Leaderboard `v0.3.17`")
1327
  gr.Markdown("## 🎯 Select a base model for cost analysis (click a row)")
1328
 
1329
  with gr.Row():
 
1491
  gr.Markdown("### 🎯 Router Strategy")
1492
 
1493
  selected_strategy = gr.Radio(
1494
+ choices=["Random router", "Every k-th step", "Python list slices", "Grep", "Replace part of trajectory"],
1495
  value="Random router",
1496
  label="",
1497
  interactive=True,
 
1517
  slice_model_2 = gr.Textbox(label="M2 slice", value="[1::3]", interactive=True, visible=False)
1518
  slice_model_3 = gr.Textbox(label="M3 slice", value="[2::3]", interactive=True, visible=False)
1519
 
1520
+ with gr.Column(visible=False) as grep_block:
1521
+ grep_hint = gr.Markdown("*Use `|` for OR, `&` for AND (don't mix). First model has priority on overlaps*")
1522
+ grep_model_1 = gr.Textbox(label="M1 grep", value="ls|find", interactive=True)
1523
+ grep_model_2 = gr.Textbox(label="M2 grep", value="cat|echo|printf|tee", interactive=True, visible=False)
1524
+ grep_model_3 = gr.Textbox(label="M3 grep", value="python&.py", interactive=True, visible=False)
1525
+
1526
  with gr.Column(visible=False) as part_block:
1527
  part_hint = gr.Markdown("*Ranges must not overlap*")
1528
  part_mode = gr.Radio(
 
1555
  show_random = strategy == "Random router"
1556
  show_every_k = strategy == "Every k-th step"
1557
  show_slice = strategy == "Python list slices"
1558
+ show_grep = strategy == "Grep"
1559
  show_part = strategy == "Replace part of trajectory"
1560
  has_m2 = num_models >= 2
1561
  has_m3 = num_models >= 3
 
1563
  gr.update(visible=show_random), # random_block
1564
  gr.update(visible=show_every_k), # every_k_block
1565
  gr.update(visible=show_slice), # slice_block
1566
+ gr.update(visible=show_grep), # grep_block
1567
  gr.update(visible=show_part), # part_block
1568
  gr.update(visible=show_random), # random_hint
1569
  gr.update(visible=show_random), # weight_base
 
1578
  gr.update(visible=show_slice), # slice_model_1
1579
  gr.update(visible=show_slice and has_m2), # slice_model_2
1580
  gr.update(visible=show_slice and has_m3), # slice_model_3
1581
+ gr.update(visible=show_grep), # grep_hint
1582
+ gr.update(visible=show_grep), # grep_model_1
1583
+ gr.update(visible=show_grep and has_m2), # grep_model_2
1584
+ gr.update(visible=show_grep and has_m3), # grep_model_3
1585
  gr.update(visible=show_part), # part_hint
1586
  gr.update(visible=show_part), # part_mode
1587
  gr.update(visible=show_part), # start_1
 
1596
  fn=on_strategy_change,
1597
  inputs=[selected_strategy, num_routing_models],
1598
  outputs=[
1599
+ random_block, every_k_block, slice_block, grep_block, part_block,
1600
  random_hint, weight_base, weight_model_1, weight_model_2, weight_model_3,
1601
  every_k_hint, k_model_1, k_model_2, k_model_3,
1602
  slice_hint, slice_model_1, slice_model_2, slice_model_3,
1603
+ grep_hint, grep_model_1, grep_model_2, grep_model_3,
1604
  part_hint, part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
1605
  ],
1606
  )
 
1693
  is_random = strategy == "Random router"
1694
  is_every_k = strategy == "Every k-th step"
1695
  is_slice = strategy == "Python list slices"
1696
+ is_grep = strategy == "Grep"
1697
  is_part = strategy == "Replace part of trajectory"
1698
  return (
1699
  gr.update(visible=True), # show block 2
 
1701
  gr.update(visible=is_random), # weight2
1702
  gr.update(visible=is_every_k), # k2
1703
  gr.update(visible=is_slice), # slice2
1704
+ gr.update(visible=is_grep), # grep2
1705
  gr.update(visible=is_part), # start2
1706
  gr.update(visible=is_part), # end2
1707
  2,
 
1710
  add_model_2_btn.click(
1711
  fn=show_model_2,
1712
  inputs=[selected_strategy],
1713
+ outputs=[routing_block_2, add_model_2_btn, weight_model_2, k_model_2, slice_model_2, grep_model_2, start_2, end_2, num_routing_models],
1714
  )
1715
 
1716
  routing_model_2.change(
 
1723
  is_random = strategy == "Random router"
1724
  is_every_k = strategy == "Every k-th step"
1725
  is_slice = strategy == "Python list slices"
1726
+ is_grep = strategy == "Grep"
1727
  is_part = strategy == "Replace part of trajectory"
1728
  return (
1729
  gr.update(visible=True), # show block 3
 
1731
  gr.update(visible=is_random), # weight3
1732
  gr.update(visible=is_every_k), # k3
1733
  gr.update(visible=is_slice), # slice3
1734
+ gr.update(visible=is_grep), # grep3
1735
  gr.update(visible=is_part), # start3
1736
  gr.update(visible=is_part), # end3
1737
  3,
 
1740
  add_model_3_btn.click(
1741
  fn=show_model_3,
1742
  inputs=[selected_strategy],
1743
+ outputs=[routing_block_3, add_model_3_btn, weight_model_3, k_model_3, slice_model_3, grep_model_3, start_3, end_3, num_routing_models],
1744
  )
1745
 
1746
  routing_model_3.change(
 
1759
  weight_base_val, weight_1_val, weight_2_val, weight_3_val,
1760
  k_1_val, k_2_val, k_3_val,
1761
  slice_1_val, slice_2_val, slice_3_val,
1762
+ grep_1_val, grep_2_val, grep_3_val,
1763
  part_mode_val, start_1_val, end_1_val, start_2_val, end_2_val, start_3_val, end_3_val,
1764
  overhead, with_cache
1765
  ):
 
1861
 
1862
  k_values = [k_1_val, k_2_val, k_3_val][:len(routing_models)]
1863
  slice_values = [slice_1_val, slice_2_val, slice_3_val][:len(routing_models)]
1864
+ grep_values = [grep_1_val, grep_2_val, grep_3_val][:len(routing_models)]
1865
  part_ranges = [(start_1_val, end_1_val), (start_2_val, end_2_val), (start_3_val, end_3_val)][:len(routing_models)]
1866
 
1867
+ if strategy_val == "Grep":
1868
+ for i, gv in enumerate(grep_values):
1869
+ if gv and "|" in gv and "&" in gv:
1870
+ yield (gr.update(visible=True, value=f"❌ M{i+1} grep: cannot mix | and & operators"), gr.update(visible=False), None, None)
1871
+ return
1872
+
1873
def grep_matches(text, pattern):
    """Return True when ``text`` satisfies the grep-style ``pattern``.

    The pattern is a list of literal terms joined by a single operator:

    * ``a|b|c`` -- matches when ANY term occurs in ``text``.
    * ``a&b&c`` -- matches when ALL terms occur in ``text``.
    * ``a``     -- a single term.

    If both operators are present, ``|`` is used as the separator and ``&``
    stays part of the terms; this function does not reject mixed patterns
    itself.

    Terms are matched literally (regex metacharacters are escaped) and as
    whole words: a word boundary is required on each side of the term where
    the term itself begins/ends with a word character. Edges that are
    punctuation (e.g. the leading ``.`` of ``.py`` or the dashes of
    ``--help``) are not anchored, because ``\\b`` next to a non-word
    character would demand an adjacent word character in the text and make
    such terms unmatchable in most contexts.

    Args:
        text: The text to search (e.g. an assistant message).
        pattern: The grep pattern entered by the user.

    Returns:
        bool: True if the pattern matches. Empty/None ``text`` or
        ``pattern``, and patterns that contain no terms after stripping
        (``"   "``, ``"&"``, ``"|"``), never match.
    """
    if not pattern or not text:
        return False

    pattern = pattern.strip()
    if "|" in pattern:
        separator, combine = "|", any
    elif "&" in pattern:
        separator, combine = "&", all
    else:
        separator, combine = None, any

    raw_terms = pattern.split(separator) if separator else [pattern]
    terms = [term.strip() for term in raw_terms if term.strip()]
    if not terms:
        # Without this guard ``all([])`` would be vacuously True for "&",
        # and an empty single term would degrade to the regex ``\b\b``.
        return False

    def term_regex(term):
        # Anchor only the edges that are word characters; see docstring.
        prefix = r"\b" if re.match(r"\w", term[0]) else ""
        suffix = r"\b" if re.match(r"\w", term[-1]) else ""
        return prefix + re.escape(term) + suffix

    return combine(
        re.search(term_regex(term), text) is not None for term in terms
    )
1892
+
1893
  def parse_slice(slice_str, length):
1894
  """Parse Python slice notation like [0::3] and return list of indices"""
1895
  slice_str = slice_str.strip()
 
1946
  except Exception:
1947
  pass
1948
 
1949
+ elif strategy_val == "Grep":
1950
+ for i, step in enumerate(steps):
1951
+ content = step.get("content", "")
1952
+ for j, grep_val in enumerate(grep_values):
1953
+ if grep_val and i not in step_to_model:
1954
+ if grep_matches(content, grep_val):
1955
+ step_to_model[i] = f"__routing_{j}__"
1956
+
1957
  elif strategy_val == "Replace part of trajectory":
1958
  for j, (start_val, end_val) in enumerate(part_ranges):
1959
  if part_mode_val == "Percentages":
 
2080
  weight_base, weight_model_1, weight_model_2, weight_model_3,
2081
  k_model_1, k_model_2, k_model_3,
2082
  slice_model_1, slice_model_2, slice_model_3,
2083
+ grep_model_1, grep_model_2, grep_model_3,
2084
  part_mode, start_1, end_1, start_2, end_2, start_3, end_3,
2085
  thinking_overhead, use_cache,
2086
  ],