Update src/md.py
Browse files
src/md.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from datetime import datetime
|
|
|
|
| 2 |
|
| 3 |
ABOUT_TEXT = """
|
| 4 |
We compute the win percentage for a reward model on hand curated chosen-rejected pairs for each prompt.
|
|
@@ -94,12 +95,12 @@ Lengths (mean, std. dev.) include the prompt
|
|
| 94 |
For more details, see the [dataset](https://huggingface.co/datasets/allenai/reward-bench).
|
| 95 |
"""
|
| 96 |
|
| 97 |
-
# Get
|
| 98 |
-
|
|
|
|
| 99 |
|
| 100 |
TOP_TEXT = f"""# RewardBench: Evaluating Reward Models
|
| 101 |
### Evaluating the capabilities, safety, and pitfalls of reward models
|
| 102 |
-
Last restart: {current_time}
|
| 103 |
-
[Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination
|
| 104 |
|
| 105 |
⚠️ Many of the top models were trained on unintentionally contaminated, AI-generated data, for more information, see this [gist](https://gist.github.com/natolambert/1aed306000c13e0e8c5bc17c1a5dd300)."""
|
|
|
|
| 1 |
from datetime import datetime
from zoneinfo import ZoneInfo

import pytz
|
| 3 |
|
| 4 |
ABOUT_TEXT = """
|
| 5 |
We compute the win percentage for a reward model on hand curated chosen-rejected pairs for each prompt.
|
|
|
|
| 95 |
For more details, see the [dataset](https://huggingface.co/datasets/allenai/reward-bench).
|
| 96 |
"""
|
| 97 |
|
| 98 |
# Get the Pacific time zone (handles the PST/PDT transition automatically).
# Uses the stdlib zoneinfo module (Python >= 3.9) instead of third-party
# pytz, which is in maintenance mode and superseded by zoneinfo (PEP 615).
# ZoneInfo is a tzinfo subclass, so downstream datetime usage is unchanged.
pacific_tz = ZoneInfo("America/Los_Angeles")
# Timestamp of the last app restart, e.g. "14:32 PDT, 06 Jun 2024";
# computed once at import time and interpolated into TOP_TEXT below.
current_time = datetime.now(pacific_tz).strftime("%H:%M %Z, %d %b %Y")
|
| 101 |
|
| 102 |
TOP_TEXT = f"""# RewardBench: Evaluating Reward Models
|
| 103 |
### Evaluating the capabilities, safety, and pitfalls of reward models
|
| 104 |
+
[Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
|
|
|
|
| 105 |
|
| 106 |
⚠️ Many of the top models were trained on unintentionally contaminated, AI-generated data, for more information, see this [gist](https://gist.github.com/natolambert/1aed306000c13e0e8c5bc17c1a5dd300)."""
|