Spaces:

m-ric
/

llm-race-to-the-top

Paused

App Files Files Community

m-ric committed on Nov 22, 2024

Commit

31aa24a

1 Parent(s): 79f07b1

Interactive sliders

Browse files

Files changed (2) hide show

app.py +53 -58
utils.py +45 -0

app.py CHANGED Viewed

@@ -18,7 +18,8 @@ from utils import (
     format_data,
     get_trendlines,
     find_crossover_point,
-    sigmoid_transition
 )
 ###################
@@ -105,21 +106,14 @@ merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
 # get constants
 min_elo_score, max_elo_score, _ = get_constants(merged_dfs)
 date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
-orgs = merged_dfs["Overall"].Organization.unique().tolist()
 ###################
 ### Build and Plot Data
 ###################
-df = merged_dfs["Overall"]
-top_orgs = df.groupby("Organization")["rating"].max().nlargest(11).index.tolist()
-df = df.loc[(df["Organization"].isin(top_orgs)) & (df["rating"] > 1000)]
-print(df)
-df = df.loc[~df["Release Date"].isna()]
 def get_data_split(dfs, set_name):
     """Return an independent copy of one named split with a fresh index.

     Args:
         dfs: Mapping from split name to a pandas DataFrame.
         set_name: Key of the split to extract from ``dfs``.

     Returns:
         A deep copy of ``dfs[set_name]`` whose index has been reset and
         the old index dropped; the frame stored in ``dfs`` is not
         modified.
     """
     return dfs[set_name].copy(deep=True).reset_index(drop=True)
@@ -272,45 +266,32 @@ def make_figure(df):
     speak_french = False
     if speak_french:
         fig.update_layout(
-            xaxis_title="Date",
             title="La course au classement",
             yaxis_title="Score ELO",
             legend_title="Classement en Novembre 2024",
-            xaxis_range=[pd.Timestamp("2024-01-01"), current_date],  # Extend x-axis for labels
-            yaxis_range=[1103, 1350],
         )
     else:
         fig.update_layout(
-            xaxis_title="Date",
             yaxis_title="ELO score on Chatbot Arena",
             legend_title="Ranking as of November 2024",
             title="The race for the best LLM",
-            hovermode="closest",
-            xaxis_range=[pd.Timestamp("2024-01-01"), current_date],  # Extend x-axis for labels
-            yaxis_range=[1103, 1350],
         )
-    # apply_template(fig)
     fig.update_xaxes(
         tickformat="%m-%Y",
     )
-    print(fig)
     return fig, df
-def filter_df():
-    return df
-set_dark_mode = """
-function refresh() {
-    const url = new URL(window.location);
-    if (url.searchParams.get('__theme') !== 'dark') {
-        url.searchParams.set('__theme', 'dark');
-        window.location.href = url.href;
-    }
-}
-"""
 with gr.Blocks(
     theme=gr.themes.Soft(
@@ -320,45 +301,49 @@ with gr.Blocks(
         text_size=gr.themes.sizes.text_sm,
         font=[
             gr.themes.GoogleFont("Open Sans"),
-            "ui-sans-serif",
             "system-ui",
-            "sans-serif",
         ],
     ),
-    js=set_dark_mode,
 ) as demo:
-    gr.Markdown(
-        """
-        <div style="text-align: center; max-width: 650px; margin: auto;">
-            <h1 style="font-weight: 900; margin-top: 5px;">🚀 The race for the best LLM 🚀</h1>
-            <p style="text-align: left; margin-top: 30px; margin-bottom: 30px; line-height: 20px;">
-            This app visualizes the progress of LLMs over time as scored by the <a href="https://leaderboard.lmsys.org/">LMSYS Chatbot Arena</a>.
-            The app is adapted from <a href="https://huggingface.co/spaces/andrewrreed/closed-vs-open-arena-elo"> this app</a> by Andew Reed,
-            and is intended to stay up-to-date as new models are released and evaluated.
-            <div style="text-align: left;">
-            <strong>Plot info:</strong>
-            <br>
-            <ul style="padding-left: 20px;">
-                <li> The ELO score (y-axis) is a measure of the relative strength of a model based on its performance against other models in the arena. </li>
-                <li> The Release Date (x-axis) corresponds to when the model was first publicly released or when its ELO results were first reported (for ease of automated updates). </li>
-                <li> Trend lines are based on Ordinary Least Squares (OLS) regression and adjust based on the filter criteria. </li>
-            <ul>
-            </div>
-            </p>
-        </div>
-        """
-    )
     filtered_df = gr.State()
     with gr.Group():
         with gr.Tab("Plot"):
             plot = gr.Plot(show_label=False)
         with gr.Tab("Raw Data"):
             display_df = gr.DataFrame()
     demo.load(
         fn=filter_df,
-        inputs=[],
         outputs=filtered_df,
     ).then(
         fn=make_figure,
@@ -366,4 +351,14 @@ with gr.Blocks(
         outputs=[plot, display_df],
     )
 demo.launch()

     format_data,
     get_trendlines,
     find_crossover_point,
+    sigmoid_transition,
+    apply_template,
 )
 ###################
 # get constants
 min_elo_score, max_elo_score, _ = get_constants(merged_dfs)
 date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
+ratings_df = merged_dfs["Overall"]
+ratings_df = ratings_df.loc[~ratings_df["Release Date"].isna()]
 ###################
 ### Build and Plot Data
 ###################
 def get_data_split(dfs, set_name):
     """Return an independent copy of one named split with a fresh index.

     Args:
         dfs: Mapping from split name to a pandas DataFrame.
         set_name: Key of the split to extract from ``dfs``.

     Returns:
         A deep copy of ``dfs[set_name]`` whose index has been reset and
         the old index dropped; the frame stored in ``dfs`` is not
         modified.
     """
     return dfs[set_name].copy(deep=True).reset_index(drop=True)
     speak_french = False
     if speak_french:
         fig.update_layout(
             title="La course au classement",
             yaxis_title="Score ELO",
             legend_title="Classement en Novembre 2024",
         )
     else:
         fig.update_layout(
             yaxis_title="ELO score on Chatbot Arena",
             legend_title="Ranking as of November 2024",
             title="The race for the best LLM",
         )
+    fig.update_layout(
+        xaxis_title="Date",
+        hovermode="closest",
+        xaxis_range=[pd.Timestamp("2024-01-01"), current_date],  # Extend x-axis for labels
+        yaxis_range=[best_models_df["rating"].min() - 10, df["rating"].max() + 30],
+    )
+    apply_template(fig, annotation_text="Aymeric Roucher")
     fig.update_xaxes(
         tickformat="%m-%Y",
     )
     return fig, df
def filter_df(top_n_orgs=11, minimum_rating=1000):
    """Select rows of ``ratings_df`` for the best organizations above a rating floor.

    Organizations are ranked by the maximum ``rating`` of any of their rows
    in the module-level ``ratings_df``; only the ``top_n_orgs`` best are kept.

    Args:
        top_n_orgs: How many organizations to keep. Coerced to ``int``
            (truncating any fractional part) before use.
        minimum_rating: Exclusive lower bound on ``rating``; rows at or
            below this value are dropped.

    Returns:
        The subset of ``ratings_df`` whose ``Organization`` is among the
        selected organizations and whose ``rating`` exceeds
        ``minimum_rating``.
    """
    # NOTE(review): this callback is wired to sliders declared without an
    # explicit ``step``, so the count may arrive as a float — confirm against
    # the Gradio version in use. ``nlargest`` needs an integer count.
    org_count = int(top_n_orgs)

    best_rating_per_org = ratings_df.groupby("Organization")["rating"].max()
    top_orgs = best_rating_per_org.nlargest(org_count).index.tolist()

    from_top_org = ratings_df["Organization"].isin(top_orgs)
    above_floor = ratings_df["rating"] > minimum_rating
    return ratings_df.loc[from_top_org & above_floor]
 with gr.Blocks(
     theme=gr.themes.Soft(
         text_size=gr.themes.sizes.text_sm,
         font=[
             gr.themes.GoogleFont("Open Sans"),
+            "ui-serif",
             "system-ui",
+            "serif",
         ],
     ),
 ) as demo:
     filtered_df = gr.State()
+    with gr.Row():
+        top_n_orgs = gr.Slider(minimum=1, maximum=30, value=10, label="View top N companies")
+        minimum_rating = gr.Slider(minimum=800, maximum=1300, value=1000, label="Restrict to ELO scores above N")
     with gr.Group():
         with gr.Tab("Plot"):
             plot = gr.Plot(show_label=False)
         with gr.Tab("Raw Data"):
             display_df = gr.DataFrame()
+    gr.Markdown(
+        """
+        This app visualizes the progress of LLMs over time as scored by the [LMSYS Chatbot Arena](https://leaderboard.lmsys.org/).
+        The app is adapted from [this app](https://huggingface.co/spaces/andrewrreed/closed-vs-open-arena-elo) by Andew Reed,
+        and is intended to stay up-to-date as new models are released and evaluated.
+        > ### Plot info
+        > The ELO score (y-axis) is a measure of the relative strength of a model based on its performance against other models in the arena.
+        > The Release Date (x-axis) corresponds to when the model was first publicly released or when its ELO results were first reported (for ease of automated updates).
+        > Trend lines are based on Ordinary Least Squares (OLS) regression and adjust based on the filter criteria.
+        """
+    )
     demo.load(
         fn=filter_df,
+        inputs=[top_n_orgs, minimum_rating],
+        outputs=filtered_df,
+    ).then(
+        fn=make_figure,
+        inputs=[filtered_df],
+        outputs=[plot, display_df],
+    )
+    minimum_rating.change(
+        fn=filter_df,
+        inputs=[top_n_orgs, minimum_rating],
         outputs=filtered_df,
     ).then(
         fn=make_figure,
         outputs=[plot, display_df],
     )
+    top_n_orgs.change(
+        fn=filter_df,
+        inputs=[top_n_orgs, minimum_rating],
+        outputs=filtered_df,
+    ).then(
+        fn=make_figure,
+        inputs=[filtered_df],
+        outputs=[plot, display_df],
+    )
 demo.launch()

utils.py CHANGED Viewed

@@ -233,3 +233,48 @@ def find_crossover_point(b1, m1, b2, m2):
 # Sigmoid-shaped transition centred on x0
 def sigmoid_transition(x, x0, k=0.1):
     """Evaluate ``expit`` at the scaled offset ``k * (x - x0)``.

     Args:
         x: Point(s) at which to evaluate the transition.
         x0: Centre of the transition; the offset is zero when ``x == x0``.
         k: Factor multiplying the offset before it is passed to ``expit``.

     Returns:
         ``expit(k * (x - x0))``. ``expit`` is presumably
         ``scipy.special.expit`` (the import is outside this view).
     """
     scaled_offset = k * (x - x0)
     return expit(scaled_offset)

 # Sigmoid-shaped transition centred on x0
 def sigmoid_transition(x, x0, k=0.1):
     """Evaluate ``expit`` at the scaled offset ``k * (x - x0)``.

     Args:
         x: Point(s) at which to evaluate the transition.
         x0: Centre of the transition; the offset is zero when ``x == x0``.
         k: Factor multiplying the offset before it is passed to ``expit``.

     Returns:
         ``expit(k * (x - x0))``. ``expit`` is presumably
         ``scipy.special.expit`` (the import is outside this view).
     """
     scaled_offset = k * (x - x0)
     return expit(scaled_offset)
def apply_template(
    fig,
    template="none",
    annotation_text="",
    title=None,
    width=1200,
    height=600,
    font_family="Garamond",
):
    """Apply the shared layout template to ``fig`` in place.

    Args:
        fig: Figure object exposing ``update_layout``, ``update_xaxes`` and
            ``update_yaxes``; it is mutated.
        template: Value for the layout's ``template`` key.
        annotation_text: Optional credit text. When non-empty it is added,
            in italics, as an arrow-less annotation positioned in paper
            coordinates at ``x=1.05, y=-0.05``. ``None`` and ``""`` both
            mean "no annotation".
        title: Optional title. ``None`` leaves the figure's title untouched.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
        font_family: Font family used for body text, title, legend and both
            axes. Defaults to the previously hard-coded "Garamond".

    Returns:
        None. The figure is modified in place.
    """
    layout_updates = {
        "template": template,
        "width": width,
        "height": height,
        "font": dict(family=font_family, size=14),
        "title_font_family": font_family,
        "title_font_size": 24,
        "title_xanchor": "center",
        "legend": dict(
            itemsizing="constant",
            title_font_family=font_family,
            font=dict(family=font_family, size=14),
            itemwidth=30,
        ),
    }

    # Truthiness check instead of len(): also tolerates annotation_text=None.
    if annotation_text:
        layout_updates["annotations"] = [
            dict(
                text=f"<i>{annotation_text}</i>",
                xref="paper",
                yref="paper",
                x=1.05,
                y=-0.05,
                xanchor="left",
                yanchor="top",
                showarrow=False,
                font=dict(size=14),
            )
        ]

    if title is not None:
        if isinstance(title, str):
            # Only touch the title text so the title font/anchor keys above
            # survive. NOTE(review): assigning a bare string to ``title`` is
            # believed to replace the whole title object — confirm against
            # plotly's update semantics.
            layout_updates["title_text"] = title
        else:
            # Non-string titles (e.g. a dict of title properties) are passed
            # through unchanged.
            layout_updates["title"] = title

    fig.update_layout(layout_updates)
    fig.update_xaxes(title_font_family=font_family, tickfont_family=font_family)
    fig.update_yaxes(title_font_family=font_family, tickfont_family=font_family)