From eb2c58cc2a98eca2a4aac44a65539cd5e74eac45 Mon Sep 17 00:00:00 2001 From: Jackie Lian Date: Wed, 30 Oct 2024 19:41:16 -0700 Subject: [PATCH 1/2] modify arena leaderboard to include input & output columns --- fastchat/serve/monitor/monitor.py | 62 +++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/fastchat/serve/monitor/monitor.py b/fastchat/serve/monitor/monitor.py index e34d046a6..9e1244760 100644 --- a/fastchat/serve/monitor/monitor.py +++ b/fastchat/serve/monitor/monitor.py @@ -191,6 +191,9 @@ def load_leaderboard_table_csv(filename, add_hyperlink=True): v = round(ast.literal_eval(v), 2) else: v = np.nan + elif h == "input_token_price" or h == "output_token_price": + if v == "null": + v = "-" item[h] = v if add_hyperlink: item["Model"] = model_hyperlink(item["Model"], item["Link"]) @@ -242,7 +245,11 @@ def get_full_table(arena_df, model_table_df, model_to_score): row.append(model_table_df.iloc[i]["Organization"]) # license row.append(model_table_df.iloc[i]["License"]) - + # Input Price + row.append(model_table_df.iloc[i]["input_token_price"]) + # Output Price + row.append(model_table_df.iloc[i]["output_token_price"]) + values.append(row) values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9) return values @@ -350,12 +357,14 @@ def process_row(row): f"+{round(row['rating_q975'] - row['rating'])}/-{round(row['rating'] - row['rating_q025'])}", round(row["num_battles"]), model_info.get("Organization", "Unknown"), - model_info.get("License", "Unknown"), - ( - "Unknown" - if model_info.get("Knowledge cutoff date", "-") == "-" - else model_info.get("Knowledge cutoff date", "Unknown") - ), + # model_info.get("License", "Unknown"), + # ( + # "Unknown" + # if model_info.get("Knowledge cutoff date", "-") == "-" + # else model_info.get("Knowledge cutoff date", "Unknown") + # ), + model_info.get("input_token_price", "-"), + model_info.get("output_token_price", "-"), ] ) return result @@ -378,8 +387,10 @@ def update_leaderboard_df(arena_table_vals): "95% CI", "Votes", "Organization", - "License", - "Knowledge Cutoff", + # "License", + # "Knowledge Cutoff", + "Input Token Price", + "Output Token Price", ] elo_dataframe = pd.DataFrame(arena_table_vals, columns=columns) @@ -407,8 +418,10 @@ def update_overall_leaderboard_df(arena_table_vals): "95% CI", "Votes", "Organization", - "License", - "Knowledge Cutoff", + # "License", + # "Knowledge Cutoff", + "Input Token Price", + "Output Token Price", ] elo_dataframe = pd.DataFrame(arena_table_vals, columns=columns) @@ -514,8 +527,10 @@ def update_leaderboard_and_plots(category, filters): "95% CI", "Votes", "Organization", - "License", - "Knowledge Cutoff", + # "License", + # "Knowledge Cutoff", + "Input Token Price", + "Output Token Price", ], datatype=[ "number", @@ -531,7 +546,7 @@ def update_leaderboard_and_plots(category, filters): value=arena_values, elem_id="arena_leaderboard_dataframe", height=1000, - column_widths=[75, 75, 180, 60, 60, 60, 70, 80, 60], + column_widths=[75, 75, 180, 60, 60, 60, 70, 55, 55], wrap=True, ) else: @@ -545,8 +560,10 @@ def update_leaderboard_and_plots(category, filters): "95% CI", "Votes", "Organization", - "License", - "Knowledge Cutoff", + # "License", + # "Knowledge Cutoff", + "Input Token Price", + "Output Token Price", ], datatype=[ "number", @@ -562,7 +579,7 @@ def update_leaderboard_and_plots(category, filters): value=arena_values, elem_id="arena_leaderboard_dataframe", height=1000, - column_widths=[75, 75, 180, 60, 60, 60, 70, 80, 60], + column_widths=[75, 75, 180, 60, 60, 60, 70, 55, 55], wrap=True, ) @@ -631,8 +648,10 @@ def update_leaderboard_and_plots(category, filters): "95% CI", "Votes", "Organization", - "License", - "Knowledge Cutoff", + # "License", + # "Knowledge Cutoff", + "Input Token Price", + "Output Token Price", ], datatype=[ "number", @@ -648,7 +667,7 @@ def update_leaderboard_and_plots(category, filters): value=arena_vals, elem_id="arena_leaderboard_dataframe", height=1000, - column_widths=[75, 75, 180, 60, 60, 60, 70, 80, 60], + column_widths=[75, 75, 180, 60, 60, 60, 70, 55, 55], wrap=True, ) @@ -895,6 +914,8 @@ def get_model_name(model_key): combined_table = [] for category in elo_results.keys(): + if category not in key_to_category_name: + continue df = elo_results[category]["leaderboard_table_df"].copy() # remove deprecated models df = df.loc[~df.index.isin(deprecated_model_name)] @@ -1187,3 +1208,4 @@ def build_demo(elo_results_file, leaderboard_table_file, arena_hard_leaderboard) max_threads=200, auth=(args.password[0], args.password[1]) if args.password else None, ) + \ No newline at end of file From 615b92e294fc7ac99e04b686c80398dccc3cbe9f Mon Sep 17 00:00:00 2001 From: Jackie Lian Date: Tue, 5 Nov 2024 10:37:49 -0800 Subject: [PATCH 2/2] updated monitor to have new price columns --- fastchat/serve/monitor/monitor.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fastchat/serve/monitor/monitor.py b/fastchat/serve/monitor/monitor.py index 9e1244760..66e46a822 100644 --- a/fastchat/serve/monitor/monitor.py +++ b/fastchat/serve/monitor/monitor.py @@ -249,7 +249,7 @@ def get_full_table(arena_df, model_table_df, model_to_score): row.append(model_table_df.iloc[i]["input_token_price"]) # Output Price row.append(model_table_df.iloc[i]["output_token_price"]) - + values.append(row) values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9) return values @@ -363,7 +363,7 @@ def process_row(row): # if model_info.get("Knowledge cutoff date", "-") == "-" # else model_info.get("Knowledge cutoff date", "Unknown") # ), - model_info.get("input_token_price", "-"), + model_info.get("input_token_price", "-"), model_info.get("output_token_price", "-"), ] ) @@ -389,8 +389,8 @@ def update_leaderboard_df(arena_table_vals): "Organization", # "License", # "Knowledge Cutoff", - "Input Token Price", - "Output Token Price", + "Input Token Price", + "Output Token Price", ] elo_dataframe = pd.DataFrame(arena_table_vals, columns=columns) @@ -1208,4 +1208,3 @@ def build_demo(elo_results_file, leaderboard_table_file, arena_hard_leaderboard) max_threads=200, auth=(args.password[0], args.password[1]) if args.password else None, ) - \ No newline at end of file