@@ -317,11 +317,31 @@ def _aggregate_summary_metrics(
317
317
f"{ metric .metric_name } /{ constants .MetricResult .PAIRWISE_CHOICE_KEY } "
318
318
)
319
319
if pairwise_choice_col_name in metrics_table :
320
+ candidate_model_win_rate_choices = [
321
+ "CANDIDATE" ,
322
+ utils .RATING_TO_VERDICT ["B>>A" ],
323
+ utils .RATING_TO_VERDICT ["A<<B" ],
324
+ utils .RATING_TO_VERDICT ["B>A" ],
325
+ utils .RATING_TO_VERDICT ["A<B" ],
326
+ ]
327
+ baseline_model_win_rate_choices = [
328
+ "BASELINE" ,
329
+ utils .RATING_TO_VERDICT ["A>>B" ],
330
+ utils .RATING_TO_VERDICT ["B<<A" ],
331
+ utils .RATING_TO_VERDICT ["B<A" ],
332
+ utils .RATING_TO_VERDICT ["A>B" ],
333
+ ]
320
334
summary_metrics [
321
335
f"{ metric .metric_name } /candidate_model_win_rate"
322
- ] = (metrics_table [pairwise_choice_col_name ] == "CANDIDATE" ).mean ()
336
+ ] = (
337
+ metrics_table [pairwise_choice_col_name ].isin (
338
+ candidate_model_win_rate_choices
339
+ )
340
+ ).mean ()
323
341
summary_metrics [f"{ metric .metric_name } /baseline_model_win_rate" ] = (
324
- metrics_table [pairwise_choice_col_name ] == "BASELINE"
342
+ metrics_table [pairwise_choice_col_name ].isin (
343
+ baseline_model_win_rate_choices
344
+ )
325
345
).mean ()
326
346
else :
327
347
score_col_name = f"{ str (metric )} /{ constants .MetricResult .SCORE_KEY } "
0 commit comments