8 changes: 4 additions & 4 deletions docs/source/developer_guide/add_benchmarks.rst
@@ -262,10 +262,10 @@ For ``@build_table``, the value returned should be of the form:
...
}

This will generate a table with columns for each metric, as well as "MLIP", "Score",
and "Rank" columns. Tooltips for each column header can also be set by the decorator,
as well as the location to save the JSON file to be loaded when building the app,
which typically would be placed in ``ml_peg/app/data/[category]/[benchmark_name]``.
This will generate a table with columns for each metric, as well as "MLIP" and "Score"
columns. Tooltips for each column header can also be set by the decorator, as well as
the location to save the JSON file to be loaded when building the app, which typically
would be placed in ``ml_peg/app/data/[category]/[benchmark_name]``.

Every benchmark should have at least one of these tables, which includes
the score for each metric, and allowing the table to calculate an overall score for the
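As a reader aid for this doc change, here is a minimal sketch consistent with the updated text — the function, metric names, and values are invented, and the exact nested return structure stays elided above:

```python
from ml_peg.analysis.utils.decorators import build_table

# Hypothetical benchmark analysis; assumes the returned mapping is
# metric -> {MLIP name: value}, matching "columns for each metric".
@build_table(filename="table.json")
def analyse_example_benchmark() -> dict[str, dict[str, float]]:
    return {
        "Energy MAE": {"model_a": 0.012, "model_b": 0.034},
        "Force RMSE": {"model_a": 0.045, "model_b": 0.061},
    }
```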
13 changes: 4 additions & 9 deletions ml_peg/analysis/utils/decorators.py
@@ -12,7 +12,7 @@
import numpy as np
import plotly.graph_objects as go

from ml_peg.analysis.utils.utils import calc_ranks, calc_table_scores
from ml_peg.analysis.utils.utils import calc_table_scores
from ml_peg.app.utils.utils import Thresholds


@@ -274,8 +274,7 @@ def build_table(
filename
Filename to save table. Default is "table.json".
metric_tooltips
Tooltips for table metric headers. Defaults are set for "MLIP", "Score", and
"Rank".
Tooltips for table metric headers. Defaults are set for "MLIP" and "Score".
normalize
Whether to apply normalisation when calculating the score. Default is True.
normalizer
@@ -365,10 +364,7 @@ def build_table_wrapper(*args, **kwargs) -> dict[str, Any]:
| {"id": mlip},
)

summary_tooltips = {
"MLIP": "Name of the model",
"Rank": "Model rank based on score (lower is better)",
}
summary_tooltips = {"MLIP": "Name of the model"}
if normalize:
summary_tooltips["Score"] = (
"Average of normalised metrics (higher is better)"
@@ -391,8 +387,7 @@ def build_table_wrapper(*args, **kwargs) -> dict[str, Any]:
else:
metrics_data = calc_table_scores(metrics_data)

metrics_data = calc_ranks(metrics_data)
metrics_columns += ("Score", "Rank")
metrics_columns += ("Score",)

metric_weights = weights if weights else {}
for column in results:
45 changes: 5 additions & 40 deletions ml_peg/analysis/utils/utils.py
@@ -8,7 +8,6 @@
from matplotlib import cm
from matplotlib.colors import Colormap
import numpy as np
from scipy.stats import rankdata
from sklearn.metrics import mean_absolute_error, mean_squared_error
from yaml import safe_load

@@ -145,7 +144,7 @@ def calc_metric_scores(
for row in metrics_scores:
for key, value in row.items():
# Value may be ``None`` if missing for a benchmark
if key not in {"MLIP", "Score", "Rank", "id"} and value is not None:
if key not in {"MLIP", "Score", "id"} and value is not None:
if cleaned_thresholds is None or key not in cleaned_thresholds:
row[key] = value
continue
@@ -194,7 +193,7 @@ def calc_table_scores(
weights_list = []
for key, value in metrics_row.items():
# Value may be ``None`` if missing for a benchmark
if key not in {"MLIP", "Score", "Rank", "id"} and value is not None:
if key not in {"MLIP", "Score", "id"} and value is not None:
scores_list.append(scores_row[key])
weights_list.append(weights.get(key, 1.0))

@@ -210,36 +209,6 @@
return metrics_data
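Reviewer note: the averaging step that consumes these lists sits outside the visible hunk; a hedged sketch of the presumed weighted-mean aggregation:

```python
import numpy as np

# Assumed aggregation for one row: per-metric normalised scores weighted by
# user weights (missing weights default to 1.0 via weights.get(key, 1.0)).
scores_list = [0.8, 0.6]
weights_list = [1.0, 2.0]
score = float(np.average(scores_list, weights=weights_list))
print(score)  # (0.8*1.0 + 0.6*2.0) / 3.0 = 0.667 (3 s.f.)
```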


def calc_ranks(metrics_data: list[dict]) -> list[dict]:
"""
Calculate rank for each model and add to table data.

Parameters
----------
metrics_data
Rows data containing model name, metric values, and Score.
The "Score" column is used to calculate the rank, with the highest score ranked
1.

Returns
-------
list[dict]
Rows of data with rank for each model added.
"""
# If a score is None, set to NaN for ranking purposes, but do not rank
ranked_scores = rankdata(
[x["Score"] if x.get("Score") is not None else np.nan for x in metrics_data],
nan_policy="omit",
method="max",
)
for i, row in enumerate(metrics_data):
if np.isnan(ranked_scores[i]):
row["Rank"] = None
else:
row["Rank"] = len(ranked_scores) - int(ranked_scores[i]) + 1
return metrics_data
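For anyone tracing this deletion, a simplified standalone check of what `calc_ranks` computed (None handling omitted): `rankdata` ranks ascending, so `len(ranked) - rank + 1` inverts it to make the highest score rank 1.

```python
from scipy.stats import rankdata

scores = [0.9, 0.5, 0.7]                       # "Score" column values
ranked = rankdata(scores, method="max")        # ascending ranks: [3., 1., 2.]
ranks = [len(ranked) - int(r) + 1 for r in ranked]
print(ranks)                                   # [1, 3, 2]: highest score ranks first
```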


def get_table_style(
data: list[TableRow],
*,
@@ -339,10 +308,7 @@ def rgba_from_val(val: float, vmin: float, vmax: float, cmap: Colormap) -> str:

# Use thresholds
if normalized:
if col != "Rank":
min_value, max_value = 1, 0
else:
min_value, max_value = 1, len(numeric_values)
min_value, max_value = 1, 0
else:
min_value = min(numeric_values)
max_value = max(numeric_values)
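With the `Rank` branch gone, normalised columns always use the inverted bounds (1, 0). Assuming `rgba_from_val` rescales linearly between `vmin` and `vmax` (its body is outside the hunk), the effect is:

```python
def scale_fraction(val: float, vmin: float, vmax: float) -> float:
    # Assumed linear rescaling inside rgba_from_val before colormap lookup.
    return (val - vmin) / (vmax - vmin)

print(scale_fraction(1.0, 1, 0))  # 0.0: a perfect score sits at one colormap end
print(scale_fraction(0.0, 1, 0))  # 1.0: a zero score sits at the other
```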
@@ -370,13 +336,13 @@ def rgba_from_val(val: float, vmin: float, vmax: float, cmap: Colormap) -> str:
return style_data_conditional


def update_score_rank_style(
def update_score_style(
data: list[MetricRow],
weights: dict[str, float] | None = None,
thresholds: Thresholds | None = None,
) -> tuple[list[MetricRow], list[TableRow]]:
"""
Update table scores, ranks, and table styles.
Update table scores and table styles.

Parameters
----------
@@ -395,7 +361,6 @@
"""
weights = clean_weights(weights)
data = calc_table_scores(data, weights, thresholds)
data = calc_ranks(data)
scored_data = calc_metric_scores(data, thresholds)
style = get_table_style(data, scored_data=scored_data)
return data, style
12 changes: 4 additions & 8 deletions ml_peg/app/build_app.py
@@ -11,11 +11,11 @@
from dash.html import H1, H3, Div
from yaml import safe_load

from ml_peg.analysis.utils.utils import calc_ranks, calc_table_scores, get_table_style
from ml_peg.analysis.utils.utils import calc_table_scores, get_table_style
from ml_peg.app import APP_ROOT
from ml_peg.app.utils.build_components import build_weight_components
from ml_peg.app.utils.register_callbacks import register_benchmark_to_category_callback
from ml_peg.app.utils.utils import calculate_column_widths, rank_format, sig_fig_format
from ml_peg.app.utils.utils import calculate_column_widths, sig_fig_format
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

@@ -194,17 +194,13 @@ def build_summary_table(
data.append({"MLIP": mlip} | summary_data[mlip])

data = calc_table_scores(data)
data = calc_ranks(data)

columns_headers = ("MLIP",) + tuple(tables.keys()) + ("Score", "Rank")
columns_headers = ("MLIP",) + tuple(tables.keys()) + ("Score",)

columns = [{"name": headers, "id": headers} for headers in columns_headers]
for column in columns:
column_id = column["id"]
if column_id == "Rank":
column["type"] = "numeric"
column["format"] = rank_format()
elif column_id != "MLIP":
if column_id != "MLIP":
column["type"] = "numeric"
column["format"] = sig_fig_format()

31 changes: 15 additions & 16 deletions ml_peg/app/utils/build_components.py
@@ -31,7 +31,7 @@ def grid_template_from_widths(
widths
Mapping of column names to pixel widths.
column_order
Ordered metric column names to render between the MLIP, Score, and Rank columns.
Ordered metric column names to render between the MLIP and Score columns.

Returns
-------
@@ -41,7 +41,6 @@
tracks: list[tuple[str, int]] = [("MLIP", widths["MLIP"])]
tracks.extend((col, widths[col]) for col in column_order)
tracks.append(("Score", widths["Score"]))
tracks.append(("Rank", widths["Rank"]))

template_parts: list[str] = []
for _, width in tracks:
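The loop body is cut off by the diff view; assuming each track contributes a pixel width to a CSS `grid-template-columns` string, the output would look like this (widths invented):

```python
# Hypothetical continuation, not the PR's code: assumed output shape only.
tracks = [("MLIP", 150), ("Energy MAE", 130), ("Score", 100)]
template = " ".join(f"{width}px" for _, width in tracks)
print(template)  # "150px 130px 100px"
```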
@@ -146,7 +145,7 @@ def build_weight_components(
"Threshold metadata must be provided when use_thresholds=True."
)
# Identify metric columns (exclude reserved columns)
reserved = {"MLIP", "Score", "Rank", "id"}
reserved = {"MLIP", "Score", "id"}
columns = [col["id"] for col in table.columns if col.get("id") not in reserved]

if not columns:
@@ -367,7 +366,7 @@ def build_test_layout(

# Inline normalization thresholds when metadata is supplied
if thresholds is not None:
reserved = {"MLIP", "Score", "Rank", "id"}
reserved = {"MLIP", "Score", "id"}
metric_columns = [
col["id"] for col in table.columns if col.get("id") not in reserved
]
@@ -663,19 +662,19 @@ def build_threshold_inputs(
)
)

for _ in ("Score", "Rank"):
cells.append(
Div(
"",
style={
"width": "100%",
"minWidth": "0",
"maxWidth": "100%",
"boxSizing": "border-box",
"border": "1px solid transparent",
},
)
# Score
cells.append(
Div(
"",
style={
"width": "100%",
"minWidth": "0",
"maxWidth": "100%",
"boxSizing": "border-box",
"border": "1px solid transparent",
},
)
)

store = Store(
id=f"{table_id}-thresholds-store",
6 changes: 1 addition & 5 deletions ml_peg/app/utils/load.py
@@ -15,7 +15,6 @@
clean_thresholds,
clean_weights,
is_numeric_column,
rank_format,
sig_fig_format,
)

@@ -65,10 +64,7 @@ def rebuild_table(filename: str | Path, id: str) -> DataTable:
width_labels.append(label_source)
if column_id is None:
continue
if column_id == "Rank":
column["type"] = "numeric"
column.setdefault("format", rank_format())
elif column.get("type") == "numeric" or is_numeric_column(data, column_id):
if column.get("type") == "numeric" or is_numeric_column(data, column_id):
column["type"] = "numeric"
column.setdefault("format", sig_fig_format())
if column_name is not None and not isinstance(column_name, str):
10 changes: 5 additions & 5 deletions ml_peg/app/utils/register_callbacks.py
@@ -12,7 +12,7 @@
calc_metric_scores,
calc_table_scores,
get_table_style,
update_score_rank_style,
update_score_style,
)
from ml_peg.app.utils.utils import (
Thresholds,
@@ -67,14 +67,14 @@ def update_summary_table(
row[tab] = values[row["MLIP"]]

# Update table contents
return update_score_rank_style(summary_data, stored_weights)
return update_score_style(summary_data, stored_weights)
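A hedged sketch of how the renamed helper slots into a Dash callback — the component IDs are illustrative, not taken from the app:

```python
from dash import Input, Output, State, callback

from ml_peg.analysis.utils.utils import update_score_style

@callback(
    Output("summary-table", "data"),
    Output("summary-table", "style_data_conditional"),
    Input("weights-store", "data"),
    State("summary-table", "data"),
)
def refresh_summary(stored_weights, rows):
    # update_score_style returns (data, style), matching the two Outputs.
    return update_score_style(rows, stored_weights)
```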


def register_category_table_callbacks(
table_id: str, use_thresholds: bool = False
) -> None:
"""
Register callback to update table scores/rankings when stored values change.
Register callback to update table scores when stored values change.

Parameters
----------
@@ -218,7 +218,7 @@ def update_table_scores(
if not table_data:
raise PreventUpdate

scored_rows, style = update_score_rank_style(table_data, stored_weights)
scored_rows, style = update_score_style(table_data, stored_weights)
return scored_rows, style, scored_rows

@callback(
@@ -337,7 +337,7 @@ def update_category_from_benchmark(
if mlip in benchmark_scores:
row[benchmark_column] = benchmark_scores[mlip]

category_rows, style = update_score_rank_style(category_rows, category_weights)
category_rows, style = update_score_style(category_rows, category_weights)
return category_rows, style, category_rows


Expand Down
19 changes: 3 additions & 16 deletions ml_peg/app/utils/utils.py
@@ -52,10 +52,9 @@ def calculate_column_widths(
# Fixed widths for static columns
widths.setdefault("MLIP", 150)
widths.setdefault("Score", 100)
widths.setdefault("Rank", 100)

for col in columns:
if col not in ("MLIP", "Score", "Rank"):
if col not in ("MLIP", "Score"):
# Calculate width based on column title length
calculated_width = len(col) * char_width + padding
# Enforce minimum width
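A worked instance of the width rule above; `char_width`, `padding`, and the minimum are invented, as their values sit outside this hunk:

```python
char_width, padding, min_width = 10, 20, 100        # invented values
col = "Energy MAE"
calculated_width = len(col) * char_width + padding  # 10 chars * 10 + 20 = 120
width = max(calculated_width, min_width)            # 120 px
```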
@@ -109,18 +108,6 @@ def sig_fig_format() -> TableFormat.Format:
)


def rank_format() -> TableFormat.Format:
"""
Build a formatter that displays integer ranks.

Returns
-------
TableFormat.Format
Dash table format configured for integer values.
"""
return TableFormat.Format().scheme(TableFormat.Scheme.decimal_integer)
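Context for the deleted formatter: it chained Dash's table `Format` API. A standalone equivalent, assuming the module import `from dash.dash_table import Format as TableFormat`:

```python
from dash.dash_table import Format as TableFormat

# decimal_integer corresponds to d3-format "d", so a rank of 2.0 rendered as "2".
fmt = TableFormat.Format().scheme(TableFormat.Scheme.decimal_integer)
```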


def clean_thresholds(
raw_thresholds: Mapping[str, Mapping[str, object]] | None,
) -> Thresholds:
Expand Down Expand Up @@ -271,7 +258,7 @@ def format_metric_columns(
return None

thresholds = thresholds or {}
reserved = {"MLIP", "Score", "Rank", "id"}
reserved = {"MLIP", "Score", "id"}
updated_columns: list[dict[str, object]] = []

for column in columns:
@@ -352,7 +339,7 @@ def format_tooltip_headers(
return None

thresholds = thresholds or {}
reserved = {"MLIP", "Score", "Rank", "id"}
reserved = {"MLIP", "Score", "id"}

updated: dict[str, str] = {}
for key, text in tooltip_header.items():