Add test metric for LCBenchTabular #14

Closed · wants to merge 4 commits
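In short, this PR threads a second value metric, `value_metric_test`, through `Benchmark`, `TabularBenchmark`, `Result`, and the LCBenchTabular classes, so test-set performance can be read off alongside the validation metric. Below is a minimal usage sketch of the intended behaviour; the `mfpbench.get(...)` entry point, the `"lcbench_tabular"` key, and the `task_id` value are illustrative assumptions, not part of this diff:

```python
# Sketch only: get(...)/sample() and the task_id are assumed, following the
# mfpbench docs; the value_metric_test keyword is what this PR adds.
import mfpbench

bench = mfpbench.get(
    "lcbench_tabular",
    task_id="adult",                                 # assumed task id
    value_metric="val_balanced_accuracy",
    value_metric_test="test_balanced_accuracy",      # new keyword
)

config = bench.sample()
result = bench.query(config, at=10)

print(result.error)       # validation error, unchanged behaviour
print(result.test_error)  # new: error under the test metric
print(result.test_score)  # new: score under the test metric
```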
10 changes: 10 additions & 0 deletions src/mfpbench/benchmark.py
@@ -73,6 +73,7 @@ def __init__( # noqa: PLR0913
prior: str | Path | C | Mapping[str, Any] | None = None,
perturb_prior: float | None = None,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
):
"""Initialize the benchmark.
@@ -102,6 +103,8 @@ def __init__( # noqa: PLR0913
"""
if value_metric is None:
value_metric = result_type.default_value_metric
if value_metric_test is None:
value_metric_test = result_type.default_value_metric_test

if cost_metric is None:
cost_metric = result_type.default_cost_metric
@@ -110,6 +113,7 @@ def __init__( # noqa: PLR0913
self.seed = seed
self.space = space
self.value_metric = value_metric
self.value_metric_test = value_metric_test
self.cost_metric = cost_metric
self.fidelity_range: tuple[F, F, F] = fidelity_range
self.fidelity_name = fidelity_name
@@ -250,6 +254,7 @@ def query(
*,
at: F | None = None,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
) -> R:
"""Submit a query and get a result.
@@ -282,13 +287,15 @@
__config = {k: __config.get(v, v) for k, v in _reverse_renames.items()}

value_metric = value_metric if value_metric is not None else self.value_metric
value_metric_test = value_metric_test if value_metric_test is not None else self.value_metric_test
cost_metric = cost_metric if cost_metric is not None else self.cost_metric

return self.Result.from_dict(
config=config,
fidelity=at,
result=self._objective_function(__config, at=at),
value_metric=str(value_metric),
value_metric_test=value_metric_test,
cost_metric=str(cost_metric),
renames=self._result_renames,
)
@@ -301,6 +308,7 @@ def trajectory(
to: F | None = None,
step: F | None = None,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
) -> list[R]:
"""Get the full trajectory of a configuration.
@@ -330,6 +338,7 @@
__config = {k: __config.get(v, v) for k, v in _reverse_renames.items()}

value_metric = value_metric if value_metric is not None else self.value_metric
value_metric_test = value_metric_test if value_metric_test is not None else self.value_metric_test
cost_metric = cost_metric if cost_metric is not None else self.cost_metric

return [
@@ -338,6 +347,7 @@
fidelity=fidelity,
result=result,
value_metric=str(value_metric),
value_metric_test=value_metric_test,
cost_metric=str(cost_metric),
renames=self._result_renames,
)
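The new parameter follows the same resolution rule as `value_metric`: an explicit argument to `query()`/`trajectory()` wins, otherwise the value fixed at construction time is used, which itself falls back to the result class default. A small self-contained sketch of that fallback pattern (the helper name is mine, not part of the diff):

```python
def _resolve(override: str | None, instance_default: str) -> str:
    """Mirror the `x if x is not None else self.x` pattern used above."""
    return override if override is not None else instance_default

# e.g. with a benchmark whose value_metric_test is "test_balanced_accuracy":
assert _resolve(None, "test_balanced_accuracy") == "test_balanced_accuracy"
assert _resolve("test_cross_entropy", "test_balanced_accuracy") == "test_cross_entropy"
```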
7 changes: 5 additions & 2 deletions src/mfpbench/lcbench_tabular/benchmark.py
@@ -140,14 +140,15 @@ class LCBenchTabularConfig(TabularConfig):
class LCBenchTabularResult(Result[LCBenchTabularConfig, int]):
metric_defs: ClassVar[Mapping[str, Metric]] = {
"val_accuracy": Metric(minimize=False, bounds=(0, 100)),
"val_balanced_accuracy": Metric(minimize=False, bounds=(0, 100)),
"val_balanced_accuracy": Metric(minimize=False, bounds=(0, 1)),
"val_cross_entropy": Metric(minimize=True, bounds=(0, np.inf)),
"test_accuracy": Metric(minimize=False, bounds=(0, 100)),
"test_balanced_accuracy": Metric(minimize=False, bounds=(0, 100)),
"test_balanced_accuracy": Metric(minimize=False, bounds=(0, 1)),
"test_cross_entropy": Metric(minimize=True, bounds=(0, np.inf)),
"time": Metric(minimize=True, bounds=(0, np.inf)),
}
default_value_metric: ClassVar[str] = "val_balanced_accuracy"
default_value_metric_test: ClassVar[str] = "test_balanced_accuracy"
default_cost_metric: ClassVar[str] = "time"

time: Metric.Value
@@ -214,6 +215,7 @@ def __init__(
prior: str | Path | LCBenchTabularConfig | Mapping[str, Any] | None = None,
perturb_prior: float | None = None,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
) -> None:
"""Initialize the benchmark.
@@ -282,6 +284,7 @@
result_type=LCBenchTabularResult,
config_type=LCBenchTabularConfig,
value_metric=value_metric,
value_metric_test=value_metric_test,
cost_metric=cost_metric,
space=space,
seed=seed,
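Besides registering `test_balanced_accuracy` as the default test metric, this hunk also corrects the bounds of both balanced-accuracy metrics from (0, 100) to (0, 1). A hedged sketch of the expected defaults on a constructed benchmark; the `task_id`/`datadir` arguments are assumed and not shown in this diff:

```python
# Sketch: expected defaults after this change (constructor arguments other
# than the metric keywords are assumptions).
bench = LCBenchTabularBenchmark(task_id="adult", datadir="data/lcbench-tabular")

assert bench.value_metric == "val_balanced_accuracy"        # unchanged default
assert bench.value_metric_test == "test_balanced_accuracy"  # new default
assert bench.cost_metric == "time"
```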
27 changes: 27 additions & 0 deletions src/mfpbench/result.py
@@ -27,6 +27,9 @@ class Result(ABC, Generic[C, F]):
default_value_metric: ClassVar[str]
"""The default metric to use for this result."""

default_value_metric_test: ClassVar[str]
"""The default test metric to use for this result."""

default_cost_metric: ClassVar[str]
"""The default cost to use for this result."""

@@ -39,6 +42,9 @@ class Result(ABC, Generic[C, F]):
value_metric: str
"""The metric to use for this result."""

value_metric_test: str
"""The metric to use for this result."""

cost_metric: str
"""The cost to use for this result."""

@@ -50,6 +56,7 @@ def from_dict(
result: Mapping[str, float],
*,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
renames: Mapping[str, str] | None = None,
) -> Self:
@@ -64,15 +71,19 @@
}
if renames is not None:
values = {renames.get(k, k): v for k, v in values.items()}

if value_metric is None:
value_metric = cls.default_value_metric

if value_metric_test is None:
value_metric_test = cls.default_value_metric_test

if cost_metric is None:
cost_metric = cls.default_cost_metric

return cls(
config=config,
fidelity=fidelity,
value_metric=value_metric,
value_metric_test=value_metric_test,
cost_metric=cost_metric,
**values, # type: ignore
)
@@ -96,11 +107,27 @@ def error(self) -> float:
"""The error of interest."""
return self[self.value_metric].error

@property
def test_error(self) -> float:
"""The error of interest."""
return self[self.value_metric_test].error

@property
def score(self) -> float:
"""The score of interest."""
return self[self.value_metric].score

@property
def val_score(self) -> float:
"""The score of interest."""
# to maintain backward compatibility
return self.score

@property
def test_score(self) -> float:
"""The score of interest."""
return self[self.value_metric_test].score

@property
def values(self) -> dict[str, Any]:
"""Create a dict from this result with the raw values."""
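To make the contract of the new class variable and properties concrete, here is a self-contained toy analogue; it is not the actual `Result` class, which also handles fidelities, metric bounds, and renames:

```python
from dataclasses import dataclass


@dataclass
class ToyResult:
    """Toy stand-in for Result's new test-metric plumbing (illustration only)."""

    values: dict[str, float]
    value_metric: str = "val_balanced_accuracy"
    value_metric_test: str = "test_balanced_accuracy"

    @property
    def score(self) -> float:
        return self.values[self.value_metric]

    @property
    def val_score(self) -> float:
        # Alias kept for backward compatibility, mirroring Result.val_score.
        return self.score

    @property
    def test_score(self) -> float:
        return self.values[self.value_metric_test]


r = ToyResult({"val_balanced_accuracy": 0.81, "test_balanced_accuracy": 0.78})
print(r.val_score, r.test_score)  # 0.81 0.78
```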
8 changes: 8 additions & 0 deletions src/mfpbench/tabular.py
@@ -38,6 +38,7 @@ def __init__( # noqa: PLR0913
result_type: type[R],
config_type: type[CTabular],
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
space: ConfigurationSpace | None = None,
seed: int | None = None,
@@ -171,6 +172,7 @@ def __init__( # noqa: PLR0913
prior=prior,
perturb_prior=perturb_prior,
value_metric=value_metric,
value_metric_test=value_metric_test,
cost_metric=cost_metric,
)

@@ -196,6 +198,7 @@ def query(
*,
at: F | None = None,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
) -> R:
"""Submit a query and get a result.
@@ -241,13 +244,15 @@
__config = {k: __config.get(v, v) for k, v in _reverse_renames.items()}

value_metric = value_metric if value_metric is not None else self.value_metric
value_metric_test = value_metric_test if value_metric_test is not None else self.value_metric_test
cost_metric = cost_metric if cost_metric is not None else self.cost_metric

return self.Result.from_dict(
config=config,
fidelity=at,
result=self._objective_function(__config, at=at),
value_metric=str(value_metric),
value_metric_test=value_metric_test,
cost_metric=str(cost_metric),
renames=self._result_renames,
)
@@ -261,6 +266,7 @@ def trajectory(
to: F | None = None,
step: F | None = None,
value_metric: str | None = None,
value_metric_test: str | None = None,
cost_metric: str | None = None,
) -> list[R]:
"""Submit a query and get a result.
@@ -309,6 +315,7 @@
__config = {k: __config.get(v, v) for k, v in _reverse_renames.items()}

value_metric = value_metric if value_metric is not None else self.value_metric
value_metric_test = value_metric_test if value_metric_test is not None else self.value_metric_test
cost_metric = cost_metric if cost_metric is not None else self.cost_metric

return [
Expand All @@ -317,6 +324,7 @@ def trajectory(
fidelity=fidelity,
result=result,
value_metric=str(value_metric),
value_metric_test=value_metric_test,
cost_metric=str(cost_metric),
renames=self._result_renames,
)
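With the tabular path updated as well, the test metric is threaded through every point of a trajectory. A hedged usage sketch, reusing `bench` and `config` from the earlier example; the `frm`/`to` keywords and the fidelity bounds are assumptions, not taken from this diff:

```python
# Per-epoch validation vs. test score along a learning curve (sketch).
for r in bench.trajectory(config, frm=1, to=50):
    print(r.fidelity, round(r.score, 4), round(r.test_score, 4))
```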