Updating with LCBenchTabular original search space #6

Closed
wants to merge 1 commit into from
1 change: 0 additions & 1 deletion src/mfpbench/benchmark.py
@@ -263,7 +263,6 @@ def trajectory(
        to = to if to is not None else self.end
        frm = frm if frm is not None else self.start
        step = step if step is not None else self.step
-
        if not isinstance(config, self.Config):
            _config = self.Config.from_dict(config)
        else:
94 changes: 92 additions & 2 deletions src/mfpbench/lcbench_tabular/benchmark.py
@@ -5,6 +5,9 @@
from typing import Any, ClassVar, Mapping

import pandas as pd
+from ConfigSpace import (
+    ConfigurationSpace, Constant, UniformFloatHyperparameter, UniformIntegerHyperparameter
+)

from mfpbench.config import TabularConfig
from mfpbench.result import Result
@@ -15,7 +18,7 @@
@dataclass(frozen=True, eq=False, unsafe_hash=True)  # type: ignore[misc]
class LCBenchTabularConfig(TabularConfig):
    batch_size: int
-    loss: str
+    # loss: str
Contributor comment: Good spot

    imputation_strategy: str
    learning_rate_scheduler: str
    network: str
@@ -35,6 +38,7 @@ class LCBenchTabularConfig(TabularConfig):

@dataclass(frozen=True)  # type: ignore[misc]
class LCBenchTabularResult(Result[LCBenchTabularConfig, int]):
+    loss: float
    time: float
    val_accuracy: float
    val_cross_entropy: float
@@ -178,7 +182,7 @@ def __init__(
        super().__init__(
            table=table,
            name=f"lcbench_tabular-{task_id}",
-            config_name="config_id",
+            config_name="id",
Contributor comment: This is used to extract the config_ids from the table, so it should be "config_id", as that's what's in the table. It should not affect query(); the fix was something else.

            fidelity_name=cls.fidelity_name,
            result_keys=LCBenchTabularResult.names(),
            config_keys=LCBenchTabularConfig.names(),
@@ -187,3 +191,89 @@
            prior=prior,
            perturb_prior=perturb_prior,
        )
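
For context on the comment above: a hedged sketch of how a config_name such as "config_id" might be used to pull ids out of the table. The index layout is an assumption, not code from this PR:

    # Hypothetical: collect the unique config ids named by `config_name`
    config_ids = table.index.get_level_values("config_id").unique().tolist()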

    @property
    def fidelity_range(self) -> tuple[int, int, int]:
        return (1, 51, 1)
Contributor comment on lines +195 to +197: Gotten from the table now. I don't want to indicate that people should override this, as then there is a mismatch between the table contents and what the benchmark advertises. Fixed this for lcbench by dropping the 0'th epoch.
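
Purely for illustration, "dropping the 0'th epoch" from a pandas table whose index carries the epoch might look like the following; the index level name "epoch" is an assumption, not code from this PR:

    # Hypothetical sketch: keep only epochs >= 1 so the advertised
    # fidelity_range (1, 51, 1) matches the table contents
    table = table[table.index.get_level_values("epoch") > 0]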


    def get_raw_space(self, name: int | None = None, seed: int | None = None) -> ConfigurationSpace:
Contributor comment:

Hmmm, I'm not sure how to approach this, actually. I would prefer this to be a property of the benchmark, but of course it's going to be specific to each tabular benchmark, and some may not have one.

My suggestion, which minimizes type differences, is:

  • The classic Benchmark will always have a .space: ConfigurationSpace, as it does currently.
  • TabularBenchmark will also always have a .space. Implementations will be required to either pass in an empty ConfigurationSpace if none is available, or pass in whatever space they can associate with the table (i.e. as you've done here).

Changes:

  • This will remove the current version of .space for TabularBenchmark, which is currently just a categorical over ids. The ids can still be accessed through self.configs: dict[config_id, Config] or through .config_keys.

This design serves two purposes:

  • Someone using the GenericTabularBenchmark interface can pass in the raw space they want to incorporate, and it will act like any prebuilt benchmark.
  • It keeps behaviour more uniform, i.e. there's less chance people do weird funky shit in their own overridden get_raw_space(). They either pass in a space or not, and we can handle the logic for that in one place. A minimal sketch of this contract follows below.
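
A minimal sketch of the suggested contract, with hypothetical names (this is not code from the PR):

    # Sketch of the reviewer's proposal: `.space` always exists on a
    # tabular benchmark, defaulting to an empty ConfigurationSpace.
    from ConfigSpace import ConfigurationSpace

    class SketchTabularBenchmark:
        def __init__(self, name: str, space: ConfigurationSpace | None = None, seed: int | None = None):
            # Fall back to an empty space when no raw space is known for the table
            self.space = space if space is not None else ConfigurationSpace(name=name, seed=seed)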

"""Create the configuration space for the benchmark.

Args:
name: The name for the configuration space.
seed: The seed to use for the configuration space.

Returns:
The configuration space for the benchmark.
"""
        # obtained from https://github.com/automl/lcbench#dataset-overview
        cs = ConfigurationSpace(name=name, seed=seed)
        cs.add_hyperparameters(
            [
                UniformIntegerHyperparameter(
                    "batch_size",
                    lower=16,
                    upper=512,
                    log=True,
                    default_value=128,  # approximately log-spaced middle of range
                ),
                UniformFloatHyperparameter(
                    "learning_rate",
                    lower=1.0e-4,
                    upper=1.0e-1,
                    log=True,
                    default_value=1.0e-3,  # popular choice of LR
                ),
                UniformFloatHyperparameter(
                    "momentum",
                    lower=0.1,
                    upper=0.99,
                    log=False,
                    default_value=0.9,  # popular choice, also not on the boundary
                ),
                UniformFloatHyperparameter(
                    "weight_decay",
                    lower=1.0e-5,
                    upper=1.0e-1,
                    log=False,
                    default_value=1.0e-2,  # reasonable default
                ),
                UniformIntegerHyperparameter(
                    "num_layers",
                    lower=1,
                    upper=5,
                    log=False,
                    default_value=3,  # middle of range
                ),
                UniformIntegerHyperparameter(
                    "max_units",
                    lower=64,
                    upper=1024,
                    log=True,
                    default_value=256,  # approximately log-spaced middle of range
                ),
                UniformFloatHyperparameter(
                    "max_dropout",
                    lower=0,
                    upper=1,
                    log=False,
                    default_value=0.2,  # reasonable default
                ),
                *self._get_constant_hyperparameters(),
            ],
        )
        return cs

    def _get_constant_hyperparameters(self) -> list:
        constants = [
            Constant("cosine_annealing_T_max", 50),
            Constant("cosine_annealing_eta_min", 0.0),
            Constant("normalization_strategy", "standardize"),
            Constant("optimizer", "sgd"),
            Constant("learning_rate_scheduler", "cosine_annealing"),
            Constant("network", "shapedmlpnet"),
            Constant("activation", "relu"),
            Constant("mlp_shape", "funnel"),
            Constant("imputation_strategy", "mean"),
        ]
        return constants
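
For illustration, exercising the reconstructed space could look like this (assuming an instantiated LCBenchTabularBenchmark bound to bench; sample_configuration() is standard ConfigSpace API):

    # Sketch: build the raw LCBench space and draw one configuration from it
    space = bench.get_raw_space(seed=0)
    sample = space.sample_configuration()
    print(sample)  # the constants, e.g. optimizer='sgd', are included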
36 changes: 35 additions & 1 deletion src/mfpbench/tabular.py
@@ -168,18 +168,27 @@ def is_constant(_s: pd.Series) -> bool:
        start = sorted_fids[0]
        end = sorted_fids[-1]
        step = sorted_fids[1] - sorted_fids[0]
+        self._start = sorted_fids[0]
+        self._end = sorted_fids[-1]
+        self._step = sorted_fids[1] - sorted_fids[0]
Contributor comment on lines +171 to +173:

These are set in Benchmark and obtained from self.fidelity_range. I've made sure to drop the 0'th epoch from the lcbench tabular data such that:

    bench.start = 1
    bench.end = 51
    bench.step = 1
    bench.fidelity_range = (1, 51, 1)


        # Create the configuration space with just the ids
        space = ConfigurationSpace(name, seed=seed)
        space.add_hyperparameter(CategoricalHyperparameter("id", list(configs)))

+        # Create the raw configuration space
+        try:
+            self.raw_space = self.get_raw_space(name=name, seed=seed)
+        except NotImplementedError:
+            self.raw_space = None

        self.table = table
        self.configs = configs
        self.fidelity_name = fidelity_name
        self.config_name = config_name
        self.config_keys = sorted(config_keys)
        self.result_keys = sorted(result_keys)
-        self.fidelity_range = (start, end, step)  # type: ignore
+        # self.fidelity_range = (start, end, step)  # type: ignore

        super().__init__(
            name=name,
@@ -189,6 +198,31 @@
            perturb_prior=perturb_prior,
        )

    @property
    def fidelity_range(self) -> tuple[int, int, int]:
        """Get the range of fidelities for this benchmark.

        Returns:
            The range of fidelities for this benchmark.
        """
        return (self._start, self._end, self._step)

    @property
    def raw_search_space(self) -> ConfigurationSpace | None:
        # None when the subclass does not implement get_raw_space()
        return self.raw_space

    def get_raw_space(self, name: int | None = None, seed: int | None = None) -> ConfigurationSpace:
        """Create the configuration space for the benchmark.

        Args:
            name: The name for the configuration space.
            seed: The seed to use for the configuration space.

        Returns:
            The configuration space for the benchmark.
        """
        raise NotImplementedError
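
A minimal sketch of the fallback this stub enables (hypothetical subclass, not part of the PR):

    # Hypothetical: a tabular benchmark that has no known raw space.
    # __init__ catches the NotImplementedError above, so raw_space is None
    # and callers can fall back to the id-only categorical `.space`.
    class OpaqueTabularBenchmark(TabularBenchmark):
        pass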

    def query(
        self,
        config: CTabular | Mapping[str, Any] | str,