-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Updating with LCBenchTabular original search space #6
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,9 @@ | |
from typing import Any, ClassVar, Mapping | ||
|
||
import pandas as pd | ||
from ConfigSpace import ( | ||
ConfigurationSpace, Constant, UniformFloatHyperparameter, UniformIntegerHyperparameter | ||
) | ||
|
||
from mfpbench.config import TabularConfig | ||
from mfpbench.result import Result | ||
|
@@ -15,7 +18,7 @@ | |
@dataclass(frozen=True, eq=False, unsafe_hash=True) # type: ignore[misc] | ||
class LCBenchTabularConfig(TabularConfig): | ||
batch_size: int | ||
loss: str | ||
# loss: str | ||
imputation_strategy: str | ||
learning_rate_scheduler: str | ||
network: str | ||
|
@@ -35,6 +38,7 @@ class LCBenchTabularConfig(TabularConfig): | |
|
||
@dataclass(frozen=True) # type: ignore[misc] | ||
class LCBenchTabularResult(Result[LCBenchTabularConfig, int]): | ||
loss: float | ||
time: float | ||
val_accuracy: float | ||
val_cross_entropy: float | ||
|
@@ -178,7 +182,7 @@ def __init__( | |
super().__init__( | ||
table=table, | ||
name=f"lcbench_tabular-{task_id}", | ||
config_name="config_id", | ||
config_name="id", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is used to extract out the |
||
fidelity_name=cls.fidelity_name, | ||
result_keys=LCBenchTabularResult.names(), | ||
config_keys=LCBenchTabularConfig.names(), | ||
|
@@ -187,3 +191,89 @@ def __init__( | |
prior=prior, | ||
perturb_prior=perturb_prior, | ||
) | ||
|
||
@property
def fidelity_range(self) -> tuple[int, int, int]:
    """Return the fidelity range as a `(start, end, step)` tuple.

    The values are hard-coded here to `(1, 51, 1)`.
    """
    start, end, step = 1, 51, 1
    return (start, end, step)
Comment on lines
+195
to
+197
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is now derived from the table. I don't want to suggest that people should override this, because that would create a mismatch between the table contents and what the benchmark advertises. Fixed this for lcbench by dropping the 0th epoch. |
||
|
||
def get_raw_space(self, name: int | None = None, seed: int | None = None) -> ConfigurationSpace: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm, I'm not sure how to approach this actually. I would prefer this to be a property of the benchmark but of course it's going to be specific to each tabular benchmark, which some may not have: My suggestion that minimizes type difference is:
Changes:
This design serves two purposes:
|
||
"""Create the configuration space for the benchmark. | ||
|
||
Args: | ||
name: The name for the configuration space. | ||
seed: The seed to use for the configuration space. | ||
|
||
Returns: | ||
The configuration space for the benchmark. | ||
""" | ||
# obtained from https://github.com/automl/lcbench#dataset-overview | ||
cs = ConfigurationSpace(name=name, seed=seed) | ||
cs.add_hyperparameters( | ||
[ | ||
UniformIntegerHyperparameter( | ||
"batch_size", | ||
lower=16, | ||
upper=512, | ||
log=True, | ||
default_value=128, # approximately log-spaced middle of range | ||
), | ||
UniformFloatHyperparameter( | ||
"learning_rate", | ||
lower=1.0e-4, | ||
upper=1.0e-1, | ||
log=True, | ||
default_value=1.0e-3, # popular choice of LR | ||
), | ||
UniformFloatHyperparameter( | ||
"momentum", | ||
lower=0.1, | ||
upper=0.99, | ||
log=False, | ||
default_value=0.9, # popular choice, also not on the boundary | ||
), | ||
UniformFloatHyperparameter( | ||
"weight_decay", | ||
lower=1.0e-5, | ||
upper=1.0e-1, | ||
log=False, | ||
default_value=1.0e-2, # reasonable default | ||
), | ||
UniformIntegerHyperparameter( | ||
"num_layers", | ||
lower=1, | ||
upper=5, | ||
log=False, | ||
default_value=3, # middle of range | ||
), | ||
UniformIntegerHyperparameter( | ||
"max_units", | ||
lower=64, | ||
upper=1024, | ||
log=True, | ||
default_value=256, # approximately log-spaced middle of range | ||
), | ||
UniformFloatHyperparameter( | ||
"max_dropout", | ||
lower=0, | ||
upper=1, | ||
log=False, | ||
default_value=0.2, # reasonable default | ||
), | ||
*self._get_constant_hyperparameters(), | ||
], | ||
) | ||
return cs | ||
|
||
def _get_constant_hyperparameters(self) -> list:
    """Build the hyperparameters that are pinned to a single fixed value.

    Returns:
        A list of `Constant` hyperparameters, one per fixed setting, in the
        order they are declared below.
    """
    # (name, value) pairs; each one becomes a `Constant` hyperparameter.
    fixed_values = (
        ("cosine_annealing_T_max", 50),
        ("cosine_annealing_eta_min", 0.0),
        ("normalization_strategy", "standardize"),
        ("optimizer", "sgd"),
        ("learning_rate_scheduler", "cosine_annealing"),
        ("network", "shapedmlpnet"),
        ("activation", "relu"),
        ("mlp_shape", "funnel"),
        ("imputation_strategy", "mean"),
    )
    return [Constant(key, value) for key, value in fixed_values]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,18 +168,27 @@ def is_constant(_s: pd.Series) -> bool: | |
start = sorted_fids[0] | ||
end = sorted_fids[-1] | ||
step = sorted_fids[1] - sorted_fids[0] | ||
self._start = sorted_fids[0] | ||
self._end = sorted_fids[-1] | ||
self._step = sorted_fids[1] - sorted_fids[0] | ||
Comment on lines
+171
to
+173
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are set in bench.start = 1
bench.end = 51
bench.step = 1
bench.fidelity_range = (1, 51, 1) |
||
|
||
# Create the configuration space with just the ids | ||
space = ConfigurationSpace(name, seed=seed) | ||
space.add_hyperparameter(CategoricalHyperparameter("id", list(configs))) | ||
|
||
# Create the raw configuration space | ||
try: | ||
self.raw_space = self.get_raw_space(name=name, seed=seed) | ||
except NotImplementedError: | ||
self.raw_space = None | ||
|
||
self.table = table | ||
self.configs = configs | ||
self.fidelity_name = fidelity_name | ||
self.config_name = config_name | ||
self.config_keys = sorted(config_keys) | ||
self.result_keys = sorted(result_keys) | ||
self.fidelity_range = (start, end, step) # type: ignore | ||
# self.fidelity_range = (start, end, step) # type: ignore | ||
|
||
super().__init__( | ||
name=name, | ||
|
@@ -189,6 +198,31 @@ def is_constant(_s: pd.Series) -> bool: | |
perturb_prior=perturb_prior, | ||
) | ||
|
||
@property
def fidelity_range(self) -> tuple[int, int, int]:
    """Report the fidelity bounds and step size of this benchmark.

    Returns:
        A `(start, end, step)` tuple assembled from the stored `_start`,
        `_end` and `_step` attributes.
    """
    lowest, highest, stride = self._start, self._end, self._step
    return (lowest, highest, stride)
|
||
@property
def raw_search_space(self) -> ConfigurationSpace | None:
    """Return the raw configuration space stored on this benchmark.

    Returns:
        The value of `self.raw_space`. This is `None` when the benchmark's
        `get_raw_space` raised `NotImplementedError` during construction.
    """
    return self.raw_space
|
||
def get_raw_space(self, name: str | None = None, seed: int | None = None) -> ConfigurationSpace:
    """Create the raw configuration space for the benchmark.

    This base implementation always raises; benchmarks that have a raw
    search space override it.

    Args:
        name: The name for the configuration space.
        seed: The seed to use for the configuration space.

    Returns:
        The configuration space for the benchmark.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(
        f"{type(self).__name__} does not define a raw configuration space",
    )
|
||
def query( | ||
self, | ||
config: CTabular | Mapping[str, Any] | str, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good spot