Updating with LCBenchTabular original search space #6

Closed
wants to merge 1 commit into from
1 change: 0 additions & 1 deletion src/mfpbench/benchmark.py
@@ -263,7 +263,6 @@ def trajectory(
        to = to if to is not None else self.end
        frm = frm if frm is not None else self.start
        step = step if step is not None else self.step
-
        if not isinstance(config, self.Config):
            _config = self.Config.from_dict(config)
        else:
94 changes: 92 additions & 2 deletions src/mfpbench/lcbench_tabular/benchmark.py
@@ -5,6 +5,9 @@
from typing import Any, ClassVar, Mapping

import pandas as pd
+from ConfigSpace import (
+    ConfigurationSpace, Constant, UniformFloatHyperparameter, UniformIntegerHyperparameter
+)

from mfpbench.config import TabularConfig
from mfpbench.result import Result
@@ -15,7 +18,7 @@
@dataclass(frozen=True, eq=False, unsafe_hash=True)  # type: ignore[misc]
class LCBenchTabularConfig(TabularConfig):
    batch_size: int
-    loss: str
+    # loss: str
Contributor comment: Good spot

    imputation_strategy: str
    learning_rate_scheduler: str
    network: str
@@ -35,6 +38,7 @@ class LCBenchTabularConfig(TabularConfig):

@dataclass(frozen=True)  # type: ignore[misc]
class LCBenchTabularResult(Result[LCBenchTabularConfig, int]):
+    loss: float
    time: float
    val_accuracy: float
    val_cross_entropy: float
@@ -178,7 +182,7 @@ def __init__(
        super().__init__(
            table=table,
            name=f"lcbench_tabular-{task_id}",
-            config_name="config_id",
+            config_name="id",
Contributor comment: This is used to extract the config_ids from the table, so it should be "config_id", as that's what's in the table. It should not affect query(); the fix was something else.

            fidelity_name=cls.fidelity_name,
            result_keys=LCBenchTabularResult.names(),
            config_keys=LCBenchTabularConfig.names(),
@@ -187,3 +191,89 @@
            prior=prior,
            perturb_prior=perturb_prior,
        )
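
For context on the comment above: a hedged sketch of how a config_name such as "config_id" might be used to pull ids out of the table. The index layout is an assumption, not code from this PR:

    # Hypothetical: collect the unique config ids named by `config_name`
    config_ids = table.index.get_level_values("config_id").unique().tolist()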

    @property
    def fidelity_range(self) -> tuple[int, int, int]:
        return (1, 51, 1)
Contributor comment on lines +195 to +197: Gotten from the table now. I don't want to indicate that people should override this, as then there is a mismatch between the table contents and what the benchmark advertises. Fixed this for lcbench by dropping the 0'th epoch.
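
Purely for illustration, "dropping the 0'th epoch" from a pandas table whose index carries the epoch might look like the following; the index level name "epoch" is an assumption, not code from this PR:

    # Hypothetical sketch: keep only epochs >= 1 so the advertised
    # fidelity_range (1, 51, 1) matches the table contents
    table = table[table.index.get_level_values("epoch") > 0]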


    def get_raw_space(self, name: int | None = None, seed: int | None = None) -> ConfigurationSpace:
Contributor comment:

Hmmm, I'm not sure how to approach this, actually. I would prefer this to be a property of the benchmark, but of course it's going to be specific to each tabular benchmark, and some may not have one.

My suggestion, which minimizes type differences, is:

  • The classic Benchmark will always have a .space: ConfigurationSpace, as it does currently.
  • TabularBenchmark will also always have a .space. Implementations will be required to either pass in an empty ConfigurationSpace if none is available, or pass in whatever space they can associate with the table (i.e. as you've done here).

Changes:

  • This will remove the current version of .space for TabularBenchmark, which is currently just a categorical over ids. The ids can still be accessed through self.configs: dict[config_id, Config] or through .config_keys.

This design serves two purposes:

  • Someone using the GenericTabularBenchmark interface can pass in the raw space they want to incorporate, and it will act like any prebuilt benchmark.
  • It keeps behaviour more uniform, i.e. there's less chance people do weird funky shit in their own overridden get_raw_space(). They either pass in a space or not, and we can handle the logic for that in one place. A minimal sketch of this contract follows below.
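
A minimal sketch of the suggested contract, with hypothetical names (this is not code from the PR):

    # Sketch of the reviewer's proposal: `.space` always exists on a
    # tabular benchmark, defaulting to an empty ConfigurationSpace.
    from ConfigSpace import ConfigurationSpace

    class SketchTabularBenchmark:
        def __init__(self, name: str, space: ConfigurationSpace | None = None, seed: int | None = None):
            # Fall back to an empty space when no raw space is known for the table
            self.space = space if space is not None else ConfigurationSpace(name=name, seed=seed)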

"""Create the configuration space for the benchmark.

Args:
name: The name for the configuration space.
seed: The seed to use for the configuration space.

Returns:
The configuration space for the benchmark.
"""
        # obtained from https://github.com/automl/lcbench#dataset-overview
        cs = ConfigurationSpace(name=name, seed=seed)
        cs.add_hyperparameters(
            [
                UniformIntegerHyperparameter(
                    "batch_size",
                    lower=16,
                    upper=512,
                    log=True,
                    default_value=128,  # approximately log-spaced middle of range
                ),
                UniformFloatHyperparameter(
                    "learning_rate",
                    lower=1.0e-4,
                    upper=1.0e-1,
                    log=True,
                    default_value=1.0e-3,  # popular choice of LR
                ),
                UniformFloatHyperparameter(
                    "momentum",
                    lower=0.1,
                    upper=0.99,
                    log=False,
                    default_value=0.9,  # popular choice, also not on the boundary
                ),
                UniformFloatHyperparameter(
                    "weight_decay",
                    lower=1.0e-5,
                    upper=1.0e-1,
                    log=False,
                    default_value=1.0e-2,  # reasonable default
                ),
                UniformIntegerHyperparameter(
                    "num_layers",
                    lower=1,
                    upper=5,
                    log=False,
                    default_value=3,  # middle of range
                ),
                UniformIntegerHyperparameter(
                    "max_units",
                    lower=64,
                    upper=1024,
                    log=True,
                    default_value=256,  # approximately log-spaced middle of range
                ),
                UniformFloatHyperparameter(
                    "max_dropout",
                    lower=0,
                    upper=1,
                    log=False,
                    default_value=0.2,  # reasonable default
                ),
                *self._get_constant_hyperparameters(),
            ],
        )
        return cs

    def _get_constant_hyperparameters(self) -> list:
        constants = [
            Constant("cosine_annealing_T_max", 50),
            Constant("cosine_annealing_eta_min", 0.0),
            Constant("normalization_strategy", "standardize"),
            Constant("optimizer", "sgd"),
            Constant("learning_rate_scheduler", "cosine_annealing"),
            Constant("network", "shapedmlpnet"),
            Constant("activation", "relu"),
            Constant("mlp_shape", "funnel"),
            Constant("imputation_strategy", "mean"),
        ]
        return constants
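
For illustration, exercising the reconstructed space could look like this (assuming an instantiated LCBenchTabularBenchmark bound to bench; sample_configuration() is standard ConfigSpace API):

    # Sketch: build the raw LCBench space and draw one configuration from it
    space = bench.get_raw_space(seed=0)
    sample = space.sample_configuration()
    print(sample)  # the constants, e.g. optimizer='sgd', are included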
36 changes: 35 additions & 1 deletion src/mfpbench/tabular.py
@@ -168,18 +168,27 @@ def is_constant(_s: pd.Series) -> bool:
        start = sorted_fids[0]
        end = sorted_fids[-1]
        step = sorted_fids[1] - sorted_fids[0]
+        self._start = sorted_fids[0]
+        self._end = sorted_fids[-1]
+        self._step = sorted_fids[1] - sorted_fids[0]
Contributor comment on lines +171 to +173:

These are set in Benchmark and obtained from self.fidelity_range. I've made sure to drop the 0'th epoch from the lcbench tabular data such that:

    bench.start = 1
    bench.end = 51
    bench.step = 1
    bench.fidelity_range = (1, 51, 1)


        # Create the configuration space with just the ids
        space = ConfigurationSpace(name, seed=seed)
        space.add_hyperparameter(CategoricalHyperparameter("id", list(configs)))

+        # Create the raw configuration space
+        try:
+            self.raw_space = self.get_raw_space(name=name, seed=seed)
+        except NotImplementedError:
+            self.raw_space = None

        self.table = table
        self.configs = configs
        self.fidelity_name = fidelity_name
        self.config_name = config_name
        self.config_keys = sorted(config_keys)
        self.result_keys = sorted(result_keys)
-        self.fidelity_range = (start, end, step)  # type: ignore
+        # self.fidelity_range = (start, end, step)  # type: ignore

        super().__init__(
            name=name,
@@ -189,6 +198,31 @@
            perturb_prior=perturb_prior,
        )

    @property
    def fidelity_range(self) -> tuple[int, int, int]:
        """Get the range of fidelities for this benchmark.

        Returns:
            The range of fidelities for this benchmark.
        """
        return (self._start, self._end, self._step)

    @property
    def raw_search_space(self) -> ConfigurationSpace | None:
        # None when the subclass does not implement get_raw_space()
        return self.raw_space

    def get_raw_space(self, name: int | None = None, seed: int | None = None) -> ConfigurationSpace:
        """Create the configuration space for the benchmark.

        Args:
            name: The name for the configuration space.
            seed: The seed to use for the configuration space.

        Returns:
            The configuration space for the benchmark.
        """
        raise NotImplementedError
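
A minimal sketch of the fallback this stub enables (hypothetical subclass, not part of the PR):

    # Hypothetical: a tabular benchmark that has no known raw space.
    # __init__ catches the NotImplementedError above, so raw_space is None
    # and callers can fall back to the id-only categorical `.space`.
    class OpaqueTabularBenchmark(TabularBenchmark):
        pass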

    def query(
        self,
        config: CTabular | Mapping[str, Any] | str,