Add refactored LGBM model to experimental emulators #399
Changes from 47 commits
autoemulate/experimental/emulators/lightgbm.py (new file, +125 lines):

```python
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
from torch import Tensor

from autoemulate.experimental.emulators.base import (
    Emulator,
    InputTypeMixin,
)
from autoemulate.experimental.types import InputLike, OutputLike


class LightGBM(Emulator, InputTypeMixin, BaseEstimator, RegressorMixin):
    """LightGBM emulator.

    Wraps the LGBMRegressor from the lightgbm package.
    """

    def __init__(  # noqa: PLR0913 allow too many arguments since all currently required
        self,
        boosting_type="gbdt",
        num_leaves=31,
        max_depth=-1,
        learning_rate=0.1,
        n_estimators=100,
        subsample_for_bin=200000,
        objective=None,
        class_weight=None,
        min_split_gain=0.0,
        min_child_weight=0.001,
        min_child_samples=20,
        subsample=1.0,
        colsample_bytree=1.0,
        reg_alpha=0.0,
        reg_lambda=0.0,
        random_state=None,
        n_jobs=1,
        importance_type="split",
        verbose=-1,
    ):
        """Initializes a LightGBM object."""
        self.boosting_type = boosting_type
        self.num_leaves = num_leaves
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.subsample_for_bin = subsample_for_bin
        self.objective = objective
        self.class_weight = class_weight
        self.min_split_gain = min_split_gain
        self.min_child_weight = min_child_weight
        self.min_child_samples = min_child_samples
        self.subsample = subsample
        self.colsample_bytree = colsample_bytree
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.importance_type = importance_type
        self.verbose = verbose

    def fit(self, x: InputLike, y: InputLike | None, sample_weight=None, **kwargs):
        """Fits the emulator to the data."""
        x, y = self._convert_to_numpy(x, y)
        self.n_features_in_ = x.shape[1]
        x, y = check_X_y(
            x, y, multi_output=self._more_tags()["multioutput"], y_numeric=True
        )
        self.model_ = LGBMRegressor(
            boosting_type=self.boosting_type,
            num_leaves=self.num_leaves,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate,
            n_estimators=self.n_estimators,
            subsample_for_bin=self.subsample_for_bin,
            objective=self.objective,
            class_weight=self.class_weight,
            min_split_gain=self.min_split_gain,
            min_child_weight=self.min_child_weight,
            min_child_samples=self.min_child_samples,
            subsample=self.subsample,
            colsample_bytree=self.colsample_bytree,
            reg_alpha=self.reg_alpha,
            reg_lambda=self.reg_lambda,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            importance_type=self.importance_type,
            verbose=self.verbose,
        )
        self.model_.fit(x, y, sample_weight=sample_weight)
        self.is_fitted_ = True

    def predict(self, x: InputLike) -> OutputLike:
        """Predicts the output of the emulator for a given input."""
        x = check_array(x)
        check_is_fitted(self, "is_fitted_")
        y_pred = self.model_.predict(x)
        # Ensure the output is a 2D tensor with shape (n_samples, 1)
        return Tensor(y_pred.reshape(-1, 1))  # type: ignore PGH003

    @staticmethod
    def get_tune_config():
        # Note: 10 ** np.random.uniform(-3, 0)
        # is equivalent to scipy.stats.loguniform(0.001, 1)
        return {
            "num_leaves": [np.random.randint(10, 100)],
            "max_depth": [np.random.randint(-1, 12)],
            "learning_rate": [10 ** np.random.uniform(-3, -1)],
            "n_estimators": [np.random.randint(50, 1000)],
            "reg_alpha": [10 ** np.random.uniform(-3, 0)],
            "reg_lambda": [10 ** np.random.uniform(-3, 0)],
        }

    @property
    def model_name(self):
        return self.__class__.__name__

    def _more_tags(self):
        return {"multioutput": False}
```

Review comment from a maintainer on the `check_is_fitted` call in `predict`:

> If using this check function is dependent on this object inheriting from the sklearn base objects, I'd be in favour of not doing the inheritance and just getting rid of this (and replacing it with our own check, if we think that's necessary).
Tests for the new emulator (new file, +22 lines):

```python
from autoemulate.experimental.emulators.lightgbm import (
    LightGBM,
)
from autoemulate.experimental.tuner import Tuner
from autoemulate.experimental.types import TensorLike


def test_predict_lightgbm(sample_data_y1d, new_data_y1d):
    x, y = sample_data_y1d
    lgbm = LightGBM()
    lgbm.fit(x, y)
    x2, _ = new_data_y1d
    y_pred = lgbm.predict(x2)
    assert isinstance(y_pred, TensorLike)


def test_tune_lightgbm(sample_data_y1d):
    x, y = sample_data_y1d
    tuner = Tuner(x, y, n_iter=5)
    scores, configs = tuner.run(LightGBM)
    assert len(scores) == 5
    assert len(configs) == 5
```
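The tests rely on `sample_data_y1d` and `new_data_y1d` fixtures defined outside this diff (presumably in a shared `conftest.py`). A minimal sketch of what such fixtures might look like, assuming they return `(x, y)` tensor pairs with a 1D target; the shapes and data here are illustrative assumptions, not the project's actual fixtures:

```python
# Hypothetical conftest.py fixtures; the real definitions are not part of
# this diff, so sizes and feature counts are illustrative assumptions.
import pytest
import torch


@pytest.fixture
def sample_data_y1d():
    x = torch.rand(100, 5)  # 100 training samples, 5 input features
    y = x.sum(dim=1)        # 1D target, shape (100,)
    return x, y


@pytest.fixture
def new_data_y1d():
    x = torch.rand(20, 5)   # unseen inputs with the same feature count
    y = x.sum(dim=1)
    return x, y
```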