Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update for new python, pandas, and sklearn versions #75

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: ['ubuntu-latest']
python-version: [3.8]
python-version: ["3.10"]
runs-on: ${{ matrix.os }}

steps:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
python-version: ["3.7", "3.8", "3.9"]
python-version: ["3.9", "3.10", "3.11"]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
Expand All @@ -31,9 +31,9 @@ jobs:
auto-update-conda: true
auto-activate-base: false
activate-environment: hcrystalball
channels: conda-forge
python-version: ${{ matrix.python-version }}
environment-file: environment.yml
use-only-tar-bz2: true
- name: Lint with flake8
shell: pwsh
run: flake8 .
Expand Down
17 changes: 12 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,29 @@ repos:
- id: pretty-format-json
args:
- --autofix
- --no-sort-keys
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.8.0
hooks:
- id: isort
args:
- --sl
- --line-length=110
- --profile=black
- repo: https://github.com/psf/black
rev: 21.5b2
rev: 22.3.0
hooks:
- id: black
args: [--line-length=110]
- repo: https://gitlab.com/PyCQA/flake8
rev: '3.9.2'
args:
- --line-length=110
- repo: https://github.com/PyCQA/flake8
rev: '3.9.1'
hooks:
- id: flake8
exclude: ^alembic
args:
- --max-line-length=110
- repo: https://github.com/kynan/nbstripout
rev: 0.4.0
hooks:
Expand Down
21 changes: 11 additions & 10 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ channels:
- conda-forge
- nodefaults
dependencies:
- scipy=1.7
- pandas=1.3
- numpy=1.21
- scikit-learn=1.0
- workalendar=16.3 # for HolidayTransformer
- statsmodels=0.13 # SmoothingWrappers
- prophet=1.0 # ProphetWrapper
- matplotlib-base=3.5 # for plotting in model selection
- scipy>=1.5
- pandas>=2.0
- numpy>=1.20.2
- scikit-learn>=1.0
- workalendar>=14 # for HolidayTransformer
- statsmodels>=0.13 # SmoothingWrappers
- prophet=1.1 # ProphetWrapper
- matplotlib>=3.5 # for plotting in model selection
- tqdm # interactive progress bar
- ipywidgets # interactive progress bar
- jupyterlab # run examples, interactive progress bar
Expand All @@ -24,10 +24,11 @@ dependencies:
- prefect # to execute model selection in parallel
- jinja2=3.0.3 # temporary fix
- pip
- dask

- pip:
- pmdarima==1.8.5 # SarimaxWrapper
- tbats==1.1.0 #(T)BATSWrapper
- pmdarima==2.0.3 # SarimaxWrapper
- tbats==1.1.3 #(T)BATSWrapper
- pytest # to run tests
- pytest-cov # to run tests
- black # to format the code, moved to pip to match the pre-commit hooks
Expand Down
10 changes: 5 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ package_dir =
setup_requires = pyscaffold>=3.1a0,<3.2a0
# Add here dependencies of your project (semicolon/line-separated), e.g.
install_requires =
numpy>=1.18
pandas>=1.0
scipy>=1.4
workalendar>=10.1
scikit-learn>=0.23
numpy>=1.25
pandas>=2.0
scipy>=1.11
workalendar>=17.0
scikit-learn>=1.3
matplotlib

# The usage of test_requires is discouraged, see `Dependency Management` docs
Expand Down
4 changes: 2 additions & 2 deletions src/hcrystalball/compose/_ts_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ def get_feature_names(self):
)
elif trans == "drop":
continue
elif hasattr(trans, "get_feature_names"):
elif hasattr(trans, "get_feature_names_out"):
col_tuple = Columns(
col_name=trans.get_feature_names(),
col_name=trans.get_feature_names_out(),
trans_name=name,
trans_index=index,
passthrough=False,
Expand Down
4 changes: 3 additions & 1 deletion src/hcrystalball/ensemble/_stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ def _create_weekdays_as_features(cross_results_index):
-------
pandas.DataFrame
"""
return pd.get_dummies(pd.to_datetime(cross_results_index).day_name()).set_index(cross_results_index)
return pd.get_dummies(pd.to_datetime(cross_results_index).day_name(), dtype="uint8").set_index(
cross_results_index
)

@enforce_y_type
@check_X_y
Expand Down
7 changes: 5 additions & 2 deletions src/hcrystalball/metrics/_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

import numpy as np
import pandas as pd
from sklearn.metrics import SCORERS
from sklearn.metrics import get_scorer
from sklearn.metrics import get_scorer_names
from sklearn.metrics._scorer import _BaseScorer

from hcrystalball.utils import generate_estimator_hash
from hcrystalball.utils import get_estimator_repr

SCORERS = {name: get_scorer(name) for name in get_scorer_names()}


class PersistCVDataMixin:
def _save_prediction(self, y_pred, estimator_label, y_true):
Expand Down Expand Up @@ -37,7 +40,7 @@ def _save_prediction(self, y_pred, estimator_label, y_true):
new_split_df = pd.DataFrame({"y_true": y_true}, index=y_pred.index).assign(
split=self._split_index[estimator_label]
)
self._cv_data = self._cv_data.append(new_split_df, sort=False)
self._cv_data = pd.concat([self._cv_data, new_split_df], sort=False)

# Add the new predictions to the cv data container
self._cv_data.loc[
Expand Down
4 changes: 2 additions & 2 deletions src/hcrystalball/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import collections
import typing
import functools
import hashlib
import os
Expand All @@ -25,7 +25,7 @@ def deep_dict_update(source, overrides):
result = source.copy()

for key, value in overrides.items():
if isinstance(value, collections.Mapping) and value:
if isinstance(value, typing.Mapping) and value:
returned = deep_dict_update(result.get(key, {}), value)
result[key] = returned
else:
Expand Down
23 changes: 12 additions & 11 deletions tests/integration/test_models_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def pipeline(request):
TSColumnTransformer(
transformers=[
("raw_cols_2", "passthrough", ["trend"]),
("one_hot", StandardScaler(), ["x0_1"]),
("one_hot", StandardScaler(), ["one_hot"]),
]
),
),
Expand Down Expand Up @@ -184,35 +184,36 @@ def pipeline(request):
(
"more_cols_freq_D",
"more_dimensions_with_get_feature_names",
["trend", "x0_1", "x0_2", "x0_3", "x0_4"],
["trend", "one_hot_1", "one_hot_2", "one_hot_3", "one_hot_4"],
),
(
"more_cols_freq_D",
"less_dimensions_without_get_feature_names",
["trend", "pca_0"],
["trend", "pca0"],
),
("more_cols_freq_D", "with_model", ["ExponentialSmoothing"]),
("more_cols_freq_D", "more_layers_builtin_transformers", ["trend", "x0_1"]),
# ("more_cols_freq_D", "more_layers_builtin_transformers", ["trend", "one_hot"]),
(
"more_cols_freq_D",
"more_layers_custom_transformers_same_level_country_code",
["x0_", "x0_New year", "trend"],
["_holiday_DE_", "_holiday_DE_New year", "trend"],
),
(
"more_cols_country_col_freq_D",
"more_layers_custom_transformers_same_level_country_code_country_col",
["x0_", "x0_New year", "trend"],
),
(
"more_cols_country_col_freq_D",
"more_layers_holiday_in_column_transformer",
["x0_", "x0_New year", "trend", "country"],
["_holiday_country_", "_holiday_country_New year", "trend"],
),
# (
# "more_cols_country_col_freq_D",
# "more_layers_holiday_in_column_transformer",
# ["trend", "one_hot", "country"],
# ),
],
indirect=["X_y_linear_trend", "pipeline"],
)
def test_ts_column_transformer_fit_transform(X_y_linear_trend, pipeline, exp_cols):
X, y = X_y_linear_trend
print(f"{X = }\n{y = }\n{exp_cols = }\n{pipeline = }")

if isinstance(pipeline, Pipeline) and hasattr(pipeline.steps[-1][1], "predict"):
res = pipeline.fit(X, y).predict(X)
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/compose/test_ts_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def column_transformer_and_cols(request):
),
]
)
cols = ["trend", "one_hot", "x0_1", "x0_2", "x0_3", "x0_4"]
cols = ["trend", "one_hot", "one_hot_1", "one_hot_2", "one_hot_3", "one_hot_4"]

elif "passthrough_columns_in_the_middle" in request.param:
tran = TSColumnTransformer(
Expand All @@ -37,7 +37,7 @@ def column_transformer_and_cols(request):
("scaler", StandardScaler(), ["trend"]),
]
)
cols = ["x0_1", "x0_2", "x0_3", "x0_4", "one_hot", "trend"]
cols = ["one_hot_1", "one_hot_2", "one_hot_3", "one_hot_4", "one_hot", "trend"]

return tran, cols

Expand Down
Loading