Skip to content

Commit

Permalink
Merge branch 'online-ml:main' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
yuritpinheiro authored Aug 28, 2024
2 parents ebe9568 + 7de4751 commit b71b822
Show file tree
Hide file tree
Showing 142 changed files with 815 additions and 749 deletions.
32 changes: 17 additions & 15 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
files: river
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
rev: v4.4.0
hooks:
- id: check-json
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- id: mixed-line-ending

- repo: local
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.7
hooks:
# Run the linter.
- id: ruff
name: ruff
language: python
types: [python]
entry: ruff
args:
- --fix
types_or: [python, pyi, jupyter]
args: [--fix]
# Run the formatter.
- id: ruff-format
types_or: [python, pyi, jupyter]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.1.1"
hooks:
- id: mypy
name: mypy
language: python
types: [python]
entry: mypy --implicit-optional
args:
- "--config-file=pyproject.toml"
- "--python-version=3.11"
- "--implicit-optional"
163 changes: 39 additions & 124 deletions docs/recipes/on-hoeffding-trees.ipynb

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions docs/releases/unreleased.md
Original file line number Diff line number Diff line change
@@ -1 +1,18 @@
# Unreleased

- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display; the behaviour is unchanged.

## cluster

- Update the description of `cluster.ODAC`.
- Change `draw` in `cluster.ODAC` to draw the hierarchical cluster's structure as a Graphviz graph.
- Add `render_ascii` in `cluster.ODAC` to render the hierarchical cluster's structure in text format.
- Work with `stats.Var` in `cluster.ODAC` when cluster has only one time series.

## stats

- Removed the unexported class `stats.CentralMoments`.

## tree

- Instead of letting trees grow indefinitely, setting the `max_depth` parameter to `None` will stop the trees from growing when they reach the system recursion limit.
1 change: 0 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ plugins:

extra_javascript:
- javascripts/config.js
- https://polyfill.io/v3/polyfill.min.js?features=es6
- https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
- https://cdn.jsdelivr.net/npm/vega@5
- https://cdn.jsdelivr.net/npm/vega-lite@5
Expand Down
189 changes: 95 additions & 94 deletions poetry.lock

Large diffs are not rendered by default.

33 changes: 26 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ pandas = "^2.1"

[tool.poetry.group.dev.dependencies]
graphviz = "^0.20.1"
gym = "^0.26.2"
gymnasium = "^0.29.0"
matplotlib = "^3.0.2"
mypy = "^1.6.1"
mypy = "^1.11.1"
pre-commit = "^3.5.0"
pytest = "^7.4.2"
ruff = "^0.1.1"
ruff = "^0.4.10"
scikit-learn = "^1.3.1"
sqlalchemy = "^2.0.22"
sympy = "^1.10.1"
Expand Down Expand Up @@ -117,20 +117,36 @@ markers = [
]

[tool.ruff]
select = ["E", "F", "I", "UP"] # https://beta.ruff.rs/docs/rules/
line-length = 100
target-version = 'py310'
extend-include = ["*.ipynb"]

[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# isort
"I",
]
ignore = ["E501"]
fixable = ["ALL"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"

[tool.mypy]
files = "river"

[[tool.mypy.overrides]]
module = [
"river.*",
"mmh3.*",
"numpy.*",
"sklearn.*",
Expand All @@ -141,6 +157,9 @@ module = [
"vaex.*",
"torch.*",
"sqlalchemy.*",
"requests.*"
"requests.*",
"gymnasium.*",
"sympy.*",
"polars.*"
]
ignore_missing_imports = true
1 change: 1 addition & 0 deletions river/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
memory, or simply when it isn't available all at once. river's API is heavily inspired from that of
scikit-learn, enough so that users who are familiar with scikit-learn should feel right at home.
"""

from __future__ import annotations

from .__version__ import __version__ # noqa: F401
1 change: 1 addition & 0 deletions river/active/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Online active learning."""

from __future__ import annotations

from . import base
Expand Down
3 changes: 1 addition & 2 deletions river/active/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def _wrapped_model(self):
return self.classifier

@abc.abstractmethod
def _ask_for_label(self, x, y_pred) -> bool:
...
def _ask_for_label(self, x, y_pred) -> bool: ...

def predict_proba_one(self, x, **kwargs):
"""Predict the probability of each label for `x` and indicate whether a label is needed.
Expand Down
4 changes: 1 addition & 3 deletions river/active/entropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ class EntropySampler(ActiveLearningClassifier):
"""

def __init__(
self, classifier: base.Classifier, discount_factor: float = 3, seed=None
):
def __init__(self, classifier: base.Classifier, discount_factor: float = 3, seed=None):
super().__init__(classifier, seed=seed)
self.discount_factor = discount_factor

Expand Down
7 changes: 2 additions & 5 deletions river/anomaly/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@ class ThresholdFilter(anomaly.base.AnomalyFilter):
"""

def __init__(
self, anomaly_detector, threshold: float, protect_anomaly_detector=True
):
def __init__(self, anomaly_detector, threshold: float, protect_anomaly_detector=True):
super().__init__(
anomaly_detector=anomaly_detector,
protect_anomaly_detector=protect_anomaly_detector,
Expand Down Expand Up @@ -188,7 +186,6 @@ def _unit_test_params(cls):
from river import preprocessing

yield {
"anomaly_detector": preprocessing.StandardScaler()
| anomaly.OneClassSVM(nu=0.2),
"anomaly_detector": preprocessing.StandardScaler() | anomaly.OneClassSVM(nu=0.2),
"q": 0.995,
}
5 changes: 1 addition & 4 deletions river/anomaly/pad.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def __init__(
n_std: float = 3.0,
warmup_period: int = 0,
):

self.predictive_model = (
predictive_model
if predictive_model is not None
Expand All @@ -123,9 +122,7 @@ def learn_one(self, x: dict | None, y: base.typing.Target | float):
self.iter += 1

# Check whether the model is a time-series forecasting or regression/classification model
if isinstance(
self.predictive_model, time_series.base.Forecaster
) and isinstance(y, float):
if isinstance(self.predictive_model, time_series.base.Forecaster) and isinstance(y, float):
# When there's no data point as dict of features, the target will be passed
# to the forecaster as an exogenous variable.
if not x:
Expand Down
1 change: 1 addition & 0 deletions river/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""River API module."""

from __future__ import annotations

from . import (
Expand Down
1 change: 1 addition & 0 deletions river/bandit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
(see `model_selection.BanditRegressor`).
"""

from __future__ import annotations

from . import base, datasets, envs
Expand Down
3 changes: 1 addition & 2 deletions river/bandit/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ def __post_init__(self):
)

@abc.abstractmethod
def _pull(self, arm_ids: list[ArmID]) -> ArmID:
...
def _pull(self, arm_ids: list[ArmID]) -> ArmID: ...

def pull(self, arm_ids: list[ArmID]) -> ArmID:
"""Pull arm(s).
Expand Down
8 changes: 4 additions & 4 deletions river/bandit/bayes_ucb.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class BayesUCB(bandit.base.Policy):
Examples
--------
>>> import gym
>>> import gymnasium as gym
>>> from river import bandit
>>> from river import proba
>>> from river import stats
Expand Down Expand Up @@ -63,9 +63,9 @@ class BayesUCB(bandit.base.Policy):

def __init__(self, reward_obj=None, burn_in=0, seed: int | None = None):
super().__init__(reward_obj, burn_in)
self._posteriors: collections.defaultdict[
bandit.base.ArmID, proba.Beta
] = collections.defaultdict(proba.Beta)
self._posteriors: collections.defaultdict[bandit.base.ArmID, proba.Beta] = (
collections.defaultdict(proba.Beta)
)
self.seed = seed
self._rng = random.Random(seed)

Expand Down
2 changes: 1 addition & 1 deletion river/bandit/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

try:
import gym
import gymnasium as gym

GYM_INSTALLED = True
except ImportError:
Expand Down
12 changes: 4 additions & 8 deletions river/bandit/envs/candy_cane.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import dataclasses

import gym
import gymnasium as gym


@dataclasses.dataclass
Expand All @@ -25,7 +25,7 @@ class CandyCaneContest(gym.Env):
Examples
--------
>>> import gym
>>> import gymnasium as gym
>>> from river import stats
>>> env = gym.make('river_bandits/CandyCaneContest-v0')
Expand Down Expand Up @@ -58,12 +58,8 @@ def __init__(self, n_machines=100, reward_decay=0.03):
self.action_space = gym.spaces.Discrete(n_machines)
self.observation_space = gym.spaces.Dict(
{
"attempts": gym.spaces.Tuple(
[gym.spaces.Discrete(self.n_steps)] * n_machines
),
"successes": gym.spaces.Tuple(
[gym.spaces.Discrete(self.n_steps)] * n_machines
),
"attempts": gym.spaces.Tuple([gym.spaces.Discrete(self.n_steps)] * n_machines),
"successes": gym.spaces.Tuple([gym.spaces.Discrete(self.n_steps)] * n_machines),
}
)
self.reward_range = (0.0, 1.0)
Expand Down
2 changes: 1 addition & 1 deletion river/bandit/envs/testbed.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import math

import gym
import gymnasium as gym


class KArmedTestbed(gym.Env):
Expand Down
2 changes: 1 addition & 1 deletion river/bandit/epsilon_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class EpsilonGreedy(bandit.base.Policy):
Examples
--------
>>> import gym
>>> import gymnasium as gym
>>> from river import bandit
>>> from river import stats
Expand Down
4 changes: 2 additions & 2 deletions river/bandit/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import typing

try:
import gym
import gymnasium as gym
except ImportError:
...

Expand Down Expand Up @@ -52,7 +52,7 @@ def evaluate(
Examples
--------
>>> import gym
>>> import gymnasium as gym
>>> from river import bandit
>>> trace = bandit.evaluate(
Expand Down
9 changes: 3 additions & 6 deletions river/bandit/exp3.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Exp3(bandit.base.Policy):
Examples
--------
>>> import gym
>>> import gymnasium as gym
>>> from river import bandit
>>> from river import proba
>>> from river import stats
Expand Down Expand Up @@ -77,9 +77,7 @@ def __init__(
burn_in=0,
seed: int | None = None,
):
super().__init__(
reward_obj=reward_obj, reward_scaler=reward_scaler, burn_in=burn_in
)
super().__init__(reward_obj=reward_obj, reward_scaler=reward_scaler, burn_in=burn_in)
self.seed = seed
self.gamma = gamma
self._rng = random.Random(seed)
Expand All @@ -91,8 +89,7 @@ def __init__(
def _pull(self, arm_ids):
total = sum(self._weights[arm_id] for arm_id in arm_ids)
self._probabilities = {
arm_id: (1 - self.gamma) * (self._weights[arm_id] / total)
+ self.gamma / len(arm_ids)
arm_id: (1 - self.gamma) * (self._weights[arm_id] / total) + self.gamma / len(arm_ids)
for arm_id in arm_ids
}
return self._rng.choices(arm_ids, weights=self._probabilities.values())[0]
Expand Down
8 changes: 4 additions & 4 deletions river/bandit/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class RandomPolicy(bandit.base.Policy):
Examples
--------
>>> import gym
>>> import gymnasium as gym
>>> from river import bandit
>>> from river import proba
>>> from river import stats
Expand Down Expand Up @@ -52,9 +52,9 @@ class RandomPolicy(bandit.base.Policy):

def __init__(self, reward_obj=None, burn_in=0, seed: int | None = None):
super().__init__(reward_obj, burn_in)
self._posteriors: collections.defaultdict[
bandit.base.ArmID, proba.Beta
] = collections.defaultdict(proba.Beta)
self._posteriors: collections.defaultdict[bandit.base.ArmID, proba.Beta] = (
collections.defaultdict(proba.Beta)
)
self.seed = seed
self._rng = random.Random(seed)

Expand Down
3 changes: 2 additions & 1 deletion river/bandit/test_envs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import gym.utils.env_checker
import gymnasium as gym
import gymnasium.utils.env_checker
import pytest

from river import bandit
Expand Down
Loading

0 comments on commit b71b822

Please sign in to comment.