From 9642892e3cc1ab0c221f02d60d8e8a4a94de2b1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89mile=20Royer?= <emile.royer@inria.fr>
Date: Thu, 25 Jul 2024 16:26:35 +0200
Subject: [PATCH 1/2] Use units based on powers of 2

The affected sizes are already computed with powers of 2 in the code (e.g. division by 1024 and 2**20), so the unit labels now match the arithmetic.
---
 river/forest/adaptive_random_forest.py           | 4 ++--
 river/forest/online_extra_trees.py               | 2 +-
 river/stream/twitch_chat_stream.py               | 2 +-
 river/tree/extremely_fast_decision_tree.py       | 2 +-
 river/tree/hoeffding_adaptive_tree_classifier.py | 2 +-
 river/tree/hoeffding_adaptive_tree_regressor.py  | 2 +-
 river/tree/hoeffding_tree.py                     | 4 ++--
 river/tree/hoeffding_tree_classifier.py          | 2 +-
 river/tree/hoeffding_tree_regressor.py           | 2 +-
 river/tree/isoup_tree_regressor.py               | 2 +-
 river/tree/utils.py                              | 6 +++---
 river/utils/pretty.py                            | 2 +-
 12 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/river/forest/adaptive_random_forest.py b/river/forest/adaptive_random_forest.py
index a3553c1d9d..ba4a75282f 100644
--- a/river/forest/adaptive_random_forest.py
+++ b/river/forest/adaptive_random_forest.py
@@ -534,7 +534,7 @@ class ARFClassifier(BaseForest, base.Classifier):
         in the majority class is smaller than this parameter value. This parameter avoids
         performing splits when most of the data belongs to a single class.
     max_size
-        [*Tree parameter*] Maximum memory (MB) consumed by the tree.
+        [*Tree parameter*] Maximum memory (MiB) consumed by the tree.
     memory_estimate_period
         [*Tree parameter*] Number of instances between memory consumption checks.
     stop_mem_management
@@ -808,7 +808,7 @@ class ARFRegressor(BaseForest, base.Regressor):
     binary_split
         [*Tree parameter*] If True, only allow binary splits.
     max_size
-        [*Tree parameter*] Maximum memory (MB) consumed by the tree.
+        [*Tree parameter*] Maximum memory (MiB) consumed by the tree.
     memory_estimate_period
         [*Tree parameter*] Number of instances between memory consumption checks.
     stop_mem_management
diff --git a/river/forest/online_extra_trees.py b/river/forest/online_extra_trees.py
index 6ed958df46..a13707bdb0 100644
--- a/river/forest/online_extra_trees.py
+++ b/river/forest/online_extra_trees.py
@@ -583,7 +583,7 @@ class OXTRegressor(ExtraTrees, base.Regressor):
     binary_split
         [*Tree parameter*] If True, only allow binary splits.
     max_size
-        [*Tree parameter*] Maximum memory (MB) consumed by the tree.
+        [*Tree parameter*] Maximum memory (MiB) consumed by the tree.
     memory_estimate_period
         [*Tree parameter*] Number of instances between memory consumption checks.
     stop_mem_management
diff --git a/river/stream/twitch_chat_stream.py b/river/stream/twitch_chat_stream.py
index ddb6397981..fff0aafbee 100644
--- a/river/stream/twitch_chat_stream.py
+++ b/river/stream/twitch_chat_stream.py
@@ -46,7 +46,7 @@ class TwitchChatStream:
     channels
         A list of channel names like `["asmongold", "shroud"]` you want to collect messages from.
     buffer_size
-        Size of buffer in bytes used for receiving responses from Twitch with IRC (default 2 kB).
+        Size of buffer in bytes used for receiving responses from Twitch over IRC (default 2 KiB).
     timeout
         A timeout value in seconds for waiting response from Twitch (default 60s). It can be useful if all requested channels are offline or chat is not active enough.
 
diff --git a/river/tree/extremely_fast_decision_tree.py b/river/tree/extremely_fast_decision_tree.py
index bac7ed8be6..d9972522e7 100755
--- a/river/tree/extremely_fast_decision_tree.py
+++ b/river/tree/extremely_fast_decision_tree.py
@@ -80,7 +80,7 @@ class ExtremelyFastDecisionTreeClassifier(HoeffdingTreeClassifier):
         smaller than this parameter value. This parameter avoids performing splits when most
         of the data belongs to a single class.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
diff --git a/river/tree/hoeffding_adaptive_tree_classifier.py b/river/tree/hoeffding_adaptive_tree_classifier.py
index 290d34dad1..f63c2ba013 100644
--- a/river/tree/hoeffding_adaptive_tree_classifier.py
+++ b/river/tree/hoeffding_adaptive_tree_classifier.py
@@ -77,7 +77,7 @@ class HoeffdingAdaptiveTreeClassifier(HoeffdingTreeClassifier):
         smaller than this parameter value. This parameter avoids performing splits when most
         of the data belongs to a single class.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
diff --git a/river/tree/hoeffding_adaptive_tree_regressor.py b/river/tree/hoeffding_adaptive_tree_regressor.py
index a2066a9e91..1055472002 100644
--- a/river/tree/hoeffding_adaptive_tree_regressor.py
+++ b/river/tree/hoeffding_adaptive_tree_regressor.py
@@ -84,7 +84,7 @@ class HoeffdingAdaptiveTreeRegressor(HoeffdingTreeRegressor):
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
diff --git a/river/tree/hoeffding_tree.py b/river/tree/hoeffding_tree.py
index 9a7addeeef..4dc4a1c801 100644
--- a/river/tree/hoeffding_tree.py
+++ b/river/tree/hoeffding_tree.py
@@ -34,7 +34,7 @@ class HoeffdingTree(ABC):
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
@@ -111,7 +111,7 @@ def _hoeffding_bound(range_val, confidence, n):
 
     @property
     def max_size(self):
-        """Max allowed size tree can reach (in MB)."""
+        """Maximum size the tree is allowed to reach (in MiB)."""
         return self._max_size
 
     @max_size.setter
diff --git a/river/tree/hoeffding_tree_classifier.py b/river/tree/hoeffding_tree_classifier.py
index 6ce2c09f57..841cdbe4d5 100755
--- a/river/tree/hoeffding_tree_classifier.py
+++ b/river/tree/hoeffding_tree_classifier.py
@@ -58,7 +58,7 @@ class HoeffdingTreeClassifier(HoeffdingTree, base.Classifier):
         smaller than this parameter value. This parameter avoids performing splits when most
         of the data belongs to a single class.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
diff --git a/river/tree/hoeffding_tree_regressor.py b/river/tree/hoeffding_tree_regressor.py
index 1bcc1c96b3..16604e2d6c 100644
--- a/river/tree/hoeffding_tree_regressor.py
+++ b/river/tree/hoeffding_tree_regressor.py
@@ -57,7 +57,7 @@ class HoeffdingTreeRegressor(HoeffdingTree, base.Regressor):
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
diff --git a/river/tree/isoup_tree_regressor.py b/river/tree/isoup_tree_regressor.py
index 2f92a34756..9411d1a9c9 100644
--- a/river/tree/isoup_tree_regressor.py
+++ b/river/tree/isoup_tree_regressor.py
@@ -62,7 +62,7 @@ class iSOUPTreeRegressor(tree.HoeffdingTreeRegressor, base.MultiTargetRegressor)
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
diff --git a/river/tree/utils.py b/river/tree/utils.py
index 3826d02af4..7dff6859e4 100644
--- a/river/tree/utils.py
+++ b/river/tree/utils.py
@@ -251,7 +251,7 @@ def calculate_object_size(obj: typing.Any, unit: str = "byte") -> int:
         Object to evaluate.
     unit
         The unit in which the accounted value is going to be returned.
-        Values: 'byte', 'kB', 'MB' (Default: 'byte').
+        Values: 'byte', 'KiB', 'MiB' (Default: 'byte').
 
     Returns
     -------
@@ -295,9 +295,9 @@ def calculate_object_size(obj: typing.Any, unit: str = "byte") -> int:
             for i in obj:
                 to_visit.append(i)
 
-    if unit == "kB":
+    if unit == "KiB":
         final_size = byte_size / 1024
-    elif unit == "MB":
+    elif unit == "MiB":
         final_size = byte_size / (2**20)
     else:
         final_size = byte_size
diff --git a/river/utils/pretty.py b/river/utils/pretty.py
index 6a3678128e..2ceec3f85a 100644
--- a/river/utils/pretty.py
+++ b/river/utils/pretty.py
@@ -68,7 +68,7 @@ def humanize_bytes(n_bytes: int):
     n_bytes
 
     """
-    suffixes = ["B", "KB", "MB", "GB", "TB", "PB"]
+    suffixes = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]
     human = float(n_bytes)
     rank = 0
     if n_bytes != 0:

From abd1f16c8d15fa7259dc96fdbfc5910a3b37ec4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89mile=20Royer?= <emile.royer@inria.fr>
Date: Thu, 25 Jul 2024 16:48:28 +0200
Subject: [PATCH 2/2] Add a changelog entry

---
 docs/releases/unreleased.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/releases/unreleased.md b/docs/releases/unreleased.md
index 79e701b844..219b4c1a2d 100644
--- a/docs/releases/unreleased.md
+++ b/docs/releases/unreleased.md
@@ -1 +1,3 @@
 # Unreleased
+
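+- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). The computed values are unchanged and only the labels differ; note that the `unit` argument of `river.tree.utils.calculate_object_size` now expects `'KiB'`/`'MiB'` instead of `'kB'`/`'MB'`.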