online-ml · MaxHalford · Jul 25, 2024 · Jul 25, 2024 · Jul 25, 2024
@@ -1 +1,3 @@
 # Unreleased
+
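+- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the displayed labels and the documentation; the computed values are unchanged. Note that the `unit` argument of `calculate_object_size` now expects `"KiB"` or `"MiB"` instead of `"kB"` or `"MB"`; any other value, including the old strings, returns the size in bytes.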
@@ -534,7 +534,7 @@ class ARFClassifier(BaseForest, base.Classifier):
         in the majority class is smaller than this parameter value. This parameter avoids
         performing splits when most of the data belongs to a single class.
     max_size
-        [*Tree parameter*] Maximum memory (MB) consumed by the tree.
+        [*Tree parameter*] Maximum memory (MiB) consumed by the tree.
     memory_estimate_period
         [*Tree parameter*] Number of instances between memory consumption checks.
     stop_mem_management
@@ -808,7 +808,7 @@ class ARFRegressor(BaseForest, base.Regressor):
     binary_split
         [*Tree parameter*] If True, only allow binary splits.
     max_size
-        [*Tree parameter*] Maximum memory (MB) consumed by the tree.
+        [*Tree parameter*] Maximum memory (MiB) consumed by the tree.
     memory_estimate_period
         [*Tree parameter*] Number of instances between memory consumption checks.
     stop_mem_management

@@ -583,7 +583,7 @@ class OXTRegressor(ExtraTrees, base.Regressor):
     binary_split
         [*Tree parameter*] If True, only allow binary splits.
     max_size
-        [*Tree parameter*] Maximum memory (MB) consumed by the tree.
+        [*Tree parameter*] Maximum memory (MiB) consumed by the tree.
     memory_estimate_period
         [*Tree parameter*] Number of instances between memory consumption checks.
     stop_mem_management

@@ -46,7 +46,7 @@ class TwitchChatStream:
     channels
         A list of channel names like `["asmongold", "shroud"]` you want to collect messages from.
     buffer_size
-        Size of buffer in bytes used for receiving responses from Twitch with IRC (default 2 kB).
+        Size of buffer in bytes used for receiving responses from Twitch with IRC (default 2 KiB).
     timeout
         A timeout value in seconds for waiting response from Twitch (default 60s). It can be useful if all requested channels are offline or chat is not active enough.
 

@@ -80,7 +80,7 @@ class ExtremelyFastDecisionTreeClassifier(HoeffdingTreeClassifier):
         smaller than this parameter value. This parameter avoids performing splits when most
         of the data belongs to a single class.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management

@@ -77,7 +77,7 @@ class HoeffdingAdaptiveTreeClassifier(HoeffdingTreeClassifier):
         smaller than this parameter value. This parameter avoids performing splits when most
         of the data belongs to a single class.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management

@@ -84,7 +84,7 @@ class HoeffdingAdaptiveTreeRegressor(HoeffdingTreeRegressor):
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management

@@ -34,7 +34,7 @@ class HoeffdingTree(ABC):
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management
@@ -111,7 +111,7 @@ def _hoeffding_bound(range_val, confidence, n):
 
     @property
     def max_size(self):
-        """Max allowed size tree can reach (in MB)."""
+        """Maximum size the tree is allowed to reach (in MiB)."""
         return self._max_size
 
     @max_size.setter

@@ -58,7 +58,7 @@ class HoeffdingTreeClassifier(HoeffdingTree, base.Classifier):
         smaller than this parameter value. This parameter avoids performing splits when most
         of the data belongs to a single class.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management

@@ -57,7 +57,7 @@ class HoeffdingTreeRegressor(HoeffdingTree, base.Regressor):
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management

@@ -62,7 +62,7 @@ class iSOUPTreeRegressor(tree.HoeffdingTreeRegressor, base.MultiTargetRegressor)
     binary_split
         If True, only allow binary splits.
     max_size
-        The max size of the tree, in Megabytes (MB).
+        The max size of the tree, in mebibytes (MiB).
     memory_estimate_period
         Interval (number of processed instances) between memory consumption checks.
     stop_mem_management

@@ -251,7 +251,7 @@ def calculate_object_size(obj: typing.Any, unit: str = "byte") -> int:
         Object to evaluate.
     unit
         The unit in which the accounted value is going to be returned.
-        Values: 'byte', 'kB', 'MB' (Default: 'byte').
+        Values: 'byte', 'KiB', 'MiB' (Default: 'byte').
 
     Returns
     -------
@@ -295,9 +295,9 @@ def calculate_object_size(obj: typing.Any, unit: str = "byte") -> int:
             for i in obj:
                 to_visit.append(i)
 
-    if unit == "kB":
+    if unit == "KiB":
         final_size = byte_size / 1024
-    elif unit == "MB":
+    elif unit == "MiB":
         final_size = byte_size / (2**20)
     else:
         final_size = byte_size

@@ -68,7 +68,7 @@ def humanize_bytes(n_bytes: int):
     n_bytes
 
     """
-    suffixes = ["B", "KB", "MB", "GB", "TB", "PB"]
+    suffixes = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]
     human = float(n_bytes)
     rank = 0
     if n_bytes != 0: