From 9642892e3cc1ab0c221f02d60d8e8a4a94de2b1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89mile=20Royer?= Date: Thu, 25 Jul 2024 16:26:35 +0200 Subject: [PATCH 1/2] Use units based on powers of 2 This reflects how they are interpreted in the code. --- river/forest/adaptive_random_forest.py | 4 ++-- river/forest/online_extra_trees.py | 2 +- river/stream/twitch_chat_stream.py | 2 +- river/tree/extremely_fast_decision_tree.py | 2 +- river/tree/hoeffding_adaptive_tree_classifier.py | 2 +- river/tree/hoeffding_adaptive_tree_regressor.py | 2 +- river/tree/hoeffding_tree.py | 4 ++-- river/tree/hoeffding_tree_classifier.py | 2 +- river/tree/hoeffding_tree_regressor.py | 2 +- river/tree/isoup_tree_regressor.py | 2 +- river/tree/utils.py | 6 +++--- river/utils/pretty.py | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/river/forest/adaptive_random_forest.py b/river/forest/adaptive_random_forest.py index a3553c1d9d..ba4a75282f 100644 --- a/river/forest/adaptive_random_forest.py +++ b/river/forest/adaptive_random_forest.py @@ -534,7 +534,7 @@ class ARFClassifier(BaseForest, base.Classifier): in the majority class is smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class. max_size - [*Tree parameter*] Maximum memory (MB) consumed by the tree. + [*Tree parameter*] Maximum memory (MiB) consumed by the tree. memory_estimate_period [*Tree parameter*] Number of instances between memory consumption checks. stop_mem_management @@ -808,7 +808,7 @@ class ARFRegressor(BaseForest, base.Regressor): binary_split [*Tree parameter*] If True, only allow binary splits. max_size - [*Tree parameter*] Maximum memory (MB) consumed by the tree. + [*Tree parameter*] Maximum memory (MiB) consumed by the tree. memory_estimate_period [*Tree parameter*] Number of instances between memory consumption checks. 
stop_mem_management diff --git a/river/forest/online_extra_trees.py b/river/forest/online_extra_trees.py index 6ed958df46..a13707bdb0 100644 --- a/river/forest/online_extra_trees.py +++ b/river/forest/online_extra_trees.py @@ -583,7 +583,7 @@ class OXTRegressor(ExtraTrees, base.Regressor): binary_split [*Tree parameter*] If True, only allow binary splits. max_size - [*Tree parameter*] Maximum memory (MB) consumed by the tree. + [*Tree parameter*] Maximum memory (MiB) consumed by the tree. memory_estimate_period [*Tree parameter*] Number of instances between memory consumption checks. stop_mem_management diff --git a/river/stream/twitch_chat_stream.py b/river/stream/twitch_chat_stream.py index ddb6397981..fff0aafbee 100644 --- a/river/stream/twitch_chat_stream.py +++ b/river/stream/twitch_chat_stream.py @@ -46,7 +46,7 @@ class TwitchChatStream: channels A list of channel names like `["asmongold", "shroud"]` you want to collect messages from. buffer_size - Size of buffer in bytes used for receiving responses from Twitch with IRC (default 2 kB). + Size of buffer in bytes used for receiving responses from Twitch with IRC (default 2 KiB). timeout A timeout value in seconds for waiting response from Twitch (default 60s). It can be useful if all requested channels are offline or chat is not active enough. diff --git a/river/tree/extremely_fast_decision_tree.py b/river/tree/extremely_fast_decision_tree.py index bac7ed8be6..d9972522e7 100755 --- a/river/tree/extremely_fast_decision_tree.py +++ b/river/tree/extremely_fast_decision_tree.py @@ -80,7 +80,7 @@ class ExtremelyFastDecisionTreeClassifier(HoeffdingTreeClassifier): smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). memory_estimate_period Interval (number of processed instances) between memory consumption checks. 
stop_mem_management diff --git a/river/tree/hoeffding_adaptive_tree_classifier.py b/river/tree/hoeffding_adaptive_tree_classifier.py index 290d34dad1..f63c2ba013 100644 --- a/river/tree/hoeffding_adaptive_tree_classifier.py +++ b/river/tree/hoeffding_adaptive_tree_classifier.py @@ -77,7 +77,7 @@ class HoeffdingAdaptiveTreeClassifier(HoeffdingTreeClassifier): smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). memory_estimate_period Interval (number of processed instances) between memory consumption checks. stop_mem_management diff --git a/river/tree/hoeffding_adaptive_tree_regressor.py b/river/tree/hoeffding_adaptive_tree_regressor.py index a2066a9e91..1055472002 100644 --- a/river/tree/hoeffding_adaptive_tree_regressor.py +++ b/river/tree/hoeffding_adaptive_tree_regressor.py @@ -84,7 +84,7 @@ class HoeffdingAdaptiveTreeRegressor(HoeffdingTreeRegressor): binary_split If True, only allow binary splits. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). memory_estimate_period Interval (number of processed instances) between memory consumption checks. stop_mem_management diff --git a/river/tree/hoeffding_tree.py b/river/tree/hoeffding_tree.py index 9a7addeeef..4dc4a1c801 100644 --- a/river/tree/hoeffding_tree.py +++ b/river/tree/hoeffding_tree.py @@ -34,7 +34,7 @@ class HoeffdingTree(ABC): binary_split If True, only allow binary splits. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). memory_estimate_period Interval (number of processed instances) between memory consumption checks. 
stop_mem_management @@ -111,7 +111,7 @@ def _hoeffding_bound(range_val, confidence, n): @property def max_size(self): - """Max allowed size tree can reach (in MB).""" + """Max allowed size tree can reach (in MiB).""" return self._max_size @max_size.setter diff --git a/river/tree/hoeffding_tree_classifier.py b/river/tree/hoeffding_tree_classifier.py index 6ce2c09f57..841cdbe4d5 100755 --- a/river/tree/hoeffding_tree_classifier.py +++ b/river/tree/hoeffding_tree_classifier.py @@ -58,7 +58,7 @@ class HoeffdingTreeClassifier(HoeffdingTree, base.Classifier): smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). memory_estimate_period Interval (number of processed instances) between memory consumption checks. stop_mem_management diff --git a/river/tree/hoeffding_tree_regressor.py b/river/tree/hoeffding_tree_regressor.py index 1bcc1c96b3..16604e2d6c 100644 --- a/river/tree/hoeffding_tree_regressor.py +++ b/river/tree/hoeffding_tree_regressor.py @@ -57,7 +57,7 @@ class HoeffdingTreeRegressor(HoeffdingTree, base.Regressor): binary_split If True, only allow binary splits. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). memory_estimate_period Interval (number of processed instances) between memory consumption checks. stop_mem_management diff --git a/river/tree/isoup_tree_regressor.py b/river/tree/isoup_tree_regressor.py index 2f92a34756..9411d1a9c9 100644 --- a/river/tree/isoup_tree_regressor.py +++ b/river/tree/isoup_tree_regressor.py @@ -62,7 +62,7 @@ class iSOUPTreeRegressor(tree.HoeffdingTreeRegressor, base.MultiTargetRegressor) binary_split If True, only allow binary splits. max_size - The max size of the tree, in Megabytes (MB). + The max size of the tree, in mebibytes (MiB). 
memory_estimate_period Interval (number of processed instances) between memory consumption checks. stop_mem_management diff --git a/river/tree/utils.py b/river/tree/utils.py index 3826d02af4..7dff6859e4 100644 --- a/river/tree/utils.py +++ b/river/tree/utils.py @@ -251,7 +251,7 @@ def calculate_object_size(obj: typing.Any, unit: str = "byte") -> int: Object to evaluate. unit The unit in which the accounted value is going to be returned. - Values: 'byte', 'kB', 'MB' (Default: 'byte'). + Values: 'byte', 'KiB', 'MiB' (Default: 'byte'). Returns ------- @@ -295,9 +295,9 @@ def calculate_object_size(obj: typing.Any, unit: str = "byte") -> int: for i in obj: to_visit.append(i) - if unit == "kB": + if unit == "KiB": final_size = byte_size / 1024 - elif unit == "MB": + elif unit == "MiB": final_size = byte_size / (2**20) else: final_size = byte_size diff --git a/river/utils/pretty.py b/river/utils/pretty.py index 6a3678128e..2ceec3f85a 100644 --- a/river/utils/pretty.py +++ b/river/utils/pretty.py @@ -68,7 +68,7 @@ def humanize_bytes(n_bytes: int): n_bytes """ - suffixes = ["B", "KB", "MB", "GB", "TB", "PB"] + suffixes = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"] human = float(n_bytes) rank = 0 if n_bytes != 0: From abd1f16c8d15fa7259dc96fdbfc5910a3b37ec4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89mile=20Royer?= Date: Thu, 25 Jul 2024 16:48:28 +0200 Subject: [PATCH 2/2] Add a changelog entry --- docs/releases/unreleased.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/releases/unreleased.md b/docs/releases/unreleased.md index 79e701b844..219b4c1a2d 100644 --- a/docs/releases/unreleased.md +++ b/docs/releases/unreleased.md @@ -1 +1,3 @@ # Unreleased + +- The units used in River have been corrected to be based on powers of 2 (KiB, MiB). This only changes the display; the behaviour is unchanged. Note that the `unit` argument of `river.tree.utils.calculate_object_size` now expects `'KiB'`/`'MiB'` instead of `'kB'`/`'MB'`.