huggingface · lhoestq · Apr 15, 2025 · Mar 25, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/setup.py b/setup.py
@@ -141,7 +141,7 @@
 AUDIO_REQUIRE = [
     "soundfile>=0.12.1",
     "librosa",
-    "soxr>=0.4.0; python_version>='3.9'",  # Supports numpy-2
+    "soxr>=0.4.0",  # Supports numpy-2
 ]
 
 VISION_REQUIRE = [

diff --git a/src/datasets/formatting/formatting.py b/src/datasets/formatting/formatting.py
@@ -22,9 +22,7 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from packaging import version
 
-from .. import config
 from ..features import Features
 from ..features.features import _ArrayXDExtensionType, _is_zero_copy_only, decode_nested_example, pandas_types_mapper
 from ..table import Table
@@ -304,49 +302,46 @@ def __repr__(self):
         self._format_all()
         return repr(self.data)
 
-    if config.PY_VERSION >= version.parse("3.9"):
-        # merging with the union ("|") operator is supported in Python 3.9+
-
-        def __or__(self, other):
-            if isinstance(other, LazyDict):
-                inst = self.copy()
-                other = other.copy()
-                other._format_all()
-                inst.keys_to_format -= other.data.keys()
-                inst.data = inst.data | other.data
-                return inst
-            if isinstance(other, dict):
-                inst = self.copy()
-                inst.keys_to_format -= other.keys()
-                inst.data = inst.data | other
-                return inst
-            return NotImplemented
-
-        def __ror__(self, other):
-            if isinstance(other, LazyDict):
-                inst = self.copy()
-                other = other.copy()
-                other._format_all()
-                inst.keys_to_format -= other.data.keys()
-                inst.data = other.data | inst.data
-                return inst
-            if isinstance(other, dict):
-                inst = self.copy()
-                inst.keys_to_format -= other.keys()
-                inst.data = other | inst.data
-                return inst
-            return NotImplemented
-
-        def __ior__(self, other):
-            if isinstance(other, LazyDict):
-                other = other.copy()
-                other._format_all()
-                self.keys_to_format -= other.data.keys()
-                self.data |= other.data
-            else:
-                self.keys_to_format -= other.keys()
-                self.data |= other
-            return self
+    def __or__(self, other):
+        """Return a copy of ``self`` merged with ``other`` (``self | other``)."""
+        if not isinstance(other, (LazyDict, dict)):
+            return NotImplemented
+        merged = self.copy()
+        if isinstance(other, LazyDict):
+            # Format a copy of `other` (not `other` itself) and merge from its plain `data` mapping.
+            other = other.copy()
+            other._format_all()
+            other = other.data
+        # Keys supplied by `other` are dropped from the copy's `keys_to_format`.
+        merged.keys_to_format -= other.keys()
+        merged.data = merged.data | other
+        return merged
+
+    def __ror__(self, other):
+        """Return ``other | self`` as a copy of ``self``; entries of ``self`` win on duplicate keys."""
+        if not isinstance(other, (LazyDict, dict)):
+            return NotImplemented
+        merged = self.copy()
+        if isinstance(other, LazyDict):
+            # Format a copy of `other` (not `other` itself) and merge from its plain `data` mapping.
+            other = other.copy()
+            other._format_all()
+            other = other.data
+        # The right operand wins in a dict union, so every key of `self` keeps its own (possibly still
+        # unformatted) value: `keys_to_format` must stay as copied instead of dropping `other`'s keys.
+        merged.data = other | merged.data
+        return merged
+
+    def __ior__(self, other):
+        """Merge ``other`` into ``self`` in place (``self |= other``) and return ``self``."""
+        if isinstance(other, LazyDict):
+            # Format a copy of `other` (not `other` itself) and merge from its plain `data` mapping.
+            other = other.copy()
+            other._format_all()
+            other = other.data
+        self.keys_to_format -= other.keys()
+        self.data |= other
+        return self
 
     def __copy__(self):
         # Identical to `UserDict.__copy__`

diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py
@@ -3175,12 +3175,11 @@ def test_tf_dataset_options(self, in_memory):
             self.assertEqual(len(tf_dataset), 2)  # One batch of 3 and one batch of 1
             self.assertEqual(len(tf_dataset_with_drop), 1)  # Incomplete batch of 1 is dropped
         # Test that `NotImplementedError` is raised `batch_size` is None and `num_workers` is > 0
-        if sys.version_info >= (3, 8):
-            with self._create_dummy_dataset(in_memory, tmp_dir.name, multiple_columns=True) as dset:
-                with self.assertRaisesRegex(
-                    NotImplementedError, "`batch_size` must be specified when using multiple workers"
-                ):
-                    dset.to_tf_dataset(columns="col_1", batch_size=None, num_workers=2)
+        with self._create_dummy_dataset(in_memory, tmp_dir.name, multiple_columns=True) as dset:
+            with self.assertRaisesRegex(
+                NotImplementedError, "`batch_size` must be specified when using multiple workers"
+            ):
+                dset.to_tf_dataset(columns="col_1", batch_size=None, num_workers=2)
         del tf_dataset  # For correct cleanup
         del tf_dataset_with_drop