From 93e52b9034f9999d248e4c745134434b163de582 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Sun, 21 Apr 2024 22:15:24 +0200
Subject: [PATCH 01/25] Add initial version of tqdm progress bar for sequential
 calculation

---
 src/pydvl/influence/array.py                | 72 +++++++++++++++++----
 src/pydvl/influence/influence_calculator.py | 29 ++++++++-
 src/pydvl/influence/torch/util.py           | 29 ++++++++-
 3 files changed, 113 insertions(+), 17 deletions(-)

diff --git a/src/pydvl/influence/array.py b/src/pydvl/influence/array.py
index d549eee9d..e0cb1e8a7 100644
--- a/src/pydvl/influence/array.py
+++ b/src/pydvl/influence/array.py
@@ -6,13 +6,13 @@
 (chunked in one resp. two dimensions), with support for efficient storage and retrieval
 using the Zarr library.
 """
-
 import logging
 from abc import ABC, abstractmethod
 from typing import Callable, Generator, Generic, List, Optional, Tuple, Union
 
 import zarr
 from numpy.typing import NDArray
+from tqdm import tqdm
 from zarr.storage import StoreLike
 
 from ..utils import log_duration
@@ -35,7 +35,11 @@ def from_numpy(self, x: NDArray) -> TensorType:
 
 class SequenceAggregator(Generic[TensorType], ABC):
     @abstractmethod
-    def __call__(self, tensor_generator: Generator[TensorType, None, None]):
+    def __call__(
+        self,
+        tensor_generator: Generator[TensorType, None, None],
+        len_generator: Optional[int] = None,
+    ):
         """
         Aggregates tensors from a generator.
 
@@ -46,7 +50,9 @@ def __call__(self, tensor_generator: Generator[TensorType, None, None]):
 
 class ListAggregator(SequenceAggregator):
     def __call__(
-        self, tensor_generator: Generator[TensorType, None, None]
+        self,
+        tensor_generator: Generator[TensorType, None, None],
+        len_generator: Optional[int] = None,
     ) -> List[TensorType]:
         """
         Aggregates tensors from a single-level generator into a list. This method simply
@@ -54,11 +60,19 @@ def __call__(
 
         Args:
             tensor_generator: A generator that yields TensorType objects.
+            len_generator: if the number of elements from the generator is
+                known, this optional parameter can be used to improve logging
+                by adding a progressbar.
 
         Returns:
             A list containing all the tensors provided by the tensor_generator.
         """
-        return [t for t in tensor_generator]
+        gen: Union[tqdm[TensorType], ] = tensor_generator
+
+        if len_generator is not None:
+            gen = tqdm(gen, total=len_generator, desc="Blocks")
+
+        return [t for t in gen]
 
 
 class NestedSequenceAggregator(Generic[TensorType], ABC):
@@ -68,6 +82,7 @@ def __call__(
         nested_generators_of_tensors: Generator[
             Generator[TensorType, None, None], None, None
         ],
+        len_outer_generator: Optional[int] = None,
     ):
         """
         Aggregates tensors from a generator of generators.
@@ -84,6 +99,7 @@ def __call__(
         nested_generators_of_tensors: Generator[
             Generator[TensorType, None, None], None, None
         ],
+        len_outer_generator: Optional[int] = None,
     ) -> List[List[TensorType]]:
         """
          Aggregates tensors from a nested generator structure into a list of lists.
@@ -93,12 +109,20 @@ def __call__(
          Args:
              nested_generators_of_tensors: A generator of generators, where each inner
                 generator yields TensorType objects.
+             len_outer_generator: if the number of elements from the outer generator is
+                known from the context, this optional parameter can be used to improve
+                logging by adding a progressbar.
 
         Returns:
             A list of lists, where each inner list contains tensors returned from one
                 of the inner generators.
         """
-        return [list(tensor_gen) for tensor_gen in nested_generators_of_tensors]
+        outer_gen = nested_generators_of_tensors
+
+        if len_outer_generator is not None:
+            outer_gen = tqdm(outer_gen, total=len_outer_generator, desc="Row blocks")
+
+        return [list(tensor_gen) for tensor_gen in outer_gen]
 
 
 class LazyChunkSequence:
@@ -114,12 +138,18 @@ class LazyChunkSequence:
     Attributes:
         generator_factory: A factory function that returns
             a generator. This generator yields chunks of the large array when called.
+        len_generator: if the number of elements from the generator is
+            known from the context, this optional parameter can be used to improve
+            logging by adding a progressbar.
     """
 
     def __init__(
-        self, generator_factory: Callable[[], Generator[TensorType, None, None]]
+        self,
+        generator_factory: Callable[[], Generator[TensorType, None, None]],
+        len_generator: Optional[int] = None,
     ):
         self.generator_factory = generator_factory
+        self.len_generator = len_generator
 
     @log_duration(log_level=logging.INFO)
     def compute(self, aggregator: Optional[SequenceAggregator] = None):
@@ -140,7 +170,7 @@ def compute(self, aggregator: Optional[SequenceAggregator] = None):
         """
         if aggregator is None:
             aggregator = ListAggregator()
-        return aggregator(self.generator_factory())
+        return aggregator(self.generator_factory(), len_generator=self.len_generator)
 
     @log_duration(log_level=logging.INFO)
     def to_zarr(
@@ -171,7 +201,13 @@ def to_zarr(
         """
         row_idx = 0
         z = None
-        for block in self.generator_factory():
+
+        gen = self.generator_factory()
+
+        if self.len_generator is not None:
+            gen = tqdm(gen, total=self.len_generator, desc="Blocks")
+
+        for block in gen:
             numpy_block = converter.to_numpy(block)
 
             if z is None:
@@ -216,7 +252,10 @@ class NestedLazyChunkSequence:
 
     Attributes:
         generator_factory: A factory function that returns a generator of generators.
-            Each inner generator yields chunks.
+            Each inner generator yields chunks
+        len_outer_generator: if the number of elements from the outer generator is
+            known from the context, this optional parameter can be used to improve
+            logging by adding a progressbar.
     """
 
     def __init__(
@@ -224,8 +263,10 @@ def __init__(
         generator_factory: Callable[
             [], Generator[Generator[TensorType, None, None], None, None]
         ],
+        len_outer_generator: Optional[int] = None,
     ):
         self.generator_factory = generator_factory
+        self.len_outer_generator = len_outer_generator
 
     @log_duration(log_level=logging.INFO)
     def compute(self, aggregator: Optional[NestedSequenceAggregator] = None):
@@ -247,7 +288,9 @@ def compute(self, aggregator: Optional[NestedSequenceAggregator] = None):
         """
         if aggregator is None:
             aggregator = NestedListAggregator()
-        return aggregator(self.generator_factory())
+        return aggregator(
+            self.generator_factory(), len_outer_generator=self.len_outer_generator
+        )
 
     @log_duration(log_level=logging.INFO)
     def to_zarr(
@@ -280,7 +323,14 @@ def to_zarr(
         row_idx = 0
         z = None
         numpy_block = None
-        for row_blocks in self.generator_factory():
+        block_generator = self.generator_factory()
+
+        if self.len_outer_generator is not None:
+            block_generator = tqdm(
+                block_generator, total=self.len_outer_generator, desc="Row blocks"
+            )
+
+        for row_blocks in block_generator:
             col_idx = 0
             for block in row_blocks:
                 numpy_block = converter.to_numpy(block)
diff --git a/src/pydvl/influence/influence_calculator.py b/src/pydvl/influence/influence_calculator.py
index 7e1186f29..1a40bdd5a 100644
--- a/src/pydvl/influence/influence_calculator.py
+++ b/src/pydvl/influence/influence_calculator.py
@@ -619,8 +619,14 @@ def influence_factors(
         Returns:
             A lazy data structure representing the chunks of the resulting tensor
         """
+        try:
+            len_iterable = len(data_iterable)
+        except Exception as e:
+            logger.debug(f"Failed to retrieve len of data iterable: {e}")
+            len_iterable = None
+
         tensors_gen_factory = partial(self._influence_factors_gen, data_iterable)
-        return LazyChunkSequence(tensors_gen_factory)
+        return LazyChunkSequence(tensors_gen_factory, len_generator=len_iterable)
 
     def _influences_gen(
         self,
@@ -677,7 +683,15 @@ def influences(
             mode,
         )
 
-        return NestedLazyChunkSequence(nested_tensor_gen_factory)
+        try:
+            len_iterable = len(test_data_iterable)
+        except Exception as e:
+            logger.debug(f"Failed to retrieve len of test data iterable: {e}")
+            len_iterable = None
+
+        return NestedLazyChunkSequence(
+            nested_tensor_gen_factory, len_outer_generator=len_iterable
+        )
 
     def _influences_from_factors_gen(
         self,
@@ -735,4 +749,13 @@ def influences_from_factors(
             train_data_iterable,
             mode,
         )
-        return NestedLazyChunkSequence(nested_tensor_gen)
+
+        try:
+            len_iterable = len(z_test_factors)
+        except Exception as e:
+            logger.debug(f"Failed to retrieve len of factors iterable: {e}")
+            len_iterable = None
+
+        return NestedLazyChunkSequence(
+            nested_tensor_gen, len_outer_generator=len_iterable
+        )
diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py
index 079acf9c9..58385df5b 100644
--- a/src/pydvl/influence/torch/util.py
+++ b/src/pydvl/influence/torch/util.py
@@ -21,6 +21,7 @@
 from dask import array as da
 from numpy.typing import NDArray
 from torch.utils.data import Dataset
+from tqdm import tqdm
 
 from ..array import NestedSequenceAggregator, NumpyConverter, SequenceAggregator
 
@@ -398,7 +399,11 @@ class TorchCatAggregator(SequenceAggregator[torch.Tensor]):
     function. Concatenation is done along the first dimension of the chunks.
     """
 
-    def __call__(self, tensor_generator: Generator[torch.Tensor, None, None]):
+    def __call__(
+        self,
+        tensor_generator: Generator[torch.Tensor, None, None],
+        len_generator: Optional[int] = None,
+    ):
         """
         Aggregates tensors from a single-level generator into a single tensor by
         concatenating them. This method is a straightforward way to combine a sequence
@@ -406,12 +411,20 @@ def __call__(self, tensor_generator: Generator[torch.Tensor, None, None]):
 
         Args:
             tensor_generator: A generator that yields `torch.Tensor` objects.
+            len_generator: if the number of elements from the generator is
+                known, this optional parameter can be used to improve logging
+                by adding a progressbar.
 
         Returns:
             A single tensor formed by concatenating all tensors from the generator.
                 The concatenation is performed along the default dimension (0).
         """
-        return torch.cat(list(tensor_generator))
+        t_gen = tensor_generator
+
+        if len_generator is not None:
+            t_gen = tqdm(t_gen, total=len_generator, desc="Blocks")
+
+        return torch.cat(list(t_gen))
 
 
 class NestedTorchCatAggregator(NestedSequenceAggregator[torch.Tensor]):
@@ -425,6 +438,7 @@ def __call__(
         nested_generators_of_tensors: Generator[
             Generator[torch.Tensor, None, None], None, None
         ],
+        len_outer_generator: Optional[int] = None,
     ):
         """
         Aggregates tensors from a nested generator structure into a single tensor by
@@ -435,17 +449,26 @@ def __call__(
         Args:
             nested_generators_of_tensors: A generator of generators, where each inner
                 generator yields `torch.Tensor` objects.
+            len_outer_generator: if the number of elements from the outer generator is
+                known from the context, this optional parameter can be used to improve
+                logging by adding a progressbar.
 
         Returns:
             A single tensor formed by concatenating all tensors from the nested
             generators.
 
         """
+
+        outer_gen = nested_generators_of_tensors
+
+        if len_outer_generator is not None:
+            outer_gen = tqdm(outer_gen, total=len_outer_generator, desc="Row blocks")
+
         return torch.cat(
             list(
                 map(
                     lambda tensor_gen: torch.cat(list(tensor_gen), dim=1),
-                    nested_generators_of_tensors,
+                    outer_gen,
                 )
             )
         )

From 56166d39dc33db809297717b612e4bae4e6423d4 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 22 Apr 2024 09:18:31 +0200
Subject: [PATCH 02/25] Add type casting to satisfy mypy

---
 src/pydvl/influence/array.py                  | 45 ++++++++++++++-----
 .../base_influence_function_model.py          |  2 +
 src/pydvl/influence/influence_calculator.py   |  8 ++--
 .../torch/influence_function_model.py         |  9 ++++
 src/pydvl/influence/torch/util.py             | 27 +++++++----
 5 files changed, 66 insertions(+), 25 deletions(-)

diff --git a/src/pydvl/influence/array.py b/src/pydvl/influence/array.py
index e0cb1e8a7..c60e11693 100644
--- a/src/pydvl/influence/array.py
+++ b/src/pydvl/influence/array.py
@@ -8,7 +8,17 @@
 """
 import logging
 from abc import ABC, abstractmethod
-from typing import Callable, Generator, Generic, List, Optional, Tuple, Union
+from typing import (
+    Callable,
+    Generator,
+    Generic,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
 
 import zarr
 from numpy.typing import NDArray
@@ -67,10 +77,13 @@ def __call__(
         Returns:
             A list containing all the tensors provided by the tensor_generator.
         """
-        gen: Union[tqdm[TensorType], ] = tensor_generator
+
+        gen = cast(Iterator[TensorType], tensor_generator)
 
         if len_generator is not None:
-            gen = tqdm(gen, total=len_generator, desc="Blocks")
+            gen = cast(
+                Iterator[TensorType], tqdm(gen, total=len_generator, desc="Blocks")
+            )
 
         return [t for t in gen]
 
@@ -117,15 +130,18 @@ def __call__(
             A list of lists, where each inner list contains tensors returned from one
                 of the inner generators.
         """
-        outer_gen = nested_generators_of_tensors
+        outer_gen = cast(Iterator[Iterator[TensorType]], nested_generators_of_tensors)
 
         if len_outer_generator is not None:
-            outer_gen = tqdm(outer_gen, total=len_outer_generator, desc="Row blocks")
+            outer_gen = cast(
+                Iterator[Iterator[TensorType]],
+                tqdm(outer_gen, total=len_outer_generator, desc="Row blocks"),
+            )
 
         return [list(tensor_gen) for tensor_gen in outer_gen]
 
 
-class LazyChunkSequence:
+class LazyChunkSequence(Generic[TensorType]):
     """
     A class representing a chunked, and lazily evaluated array,
     where the chunking is restricted to the first dimension
@@ -202,10 +218,12 @@ def to_zarr(
         row_idx = 0
         z = None
 
-        gen = self.generator_factory()
+        gen = cast(Iterator[TensorType], self.generator_factory())
 
         if self.len_generator is not None:
-            gen = tqdm(gen, total=self.len_generator, desc="Blocks")
+            gen = cast(
+                Iterator[TensorType], tqdm(gen, total=self.len_generator, desc="Blocks")
+            )
 
         for block in gen:
             numpy_block = converter.to_numpy(block)
@@ -240,7 +258,7 @@ def _initialize_zarr_array(block: NDArray, path_or_url: str, overwrite: bool):
         )
 
 
-class NestedLazyChunkSequence:
+class NestedLazyChunkSequence(Generic[TensorType]):
     """
     A class representing chunked, and lazily evaluated array, where the chunking is
     restricted to the first two dimensions.
@@ -323,11 +341,14 @@ def to_zarr(
         row_idx = 0
         z = None
         numpy_block = None
-        block_generator = self.generator_factory()
+        block_generator = cast(Iterator[Iterator[TensorType]], self.generator_factory())
 
         if self.len_outer_generator is not None:
-            block_generator = tqdm(
-                block_generator, total=self.len_outer_generator, desc="Row blocks"
+            block_generator = cast(
+                Iterator[Iterator[TensorType]],
+                tqdm(
+                    block_generator, total=self.len_outer_generator, desc="Row blocks"
+                ),
             )
 
         for row_blocks in block_generator:
diff --git a/src/pydvl/influence/base_influence_function_model.py b/src/pydvl/influence/base_influence_function_model.py
index 73fe53d8f..541fbedf0 100644
--- a/src/pydvl/influence/base_influence_function_model.py
+++ b/src/pydvl/influence/base_influence_function_model.py
@@ -4,6 +4,8 @@
 from enum import Enum
 from typing import Collection, Generic, Iterable, Optional, Type, TypeVar
 
+__all__ = ["InfluenceMode"]
+
 
 class InfluenceMode(str, Enum):
     """
diff --git a/src/pydvl/influence/influence_calculator.py b/src/pydvl/influence/influence_calculator.py
index 1a40bdd5a..7c48e8636 100644
--- a/src/pydvl/influence/influence_calculator.py
+++ b/src/pydvl/influence/influence_calculator.py
@@ -7,7 +7,7 @@
 
 import logging
 from functools import partial
-from typing import Generator, Iterable, Optional, Tuple, Type, Union
+from typing import Generator, Iterable, Optional, Sized, Tuple, Type, Union, cast
 
 import distributed
 from dask import array as da
@@ -620,7 +620,7 @@ def influence_factors(
             A lazy data structure representing the chunks of the resulting tensor
         """
         try:
-            len_iterable = len(data_iterable)
+            len_iterable = len(cast(Sized, data_iterable))
         except Exception as e:
             logger.debug(f"Failed to retrieve len of data iterable: {e}")
             len_iterable = None
@@ -684,7 +684,7 @@ def influences(
         )
 
         try:
-            len_iterable = len(test_data_iterable)
+            len_iterable = len(cast(Sized, test_data_iterable))
         except Exception as e:
             logger.debug(f"Failed to retrieve len of test data iterable: {e}")
             len_iterable = None
@@ -751,7 +751,7 @@ def influences_from_factors(
         )
 
         try:
-            len_iterable = len(z_test_factors)
+            len_iterable = len(cast(Sized, z_test_factors))
         except Exception as e:
             logger.debug(f"Failed to retrieve len of factors iterable: {e}")
             len_iterable = None
diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index 46a5fa16e..fe3290195 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -41,6 +41,15 @@
     flatten_dimensions,
 )
 
+__all__ = [
+    "DirectInfluence",
+    "CgInfluence",
+    "LissaInfluence",
+    "ArnoldiInfluence",
+    "EkfacInfluence",
+    "NystroemSketchInfluence",
+]
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py
index 58385df5b..581894af2 100644
--- a/src/pydvl/influence/torch/util.py
+++ b/src/pydvl/influence/torch/util.py
@@ -7,12 +7,14 @@
     Dict,
     Generator,
     Iterable,
+    Iterator,
     List,
     Mapping,
     Optional,
     Tuple,
     Type,
     Union,
+    cast,
 )
 
 import dask
@@ -37,6 +39,8 @@
     "TorchCatAggregator",
     "NestedTorchCatAggregator",
     "torch_dataset_to_dask_array",
+    "EkfacRepresentation",
+    "empirical_cross_entropy_loss_fn",
 ]
 
 
@@ -297,11 +301,11 @@ def _infer_data_len(d_set: Dataset):
                 return total_size
             else:
                 logger.warning(
-                    err_msg + f" Infer the number of samples from the dataset, "
-                    f"via iterating the dataset once. "
-                    f"This might induce severe overhead, so consider"
-                    f"providing total_size, if you know the number of samples "
-                    f"beforehand."
+                    err_msg + " Infer the number of samples from the dataset, "
+                    "via iterating the dataset once. "
+                    "This might induce severe overhead, so consider"
+                    "providing total_size, if you know the number of samples "
+                    "beforehand."
                 )
                 idx = 0
                 while True:
@@ -419,10 +423,12 @@ def __call__(
             A single tensor formed by concatenating all tensors from the generator.
                 The concatenation is performed along the default dimension (0).
         """
-        t_gen = tensor_generator
+        t_gen = cast(Iterator[torch.Tensor], tensor_generator)
 
         if len_generator is not None:
-            t_gen = tqdm(t_gen, total=len_generator, desc="Blocks")
+            t_gen = cast(
+                Iterator[torch.Tensor], tqdm(t_gen, total=len_generator, desc="Blocks")
+            )
 
         return torch.cat(list(t_gen))
 
@@ -459,10 +465,13 @@ def __call__(
 
         """
 
-        outer_gen = nested_generators_of_tensors
+        outer_gen = cast(Iterator[Iterator[torch.Tensor]], nested_generators_of_tensors)
 
         if len_outer_generator is not None:
-            outer_gen = tqdm(outer_gen, total=len_outer_generator, desc="Row blocks")
+            outer_gen = cast(
+                Iterator[Iterator[torch.Tensor]],
+                tqdm(outer_gen, total=len_outer_generator, desc="Row blocks"),
+            )
 
         return torch.cat(
             list(

From e4a29a61f595cb94d933c1d90016ed355c6c0f4c Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 22 Apr 2024 11:19:59 +0200
Subject: [PATCH 03/25] =?UTF-8?q?Bump=20version:=200.9.1=20=E2=86=92=200.9?=
 =?UTF-8?q?.2.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 setup.py              | 2 +-
 src/pydvl/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f27d47b90..e59d38df8 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.9.1
+current_version = 0.9.2.dev0
 commit = False
 tag = False
 allow_dirty = False
diff --git a/setup.py b/setup.py
index 805a93f22..a17918e49 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     package_data={"pydvl": ["py.typed"]},
     packages=find_packages(where="src"),
     include_package_data=True,
-    version="0.9.1",
+    version="0.9.2.dev0",
     description="The Python Data Valuation Library",
     install_requires=[
         line
diff --git a/src/pydvl/__init__.py b/src/pydvl/__init__.py
index ba22fa08d..b90f353a0 100644
--- a/src/pydvl/__init__.py
+++ b/src/pydvl/__init__.py
@@ -7,4 +7,4 @@
 The two main modules you will want to look at are [value][pydvl.value] and
 [influence][pydvl.influence].
 """
-__version__ = "0.9.1"
+__version__ = "0.9.2.dev0"

From 5327cac0311637c62a28cf1d04cfefc403c0d4a9 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 22 Apr 2024 11:24:00 +0200
Subject: [PATCH 04/25] Update version number in CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0092f6045..52bc910a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Changelog
 
-## Unreleased
+## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed
 

From 0c81e5355b53eafa241b033b321d13de9ecbcf2d Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 00:28:57 +0200
Subject: [PATCH 05/25] Refactor aggregators call interface to take sequence
 objects instead of generators and optional int parameter, adapt docstrings

---
 src/pydvl/influence/array.py      | 62 +++++++++++++------------------
 src/pydvl/influence/torch/util.py | 41 ++++++++++----------
 2 files changed, 45 insertions(+), 58 deletions(-)

diff --git a/src/pydvl/influence/array.py b/src/pydvl/influence/array.py
index c60e11693..7e71050f9 100644
--- a/src/pydvl/influence/array.py
+++ b/src/pydvl/influence/array.py
@@ -6,6 +6,8 @@
 (chunked in one resp. two dimensions), with support for efficient storage and retrieval
 using the Zarr library.
 """
+from __future__ import annotations
+
 import logging
 from abc import ABC, abstractmethod
 from typing import (
@@ -47,11 +49,10 @@ class SequenceAggregator(Generic[TensorType], ABC):
     @abstractmethod
     def __call__(
         self,
-        tensor_generator: Generator[TensorType, None, None],
-        len_generator: Optional[int] = None,
+        tensor_sequence: LazyChunkSequence,
     ):
         """
-        Aggregates tensors from a generator.
+        Aggregates tensors from a sequence.
 
         Implement this method to define how a sequence of tensors, provided by a
         generator, should be combined.
@@ -61,28 +62,26 @@ def __call__(
 class ListAggregator(SequenceAggregator):
     def __call__(
         self,
-        tensor_generator: Generator[TensorType, None, None],
-        len_generator: Optional[int] = None,
+        tensor_sequence: LazyChunkSequence,
     ) -> List[TensorType]:
         """
         Aggregates tensors from a single-level generator into a list. This method simply
         collects each tensor emitted by the generator into a single list.
 
         Args:
-            tensor_generator: A generator that yields TensorType objects.
-            len_generator: if the number of elements from the generator is
-                known, this optional parameter can be used to improve logging
-                by adding a progressbar.
+            tensor_sequence: Object wrapping a generator that yields `TensorType`
+                objects.
 
         Returns:
             A list containing all the tensors provided by the tensor_generator.
         """
 
-        gen = cast(Iterator[TensorType], tensor_generator)
+        gen = cast(Iterator[TensorType], tensor_sequence.generator_factory())
 
-        if len_generator is not None:
+        if tensor_sequence.len_generator is not None:
             gen = cast(
-                Iterator[TensorType], tqdm(gen, total=len_generator, desc="Blocks")
+                Iterator[TensorType],
+                tqdm(gen, total=tensor_sequence.len_generator, desc="Blocks"),
             )
 
         return [t for t in gen]
@@ -90,15 +89,9 @@ def __call__(
 
 class NestedSequenceAggregator(Generic[TensorType], ABC):
     @abstractmethod
-    def __call__(
-        self,
-        nested_generators_of_tensors: Generator[
-            Generator[TensorType, None, None], None, None
-        ],
-        len_outer_generator: Optional[int] = None,
-    ):
+    def __call__(self, nested_sequence_of_tensors: NestedLazyChunkSequence):
         """
-        Aggregates tensors from a generator of generators.
+        Aggregates tensors from a nested sequence of tensors.
 
         Implement this method to specify how tensors, nested in two layers of
         generators, should be combined. Useful for complex data structures where tensors
@@ -109,10 +102,7 @@ def __call__(
 class NestedListAggregator(NestedSequenceAggregator):
     def __call__(
         self,
-        nested_generators_of_tensors: Generator[
-            Generator[TensorType, None, None], None, None
-        ],
-        len_outer_generator: Optional[int] = None,
+        nested_sequence_of_tensors: NestedLazyChunkSequence,
     ) -> List[List[TensorType]]:
         """
          Aggregates tensors from a nested generator structure into a list of lists.
@@ -120,22 +110,22 @@ def __call__(
          list structure.
 
          Args:
-             nested_generators_of_tensors: A generator of generators, where each inner
-                generator yields TensorType objects.
-             len_outer_generator: if the number of elements from the outer generator is
-                known from the context, this optional parameter can be used to improve
-                logging by adding a progressbar.
+             nested_sequence_of_tensors: Object wrapping a generator of generators,
+                where each inner generator yields TensorType objects.
 
         Returns:
             A list of lists, where each inner list contains tensors returned from one
                 of the inner generators.
         """
-        outer_gen = cast(Iterator[Iterator[TensorType]], nested_generators_of_tensors)
-
-        if len_outer_generator is not None:
+        outer_gen = cast(
+            Iterator[Iterator[TensorType]],
+            nested_sequence_of_tensors.generator_factory(),
+        )
+        len_outer_gen = nested_sequence_of_tensors.len_outer_generator
+        if len_outer_gen is not None:
             outer_gen = cast(
                 Iterator[Iterator[TensorType]],
-                tqdm(outer_gen, total=len_outer_generator, desc="Row blocks"),
+                tqdm(outer_gen, total=len_outer_gen, desc="Row blocks"),
             )
 
         return [list(tensor_gen) for tensor_gen in outer_gen]
@@ -186,7 +176,7 @@ def compute(self, aggregator: Optional[SequenceAggregator] = None):
         """
         if aggregator is None:
             aggregator = ListAggregator()
-        return aggregator(self.generator_factory(), len_generator=self.len_generator)
+        return aggregator(self)
 
     @log_duration(log_level=logging.INFO)
     def to_zarr(
@@ -306,9 +296,7 @@ def compute(self, aggregator: Optional[NestedSequenceAggregator] = None):
         """
         if aggregator is None:
             aggregator = NestedListAggregator()
-        return aggregator(
-            self.generator_factory(), len_outer_generator=self.len_outer_generator
-        )
+        return aggregator(self)
 
     @log_duration(log_level=logging.INFO)
     def to_zarr(
diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py
index 581894af2..17813421b 100644
--- a/src/pydvl/influence/torch/util.py
+++ b/src/pydvl/influence/torch/util.py
@@ -25,7 +25,13 @@
 from torch.utils.data import Dataset
 from tqdm import tqdm
 
-from ..array import NestedSequenceAggregator, NumpyConverter, SequenceAggregator
+from ..array import (
+    LazyChunkSequence,
+    NestedLazyChunkSequence,
+    NestedSequenceAggregator,
+    NumpyConverter,
+    SequenceAggregator,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -405,8 +411,7 @@ class TorchCatAggregator(SequenceAggregator[torch.Tensor]):
 
     def __call__(
         self,
-        tensor_generator: Generator[torch.Tensor, None, None],
-        len_generator: Optional[int] = None,
+        tensor_sequence: LazyChunkSequence[torch.Tensor],
     ):
         """
         Aggregates tensors from a single-level generator into a single tensor by
@@ -414,17 +419,15 @@ def __call__(
         of tensors into one larger tensor.
 
         Args:
-            tensor_generator: A generator that yields `torch.Tensor` objects.
-            len_generator: if the number of elements from the generator is
-                known, this optional parameter can be used to improve logging
-                by adding a progressbar.
+            tensor_sequence: Object wrapping a generator that yields `torch.Tensor`
+                objects.
 
         Returns:
             A single tensor formed by concatenating all tensors from the generator.
                 The concatenation is performed along the default dimension (0).
         """
-        t_gen = cast(Iterator[torch.Tensor], tensor_generator)
-
+        t_gen = cast(Iterator[torch.Tensor], tensor_sequence.generator_factory())
+        len_generator = tensor_sequence.len_generator
         if len_generator is not None:
             t_gen = cast(
                 Iterator[torch.Tensor], tqdm(t_gen, total=len_generator, desc="Blocks")
@@ -440,11 +443,7 @@ class NestedTorchCatAggregator(NestedSequenceAggregator[torch.Tensor]):
     """
 
     def __call__(
-        self,
-        nested_generators_of_tensors: Generator[
-            Generator[torch.Tensor, None, None], None, None
-        ],
-        len_outer_generator: Optional[int] = None,
+        self, nested_sequence_of_tensors: NestedLazyChunkSequence[torch.Tensor]
     ):
         """
         Aggregates tensors from a nested generator structure into a single tensor by
@@ -453,11 +452,8 @@ def __call__(
         form the final tensor.
 
         Args:
-            nested_generators_of_tensors: A generator of generators, where each inner
-                generator yields `torch.Tensor` objects.
-            len_outer_generator: if the number of elements from the outer generator is
-                known from the context, this optional parameter can be used to improve
-                logging by adding a progressbar.
+            nested_sequence_of_tensors: Object wrapping a generator of generators,
+                where each inner generator yields `torch.Tensor` objects.
 
         Returns:
             A single tensor formed by concatenating all tensors from the nested
@@ -465,8 +461,11 @@ def __call__(
 
         """
 
-        outer_gen = cast(Iterator[Iterator[torch.Tensor]], nested_generators_of_tensors)
-
+        outer_gen = cast(
+            Iterator[Iterator[torch.Tensor]],
+            nested_sequence_of_tensors.generator_factory(),
+        )
+        len_outer_generator = nested_sequence_of_tensors.len_outer_generator
         if len_outer_generator is not None:
             outer_gen = cast(
                 Iterator[Iterator[torch.Tensor]],

From cafa32a117a76cb47d57c70c9460505c93f75d3d Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 01:28:19 +0200
Subject: [PATCH 06/25] Fix missing move to model device for EkfacInfluence
 implementation

---
 .../influence/torch/influence_function_model.py     | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index 46a5fa16e..4a6cb638c 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -1195,7 +1195,7 @@ def _get_kfac_blocks(
             data, disable=not self.progress, desc="K-FAC blocks - batch progress"
         ):
             data_len += x.shape[0]
-            pred_y = self.model(x)
+            pred_y = self.model(x.to(self.model_device))
             loss = empirical_cross_entropy_loss_fn(pred_y)
             loss.backward()
 
@@ -1319,7 +1319,7 @@ def _update_diag(
             data, disable=not self.progress, desc="Update Diagonal - batch progress"
         ):
             data_len += x.shape[0]
-            pred_y = self.model(x)
+            pred_y = self.model(x.to(self.model_device))
             loss = empirical_cross_entropy_loss_fn(pred_y)
             loss.backward()
 
@@ -1526,7 +1526,10 @@ def influences_from_factors_by_layer(
             influences = {}
             for layer_id, layer_z_test in z_test_factors.items():
                 end_idx = start_idx + layer_z_test.shape[1]
-                influences[layer_id] = layer_z_test @ total_grad[:, start_idx:end_idx].T
+                influences[layer_id] = (
+                    layer_z_test.to(self.model_device)
+                    @ total_grad[:, start_idx:end_idx].T
+                )
                 start_idx = end_idx
             return influences
         elif mode == InfluenceMode.Perturbation:
@@ -1539,7 +1542,7 @@ def influences_from_factors_by_layer(
                 end_idx = start_idx + layer_z_test.shape[1]
                 influences[layer_id] = torch.einsum(
                     "ia,j...a->ij...",
-                    layer_z_test,
+                    layer_z_test.to(self.model_device),
                     total_mixed_grad[:, start_idx:end_idx],
                 )
                 start_idx = end_idx
@@ -1626,7 +1629,7 @@ def explore_hessian_regularization(
             being dictionaries containing the influences for each layer of the model,
             with the layer name as key.
         """
-        grad = self._loss_grad(x, y)
+        grad = self._loss_grad(x.to(self.model_device), y.to(self.model_device))
         influences_by_reg_value = {}
         for reg_value in regularization_values:
             reg_factors = self._solve_hvp_by_layer(

From a151422d31dff79624b5750a42d280b5ab7ee061 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 01:35:01 +0200
Subject: [PATCH 07/25] Update CHANGELOG.md

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52bc910a4..abea5f5ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## Unreleased
+
+### Fixed
+
+- Fixed missing move of tensors to model device in `EkfacInfluence` 
+  implementation [PR #570](https://github.com/aai-institute/pyDVL/pull/570)
+
 ## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed

From 36ea3bada2221febe1b1cb75ff687ad122ce2fdb Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 11:31:39 +0200
Subject: [PATCH 08/25] Add device move in influences_from_factors method in
 base class TorchInfluenceFunctionModel

---
 src/pydvl/influence/torch/influence_function_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index 4a6cb638c..08fd64a14 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -303,13 +303,13 @@ def influences_from_factors(
         """
         if mode == InfluenceMode.Up:
             return (
-                z_test_factors
+                z_test_factors.to(self.model_device)
                 @ self._loss_grad(x.to(self.model_device), y.to(self.model_device)).T
             )
         elif mode == InfluenceMode.Perturbation:
             return torch.einsum(
                 "ia,j...a->ij...",
-                z_test_factors,
+                z_test_factors.to(self.model_device),
                 self._flat_loss_mixed_grad(
                     x.to(self.model_device), y.to(self.model_device)
                 ),

From 919e73f17063aaaa00515a67ec3b3d17338f51bb Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 12:16:53 +0200
Subject: [PATCH 09/25] Override `to` method of `CgInfluence`, add `to` method
 to preconditioners, fix wrong device for indices array in block CG
 implementation

---
 .../influence/torch/influence_function_model.py |  9 ++++++++-
 src/pydvl/influence/torch/pre_conditioner.py    | 17 +++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index 46a5fa16e..b4ec964cc 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -706,7 +706,9 @@ def mat_mat(x: torch.Tensor):
         R = (rhs - mat_mat(X)).T
         Z = R if self.pre_conditioner is None else self.pre_conditioner.solve(R)
         P, _, _ = torch.linalg.svd(Z, full_matrices=False)
-        active_indices = torch.as_tensor(list(range(X.shape[-1])), dtype=torch.long)
+        active_indices = torch.as_tensor(
+            list(range(X.shape[-1])), dtype=torch.long, device=self.model_device
+        )
 
         maxiter = self.maxiter if self.maxiter is not None else len(rhs) * 10
         y_norm = torch.linalg.norm(rhs, dim=1)
@@ -758,6 +760,11 @@ def mat_mat(x: torch.Tensor):
 
         return X.T
 
+    def to(self, device: torch.device):
+        if self.pre_conditioner is not None:
+            self.pre_conditioner = self.pre_conditioner.to(device)
+        return super().to(device)
+
 
 class LissaInfluence(TorchInfluenceFunctionModel):
     r"""
diff --git a/src/pydvl/influence/torch/pre_conditioner.py b/src/pydvl/influence/torch/pre_conditioner.py
index 4497d81c2..f42852c2c 100644
--- a/src/pydvl/influence/torch/pre_conditioner.py
+++ b/src/pydvl/influence/torch/pre_conditioner.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
 from typing import Callable, Optional
 
@@ -70,6 +72,11 @@ def solve(self, rhs: torch.Tensor):
     def _solve(self, rhs: torch.Tensor):
         pass
 
+    @abstractmethod
+    def to(self, device: torch.device) -> PreConditioner:
+        """Implement this to move the (potentially fitted) preconditioner to a
+        specific device"""
+
 
 class JacobiPreConditioner(PreConditioner):
     r"""
@@ -141,6 +148,11 @@ def _solve(self, rhs: torch.Tensor):
 
         return rhs * inv_diag.unsqueeze(-1)
 
+    def to(self, device: torch.device) -> JacobiPreConditioner:
+        if self._diag is not None:
+            self._diag = self._diag.to(device)
+        return self
+
 
 class NystroemPreConditioner(PreConditioner):
     r"""
@@ -233,3 +245,8 @@ def _solve(self, rhs: torch.Tensor):
             result = result.squeeze()
 
         return result
+
+    def to(self, device: torch.device) -> NystroemPreConditioner:
+        if self._low_rank_approx is not None:
+            self._low_rank_approx = self._low_rank_approx.to(device)
+        return self

From 6375afe31d900698bb30ac42a69657d65fbdc31c Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 12:24:26 +0200
Subject: [PATCH 10/25] Update CHANGELOG.md

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52bc910a4..e2d4bf923 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## Unreleased
+
+### Fixed
+
+- Missing move to device of `preconditioner` in `CgInfluence` implementation
+  [PR #572](https://github.com/aai-institute/pyDVL/pull/572)
+
 ## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed

From 18d4fb8ebd1acf0ab12a81bc5543e2619e313cc2 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 13:25:22 +0200
Subject: [PATCH 11/25] Add functionality to set a device fixture depending on
 the availability of cuda and a user input (pytest --with-cuda)

---
 CONTRIBUTING.md                               |   7 +
 tests/conftest.py                             |   6 +
 tests/influence/torch/conftest.py             |  12 ++
 tests/influence/torch/test_influence_model.py | 143 ++++++++++++------
 4 files changed, 121 insertions(+), 47 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 56d8ead7b..ecd1288de 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -131,6 +131,13 @@ There are a few important arguments:
 - `--slow-tests` enables running slow tests. See below for a description
   of slow tests.
 
+- `--with-cuda` sets the device fixture in [tests/influence/torch/conftest.py](
+  tests/influence/torch/conftest.py) to `cuda` if it is available.
+  Using this fixture within tests, you can run parts of your tests on a `cuda`
+  device. Be aware that you still have to handle device placement manually
+  within each test: setting this flag does not by itself make all tests run
+  on a GPU.
+
 ### Markers
 
 We use a few different markers to differentiate between tests and runs
diff --git a/tests/conftest.py b/tests/conftest.py
index b08f09377..d8594c314 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -48,6 +48,12 @@ def pytest_addoption(parser):
         default=False,
         help="Disable reporting. Verbose mode takes precedence.",
     )
+    parser.addoption(
+        "--with-cuda",
+        action="store_true",
+        default=False,
+        help="Set device fixture to 'cuda' if available",
+    )
 
 
 @pytest.fixture
diff --git a/tests/influence/torch/conftest.py b/tests/influence/torch/conftest.py
index b16a2d856..37459f1cc 100644
--- a/tests/influence/torch/conftest.py
+++ b/tests/influence/torch/conftest.py
@@ -1,5 +1,6 @@
 from typing import Tuple
 
+import pytest
 import torch
 from numpy.typing import NDArray
 from torch.optim import LBFGS
@@ -59,3 +60,14 @@ def closure():
 def torch_linear_model_to_numpy(model: torch.nn.Linear) -> Tuple[NDArray, NDArray]:
     model.eval()
     return model.weight.data.numpy(), model.bias.data.numpy()
+
+
+@pytest.fixture(scope="session")
+def device(request):
+    import torch
+
+    use_cuda = request.config.getoption("--with-cuda")
+    if use_cuda and torch.cuda.is_available():
+        return torch.device("cuda")
+    else:
+        return torch.device("cpu")
diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py
index 0631c60fc..d2203a84e 100644
--- a/tests/influence/torch/test_influence_model.py
+++ b/tests/influence/torch/test_influence_model.py
@@ -340,6 +340,7 @@ def test_influence_linear_model(
     rtol,
     mode: InfluenceMode,
     train_set_size: int,
+    device: torch.device,
     hessian_reg: float = 0.1,
     test_set_size: int = 20,
     problem_dimension: Tuple[int, int] = (4, 20),
@@ -373,16 +374,20 @@ def test_influence_linear_model(
 
     train_data_set = TensorDataset(*list(map(torch.from_numpy, train_data)))
     train_data_loader = DataLoader(train_data_set, batch_size=40, num_workers=0)
-    influence = influence_factory(linear_layer, loss, train_data_loader, hessian_reg)
+    influence = influence_factory(
+        linear_layer.to(device), loss, train_data_loader, hessian_reg
+    )
 
     x_train, y_train = tuple(map(torch.from_numpy, train_data))
     x_test, y_test = tuple(map(torch.from_numpy, test_data))
-    influence_values = influence.influences(
-        x_test, y_test, x_train, y_train, mode=mode
-    ).numpy()
-    sym_influence_values = influence.influences(
-        x_train, y_train, x_train, y_train, mode=mode
-    ).numpy()
+    influence_values = (
+        influence.influences(x_test, y_test, x_train, y_train, mode=mode).cpu().numpy()
+    )
+    sym_influence_values = (
+        influence.influences(x_train, y_train, x_train, y_train, mode=mode)
+        .cpu()
+        .numpy()
+    )
 
     with pytest.raises(ValueError):
         influence.influences(x_test, y_test, x=x_train, mode=mode)
@@ -431,6 +436,7 @@ def test_influences_lissa(
     ],
     direct_influences,
     influence_factory,
+    device,
 ):
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
@@ -438,11 +444,15 @@ def test_influences_lissa(
         TensorDataset(x_train, y_train), batch_size=test_case.batch_size
     )
     influence_model = influence_factory(
-        model, loss, train_dataloader, test_case.hessian_reg
+        model.to(device), loss, train_dataloader, test_case.hessian_reg
+    )
+    approx_influences = (
+        influence_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
     )
-    approx_influences = influence_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
 
     assert not np.any(np.isnan(approx_influences))
 
@@ -497,9 +507,10 @@ def test_influences_low_rank(
     direct_sym_influences,
     direct_factors,
     influence_factory,
+    device: torch.device,
 ):
-    atol = 1e-8
-    rtol = 1e-5
+    atol = 1e-7
+    rtol = 1e-4
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
     num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
@@ -509,7 +520,7 @@ def test_influences_low_rank(
     )
 
     influence_func_model = influence_factory(
-        model,
+        model.to(device),
         loss,
         test_case.hessian_reg,
         num_parameters - 1,
@@ -525,33 +536,47 @@ def test_influences_low_rank(
 
     influence_func_model = influence_func_model.fit(train_dataloader)
 
-    low_rank_influence = influence_func_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    low_rank_influence = (
+        influence_func_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
 
-    sym_low_rank_influence = influence_func_model.influences(
-        x_train, y_train, mode=test_case.mode
-    ).numpy()
+    sym_low_rank_influence = (
+        influence_func_model.influences(x_train, y_train, mode=test_case.mode)
+        .cpu()
+        .numpy()
+    )
 
     low_rank_factors = influence_func_model.influence_factors(x_test, y_test)
     assert np.allclose(
         direct_factors,
-        influence_func_model.influence_factors(x_train, y_train).numpy(),
+        influence_func_model.influence_factors(x_train, y_train).cpu().numpy(),
         atol=atol,
         rtol=rtol,
     )
 
     if test_case.mode is InfluenceMode.Up:
-        low_rank_influence_transpose = influence_func_model.influences(
-            x_train, y_train, x_test, y_test, mode=test_case.mode
-        ).numpy()
+        low_rank_influence_transpose = (
+            influence_func_model.influences(
+                x_train, y_train, x_test, y_test, mode=test_case.mode
+            )
+            .cpu()
+            .numpy()
+        )
         assert np.allclose(
             low_rank_influence_transpose, low_rank_influence.swapaxes(0, 1)
         )
 
-    low_rank_values_from_factors = influence_func_model.influences_from_factors(
-        low_rank_factors, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    low_rank_values_from_factors = (
+        influence_func_model.influences_from_factors(
+            low_rank_factors, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
     assert np.allclose(direct_influences, low_rank_influence, atol=atol, rtol=rtol)
     assert np.allclose(
         direct_sym_influences, sym_low_rank_influence, atol=atol, rtol=rtol
@@ -578,6 +603,7 @@ def test_influences_ekfac(
     ],
     direct_influences,
     direct_sym_influences,
+    device: torch.device,
 ):
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
@@ -589,7 +615,7 @@ def test_influences_ekfac(
         model,
         update_diagonal=True,
         hessian_regularization=test_case.hessian_reg,
-    )
+    ).to(device)
 
     with pytest.raises(NotFittedException):
         ekfac_influence.influences(
@@ -604,9 +630,13 @@ def test_influences_ekfac(
             ekfac_influence.fit(train_dataloader)
     elif isinstance(loss, nn.CrossEntropyLoss):
         ekfac_influence = ekfac_influence.fit(train_dataloader)
-        ekfac_influence_values = ekfac_influence.influences(
-            x_test, y_test, x_train, y_train, mode=test_case.mode
-        ).numpy()
+        ekfac_influence_values = (
+            ekfac_influence.influences(
+                x_test, y_test, x_train, y_train, mode=test_case.mode
+            )
+            .cpu()
+            .numpy()
+        )
 
         ekfac_influences_by_layer = ekfac_influence.influences_by_layer(
             x_test, y_test, x_train, y_train, mode=test_case.mode
@@ -614,22 +644,32 @@ def test_influences_ekfac(
 
         accumulated_inf_by_layer = np.zeros_like(ekfac_influence_values)
         for layer, infl in ekfac_influences_by_layer.items():
-            accumulated_inf_by_layer += infl.detach().numpy()
+            accumulated_inf_by_layer += infl.detach().cpu().numpy()
 
-        ekfac_self_influence = ekfac_influence.influences(
-            x_train, y_train, mode=test_case.mode
-        ).numpy()
+        ekfac_self_influence = (
+            ekfac_influence.influences(x_train, y_train, mode=test_case.mode)
+            .cpu()
+            .numpy()
+        )
 
         ekfac_factors = ekfac_influence.influence_factors(x_test, y_test)
 
-        influence_from_factors = ekfac_influence.influences_from_factors(
-            ekfac_factors, x_train, y_train, mode=test_case.mode
-        ).numpy()
+        influence_from_factors = (
+            ekfac_influence.influences_from_factors(
+                ekfac_factors, x_train, y_train, mode=test_case.mode
+            )
+            .cpu()
+            .numpy()
+        )
 
         assert np.allclose(ekfac_influence_values, influence_from_factors)
         assert np.allclose(ekfac_influence_values, accumulated_inf_by_layer)
-        check_influence_correlations(direct_influences, ekfac_influence_values)
-        check_influence_correlations(direct_sym_influences, ekfac_self_influence)
+        check_influence_correlations(
+            direct_influences, ekfac_influence_values, threshold=0.94
+        )
+        check_influence_correlations(
+            direct_sym_influences, ekfac_self_influence, threshold=0.94
+        )
 
 
 @pytest.mark.torch
@@ -656,6 +696,7 @@ def test_influences_cg(
     direct_factors,
     use_block_cg: bool,
     pre_conditioner: PreConditioner,
+    device: torch.device,
 ):
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
@@ -663,7 +704,7 @@ def test_influences_cg(
         TensorDataset(x_train, y_train), batch_size=test_case.batch_size
     )
     influence_model = CgInfluence(
-        model,
+        model.to(device),
         loss,
         test_case.hessian_reg,
         maxiter=5,
@@ -672,9 +713,13 @@ def test_influences_cg(
     )
     influence_model = influence_model.fit(train_dataloader)
 
-    approx_influences = influence_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    approx_influences = (
+        influence_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
 
     assert not np.any(np.isnan(approx_influences))
 
@@ -701,7 +746,11 @@ def test_influences_cg(
     # check that block variant returns the correct vector, if only one right hand side
     # is provided
     if use_block_cg:
-        single_influence = influence_model.influence_factors(
-            x_train[0].unsqueeze(0), y_train[0].unsqueeze(0)
-        ).numpy()
+        single_influence = (
+            influence_model.influence_factors(
+                x_train[0].unsqueeze(0), y_train[0].unsqueeze(0)
+            )
+            .cpu()
+            .numpy()
+        )
         assert np.allclose(single_influence, direct_factors[0], atol=1e-6, rtol=1e-4)

From 4cf4ac2a8cf4a8a86dbc3c5caa9a25b59c775faa Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Fri, 3 May 2024 13:32:45 +0200
Subject: [PATCH 12/25] Update CHANGELOG.md

---
 CHANGELOG.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52bc910a4..a0e27a8d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## Unreleased
+
+### Added
+
+- Add a device fixture for `pytest`, which resolves to a `cuda` device
+  depending on its availability and user input (`pytest --with-cuda`)
+  [PR #574](https://github.com/aai-institute/pyDVL/pull/574)
+
 ## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed

From b267fcf2349e3c3890c2752db778d60d6ef63a90 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 13:45:57 +0200
Subject: [PATCH 13/25] In the log_duration decorator, do not instantiate a new
 logger, but use the logger of the module pydvl.utils.progress

---
 src/pydvl/utils/progress.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py
index 9ad931938..c66db8f44 100644
--- a/src/pydvl/utils/progress.py
+++ b/src/pydvl/utils/progress.py
@@ -49,13 +49,11 @@ def decorator_log_duration(func):
         @wraps(func)
         def wrapper_log_duration(*args, **kwargs):
             func_name = func.__qualname__
-            duration_logger = logging.getLogger(func_name)
-            duration_logger.setLevel(log_level)
-            duration_logger.log(log_level, f"Function '{func_name}' is starting.")
+            logger.log(log_level, f"Function '{func_name}' is starting.")
             start_time = time()
             result = func(*args, **kwargs)
             duration = time() - start_time
-            duration_logger.log(
+            logger.log(
                 log_level,
                 f"Function '{func_name}' completed. " f"Duration: {duration:.2f} sec",
             )

From 8cf4f24341b0198cf1cd9daedb765b81226ac706 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 13:48:28 +0200
Subject: [PATCH 14/25] Improve logging for LissaInfluence, add parameter
 `warn_on_max_iteration`

---
 .../torch/influence_function_model.py         | 32 ++++++++++++++-----
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index 3e55cf8b3..a9b0f47db 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -802,6 +802,9 @@ class LissaInfluence(TorchInfluenceFunctionModel):
         h0: Initial guess for hvp.
         rtol: tolerance to use for early stopping
         progress: If True, display progress bars.
+        warn_on_max_iteration: If True, logs a warning, if the desired tolerance is not
+            achieved within `maxiter` iterations. If False, the log level for this
+            information is `logging.DEBUG`
     """
 
     def __init__(
@@ -815,8 +818,10 @@ def __init__(
         h0: Optional[torch.Tensor] = None,
         rtol: float = 1e-4,
         progress: bool = False,
+        warn_on_max_iteration: bool = True,
     ):
         super().__init__(model, loss)
+        self.warn_on_max_iteration = warn_on_max_iteration
         self.maxiter = maxiter
         self.hessian_regularization = hessian_regularization
         self.progress = progress
@@ -871,7 +876,9 @@ def lissa_step(
             create_batch_hvp_function(self.model, self.loss),
             in_dims=(None, None, None, 0),
         )
-        for _ in tqdm(range(self.maxiter), disable=not self.progress, desc="Lissa"):
+        for k in tqdm(
+            range(self.maxiter), disable=not self.progress, desc="Lissa iteration"
+        ):
             x, y = next(iter(shuffled_training_data))
             x = x.to(self.model_device)
             y = y.to(self.model_device)
@@ -884,14 +891,23 @@ def lissa_step(
                 raise RuntimeError("NaNs in h_estimate. Increase scale or dampening.")
             max_residual = torch.max(torch.abs(residual / h_estimate))
             if max_residual < self.rtol:
+                mean_residual = torch.mean(torch.abs(residual / h_estimate))
+                logger.debug(
+                    f"Terminated Lissa after {k} iterations with "
+                    f"{max_residual*100:.2f} % max residual and"
+                    f" mean residual {mean_residual*100:.5f} %"
+                )
                 break
-
-        mean_residual = torch.mean(torch.abs(residual / h_estimate))
-
-        logger.info(
-            f"Terminated Lissa with {max_residual*100:.2f} % max residual."
-            f" Mean residual: {mean_residual*100:.5f} %"
-        )
+        else:
+            mean_residual = torch.mean(torch.abs(residual / h_estimate))
+            log_level = logging.WARNING if self.warn_on_max_iteration else logging.DEBUG
+            logger.log(
+                log_level,
+                f"Reached max number of iterations {self.maxiter} without "
+                f"achieving the desired tolerance {self.rtol}.\n "
+                f"Achieved max residual {max_residual*100:.2f} % and"
+                f" {mean_residual*100:.5f} % mean residual",
+            )
         return h_estimate / self.scale
 
 

From 4af80a61c193f577aade86dd230684f687b208ab Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 13:49:44 +0200
Subject: [PATCH 15/25] Improve logging for CgInfluence, add parameter
 warn_on_max_iteration

---
 .../torch/influence_function_model.py         | 34 ++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index a9b0f47db..613359cd0 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -464,7 +464,8 @@ class CgInfluence(TorchInfluenceFunctionModel):
         rtol: Maximum relative tolerance of result.
         atol: Absolute tolerance of result.
         maxiter: Maximum number of iterations. If None, defaults to 10*len(b).
-        progress: If True, display progress bars.
+        progress: If True, display progress bars for computing in the non-block mode
+            (use_block_cg=False).
         precompute_grad: If True, the full data gradient is precomputed and kept
             in memory, which can speed up the hessian vector product computation.
             Set this to False, if you can't afford to keep the full computation graph
@@ -473,6 +474,9 @@ class CgInfluence(TorchInfluenceFunctionModel):
             gradient method
         use_block_cg: If True, use block variant of conjugate gradient method, which
             solves several right hand sides simultaneously
+        warn_on_max_iteration: If True, logs a warning, if the desired tolerance is not
+            achieved within `maxiter` iterations. If False, the log level for this
+            information is `logging.DEBUG`
 
     """
 
@@ -489,8 +493,10 @@ def __init__(
         precompute_grad: bool = False,
         pre_conditioner: Optional[PreConditioner] = None,
         use_block_cg: bool = False,
+        warn_on_max_iteration: bool = True,
     ):
         super().__init__(model, loss)
+        self.warn_on_max_iteration = warn_on_max_iteration
         self.use_block_cg = use_block_cg
         self.pre_conditioner = pre_conditioner
         self.precompute_grad = precompute_grad
@@ -661,6 +667,7 @@ def _solve_pcg(
 
         for k in range(maxiter):
             if torch.norm(r0) < tol:
+                logger.debug(f"Terminated cg after {k} iterations with residuum={r0}")
                 break
             Ap = hvp(p)
             alpha = torch.dot(r0, z0) / torch.dot(p, Ap)
@@ -677,6 +684,16 @@ def _solve_pcg(
             r0 = r
             p = z + beta * p
             z0 = z
+        else:
+            log_level = logging.WARNING if self.warn_on_max_iteration else logging.DEBUG
+            logger.log(
+                log_level,
+                f"Reached max number of iterations {maxiter=} without "
+                f"achieving the desired tolerance {tol}. \n"
+                f"Achieved residuum is {torch.norm(r0)}.\n"
+                f"Consider increasing 'maxiter', the desired tolerance or the "
+                f"parameter 'hessian_regularization'.",
+            )
 
         return x
 
@@ -745,6 +762,10 @@ def mat_mat(x: torch.Tensor):
                 non_finished_indices = non_finished_indices.unsqueeze(-1)
 
             if num_remaining_indices == 0:
+                logger.debug(
+                    f"Terminated block cg after {k} iterations with max "
+                    f"residuum={B.max()}"
+                )
                 break
 
             # Reduce problem size by removing finished columns from the iteration
@@ -766,6 +787,17 @@ def mat_mat(x: torch.Tensor):
             # Orthogonalization search directions to stabilize the action of
             # (P^tAP)^{-1}
             P, _, _ = torch.linalg.svd(Z_tmp, full_matrices=False)
+        else:
+            log_level = logging.WARNING if self.warn_on_max_iteration else logging.DEBUG
+            logger.log(
+                log_level,
+                f"Reached max number of iterations {maxiter=} of block cg "
+                f"without achieving the desired tolerance {tol.min()}. \n"
+                f"Achieved max residuum is "
+                f"{torch.linalg.norm(R, dim=0).max()}.\n"
+                f"Consider increasing 'maxiter', the desired tolerance or "
+                f"the parameter 'hessian_regularization'.",
+            )
 
         return X.T
 

From 590e1f89139ec890a70f85fe36cb1213b418fd4e Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 14:02:32 +0200
Subject: [PATCH 16/25] Update CHANGELOG.md

---
 CHANGELOG.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b23a36eb..540bc5de5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,17 +4,30 @@
 
 ### Added
 
+- Add progress bars to the computation of `LazyChunkSequence` and
+  `NestedLazyChunkSequence` 
+  [PR #567](https://github.com/aai-institute/pyDVL/pull/567)
 - Add a device fixture for `pytest`, which depending on the availability and 
   user input (`pytest --with-cuda`) resolves to cuda device
   [PR #574](https://github.com/aai-institute/pyDVL/pull/574)
 
 ### Fixed
 
+- Fixed logging issue in decorator `log_duration`
+  [PR #567](https://github.com/aai-institute/pyDVL/pull/567)
 - Fixed missing move of tensors to model device in `EkfacInfluence` 
   implementation [PR #570](https://github.com/aai-institute/pyDVL/pull/570)
 - Missing move to device of `preconditioner` in `CgInfluence` implementation
   [PR #572](https://github.com/aai-institute/pyDVL/pull/572)
 
+### Changed
+
+- Changed logging behavior of iterative methods `LissaInfluence` and
+  `CgInfluence` to warn when the desired tolerance is not achieved within
+  `maxiter` iterations; added parameter `warn_on_max_iteration`, which can be
+  set to `False` to log this information at level `logging.DEBUG` instead
+  [PR #567](https://github.com/aai-institute/pyDVL/pull/567)
+
 ## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed

From bf4d8b74a35f93071600bb1023039232afbc355e Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 16:39:39 +0200
Subject: [PATCH 17/25] Add decorator function to use for raising specific
 exceptions based on a specified exception type to catch

---
 src/pydvl/utils/exceptions.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 src/pydvl/utils/exceptions.py

diff --git a/src/pydvl/utils/exceptions.py b/src/pydvl/utils/exceptions.py
new file mode 100644
index 000000000..aeecbafe4
--- /dev/null
+++ b/src/pydvl/utils/exceptions.py
@@ -0,0 +1,34 @@
+from functools import wraps
+from typing import TypeVar, Type, Callable
+
+CatchExceptionType = TypeVar("CatchExceptionType", bound=BaseException)
+RaiseExceptionType = TypeVar("RaiseExceptionType", bound=BaseException)
+
+
+def catch_and_raise_exception(
+    catch_exception_type: Type[CatchExceptionType],
+    raise_exception_factory: Callable[[CatchExceptionType], RaiseExceptionType],
+) -> Callable:
+    """
+    A decorator that catches exceptions of a specified exception type and raises
+    another specified exception.
+
+    Args:
+        catch_exception_type: The type of the exception to catch.
+        raise_exception_factory: A factory function that creates a new exception.
+
+    Returns:
+        A decorator function that wraps the target function.
+    """
+
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except catch_exception_type as e:
+                raise raise_exception_factory(e) from e
+
+        return wrapper
+
+    return decorator

From 739c2c43f4dcdbbc2bc17972523a688c420ce683 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 16:40:56 +0200
Subject: [PATCH 18/25] Add safe_torch_linalg_eigh function to
 influence.torch.util to catch torch RuntimeError of torch.linalg.eigh and
 raise a specific exception pointing to a known torch issue, add tests

---
 .../torch/influence_function_model.py         |  5 ++-
 src/pydvl/influence/torch/util.py             | 44 +++++++++++++++++++
 tests/conftest.py                             |  5 +++
 tests/influence/torch/test_util.py            | 21 +++++++++
 4 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index a41844482..7d9f75df4 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -39,6 +39,7 @@
     EkfacRepresentation,
     empirical_cross_entropy_loss_fn,
     flatten_dimensions,
+    safe_torch_linalg_eigh,
 )
 
 logger = logging.getLogger(__name__)
@@ -1227,8 +1228,8 @@ def fit(self, data: DataLoader) -> EkfacInfluence:
         layers_evect_g = {}
         layers_diags = {}
         for key in self.active_layers.keys():
-            evals_a, evecs_a = torch.linalg.eigh(forward_x[key])
-            evals_g, evecs_g = torch.linalg.eigh(grad_y[key])
+            evals_a, evecs_a = safe_torch_linalg_eigh(forward_x[key])
+            evals_g, evecs_g = safe_torch_linalg_eigh(grad_y[key])
             layers_evecs_a[key] = evecs_a
             layers_evect_g[key] = evecs_g
             layers_diags[key] = torch.kron(evals_g.view(-1, 1), evals_a.view(-1, 1))
diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py
index 079acf9c9..dc2fc9479 100644
--- a/src/pydvl/influence/torch/util.py
+++ b/src/pydvl/influence/torch/util.py
@@ -22,6 +22,7 @@
 from numpy.typing import NDArray
 from torch.utils.data import Dataset
 
+from ...utils.exceptions import catch_and_raise_exception
 from ..array import NestedSequenceAggregator, NumpyConverter, SequenceAggregator
 
 logger = logging.getLogger(__name__)
@@ -521,3 +522,46 @@ def empirical_cross_entropy_loss_fn(
         torch.isfinite(log_probs_), log_probs_, torch.zeros_like(log_probs_)
     )
     return torch.sum(log_probs_ * probs_.detach() ** 0.5)
+
+
+@catch_and_raise_exception(RuntimeError, lambda e: TorchLinalgEighException(e))
+def safe_torch_linalg_eigh(*args, **kwargs):
+    """
+    A wrapper around `torch.linalg.eigh` that safely handles potential runtime errors
+    by raising a custom `TorchLinalgEighException` with more context,
+    especially related to the issues reported in
+    https://github.com/pytorch/pytorch/issues/92141.
+
+    Args:
+        *args: Positional arguments passed to `torch.linalg.eigh`.
+        **kwargs: Keyword arguments passed to `torch.linalg.eigh`.
+
+    Returns:
+        The result of calling `torch.linalg.eigh` with the provided arguments.
+
+    Raises:
+        TorchLinalgEighException: If a `RuntimeError` occurs during the execution of
+            `torch.linalg.eigh`.
+    """
+    return torch.linalg.eigh(*args, **kwargs)
+
+
+class TorchLinalgEighException(Exception):
+    """
+    Exception to wrap a RunTimeError raised by torch.linalg.eigh, when used
+    with large matrices, see https://github.com/pytorch/pytorch/issues/92141
+    """
+
+    def __init__(self, original_exception: RuntimeError):
+        func = torch.linalg.eigh
+        err_msg = (
+            f"A RunTimeError occurred in '{func.__module__}.{func.__qualname__}'. "
+            "This might be related to known issues with "
+            "[torch.linalg.eigh][torch.linalg.eigh] on certain matrix sizes.\n "
+            "For more details, refer to "
+            "https://github.com/pytorch/pytorch/issues/92141. \n"
+            "In this case, consider to use a different implementation, which does not "
+            "depend on the usage of [torch.linalg.eigh][torch.linalg.eigh].\n"
+            f" Inspect the original exception message: \n{str(original_exception)}"
+        )
+        super().__init__(err_msg)
diff --git a/tests/conftest.py b/tests/conftest.py
index d8594c314..3c25c2823 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import platform
 from dataclasses import asdict
 from typing import TYPE_CHECKING, Optional, Tuple
 
@@ -264,3 +265,7 @@ def pytest_terminal_summary(
 ):
     tolerate_session = terminalreporter.config._tolerate_session
     tolerate_session.display(terminalreporter)
+
+
+def is_osx_arm64():
+    return platform.system() == "Darwin" and platform.machine() == "arm64"
diff --git a/tests/influence/torch/test_util.py b/tests/influence/torch/test_util.py
index 6e675b18f..c63a34253 100644
--- a/tests/influence/torch/test_util.py
+++ b/tests/influence/torch/test_util.py
@@ -17,11 +17,14 @@
     lanzcos_low_rank_hessian_approx,
 )
 from pydvl.influence.torch.util import (
+    TorchLinalgEighException,
     TorchTensorContainerType,
     align_structure,
     flatten_dimensions,
+    safe_torch_linalg_eigh,
     torch_dataset_to_dask_array,
 )
+from tests.conftest import is_osx_arm64
 from tests.influence.conftest import linear_hessian_analytical, linear_model
 
 
@@ -297,3 +300,21 @@ def are_active_layers_linear(model):
                 if any(param_requires_grad):
                     return False
     return True
+
+
+@pytest.mark.torch
+def test_safe_torch_linalg_eigh():
+    t = torch.randn([10, 10])
+    t = t @ t.t()
+    safe_eigs, safe_eigvec = safe_torch_linalg_eigh(t)
+    eigs, eigvec = torch.linalg.eigh(t)
+    assert torch.allclose(safe_eigs, eigs)
+    assert torch.allclose(safe_eigvec, eigvec)
+
+
+@pytest.mark.torch
+@pytest.mark.slow
+@pytest.mark.skipif(not is_osx_arm64(), reason="Requires macOS ARM64.")
+def test_safe_torch_linalg_eigh_exception():
+    with pytest.raises(TorchLinalgEighException):
+        safe_torch_linalg_eigh(torch.randn([53000, 53000]))

From 99b652160f9774f4198b1f0036e99e8953b68573 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 17:23:23 +0200
Subject: [PATCH 19/25] Add example usage to docstring of
 catch_and_raise_exception

---
 src/pydvl/utils/exceptions.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/pydvl/utils/exceptions.py b/src/pydvl/utils/exceptions.py
index aeecbafe4..1c57e0ba3 100644
--- a/src/pydvl/utils/exceptions.py
+++ b/src/pydvl/utils/exceptions.py
@@ -1,5 +1,5 @@
 from functools import wraps
-from typing import TypeVar, Type, Callable
+from typing import Callable, Type, TypeVar
 
 CatchExceptionType = TypeVar("CatchExceptionType", bound=BaseException)
 RaiseExceptionType = TypeVar("RaiseExceptionType", bound=BaseException)
@@ -19,6 +19,32 @@ def catch_and_raise_exception(
 
     Returns:
         A decorator function that wraps the target function.
+
+    ??? Example
+
+        ```python
+            @catch_and_raise_exception(RuntimeError, lambda e: TorchLinalgEighException(e))
+            def safe_torch_linalg_eigh(*args, **kwargs):
+                '''
+                A wrapper around `torch.linalg.eigh` that safely handles potential runtime errors
+                by raising a custom `TorchLinalgEighException` with more context,
+                especially related to the issues reported in
+                https://github.com/pytorch/pytorch/issues/92141.
+
+                Args:
+                *args: Positional arguments passed to `torch.linalg.eigh`.
+                **kwargs: Keyword arguments passed to `torch.linalg.eigh`.
+
+                Returns:
+                The result of calling `torch.linalg.eigh` with the provided arguments.
+
+                Raises:
+                TorchLinalgEighException: If a `RuntimeError` occurs during the execution of
+                `torch.linalg.eigh`.
+                '''
+                return torch.linalg.eigh(*args, **kwargs)
+
+        ```
     """
 
     def decorator(func):

From c3df95457d0b89be8593f158c98365589af44881 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 17:33:52 +0200
Subject: [PATCH 20/25] Fix link in docstring

---
 src/pydvl/influence/torch/util.py |  7 ++++--
 src/pydvl/utils/exceptions.py     | 41 +++++++++++++++----------------
 2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/pydvl/influence/torch/util.py b/src/pydvl/influence/torch/util.py
index dc2fc9479..cf5143a20 100644
--- a/src/pydvl/influence/torch/util.py
+++ b/src/pydvl/influence/torch/util.py
@@ -530,7 +530,8 @@ def safe_torch_linalg_eigh(*args, **kwargs):
     A wrapper around `torch.linalg.eigh` that safely handles potential runtime errors
     by raising a custom `TorchLinalgEighException` with more context,
     especially related to the issues reported in
-    https://github.com/pytorch/pytorch/issues/92141.
+    [https://github.com/pytorch/pytorch/issues/92141](
+    https://github.com/pytorch/pytorch/issues/92141).
 
     Args:
         *args: Positional arguments passed to `torch.linalg.eigh`.
@@ -549,7 +550,9 @@ def safe_torch_linalg_eigh(*args, **kwargs):
 class TorchLinalgEighException(Exception):
     """
     Exception to wrap a RunTimeError raised by torch.linalg.eigh, when used
-    with large matrices, see https://github.com/pytorch/pytorch/issues/92141
+    with large matrices,
+    see [https://github.com/pytorch/pytorch/issues/92141](
+    https://github.com/pytorch/pytorch/issues/92141)
     """
 
     def __init__(self, original_exception: RuntimeError):
diff --git a/src/pydvl/utils/exceptions.py b/src/pydvl/utils/exceptions.py
index 1c57e0ba3..f53e18194 100644
--- a/src/pydvl/utils/exceptions.py
+++ b/src/pydvl/utils/exceptions.py
@@ -23,27 +23,26 @@ def catch_and_raise_exception(
     ??? Example
 
         ```python
-            @catch_and_raise_exception(RuntimeError, lambda e: TorchLinalgEighException(e))
-            def safe_torch_linalg_eigh(*args, **kwargs):
-                '''
-                A wrapper around `torch.linalg.eigh` that safely handles potential runtime errors
-                by raising a custom `TorchLinalgEighException` with more context,
-                especially related to the issues reported in
-                https://github.com/pytorch/pytorch/issues/92141.
-
-                Args:
-                *args: Positional arguments passed to `torch.linalg.eigh`.
-                **kwargs: Keyword arguments passed to `torch.linalg.eigh`.
-
-                Returns:
-                The result of calling `torch.linalg.eigh` with the provided arguments.
-
-                Raises:
-                TorchLinalgEighException: If a `RuntimeError` occurs during the execution of
-                `torch.linalg.eigh`.
-                '''
-                return torch.linalg.eigh(*args, **kwargs)
-
+        @catch_and_raise_exception(RuntimeError, lambda e: TorchLinalgEighException(e))
+        def safe_torch_linalg_eigh(*args, **kwargs):
+            '''
+            A wrapper around `torch.linalg.eigh` that safely handles potential runtime errors
+            by raising a custom `TorchLinalgEighException` with more context,
+            especially related to the issues reported in
+            https://github.com/pytorch/pytorch/issues/92141.
+
+            Args:
+            *args: Positional arguments passed to `torch.linalg.eigh`.
+            **kwargs: Keyword arguments passed to `torch.linalg.eigh`.
+
+            Returns:
+            The result of calling `torch.linalg.eigh` with the provided arguments.
+
+            Raises:
+            TorchLinalgEighException: If a `RuntimeError` occurs during the execution of
+            `torch.linalg.eigh`.
+            '''
+            return torch.linalg.eigh(*args, **kwargs)
         ```
     """
 

From 81187657ae7b8a770c884ad1fd824dea9b1989dd Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 17:37:31 +0200
Subject: [PATCH 21/25] Update CHANGELOG.md

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b23a36eb..8cf6729df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,10 @@
   implementation [PR #570](https://github.com/aai-institute/pyDVL/pull/570)
 - Missing move to device of `preconditioner` in `CgInfluence` implementation
   [PR #572](https://github.com/aai-institute/pyDVL/pull/572)
+- Raise a more specific error message when a `RuntimeError` occurs in
+  `torch.linalg.eigh`, so the user can check if it is related to a known
+  issue
+  [PR #578](https://github.com/aai-institute/pyDVL/pull/578)
 
 ## 0.9.1 - Bug fixes, logging improvement
 

From b36080a2ba62e9577f11c0c594eb6a4e4ffc6ea9 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 6 May 2024 19:36:52 +0200
Subject: [PATCH 22/25] Add a temporary fix to handle an edge case in the test
 test_classwise_scorer_accuracies_manual_derivation

---
 tests/value/shapley/test_classwise.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py
index d73e86a0b..85f4b9f30 100644
--- a/tests/value/shapley/test_classwise.py
+++ b/tests/value/shapley/test_classwise.py
@@ -266,6 +266,14 @@ def test_classwise_scorer_accuracies_manual_derivation(
     for set_zero_idx in range(len(subsets_zero)):
         for set_one_idx in range(len(subsets_one)):
             indices = list(subsets_zero[set_zero_idx] + subsets_one[set_one_idx])
+
+            # TODO the powersets subsets_zero, subsets_one contain the empty set, having
+            #  this leads to an empty index set with the consequence of undefined
+            #  behavior (due to nan values). This is NOT the correct fix, this test
+            #  must be completely revised!
+            if len(indices) == 0:
+                continue
+
             (
                 x_train,
                 y_train,

From 84c42b68fb148583822f79557d89db9223fda1f2 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Tue, 7 May 2024 12:39:51 +0200
Subject: [PATCH 23/25] Update CHANGELOG.md

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78747c709..c25de8f88 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,11 @@
   `torch.linalg.eigh`, so the user can check if it is related to a known
   issue
   [PR #578](https://github.com/aai-institute/pyDVL/pull/578)
+- Fix an edge case (empty train data) in the test 
+  `test_classwise_scorer_accuracies_manual_derivation`, which resulted
+  in undefined behavior (`np.nan` to `int` conversion with different results
+  depending on OS)
+  [PR #579](https://github.com/aai-institute/pyDVL/pull/579)
 
 ### Changed
 

From 76aa3565fb73cda5f99d2216ff0c9435d2e37bf8 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Tue, 7 May 2024 14:16:41 +0200
Subject: [PATCH 24/25] Change Unreleased to version 0.9.2 in CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c25de8f88..076d4d927 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Changelog
 
-## Unreleased
+## 0.9.2 - 🏗  Bug fixes, logging improvement
 
 ### Added
 

From 36ffd53afea3ef48887d98ed8db7dfa4d9eff209 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Tue, 7 May 2024 15:09:56 +0200
Subject: [PATCH 25/25] =?UTF-8?q?Bump=20version:=200.9.2.dev0=20=E2=86=92?=
 =?UTF-8?q?=200.9.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 setup.py              | 2 +-
 src/pydvl/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index e59d38df8..202ce7495 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.9.2.dev0
+current_version = 0.9.2
 commit = False
 tag = False
 allow_dirty = False
diff --git a/setup.py b/setup.py
index a17918e49..1d5dd2e74 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     package_data={"pydvl": ["py.typed"]},
     packages=find_packages(where="src"),
     include_package_data=True,
-    version="0.9.2.dev0",
+    version="0.9.2",
     description="The Python Data Valuation Library",
     install_requires=[
         line
diff --git a/src/pydvl/__init__.py b/src/pydvl/__init__.py
index b90f353a0..a311b1e9c 100644
--- a/src/pydvl/__init__.py
+++ b/src/pydvl/__init__.py
@@ -7,4 +7,4 @@
 The two main modules you will want to look at are [value][pydvl.value] and
 [influence][pydvl.influence].
 """
-__version__ = "0.9.2.dev0"
+__version__ = "0.9.2"