diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index 5f5c49f1556..57b98575c1f 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -556,12 +556,7 @@ def _init_state_dict(self) -> dict: def __iter__(self): ex_iterator = iter(self.ex_iterable) - while True: - batch = list(islice(ex_iterator, self.step)) - if len(batch) > self.offset: - yield batch[self.offset] - else: - break + return islice(ex_iterator, self.offset, None, self.step) def shuffle_data_sources(self, generator: np.random.Generator) -> "StepExamplesIterable": return StepExamplesIterable(