From 25f9ffd464e07fcc19014173eec39af01adee76b Mon Sep 17 00:00:00 2001 From: Stijn Peeters Date: Wed, 23 Oct 2024 15:42:38 +0200 Subject: [PATCH] Add some progress logs --- processors/filtering/unique_images.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/processors/filtering/unique_images.py b/processors/filtering/unique_images.py index 819e4b9d..0970d1f8 100644 --- a/processors/filtering/unique_images.py +++ b/processors/filtering/unique_images.py @@ -104,11 +104,15 @@ def process(self): processed = 0 staging_area = self.dataset.get_staging_area() + self.dataset.update_progress("Processing images and looking for duplicates") for image_file in self.iterate_archive_contents(self.source_file): if self.interrupted: raise ProcessorInterruptedException("Interrupted while filtering for unique images") self.dataset.update_progress(processed / self.source_dataset.num_rows) + if processed % 100 == 0: + self.dataset.update_progress(f"Processed {processed:,} of {self.source_dataset.num_rows:,} images, " + f"found {dupes:,} duplicate(s)") processed += 1 if image_file.name == ".metadata.json":