Skip to content

Commit

Permalink
Add some progress logs
Browse files (browse the repository at this point in the history)
  • Loading branch information
stijn-uva committed Oct 23, 2024
1 parent aad7d57 commit 25f9ffd
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions processors/filtering/unique_images.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,11 +104,15 @@ def process(self):
processed = 0
staging_area = self.dataset.get_staging_area()

self.dataset.update_progress("Processing images and looking for duplicates")
for image_file in self.iterate_archive_contents(self.source_file):
if self.interrupted:
raise ProcessorInterruptedException("Interrupted while filtering for unique images")

self.dataset.update_progress(processed / self.source_dataset.num_rows)
if processed % 100 == 0:
self.dataset.update_progress(f"Processed {processed:,} of {self.source_dataset.num_rows:,} images, "
f"found {dupes:,} duplicate(s)")
processed += 1

if image_file.name == ".metadata.json":
Expand Down

0 comments on commit 25f9ffd

Please sign in to comment.