71 changes: 54 additions & 17 deletions luxonis_ml/data/datasets/luxonis_dataset.py
@@ -199,14 +199,30 @@
         dfs = [pl.read_parquet(file) for file in path.glob("*.parquet")]
         return pl.concat(dfs) if dfs else None
 
-    def _get_file_index(self) -> Optional[pl.DataFrame]:
+    @overload
+    def _get_file_index(
+        self, lazy: Literal[False] = ...
+    ) -> Optional[pl.DataFrame]: ...
+
+    @overload
+    def _get_file_index(
+        self, lazy: Literal[True] = ...
+    ) -> Optional[pl.LazyFrame]: ...
+
+    def _get_file_index(
+        self, lazy: bool = False
+    ) -> Optional[Union[pl.DataFrame, pl.LazyFrame]]:
         path = get_file(
             self.fs, "metadata/file_index.parquet", self.media_path
         )
         if path is not None and path.exists():
-            return pl.read_parquet(path).select(
-                pl.all().exclude("^__index_level_.*$")
-            )
+            if not lazy:
+                df = pl.read_parquet(path)
+            else:
+                df = pl.scan_parquet(path)
+
+            return df.select(pl.all().exclude("^__index_level_.*$"))
+
         return None
 
     def _write_index(
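Note on the pattern above: the `@overload` pair lets a type checker narrow the return type from the `lazy` flag, so `lazy=True` yields `Optional[pl.LazyFrame]` while the default yields `Optional[pl.DataFrame]`. A minimal standalone sketch of the same idiom (illustrative names; assumes only polars and the standard `typing` module):

```python
from pathlib import Path
from typing import Literal, Optional, Union, overload

import polars as pl


@overload
def read_index(path: Path, lazy: Literal[False] = ...) -> Optional[pl.DataFrame]: ...
@overload
def read_index(path: Path, lazy: Literal[True] = ...) -> Optional[pl.LazyFrame]: ...


def read_index(
    path: Path, lazy: bool = False
) -> Optional[Union[pl.DataFrame, pl.LazyFrame]]:
    if not path.exists():
        return None
    # scan_parquet only plans the read; nothing is loaded until .collect()
    df = pl.scan_parquet(path) if lazy else pl.read_parquet(path)
    return df.select(pl.all().exclude("^__index_level_.*$"))
```

The lazy variant is what keeps the index join in `_warn_on_duplicates` below cheap: the whole pipeline is planned first and materialized once at `.collect()`.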
@@ -438,7 +454,7 @@
                 uuid_dict[str(ann.path)] = uuid
                 ann.path = Path(uuid).with_suffix(ann.path.suffix)
             else:
-                ann.path = ann.path.absolute()
+                ann.path = ann.path.absolute().resolve()
         self.progress.stop()
         self.progress.remove_task(task)
         if self.is_remote:
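The recurring `.absolute()` to `.absolute().resolve()` change throughout this PR canonicalizes paths before they are stored or compared: `Path.absolute()` only prepends the working directory, while `.resolve()` additionally normalizes `..` segments and follows symlinks, so two spellings of the same file compare equal. A quick illustration (printed paths are examples):

```python
from pathlib import Path

p = Path("data/../data/img.png")
print(p.absolute())            # e.g. /work/data/../data/img.png  (".." kept)
print(p.absolute().resolve())  # e.g. /work/data/img.png  (canonical form)
```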
Expand Down Expand Up @@ -496,7 +512,7 @@
new_index["uuid"].append(uuid)
new_index["file"].append(file)
new_index["original_filepath"].append(
str(filepath.absolute())
str(filepath.absolute().resolve())
)
processed_uuids.add(uuid)

@@ -514,7 +530,9 @@
 
         batch_data: list[DatasetRecord] = []
 
-        classes_per_task: Dict[str, OrderedSet[str]] = defaultdict(OrderedSet)
+        classes_per_task: Dict[str, OrderedSet[str]] = defaultdict(
+            lambda: OrderedSet([])
+        )
        num_kpts_per_task: Dict[str, int] = {}
 
         annotations_path = get_dir(
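The `lambda: OrderedSet([])` factory suggests the project's `OrderedSet` cannot be constructed with zero arguments, which `defaultdict` requires of a bare class used as a factory. A sketch with a stand-in class (the stub below only mimics a required-iterable constructor; it is not the real `OrderedSet`):

```python
from collections import defaultdict
from typing import Iterable


class OrderedSet(set):  # stand-in, not the real implementation
    def __init__(self, iterable: Iterable[str]):  # iterable is required
        super().__init__(iterable)


# defaultdict(OrderedSet) would raise TypeError on the first missing key,
# because defaultdict calls its factory with no arguments.
classes_per_task = defaultdict(lambda: OrderedSet([]))
classes_per_task["detection"].add("person")
```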
Expand Down Expand Up @@ -584,36 +602,55 @@

def _warn_on_duplicates(self) -> None:
df = self._load_df_offline(lazy=True)
if df is None:
index_df = self._get_file_index(lazy=True)
if df is None or index_df is None:
return
df = df.join(index_df, on="uuid").drop("file_right")
# Warn on duplicate UUIDs
duplicates_paired = (
df.group_by("uuid")
.agg(pl.col("file").n_unique().alias("file_count"))
.filter(pl.col("file_count") > 1)
.join(df, on="uuid")
.select(["uuid", "file"])
.select("uuid", "file")
.unique()
.group_by("uuid")
.agg([pl.col("file").alias("files")])
.agg(pl.col("file").alias("files"))
.filter(pl.col("files").len() > 1)
.collect()
)
duplicates_paired_df = duplicates_paired.collect()
for uuid, files in duplicates_paired_df.iter_rows():
for uuid, files in duplicates_paired.iter_rows():
self.logger.warning(
f"UUID: {uuid} has multiple file names: {files}"
)

# Warn on duplicate annotations
duplicate_annotation = (
df.group_by(["file", "annotation"])
df.group_by(
"original_filepath",
"task",
"type",
"annotation",
"instance_id",
)
.agg(pl.len().alias("count"))
.filter(pl.col("count") > 1)
)
duplicate_annotation_df = duplicate_annotation.collect()
for file_name, annotation, _ in duplicate_annotation_df.iter_rows():
.filter(pl.col("annotation") != "{}")
.drop("instance_id")
).collect()

for (
file_name,
task,
type_,
annotation,
count,
) in duplicate_annotation.iter_rows():
if "RLE" in type_ or "Mask" in type_:
annotation = "<binary mask>"

             self.logger.warning(
-                f"File '{file_name}' has the same annotation '{annotation}' added multiple times."
+                f"File '{file_name}' has the same '{type_}' annotation "
+                f"'{annotation}' ({task=}) added {count} times."
             )
 
     def get_splits(self) -> Optional[Dict[str, List[str]]]:
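A condensed toy run of the duplicate-UUID query above, assuming only polars (the file-index join and the final `files.len()` filter are omitted for brevity):

```python
import polars as pl

df = pl.LazyFrame(
    {
        "uuid": ["a", "a", "b"],
        "file": ["img1.png", "img1_copy.png", "img2.png"],
    }
)

duplicates = (
    df.group_by("uuid")
    .agg(pl.col("file").n_unique().alias("file_count"))
    .filter(pl.col("file_count") > 1)  # keep only UUIDs with >1 file name
    .join(df, on="uuid")
    .select("uuid", "file")
    .unique()
    .group_by("uuid")
    .agg(pl.col("file").alias("files"))  # gather the conflicting names
    .collect()
)

for uuid, files in duplicates.iter_rows():
    print(f"UUID {uuid} has multiple file names: {files}")
# -> UUID a has multiple file names: ['img1.png', 'img1_copy.png'] (order may vary)
```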
2 changes: 1 addition & 1 deletion luxonis_ml/data/datasets/utils.py
@@ -86,7 +86,7 @@ def find_filepath_uuid(
     if index is None:
         return None
 
-    abs_path = str(Path(filepath).absolute())
+    abs_path = str(Path(filepath).absolute().resolve())
     matched = index.filter(pl.col("original_filepath") == abs_path)
 
     if len(matched):
6 changes: 4 additions & 2 deletions luxonis_ml/data/loaders/luxonis_loader.py
@@ -84,7 +84,7 @@ def __init__(
         for view in self.view:
             self.instances.extend(splits[view])
 
-        self.idx_to_df_row = []
+        self.idx_to_df_row: list[list[int]] = []
         for uuid in self.instances:
             boolean_mask = df["uuid"] == uuid
             row_indexes = boolean_mask.arg_true().to_list()
@@ -139,7 +139,9 @@ def __getitem__(self, idx: int) -> LuxonisLoaderOutput:
         else:
             picked_indices = set()
             max_val = len(self)
-            while len(picked_indices) < self.augmentations.aug_batch_size - 1:
+            while (
+                len(picked_indices) < self.augmentations.aug_batch_size - 1
+            ):
                 rand_idx = random.randint(0, max_val - 1)
                 if rand_idx != idx and rand_idx not in picked_indices:
                     picked_indices.add(rand_idx)
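The widened `while` is purely a line-length reformat; the logic is rejection sampling of `aug_batch_size - 1` distinct indices different from `idx`. In isolation (illustrative standalone helper, assuming `k < n` so the loop terminates):

```python
import random


def pick_other_indices(idx: int, n: int, k: int) -> set:
    """Rejection-sample k distinct indices from range(n), all different from idx."""
    picked = set()
    while len(picked) < k:
        rand_idx = random.randint(0, n - 1)
        if rand_idx != idx and rand_idx not in picked:
            picked.add(rand_idx)
    return picked


print(pick_other_indices(idx=3, n=10, k=4))  # e.g. {0, 5, 7, 9}
# Equivalent one-liner: random.sample([i for i in range(10) if i != 3], 4)
```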
2 changes: 1 addition & 1 deletion luxonis_ml/data/parsers/classification_directory_parser.py
@@ -77,7 +77,7 @@ def generator() -> DatasetIterator:
             for class_name in class_names:
                 for img_path in (class_dir / class_name).iterdir():
                     yield {
-                        "file": str(img_path.absolute()),
+                        "file": str(img_path.absolute().resolve()),
                         "annotation": {
                             "type": "classification",
                             "class": class_name,
2 changes: 1 addition & 1 deletion luxonis_ml/data/parsers/coco_parser.py
@@ -226,7 +226,7 @@ def generator() -> DatasetIterator:
                     ann_dict[img_id].append(ann)
 
             for img_id, img in img_dict.items():
-                path = image_dir.absolute() / img["file_name"]
+                path = image_dir.absolute().resolve() / img["file_name"]
                 if not path.exists():
                     continue
                 path = str(path)
2 changes: 1 addition & 1 deletion luxonis_ml/data/parsers/create_ml_parser.py
@@ -92,7 +92,7 @@ def from_split(
         class_names = set()
         images_annotations = []
         for annotations in annotations_data:
-            path = image_dir.absolute() / annotations["image"]
+            path = image_dir.absolute().resolve() / annotations["image"]
             if not path.exists():
                 continue
             file = str(path)
@@ -104,7 +104,7 @@ def from_split(
         def generator() -> DatasetIterator:
             for mask_path in seg_dir.glob("*_mask.*"):
                 image_path = next(image_dir.glob(f"{mask_path.stem[:-5]}.*"))
-                file = str(image_path.absolute())
+                file = str(image_path.absolute().resolve())
                 mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
 
                 ids = np.unique(mask)
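For context on the surrounding generator: `np.unique` on the grayscale mask lists the class ids present, from which a binary mask per id can be derived. A toy illustration with a synthetic mask, assuming only numpy:

```python
import numpy as np

mask = np.array(
    [
        [0, 0, 1],
        [0, 2, 2],
        [1, 1, 2],
    ],
    dtype=np.uint8,
)

ids = np.unique(mask)  # array([0, 1, 2]): the class ids in the mask
for class_id in ids:
    binary = (mask == class_id).astype(np.uint8)  # per-class binary mask
    print(class_id, int(binary.sum()), "pixels")
```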
4 changes: 3 additions & 1 deletion luxonis_ml/data/parsers/voc_parser.py
@@ -86,7 +86,9 @@ def from_split(
             annotation_data = ET.parse(anno_xml)
             root = annotation_data.getroot()
 
-            path = image_dir.absolute() / self._xml_find(root, "filename")
+            path = image_dir.absolute().resolve() / self._xml_find(
+                root, "filename"
+            )
             if not path.exists():
                 continue
 
2 changes: 1 addition & 1 deletion luxonis_ml/data/parsers/yolov4_parser.py
@@ -98,7 +98,7 @@ def generator() -> DatasetIterator:
                 data = ann_line.split(" ")
                 img_path = data[0]
 
-                path = image_dir.absolute() / img_path
+                path = image_dir.absolute().resolve() / img_path
                 if not path.exists():
                     continue
 