luxonis_ml/data/parsers/yolov8_parser.py (122 changes: 66 additions & 56 deletions)
@@ -94,61 +94,61 @@ def fit_boundingbox(self, points: np.ndarray) -> dict[str, float]:
     def _detect_dataset_dir_format(
         dataset_dir: Path,
     ) -> tuple[Format | None, list[str]]:
-        """Checks if dataset directory structure is in FiftyOne or
+        """Checks if dataset directory structure is in Ultralytics or
         Roboflow format."""
-        split_folders = ["train", "valid"]  # test folder is optional
-        non_split_folders = ["images", "labels"]
+        roboflow_folders = ["train", "valid"]  # test folder is optional
+        ultralytics_folders = ["images", "labels"]
 
         existing = [d.name for d in dataset_dir.iterdir() if d.is_dir()]
 
-        if all(folder in existing for folder in split_folders):
+        if all(folder in existing for folder in roboflow_folders):
             return Format.ROBOFLOW, existing
-        if all(folder in existing for folder in non_split_folders):
+        if all(folder in existing for folder in ultralytics_folders):
             return Format.ULTRALYTICS, existing
         return None, []
 
     @staticmethod
-    def validate_split(
-        split_path: Path, dir_format: Format
-    ) -> dict[str, Any] | None:
-        if dir_format is Format.ROBOFLOW:
-            images_path = split_path / "images"
-            label_path = split_path / "labels"
-        elif dir_format is Format.ULTRALYTICS:
-            images_path = split_path.parent.parent / "images" / split_path.name
-            label_path = split_path.parent.parent / "labels" / split_path.name
-        else:
+    def validate_split(split_path: Path) -> dict[str, Any] | None:
+        if not split_path.exists():
             return None
 
-        if not images_path.exists():
-            return None
-        if not label_path.exists():
+        candidates = [
+            (
+                split_path / "images",
+                split_path / "labels",
+                split_path.parent,
+            ),  # ROBOFLOW
+            (
+                split_path.parent.parent / "images" / split_path.name,
+                split_path.parent.parent / "labels" / split_path.name,
+                split_path.parent.parent,
+            ),  # ULTRALYTICS
+        ]
+
+        images_path = labels_path = yaml_root = None
+        for img_dir, lbl_dir, yroot in candidates:
+            if img_dir.exists() and lbl_dir.exists():
+                images_path, labels_path, yaml_root = img_dir, lbl_dir, yroot
+                break
+
+        if images_path is None or labels_path is None or yaml_root is None:
             return None
 
-        labels = label_path.glob("*.txt")
+        label_files = list(labels_path.glob("*.txt"))
         images = BaseParser._list_images(images_path)
-        if not BaseParser._compare_stem_files(images, labels):
+        if not BaseParser._compare_stem_files(images, label_files):
             return None
 
-        if dir_format is Format.ROBOFLOW:
-            yaml_file_location = split_path.parent
-        elif dir_format is Format.ULTRALYTICS:
-            yaml_file_location = split_path.parent.parent
-
         yaml_file = next(
-            (
-                f
-                for ext in ("*.yaml", "*.yml")
-                for f in yaml_file_location.glob(ext)
-            ),
+            (f for ext in ("*.yaml", "*.yml") for f in yaml_root.glob(ext)),
             None,
         )
-        if not yaml_file:
+        if yaml_file is None:
             return None
 
         return {
             "image_dir": images_path,
-            "annotation_dir": label_path,
+            "annotation_dir": labels_path,
             "classes_path": yaml_file,
         }
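For context on the change above: the refactored validate_split no longer takes a dir_format argument; it probes both known layouts and uses whichever pair of image and label directories actually exists, also returning the directory where the data YAML is expected. A minimal standalone sketch of that probing order, using only pathlib (the helper name probe_split_layout is hypothetical and not part of this PR):

from pathlib import Path


def probe_split_layout(split_path: Path) -> tuple[Path, Path, Path] | None:
    # Roboflow exports keep images/ and labels/ inside each split folder;
    # Ultralytics keeps split folders inside top-level images/ and labels/.
    candidates = [
        (split_path / "images", split_path / "labels", split_path.parent),
        (
            split_path.parent.parent / "images" / split_path.name,
            split_path.parent.parent / "labels" / split_path.name,
            split_path.parent.parent,
        ),
    ]
    # The first candidate whose image and label directories both exist wins.
    for img_dir, lbl_dir, yaml_root in candidates:
        if img_dir.exists() and lbl_dir.exists():
            return img_dir, lbl_dir, yaml_root
    return None

Carrying the matching yaml_root alongside the two directories is what lets the classes YAML be located without knowing the format up front.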

@@ -173,22 +173,20 @@ def validate(cls, dataset_dir: Path) -> bool:
             ]
             if "train" not in splits or len(splits) < 2:
                 return False
-            return all(
-                cls.validate_split(dataset_dir / s, dir_format) for s in splits
-            )
+            return all(cls.validate_split(dataset_dir / s) for s in splits)
 
         if dir_format is Format.ULTRALYTICS:
             non_split_folders = ["images", "labels"]
             folders = [d.name for d in dataset_dir.iterdir() if d.is_dir()]
-            if not all(f in non_split_folders for f in folders):
+            if not all(f in folders for f in non_split_folders):
                 return False
 
             subfolders = [
                 d.name
                 for d in (dataset_dir / "images").iterdir()
                 if d.is_dir()
             ]
             return all(
-                cls.validate_split(dataset_dir / "images" / split, dir_format)
+                cls.validate_split(dataset_dir / "images" / split)
                 for split in subfolders
             )
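To make the accepted layouts concrete, a hedged usage sketch of the validate classmethod above (the class name YoloV8Parser and the import path are assumptions based on the file name, not confirmed by this diff):

from pathlib import Path

# Assumed import path; adjust to wherever the parser class is actually exported.
from luxonis_ml.data.parsers.yolov8_parser import YoloV8Parser

# Roboflow-style:    dataset/train/images, dataset/train/labels, dataset/valid/...
# Ultralytics-style: dataset/images/train, dataset/labels/train, dataset/images/val, ...
dataset_dir = Path("datasets/my_yolo_export")
if YoloV8Parser.validate(dataset_dir):
    print("Recognized a YOLOv8 dataset in Roboflow or Ultralytics layout")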

@@ -206,30 +204,42 @@ def from_dir(
         classes_path = dataset_dir / yaml_file.name
         dir_format, splits = self._detect_dataset_dir_format(dataset_dir)
         added_train_imgs = self._parse_split(
-            image_dir=dataset_dir / "images" / "train"
-            if dir_format is Format.ULTRALYTICS
-            else dataset_dir / "train" / "images",
-            annotation_dir=dataset_dir / "labels" / "train"
-            if dir_format is Format.ULTRALYTICS
-            else dataset_dir / "train" / "labels",
+            image_dir=(
+                dataset_dir / "images" / "train"
+                if dir_format is Format.ULTRALYTICS
+                else dataset_dir / "train" / "images"
+            ),
+            annotation_dir=(
+                dataset_dir / "labels" / "train"
+                if dir_format is Format.ULTRALYTICS
+                else dataset_dir / "train" / "labels"
+            ),
             classes_path=classes_path,
         )
         added_val_imgs = self._parse_split(
-            image_dir=dataset_dir / "images" / "val"
-            if dir_format is Format.ULTRALYTICS
-            else dataset_dir / "valid" / "images",
-            annotation_dir=dataset_dir / "labels" / "val"
-            if dir_format is Format.ULTRALYTICS
-            else dataset_dir / "valid" / "labels",
+            image_dir=(
+                dataset_dir / "images" / "val"
+                if dir_format is Format.ULTRALYTICS
+                else dataset_dir / "valid" / "images"
+            ),
+            annotation_dir=(
+                dataset_dir / "labels" / "val"
+                if dir_format is Format.ULTRALYTICS
+                else dataset_dir / "valid" / "labels"
+            ),
             classes_path=classes_path,
         )
         added_test_imgs = self._parse_split(
-            image_dir=dataset_dir / "images" / "test"
-            if dir_format is Format.ULTRALYTICS
-            else dataset_dir / "test" / "images",
-            annotation_dir=dataset_dir / "labels" / "test"
-            if dir_format is Format.ULTRALYTICS
-            else dataset_dir / "test" / "labels",
+            image_dir=(
+                dataset_dir / "images" / "test"
+                if dir_format is Format.ULTRALYTICS
+                else dataset_dir / "test" / "images"
+            ),
+            annotation_dir=(
+                dataset_dir / "labels" / "test"
+                if dir_format is Format.ULTRALYTICS
+                else dataset_dir / "test" / "labels"
+            ),
             classes_path=classes_path,
         )
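The three _parse_split calls above differ only in the split name and in which side of the format ternary is taken. A compact sketch of the same path selection as a helper (split_dirs is hypothetical, not introduced by this PR; it assumes the on-disk names used above, where Ultralytics calls the validation split "val" and Roboflow exports call it "valid"):

from pathlib import Path


def split_dirs(
    dataset_dir: Path, split: str, is_ultralytics: bool
) -> tuple[Path, Path]:
    # Ultralytics layout: <root>/images/<split> and <root>/labels/<split>
    if is_ultralytics:
        return dataset_dir / "images" / split, dataset_dir / "labels" / split
    # Roboflow layout: <root>/<split>/images and <root>/<split>/labels,
    # with the validation split stored as "valid" rather than "val".
    name = {"val": "valid"}.get(split, split)
    return dataset_dir / name / "images", dataset_dir / name / "labels"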
