Skip to content

Commit 7f15cba

Browse files
authored
Fixed bug with validate_split in yolov8 parser (#368)
1 parent 571af92 commit 7f15cba

File tree

1 file changed

+66
-56
lines changed

1 file changed

+66
-56
lines changed

luxonis_ml/data/parsers/yolov8_parser.py

Lines changed: 66 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -94,61 +94,61 @@ def fit_boundingbox(self, points: np.ndarray) -> dict[str, float]:
9494
def _detect_dataset_dir_format(
    dataset_dir: Path,
) -> tuple[Format | None, list[str]]:
    """Detect whether a dataset directory uses the Roboflow or the
    Ultralytics layout.

    The decision is based only on the names of the immediate
    sub-directories of ``dataset_dir``:

    - Roboflow: both ``train`` and ``valid`` are present (``test`` is
      optional).
    - Ultralytics: both ``images`` and ``labels`` are present.

    Roboflow is checked first, so a directory matching both layouts is
    reported as Roboflow.

    Returns a ``(format, sub-directory names)`` pair. When no layout
    matches, or ``dataset_dir`` is not an existing directory, the result
    is ``(None, [])``.
    """
    roboflow_folders = ("train", "valid")  # test folder is optional
    ultralytics_folders = ("images", "labels")

    # A missing path or a regular file cannot match either layout; report
    # "not detected" instead of letting ``iterdir`` raise.
    if not dataset_dir.is_dir():
        return None, []

    existing = [d.name for d in dataset_dir.iterdir() if d.is_dir()]

    if all(folder in existing for folder in roboflow_folders):
        return Format.ROBOFLOW, existing
    if all(folder in existing for folder in ultralytics_folders):
        return Format.ULTRALYTICS, existing
    return None, []
109109

110110
@staticmethod
111-
def validate_split(
112-
split_path: Path, dir_format: Format
113-
) -> dict[str, Any] | None:
114-
if dir_format is Format.ROBOFLOW:
115-
images_path = split_path / "images"
116-
label_path = split_path / "labels"
117-
elif dir_format is Format.ULTRALYTICS:
118-
images_path = split_path.parent.parent / "images" / split_path.name
119-
label_path = split_path.parent.parent / "labels" / split_path.name
120-
else:
111+
def validate_split(split_path: Path) -> dict[str, Any] | None:
112+
if not split_path.exists():
121113
return None
122114

123-
if not images_path.exists():
124-
return None
125-
if not label_path.exists():
115+
candidates = [
116+
(
117+
split_path / "images",
118+
split_path / "labels",
119+
split_path.parent,
120+
), # ROBOFLOW
121+
(
122+
split_path.parent.parent / "images" / split_path.name,
123+
split_path.parent.parent / "labels" / split_path.name,
124+
split_path.parent.parent,
125+
), # ULTRALYTICS
126+
]
127+
128+
images_path = labels_path = yaml_root = None
129+
for img_dir, lbl_dir, yroot in candidates:
130+
if img_dir.exists() and lbl_dir.exists():
131+
images_path, labels_path, yaml_root = img_dir, lbl_dir, yroot
132+
break
133+
134+
if images_path is None or labels_path is None or yaml_root is None:
126135
return None
127136

128-
labels = label_path.glob("*.txt")
137+
label_files = list(labels_path.glob("*.txt"))
129138
images = BaseParser._list_images(images_path)
130-
if not BaseParser._compare_stem_files(images, labels):
139+
if not BaseParser._compare_stem_files(images, label_files):
131140
return None
132141

133-
if dir_format is Format.ROBOFLOW:
134-
yaml_file_location = split_path.parent
135-
elif dir_format is Format.ULTRALYTICS:
136-
yaml_file_location = split_path.parent.parent
137-
138142
yaml_file = next(
139-
(
140-
f
141-
for ext in ("*.yaml", "*.yml")
142-
for f in yaml_file_location.glob(ext)
143-
),
143+
(f for ext in ("*.yaml", "*.yml") for f in yaml_root.glob(ext)),
144144
None,
145145
)
146-
if not yaml_file:
146+
if yaml_file is None:
147147
return None
148148

149149
return {
150150
"image_dir": images_path,
151-
"annotation_dir": label_path,
151+
"annotation_dir": labels_path,
152152
"classes_path": yaml_file,
153153
}
154154

@@ -173,22 +173,20 @@ def validate(cls, dataset_dir: Path) -> bool:
173173
]
174174
if "train" not in splits or len(splits) < 2:
175175
return False
176-
return all(
177-
cls.validate_split(dataset_dir / s, dir_format) for s in splits
178-
)
176+
return all(cls.validate_split(dataset_dir / s) for s in splits)
177+
179178
if dir_format is Format.ULTRALYTICS:
180179
non_split_folders = ["images", "labels"]
181180
folders = [d.name for d in dataset_dir.iterdir() if d.is_dir()]
182-
if not all(f in non_split_folders for f in folders):
181+
if not all(f in folders for f in non_split_folders):
183182
return False
184-
185183
subfolders = [
186184
d.name
187185
for d in (dataset_dir / "images").iterdir()
188186
if d.is_dir()
189187
]
190188
return all(
191-
cls.validate_split(dataset_dir / "images" / split, dir_format)
189+
cls.validate_split(dataset_dir / "images" / split)
192190
for split in subfolders
193191
)
194192

@@ -206,30 +204,42 @@ def from_dir(
206204
classes_path = dataset_dir / yaml_file.name
207205
dir_format, splits = self._detect_dataset_dir_format(dataset_dir)
208206
added_train_imgs = self._parse_split(
209-
image_dir=dataset_dir / "images" / "train"
210-
if dir_format is Format.ULTRALYTICS
211-
else dataset_dir / "train" / "images",
212-
annotation_dir=dataset_dir / "labels" / "train"
213-
if dir_format is Format.ULTRALYTICS
214-
else dataset_dir / "train" / "labels",
207+
image_dir=(
208+
dataset_dir / "images" / "train"
209+
if dir_format is Format.ULTRALYTICS
210+
else dataset_dir / "train" / "images"
211+
),
212+
annotation_dir=(
213+
dataset_dir / "labels" / "train"
214+
if dir_format is Format.ULTRALYTICS
215+
else dataset_dir / "train" / "labels"
216+
),
215217
classes_path=classes_path,
216218
)
217219
added_val_imgs = self._parse_split(
218-
image_dir=dataset_dir / "images" / "val"
219-
if dir_format is Format.ULTRALYTICS
220-
else dataset_dir / "valid" / "images",
221-
annotation_dir=dataset_dir / "labels" / "val"
222-
if dir_format is Format.ULTRALYTICS
223-
else dataset_dir / "valid" / "labels",
220+
image_dir=(
221+
dataset_dir / "images" / "val"
222+
if dir_format is Format.ULTRALYTICS
223+
else dataset_dir / "valid" / "images"
224+
),
225+
annotation_dir=(
226+
dataset_dir / "labels" / "val"
227+
if dir_format is Format.ULTRALYTICS
228+
else dataset_dir / "valid" / "labels"
229+
),
224230
classes_path=classes_path,
225231
)
226232
added_test_imgs = self._parse_split(
227-
image_dir=dataset_dir / "images" / "test"
228-
if dir_format is Format.ULTRALYTICS
229-
else dataset_dir / "test" / "images",
230-
annotation_dir=dataset_dir / "labels" / "test"
231-
if dir_format is Format.ULTRALYTICS
232-
else dataset_dir / "test" / "labels",
233+
image_dir=(
234+
dataset_dir / "images" / "test"
235+
if dir_format is Format.ULTRALYTICS
236+
else dataset_dir / "test" / "images"
237+
),
238+
annotation_dir=(
239+
dataset_dir / "labels" / "test"
240+
if dir_format is Format.ULTRALYTICS
241+
else dataset_dir / "test" / "labels"
242+
),
233243
classes_path=classes_path,
234244
)
235245

0 commit comments

Comments
 (0)