1- from pathlib import Path
2- from typing import Any , Dict
31from enum import Enum
4- import yaml
5- import numpy as np
6- import cv2
2+ from pathlib import Path
3+ from typing import Any
74
5+ import cv2
6+ import numpy as np
7+ import yaml
88from loguru import logger
99
1010from luxonis_ml .data import DatasetIterator
1111
1212from .base_parser import BaseParser , ParserOutput
1313
14+
class Format(str, Enum):
    """Directory layouts recognised for a dataset.

    Members subclass C{str}, so each one compares equal to its plain
    string value.
    """

    # Split directories at the top level, each holding "images" and
    # "labels" (e.g. train/images, train/labels).
    SPLIT_FIRST = "split_first"
    # "images" and "labels" at the top level, each holding the split
    # directories (e.g. images/train, labels/train).
    SPLIT_SECOND = "split_second"
1718
19+
1820class UltralyticsParser (BaseParser ):
19- """Parses annotations from Ultralytics and YOLOv8 annotations to LDF.
21+ """Parses annotations from Ultralytics and YOLOv8 annotations to
22+ LDF.
2023
2124 Expected format::
2225
@@ -38,7 +41,7 @@ class UltralyticsParser(BaseParser):
3841 └── *.yaml
3942
4043 OR::
41-
44+
4245 dataset_dir/
4346 ├── train/
4447 │ ├── images/
@@ -75,7 +78,7 @@ class UltralyticsParser(BaseParser):
7578 U{Roboflow <https://roboflow.com/>}.
7679 """
7780
78- def fit_boundingbox (self , points : np .ndarray ) -> Dict [str , float ]:
81+ def fit_boundingbox (self , points : np .ndarray ) -> dict [str , float ]:
7982 """Fits a bounding box of the polygon (mask)."""
8083 x_min = np .min (points [:, 0 ])
8184 y_min = np .min (points [:, 1 ])
@@ -87,25 +90,28 @@ def fit_boundingbox(self, points: np.ndarray) -> Dict[str, float]:
8790 "w" : x_max - x_min ,
8891 "h" : y_max - y_min ,
8992 }
90-
93+
@staticmethod
def _detect_dataset_dir_format(
    dataset_dir: Path,
) -> tuple[Format | None, list[str]]:
    """Checks if dataset directory structure is in FiftyOne or
    Roboflow format.

    Only the names of the immediate subdirectories of C{dataset_dir}
    are inspected; their contents are not checked here.

    @type dataset_dir: Path
    @param dataset_dir: Root directory of the dataset.
    @rtype: tuple[Format | None, list[str]]
    @return: C{(Format.SPLIT_FIRST, subdirs)} when both C{train} and
        C{val} subdirectories are present, C{(Format.SPLIT_SECOND,
        subdirs)} when both C{images} and C{labels} are present, where
        C{subdirs} lists the names of all immediate subdirectories.
        C{(None, [])} when neither layout matches.
    """
    split_folders = ["train", "val"]  # test folder is optional
    non_split_folders = ["images", "labels"]

    existing = [d.name for d in dataset_dir.iterdir() if d.is_dir()]

    # The split-first layout takes precedence when both layouts match.
    if all(folder in existing for folder in split_folders):
        return Format.SPLIT_FIRST, existing
    if all(folder in existing for folder in non_split_folders):
        return Format.SPLIT_SECOND, existing
    return None, []
105-
106110
107111 @staticmethod
108- def validate_split (split_path : Path , dir_format : Format ) -> dict [str , Any ] | None :
112+ def validate_split (
113+ split_path : Path , dir_format : Format
114+ ) -> dict [str , Any ] | None :
109115 if dir_format is Format .SPLIT_FIRST :
110116 images_path = split_path / "images"
111117 label_path = split_path / "labels"
@@ -114,12 +120,12 @@ def validate_split(split_path: Path, dir_format: Format) -> dict[str, Any] | Non
114120 label_path = split_path .parent .parent / "labels" / split_path .name
115121 else :
116122 return None
117-
123+
118124 if not images_path .exists ():
119125 return None
120126 if not label_path .exists ():
121127 return None
122-
128+
123129 labels = label_path .glob ("*.txt" )
124130 images = BaseParser ._list_images (images_path )
125131 if not BaseParser ._compare_stem_files (images , labels ):
@@ -131,7 +137,11 @@ def validate_split(split_path: Path, dir_format: Format) -> dict[str, Any] | Non
131137 yaml_file_location = split_path .parent .parent
132138
133139 yaml_file = next (
134- (f for ext in ("*.yaml" , "*.yml" ) for f in yaml_file_location .glob (ext )),
140+ (
141+ f
142+ for ext in ("*.yaml" , "*.yml" )
143+ for f in yaml_file_location .glob (ext )
144+ ),
135145 None ,
136146 )
137147 if not yaml_file :
@@ -156,18 +166,27 @@ def validate(cls, dataset_dir: Path) -> bool:
156166 )
157167 if not yaml_file :
158168 return False
159-
169+
160170 if dir_format is Format .SPLIT_FIRST :
161- splits = [d .name for d in dataset_dir .iterdir () if d .is_dir () and d .name in ("train" , "val" , "test" )]
171+ splits = [
172+ d .name
173+ for d in dataset_dir .iterdir ()
174+ if d .is_dir () and d .name in ("train" , "val" , "test" )
175+ ]
162176 if "train" not in splits or len (splits ) < 2 :
163177 return False
164- return all (cls .validate_split (dataset_dir / s , dir_format ) for s in splits )
165- elif dir_format is Format .SPLIT_SECOND :
178+ return all (
179+ cls .validate_split (dataset_dir / s , dir_format ) for s in splits
180+ )
181+ if dir_format is Format .SPLIT_SECOND :
166182 non_split_folders = ["images" , "labels" ]
167183 folders = [d .name for d in dataset_dir .iterdir () if d .is_dir ()]
168184 if not all (f in non_split_folders for f in folders ):
169185 return False
170- return all (cls .validate_split (dataset_dir / s , dir_format ) for s in folders )
186+ return all (
187+ cls .validate_split (dataset_dir / s , dir_format )
188+ for s in folders
189+ )
171190
172191 return False
173192
@@ -183,18 +202,30 @@ def from_dir(
183202 classes_path = dataset_dir / yaml_file
184203 dir_format , splits = self ._detect_dataset_dir_format (dataset_dir )
185204 added_train_imgs = self ._parse_split (
186- image_dir = dataset_dir / "images" / "train" if dir_format is Format .SPLIT_SECOND else dataset_dir / "train" / "images" ,
187- annotation_dir = dataset_dir / "labels" / "train" if dir_format is Format .SPLIT_SECOND else dataset_dir / "train" / "labels" ,
205+ image_dir = dataset_dir / "images" / "train"
206+ if dir_format is Format .SPLIT_SECOND
207+ else dataset_dir / "train" / "images" ,
208+ annotation_dir = dataset_dir / "labels" / "train"
209+ if dir_format is Format .SPLIT_SECOND
210+ else dataset_dir / "train" / "labels" ,
188211 classes_path = classes_path ,
189212 )
190213 added_val_imgs = self ._parse_split (
191- image_dir = dataset_dir / "images" / "val" if dir_format is Format .SPLIT_SECOND else dataset_dir / "val" / "images" ,
192- annotation_dir = dataset_dir / "labels" / "val" if dir_format is Format .SPLIT_SECOND else dataset_dir / "val" / "labels" ,
214+ image_dir = dataset_dir / "images" / "val"
215+ if dir_format is Format .SPLIT_SECOND
216+ else dataset_dir / "val" / "images" ,
217+ annotation_dir = dataset_dir / "labels" / "val"
218+ if dir_format is Format .SPLIT_SECOND
219+ else dataset_dir / "val" / "labels" ,
193220 classes_path = classes_path ,
194221 )
195222 added_test_imgs = self ._parse_split (
196- image_dir = dataset_dir / "images" / "test" if dir_format is Format .SPLIT_SECOND else dataset_dir / "test" / "images" ,
197- annotation_dir = dataset_dir / "labels" / "test" if dir_format is Format .SPLIT_SECOND else dataset_dir / "test" / "labels" ,
223+ image_dir = dataset_dir / "images" / "test"
224+ if dir_format is Format .SPLIT_SECOND
225+ else dataset_dir / "test" / "images" ,
226+ annotation_dir = dataset_dir / "labels" / "test"
227+ if dir_format is Format .SPLIT_SECOND
228+ else dataset_dir / "test" / "labels" ,
198229 classes_path = classes_path ,
199230 )
200231
@@ -203,8 +234,9 @@ def from_dir(
203234 def from_split (
204235 self , image_dir : Path , annotation_dir : Path , classes_path : Path
205236 ) -> ParserOutput :
206- """Parses annotations from YoloV8 or Ultralytics format to LDF. Annotations
207- include object detection, instance segmentation and keypoints.
237+ """Parses annotations from YoloV8 or Ultralytics format to LDF.
238+ Annotations include object detection, instance segmentation and
239+ keypoints.
208240
209241 @type image_dir: Path
210242 @param image_dir: Path to directory with images
@@ -220,7 +252,9 @@ def from_split(
220252 classes_data = yaml .safe_load (f )
221253
222254 if isinstance (classes_data ["names" ], list ):
223- # names: ["class1", "class2", "class3"]
255+ """
256+ names: ["class1", "class2", "class3"]
257+ """
224258 class_names = dict (enumerate (classes_data ["names" ]))
225259 else :
226260 """
@@ -246,19 +280,21 @@ def generator() -> DatasetIterator:
246280 # object detection format: class_id x_center y_center width height
247281 # segmentation format: class_id x1 y1 x2 y2 x3 y3 ... xn yn (min 3 points)
248282 # keypoints format: class_id x_center y_center width height kp1_x kp1_y kp2_x kp2_y ... kpn_x kpn_y (it can also have 3rd dimension for visibility)
249-
283+
250284 if len (annotation_elements ) == 5 :
251285 task_type = "detection"
252286 elif len (annotation_elements ) > 5 :
253287 if classes_data .get ("kpt_shape" , None ) is not None :
254288 task_type = "keypoints"
255289 else :
256290 task_type = "segmentation"
257-
291+
258292 if task_type == "detection" :
259- class_id , x_center , y_center , width , height = annotation_elements
293+ class_id , x_center , y_center , width , height = (
294+ annotation_elements
295+ )
260296 class_name = class_names [int (class_id )]
261-
297+
262298 yield {
263299 "file" : str (img_path ),
264300 "annotation" : {
@@ -271,18 +307,18 @@ def generator() -> DatasetIterator:
271307 },
272308 },
273309 }
274-
310+
275311 elif task_type == "segmentation" :
276312 img = cv2 .imread (str (img_path ))
277313 height , width = img .shape [:2 ]
278-
314+
279315 class_id , * points = annotation_elements
280316 points = [float (p ) for p in points ]
281317 points = np .array (points ).reshape (- 1 , 2 )
282318 boundingbox = self .fit_boundingbox (points )
283319 points = [(p [0 ], p [1 ]) for p in points .tolist ()]
284320 class_name = class_names [int (class_id )]
285-
321+
286322 yield {
287323 "file" : str (img_path ),
288324 "annotation" : {
@@ -292,7 +328,7 @@ def generator() -> DatasetIterator:
292328 "height" : height ,
293329 "width" : width ,
294330 "points" : points ,
295- }
331+ },
296332 },
297333 }
298334
@@ -302,14 +338,20 @@ def generator() -> DatasetIterator:
302338 class_id , * points = annotation_elements
303339 x_center , y_center , width , height , * keypoints = points
304340 keypoints = [float (p ) for p in keypoints ]
305- keypoints = np .array (keypoints ).reshape (n_kpts , kpt_dim )
341+ keypoints = np .array (keypoints ).reshape (
342+ n_kpts , kpt_dim
343+ )
306344 class_name = class_names [int (class_id )]
307345
308346 if kpt_dim == 2 :
309347 # add full visibility as last dimension
310- keypoints = np .concatenate ([keypoints , np .ones ((n_kpts , 1 )) * 2 ], axis = 1 )
348+ keypoints = np .concatenate (
349+ [keypoints , np .ones ((n_kpts , 1 )) * 2 ], axis = 1
350+ )
311351
312- keypoints = [(p [0 ], p [1 ], int (p [2 ])) for p in keypoints .tolist ()]
352+ keypoints = [
353+ (p [0 ], p [1 ], int (p [2 ])) for p in keypoints .tolist ()
354+ ]
313355
314356 yield {
315357 "file" : str (img_path ),
@@ -323,11 +365,10 @@ def generator() -> DatasetIterator:
323365 },
324366 "keypoints" : {
325367 "keypoints" : keypoints ,
326- }
368+ },
327369 },
328370 }
329371
330-
331372 added_images = self ._get_added_images (generator ())
332373
333374 return generator (), {}, added_images
0 commit comments