@@ -94,61 +94,61 @@ def fit_boundingbox(self, points: np.ndarray) -> dict[str, float]:
def _detect_dataset_dir_format(
    dataset_dir: Path,
) -> tuple[Format | None, list[str]]:
    """Check whether the dataset directory layout is Ultralytics or Roboflow.

    The decision is based only on the names of the immediate
    subdirectories of ``dataset_dir``:

    * ``train`` and ``valid`` both present -> ``Format.ROBOFLOW``
    * ``images`` and ``labels`` both present -> ``Format.ULTRALYTICS``

    The Roboflow check runs first, so a directory that satisfies both
    conditions is reported as Roboflow.

    Args:
        dataset_dir: Root directory of the dataset to inspect.

    Returns:
        A ``(format, subdirectory_names)`` pair. When neither layout
        matches, the pair is ``(None, [])``.
    """
    roboflow_folders = ["train", "valid"]  # test folder is optional
    ultralytics_folders = ["images", "labels"]

    existing = [d.name for d in dataset_dir.iterdir() if d.is_dir()]

    if all(folder in existing for folder in roboflow_folders):
        return Format.ROBOFLOW, existing
    if all(folder in existing for folder in ultralytics_folders):
        return Format.ULTRALYTICS, existing
    return None, []
109109
110110 @staticmethod
111- def validate_split (
112- split_path : Path , dir_format : Format
113- ) -> dict [str , Any ] | None :
114- if dir_format is Format .ROBOFLOW :
115- images_path = split_path / "images"
116- label_path = split_path / "labels"
117- elif dir_format is Format .ULTRALYTICS :
118- images_path = split_path .parent .parent / "images" / split_path .name
119- label_path = split_path .parent .parent / "labels" / split_path .name
120- else :
111+ def validate_split (split_path : Path ) -> dict [str , Any ] | None :
112+ if not split_path .exists ():
121113 return None
122114
123- if not images_path .exists ():
124- return None
125- if not label_path .exists ():
115+ candidates = [
116+ (
117+ split_path / "images" ,
118+ split_path / "labels" ,
119+ split_path .parent ,
120+ ), # ROBOFLOW
121+ (
122+ split_path .parent .parent / "images" / split_path .name ,
123+ split_path .parent .parent / "labels" / split_path .name ,
124+ split_path .parent .parent ,
125+ ), # ULTRALYTICS
126+ ]
127+
128+ images_path = labels_path = yaml_root = None
129+ for img_dir , lbl_dir , yroot in candidates :
130+ if img_dir .exists () and lbl_dir .exists ():
131+ images_path , labels_path , yaml_root = img_dir , lbl_dir , yroot
132+ break
133+
134+ if images_path is None or labels_path is None or yaml_root is None :
126135 return None
127136
128- labels = label_path .glob ("*.txt" )
137+ label_files = list ( labels_path .glob ("*.txt" ) )
129138 images = BaseParser ._list_images (images_path )
130- if not BaseParser ._compare_stem_files (images , labels ):
139+ if not BaseParser ._compare_stem_files (images , label_files ):
131140 return None
132141
133- if dir_format is Format .ROBOFLOW :
134- yaml_file_location = split_path .parent
135- elif dir_format is Format .ULTRALYTICS :
136- yaml_file_location = split_path .parent .parent
137-
138142 yaml_file = next (
139- (
140- f
141- for ext in ("*.yaml" , "*.yml" )
142- for f in yaml_file_location .glob (ext )
143- ),
143+ (f for ext in ("*.yaml" , "*.yml" ) for f in yaml_root .glob (ext )),
144144 None ,
145145 )
146- if not yaml_file :
146+ if yaml_file is None :
147147 return None
148148
149149 return {
150150 "image_dir" : images_path ,
151- "annotation_dir" : label_path ,
151+ "annotation_dir" : labels_path ,
152152 "classes_path" : yaml_file ,
153153 }
154154
@@ -173,22 +173,20 @@ def validate(cls, dataset_dir: Path) -> bool:
173173 ]
174174 if "train" not in splits or len (splits ) < 2 :
175175 return False
176- return all (
177- cls .validate_split (dataset_dir / s , dir_format ) for s in splits
178- )
176+ return all (cls .validate_split (dataset_dir / s ) for s in splits )
177+
179178 if dir_format is Format .ULTRALYTICS :
180179 non_split_folders = ["images" , "labels" ]
181180 folders = [d .name for d in dataset_dir .iterdir () if d .is_dir ()]
182- if not all (f in non_split_folders for f in folders ):
181+ if not all (f in folders for f in non_split_folders ):
183182 return False
184-
185183 subfolders = [
186184 d .name
187185 for d in (dataset_dir / "images" ).iterdir ()
188186 if d .is_dir ()
189187 ]
190188 return all (
191- cls .validate_split (dataset_dir / "images" / split , dir_format )
189+ cls .validate_split (dataset_dir / "images" / split )
192190 for split in subfolders
193191 )
194192
@@ -206,30 +204,42 @@ def from_dir(
206204 classes_path = dataset_dir / yaml_file .name
207205 dir_format , splits = self ._detect_dataset_dir_format (dataset_dir )
208206 added_train_imgs = self ._parse_split (
209- image_dir = dataset_dir / "images" / "train"
210- if dir_format is Format .ULTRALYTICS
211- else dataset_dir / "train" / "images" ,
212- annotation_dir = dataset_dir / "labels" / "train"
213- if dir_format is Format .ULTRALYTICS
214- else dataset_dir / "train" / "labels" ,
207+ image_dir = (
208+ dataset_dir / "images" / "train"
209+ if dir_format is Format .ULTRALYTICS
210+ else dataset_dir / "train" / "images"
211+ ),
212+ annotation_dir = (
213+ dataset_dir / "labels" / "train"
214+ if dir_format is Format .ULTRALYTICS
215+ else dataset_dir / "train" / "labels"
216+ ),
215217 classes_path = classes_path ,
216218 )
217219 added_val_imgs = self ._parse_split (
218- image_dir = dataset_dir / "images" / "val"
219- if dir_format is Format .ULTRALYTICS
220- else dataset_dir / "valid" / "images" ,
221- annotation_dir = dataset_dir / "labels" / "val"
222- if dir_format is Format .ULTRALYTICS
223- else dataset_dir / "valid" / "labels" ,
220+ image_dir = (
221+ dataset_dir / "images" / "val"
222+ if dir_format is Format .ULTRALYTICS
223+ else dataset_dir / "valid" / "images"
224+ ),
225+ annotation_dir = (
226+ dataset_dir / "labels" / "val"
227+ if dir_format is Format .ULTRALYTICS
228+ else dataset_dir / "valid" / "labels"
229+ ),
224230 classes_path = classes_path ,
225231 )
226232 added_test_imgs = self ._parse_split (
227- image_dir = dataset_dir / "images" / "test"
228- if dir_format is Format .ULTRALYTICS
229- else dataset_dir / "test" / "images" ,
230- annotation_dir = dataset_dir / "labels" / "test"
231- if dir_format is Format .ULTRALYTICS
232- else dataset_dir / "test" / "labels" ,
233+ image_dir = (
234+ dataset_dir / "images" / "test"
235+ if dir_format is Format .ULTRALYTICS
236+ else dataset_dir / "test" / "images"
237+ ),
238+ annotation_dir = (
239+ dataset_dir / "labels" / "test"
240+ if dir_format is Format .ULTRALYTICS
241+ else dataset_dir / "test" / "labels"
242+ ),
233243 classes_path = classes_path ,
234244 )
235245
0 commit comments