luxonis · JSabadin · Jul 16, 2025 · Jul 11, 2025 · Jul 11, 2025 · Jul 14, 2025
@@ -12,6 +12,8 @@
 
 
 class BaseParser(ABC):
+    SPLIT_NAMES: tuple[str, ...] = ("train", "valid", "test")
+
     def __init__(
         self,
         dataset: BaseDataset,
@@ -52,17 +54,24 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
         """
         ...
 
-    @staticmethod
-    @abstractmethod
-    def validate(dataset_dir: Path) -> bool:
+    @classmethod
+    def validate(cls, dataset_dir: Path) -> bool:
         """Validates if the dataset is in an expected format.
 
+        Only the split directories named in C{SPLIT_NAMES} that are
+        actually present in C{dataset_dir} are checked. At least one of
+        them must exist and every present one must pass
+        C{validate_split}.
+
         @type dataset_dir: Path
         @param dataset_dir: Path to source dataset directory.
         @rtype: bool
         @return: If the dataset is in the expected format.
         """
-        ...
+        # A missing path or a regular file cannot be a dataset root; report
+        # "not this format" instead of letting ``iterdir`` raise.
+        if not dataset_dir.is_dir():
+            return False
+
+        splits = [
+            entry.name
+            for entry in dataset_dir.iterdir()
+            if entry.is_dir() and entry.name in cls.SPLIT_NAMES
+        ]
+        if not splits:
+            return False
+
+        # ``validate_split`` signals failure with ``None``; compare
+        # explicitly so a falsy-but-valid result is not treated as failure.
+        return all(
+            cls.validate_split(dataset_dir / split) is not None
+            for split in splits
+        )
 
     @abstractmethod
     def from_dir(

@@ -42,17 +42,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             return None
         return {"class_dir": split_path}
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if (
-                ClassificationDirectoryParser.validate_split(split_path)
-                is None
-            ):
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -60,10 +60,20 @@
         Roboflow format."""
         fiftyone_splits = ["train", "validation", "test"]
         roboflow_splits = ["train", "valid", "test"]
-        if all((dataset_dir / split).exists() for split in fiftyone_splits):
-            return Format.FIFTYONE, fiftyone_splits
-        if all((dataset_dir / split).exists() for split in roboflow_splits):
-            return Format.ROBOFLOW, roboflow_splits
+
+        existing = [d.name for d in dataset_dir.iterdir() if d.is_dir()]
+
+        # Clash with NATIVE format
+        if "val" in existing:
+            return None, []
+
+        fo = [s for s in fiftyone_splits if s in existing]
+        rf = [s for s in roboflow_splits if s in existing]
+
+        if len(fo) != 0 and len(fo) >= len(rf):
+            return Format.FIFTYONE, fo
+        if len(rf) != 0:
+            return Format.ROBOFLOW, rf
         return None, []
 
     @staticmethod
@@ -84,17 +94,13 @@
         image_dir = dirs[0]
         return {"image_dir": image_dir, "annotation_path": json_path}
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        dir_format, splits = COCOParser._detect_dataset_dir_format(dataset_dir)
+    @classmethod
+    def validate(cls, dataset_dir: Path) -> bool:
+        """Validates if the dataset is in an expected format.
+
+        The directory layout is detected first; when no layout is
+        recognized the dataset is rejected. Otherwise every detected
+        split must pass C{validate_split}.
+
+        @type dataset_dir: Path
+        @param dataset_dir: Path to source dataset directory.
+        @rtype: bool
+        @return: If the dataset is in the expected format.
+        """
+        dir_format, splits = cls._detect_dataset_dir_format(dataset_dir)
         if dir_format is None:
             return False
 
-        for split in splits:
-            split_path = dataset_dir / split
-            if COCOParser.validate_split(split_path) is None:
-                return False
-        return True
+        # ``validate_split`` signals failure with ``None``; compare
+        # explicitly, as the previous loop did, rather than via truthiness.
+        return all(
+            cls.validate_split(dataset_dir / split) is not None
+            for split in splits
+        )
 
     def from_dir(
         self,

@@ -40,14 +40,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             "annotation_path": split_path / "_annotations.createml.json",
         }
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if CreateMLParser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -39,14 +39,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             "classes_path": split_path / "_darknet.labels",
         }
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if DarknetParser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -10,6 +10,7 @@
 
 
 class NativeParser(BaseParser):
+    SPLIT_NAMES: tuple[str, ...] = ("train", "val", "test")
     """Parses directory with native LDF annotations.
 
     Expected format::
@@ -32,14 +33,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             return None
         return {"annotation_path": annotation_path}
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "val", "test"]:
-            split_path = dataset_dir / split
-            if NativeParser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -47,17 +47,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             "classes_path": split_path / "_classes.csv",
         }
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if (
-                SegmentationMaskDirectoryParser.validate_split(split_path)
-                is None
-            ):
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -68,21 +68,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             )
         return {"split_path": split_path}
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        """Validates if the dataset is in an expected format.
-
-        @type dataset_dir: Path
-        @param dataset_dir: Path to source dataset directory.
-        @rtype: bool
-        @return: True if the dataset is in the expected format.
-        """
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if SOLOParser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -40,14 +40,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             "annotation_path": split_path / "_annotations.csv",
         }
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if TensorflowCSVParser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -38,14 +38,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             return None
         return {"image_dir": split_path, "annotation_dir": split_path}
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if VOCParser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -41,14 +41,6 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             "classes_path": classes,
         }
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            split_path = dataset_dir / split
-            if YoloV4Parser.validate_split(split_path) is None:
-                return False
-        return True
-
     def from_dir(
         self, dataset_dir: Path
     ) -> tuple[list[Path], list[Path], list[Path]]:

@@ -58,13 +58,19 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             "classes_path": data_yaml,
         }
 
-    @staticmethod
-    def validate(dataset_dir: Path) -> bool:
-        for split in ["train", "valid", "test"]:
-            img_split = dataset_dir / "images" / split
-            if YoloV6Parser.validate_split(img_split) is None:
-                return False
-        return True
+    @classmethod
+    def validate(cls, dataset_dir: Path) -> bool:
+        """Validates if the dataset is in an expected format.
+
+        Split directories are looked up under C{dataset_dir / "images"}.
+        The C{train} split is mandatory and at least one more of
+        C{valid} / C{test} must be present; every present split must
+        pass C{validate_split}.
+
+        @type dataset_dir: Path
+        @param dataset_dir: Path to source dataset directory.
+        @rtype: bool
+        @return: If the dataset is in the expected format.
+        """
+        img_root = dataset_dir / "images"
+        # ``is_dir`` (not ``exists``) so that a regular file named
+        # "images" is rejected instead of making ``iterdir`` raise.
+        if not img_root.is_dir():
+            return False
+
+        splits = [
+            entry.name
+            for entry in img_root.iterdir()
+            if entry.is_dir() and entry.name in ("train", "valid", "test")
+        ]
+        if "train" not in splits or len(splits) < 2:
+            return False
+
+        # ``validate_split`` signals failure with ``None``; compare
+        # explicitly rather than relying on truthiness.
+        return all(
+            cls.validate_split(img_root / split) is not None
+            for split in splits
+        )
 
     def from_dir(
         self, dataset_dir: Path