luxonis
diff --git a/‎luxonis_ml/data/README.md‎
Lines changed: 61 additions & 12 deletions b/‎luxonis_ml/data/README.md‎
Lines changed: 61 additions & 12 deletions
diff --git a/‎luxonis_ml/data/datasets/luxonis_dataset.py‎
Lines changed: 4 additions & 0 deletions b/‎luxonis_ml/data/datasets/luxonis_dataset.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎luxonis_ml/data/exporters/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎luxonis_ml/data/exporters/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎luxonis_ml/data/exporters/fiftyone_classification_exporter.py‎
Lines changed: 186 additions & 0 deletions b/‎luxonis_ml/data/exporters/fiftyone_classification_exporter.py‎
Lines changed: 186 additions & 0 deletions
diff --git a/‎luxonis_ml/data/parsers/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎luxonis_ml/data/parsers/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎luxonis_ml/data/parsers/classification_directory_parser.py‎
Lines changed: 33 additions & 6 deletions b/‎luxonis_ml/data/parsers/classification_directory_parser.py‎
Lines changed: 33 additions & 6 deletions
@@ -557,19 +557,68 @@ The supported formats are:
     └── test/
   ```
 
-- **Classification Directory** - A directory with subdirectories for each class
+- **Classification Directory** - A directory with subdirectories for each class. Two structures are supported:
 
-  ```plaintext
-  dataset_dir/
-  ├── train/
-  │   ├── class1/
-  │   │   ├── img1.jpg
-  │   │   ├── img2.jpg
-  │   │   └── ...
-  │   ├── class2/
-  │   └── ...
-  ├── valid/
-  └── test/
+  - Split structure with train/valid/test subdirectories:
+    ```plaintext
+    dataset_dir/
+    ├── train/
+    │   ├── class1/
+    │   │   ├── img1.jpg
+    │   │   ├── img2.jpg
+    │   │   └── ...
+    │   ├── class2/
+    │   └── ...
+    ├── valid/
+    └── test/
+    ```
+  - Flat structure (class subdirectories directly in root, random splits applied at parse time):
+    ```plaintext
+    dataset_dir/
+    ├── class1/
+    │   ├── img1.jpg
+    │   └── ...
+    ├── class2/
+    │   └── ...
+    └── info.json  (optional metadata file)
+    ```
+
+- [**FiftyOne Classification**](https://docs.voxel51.com/user_guide/export_datasets.html#fiftyone-image-classification-dataset) - FiftyOneImageClassificationDataset format with images in a `data/` folder and labels in `labels.json`. Two structures are supported:
+
+  - Split structure with train/validation/test subdirectories:
+    ```plaintext
+    dataset_dir/
+    ├── train/
+    │   ├── data/
+    │   │   ├── img1.jpg
+    │   │   └── ...
+    │   └── labels.json
+    ├── validation/
+    │   ├── data/
+    │   └── labels.json
+    └── test/
+        ├── data/
+        └── labels.json
+    ```
+  - Flat structure (random splits applied at parse time):
+    ```plaintext
+    dataset_dir/
+    ├── data/
+    │   ├── img1.jpg
+    │   └── ...
+    └── labels.json
+    ```
+
+  The `labels.json` format:
+
+  ```json
+  {
+      "classes": ["class1", "class2", ...],
+      "labels": {
+          "image_stem": class_index,
+          ...
+      }
+  }
   ```
 
 - **Segmentation Mask Directory** - A directory with images and corresponding masks.
 
@@ -23,6 +23,7 @@
     CocoExporter,
     CreateMLExporter,
     DarknetExporter,
+    FiftyOneClassificationExporter,
     NativeExporter,
     PreparedLDF,
     SegmentationMaskDirectoryExporter,
@@ -1543,6 +1544,9 @@ def export(
             DatasetType.CLSDIR: ExporterSpec(
                 ClassificationDirectoryExporter, {}
             ),
+            DatasetType.FIFTYONECLS: ExporterSpec(
+                FiftyOneClassificationExporter, {}
+            ),
             DatasetType.SEGMASK: ExporterSpec(
                 SegmentationMaskDirectoryExporter, {}
             ),
 
@@ -4,6 +4,7 @@
 from .createml_exporter import CreateMLExporter
 from .darknet_exporter import DarknetExporter
 from .exporter_utils import PreparedLDF
+from .fiftyone_classification_exporter import FiftyOneClassificationExporter
 from .native_exporter import NativeExporter
 from .segmentation_mask_directory_exporter import (
     SegmentationMaskDirectoryExporter,
@@ -24,6 +25,7 @@
     "CocoExporter",
     "CreateMLExporter",
     "DarknetExporter",
+    "FiftyOneClassificationExporter",
     "NativeExporter",
     "PreparedLDF",
     "SegmentationMaskDirectoryExporter",
 
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, cast
+
+from luxonis_ml.data.exporters.base_exporter import BaseExporter
+from luxonis_ml.data.exporters.exporter_utils import (
+    PreparedLDF,
+    check_group_file_correspondence,
+    exporter_specific_annotation_warning,
+    split_of_group,
+)
+
+
+class FiftyOneClassificationExporter(BaseExporter):
+    """Output structure::
+
+        <dataset_name>/
+            train/
+                data/
+                    000001.jpg
+                    000002.jpg
+                    ...
+                labels.json
+            val/
+                data/
+                    ...
+                labels.json
+            test/
+                data/
+                    ...
+                labels.json
+
+    The labels.json has structure::
+
+        E{lb}
+            "classes": ["class1", "class2", ...],
+            "labels": E{lb}
+                "000001": 0,  # index into classes array
+                "000002": 1,
+                ...
+            E{rb}
+        E{rb}
+    """
+
+    def __init__(
+        self,
+        dataset_identifier: str,
+        output_path: Path,
+        max_partition_size_gb: float | None,
+    ):
+        super().__init__(
+            dataset_identifier, output_path, max_partition_size_gb
+        )
+        self.class_to_idx: dict[str, int] = {}
+        self.split_labels: dict[str, dict[str, int]] = {}
+        self.split_image_counter: dict[str, int] = {}
+
+    def get_split_names(self) -> dict[str, str]:
+        return {"train": "train", "val": "validation", "test": "test"}
+
+    def supported_ann_types(self) -> list[str]:
+        return ["classification"]
+
+    def export(self, prepared_ldf: PreparedLDF) -> None:
+        check_group_file_correspondence(prepared_ldf)
+        exporter_specific_annotation_warning(
+            prepared_ldf, self.supported_ann_types()
+        )
+
+        for split in self.get_split_names():
+            self.split_labels[split] = {}
+            self.split_image_counter[split] = 0
+
+        all_classes: set[str] = set()
+        for row in prepared_ldf.processed_df.iter_rows(named=True):
+            if (
+                row["task_type"] == "classification"
+                and row["instance_id"] == -1
+            ):
+                cname = row["class_name"]
+                if cname:
+                    all_classes.add(str(cname))
+
+        sorted_classes = sorted(all_classes)
+        self.class_to_idx = {
+            cls: idx for idx, cls in enumerate(sorted_classes)
+        }
+
+        grouped = prepared_ldf.processed_df.group_by(
+            ["file", "group_id"], maintain_order=True
+        )
+
+        copied_pairs: set[tuple[Path, str]] = set()
+
+        for key, entry in grouped:
+            file_name, group_id = cast(tuple[str, Any], key)
+            file_path = Path(str(file_name))
+
+            split = split_of_group(prepared_ldf, group_id)
+
+            class_name: str | None = None
+            for row in entry.iter_rows(named=True):
+                if (
+                    row["task_type"] == "classification"
+                    and row["instance_id"] == -1
+                ):
+                    cname = row["class_name"]
+                    if cname:
+                        class_name = str(cname)
+                        break  # Take first classification label
+
+            if class_name is None:
+                continue
+
+            self.split_image_counter[split] += 1
+            idx = self.split_image_counter[split]
+
+            new_name = f"{idx:06d}{file_path.suffix}"
+
+            target_dir = self._get_data_path(
+                self.output_path, split, self.part
+            )
+            target_dir.mkdir(parents=True, exist_ok=True)
+
+            dest = target_dir / new_name
+            pair_key = (file_path, str(dest))
+
+            if pair_key not in copied_pairs:
+                copied_pairs.add(pair_key)
+                if dest != file_path:
+                    dest.write_bytes(file_path.read_bytes())
+
+            # Store label mapping (without extension, just the padded number)
+            label_key = f"{idx:06d}"
+            self.split_labels[split][label_key] = self.class_to_idx[class_name]
+
+        self._dump_annotations(
+            {"classes": sorted_classes, "split_labels": self.split_labels},
+            self.output_path,
+            self.part,
+        )
+
+    def _dump_annotations(
+        self,
+        annotation_data: dict[str, Any],
+        output_path: Path,
+        part: int | None = None,
+    ) -> None:
+        classes = annotation_data["classes"]
+        split_labels = annotation_data["split_labels"]
+
+        for split_name, labels in split_labels.items():
+            if not labels:
+                continue
+
+            save_name = self.get_split_names().get(split_name, split_name)
+            base = (
+                output_path / f"{self.dataset_identifier}_part{part}"
+                if part is not None
+                else output_path / self.dataset_identifier
+            )
+            split_path = base / (
+                save_name if save_name is not None else str(split_name)
+            )
+            split_path.mkdir(parents=True, exist_ok=True)
+
+            labels_data = {
+                "classes": classes,
+                "labels": labels,
+            }
+            (split_path / "labels.json").write_text(
+                json.dumps(labels_data), encoding="utf-8"
+            )
+
+    def _get_data_path(
+        self, output_path: Path, split: str, part: int | None = None
+    ) -> Path:
+        split_name = self.get_split_names().get(split, split)
+        base = (
+            output_path / f"{self.dataset_identifier}_part{part}"
+            if part is not None
+            else output_path / self.dataset_identifier
+        )
+        return base / split_name / "data"
@@ -3,6 +3,7 @@
 from .coco_parser import COCOParser
 from .create_ml_parser import CreateMLParser
 from .darknet_parser import DarknetParser
+from .fiftyone_classification_parser import FiftyOneClassificationParser
 from .luxonis_parser import LuxonisParser
 from .segmentation_mask_directory_parser import SegmentationMaskDirectoryParser
 from .solo_parser import SOLOParser
@@ -18,6 +19,7 @@
     "ClassificationDirectoryParser",
     "CreateMLParser",
     "DarknetParser",
+    "FiftyOneClassificationParser",
     "LuxonisParser",
     "SOLOParser",
     "SegmentationMaskDirectoryParser",
 
@@ -9,7 +9,9 @@
 class ClassificationDirectoryParser(BaseParser):
     """Parses directory with ClassificationDirectory annotations to LDF.
 
-    Expected format::
+    Supports two directory structures:
+
+    Split structure with train/valid/test subdirectories::
 
         dataset_dir/
         ├── train/
@@ -22,7 +24,18 @@ class ClassificationDirectoryParser(BaseParser):
         ├── valid/
         └── test/
 
-    This is one of the formats that can be generated by
+    Flat structure (class subdirectories directly in root,
+    random splits applied at parse time)::
+
+        dataset_dir/
+        ├── class1/
+        │   ├── img1.jpg
+        │   └── ...
+        ├── class2/
+        │   └── ...
+        └── info.json  (optional metadata file)
+
+    The split structure is one of the formats that can be generated by
     U{Roboflow <https://roboflow.com/>}.
     """
 
@@ -34,11 +47,25 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
             d
             for d in split_path.iterdir()
             if d.is_dir()
-            and d.name not in {"train", "valid", "test", "images", "labels"}
+            and d.name
+            not in {
+                "train",
+                "valid",
+                "test",
+                "val",
+                "validation",
+                "images",
+                "labels",
+            }
         ]
         if not classes:
             return None
-        fnames = [f for f in split_path.iterdir() if f.is_file()]
+        # For now allow info.json, can be extended to other metadata files
+        fnames = [
+            f
+            for f in split_path.iterdir()
+            if f.is_file() and f.name not in ["info.json"]
+        ]
         if fnames:
             return None
         return {"class_dir": split_path}
@@ -52,9 +79,9 @@ def from_dir(
         return added_train_imgs, added_val_imgs, added_test_imgs
 
     def from_split(self, class_dir: Path) -> ParserOutput:
-        """Parses annotations from classification directory format to
-        LDF. Annotations include classification.
+        """Parses annotations from classification directory format to LDF.
 
+        Annotations include classification.
         @type class_dir: Path
         @param class_dir: Path to top level directory
         @rtype: L{ParserOutput}