Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 61 additions & 12 deletions luxonis_ml/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -557,19 +557,68 @@ The supported formats are:
└── test/
```

- **Classification Directory** - A directory with subdirectories for each class
- **Classification Directory** - A directory with subdirectories for each class. Two structures are supported:

```plaintext
dataset_dir/
├── train/
│ ├── class1/
│ │ ├── img1.jpg
│ │ ├── img2.jpg
│ │ └── ...
│ ├── class2/
│ └── ...
├── valid/
└── test/
- Split structure with train/valid/test subdirectories:
```plaintext
dataset_dir/
├── train/
│ ├── class1/
│ │ ├── img1.jpg
│ │ ├── img2.jpg
│ │ └── ...
│ ├── class2/
│ └── ...
├── valid/
└── test/
```
- Flat structure (class subdirectories directly in root, random splits applied at parse time):
```plaintext
dataset_dir/
├── class1/
│ ├── img1.jpg
│ └── ...
├── class2/
│ └── ...
└── info.json (optional metadata file)
```

- [**FiftyOne Classification**](https://docs.voxel51.com/user_guide/export_datasets.html#fiftyone-image-classification-dataset) - FiftyOneImageClassificationDataset format with images in a `data/` folder and labels in `labels.json`. Two structures are supported:

- Split structure with train/validation/test subdirectories:
```plaintext
dataset_dir/
├── train/
│ ├── data/
│ │ ├── img1.jpg
│ │ └── ...
│ └── labels.json
├── validation/
│ ├── data/
│ └── labels.json
└── test/
├── data/
└── labels.json
```
- Flat structure (random splits applied at parse time):
```plaintext
dataset_dir/
├── data/
│ ├── img1.jpg
│ └── ...
└── labels.json
```

The `labels.json` format:

```json
{
"classes": ["class1", "class2", ...],
"labels": {
"image_stem": class_index,
...
}
}
```

- **Segmentation Mask Directory** - A directory with images and corresponding masks.
Expand Down
4 changes: 4 additions & 0 deletions luxonis_ml/data/datasets/luxonis_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CocoExporter,
CreateMLExporter,
DarknetExporter,
FiftyOneClassificationExporter,
NativeExporter,
PreparedLDF,
SegmentationMaskDirectoryExporter,
Expand Down Expand Up @@ -1543,6 +1544,9 @@ def export(
DatasetType.CLSDIR: ExporterSpec(
ClassificationDirectoryExporter, {}
),
DatasetType.FIFTYONECLS: ExporterSpec(
FiftyOneClassificationExporter, {}
),
DatasetType.SEGMASK: ExporterSpec(
SegmentationMaskDirectoryExporter, {}
),
Expand Down
2 changes: 2 additions & 0 deletions luxonis_ml/data/exporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .createml_exporter import CreateMLExporter
from .darknet_exporter import DarknetExporter
from .exporter_utils import PreparedLDF
from .fiftyone_classification_exporter import FiftyOneClassificationExporter
from .native_exporter import NativeExporter
from .segmentation_mask_directory_exporter import (
SegmentationMaskDirectoryExporter,
Expand All @@ -24,6 +25,7 @@
"CocoExporter",
"CreateMLExporter",
"DarknetExporter",
"FiftyOneClassificationExporter",
"NativeExporter",
"PreparedLDF",
"SegmentationMaskDirectoryExporter",
Expand Down
186 changes: 186 additions & 0 deletions luxonis_ml/data/exporters/fiftyone_classification_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, cast

from luxonis_ml.data.exporters.base_exporter import BaseExporter
from luxonis_ml.data.exporters.exporter_utils import (
PreparedLDF,
check_group_file_correspondence,
exporter_specific_annotation_warning,
split_of_group,
)


class FiftyOneClassificationExporter(BaseExporter):
    """Exporter producing the FiftyOneImageClassificationDataset layout.

    Output structure::

        <dataset_name>/
            train/
                data/
                    000001.jpg
                    000002.jpg
                    ...
                labels.json
            validation/
                data/
                    ...
                labels.json
            test/
                data/
                    ...
                labels.json

    The labels.json has structure::

        E{lb}
            "classes": ["class1", "class2", ...],
            "labels": E{lb}
                "000001": 0,  # index into classes array
                "000002": 1,
                ...
            E{rb}
        E{rb}
    """

    def __init__(
        self,
        dataset_identifier: str,
        output_path: Path,
        max_partition_size_gb: float | None,
    ):
        """
        @type dataset_identifier: str
        @param dataset_identifier: Name used for the top-level output
            directory (optionally suffixed with C{_part<N>}).
        @type output_path: Path
        @param output_path: Root directory the dataset is exported into.
        @type max_partition_size_gb: float | None
        @param max_partition_size_gb: Optional size limit handled by the
            base class to split the export into multiple partitions.
        """
        super().__init__(
            dataset_identifier, output_path, max_partition_size_gb
        )
        # Maps class name -> index into the sorted ``classes`` array.
        self.class_to_idx: dict[str, int] = {}
        # Per-split mapping of image stem (e.g. "000001") -> class index.
        self.split_labels: dict[str, dict[str, int]] = {}
        # Per-split running counter used for zero-padded image names.
        self.split_image_counter: dict[str, int] = {}

    def get_split_names(self) -> dict[str, str]:
        """Maps internal split names to output directory names."""
        return {"train": "train", "val": "validation", "test": "test"}

    def supported_ann_types(self) -> list[str]:
        """Only classification annotations can be exported."""
        return ["classification"]

    def export(self, prepared_ldf: PreparedLDF) -> None:
        """Exports the prepared LDF into the FiftyOne classification layout.

        Images without any classification annotation are skipped; a
        warning is emitted for unsupported annotation types present in
        the dataset.

        @type prepared_ldf: PreparedLDF
        @param prepared_ldf: The prepared dataset to export.
        """
        check_group_file_correspondence(prepared_ldf)
        exporter_specific_annotation_warning(
            prepared_ldf, self.supported_ann_types()
        )

        for split in self.get_split_names():
            self.split_labels[split] = {}
            self.split_image_counter[split] = 0

        # Collect the global class list first so class indices are
        # consistent across all splits.
        all_classes: set[str] = set()
        for row in prepared_ldf.processed_df.iter_rows(named=True):
            if (
                row["task_type"] == "classification"
                and row["instance_id"] == -1
            ):
                cname = row["class_name"]
                if cname:
                    all_classes.add(str(cname))

        sorted_classes = sorted(all_classes)
        self.class_to_idx = {
            cls: idx for idx, cls in enumerate(sorted_classes)
        }

        grouped = prepared_ldf.processed_df.group_by(
            ["file", "group_id"], maintain_order=True
        )

        for key, entry in grouped:
            file_name, group_id = cast(tuple[str, Any], key)
            file_path = Path(str(file_name))

            split = split_of_group(prepared_ldf, group_id)

            class_name = self._first_classification_label(entry)
            if class_name is None:
                # No usable classification label for this image.
                continue

            self.split_image_counter[split] += 1
            idx = self.split_image_counter[split]

            new_name = f"{idx:06d}{file_path.suffix}"
            target_dir = self._get_data_path(
                self.output_path, split, self.part
            )
            target_dir.mkdir(parents=True, exist_ok=True)

            dest = target_dir / new_name
            # ``dest`` is unique per iteration (the counter only ever
            # increases), so no duplicate-copy bookkeeping is needed.
            if dest != file_path:
                dest.write_bytes(file_path.read_bytes())

            # Label key is the padded number without the file extension.
            self.split_labels[split][f"{idx:06d}"] = self.class_to_idx[
                class_name
            ]

        self._dump_annotations(
            {"classes": sorted_classes, "split_labels": self.split_labels},
            self.output_path,
            self.part,
        )

    @staticmethod
    def _first_classification_label(entry: Any) -> str | None:
        """Returns the first non-empty classification label in the group.

        Unlike a plain break-on-first-row scan, a falsy ``class_name``
        on an earlier row does not hide a valid label on a later row.

        @type entry: Any
        @param entry: Dataframe of annotation rows for one (file, group).
        @rtype: str | None
        @return: The label, or C{None} if the group has none.
        """
        for row in entry.iter_rows(named=True):
            if (
                row["task_type"] == "classification"
                and row["instance_id"] == -1
            ):
                cname = row["class_name"]
                if cname:
                    return str(cname)
        return None

    def _base_path(self, output_path: Path, part: int | None) -> Path:
        """Root output directory for this export (partition-aware)."""
        if part is not None:
            return output_path / f"{self.dataset_identifier}_part{part}"
        return output_path / self.dataset_identifier

    def _dump_annotations(
        self,
        annotation_data: dict[str, Any],
        output_path: Path,
        part: int | None = None,
    ) -> None:
        """Writes one ``labels.json`` per non-empty split.

        @type annotation_data: dict[str, Any]
        @param annotation_data: Dict with ``classes`` (sorted class list)
            and ``split_labels`` (per-split stem -> class-index mapping).
        @type output_path: Path
        @param output_path: Root directory the dataset is exported into.
        @type part: int | None
        @param part: Optional partition index.
        """
        classes = annotation_data["classes"]
        split_labels = annotation_data["split_labels"]

        for split_name, labels in split_labels.items():
            if not labels:
                continue

            save_name = self.get_split_names().get(split_name, split_name)
            split_path = self._base_path(output_path, part) / save_name
            split_path.mkdir(parents=True, exist_ok=True)

            labels_data = {
                "classes": classes,
                "labels": labels,
            }
            (split_path / "labels.json").write_text(
                json.dumps(labels_data), encoding="utf-8"
            )

    def _get_data_path(
        self, output_path: Path, split: str, part: int | None = None
    ) -> Path:
        """Returns ``<base>/<split dir>/data`` for the given split."""
        split_name = self.get_split_names().get(split, split)
        return self._base_path(output_path, part) / split_name / "data"
2 changes: 2 additions & 0 deletions luxonis_ml/data/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .coco_parser import COCOParser
from .create_ml_parser import CreateMLParser
from .darknet_parser import DarknetParser
from .fiftyone_classification_parser import FiftyOneClassificationParser
from .luxonis_parser import LuxonisParser
from .segmentation_mask_directory_parser import SegmentationMaskDirectoryParser
from .solo_parser import SOLOParser
Expand All @@ -18,6 +19,7 @@
"ClassificationDirectoryParser",
"CreateMLParser",
"DarknetParser",
"FiftyOneClassificationParser",
"LuxonisParser",
"SOLOParser",
"SegmentationMaskDirectoryParser",
Expand Down
39 changes: 33 additions & 6 deletions luxonis_ml/data/parsers/classification_directory_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
class ClassificationDirectoryParser(BaseParser):
"""Parses directory with ClassificationDirectory annotations to LDF.

Expected format::
Supports two directory structures:

Split structure with train/valid/test subdirectories::

dataset_dir/
├── train/
Expand All @@ -22,7 +24,18 @@ class ClassificationDirectoryParser(BaseParser):
├── valid/
└── test/

This is one of the formats that can be generated by
Flat structure (class subdirectories directly in root,
random splits applied at parse time)::

dataset_dir/
├── class1/
│ ├── img1.jpg
│ └── ...
├── class2/
│ └── ...
└── info.json (optional metadata file)

The split structure is one of the formats that can be generated by
U{Roboflow <https://roboflow.com/>}.
"""

Expand All @@ -34,11 +47,25 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
d
for d in split_path.iterdir()
if d.is_dir()
and d.name not in {"train", "valid", "test", "images", "labels"}
and d.name
not in {
"train",
"valid",
"test",
"val",
"validation",
"images",
"labels",
}
]
if not classes:
return None
fnames = [f for f in split_path.iterdir() if f.is_file()]
# For now allow info.json, can be extended to other metadata files
fnames = [
f
for f in split_path.iterdir()
if f.is_file() and f.name not in ["info.json"]
]
if fnames:
return None
return {"class_dir": split_path}
Expand All @@ -52,9 +79,9 @@ def from_dir(
return added_train_imgs, added_val_imgs, added_test_imgs

def from_split(self, class_dir: Path) -> ParserOutput:
"""Parses annotations from classification directory format to
LDF. Annotations include classification.
"""Parses annotations from classification directory format to
LDF. Annotations include classification.
@type class_dir: Path
@param class_dir: Path to top level directory
@rtype: L{ParserOutput}
Expand Down
Loading
Loading