Skip to content

Commit 6fb9b0a

Browse files
authored
Feat/fiftyone classification (#379)
1 parent 78600da commit 6fb9b0a

File tree

12 files changed

+454
-21
lines changed

12 files changed

+454
-21
lines changed

luxonis_ml/data/README.md

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -557,19 +557,68 @@ The supported formats are:
557557
└── test/
558558
```
559559

560-
- **Classification Directory** - A directory with subdirectories for each class
560+
- **Classification Directory** - A directory with subdirectories for each class. Two structures are supported:
561561

562-
```plaintext
563-
dataset_dir/
564-
├── train/
565-
│ ├── class1/
566-
│ │ ├── img1.jpg
567-
│ │ ├── img2.jpg
568-
│ │ └── ...
569-
│ ├── class2/
570-
│ └── ...
571-
├── valid/
572-
└── test/
562+
- Split structure with train/valid/test subdirectories:
563+
```plaintext
564+
dataset_dir/
565+
├── train/
566+
│ ├── class1/
567+
│ │ ├── img1.jpg
568+
│ │ ├── img2.jpg
569+
│ │ └── ...
570+
│ ├── class2/
571+
│ └── ...
572+
├── valid/
573+
└── test/
574+
```
575+
- Flat structure (class subdirectories directly in root, random splits applied at parse time):
576+
```plaintext
577+
dataset_dir/
578+
├── class1/
579+
│ ├── img1.jpg
580+
│ └── ...
581+
├── class2/
582+
│ └── ...
583+
└── info.json (optional metadata file)
584+
```
585+
586+
- [**FiftyOne Classification**](https://docs.voxel51.com/user_guide/export_datasets.html#fiftyone-image-classification-dataset) - FiftyOneImageClassificationDataset format with images in a `data/` folder and labels in `labels.json`. Two structures are supported:
587+
588+
- Split structure with train/validation/test subdirectories:
589+
```plaintext
590+
dataset_dir/
591+
├── train/
592+
│ ├── data/
593+
│ │ ├── img1.jpg
594+
│ │ └── ...
595+
│ └── labels.json
596+
├── validation/
597+
│ ├── data/
598+
│ └── labels.json
599+
└── test/
600+
├── data/
601+
└── labels.json
602+
```
603+
- Flat structure (random splits applied at parse time):
604+
```plaintext
605+
dataset_dir/
606+
├── data/
607+
│ ├── img1.jpg
608+
│ └── ...
609+
└── labels.json
610+
```
611+
612+
The `labels.json` format:
613+
614+
```json
615+
{
616+
"classes": ["class1", "class2", ...],
617+
"labels": {
618+
"image_stem": class_index,
619+
...
620+
}
621+
}
573622
```
574623

575624
- **Segmentation Mask Directory** - A directory with images and corresponding masks.

luxonis_ml/data/datasets/luxonis_dataset.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
CocoExporter,
2424
CreateMLExporter,
2525
DarknetExporter,
26+
FiftyOneClassificationExporter,
2627
NativeExporter,
2728
PreparedLDF,
2829
SegmentationMaskDirectoryExporter,
@@ -1543,6 +1544,9 @@ def export(
15431544
DatasetType.CLSDIR: ExporterSpec(
15441545
ClassificationDirectoryExporter, {}
15451546
),
1547+
DatasetType.FIFTYONECLS: ExporterSpec(
1548+
FiftyOneClassificationExporter, {}
1549+
),
15461550
DatasetType.SEGMASK: ExporterSpec(
15471551
SegmentationMaskDirectoryExporter, {}
15481552
),

luxonis_ml/data/exporters/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .createml_exporter import CreateMLExporter
55
from .darknet_exporter import DarknetExporter
66
from .exporter_utils import PreparedLDF
7+
from .fiftyone_classification_exporter import FiftyOneClassificationExporter
78
from .native_exporter import NativeExporter
89
from .segmentation_mask_directory_exporter import (
910
SegmentationMaskDirectoryExporter,
@@ -24,6 +25,7 @@
2425
"CocoExporter",
2526
"CreateMLExporter",
2627
"DarknetExporter",
28+
"FiftyOneClassificationExporter",
2729
"NativeExporter",
2830
"PreparedLDF",
2931
"SegmentationMaskDirectoryExporter",
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from pathlib import Path
5+
from typing import Any, cast
6+
7+
from luxonis_ml.data.exporters.base_exporter import BaseExporter
8+
from luxonis_ml.data.exporters.exporter_utils import (
9+
PreparedLDF,
10+
check_group_file_correspondence,
11+
exporter_specific_annotation_warning,
12+
split_of_group,
13+
)
14+
15+
16+
class FiftyOneClassificationExporter(BaseExporter):
    """Exports image-level classification labels in the
    FiftyOneImageClassificationDataset layout.

    Output structure::

        <dataset_name>/
            train/
                data/
                    000001.jpg
                    000002.jpg
                    ...
                labels.json
            validation/
                data/
                    ...
                labels.json
            test/
                data/
                    ...
                labels.json

    The labels.json has structure::

        E{lb}
            "classes": ["class1", "class2", ...],
            "labels": E{lb}
                "000001": 0,  # index into classes array
                "000002": 1,
                ...
            E{rb}
        E{rb}
    """

    def __init__(
        self,
        dataset_identifier: str,
        output_path: Path,
        max_partition_size_gb: float | None,
    ):
        """Initializes the exporter with empty per-split bookkeeping.

        @type dataset_identifier: str
        @param dataset_identifier: Name used for the top-level output
            directory.
        @type output_path: Path
        @param output_path: Directory under which the dataset directory
            is created.
        @type max_partition_size_gb: float | None
        @param max_partition_size_gb: Forwarded unchanged to the base
            exporter.
        """
        super().__init__(
            dataset_identifier, output_path, max_partition_size_gb
        )
        # Class name -> index into the sorted "classes" list.
        self.class_to_idx: dict[str, int] = {}
        # Split -> {zero-padded image stem -> class index}.
        self.split_labels: dict[str, dict[str, int]] = {}
        # Split -> number of images written so far (1-based file names).
        self.split_image_counter: dict[str, int] = {}

    def get_split_names(self) -> dict[str, str]:
        """Maps internal split names to the exported directory names.

        @rtype: dict[str, str]
        @return: Mapping from split name to output directory name.
        """
        return {"train": "train", "val": "validation", "test": "test"}

    def supported_ann_types(self) -> list[str]:
        """Lists the annotation types this exporter can write.

        @rtype: list[str]
        @return: Only C{"classification"}.
        """
        return ["classification"]

    def export(self, prepared_ldf: PreparedLDF) -> None:
        """Copies labeled images into per-split C{data/} directories and
        writes one C{labels.json} per non-empty split.

        Images are renamed to a zero-padded, per-split running number.
        Groups without a classification label are skipped. When a group
        carries several classification labels, only the first one is
        exported.

        @type prepared_ldf: PreparedLDF
        @param prepared_ldf: Prepared dataset to export.
        """
        check_group_file_correspondence(prepared_ldf)
        exporter_specific_annotation_warning(
            prepared_ldf, self.supported_ann_types()
        )

        # Rebuild the bookkeeping from scratch so that a repeated call
        # does not inherit counters or labels from a previous export.
        known_splits = self.get_split_names()
        self.split_labels = {split: {} for split in known_splits}
        self.split_image_counter = dict.fromkeys(known_splits, 0)

        # First pass: the class list covers every classification label
        # in the dataset, not only the ones that end up being exported.
        sorted_classes = sorted(
            {
                label
                for row in prepared_ldf.processed_df.iter_rows(named=True)
                if (label := self._classification_label(row)) is not None
            }
        )
        self.class_to_idx = {
            cls: idx for idx, cls in enumerate(sorted_classes)
        }

        grouped = prepared_ldf.processed_df.group_by(
            ["file", "group_id"], maintain_order=True
        )

        for key, entry in grouped:
            file_name, group_id = cast(tuple[str, Any], key)
            file_path = Path(str(file_name))

            split = split_of_group(prepared_ldf, group_id)

            # Take the first classification label of the group.
            class_name = next(
                (
                    label
                    for row in entry.iter_rows(named=True)
                    if (label := self._classification_label(row))
                    is not None
                ),
                None,
            )
            if class_name is None:
                continue

            # ``.get`` / ``setdefault`` keep splits outside of
            # ``get_split_names`` working; the path helpers already
            # fall back to the raw split name for those.
            idx = self.split_image_counter.get(split, 0) + 1
            self.split_image_counter[split] = idx
            stem = f"{idx:06d}"

            target_dir = self._get_data_path(
                self.output_path, split, self.part
            )
            target_dir.mkdir(parents=True, exist_ok=True)

            # The running counter makes every destination unique, so no
            # duplicate-copy tracking is needed.
            dest = target_dir / f"{stem}{file_path.suffix}"
            if dest != file_path:
                dest.write_bytes(file_path.read_bytes())

            # Labels are keyed by the file stem (no extension).
            self.split_labels.setdefault(split, {})[stem] = (
                self.class_to_idx[class_name]
            )

        self._dump_annotations(
            {"classes": sorted_classes, "split_labels": self.split_labels},
            self.output_path,
            self.part,
        )

    def _dump_annotations(
        self,
        annotation_data: dict[str, Any],
        output_path: Path,
        part: int | None = None,
    ) -> None:
        """Writes C{labels.json} into every split that has labels.

        @type annotation_data: dict[str, Any]
        @param annotation_data: Dictionary with the keys C{"classes"}
            (list of class names) and C{"split_labels"} (split name to
            label mapping).
        @type output_path: Path
        @param output_path: Directory containing the dataset directory.
        @type part: int | None
        @param part: Partition number appended to the dataset directory
            name, or C{None} for an unpartitioned export.
        """
        classes = annotation_data["classes"]

        for split_name, labels in annotation_data["split_labels"].items():
            if not labels:
                # Do not create directories for empty splits.
                continue

            split_path = self._split_root(output_path, split_name, part)
            split_path.mkdir(parents=True, exist_ok=True)

            labels_data = {"classes": classes, "labels": labels}
            (split_path / "labels.json").write_text(
                json.dumps(labels_data), encoding="utf-8"
            )

    def _get_data_path(
        self, output_path: Path, split: str, part: int | None = None
    ) -> Path:
        """Returns the C{data/} directory of a split.

        @type output_path: Path
        @param output_path: Directory containing the dataset directory.
        @type split: str
        @param split: Internal split name.
        @type part: int | None
        @param part: Partition number, or C{None} for an unpartitioned
            export.
        @rtype: Path
        @return: Path to the image directory; it is not created here.
        """
        return self._split_root(output_path, split, part) / "data"

    def _split_root(
        self, output_path: Path, split: str, part: int | None = None
    ) -> Path:
        """Returns the directory holding C{data/} and C{labels.json}
        of a split.

        Splits missing from L{get_split_names} keep their own name.
        """
        dataset_dir = (
            f"{self.dataset_identifier}_part{part}"
            if part is not None
            else self.dataset_identifier
        )
        split_dir = self.get_split_names().get(split, split)
        return output_path / dataset_dir / split_dir

    @staticmethod
    def _classification_label(row: dict[str, Any]) -> str | None:
        """Returns the classification label of a row, or C{None} when
        the row is not a usable classification annotation.

        A row qualifies when its task type is C{"classification"}, its
        instance id is -1 and its class name is non-empty.
        """
        # NOTE(review): instance_id == -1 presumably marks annotations
        # that are not tied to a specific instance - confirm.
        if row["task_type"] != "classification" or row["instance_id"] != -1:
            return None
        cname = row["class_name"]
        return str(cname) if cname else None

luxonis_ml/data/parsers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .coco_parser import COCOParser
44
from .create_ml_parser import CreateMLParser
55
from .darknet_parser import DarknetParser
6+
from .fiftyone_classification_parser import FiftyOneClassificationParser
67
from .luxonis_parser import LuxonisParser
78
from .segmentation_mask_directory_parser import SegmentationMaskDirectoryParser
89
from .solo_parser import SOLOParser
@@ -18,6 +19,7 @@
1819
"ClassificationDirectoryParser",
1920
"CreateMLParser",
2021
"DarknetParser",
22+
"FiftyOneClassificationParser",
2123
"LuxonisParser",
2224
"SOLOParser",
2325
"SegmentationMaskDirectoryParser",

luxonis_ml/data/parsers/classification_directory_parser.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
class ClassificationDirectoryParser(BaseParser):
1010
"""Parses directory with ClassificationDirectory annotations to LDF.
1111
12-
Expected format::
12+
Supports two directory structures:
13+
14+
Split structure with train/valid/test subdirectories::
1315
1416
dataset_dir/
1517
├── train/
@@ -22,7 +24,18 @@ class ClassificationDirectoryParser(BaseParser):
2224
├── valid/
2325
└── test/
2426
25-
This is one of the formats that can be generated by
27+
Flat structure (class subdirectories directly in root,
28+
random splits applied at parse time)::
29+
30+
dataset_dir/
31+
├── class1/
32+
│ ├── img1.jpg
33+
│ └── ...
34+
├── class2/
35+
│ └── ...
36+
└── info.json (optional metadata file)
37+
38+
The split structure is one of the formats that can be generated by
2639
U{Roboflow <https://roboflow.com/>}.
2740
"""
2841

@@ -34,11 +47,25 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
3447
d
3548
for d in split_path.iterdir()
3649
if d.is_dir()
37-
and d.name not in {"train", "valid", "test", "images", "labels"}
50+
and d.name
51+
not in {
52+
"train",
53+
"valid",
54+
"test",
55+
"val",
56+
"validation",
57+
"images",
58+
"labels",
59+
}
3860
]
3961
if not classes:
4062
return None
41-
fnames = [f for f in split_path.iterdir() if f.is_file()]
63+
# For now allow info.json, can be extended to other metadata files
64+
fnames = [
65+
f
66+
for f in split_path.iterdir()
67+
if f.is_file() and f.name not in ["info.json"]
68+
]
4269
if fnames:
4370
return None
4471
return {"class_dir": split_path}
@@ -52,9 +79,9 @@ def from_dir(
5279
return added_train_imgs, added_val_imgs, added_test_imgs
5380

5481
def from_split(self, class_dir: Path) -> ParserOutput:
55-
"""Parses annotations from classification directory format to
56-
LDF. Annotations include classification.
82+
"""Parses annotations from classification directory format to.
5783
84+
LDF. Annotations include classification
5885
@type class_dir: Path
5986
@param class_dir: Path to top level directory
6087
@rtype: L{ParserOutput}

0 commit comments

Comments
 (0)