4 changes: 4 additions & 0 deletions luxonis_ml/data/datasets/luxonis_dataset.py
@@ -23,6 +23,7 @@
    CocoExporter,
    CreateMLExporter,
    DarknetExporter,
    FiftyOneClassificationExporter,
    NativeExporter,
    PreparedLDF,
    SegmentationMaskDirectoryExporter,
@@ -1543,6 +1544,9 @@ def export(
            DatasetType.CLSDIR: ExporterSpec(
                ClassificationDirectoryExporter, {}
            ),
            DatasetType.FIFTYONECLS: ExporterSpec(
                FiftyOneClassificationExporter, {}
            ),
            DatasetType.SEGMASK: ExporterSpec(
                SegmentationMaskDirectoryExporter, {}
            ),
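For reviewers who want to exercise the new target locally, a minimal usage sketch follows. Only the mapping from `DatasetType.FIFTYONECLS` to `FiftyOneClassificationExporter` is established by this diff, so the import path for `DatasetType` and the `export` keyword names below are assumptions:

```python
# Hypothetical usage sketch; the DatasetType import path and the
# keyword names of export() are assumptions, not part of this diff.
from luxonis_ml.data import LuxonisDataset
from luxonis_ml.enums import DatasetType

dataset = LuxonisDataset("my_dataset")  # placeholder dataset name
dataset.export(
    output_path="exports/",
    dataset_type=DatasetType.FIFTYONECLS,
)
```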
2 changes: 2 additions & 0 deletions luxonis_ml/data/exporters/__init__.py
@@ -4,6 +4,7 @@
from .createml_exporter import CreateMLExporter
from .darknet_exporter import DarknetExporter
from .exporter_utils import PreparedLDF
from .fiftyone_classification_exporter import FiftyOneClassificationExporter
from .native_exporter import NativeExporter
from .segmentation_mask_directory_exporter import (
    SegmentationMaskDirectoryExporter,
@@ -24,6 +25,7 @@
"CocoExporter",
"CreateMLExporter",
"DarknetExporter",
"FiftyOneClassificationExporter",
"NativeExporter",
"PreparedLDF",
"SegmentationMaskDirectoryExporter",
186 changes: 186 additions & 0 deletions luxonis_ml/data/exporters/fiftyone_classification_exporter.py
@@ -0,0 +1,186 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, cast

from luxonis_ml.data.exporters.base_exporter import BaseExporter
from luxonis_ml.data.exporters.exporter_utils import (
    PreparedLDF,
    check_group_file_correspondence,
    exporter_specific_annotation_warning,
    split_of_group,
)


class FiftyOneClassificationExporter(BaseExporter):
    """Output structure::

        <dataset_name>/
            train/
                data/
                    000001.jpg
                    000002.jpg
                    ...
                labels.json
            val/
                data/
                    ...
                labels.json
            test/
                data/
                    ...
                labels.json

    The labels.json has structure::

        E{lb}
            "classes": ["class1", "class2", ...],
            "labels": E{lb}
                "000001": 0,  # index into classes array
                "000002": 1,
                ...
            E{rb}
        E{rb}
    """

    def __init__(
        self,
        dataset_identifier: str,
        output_path: Path,
        max_partition_size_gb: float | None,
    ):
        super().__init__(
            dataset_identifier, output_path, max_partition_size_gb
        )
        self.class_to_idx: dict[str, int] = {}
        self.split_labels: dict[str, dict[str, int]] = {}
        self.split_image_counter: dict[str, int] = {}

    def get_split_names(self) -> dict[str, str]:
        return {"train": "train", "val": "validation", "test": "test"}

    def supported_ann_types(self) -> list[str]:
        return ["classification"]

    def export(self, prepared_ldf: PreparedLDF) -> None:
        check_group_file_correspondence(prepared_ldf)
        exporter_specific_annotation_warning(
            prepared_ldf, self.supported_ann_types()
        )

        for split in self.get_split_names():
            self.split_labels[split] = {}
            self.split_image_counter[split] = 0

        all_classes: set[str] = set()
        for row in prepared_ldf.processed_df.iter_rows(named=True):
            if (
                row["task_type"] == "classification"
                and row["instance_id"] == -1
            ):
                cname = row["class_name"]
                if cname:
                    all_classes.add(str(cname))

        sorted_classes = sorted(all_classes)
        self.class_to_idx = {
            cls: idx for idx, cls in enumerate(sorted_classes)
        }

        grouped = prepared_ldf.processed_df.group_by(
            ["file", "group_id"], maintain_order=True
        )

        copied_pairs: set[tuple[Path, str]] = set()

        for key, entry in grouped:
            file_name, group_id = cast(tuple[str, Any], key)
            file_path = Path(str(file_name))

            split = split_of_group(prepared_ldf, group_id)

            class_name: str | None = None
            for row in entry.iter_rows(named=True):
                if (
                    row["task_type"] == "classification"
                    and row["instance_id"] == -1
                ):
                    cname = row["class_name"]
                    if cname:
                        class_name = str(cname)
                        break  # Take first classification label

            if class_name is None:
                continue

            self.split_image_counter[split] += 1
            idx = self.split_image_counter[split]

            new_name = f"{idx:06d}{file_path.suffix}"

            target_dir = self._get_data_path(
                self.output_path, split, self.part
            )
            target_dir.mkdir(parents=True, exist_ok=True)

            dest = target_dir / new_name
            pair_key = (file_path, str(dest))

            if pair_key not in copied_pairs:
                copied_pairs.add(pair_key)
                if dest != file_path:
                    dest.write_bytes(file_path.read_bytes())

            # Store label mapping (without extension, just the padded number)
            label_key = f"{idx:06d}"
            self.split_labels[split][label_key] = self.class_to_idx[class_name]

        self._dump_annotations(
            {"classes": sorted_classes, "split_labels": self.split_labels},
            self.output_path,
            self.part,
        )

    def _dump_annotations(
        self,
        annotation_data: dict[str, Any],
        output_path: Path,
        part: int | None = None,
    ) -> None:
        classes = annotation_data["classes"]
        split_labels = annotation_data["split_labels"]

        for split_name, labels in split_labels.items():
            if not labels:
                continue

            save_name = self.get_split_names().get(split_name, split_name)
            base = (
                output_path / f"{self.dataset_identifier}_part{part}"
                if part is not None
                else output_path / self.dataset_identifier
            )
            split_path = base / (
                save_name if save_name is not None else str(split_name)
            )
            split_path.mkdir(parents=True, exist_ok=True)

            labels_data = {
                "classes": classes,
                "labels": labels,
            }
            (split_path / "labels.json").write_text(
                json.dumps(labels_data), encoding="utf-8"
            )

    def _get_data_path(
        self, output_path: Path, split: str, part: int | None = None
    ) -> Path:
        split_name = self.get_split_names().get(split, split)
        base = (
            output_path / f"{self.dataset_identifier}_part{part}"
            if part is not None
            else output_path / self.dataset_identifier
        )
        return base / split_name / "data"
2 changes: 2 additions & 0 deletions luxonis_ml/data/parsers/__init__.py
@@ -3,6 +3,7 @@
from .coco_parser import COCOParser
from .create_ml_parser import CreateMLParser
from .darknet_parser import DarknetParser
from .fiftyone_classification_parser import FiftyOneClassificationParser
from .luxonis_parser import LuxonisParser
from .segmentation_mask_directory_parser import SegmentationMaskDirectoryParser
from .solo_parser import SOLOParser
@@ -18,6 +19,7 @@
"ClassificationDirectoryParser",
"CreateMLParser",
"DarknetParser",
"FiftyOneClassificationParser",
"LuxonisParser",
"SOLOParser",
"SegmentationMaskDirectoryParser",
39 changes: 33 additions & 6 deletions luxonis_ml/data/parsers/classification_directory_parser.py
@@ -9,7 +9,9 @@
class ClassificationDirectoryParser(BaseParser):
    """Parses directory with ClassificationDirectory annotations to LDF.

    Supports two directory structures:

    Split structure with train/valid/test subdirectories::

        dataset_dir/
        ├── train/
@@ -22,7 +24,18 @@ class ClassificationDirectoryParser(BaseParser):
        ├── valid/
        └── test/

    Flat structure (class subdirectories directly in root,
    random splits applied at parse time)::

        dataset_dir/
        ├── class1/
        │   ├── img1.jpg
        │   └── ...
        ├── class2/
        │   └── ...
        └── info.json (optional metadata file)

    The split structure is one of the formats that can be generated by
    U{Roboflow <https://roboflow.com/>}.
    """

@@ -34,11 +47,25 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
            d
            for d in split_path.iterdir()
            if d.is_dir()
            and d.name
            not in {
                "train",
                "valid",
                "test",
                "val",
                "validation",
                "images",
                "labels",
            }
        ]
        if not classes:
            return None
        # Allow info.json for now; this can be extended to other metadata files
        fnames = [
            f
            for f in split_path.iterdir()
            if f.is_file() and f.name not in ["info.json"]
        ]
        if fnames:
            return None
        return {"class_dir": split_path}
@@ -52,9 +79,9 @@ def from_dir(
        return added_train_imgs, added_val_imgs, added_test_imgs

    def from_split(self, class_dir: Path) -> ParserOutput:
        """Parses annotations from classification directory format to
        LDF. Annotations include classification.

        @type class_dir: Path
        @param class_dir: Path to top level directory
        @rtype: L{ParserOutput}
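A quick sketch of the new `validate_split` behavior for the flat layout: class directories plus an optional `info.json` are accepted, while any other loose file still rejects the directory.

```python
import tempfile
from pathlib import Path

from luxonis_ml.data.parsers import ClassificationDirectoryParser

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    for cls in ("class1", "class2"):
        (root / cls).mkdir()
        (root / cls / "img1.jpg").touch()
    (root / "info.json").write_text("{}")  # tolerated metadata file

    # Accepted: class dirs present, info.json is the only loose file.
    print(ClassificationDirectoryParser.validate_split(root))

    # Rejected: any other loose file makes this return None.
    (root / "notes.txt").touch()
    print(ClassificationDirectoryParser.validate_split(root))
```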
20 changes: 20 additions & 0 deletions luxonis_ml/data/parsers/coco_parser.py
@@ -71,6 +71,22 @@ def _detect_dataset_dir_format(
            return COCOFormat.ROBOFLOW, rf
        return None, []

    @staticmethod
    def _is_coco_json(json_path: Path) -> bool:
        """Check if JSON file has required COCO format fields."""
        try:
            with open(json_path) as f:
                data = json.load(f)
            # Distinguish COCO from FiftyOne classification, as both
            # formats use a labels.json file
            return (
                isinstance(data, dict)
                and "images" in data
                and "annotations" in data
                and "categories" in data
            )
        except (json.JSONDecodeError, OSError):
            return False

    @staticmethod
    def validate_split(split_path: Path) -> dict[str, Any] | None:
        if not split_path.exists():
@@ -79,9 +95,13 @@ def validate_split(split_path: Path) -> dict[str, Any] | None:
        if not json_path:
            return None
        if json_path.name == "_annotations.coco.json":
            if not COCOParser._is_coco_json(json_path):
                return None
            logger.info("Identified Roboflow format")
            image_dir = split_path
            return {"image_dir": image_dir, "annotation_path": json_path}
        if not COCOParser._is_coco_json(json_path):
            return None
        logger.info("Identified FiftyOne format")
        dirs = [d for d in split_path.iterdir() if d.is_dir()]
        if len(dirs) != 1:
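And a matching sketch of what `_is_coco_json` distinguishes: a COCO annotation file carries `images`, `annotations`, and `categories`, while a FiftyOne classification labels.json (per the new exporter) carries `classes` and `labels`. The method is private, so this is for illustration only.

```python
import json
import tempfile
from pathlib import Path

from luxonis_ml.data.parsers import COCOParser

with tempfile.TemporaryDirectory() as tmp:
    coco = Path(tmp) / "coco.json"
    coco.write_text(
        json.dumps({"images": [], "annotations": [], "categories": []})
    )
    fiftyone = Path(tmp) / "labels.json"
    fiftyone.write_text(
        json.dumps({"classes": ["cat"], "labels": {"000001": 0}})
    )

    print(COCOParser._is_coco_json(coco))      # True: all COCO keys present
    print(COCOParser._is_coco_json(fiftyone))  # False: FiftyOne layout
```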