Skip to content

Commit 8739124

Browse files
committed
More changes and extensions to existing exporters and tests
1 parent b719107 commit 8739124

File tree

9 files changed

+335
-146
lines changed

9 files changed

+335
-146
lines changed

luxonis_ml/data/datasets/luxonis_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,8 +1544,8 @@ def export(
15441544
SegmentationMaskDirectoryExporter, {}
15451545
),
15461546
DatasetType.VOC: ExporterSpec(VOCExporter, {}),
1547-
DatasetType.CREATEML: CreateMLExporter,
1548-
DatasetType.TFCSV: TensorflowCSVExporter,
1547+
DatasetType.CREATEML: ExporterSpec(CreateMLExporter, {}),
1548+
DatasetType.TFCSV: ExporterSpec(TensorflowCSVExporter, {}),
15491549
}
15501550
spec = EXPORTER_MAP.get(dataset_type)
15511551
if spec is None:

luxonis_ml/data/exporters/base_exporter.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from abc import ABC, abstractmethod
44
from pathlib import Path
5-
from typing import TYPE_CHECKING, Any
5+
from typing import TYPE_CHECKING
66

77
if TYPE_CHECKING:
88
from luxonis_ml.data.exporters.exporter_utils import PreparedLDF
@@ -30,9 +30,7 @@ def __init__(
3030
self.current_size = 0
3131

3232
@abstractmethod
33-
def transform(
34-
self, prepared_ldf: PreparedLDF
35-
) -> dict[str, list[dict[str, Any]]]:
33+
def transform(self, prepared_ldf: PreparedLDF) -> None:
3634
"""Convert the prepared dataset into the exporter's format."""
3735
raise NotImplementedError
3836

luxonis_ml/data/exporters/createml_exporter.py

Lines changed: 60 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,6 @@
1111

1212

1313
class CreateMLExporter(BaseExporter):
14-
"""Exports LDF to CreateML format.
15-
16-
Output structure:
17-
18-
output/
19-
└── <dataset_identifier>[_partN]/
20-
├── train/
21-
│ ├── 0.jpg
22-
│ ├── 1.jpg
23-
│ └── _annotations.createml.json
24-
├── valid/
25-
│ └── ...
26-
└── test/
27-
└── ...
28-
29-
Notes:
30-
- CreateML uses center-based pixel coordinates.
31-
- This exporter converts from normalized TL (x,y,w,h) to pixel centers.
32-
"""
33-
3414
def __init__(
3515
self,
3616
dataset_identifier: str,
@@ -74,44 +54,17 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
7454
with Image.open(file_path) as im:
7555
width, height = im.size
7656

77-
per_image_anns: list[dict[str, Any]] = []
78-
for row in group_df.iter_rows(named=True):
79-
ttype = row.get("task_type")
80-
ann_str = row.get("annotation")
81-
cname = row.get("class_name")
82-
83-
if ttype != "boundingbox" or ann_str is None or not cname:
84-
continue
85-
86-
data = json.loads(ann_str)
87-
x_tl = float(data.get("x", 0.0))
88-
y_tl = float(data.get("y", 0.0))
89-
w = float(data.get("w", 0.0))
90-
h = float(data.get("h", 0.0))
91-
92-
x_px = x_tl * width
93-
y_px = y_tl * height
94-
w_px = w * width
95-
h_px = h * height
96-
cx_px = x_px + w_px / 2.0
97-
cy_px = y_px + h_px / 2.0
98-
99-
per_image_anns.append(
100-
{
101-
"label": cname,
102-
"coordinates": {
103-
"x": cx_px,
104-
"y": cy_px,
105-
"width": w_px,
106-
"height": h_px,
107-
},
108-
}
109-
)
57+
per_image_anns = self._collect_bounding_box_annotations(
58+
group_df=group_df, width=width, height=height
59+
)
11060

11161
anns_by_split[split_name][new_name] = per_image_anns
11262

113-
ann_size_est = sum(
114-
64 + len(a.get("label", "")) for a in per_image_anns
63+
per_image_anns = self._collect_bounding_box_annotations(
64+
group_df, width, height
65+
)
66+
ann_size_est = self._estimate_annotation_bytes(
67+
new_name, per_image_anns
11568
)
11669
img_size = file_path.stat().st_size
11770

@@ -131,9 +84,60 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
13184
dest_img.write_bytes(file_path.read_bytes())
13285
self.current_size += img_size
13386

134-
# Final dump
13587
self._dump_annotations(anns_by_split, self.output_path, self.part)
13688

89+
@staticmethod
def _estimate_annotation_bytes(
    img_name: str, anns: list[dict[str, Any]]
) -> int:
    """Return the UTF-8 size of one image's serialized annotation record.

    The record is ``{"image": img_name, "annotations": anns}`` dumped as
    JSON with ``ensure_ascii=False`` and followed by a single newline.

    Args:
        img_name: File name stored under the ``"image"`` key.
        anns: Annotation dicts stored under the ``"annotations"`` key.

    Returns:
        Number of bytes of the encoded record, trailing newline included.
    """
    payload = {"image": img_name, "annotations": anns}
    serialized = json.dumps(payload, ensure_ascii=False) + "\n"
    # Measure encoded bytes, not characters: non-ASCII labels are kept
    # verbatim by ensure_ascii=False and take more than one byte each.
    return len(serialized.encode("utf-8"))
97+
98+
def _collect_bounding_box_annotations(
    self,
    group_df: Any,
    width: int,
    height: int,
) -> list[dict[str, Any]]:
    """Build the CreateML bounding-box entries for a single image.

    Rows are read with ``group_df.iter_rows(named=True)``. A row is used
    only when its ``task_type`` is ``"boundingbox"``, its ``annotation``
    is not ``None`` and its ``class_name`` is non-empty. The annotation is
    a JSON string with ``x``, ``y``, ``w``, ``h`` keys (missing keys count
    as ``0.0``) that are scaled by the image size and converted from a
    top-left corner to a box center.

    Args:
        group_df: Object exposing ``iter_rows(named=True)`` that yields
            one mapping per annotation row of the image.
        width: Image width used to scale ``x`` and ``w``.
        height: Image height used to scale ``y`` and ``h``.

    Returns:
        One ``{"label": ..., "coordinates": {"x", "y", "width",
        "height"}}`` dict per accepted row, in row order.

    Raises:
        json.JSONDecodeError: If an accepted row's annotation is not
            valid JSON.
    """
    per_image_anns: list[dict[str, Any]] = []

    for row in group_df.iter_rows(named=True):
        if row.get("task_type") != "boundingbox":
            continue

        ann_str = row.get("annotation")
        cname = row.get("class_name")
        if ann_str is None or not cname:
            continue

        data = json.loads(ann_str)
        x_px = float(data.get("x", 0.0)) * width
        y_px = float(data.get("y", 0.0)) * height
        w_px = float(data.get("w", 0.0)) * width
        h_px = float(data.get("h", 0.0)) * height

        per_image_anns.append(
            {
                "label": cname,
                "coordinates": {
                    # CreateML stores the box center, not its corner.
                    "x": x_px + w_px / 2.0,
                    "y": y_px + h_px / 2.0,
                    "width": w_px,
                    "height": h_px,
                },
            }
        )

    return per_image_anns
140+
137141
def _maybe_roll_partition(
138142
self,
139143
anns_by_split: dict[str, dict[str, list[dict[str, Any]]]],

luxonis_ml/data/exporters/darknet_exporter.py

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,35 +49,7 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
4949
new_name = f"{idx}{file_path.suffix}"
5050
new_stem = Path(new_name).stem
5151

52-
label_lines: list[str] = []
53-
for row in group_df.iter_rows(named=True):
54-
ttype = row.get("task_type")
55-
ann_str = row.get("annotation")
56-
cname = row.get("class_name")
57-
58-
if ttype != "boundingbox" or ann_str is None:
59-
continue
60-
61-
if cname and cname not in self.class_to_id:
62-
self.class_to_id[cname] = len(self.class_to_id)
63-
self.class_names.append(cname)
64-
65-
if not cname or cname not in self.class_to_id:
66-
continue
67-
68-
data = json.loads(ann_str)
69-
x_tl = float(data.get("x", 0.0))
70-
y_tl = float(data.get("y", 0.0))
71-
w = float(data.get("w", 0.0))
72-
h = float(data.get("h", 0.0))
73-
74-
cx = x_tl + w / 2.0
75-
cy = y_tl + h / 2.0
76-
77-
cid = self.class_to_id[cname]
78-
label_lines.append(
79-
f"{cid} {cx:.12f} {cy:.12f} {w:.12f} {h:.12f}"
80-
)
52+
label_lines = self._collect_darknet_bounding_box_labels(group_df)
8153

8254
labels_by_split[split_name][new_stem] = label_lines
8355

@@ -101,6 +73,42 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
10173

10274
self._dump_annotations(labels_by_split, self.output_path, self.part)
10375

76+
def _collect_darknet_bounding_box_labels(
    self,
    group_df: Any,
) -> list[str]:
    """Build the Darknet label lines for a single image.

    Rows are read with ``group_df.iter_rows(named=True)``. A row is used
    only when its ``task_type`` is ``"boundingbox"``, its ``annotation``
    is not ``None`` and its ``class_name`` is non-empty. A class name seen
    for the first time is appended to ``self.class_names`` and given the
    next free index in ``self.class_to_id``.

    Args:
        group_df: Object exposing ``iter_rows(named=True)`` that yields
            one mapping per annotation row of the image.

    Returns:
        One ``"<class_id> <cx> <cy> <w> <h>"`` line per accepted row, with
        every coordinate formatted to 12 decimal places.

    Raises:
        json.JSONDecodeError: If an accepted row's annotation is not
            valid JSON.
    """
    label_lines: list[str] = []

    for row in group_df.iter_rows(named=True):
        if row.get("task_type") != "boundingbox":
            continue

        ann_str = row.get("annotation")
        cname = row.get("class_name")
        if ann_str is None or not cname:
            continue

        # Register the class on first sight; ids follow insertion order.
        if cname not in self.class_to_id:
            self.class_to_id[cname] = len(self.class_to_id)
            self.class_names.append(cname)
        cid = self.class_to_id[cname]

        data = json.loads(ann_str)
        x_tl = float(data.get("x", 0.0))
        y_tl = float(data.get("y", 0.0))
        w = float(data.get("w", 0.0))
        h = float(data.get("h", 0.0))

        # Darknet expects the box center rather than its top-left corner.
        cx = x_tl + w / 2.0
        cy = y_tl + h / 2.0

        label_lines.append(f"{cid} {cx:.12f} {cy:.12f} {w:.12f} {h:.12f}")

    return label_lines
111+
104112
def _maybe_roll_partition(
105113
self,
106114
labels_by_split: dict[str, dict[str, list[str]]],

luxonis_ml/data/exporters/exporter_utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
from pathlib import Path
44
from typing import TYPE_CHECKING, Any
55

6+
import cv2
67
import polars as pl
8+
from pycocotools import mask
79

810
if TYPE_CHECKING:
911
from luxonis_ml.data.datasets.luxonis_dataset import LuxonisDataset
@@ -154,6 +156,28 @@ def get_single_skeleton(
154156
skeleton_1_based = [[a + 1, b + 1] for a, b in edges]
155157
return labels, skeleton_1_based
156158

159+
@staticmethod
def rle_to_yolo_polygon(
    rle: str, height: int, width: int
) -> list[list[float]]:
    """Convert an RLE-encoded mask into normalized polygon outlines.

    The RLE is decoded with ``pycocotools.mask.decode`` and the external
    contours of the resulting mask are extracted with
    ``cv2.findContours``. Each contour becomes one flat polygon
    ``[x1, y1, x2, y2, ...]`` whose x values are divided by ``width`` and
    y values by ``height``.

    Args:
        rle: The ``counts`` payload of the RLE-encoded mask.
        height: Mask height, first element of the RLE ``size``.
        width: Mask width, second element of the RLE ``size``.

    Returns:
        One flat coordinate list per external contour. Contours with
        fewer than three points are dropped because they do not describe
        a polygon.
    """
    # Decode RLE to a binary mask.
    m = mask.decode({"size": [height, width], "counts": rle})

    # Each external contour becomes one polygon.
    contours, _ = cv2.findContours(
        m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    polygons: list[list[float]] = []
    for contour in contours:
        # OpenCV returns contours shaped (N, 1, 2); flatten to (N, 2)
        # explicitly so that the N == 1 case keeps its point axis.
        points = contour.reshape(-1, 2)
        if len(points) < 3:
            continue

        polygon: list[float] = []
        for x, y in points:
            # Cast to built-in floats so callers never see NumPy scalars.
            polygon.extend([float(x) / width, float(y) / height])
        polygons.append(polygon)

    return polygons
180+
157181
def _normalize(
158182
self, xs: list[float], ys: list[float], w: float, h: float
159183
) -> list[float]:

luxonis_ml/data/exporters/segmentation_mask_directory_exporter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def get_split_names(self) -> dict[str, str]:
4646
def _class_id_for(self, split: str, class_name: str) -> int:
4747
cmap = self.split_class_maps[split]
4848
if class_name not in cmap:
49-
cmap[class_name] = len(cmap) + 1 # start at 1 (0 is background)
49+
cmap[class_name] = len(cmap) + 1
5050
return cmap[class_name]
5151

5252
def _write_classes_csv(self, split: str, split_dir: Path) -> None:

luxonis_ml/data/exporters/tensorflow_csv_exporter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import csv
4+
import json
45
from pathlib import Path
56
from typing import Any, cast
67

@@ -58,6 +59,7 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
5859
if row.get("task_type") != "boundingbox":
5960
continue
6061
ann = row.get("annotation")
62+
ann = json.loads(ann)
6163
cname = row.get("class_name")
6264
if ann is None or not cname:
6365
continue

0 commit comments

Comments
 (0)