1111
1212
1313class CreateMLExporter (BaseExporter ):
14- """Exports LDF to CreateML format.
15-
16- Output structure:
17-
18- output/
19- └── <dataset_identifier>[_partN]/
20- ├── train/
21- │ ├── 0.jpg
22- │ ├── 1.jpg
23- │ └── _annotations.createml.json
24- ├── valid/
25- │ └── ...
26- └── test/
27- └── ...
28-
29- Notes:
30- - CreateML uses center-based pixel coordinates.
31- - This exporter converts from normalized TL (x,y,w,h) to pixel centers.
32- """
33-
3414 def __init__ (
3515 self ,
3616 dataset_identifier : str ,
@@ -74,44 +54,17 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
7454 with Image .open (file_path ) as im :
7555 width , height = im .size
7656
77- per_image_anns : list [dict [str , Any ]] = []
78- for row in group_df .iter_rows (named = True ):
79- ttype = row .get ("task_type" )
80- ann_str = row .get ("annotation" )
81- cname = row .get ("class_name" )
82-
83- if ttype != "boundingbox" or ann_str is None or not cname :
84- continue
85-
86- data = json .loads (ann_str )
87- x_tl = float (data .get ("x" , 0.0 ))
88- y_tl = float (data .get ("y" , 0.0 ))
89- w = float (data .get ("w" , 0.0 ))
90- h = float (data .get ("h" , 0.0 ))
91-
92- x_px = x_tl * width
93- y_px = y_tl * height
94- w_px = w * width
95- h_px = h * height
96- cx_px = x_px + w_px / 2.0
97- cy_px = y_px + h_px / 2.0
98-
99- per_image_anns .append (
100- {
101- "label" : cname ,
102- "coordinates" : {
103- "x" : cx_px ,
104- "y" : cy_px ,
105- "width" : w_px ,
106- "height" : h_px ,
107- },
108- }
109- )
57+ per_image_anns = self ._collect_bounding_box_annotations (
58+ group_df = group_df , width = width , height = height
59+ )
11060
11161 anns_by_split [split_name ][new_name ] = per_image_anns
11262
113- ann_size_est = sum (
114- 64 + len (a .get ("label" , "" )) for a in per_image_anns
63+ per_image_anns = self ._collect_bounding_box_annotations (
64+ group_df , width , height
65+ )
66+ ann_size_est = self ._estimate_annotation_bytes (
67+ new_name , per_image_anns
11568 )
11669 img_size = file_path .stat ().st_size
11770
@@ -131,9 +84,60 @@ def transform(self, prepared_ldf: PreparedLDF) -> None:
13184 dest_img .write_bytes (file_path .read_bytes ())
13285 self .current_size += img_size
13386
134- # Final dump
13587 self ._dump_annotations (anns_by_split , self .output_path , self .part )
13688
89+ @staticmethod
90+ def _estimate_annotation_bytes (
91+ img_name : str , anns : list [dict [str , Any ]]
92+ ) -> int :
93+ payload = {"image" : img_name , "annotations" : anns }
94+ return len (
95+ (json .dumps (payload , ensure_ascii = False ) + "\n " ).encode ("utf-8" )
96+ )
97+
98+ def _collect_bounding_box_annotations (
99+ self ,
100+ group_df : Any ,
101+ width : int ,
102+ height : int ,
103+ ) -> list [dict [str , Any ]]:
104+ per_image_anns : list [dict [str , Any ]] = []
105+
106+ for row in group_df .iter_rows (named = True ):
107+ ttype = row .get ("task_type" )
108+ ann_str = row .get ("annotation" )
109+ cname = row .get ("class_name" )
110+
111+ if ttype != "boundingbox" or ann_str is None or not cname :
112+ continue
113+
114+ data = json .loads (ann_str )
115+ x_tl = float (data .get ("x" , 0.0 ))
116+ y_tl = float (data .get ("y" , 0.0 ))
117+ w = float (data .get ("w" , 0.0 ))
118+ h = float (data .get ("h" , 0.0 ))
119+
120+ x_px = x_tl * width
121+ y_px = y_tl * height
122+ w_px = w * width
123+ h_px = h * height
124+ cx_px = x_px + w_px / 2.0
125+ cy_px = y_px + h_px / 2.0
126+
127+ per_image_anns .append (
128+ {
129+ "label" : cname ,
130+ "coordinates" : {
131+ "x" : cx_px ,
132+ "y" : cy_px ,
133+ "width" : w_px ,
134+ "height" : h_px ,
135+ },
136+ }
137+ )
138+
139+ return per_image_anns
140+
137141 def _maybe_roll_partition (
138142 self ,
139143 anns_by_split : dict [str , dict [str , list [dict [str , Any ]]]],
0 commit comments