@@ -68,6 +68,76 @@ def __init__(self, img_size):
6868 color = self .FOUNDATION_COLOR , is_evaluate = True )
6969
7070
def _generate_preprocessed_filenames(
        self, image_filename, output_dir, x, y, suffix=None
):
    """Build the output paths of a preprocessed tile and of its label image

    The tile filename is derived from the raw image basename (extension
    dropped), the tile size (``self.image_size``, written twice), the tile
    origin and an optional suffix::

        <basename>_<size>_<size>_<x>_<y>[_<suffix>].png

    The image goes under ``<output_dir>/images`` and the label image under
    ``<output_dir>/labels``, with the same filename.

    Parameters
    ----------
    image_filename : str
        Original image filename
    output_dir : str
        Output folder for preprocessed material
    x : int
        Extracted image west coordinates
    y : int
        Extracted image north coordinates
    suffix : str, optional
        Preprocessed filename complement; when omitted (``None``), no
        trailing ``_<suffix>`` part is added to the filename

    Returns
    -------
    dict
        Preprocessed image (``"image"`` key) and corresponding label
        (``"labels"`` key) filenames
    """
    raw_stem = os.path.splitext(os.path.basename(image_filename))[0]
    name_parts = [
        raw_stem,
        str(self.image_size),
        str(self.image_size),
        str(x),
        str(y),
    ]
    # Callers that do not produce several variants of a tile do not pass
    # any suffix: keep their filenames free of a dangling underscore.
    if suffix is not None:
        name_parts.append(str(suffix))
    new_filename = "_".join(name_parts) + ".png"
    # Both paths are built explicitly rather than by substituting "images"
    # with "labels" in the image path: a substitution would also rewrite
    # any "images" occurrence in `output_dir` or in the raw basename.
    return {
        "image": os.path.join(output_dir, "images", new_filename),
        "labels": os.path.join(output_dir, "labels", new_filename),
    }
103+
104+
def _serialize(
        self, tile_image, labelled_image, label_dict,
        image_filename, output_dir, x, y, suffix
):
    """Write a tile and its labelled counterpart on the file system

    The output paths are provided by `_generate_preprocessed_filenames`;
    the tile is saved first, then the labelled image. The method returns a
    dict that summarizes what has been written.

    Parameters
    ----------
    tile_image : PIL.Image
        Tile extracted from the original high-resolution raster
    labelled_image : PIL.Image
        Labelled version of the tile
    label_dict : dict
        Label information, returned untouched under the ``"labels"`` key
    image_filename : str
        Original image filename
    output_dir : str
        Output folder for preprocessed material
    x : int
        Tile coordinate, forwarded to the filename generation
    y : int
        Tile coordinate, forwarded to the filename generation
    suffix : str
        Preprocessed filename complement, forwarded to the filename
        generation

    Returns
    -------
    dict
        Information related to the serialized tile: ``"raw_filename"``,
        ``"image_filename"``, ``"label_filename"`` and ``"labels"``
    """
    paths = self._generate_preprocessed_filenames(
        image_filename, output_dir, x, y, suffix
    )
    image_path = paths["image"]
    tile_image.save(image_path)
    label_path = paths["labels"]
    labelled_image.save(label_path)
    tile_info = {}
    tile_info["raw_filename"] = image_filename
    tile_info["image_filename"] = image_path
    tile_info["label_filename"] = label_path
    tile_info["labels"] = label_dict
    return tile_info
139+
140+
71141 def _preprocess_tile (self , x , y , image_filename , output_dir ,
72142 raster , labels = None ):
73143 """Preprocess one single tile built from `image_filename`, with respect
@@ -94,51 +164,24 @@ def _preprocess_tile(self, x, y, image_filename, output_dir,
94164 Key/values with the filenames and label ids
95165
96166 """
97- basename_decomp = os .path .splitext (
98- os .path .basename (image_filename ))
99- img_id_str = (str (self .image_size ) + '_'
100- + str (self .image_size ) + '_'
101- + str (x ) + '_' + str (y ))
102- new_in_filename = (basename_decomp [0 ] + '_'
103- + img_id_str + ".png" )
104- new_in_path = os .path .join (output_dir , 'images' ,
105- new_in_filename )
106- gdal .Translate (new_in_path , raster ,
167+ dirs = self ._generate_preprocessed_filenames (
168+ image_filename , output_dir , x , y
169+ )
170+ gdal .Translate (dirs ["image" ], raster ,
107171 format = "PNG" ,
108172 srcWin = [x , y , self .image_size , self .image_size ])
109- if not labels is None :
110- raster_features = get_image_features (raster )
111- tile_items = extract_tile_items (raster_features , labels ,
112- x , y ,
113- self .image_size ,
114- self .image_size ,
115- tile_srid = 32737 )
116- out_labelname = (new_in_path
117- .replace ("images" , "labels" ))
118- mask = self .load_mask (tile_items , raster_features , x , y )
119- label_dict = utils .build_labels (mask ,
120- range (self .get_nb_labels ()),
121- "tanzania" )
122- labelled_image = utils .build_image_from_config (mask ,
123- self .labels )
124- labelled_image .save (out_labelname )
125- return {"raw_filename" : image_filename ,
126- "image_filename" : new_in_path ,
127- "label_filename" : out_labelname ,
128- "labels" : label_dict }
129- else :
130- return {"raw_filename" : image_filename ,
131- "image_filename" : new_in_path }
173+ return {"raw_filename" : image_filename ,
174+ "image_filename" : dirs ["image" ]}
132175
133176
134- def _preprocess (self , image_filename , output_dir , labelling ):
177+ def _preprocess_for_inference (self , image_filename , output_dir ):
135178 """Resize/crop then save the training & label images
136179
137180 Parameters
138181 ----------
139182 image_filename : str
140183 Full path towards the image on the disk
141- datadir : str
184+ output_dir : str
142185 Output path where preprocessed image must be saved
143186
144187 Returns
@@ -150,29 +193,121 @@ def _preprocess(self, image_filename, output_dir, labelling):
150193 raw_img_width = raster .RasterXSize
151194 raw_img_height = raster .RasterYSize
152195 result_dicts = []
153- logger .info ("Raw image size: %s, %s" % (raw_img_width , raw_img_height ))
154196 logger .info ("Image filename: %s" % image_filename )
155-
156- labels = None
157- if labelling :
158- label_filename = (image_filename
159- .replace ("images" , "labels" )
160- .replace (".tif" , ".geojson" ))
161- labels = gpd .read_file (label_filename )
162- labels = labels .loc [~ labels .geometry .isna (), ["condition" , "geometry" ]]
163- none_mask = [lc is None for lc in labels .condition ]
164- labels .loc [none_mask , "condition" ] = "Complete"
197+ logger .info ("Raw image size: %s, %s" % (raw_img_width , raw_img_height ))
165198
166199 for x in range (0 , raw_img_width , self .image_size ):
167200 for y in range (0 , raw_img_height , self .image_size ):
168201 tile_results = self ._preprocess_tile (x , y , image_filename ,
169- output_dir ,
170- raster , labels )
202+ output_dir , raster )
171203 result_dicts .append (tile_results )
172204 del raster
173205 return result_dicts
174206
175207
def _preprocess_for_training(self, image_filename, output_dir, nb_images):
    """Randomly sample labelled training tiles from one raw raster

    Tiles of ``self.image_size`` pixels are drawn at random positions in
    the raster until `nb_images` tiles are serialized or ``2 * nb_images``
    draws are done. A tile that contains labelled items is serialized four
    times (as is, plus its horizontal, vertical and combined flips, with
    the "nw", "ne", "sw" and "se" suffixes); as the four versions are
    always written together, the final amount of images may exceed
    `nb_images` by up to three. A tile without any item is serialized once
    ("nw" suffix) as long as less than 10% of `nb_images` empty tiles have
    been kept, and is dropped otherwise.

    The labels are read from the geojson file whose path is obtained by
    replacing "images" with "labels" and ".tif" with ".geojson" in
    `image_filename`.

    Parameters
    ----------
    image_filename : str
        Full path towards the image on the disk
    output_dir : str
        Output path where preprocessed image must be saved
    nb_images : int
        Targeted amount of tiles to generate from this raster

    Returns
    -------
    list of dict
        One dict per serialized tile, with the filenames and label ids
        (see `_serialize`); empty if the raster is smaller than a tile
    """
    raster = gdal.Open(image_filename)
    raw_img_width = raster.RasterXSize
    raw_img_height = raster.RasterYSize
    result_dicts = []
    logger.info("Image filename: %s" % image_filename)
    logger.info("Raw image size: %s, %s" % (raw_img_width, raw_img_height))

    # Largest valid tile origin along each axis
    max_x = raw_img_width - self.image_size
    max_y = raw_img_height - self.image_size
    if max_x < 0 or max_y < 0:
        # No full tile fits into the raster: nothing can be sampled
        logger.warning("Image %s is smaller than the tile size (%s): skipped."
                       % (image_filename, self.image_size))
        del raster
        return result_dicts

    image_data = raster.ReadAsArray()
    # Swap first and last axes, so as to slice the array with x on the
    # first axis and y on the second one in the sampling loop
    # NOTE(review): assumes ReadAsArray gives (bands, rows, cols) - confirm
    image_data = np.swapaxes(image_data, 0, 2)

    label_filename = (image_filename
                      .replace("images", "labels")
                      .replace(".tif", ".geojson"))
    labels = gpd.read_file(label_filename)
    labels = labels.loc[~labels.geometry.isna(), ["condition", "geometry"]]
    # Items without any condition are considered as "Complete"
    none_mask = [lc is None for lc in labels.condition]
    labels.loc[none_mask, "condition"] = "Complete"

    # The raster features do not depend on the tile: compute them once
    raster_features = get_image_features(raster)

    # Suffix -> successive flips applied to the tile and to its labelled
    # version ("se" is the "sw" version flipped left/right)
    tile_versions = (
        ("nw", ()),
        ("ne", (Image.FLIP_LEFT_RIGHT,)),
        ("sw", (Image.FLIP_TOP_BOTTOM,)),
        ("se", (Image.FLIP_TOP_BOTTOM, Image.FLIP_LEFT_RIGHT)),
    )

    nb_attempts = 0
    image_counter = 0
    empty_image_counter = 0
    while image_counter < nb_images and nb_attempts < 2 * nb_images:
        nb_attempts += 1
        # randomly pick an image; the upper bound of `randint` is
        # exclusive, hence the "+ 1" to make the last origin reachable
        x = np.random.randint(0, max_x + 1)
        y = np.random.randint(0, max_y + 1)

        tile_items = extract_tile_items(raster_features, labels,
                                        x, y,
                                        self.image_size,
                                        self.image_size,
                                        tile_srid=32737)
        is_empty = len(tile_items) == 0
        if is_empty and empty_image_counter >= 0.1 * nb_images:
            # Empty tile quota reached: drop the tile before building
            # its mask and images
            continue

        tile_data = image_data[x:(x + self.image_size),
                               y:(y + self.image_size)]
        tile_image = Image.fromarray(tile_data)
        mask = self.load_mask(tile_items, raster_features, x, y)
        label_dict = utils.build_labels(mask,
                                        range(self.get_nb_labels()),
                                        "tanzania")
        labelled_image = utils.build_image_from_config(mask, self.labels)

        if is_empty:
            # Empty tiles are not augmented
            versions = tile_versions[:1]
            empty_image_counter += 1
        else:
            versions = tile_versions
        for suffix, flips in versions:
            flipped_tile = tile_image
            flipped_labels = labelled_image
            for flip in flips:
                flipped_tile = flipped_tile.transpose(flip)
                flipped_labels = flipped_labels.transpose(flip)
            tiled_results = self._serialize(
                flipped_tile, flipped_labels, label_dict,
                image_filename, output_dir, x, y, suffix
            )
            result_dicts.append(tiled_results)
            image_counter += 1
    del raster
    logger.info("Generate %s images after %s attempts."
                % (image_counter, nb_attempts))
    return result_dicts
309+
310+
176311 def populate (self , output_dir , input_dir , nb_images = None ,
177312 aggregate = False , labelling = True ):
178313 """ Populate the dataset with images contained into `datadir` directory
@@ -195,15 +330,24 @@ class method genericity
195330 image_list = os .listdir (os .path .join (input_dir , "images" ))
196331 image_list_longname = [os .path .join (input_dir , "images" , l )
197332 for l in image_list
198- if not l .startswith ('.' )][:nb_images ]
333+ if not l .startswith ('.' )]
334+ nb_image_files = len (image_list_longname )
199335
200336 logger .info ("Getting %s images to preprocess..."
201- % len ( image_list_longname ) )
337+ % nb_image_files )
202338 logger .info (image_list_longname )
203- with Pool () as p :
204- self .image_info = p .starmap (self ._preprocess ,
205- [(x , output_dir , labelling )
206- for x in image_list_longname ])
339+ if labelling :
340+ nb_tile_per_image = int (nb_images / nb_image_files )
341+ with Pool (processes = 3 ) as p :
342+ self .image_info = p .starmap (self ._preprocess_for_training ,
343+ [(x , output_dir , nb_tile_per_image )
344+ for x in image_list_longname ])
345+ else :
346+ with Pool (processes = 3 ) as p :
347+ self .image_info = p .starmap (self ._preprocess_for_inference ,
348+ [(x , output_dir )
349+ for x in image_list_longname ])
350+
207351 self .image_info = [item for sublist in self .image_info
208352 for item in sublist ]
209353 logger .info ("Saved %s images in the preprocessed dataset."
@@ -242,36 +386,45 @@ def load_mask(self, buildings, raster_features, min_x, min_y):
242386 if buildings .shape [0 ] == 0 :
243387 return mask
244388 for idx , row in buildings .iterrows ():
245- points = self .extract_points_from_polygon (row ["geometry" ],
246- raster_features )
247- points [:, 0 ] -= min_x
248- points [:, 1 ] -= min_y
389+ points = extract_points_from_polygon (row ["geometry" ],
390+ raster_features ,
391+ min_x , min_y )
249392 label_id = [label ["id" ] for label in self .labels
250393 if label ["name" ] == row ["condition" ].lower ()][0 ]
251394 mask = cv2 .fillPoly (mask , [points ], label_id )
252395 return mask
253396
254397
def extract_points_from_polygon(p, features, min_x, min_y):
    """Get the exterior vertices of a polygon, relatively to a tile origin

    The exterior coordinates of `p` are converted with `get_x_pixel` and
    `get_y_pixel`, cast to 32-bit integers, then shifted by the tile
    origin. Each returned row is ordered as (y, x).

    Parameters
    ----------
    p : shapely.geometry.Polygon
        Polygon to detail
    features : dict
        Geographical features associated to the image ("east", "west",
        "south", "north", "width" and "height" keys are read)
    min_x : int
        Minimal x-coordinate (west)
    min_y : int
        Minimal y-coordinate (north)

    Returns
    -------
    np.array
        Polygon vertices, as a ``np.int32`` array with one (y, x) row per
        vertex

    """
    exterior_xs, exterior_ys = p.exterior.xy
    pixel_xs = get_x_pixel(
        exterior_xs, features["east"], features["west"], features["width"]
    )
    pixel_ys = get_y_pixel(
        exterior_ys, features["south"], features["north"], features["height"]
    )
    # Cast to integers first, then shift: first column is y, second is x
    vertices = np.array(list(zip(pixel_ys, pixel_xs)), dtype=np.int32)
    vertices[:, 0] -= min_y
    vertices[:, 1] -= min_x
    return vertices
275428
276429
277430def get_x_pixel (coord , east , west , width ):
0 commit comments