Skip to content
This repository was archived by the owner on Jun 3, 2020. It is now read-only.

Commit ceef570

Browse files
authored
Merge pull request #101 from Oslandia/tanzania_datagen
Tanzania datagen
2 parents d8e613a + 8c95603 commit ceef570

File tree

5 files changed

+237
-86
lines changed

5 files changed

+237
-86
lines changed

deeposlandia/datasets/tanzania.py

Lines changed: 229 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,76 @@ def __init__(self, img_size):
6868
color=self.FOUNDATION_COLOR, is_evaluate=True)
6969

7070

71+
def _generate_preprocessed_filenames(
72+
self, image_filename, output_dir, x, y, suffix
73+
):
74+
"""Generate preprocessed image and label filenames on the file system,
75+
starting from a raw image filename
76+
77+
Parameters
78+
----------
79+
image_filename : str
80+
Original image filename
81+
output_dir : str
82+
Output folder for preprocessed material
83+
x : int
84+
Extracted image west coordinates
85+
y : int
86+
Extracted image north coordinates
87+
suffix : str
88+
Preprocessed filename complement
89+
90+
Returns
91+
-------
92+
dict
93+
Preprocessed image and corresponding label filenames
94+
"""
95+
basename_decomp = os.path.splitext(os.path.basename(image_filename))
96+
img_id_str = (str(self.image_size) + '_'
97+
+ str(self.image_size) + '_'
98+
+ str(x) + '_' + str(y) + "_" + suffix)
99+
new_filename = basename_decomp[0] + '_' + img_id_str + ".png"
100+
out_image_name = os.path.join(output_dir, 'images', new_filename)
101+
out_label_name = out_image_name.replace("images", "labels")
102+
return {"image": out_image_name, "labels": out_label_name}
103+
104+
105+
def _serialize(
106+
self, tile_image, labelled_image, label_dict,
107+
image_filename, output_dir, x, y, suffix
108+
):
109+
"""Serialize a tiled image generated from an original high-resolution
110+
raster as well as the labelled version of the tile
111+
112+
The method returns a dict that contains image-related file paths.
113+
114+
Parameters
115+
----------
116+
tile_image : PIL.Image
117+
labelled_image : PIL.Image
118+
label_dict : dict
119+
image_filename : str
120+
output_dir : str
121+
x : int
122+
y : int
123+
124+
Returns
125+
-------
126+
dict
127+
Information related to the serialized tile (file paths, encountered
128+
labels)
129+
"""
130+
dirs = self._generate_preprocessed_filenames(
131+
image_filename, output_dir, x, y, suffix
132+
)
133+
tile_image.save(dirs["image"])
134+
labelled_image.save(dirs["labels"])
135+
return {"raw_filename": image_filename,
136+
"image_filename": dirs["image"],
137+
"label_filename": dirs["labels"],
138+
"labels": label_dict}
139+
140+
71141
def _preprocess_tile(self, x, y, image_filename, output_dir,
72142
raster, labels=None):
73143
"""Preprocess one single tile built from `image_filename`, with respect
@@ -94,51 +164,24 @@ def _preprocess_tile(self, x, y, image_filename, output_dir,
94164
Key/values with the filenames and label ids
95165
96166
"""
97-
basename_decomp = os.path.splitext(
98-
os.path.basename(image_filename))
99-
img_id_str = (str(self.image_size) + '_'
100-
+ str(self.image_size) + '_'
101-
+ str(x) + '_' + str(y))
102-
new_in_filename = (basename_decomp[0] + '_'
103-
+ img_id_str + ".png")
104-
new_in_path = os.path.join(output_dir, 'images',
105-
new_in_filename)
106-
gdal.Translate(new_in_path, raster,
167+
dirs = self._generate_preprocessed_filenames(
168+
image_filename, output_dir, x, y
169+
)
170+
gdal.Translate(dirs["image"], raster,
107171
format="PNG",
108172
srcWin=[x, y, self.image_size, self.image_size])
109-
if not labels is None:
110-
raster_features = get_image_features(raster)
111-
tile_items = extract_tile_items(raster_features, labels,
112-
x, y,
113-
self.image_size,
114-
self.image_size,
115-
tile_srid=32737)
116-
out_labelname = (new_in_path
117-
.replace("images", "labels"))
118-
mask = self.load_mask(tile_items, raster_features, x, y)
119-
label_dict = utils.build_labels(mask,
120-
range(self.get_nb_labels()),
121-
"tanzania")
122-
labelled_image = utils.build_image_from_config(mask,
123-
self.labels)
124-
labelled_image.save(out_labelname)
125-
return {"raw_filename": image_filename,
126-
"image_filename": new_in_path,
127-
"label_filename": out_labelname,
128-
"labels": label_dict}
129-
else:
130-
return {"raw_filename": image_filename,
131-
"image_filename": new_in_path}
173+
return {"raw_filename": image_filename,
174+
"image_filename": dirs["image"]}
132175

133176

134-
def _preprocess(self, image_filename, output_dir, labelling):
177+
def _preprocess_for_inference(self, image_filename, output_dir):
135178
"""Resize/crop then save the training & label images
136179
137180
Parameters
138181
----------
139182
image_filename : str
140183
Full path towards the image on the disk
141-
datadir : str
184+
output_dir : str
142185
Output path where preprocessed image must be saved
143186
144187
Returns
@@ -150,29 +193,121 @@ def _preprocess(self, image_filename, output_dir, labelling):
150193
raw_img_width = raster.RasterXSize
151194
raw_img_height = raster.RasterYSize
152195
result_dicts = []
153-
logger.info("Raw image size: %s, %s" % (raw_img_width, raw_img_height))
154196
logger.info("Image filename: %s" % image_filename)
155-
156-
labels = None
157-
if labelling:
158-
label_filename = (image_filename
159-
.replace("images", "labels")
160-
.replace(".tif", ".geojson"))
161-
labels = gpd.read_file(label_filename)
162-
labels = labels.loc[~labels.geometry.isna(), ["condition", "geometry"]]
163-
none_mask = [lc is None for lc in labels.condition]
164-
labels.loc[none_mask, "condition"] = "Complete"
197+
logger.info("Raw image size: %s, %s" % (raw_img_width, raw_img_height))
165198

166199
for x in range(0, raw_img_width, self.image_size):
167200
for y in range(0, raw_img_height, self.image_size):
168201
tile_results = self._preprocess_tile(x, y, image_filename,
169-
output_dir,
170-
raster, labels)
202+
output_dir, raster)
171203
result_dicts.append(tile_results)
172204
del raster
173205
return result_dicts
174206

175207

208+
def _preprocess_for_training(self, image_filename, output_dir, nb_images):
    """Randomly extract tiles from a raw raster, then save the training &
    label images

    Tiles of `self.image_size` pixels are drawn at random positions in the
    raster. A tile that contains labelled items is saved four times
    (suffixes "nw", "ne", "sw" and "se": original tile, left-right flip,
    top-bottom flip, both flips). A tile without any item is saved once
    (suffix "nw"), as long as fewer than ``0.1 * nb_images`` empty tiles
    have been saved. The loop stops when `nb_images` images have been saved
    or after ``2 * nb_images`` attempts; as non-empty tiles come four by
    four, the final amount of images may exceed `nb_images` by up to three.

    The labels are read from the geojson file deduced from `image_filename`
    ("images" replaced by "labels", ".tif" by ".geojson"). Items without
    geometry are dropped, and items without condition are considered as
    "Complete".

    Parameters
    ----------
    image_filename : str
        Full path towards the image on the disk
    output_dir : str
        Output path where preprocessed image must be saved
    nb_images : int
        Targeted amount of preprocessed images for this raw image

    Returns
    -------
    list
        One dictionary per saved image (see `_serialize`), with the
        filenames and label ids; empty if the raster is smaller than a tile
    """
    raster = gdal.Open(image_filename)
    raw_img_width = raster.RasterXSize
    raw_img_height = raster.RasterYSize
    result_dicts = []
    logger.info("Image filename: %s", image_filename)
    logger.info("Raw image size: %s, %s", raw_img_width, raw_img_height)

    # Largest valid tile origin along each axis
    max_x = raw_img_width - self.image_size
    max_y = raw_img_height - self.image_size
    if max_x < 0 or max_y < 0:
        # No tile of the requested size fits in the raster: nothing to draw
        logger.warning(
            "Image %s is smaller than the tile size (%s): no tile generated.",
            image_filename, self.image_size
        )
        del raster
        return result_dicts

    image_data = raster.ReadAsArray()
    # First and last axes are swapped so as to index the array with (x, y)
    # NOTE(review): assumes a multi-band raster (3D array) - to be confirmed
    image_data = np.swapaxes(image_data, 0, 2)

    label_filename = (image_filename
                      .replace("images", "labels")
                      .replace(".tif", ".geojson"))
    labels = gpd.read_file(label_filename)
    labels = labels.loc[~labels.geometry.isna(), ["condition", "geometry"]]
    none_mask = [lc is None for lc in labels.condition]
    labels.loc[none_mask, "condition"] = "Complete"

    # The raster features do not depend on the tile: compute them only once
    raster_features = get_image_features(raster)

    nb_attempts = 0
    image_counter = 0
    empty_image_counter = 0
    while image_counter < nb_images and nb_attempts < 2 * nb_images:
        nb_attempts += 1
        # randomly pick an image; `randint` upper bound is exclusive, hence
        # the "+ 1" to make the last valid origin reachable
        x = np.random.randint(0, max_x + 1)
        y = np.random.randint(0, max_y + 1)

        tile_items = extract_tile_items(raster_features, labels,
                                        x, y,
                                        self.image_size,
                                        self.image_size,
                                        tile_srid=32737)
        is_empty = len(tile_items) == 0
        if is_empty and empty_image_counter >= 0.1 * nb_images:
            # Empty tile quota reached: skip the tile before building
            # images that would be thrown away
            continue

        tile_data = image_data[x:(x + self.image_size),
                               y:(y + self.image_size)]
        tile_image = Image.fromarray(tile_data)
        mask = self.load_mask(tile_items, raster_features, x, y)
        label_dict = utils.build_labels(mask,
                                        range(self.get_nb_labels()),
                                        "tanzania")
        labelled_image = utils.build_image_from_config(mask, self.labels)

        if is_empty:
            empty_image_counter += 1
            variants = [("nw", tile_image, labelled_image)]
        else:
            tile_image_sw = tile_image.transpose(Image.FLIP_TOP_BOTTOM)
            labelled_image_sw = labelled_image.transpose(
                Image.FLIP_TOP_BOTTOM
            )
            variants = [
                ("nw", tile_image, labelled_image),
                ("ne",
                 tile_image.transpose(Image.FLIP_LEFT_RIGHT),
                 labelled_image.transpose(Image.FLIP_LEFT_RIGHT)),
                ("sw", tile_image_sw, labelled_image_sw),
                ("se",
                 tile_image_sw.transpose(Image.FLIP_LEFT_RIGHT),
                 labelled_image_sw.transpose(Image.FLIP_LEFT_RIGHT)),
            ]

        for suffix, variant_image, variant_labels in variants:
            tiled_results = self._serialize(
                variant_image, variant_labels, label_dict,
                image_filename, output_dir, x, y, suffix
            )
            result_dicts.append(tiled_results)
            image_counter += 1
        # Drop the references to the tile images before the next attempt
        del variants
    del raster
    logger.info("Generate %s images after %s attempts.",
                image_counter, nb_attempts)
    return result_dicts
309+
310+
176311
def populate(self, output_dir, input_dir, nb_images=None,
177312
aggregate=False, labelling=True):
178313
""" Populate the dataset with images contained into `datadir` directory
@@ -195,15 +330,24 @@ class method genericity
195330
image_list = os.listdir(os.path.join(input_dir, "images"))
196331
image_list_longname = [os.path.join(input_dir, "images", l)
197332
for l in image_list
198-
if not l.startswith('.')][:nb_images]
333+
if not l.startswith('.')]
334+
nb_image_files = len(image_list_longname)
199335

200336
logger.info("Getting %s images to preprocess..."
201-
% len(image_list_longname))
337+
% nb_image_files)
202338
logger.info(image_list_longname)
203-
with Pool() as p:
204-
self.image_info = p.starmap(self._preprocess,
205-
[(x, output_dir, labelling)
206-
for x in image_list_longname])
339+
if labelling:
340+
nb_tile_per_image = int(nb_images/nb_image_files)
341+
with Pool(processes=3) as p:
342+
self.image_info = p.starmap(self._preprocess_for_training,
343+
[(x, output_dir, nb_tile_per_image)
344+
for x in image_list_longname])
345+
else:
346+
with Pool(processes=3) as p:
347+
self.image_info = p.starmap(self._preprocess_for_inference,
348+
[(x, output_dir)
349+
for x in image_list_longname])
350+
207351
self.image_info = [item for sublist in self.image_info
208352
for item in sublist]
209353
logger.info("Saved %s images in the preprocessed dataset."
@@ -242,36 +386,45 @@ def load_mask(self, buildings, raster_features, min_x, min_y):
242386
if buildings.shape[0] == 0:
243387
return mask
244388
for idx, row in buildings.iterrows():
245-
points = self.extract_points_from_polygon(row["geometry"],
246-
raster_features)
247-
points[:, 0] -= min_x
248-
points[:, 1] -= min_y
389+
points = extract_points_from_polygon(row["geometry"],
390+
raster_features,
391+
min_x, min_y)
249392
label_id = [label["id"] for label in self.labels
250393
if label["name"] == row["condition"].lower()][0]
251394
mask = cv2.fillPoly(mask, [points], label_id)
252395
return mask
253396

254397

255-
def extract_points_from_polygon(self, p, features):
256-
"""Extract points from a polygon
398+
def extract_points_from_polygon(p, features, min_x, min_y):
    """Extract the exterior vertices of a polygon, as pixel coordinates
    relative to a tile origin

    Parameters
    ----------
    p : shapely.geometry.Polygon
        Polygon to detail
    features : dict
        Geographical features associated to the image; the "east", "west",
        "south", "north", "width" and "height" keys are used
    min_x : int
        Minimal x-coordinate (west)
    min_y : int
        Minimal y-coordinate (north)

    Returns
    -------
    np.array
        Polygon vertices, as a 32-bit integer array with one row per vertex;
        the first column is the y pixel coordinate minus `min_y`, the second
        column is the x pixel coordinate minus `min_x`

    """
    ring_xs, ring_ys = p.exterior.xy
    pixel_xs = get_x_pixel(
        ring_xs, features["east"], features["west"], features["width"]
    )
    pixel_ys = get_y_pixel(
        ring_ys, features["south"], features["north"], features["height"]
    )
    # Vertices are stored as (y, x) pairs, not (x, y)
    vertices = np.array(
        [[pixel_y, pixel_x] for pixel_y, pixel_x in zip(pixel_ys, pixel_xs)],
        dtype=np.int32,
    )
    # Shift the coordinates so as to make them relative to the tile origin
    vertices[:, 0] -= min_y
    vertices[:, 1] -= min_x
    return vertices
275428

276429

277430
def get_x_pixel(coord, east, west, width):

deeposlandia/paramoptim.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -292,8 +292,7 @@ def run_model(train_generator, validation_generator, dl_model, output_folder,
292292
save_weights_only=False,
293293
mode='auto', period=1)
294294
terminate_on_nan = callbacks.TerminateOnNaN()
295-
earlystop = callbacks.EarlyStopping(monitor='val_acc',
296-
min_delta=0.001,
295+
# The validation loss must decrease for the model to improve: the early
# stopping criterion is hence evaluated in 'min' mode
earlystop = callbacks.EarlyStopping(monitor='val_loss',
                                    patience=10,
                                    verbose=1,
                                    mode='min')

deeposlandia/train.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -241,8 +241,7 @@ def add_training_arguments(parser):
241241
save_weights_only=False,
242242
mode='auto', period=1)
243243
terminate_on_nan = callbacks.TerminateOnNaN()
244-
earlystop = callbacks.EarlyStopping(monitor='val_acc',
245-
min_delta=0.001,
244+
# The validation loss must decrease for the model to improve: the early
# stopping criterion is hence evaluated in 'min' mode
earlystop = callbacks.EarlyStopping(monitor='val_loss',
                                    patience=10,
                                    verbose=1,
                                    mode='min')

deeposlandia/webapp/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ def recover_image_info(dataset, filename):
112112
elif dataset == "aerial":
113113
size_aggregation = "250_full"
114114
elif dataset == "tanzania":
115-
size_aggregation = "384_full"
115+
size_aggregation = "512_full"
116116
elif dataset == "shapes":
117117
size_aggregation = "64_full"
118118
else:

0 commit comments

Comments
 (0)