Merge pull request #47 from apyskir/dev-morpho
Dev morpho
jakubczakon authored May 9, 2018
2 parents 57f29d1 + c21f4a5 commit 7210a61
Showing 7 changed files with 113 additions and 26 deletions.
28 changes: 19 additions & 9 deletions main.py
@@ -20,7 +20,8 @@
ctx = neptune.Context()
params = read_params(ctx)

set_seed(1234)
seed = 1234
set_seed(seed)


@click.group()
@@ -37,6 +38,7 @@ def prepare_metadata(train_data, valid_data, test_data, public_paths):
logger.info('creating metadata')
meta = generate_metadata(data_dir=params.data_dir,
masks_overlayed_dir=params.masks_overlayed_dir,
masks_overlayed_eroded_dir=params.masks_overlayed_eroded_dir,
competition_stage=params.competition_stage,
process_train_data=train_data,
process_validation_data=valid_data,
@@ -49,14 +51,22 @@ def prepare_metadata(train_data, valid_data, test_data, public_paths):


@action.command()
def prepare_masks():
@click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False)
def prepare_masks(dev_mode):
if params.erode_selem_size > 0:
    erode = params.erode_selem_size
    target_dir = params.masks_overlayed_eroded_dir
else:
    erode = 0
    target_dir = params.masks_overlayed_dir
for dataset in ["train", "val"]:
logger.info('Overlaying masks, dataset: {}'.format(dataset))
overlay_masks(data_dir=params.data_dir,
dataset=dataset,
target_dir=params.masks_overlayed_dir,
target_dir=target_dir,
category_ids=CATEGORY_IDS,
is_small=False)
erode=erode,
is_small=dev_mode)


@action.command()
@@ -77,8 +87,8 @@ def _train(pipeline_name, dev_mode):
meta_valid = meta[meta['is_valid'] == 1]

if dev_mode:
meta_train = meta_train.sample(20, random_state=1234)
meta_valid = meta_valid.sample(10, random_state=1234)
meta_train = meta_train.sample(20, random_state=seed)
meta_valid = meta_valid.sample(10, random_state=seed)

data = {'input': {'meta': meta_train,
'meta_valid': meta_valid,
@@ -108,7 +118,7 @@ def _evaluate(pipeline_name, dev_mode, chunk_size):
meta_valid = meta[meta['is_valid'] == 1]

if dev_mode:
meta_valid = meta_valid.sample(30, random_state=1234)
meta_valid = meta_valid.sample(30, random_state=seed)

pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
prediction = generate_prediction(meta_valid, pipeline, logger, CATEGORY_IDS, chunk_size)
@@ -146,7 +156,7 @@ def _predict(pipeline_name, dev_mode, submit_predictions, chunk_size):
meta_test = meta[meta['is_test'] == 1]

if dev_mode:
meta_test = meta_test.sample(2, random_state=1234)
meta_test = meta_test.sample(2, random_state=seed)

pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
prediction = generate_prediction(meta_test, pipeline, logger, CATEGORY_IDS, chunk_size)
@@ -161,6 +171,7 @@ def _predict(pipeline_name, dev_mode, submit_predictions, chunk_size):
if submit_predictions:
_make_submission(submission_filepath)


@action.command()
@click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True)
@click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False)
@@ -262,4 +273,3 @@ def _generate_prediction_in_chunks(meta_data, pipeline, logger, category_ids, chunk_size):

if __name__ == "__main__":
action()
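The prepare_masks flow is now driven by config alone: a positive erode_selem_size routes eroded masks to masks_overlayed_eroded_dir, while zero keeps plain masks in masks_overlayed_dir. A usage sketch (command name as registered on the click group above; since generate_metadata now records paths under both directories, the command is presumably run once per setting):

python main.py prepare_masks --dev_mode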

17 changes: 9 additions & 8 deletions neptune.yaml
@@ -25,13 +25,15 @@ parameters:
# data_dir: /YOUR_PATH_TO_DATA_ON_CLOUD
# meta_dir: /YOUR_PATH_TO_DATA_ON_CLOUD
# masks_overlayed_dir: /YOUR_PATH_TO_DATA_ON_CLOUD/masks_overlayed
# masks_overlayed_eroded_dir: /YOUR_PATH_TO_DATA_ON_CLOUD/masks_overlayed_eroded/
# experiment_dir: /YOUR_PATH_TO_OUTPUT_FOLDER_ON_CLOUD/experiments

# Local Environment
data_dir: /path/to/data
meta_dir: /path/to/data
masks_overlayed_dir: /path/to/masks_overlayed
experiment_dir: /path/to/work/dir
data_dir: /path/to/data
meta_dir: /path/to/data
masks_overlayed_dir: /path/to/masks_overlayed
masks_overlayed_eroded_dir: /path/to/masks_overlayed_eroded
experiment_dir: /path/to/work/dir

overwrite: 0
num_workers: 4
@@ -57,11 +59,8 @@ parameters:

# U-Net loss weights (multi-output)
mask: 0.3
contour: 0.5
contour_touching: 0.1
center: 0.1
bce_mask: 1.0
dice_mask: 1.0
dice_mask: 2.0

# Training schedule
epochs_nr: 100
@@ -84,6 +83,8 @@ parameters:
# Postprocessing
threshold: 0.5
min_nuclei_size: 20
erode_selem_size: 5
dilate_selem_size: 5

#Neptune monitor
unet_outputs_to_plot: '["multichannel_map",]'
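The two new morphology sizes are meant to work as a pair: ground-truth masks are eroded by a 5x5 rectangle before training, and predicted masks are dilated back by the same element afterwards. A toy round-trip showing why the sizes should normally match (a sketch using the skimage calls this PR relies on; the 8x8 square is illustrative):

import numpy as np
from skimage.morphology import binary_erosion, binary_dilation, rectangle

selem = rectangle(5, 5)
mask = np.zeros((16, 16), dtype=bool)
mask[4:12, 4:12] = True                    # an 8x8 "building" footprint
eroded = binary_erosion(mask, selem)       # shrinks it to 4x4
restored = binary_dilation(eroded, selem)  # grows it back to 8x8
# Round-trips exactly for a compact blob; objects thinner than the element
# vanish under erosion, which is what add_dropped_objects (below) guards against.
assert restored.sum() == mask.sum()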
4 changes: 2 additions & 2 deletions pipeline_config.py
@@ -10,7 +10,7 @@

SIZE_COLUMNS = ['height', 'width']
X_COLUMNS = ['file_path_image']
Y_COLUMNS = ['file_path_mask']
Y_COLUMNS = ['file_path_mask_eroded']
Y_COLUMNS_SCORING = ['ImageId']
CATEGORY_IDS = [None, 100]

@@ -96,5 +96,5 @@
},
},
'dropper': {'min_size': params.min_nuclei_size},
'postprocessor': {}
'postprocessor': {'dilate_selem_size': params.dilate_selem_size}
})
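These two config changes carry the morphology idea into the pipeline: Y_COLUMNS now points the loader at the eroded targets produced by prepare_masks, and the postprocessor entry feeds dilate_selem_size to the inference-time dilation step. A quick sanity check one might run on the generated metadata (the metadata.csv path is an assumption; column names are from this diff):

import pandas as pd

meta = pd.read_csv('metadata.csv')  # assumed output location of prepare_metadata
train = meta[meta['is_train'] == 1]
assert train['file_path_mask_eroded'].notna().all()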
18 changes: 15 additions & 3 deletions pipelines.py
@@ -5,8 +5,8 @@
from steps.preprocessing.misc import XYSplit
from utils import squeeze_inputs
from models import PyTorchUNet, PyTorchUNetStream
from postprocessing import Resizer, CategoryMapper, MulticlassLabeler, \
ResizerStream, CategoryMapperStream, MulticlassLabelerStream
from postprocessing import Resizer, CategoryMapper, MulticlassLabeler, MaskDilator, \
ResizerStream, CategoryMapperStream, MulticlassLabelerStream, MaskDilatorStream


def unet(config, train_mode):
@@ -19,12 +19,24 @@ def unet(config, train_mode):

loader = preprocessing(config, model_type='single', is_train=train_mode)
unet = Step(name='unet',
transformer= PyTorchUNetStream(**config.unet) if config.execution.stream_mode else PyTorchUNet(**config.unet),
transformer=PyTorchUNetStream(**config.unet) if config.execution.stream_mode else PyTorchUNet(
**config.unet),
input_steps=[loader],
cache_dirpath=config.env.cache_dirpath,
save_output=save_output, load_saved_output=load_saved_output)

mask_postprocessed = mask_postprocessing(unet, config, save_output=save_output)
if config.postprocessor["dilate_selem_size"] > 0:
    mask_postprocessed = Step(name='mask_dilation',
                              transformer=MaskDilatorStream(**config.postprocessor)
                              if config.execution.stream_mode
                              else MaskDilator(**config.postprocessor),
                              input_steps=[mask_postprocessed],
                              adapter={'images': [(mask_postprocessed.name, 'categorized_images')]},
                              cache_dirpath=config.env.cache_dirpath,
                              save_output=save_output,
                              load_saved_output=False)
detached = multiclass_object_labeler(mask_postprocessed, config, save_output=save_output)
output = Step(name='output',
transformer=Dummy(),
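Because the dilation step republishes its result under the same 'categorized_images' key it consumes, the downstream labeler works unchanged whether or not dilation is enabled. The transformer choice repeats the pattern used for the U-Net step; a condensed sketch of that selection logic (the helper name is hypothetical):

def pick_dilator(config):
    # stream_mode favors the generator-based transformer so large image
    # collections are dilated lazily instead of held in memory at once
    cls = MaskDilatorStream if config.execution.stream_mode else MaskDilator
    return cls(**config.postprocessor)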
29 changes: 29 additions & 0 deletions postprocessing.py
@@ -1,6 +1,7 @@
import numpy as np
from scipy import ndimage as ndi
from skimage.transform import resize
from skimage.morphology import binary_dilation, rectangle
from tqdm import tqdm

from steps.base import BaseTransformer
@@ -34,6 +35,17 @@ def transform(self, images):
return {'categorized_images': categorized_images}


class MaskDilator(BaseTransformer):
    def __init__(self, dilate_selem_size):
        self.selem_size = dilate_selem_size

    def transform(self, images):
        dilated_images = []
        for image in tqdm(images):
            dilated_images.append(dilate_image(image, self.selem_size))
        return {'categorized_images': dilated_images}


class MulticlassLabelerStream(BaseTransformer):
def transform(self, images):
return {'labeled_images': self._transform(images)}
@@ -64,6 +76,18 @@ def _transform(self, images):
yield categorize_image(image)


class MaskDilatorStream(BaseTransformer):
    def __init__(self, dilate_selem_size):
        self.selem_size = dilate_selem_size

    def transform(self, images):
        return {'categorized_images': self._transform(images)}

    def _transform(self, images):
        for image in tqdm(images):
            yield dilate_image(image, self.selem_size)


def label(mask):
    labeled, nr_true = ndi.label(mask)
    return labeled
@@ -75,3 +99,8 @@ def label_multiclass_image(mask):
labeled_channels.append(label(mask == label_nr))
labeled_image = np.stack(labeled_channels)
return labeled_image


def dilate_image(mask, selem_size):
    selem = rectangle(selem_size, selem_size)
    return binary_dilation(mask, selem=selem)
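A minimal check of the new helper (a sketch; it assumes a skimage version where binary_dilation still accepts the selem keyword, as used above):

import numpy as np

mask = np.zeros((7, 7), dtype=np.uint8)
mask[3, 3] = 1                    # single-pixel object
dilated = dilate_image(mask, selem_size=3)
assert dilated.sum() == 9         # grown into a 3x3 block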
28 changes: 27 additions & 1 deletion preparation.py
@@ -7,13 +7,15 @@
from pycocotools.coco import COCO
from skimage.transform import resize
from tqdm import tqdm
from skimage.morphology import binary_erosion, rectangle

from utils import get_logger
from postprocessing import label

logger = get_logger()


def overlay_masks(data_dir, dataset, target_dir, category_ids, is_small=False):
def overlay_masks(data_dir, dataset, target_dir, category_ids, erode=0, is_small=False):
if is_small:
suffix = "-small"
else:
@@ -31,6 +33,9 @@ def overlay_masks(data_dir, dataset, target_dir, category_ids, is_small=False):
annotation_ids = coco.getAnnIds(imgIds=image_id, catIds=[category_id, ])
annotations = coco.loadAnns(annotation_ids)
mask = overlay_masks_from_annotations(annotations, image_size)
if erode > 0:
    mask_eroded = overlay_eroded_masks_from_annotations(annotations, image_size, erode)
    mask = add_dropped_objects(mask, mask_eroded)
mask_overlayed = np.where(mask, category_nr, mask_overlayed)
target_filepath = os.path.join(target_dir, dataset, "masks", image["file_name"][:-4]) + ".png"
os.makedirs(os.path.dirname(target_filepath), exist_ok=True)
@@ -50,6 +55,18 @@ def overlay_masks_from_annotations(annotations, image_size):
return np.where(mask > 0, 1, 0).astype('uint8')


def overlay_eroded_masks_from_annotations(annotations, image_size, selem_size):
    mask = np.zeros(image_size)
    selem = rectangle(selem_size, selem_size)
    for ann in annotations:
        rle = cocomask.frPyObjects(ann['segmentation'], image_size[0], image_size[1])
        m = cocomask.decode(rle)
        m = m.reshape(image_size)
        m = binary_erosion(m, selem=selem)
        mask += m
    return np.where(mask > 0, 1, 0).astype('uint8')


def preprocess_image(img, target_size=(128, 128)):
img = resize(img, target_size, mode='constant')
x = np.expand_dims(img, axis=0)
@@ -60,3 +77,12 @@ def preprocess_image(img, target_size=(128, 128)):
else:
x = torch.autograd.Variable(x, volatile=True)
return x


def add_dropped_objects(original, processed):
    reconstructed = processed.copy()
    labeled = label(original)
    for i in range(1, labeled.max() + 1):
        # restore any object that erosion removed entirely
        if not np.any((labeled == i) & processed):
            reconstructed += (labeled == i)
    return reconstructed.astype('uint8')
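Erosion with a 5x5 element deletes any object thinner than 5 pixels outright; add_dropped_objects restores such objects at full size while leaving survivors shrunken. A worked sketch (toy arrays; it assumes these helpers are importable as in this file):

import numpy as np
from skimage.morphology import binary_erosion, rectangle

original = np.zeros((12, 12), dtype=np.uint8)
original[1:3, 1:3] = 1                  # 2x2 object: wiped out by 5x5 erosion
original[5:10, 5:10] = 1                # 5x5 object: survives as a single pixel

eroded = binary_erosion(original, rectangle(5, 5)).astype('uint8')
restored = add_dropped_objects(original, eroded)
assert restored[1:3, 1:3].all()         # dropped object reinstated unshrunk
assert restored[5:10, 5:10].sum() == 1  # survivor stays eroded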
15 changes: 12 additions & 3 deletions utils.py
@@ -136,6 +136,7 @@ def read_params(ctx):

def generate_metadata(data_dir,
masks_overlayed_dir,
masks_overlayed_eroded_dir,
process_train_data=True,
process_validation_data=True,
process_test_data=True,
@@ -146,7 +147,7 @@ def generate_metadata(data_dir,

def _generate_metadata(dataset):
assert dataset in ["train", "test", "val"], "Unknown dataset!"
df_metadata = pd.DataFrame(columns=['ImageId', 'file_path_image', 'file_path_mask',
df_metadata = pd.DataFrame(columns=['ImageId', 'file_path_image', 'file_path_mask', 'file_path_mask_eroded',
'is_train', 'is_valid', 'is_test', 'n_buildings'])

if dataset == "test":
@@ -160,10 +161,15 @@ def _generate_metadata(dataset):

if public_paths:
images_path_to_write = os.path.join(public_path, dataset)
masks_overlayed_dir_to_write = os.path.join(public_path, "masks_overlayed")
mask_overlayed_suffix = os.path.join(masks_overlayed_dir, "")
masks_overlayed_dir_to_write = os.path.join(public_path, mask_overlayed_suffix.split("/")[-2])
mask_overlayed_eroded_suffix = os.path.join(masks_overlayed_eroded_dir, "")
masks_overlayed_eroded_dir_to_write = os.path.join(public_path,
mask_overlayed_eroded_suffix.split("/")[-2])
else:
images_path_to_write = images_path
masks_overlayed_dir_to_write = masks_overlayed_dir
masks_overlayed_eroded_dir_to_write = masks_overlayed_eroded_dir

for image_file_name in sorted(os.listdir(images_path)):
file_path_image = os.path.join(images_path_to_write, image_file_name)
@@ -175,11 +181,14 @@ def _generate_metadata(dataset):

if dataset == "test_images":
file_path_mask = None
file_path_mask_eroded = None
n_buildings = None
is_test = 1
else:
file_path_mask = os.path.join(masks_overlayed_dir_to_write, dataset, "masks",
image_file_name[:-4] + ".png")
file_path_mask_eroded = os.path.join(masks_overlayed_eroded_dir_to_write, dataset, "masks",
image_file_name[:-4] + ".png")
n_buildings = None
if dataset == "val":
is_valid = 1
@@ -189,6 +198,7 @@ def _generate_metadata(dataset):
df_metadata = df_metadata.append({'ImageId': image_id,
'file_path_image': file_path_image,
'file_path_mask': file_path_mask,
'file_path_mask_eroded': file_path_mask_eroded,
'is_train': is_train,
'is_valid': is_valid,
'is_test': is_test,
@@ -332,7 +342,6 @@ def categorize_image(image, channel_axis=0):


def coco_evaluation(gt_filepath, prediction_filepath, image_ids, category_ids):

coco = COCO(gt_filepath)
coco_results = coco.loadRes(prediction_filepath)
cocoEval = COCOeval(coco, coco_results)
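The os.path.join(dir, "") step in the public-path branch above normalizes a trailing separator so split("/")[-2] reliably yields the directory's leaf name. A small demonstration (POSIX separators assumed, matching the split("/") in the code; paths are illustrative):

import os

for d in ('/data/masks_overlayed', '/data/masks_overlayed/'):
    leaf = os.path.join(d, '').split('/')[-2]
    assert leaf == 'masks_overlayed'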
