Skip to content

Commit

Permalink
Merge pull request #51 from minerva-ml/dev
Browse files Browse the repository at this point in the history
Added evaluation in chunks, added the erosion (pre-processing) / dilation (post-processing) approach, and enabled multiclass problem definition
  • Loading branch information
jakubczakon authored May 9, 2018
2 parents 5d6da2a + 7210a61 commit 971405c
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 34 deletions.
28 changes: 19 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
ctx = neptune.Context()
params = read_params(ctx)

set_seed(1234)
seed = 1234
set_seed(seed)


@click.group()
Expand All @@ -37,6 +38,7 @@ def prepare_metadata(train_data, valid_data, test_data, public_paths):
logger.info('creating metadata')
meta = generate_metadata(data_dir=params.data_dir,
masks_overlayed_dir=params.masks_overlayed_dir,
masks_overlayed_eroded_dir=params.masks_overlayed_eroded_dir,
competition_stage=params.competition_stage,
process_train_data=train_data,
process_validation_data=valid_data,
Expand All @@ -49,14 +51,22 @@ def prepare_metadata(train_data, valid_data, test_data, public_paths):


@action.command()
def prepare_masks():
@click.option('-d', '--dev_mode', help='if true only a small sample of data will be used', is_flag=True, required=False)
def prepare_masks(dev_mode):
if params.erode_selem_size > 0:
erode = params.erode_selem_size
target_dir = params.masks_overlayed_eroded_dir
else:
erode = 0
target_dir = params.masks_overlayed_dir
for dataset in ["train", "val"]:
logger.info('Overlaying masks, dataset: {}'.format(dataset))
overlay_masks(data_dir=params.data_dir,
dataset=dataset,
target_dir=params.masks_overlayed_dir,
target_dir=target_dir,
category_ids=CATEGORY_IDS,
is_small=False)
erode=erode,
is_small=dev_mode)


@action.command()
Expand All @@ -77,8 +87,8 @@ def _train(pipeline_name, dev_mode):
meta_valid = meta[meta['is_valid'] == 1]

if dev_mode:
meta_train = meta_train.sample(20, random_state=1234)
meta_valid = meta_valid.sample(10, random_state=1234)
meta_train = meta_train.sample(20, random_state=seed)
meta_valid = meta_valid.sample(10, random_state=seed)

data = {'input': {'meta': meta_train,
'meta_valid': meta_valid,
Expand Down Expand Up @@ -108,7 +118,7 @@ def _evaluate(pipeline_name, dev_mode, chunk_size):
meta_valid = meta[meta['is_valid'] == 1]

if dev_mode:
meta_valid = meta_valid.sample(30, random_state=1234)
meta_valid = meta_valid.sample(30, random_state=seed)

pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
prediction = generate_prediction(meta_valid, pipeline, logger, CATEGORY_IDS, chunk_size)
Expand Down Expand Up @@ -146,7 +156,7 @@ def _predict(pipeline_name, dev_mode, submit_predictions, chunk_size):
meta_test = meta[meta['is_test'] == 1]

if dev_mode:
meta_test = meta_test.sample(2, random_state=1234)
meta_test = meta_test.sample(2, random_state=seed)

pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
prediction = generate_prediction(meta_test, pipeline, logger, CATEGORY_IDS, chunk_size)
Expand All @@ -161,6 +171,7 @@ def _predict(pipeline_name, dev_mode, submit_predictions, chunk_size):
if submit_predictions:
_make_submission(submission_filepath)


@action.command()
@click.option('-p', '--pipeline_name', help='pipeline to be trained', required=True)
@click.option('-s', '--submit_predictions', help='submit predictions if true', is_flag=True, required=False)
Expand Down Expand Up @@ -262,4 +273,3 @@ def _generate_prediction_in_chunks(meta_data, pipeline, logger, category_ids, ch

if __name__ == "__main__":
action()

45 changes: 45 additions & 0 deletions models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import torch
from torch.autograd import Variable
from torch import optim

from callbacks import NeptuneMonitorSegmentation
Expand Down Expand Up @@ -25,6 +27,49 @@ def transform(self, datagen, validation_datagen=None):
outputs[name] = softmax(prediction, axis=1)
return outputs

class PyTorchUNetStream(Model):
    """U-Net model whose ``transform`` yields predictions lazily.

    ``transform`` returns a generator instead of a materialised array, so
    downstream steps can consume one predicted map at a time.
    """

    def __init__(self, architecture_config, training_config, callbacks_config):
        """Build the network, its Adam optimizer, the loss spec and callbacks.

        Args:
            architecture_config: mapping with ``model_params``,
                ``regularizer_params`` and ``optimizer_params`` entries.
            training_config: forwarded unchanged to the base class.
            callbacks_config: forwarded unchanged to the base class.
        """
        super().__init__(architecture_config, training_config, callbacks_config)
        self.model = UNet(**architecture_config['model_params'])
        self.weight_regularization = weight_regularization_unet
        self.optimizer = optim.Adam(
            self.weight_regularization(self.model, **architecture_config['regularizer_params']),
            **architecture_config['optimizer_params'])
        self.loss_function = [('multichannel_map', multiclass_segmentation_loss, 1.0)]
        self.callbacks = callbacks_unet(self.callbacks_config)

    def transform(self, datagen, validation_datagen=None):
        """Return ``{'<output_name>_prediction': generator}`` for a single-output model.

        Args:
            datagen: ``(batch_generator, steps)`` pair.
            validation_datagen: accepted for interface compatibility and
                passed through; not used by the prediction loop.

        Raises:
            NotImplementedError: if the model has more than one (or no)
                output name.
        """
        # ``output_names`` is not set in this class -- presumably provided by
        # the base class from ``loss_function``; verify against ``Model``.
        if len(self.output_names) != 1:
            raise NotImplementedError
        output_generator = self._transform(datagen, validation_datagen)
        return {'{}_prediction'.format(self.output_names[0]): output_generator}

    def _transform(self, datagen, validation_datagen=None):
        """Yield one ``softmax``-ed (axis 0) output per input sample.

        The model is switched to eval mode up front and back to train mode
        once the loop finishes. Because this is a generator, neither happens
        until the caller starts iterating, and train mode is only restored
        when the generator runs to completion.
        """
        self.model.eval()
        batch_gen, steps = datagen
        # Count batches from 1 so that exactly ``steps`` batches are consumed.
        # The previous 0-based ``batch_id == steps`` check only fired after
        # ``steps + 1`` batches had already been processed.
        for batch_nr, data in enumerate(batch_gen, start=1):
            X = data[0] if isinstance(data, list) else data

            # NOTE(review): ``volatile=True`` only disables autograd on
            # PyTorch < 0.4; on newer releases it is ignored and this loop
            # should run under ``torch.no_grad()`` -- confirm the pinned
            # torch version before changing.
            X = Variable(X, volatile=True)
            if torch.cuda.is_available():
                X = X.cuda()

            outputs_batch = self.model(X).data.cpu().numpy()
            for output in outputs_batch:
                yield softmax(output, axis=0)

            if batch_nr == steps:
                break
        self.model.train()


def weight_regularization(model, regularize, weight_decay_conv2d, weight_decay_linear):
if regularize:
Expand Down
18 changes: 10 additions & 8 deletions neptune.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,23 @@ parameters:
# data_dir: /YOUR_PATH_TO_DATA_ON_CLOUD
# meta_dir: /YOUR_PATH_TO_DATA_ON_CLOUD
# masks_overlayed_dir: /YOUR_PATH_TO_DATA_ON_CLOUD/masks_overlayed
# masks_overlayed_eroded_dir: /YOUR_PATH_TO_DATA_ON_CLOUD/masks_overlayed_eroded/
# experiment_dir: /YOUR_PATH_TO_OUTPUT_FOLDER_ON_CLOUD/experiments

# Local Environment
data_dir: /path/to/data
meta_dir: /path/to/data
masks_overlayed_dir: /path/to/masks_overlayed
experiment_dir: /path/to/work/dir
data_dir: /path/to/data
meta_dir: /path/to/data
masks_overlayed_dir: /path/to/masks_overlayed
masks_overlayed_eroded_dir: /path/to/masks_overlayed_eroded
experiment_dir: /path/to/work/dir

overwrite: 0
num_workers: 4
load_in_memory: 0
pin_memory: 1
competition_stage: 1
api_key: YOUR_CROWDAI_API_KEY
stream_mode: False

# General parameters
image_h: 256
Expand All @@ -56,11 +59,8 @@ parameters:

# U-Net loss weights (multi-output)
mask: 0.3
contour: 0.5
contour_touching: 0.1
center: 0.1
bce_mask: 1.0
dice_mask: 1.0
dice_mask: 2.0

# Training schedule
epochs_nr: 100
Expand All @@ -83,6 +83,8 @@ parameters:
# Postprocessing
threshold: 0.5
min_nuclei_size: 20
erode_selem_size: 5
dilate_selem_size: 5

#Neptune monitor
unet_outputs_to_plot: '["multichannel_map",]'
7 changes: 4 additions & 3 deletions pipeline_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

SIZE_COLUMNS = ['height', 'width']
X_COLUMNS = ['file_path_image']
Y_COLUMNS = ['file_path_mask']
Y_COLUMNS = ['file_path_mask_eroded']
Y_COLUMNS_SCORING = ['ImageId']
CATEGORY_IDS = [None, 100]

Expand All @@ -20,7 +20,8 @@
'num_classes': 2,
'img_H-W': (params.image_h, params.image_w),
'batch_size_train': params.batch_size_train,
'batch_size_inference': params.batch_size_inference
'batch_size_inference': params.batch_size_inference,
'stream_mode': params.stream_mode
}

SOLUTION_CONFIG = AttrDict({
Expand Down Expand Up @@ -95,5 +96,5 @@
},
},
'dropper': {'min_size': params.min_nuclei_size},
'postprocessor': {}
'postprocessor': {'dilate_selem_size': params.dilate_selem_size}
})
26 changes: 20 additions & 6 deletions pipelines.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from functools import partial

import loaders
from models import PyTorchUNet
from postprocessing import Resizer, CategoryMapper, MulticlassLabeler
from steps.base import Step, Dummy
from steps.preprocessing.misc import XYSplit
from utils import squeeze_inputs
from models import PyTorchUNet, PyTorchUNetStream
from postprocessing import Resizer, CategoryMapper, MulticlassLabeler, MaskDilator, \
ResizerStream, CategoryMapperStream, MulticlassLabelerStream, MaskDilatorStream


def unet(config, train_mode):
Expand All @@ -18,12 +19,24 @@ def unet(config, train_mode):

loader = preprocessing(config, model_type='single', is_train=train_mode)
unet = Step(name='unet',
transformer=PyTorchUNet(**config.unet),
transformer=PyTorchUNetStream(**config.unet) if config.execution.stream_mode else PyTorchUNet(
**config.unet),
input_steps=[loader],
cache_dirpath=config.env.cache_dirpath,
save_output=save_output, load_saved_output=load_saved_output)

mask_postprocessed = mask_postprocessing(unet, config, save_output=save_output)
if config.postprocessor["dilate_selem_size"] > 0:
mask_postprocessed = Step(name='mask_dilation',
transformer=MaskDilatorStream(
**config.postprocessor) if config.execution.stream_mode else MaskDilator(
**config.postprocessor),
input_steps=[mask_postprocessed],
adapter={'images': ([(mask_postprocessed.name, 'categorized_images')]),
},
cache_dirpath=config.env.cache_dirpath,
save_output=save_output,
load_saved_output=False)
detached = multiclass_object_labeler(mask_postprocessed, config, save_output=save_output)
output = Step(name='output',
transformer=Dummy(),
Expand All @@ -45,9 +58,10 @@ def preprocessing(config, model_type, is_train, loader_mode=None):
raise NotImplementedError
return loader


def multiclass_object_labeler(postprocessed_mask, config, save_output=True):
labeler = Step(name='labeler',
transformer=MulticlassLabeler(),
transformer=MulticlassLabelerStream() if config.execution.stream_mode else MulticlassLabeler(),
input_steps=[postprocessed_mask],
adapter={'images': ([(postprocessed_mask.name, 'categorized_images')]),
},
Expand Down Expand Up @@ -164,7 +178,7 @@ def _preprocessing_multitask_generator(config, is_train, use_patching):

def mask_postprocessing(model, config, save_output=False):
mask_resize = Step(name='mask_resize',
transformer=Resizer(),
transformer=ResizerStream() if config.execution.stream_mode else Resizer(),
input_data=['input'],
input_steps=[model],
adapter={'images': ([(model.name, 'multichannel_map_prediction')]),
Expand All @@ -173,7 +187,7 @@ def mask_postprocessing(model, config, save_output=False):
cache_dirpath=config.env.cache_dirpath,
save_output=save_output)
category_mapper = Step(name='category_mapper',
transformer=CategoryMapper(),
transformer=CategoryMapperStream() if config.execution.stream_mode else CategoryMapper(),
input_steps=[mask_resize],
adapter={'images': ([('mask_resize', 'resized_images')]),
},
Expand Down
59 changes: 59 additions & 0 deletions postprocessing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from scipy import ndimage as ndi
from skimage.transform import resize
from skimage.morphology import binary_dilation, rectangle
from tqdm import tqdm

from steps.base import BaseTransformer
Expand Down Expand Up @@ -34,6 +35,59 @@ def transform(self, images):
return {'categorized_images': categorized_images}


class MaskDilator(BaseTransformer):
    """Eager transformer that runs ``dilate_image`` over every input image."""

    def __init__(self, dilate_selem_size):
        # Size handed to ``dilate_image`` for each image.
        self.selem_size = dilate_selem_size

    def transform(self, images):
        """Return ``{'categorized_images': [...]}`` with each image dilated.

        The whole result list is built before returning; ``tqdm`` reports
        progress while iterating ``images``.
        """
        return {
            'categorized_images': [dilate_image(image, self.selem_size)
                                   for image in tqdm(images)]
        }


class MulticlassLabelerStream(BaseTransformer):
    """Streaming labeler: applies ``label_multiclass_image`` lazily per image."""

    def transform(self, images):
        """Return ``{'labeled_images': generator}``; nothing is computed yet."""
        return {'labeled_images': self._transform(images)}

    def _transform(self, images):
        """Yield the labelled version of each image, in input order."""
        yield from map(label_multiclass_image, images)


class ResizerStream(BaseTransformer):
    """Streaming resizer: lazily resizes each image to its paired target size."""

    def transform(self, images, target_sizes):
        """Return ``{'resized_images': generator}`` over the paired inputs."""
        return {'resized_images': self._transform(images, target_sizes)}

    def _transform(self, images, target_sizes):
        """Yield each image resized to ``(image.shape[0],) + target_size``.

        The first axis of every image is kept as-is; ``target_size`` must be
        a tuple because it is concatenated onto a tuple. Iteration stops with
        the shorter of ``images`` / ``target_sizes`` (``zip`` semantics).
        """
        paired = tqdm(zip(images, target_sizes))
        for array, size in paired:
            output_shape = (array.shape[0],) + size
            yield resize(array, output_shape, mode='constant')


class CategoryMapperStream(BaseTransformer):
    """Streaming category mapper: applies ``categorize_image`` lazily per image."""

    def transform(self, images):
        """Return ``{'categorized_images': generator}``; nothing is computed yet."""
        return {'categorized_images': self._transform(images)}

    def _transform(self, images):
        """Yield ``categorize_image(image)`` for each image, with a progress bar."""
        yield from map(categorize_image, tqdm(images))


class MaskDilatorStream(BaseTransformer):
    """Streaming counterpart of ``MaskDilator``: dilates images one at a time."""

    def __init__(self, dilate_selem_size):
        # Size handed to ``dilate_image`` for each image.
        self.selem_size = dilate_selem_size

    def transform(self, images):
        """Return ``{'categorized_images': generator}``; nothing is computed yet."""
        return {'categorized_images': self._transform(images)}

    def _transform(self, images):
        """Yield each image after ``dilate_image``, with a progress bar."""
        yield from (dilate_image(image, self.selem_size) for image in tqdm(images))


def label(mask):
    """Label the connected components of ``mask``.

    Delegates to ``scipy.ndimage.label`` with its default connectivity and
    returns only the labelled array; the component count is discarded.
    """
    labeled, _ = ndi.label(mask)
    return labeled
Expand All @@ -45,3 +99,8 @@ def label_multiclass_image(mask):
labeled_channels.append(label(mask == label_nr))
labeled_image = np.stack(labeled_channels)
return labeled_image


def dilate_image(mask, selem_size):
    """Binary-dilate ``mask`` with a ``selem_size`` x ``selem_size`` rectangle.

    Args:
        mask: image to dilate; handed unchanged to ``binary_dilation``.
        selem_size: side length of the rectangular structuring element.

    Returns:
        Whatever ``skimage.morphology.binary_dilation`` returns for the mask.
    """
    selem = rectangle(selem_size, selem_size)
    # Pass the structuring element positionally: scikit-image renamed the
    # ``selem`` keyword to ``footprint`` (0.19+), and the positional form
    # works under either name.
    return binary_dilation(mask, selem)
Loading

0 comments on commit 971405c

Please sign in to comment.