Skip to content

Commit

Permalink
Dev lgbm (#147) (#152)
Browse files Browse the repository at this point in the history
* initial restructure

* thresholds on unet output

* added gmean tta, experimented with thresholding (#125)

* feature extractor and lightgbm

* pipeline is running ok

* tmp commit

* lgbm ready for tests

* tmp

* faster nms and feature extraction

* small fix

* cleaning

* Dev repo cleanup (#138)

* initial restructure

* clean structure (#126)

* clean structure

* correct readme

* further cleaning

* Dev apply transformer (#131)

* clean structure

* correct readme

* further cleaning

* resizer docstring

* couple docstrings

* make apply transformer, memory cache

* fixes

* postprocessing docstrings

* fixes in PR

* Dev repo cleanup (#132)

* cleanup

* remove src.

* Dev clean tta (#134)

* added resize padding, refactored inference pipelines

* refactored pipelines

* added color shift augmentation

* reduced caching to just mask_resize

* updated config

* Dev-repo_cleanup models and losses docstrings (#135)

* models and losses docstrings

* small fixes in docstrings

* resolve conflicts with TTA PR (#137)

* refactor in stream mode (#139)

* hot fix of mask_postprocessing in tta with new make transformer

* finishing merge

* finishing merge v2

* finishing merge v3

* finishing merge v4

* tmp commit

* lgbm train and evaluate pipelines run correctly

* something is not yes

* fix

* working lgbm training with ugly train_mode=True

* back to pipelines.py

* small fix

* preparing PR

* preparing PR v2

* preparing PR v2

* fix

* fix_2

* fix_3

* fix_4
  • Loading branch information
jakubczakon authored Jun 21, 2018
1 parent 8e269de commit 20e64ed
Show file tree
Hide file tree
Showing 12 changed files with 485 additions and 51 deletions.
22 changes: 17 additions & 5 deletions neptune.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
project: YOUR_PROJECT_NAME

name: mapping_challenge_open_solution
tags: [solution_1]
tags: [solution_5]

metric:
channel: 'Final Validation Score'
Expand Down Expand Up @@ -41,7 +41,6 @@ parameters:
loader_mode: resize
stream_mode: 0


# General parameters
image_h: 256
image_w: 256
Expand Down Expand Up @@ -86,11 +85,10 @@ parameters:

# Postprocessing
threshold: 0.5
min_nuclei_size: 20
erosion_percentages: '[10,20,30]'
erode_selem_size: 0
dilate_selem_size: 2
tta_aggregation_method: gmean
nms__iou_threshold: 0.5

# Inference padding
crop_image_h: 300
Expand All @@ -100,4 +98,18 @@ parameters:
pad_method: 'replicate'

#Neptune monitor
unet_outputs_to_plot: '["multichannel_map",]'
unet_outputs_to_plot: '["multichannel_map",]'

#Scoring model
scoring_model: 'lgbm'
scoring_model__num_training_examples: 10000

#LightGBM
lgbm__learning_rate: 0.001
lgbm__num_leaves: 10
lgbm__min_data: 50
lgbm__max_depth: 10
lgbm__number_of_trees: 100
lgbm__early_stopping: 5
lgbm__train_size: 0.7
lgbm__target: 'iou'
4 changes: 2 additions & 2 deletions src/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from .steps.utils import get_logger
from .steps.pytorch.callbacks import NeptuneMonitor, ValidationMonitor
from .utils import softmax, coco_evaluation, create_annotations, make_apply_transformer
from .pipeline_config import CATEGORY_IDS, Y_COLUMNS_SCORING
from .pipeline_config import CATEGORY_IDS, Y_COLUMNS_SCORING, CATEGORY_LAYERS

logger = get_logger()

Expand Down Expand Up @@ -200,7 +200,7 @@ def _generate_prediction(self, cache_dirpath, outputs):
output = pipeline.transform(data)
y_pred = output['y_pred']

prediction = create_annotations(self.meta_valid, y_pred, logger, CATEGORY_IDS)
prediction = create_annotations(self.meta_valid, y_pred, logger, CATEGORY_IDS, CATEGORY_LAYERS)
return prediction


Expand Down
6 changes: 3 additions & 3 deletions src/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,9 +436,9 @@ def _get_tta_data(self, i, row):


class TestTimeAugmentationAggregator(BaseTransformer):
def __init__(self, method, nthreads):
def __init__(self, method, num_threads):
self.method = method
self.nthreads = nthreads
self.num_threads = num_threads

@property
def agg_method(self):
Expand All @@ -456,7 +456,7 @@ def transform(self, images, tta_params, img_ids, **kwargs):
img_ids=img_ids,
agg_method=self.agg_method)
unique_img_ids = set(img_ids)
threads = min(self.nthreads, len(unique_img_ids))
threads = min(self.num_threads, len(unique_img_ids))
with mp.pool.ThreadPool(threads) as executor:
averages_images = executor.map(_aggregate_augmentations, unique_img_ids)
return {'aggregated_prediction': averages_images}
Expand Down
97 changes: 94 additions & 3 deletions src/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor

from .callbacks import NeptuneMonitorSegmentation, ValidationMonitorSegmentation
from .steps.pytorch.architectures.unet import UNet
from .steps.pytorch.callbacks import CallbackList, TrainingMonitor, ModelCheckpoint, \
ExperimentTiming, ExponentialLRScheduler, EarlyStopping
from .steps.pytorch.models import Model
from .steps.pytorch.validation import multiclass_segmentation_loss, DiceLoss
from .steps.sklearn.models import LightGBM, make_transformer, SklearnRegressor
from .utils import softmax
from .unet_models import AlbuNet, UNet11, UNetVGG16, UNetResNet

Expand Down Expand Up @@ -159,9 +164,12 @@ def __init__(self, architecture_config, training_config, callbacks_config):
class PyTorchUNetWeightedStream(BasePyTorchUNet):
def __init__(self, architecture_config, training_config, callbacks_config):
super().__init__(architecture_config, training_config, callbacks_config)
weighted_loss = partial(multiclass_weighted_cross_entropy,
**get_loss_variables(**architecture_config['weighted_cross_entropy']))
loss = partial(mixed_dice_cross_entropy_loss, dice_weight=architecture_config['loss_weights']['dice_mask'],
weights_function = partial(get_weights, **architecture_config['weighted_cross_entropy'])
weighted_loss = partial(multiclass_weighted_cross_entropy, weights_function=weights_function)
dice_loss = partial(multiclass_dice_loss, excluded_classes=[0])
loss = partial(mixed_dice_cross_entropy_loss,
dice_loss=dice_loss,
dice_weight=architecture_config['loss_weights']['dice_mask'],
cross_entropy_weight=architecture_config['loss_weights']['bce_mask'],
cross_entropy_loss=weighted_loss,
**architecture_config['dice'])
Expand Down Expand Up @@ -201,6 +209,81 @@ def _transform(self, datagen, validation_datagen=None):
self.model.train()


class ScoringLightGBM(LightGBM):
    """LightGBM-based scorer trained on per-layer feature tables.

    Args:
        model_params: forwarded unchanged to ``LightGBM.__init__``.
        training_params: forwarded unchanged to ``LightGBM.__init__``.
        train_size: ``train_size`` argument handed to ``train_test_split``
            when the feature table is divided into train / validation parts.
        target: name of the column used as the regression target; every
            other column of the feature table is treated as a feature.
    """

    def __init__(self, model_params, training_params, train_size, target):
        self.train_size = train_size
        self.target = target
        self.feature_names = []
        self.estimator = None
        # The parent initializer runs last, exactly as before, so whatever it
        # sets takes precedence over the placeholders assigned above.
        super().__init__(model_params, training_params)

    def fit(self, features, **kwargs):
        """Train on the concatenated feature table and return ``self``.

        ``features`` is flattened with ``_convert_features_to_df``; the result
        is split into train and validation parts, and both are handed to the
        parent ``fit``. Extra keyword arguments are accepted but not used.
        """
        feature_table = _convert_features_to_df(features)
        train_part, valid_part = train_test_split(feature_table,
                                                  train_size=self.train_size)
        self.feature_names = list(feature_table.columns.drop(self.target))
        fit_arguments = {
            'X': train_part[self.feature_names],
            'y': train_part[self.target],
            'X_valid': valid_part[self.feature_names],
            'y_valid': valid_part[self.target],
            'feature_names': self.feature_names,
            'categorical_features': [],
        }
        super().fit(**fit_arguments)
        return self

    def transform(self, features, **kwargs):
        """Score every layer of every image.

        Returns:
            ``{'scores': scores}`` where ``scores[i][j]`` is the list of
            predictions for layer ``j`` of image ``i`` (an empty list when
            that layer has no rows).
        """
        scores = []
        for per_image in features:
            scores.append([self._score_layer(layer) for layer in per_image])
        return {'scores': scores}

    def _score_layer(self, layer_features):
        """Return the predictions for one layer as a list ([] if it is empty)."""
        # presumably layer_features is a pandas DataFrame (it is indexed with
        # a list of column names) -- verify against the feature extractor.
        if len(layer_features) == 0:
            return []
        predicted = super().transform(layer_features[self.feature_names])
        return list(predicted['prediction'])

    def save(self, filepath):
        """Persist the estimator together with the feature names via joblib."""
        state = (self.estimator, self.feature_names)
        joblib.dump(state, filepath)

    def load(self, filepath):
        """Restore the estimator and feature names written by ``save``."""
        state = joblib.load(filepath)
        self.estimator, self.feature_names = state

class ScoringRandomForest(SklearnRegressor):
    """Random-forest scorer trained on per-layer feature tables.

    Args:
        train_size: ``train_size`` argument handed to ``train_test_split``.
        target: name of the column used as the regression target; every
            other column of the feature table is treated as a feature.
        **kwargs: accepted but not used.
    """

    def __init__(self, train_size, target, **kwargs):
        # NOTE(review): the parent initializer is not invoked here; the
        # estimator is created directly with default hyper-parameters.
        # Confirm that SklearnRegressor needs no further setup.
        self.train_size = train_size
        self.target = target
        self.feature_names = []
        self.estimator = RandomForestRegressor()

    def fit(self, features, **kwargs):
        """Train on the train part of the feature table and return ``self``.

        The table is split with ``train_test_split``; only the train part is
        passed to the parent ``fit`` -- the validation part is discarded.
        """
        feature_table = _convert_features_to_df(features)
        train_part, _unused_valid_part = train_test_split(feature_table,
                                                          train_size=self.train_size)
        self.feature_names = list(feature_table.columns.drop(self.target))
        inputs = train_part[self.feature_names]
        targets = train_part[self.target]
        super().fit(X=inputs, y=targets)
        return self

    def transform(self, features, **kwargs):
        """Score every layer of every image.

        Returns:
            ``{'scores': scores}`` where ``scores[i][j]`` is the list of
            predictions for layer ``j`` of image ``i`` (an empty list when
            that layer has no rows).
        """
        scores = []
        for per_image in features:
            scores.append([self._score_layer(layer) for layer in per_image])
        return {'scores': scores}

    def _score_layer(self, layer_features):
        """Return the predictions for one layer as a list ([] if it is empty)."""
        # presumably layer_features is a pandas DataFrame (it is indexed with
        # a list of column names) -- verify against the feature extractor.
        if len(layer_features) == 0:
            return []
        predicted = super().transform(layer_features[self.feature_names])
        return list(predicted['prediction'])

    def save(self, filepath):
        """Persist the estimator together with the feature names via joblib."""
        state = (self.estimator, self.feature_names)
        joblib.dump(state, filepath)

    def load(self, filepath):
        """Restore the estimator and feature names written by ``save``."""
        state = joblib.load(filepath)
        self.estimator, self.feature_names = state


def weight_regularization_unet(model, regularize, weight_decay_conv2d):
if regularize:
parameter_list = [{'params': model.parameters(), 'weight_decay': weight_decay_conv2d}]
Expand Down Expand Up @@ -369,3 +452,11 @@ def multiclass_dice_loss(output, target, smooth=0, activation='softmax', exclude
class_target.data = class_target.data.float()
loss += dice(output[:, class_nr, :, :], class_target)
return loss


def _convert_features_to_df(features):
df_features = []
for image_features in features:
for layer_features in image_features[1:]:
df_features.append(layer_features)
return pd.concat(df_features)
24 changes: 21 additions & 3 deletions src/pipeline_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
X_COLUMNS = ['file_path_image']
Y_COLUMNS = ['file_path_mask_eroded_0_dilated_0']
Y_COLUMNS_SCORING = ['ImageId']
CATEGORY_IDS = [None, 100]
SEED = 1234
CATEGORY_IDS = [None, 100]
CATEGORY_LAYERS = [1, 19]
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

Expand Down Expand Up @@ -121,15 +122,32 @@
'rotation': True,
'color_shift_runs': False},
'tta_aggregator': {'method': params.tta_aggregation_method,
'nthreads': params.num_threads
'num_threads': params.num_threads
},
'dropper': {'min_size': params.min_nuclei_size},
'postprocessor': {'mask_dilation': {'dilate_selem_size': params.dilate_selem_size
},
'mask_erosion': {'erode_selem_size': params.erode_selem_size
},
'prediction_crop': {'h_crop': params.crop_image_h,
'w_crop': params.crop_image_w
},
'scoring_model': params.scoring_model,
'lightGBM': {'model_params': {'learning_rate': params.lgbm__learning_rate,
'boosting_type': 'gbdt',
'objective': 'regression',
'metric': 'regression_l2',
'sub_feature': 1.0,
'num_leaves': params.lgbm__num_leaves,
'min_data': params.lgbm__min_data,
'max_depth': params.lgbm__max_depth},
'training_params': {'number_boosting_rounds': params.lgbm__number_of_trees,
'early_stopping_rounds': params.lgbm__early_stopping},
'train_size': params.lgbm__train_size,
'target': params.lgbm__target
},
'random_forest': {'train_size': params.lgbm__train_size,
'target': params.lgbm__target},
'nms': {'iou_threshold': params.nms__iou_threshold,
'num_threads': params.num_threads},
}
})
Loading

0 comments on commit 20e64ed

Please sign in to comment.