# config.py

"""
Mask R-CNN
Base Configurations class.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""
from __future__ import division
import math
import numpy as np


# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.

class Config(object):
    """Base configuration class.

    Do not instantiate this class for real experiments. Create a sub-class
    and override the class attributes that need to change. Instantiating a
    (sub-)class computes the derived attributes ``BATCH_SIZE``,
    ``IMAGE_SHAPE`` and ``BACKBONE_SHAPES`` from the class-level settings.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # Number of GPUs to use. For CPU training, use 1.
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Input image resizing.
    # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
    # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
    # be satisfied together the IMAGE_MAX_DIM is enforced.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 320
    # If True, pad images with zeros such that they're (max_dim by max_dim)
    IMAGE_PADDING = True  # currently, the False option is not supported

    # Learning rate and momentum.
    # The paper uses lr=0.02, but we found that to cause weights to explode
    # often.
    LEARNING_RATE = 0.002
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of training steps per epoch.
    # This doesn't need to match the size of the training set. TensorBoard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 100
    # Legacy misspelling of VALIDATION_STEPS, kept so existing readers and
    # sub-classes that use the old name keep working. __init__ keeps both
    # names in sync on every instance.
    VALIDATION_STPES = VALIDATION_STEPS

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    ###### Unused parameters (disabled, kept for reference) ######
    # Length of square anchor side in pixels
    # RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    # RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    # RPN_ANCHOR_STRIDE = 2

    # How many anchors per image to use for RPN training
    # RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum suppression (training and inference)
    # POST_NMS_ROIS_TRAINING = 1000
    # POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    # USE_MINI_MASK = True
    # MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Keypoint detection config. It can't be used together with
    # USE_MINI_MASK for now; this may be solved later.
    # DETECT_KEYPOINTS = False
    # KEYPOINTS_MASK_SHAPE = (56, 56)
    # KEYPOINTS_NUM = 17

    # Number of ROIs per image to feed to classifier/mask heads
    # TRAIN_ROIS_PER_IMAGE = 128  # TODO: paper uses 512

    # Percent of positive ROIs used to train classifier/mask heads
    # ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    # POOL_SIZE = 7
    # MASK_POOL_SIZE = 14
    # MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    # MAX_GT_INSTANCES = 10  # used for keypoints, or to reduce memory when not using mini masks
    # MAX_GT_INSTANCES = 100  # original setting

    # Bounding box refinement standard deviation for RPN and final detections.
    # RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    # BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    # DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    # DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    # DETECTION_NMS_THRESHOLD = 0.3

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    # USE_RPN_ROIS = True

    def __init__(self):
        """Set values of computed attributes.

        Sets ``BATCH_SIZE``, ``IMAGE_SHAPE`` and ``BACKBONE_SHAPES`` on the
        instance, and reconciles ``VALIDATION_STEPS`` with its legacy
        misspelled alias ``VALIDATION_STPES``.
        """
        # Reconcile the two spellings of the validation-step count. Walk the
        # class hierarchy from most to least derived; the first class that
        # defines either spelling decides the value (the correct spelling
        # wins if one class defines both). The base class defines both, so
        # the loop always finds a value.
        validation_steps = self.VALIDATION_STEPS
        for klass in type(self).__mro__:
            attrs = vars(klass)
            if "VALIDATION_STEPS" in attrs:
                validation_steps = attrs["VALIDATION_STEPS"]
                break
            if "VALIDATION_STPES" in attrs:
                validation_steps = attrs["VALIDATION_STPES"]
                break
        self.VALIDATION_STEPS = validation_steps
        self.VALIDATION_STPES = validation_steps

        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size: square, IMAGE_MAX_DIM on each side, 3 channels
        self.IMAGE_SHAPE = np.array(
            [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])

        # Compute backbone size from input image size: one
        # [height, width] row per stride, rounded up.
        self.BACKBONE_SHAPES = np.array(
            [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
              int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
             for stride in self.BACKBONE_STRIDES])

    def display(self):
        """Print every non-dunder, non-callable attribute and its value."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")