diff --git a/.gitignore b/.gitignore
index 7d7b138f9..506c445eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,61 @@
+# Data files and directories common in repo root
+datasets/
+logs/
+*.h5
+*.weights
+results/
+temp/
+test/
 *.jpg
 *.jpeg
-*.weights
-*.h5
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Visual Studio Code
+.vscode
+
+# PyCharm
+.idea/
+
+# Dropbox
+.dropbox.attr
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
diff --git a/README.md b/README.md
index 0875cfcba..b698777cf 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,26 @@ VOC (20 classes) (http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) | 72% | check
 
 Grab the pretrained weights of yolo3 from https://pjreddie.com/media/files/yolov3.weights.
 
+```wget -c https://pjreddie.com/media/files/yolov3.weights```
+
+Environment setup:
+
+```pip install -r requirements.txt```
+
+If an NVIDIA GPU is available:
+
+```pip uninstall tensorflow```
+
+```pip install tensorflow-gpu```
+
+Detection on single image:
+
 ```python yolo3_one_file_to_detect_them_all.py -w yolo3.weights -i dog.jpg``` 
 
+If your webcam is available:
+
+```python yolo3_cam.py -w yolov3.weights``` 
+
 ## Training
 
 ### 1. Data preparation 
diff --git a/RMS/RMS_yolo3.py b/RMS/RMS_yolo3.py
new file mode 100644
index 000000000..0ac94c613
--- /dev/null
+++ b/RMS/RMS_yolo3.py
@@ -0,0 +1,223 @@
+import base64
+import os
+import sys
+import argparse
+import warnings
+warnings.filterwarnings("ignore")
+os.environ.setdefault('PATH', '')
+import numpy as np
+import redis
+import time
+import json
+from io import BytesIO
+from multiprocessing import Process, Pipe, current_process, Lock
+import GPUtil
+from skimage.measure import find_contours
+import struct
+import cv2
+import numpy as np
+import config
+
+# Connect to the Redis server; the pool is reused by every client created below
+redispool = redis.ConnectionPool(host=config.REDIS_HOST,
+                          port=config.REDIS_PORT,
+                          db=config.REDIS_DB,
+                          socket_keepalive=True)
+
+try:
+    print('Testing Redis Connection')
+    redisdbSession = redis.StrictRedis(connection_pool=redispool)
+    response = redisdbSession.client_list()  # issued only to verify the server answers
+    print('Redis Connection Established')
+except redis.ConnectionError as e:
+    print(e)
+    sys.exit(1)  # Redis unreachable: stop before any worker is created
+
+np.set_printoptions(threshold=sys.maxsize)  # print arrays in full; recent NumPy rejects threshold=np.nan
+os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+
+# set some parameters
+net_h, net_w = 416, 416
+obj_thresh, nms_thresh = 0.7, 0.7
+anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
+labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
+            "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
+            "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
+            "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
+            "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
+            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
+            "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
+            "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
+            "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
+            "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
+
+class mlWorker(Process):
+    """Worker process that serves YOLOv3 detections over a Redis queue.
+
+    Each worker builds its own model inside ``run`` and then loops forever,
+    popping images from ``config.IMAGE_QUEUE`` and writing results back.
+    """
+
+    def __init__(self, LOCK, GPU="", FRAC=0):
+        """LOCK guards the shared queue; GPU is a GPUtil device (falsy = default
+        device); FRAC is the per-process GPU memory fraction given to TensorFlow."""
+        Process.__init__(self)
+        self.lock = LOCK
+        if GPU:
+            print('{} using GPUid: {}, Name: {}'.format(self.name, str(GPU.id), str(GPU.name)))
+        self.device = '/device:GPU:0' if GPU else ''
+        self.GPU = GPU
+        self.frac = FRAC
+        self.counter = 0
+        self.dt = 0.0
+
+    def run(self):
+        """Child-process entry point: build the model, load weights, serve forever."""
+        if self.GPU:
+            # Pin the device here, not in __init__: __init__ runs in the parent,
+            # so every worker would inherit whichever GPU id was written last.
+            os.environ["CUDA_VISIBLE_DEVICES"] = str(self.GPU.id)
+            print('ML Process: {} starting, using GPU: {}, frac: {}'.format(self.name,self.GPU.id,self.frac))
+        # Heavy imports are deferred so TensorFlow initialises inside the child.
+        import keras
+        import tensorflow as tf
+        from PIL import Image
+        from utils.weightreader import WeightReader
+        from utils.tools import preprocess_input, decode_netout
+        from utils.tools import correct_yolo_boxes, do_nms, draw_boxes
+        from model.yolo3 import make_yolov3_model
+        self.Image = Image
+        self.preprocess_input = preprocess_input
+        self.decode_netout = decode_netout
+        self.correct_yolo_boxes = correct_yolo_boxes
+        self.do_nms = do_nms
+        self.draw_boxes = draw_boxes
+        keras.backend.clear_session()
+        conf = tf.ConfigProto()
+        conf.gpu_options.per_process_gpu_memory_fraction = self.frac
+        keras.backend.set_session(tf.Session(config=conf))
+        # make the yolov3 model to predict 80 classes on COCO
+        _model = make_yolov3_model()
+        # load the weights trained on COCO into the model
+        WeightReader(config.MODEL_PATH).load_weights(_model)
+        graph = tf.get_default_graph()
+        print('ML Process: {} started'.format(self.name))
+        self.mainloop(model=_model, graph=graph)
+
+    def mainloop(self, model='', graph=''):
+        """Poll the image queue forever, run detection, and publish JSON results.
+
+        Each queue entry is a JSON object with "id", "image" (base64) and
+        "threshold"; the detections are stored in Redis under the entry's id.
+        """
+        redisdbSession = redis.StrictRedis(connection_pool=redispool)
+        while True:
+            try:
+                # Pop a batch atomically w.r.t. the other workers; the context
+                # manager releases the lock even if a Redis call raises.
+                with self.lock:
+                    query = redisdbSession.lrange(config.IMAGE_QUEUE, 0, config.BATCH_SIZE - 1)
+                    redisdbSession.ltrim(config.IMAGE_QUEUE, len(query), -1)
+                for item in query:
+                    data = json.loads(item)
+                    image = self.base64_decode_image(data["image"])
+                    image_h, image_w = image.shape[:2]
+                    net_input = self.preprocess_input(image, net_h, net_w)
+                    start = time.time()
+                    with graph.as_default():
+                        results = model.predict(net_input)
+                    self.dt += time.time() - start
+                    self.counter += 1
+                    print('avg dt: %f' % (self.dt / self.counter))
+                    output = self.extract_result(results, labels,
+                        throttle=float(data["threshold"]),
+                        image_h=image_h, image_w=image_w)
+                    redisdbSession.set(data["id"], json.dumps(output))
+                # sleep for a small amount
+                time.sleep(config.SERVER_SLEEP*2)
+            except Exception as e:
+                # Keep the worker alive on bad input or transient Redis errors.
+                print(e)
+                time.sleep(config.SERVER_SLEEP)
+                continue
+
+    def extract_result(self, results, labels, throttle='0.95', image_h=None, image_w=None):
+        """Turn raw network output into a JSON-serialisable list of detections.
+
+        results: the model's output scales, one array per entry of ``anchors``.
+        labels: class names indexed by class id.
+        throttle: minimum class score (number or numeric string) to report.
+        image_h, image_w: size of the source image, used to map boxes back from
+            network coordinates; default to the network input size.
+        Returns a list of {"bbox": [xmin, ymin, xmax, ymax], "label", "score"}.
+        """
+        min_score = float(throttle)
+        image_h = net_h if image_h is None else image_h
+        image_w = net_w if image_w is None else image_w
+        boxes = []
+        for scale_out, scale_anchors in zip(results, anchors):
+            # decode the output of the network
+            boxes += self.decode_netout(scale_out[0], scale_anchors, obj_thresh, net_h, net_w)
+        # correct the sizes of the bounding boxes
+        self.correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
+        # suppress non-maximal boxes
+        self.do_nms(boxes, nms_thresh)
+        output = []
+        for box in boxes:
+            # a box may clear the threshold for several classes; names are joined
+            names = [name for name, score in zip(labels, box.classes) if score > min_score]
+            if names:
+                output.append({
+                    "bbox": [int(box.xmin), int(box.ymin), int(box.xmax), int(box.ymax)],
+                    "label": ''.join(names),
+                    "score": str(box.get_score())
+                })
+        return output
+
+    def base64_decode_image(self, a):
+        """Decode base64-encoded image data into an RGB array.
+
+        return: <ndarray>
+        """
+        img = self.Image.open(BytesIO(base64.b64decode(a)))
+        if img.mode != "RGB":
+            img = img.convert("RGB")
+        return np.array(img)
+
+
+
+if __name__ == "__main__":
+    # One lock shared by all workers so queue pops do not interleave.
+    LOCK = Lock()
+    AVAIL_DEVICE_LIST = config.AVAIL_DEVICE_LIST
+    AVAIL_DEVICE_MEMFRAC = config.AVAIL_DEVICE_MEMFRAC
+    AVAIL_DEVICE_MAXTHREAD = config.AVAIL_DEVICE_MAXTHREAD
+
+    proc_list = []
+    print('{} GPUs Available'.format(len(AVAIL_DEVICE_LIST)))
+    if AVAIL_DEVICE_LIST:
+        for index, device in enumerate(AVAIL_DEVICE_LIST):
+            # Clamp to the configured limits *before* reporting, so the log
+            # line shows the number of processes that are actually created.
+            thread_count = min(int(AVAIL_DEVICE_MAXTHREAD[index]), config.MAX_THREADS)
+            mem_frac = min(float(AVAIL_DEVICE_MEMFRAC[index]), config.MAX_FRAC)
+            print('Preparing {} process on GPU: {}, frac: {}'.format(thread_count, device.id, mem_frac))
+            for _ in range(thread_count):
+                p = mlWorker(LOCK, GPU=device, FRAC=mem_frac)
+                p.daemon = True
+                proc_list.append(p)
+        print('Starting total: {} processes'.format(len(proc_list)))
+        for proc in proc_list:
+            proc.start()
+        print('All processes started')
+    else:
+        # No usable GPU: run a single worker on the default device.
+        p = mlWorker(LOCK)
+        p.daemon = True
+        proc_list.append(p)
+        p.start()
+
+    # Block until the workers exit (they loop forever unless they crash).
+    for proc in proc_list:
+        proc.join()
diff --git a/RMS/config.py b/RMS/config.py
new file mode 100644
index 000000000..c6fb41df9
--- /dev/null
+++ b/RMS/config.py
@@ -0,0 +1,83 @@
+import os
+import urllib.request
+import shutil
+from mrcnn.tools.config import Config
+
+
+ALLOWED_EXTENSIONS = set(['jpg', 'jpeg'])
+
+ROOT_DIR = os.getcwd()
+UPLOAD_FOLDER = os.path.join(ROOT_DIR, "images")
+MODEL_DIR = os.path.join(ROOT_DIR, "logs")
+# Directory holding the pretrained weights; it gets its own name so the
+# logs location in MODEL_DIR is not overwritten.
+COCO_MODEL_DIR = os.path.join(ROOT_DIR, "weights")
+# Create the working directories on first import; a folder that already
+# exists is left untouched.
+for _folder in (UPLOAD_FOLDER, MODEL_DIR, COCO_MODEL_DIR):
+    os.makedirs(_folder, exist_ok=True)
+# Local path to trained weights file
+MODEL_PATH = os.path.join(COCO_MODEL_DIR, "yolov3.weights")
+
+def download_trained_weights(coco_model_path, verbose=1):
+    """Download the pretrained YOLOv3 weights from MODEL_URL.
+
+    coco_model_path: local file the weights are written to; verbose > 0 prints progress
+    """
+    MODEL_URL = "https://pjreddie.com/media/files/yolov3.weights"
+    if verbose > 0:
+        print("Downloading pretrained model to " + coco_model_path + " ...")
+    with urllib.request.urlopen(MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
+        shutil.copyfileobj(resp, out)
+    if verbose > 0:
+        print("... done downloading pretrained model!")
+
+# Download the pretrained weights if they are not present yet
+if not os.path.exists(MODEL_PATH):
+    download_trained_weights(MODEL_PATH, verbose=1)
+
+import GPUtil
+
+LEAST_GMEM = 2250  # MB
+MAX_THREADS = 1
+MIN_FRAC = 0.3
+MAX_FRAC = 0.3
+GPU_LAOD = 0.5
+GMEM_LAOD_LIMIT = 1.0
+AVAIL_DEVICE_LIST = []
+AVAIL_DEVICE_MAT = []
+AVAIL_DEVICE_MEMFRAC = []
+AVAIL_DEVICE_MAXTHREAD = []
+try:
+    GPUs = GPUtil.getGPUs()
+    Gall = ''
+    Gfree = ''
+    for GPU in GPUs:
+        Gall = GPU.memoryTotal
+        Gfree = GPU.memoryFree
+        GMEM_LAOD_LIMIT = float(format(float(LEAST_GMEM / Gall), '.2f'))
+        # getAvailability() returns one 0/1 flag per GPU passed in, i.e. a list;
+        # take the single element instead of calling int() on the list itself.
+        if GPUtil.getAvailability([GPU], maxLoad=GPU_LAOD, maxMemory=GMEM_LAOD_LIMIT)[0] == 1:
+            AVAIL_DEVICE_LIST.append(GPU)
+            # Clamp the per-process memory fraction into [MIN_FRAC, MAX_FRAC].
+            GMEM_LAOD_LIMIT = min(max(GMEM_LAOD_LIMIT, MIN_FRAC), MAX_FRAC)
+            AVAIL_DEVICE_MEMFRAC.append(GMEM_LAOD_LIMIT)
+            AVAIL_DEVICE_MAXTHREAD.append(int(1.0/GMEM_LAOD_LIMIT))
+except Exception as e:
+    print(e)
+
+# initialize Redis connection settings
+REDIS_HOST = "localhost"
+REDIS_PORT = 6379
+REDIS_DB = 0
+
+BATCH_SIZE = 1
+# initialize constants used for server queuing
+IMAGE_QUEUE = "yolo3_queue"
+
+SERVER_SLEEP = 0.1
+CLIENT_SLEEP = 0.1
+
+# Output Throttle
+THROTTLE = 0.9
diff --git a/anchors/gen_openimg_anchors.py b/anchors/gen_openimg_anchors.py
new file mode 100644
index 000000000..8c45d3005
--- /dev/null
+++ b/anchors/gen_openimg_anchors.py
@@ -0,0 +1,137 @@
+import os, sys
+import random
+import argparse
+import numpy as np
+import json
+
+ROOT_DIR = os.path.abspath("../")
+sys.path.append(ROOT_DIR)
+from dataset.openimg import parse_openimg_annotation
+
+def IOU(ann, centroids):
+    """Return the IoU of box *ann* with every centroid, boxes sharing a corner.
+
+    ann is a (width, height) pair and centroids an iterable of such pairs;
+    the result is a 1-D array with one similarity per centroid.
+    """
+    box_w, box_h = ann
+
+    def overlap(cen_w, cen_h):
+        if cen_w >= box_w and cen_h >= box_h:  # centroid encloses the box
+            return box_w*box_h/(cen_w*cen_h)
+        if cen_w >= box_w and cen_h <= box_h:  # centroid wider, box taller
+            return box_w*cen_h/(box_w*box_h + (cen_w-box_w)*cen_h)
+        if cen_w <= box_w and cen_h >= box_h:  # box wider, centroid taller
+            return cen_w*box_h/(box_w*box_h + cen_w*(cen_h-box_h))
+        return (cen_w*cen_h)/(box_w*box_h)  # box encloses the centroid
+
+    return np.array([overlap(cen_w, cen_h) for cen_w, cen_h in centroids])
+
+def avg_IOU(anns, centroids):
+    """Return the mean, over all boxes in *anns*, of each box's best IoU."""
+    count, _ = anns.shape
+    total = 0.
+    # accumulate sequentially so the float summation order stays the same
+    for box in anns:
+        total += max(IOU(box, centroids))
+    return total/count
+
+def print_anchors(centroids, input_size=416):
+    """Print the anchors as "w,h, w,h, ..." in pixels, ordered by width.
+
+    centroids: array of shape (k, 2) holding (width, height) pairs relative
+        to the image size.
+    input_size: network input resolution the relative sizes are scaled to.
+    """
+    order = np.argsort(centroids[:, 0])
+    pairs = [
+        '{},{}'.format(int(centroids[i, 0]*input_size), int(centroids[i, 1]*input_size))
+        for i in order
+    ]
+    print(', '.join(pairs))
+
+def run_kmeans(ann_dims, anchor_num):
+    """Cluster box sizes with k-means using 1 - IoU as the distance.
+
+    ann_dims: float array of shape (n, 2) with one (width, height) per box.
+    anchor_num: number of clusters (anchors) to produce.
+    Returns the (anchor_num, 2) array of centroids once assignments settle.
+    """
+    ann_num, anchor_dim = ann_dims.shape
+    prev_assignments = np.ones(ann_num)*(-1)
+    iteration = 0
+    old_distances = np.zeros((ann_num, anchor_num))
+
+    # seed the centroids with randomly drawn boxes (fancy indexing copies)
+    indices = [random.randrange(ann_num) for _ in range(anchor_num)]
+    centroids = ann_dims[indices]
+
+    while True:
+        iteration += 1
+        # distances.shape = (ann_num, anchor_num)
+        distances = np.array([1 - IOU(ann_dims[i], centroids) for i in range(ann_num)])
+        print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances-distances))))
+        #assign samples to centroids
+        assignments = np.argmin(distances,axis=1)
+
+        if (assignments == prev_assignments).all() :
+            return centroids
+
+        #calculate new centroids (builtin float: the np.float alias no longer exists in NumPy)
+        centroid_sums=np.zeros((anchor_num, anchor_dim), float)
+        for i in range(ann_num):
+            centroid_sums[assignments[i]]+=ann_dims[i]
+        for j in range(anchor_num):
+            centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6)
+
+        prev_assignments = assignments.copy()
+        old_distances = distances.copy()
+
+def _main_(argv):
+    """Compute YOLO anchors for an Open Images dataset and print them.
+
+    argv: parsed arguments with ``conf`` (config path) and ``anchors`` (count).
+    """
+    config_path = argv.conf
+    num_anchors = int(argv.anchors)
+    with open(config_path) as config_buffer:
+        config = json.loads(config_buffer.read())
+
+    train_imgs, train_labels = parse_openimg_annotation(
+        config['train']['train_annot_file'],
+        config['train']['train_image_folder'],
+        config['train']['label_map'],
+        config['train']['cache_name'],
+        config['model']['labels']
+    )
+
+    # run k_mean to find the anchors
+    annotation_dims = []
+    for image in train_imgs:
+        for obj in image['object']:
+            relative_w = (float(obj['xmax']) - float(obj['xmin']))/image['width']
+            relative_h = (float(obj["ymax"]) - float(obj['ymin']))/image['height']
+            annotation_dims.append((relative_w, relative_h))
+    annotation_dims = np.array(annotation_dims)
+    centroids = run_kmeans(annotation_dims, num_anchors)
+
+    # report the clustering quality and print the anchors
+    print('\naverage IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids))
+    print_anchors(centroids)
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument(
+        '-c',
+        '--conf',
+        default='config.json',
+        help='path to configuration file')
+    argparser.add_argument(
+        '-a',
+        '--anchors',
+        type=int,  # without this a value given on the command line stays a str
+        default=9,
+        help='number of anchors to use')
+
+    args = argparser.parse_args()
+    _main_(args)
diff --git a/gen_anchors.py b/anchors/gen_voc_anchors.py
similarity index 100%
rename from gen_anchors.py
rename to anchors/gen_voc_anchors.py
diff --git a/config/config_carplate.json b/config/config_carplate.json
new file mode 100644
index 000000000..0da74d04e
--- /dev/null
+++ b/config/config_carplate.json
@@ -0,0 +1,38 @@
+{
+    "model" : {
+        "min_input_size":       352,
+        "max_input_size":       448,
+        "anchors":              [2,5, 2,7, 3,8, 4,11, 5,15, 7,20, 11,27, 16,46, 29,59],
+        "labels":               ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9","A", "B", "C", "D", "E", "F",
+				 "G", "H", "I", "J", "K", "L", "M","N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "carPlate"]
+    },
+
+    "train": {
+        "train_image_folder":   "/workspace/carPlateDataset/20180607/JPEGImages/",
+        "train_annot_folder":   "/workspace/carPlateDataset/20180607/Annotations/",
+        "cache_name":           "cache/carplate_train.pkl",
+
+        "train_times":          8,
+        "batch_size":           12,
+        "learning_rate":        1e-4,
+        "nb_epochs":            100,
+        "warmup_epochs":        3,
+        "ignore_thresh":        0.5,
+        "gpus":                 "0",
+        "grid_scales":          [1,1,1],
+        "obj_scale":            5,
+        "noobj_scale":          1,
+        "xywh_scale":           1,
+        "class_scale":          1,
+        "tensorboard_dir":      "logs",
+        "saved_weights_name":   "weights/carplate.h5",
+        "debug":                false
+    },
+    "valid": {
+        "valid_image_folder":   "",
+        "valid_annot_folder":   "",
+        "cache_name":           "",
+
+        "valid_times":          1
+    }
+}
diff --git a/config/config_openimg.json b/config/config_openimg.json
new file mode 100644
index 000000000..6730746d3
--- /dev/null
+++ b/config/config_openimg.json
@@ -0,0 +1,41 @@
+{
+    "model" : {
+        "min_input_size":       448,
+        "max_input_size":       448,
+        "anchors":              [55,69, 75,234, 133,240, 136,129, 142,363, 203,290, 228,184, 285,359, 341,260],
+        "labels":               ["Person"]
+    },
+
+    "train": {
+        "train_image_folder":   "/data1/openimages/600c_bbox/images/train/",
+        "train_annot_file":     "/data1/openimages/600c_bbox/annotation/train-annotations-bbox.csv",
+        "train_annot_folder":   "/data1/openimages/600c_bbox/annotation/",
+        "cache_name":           "cache/openimg_train.pkl",
+        "label_map":            "/data1/openimages/600c_bbox/annotation/class-descriptions-boxable.csv",
+        "train_times":          8,
+        "batch_size":           16,
+        "learning_rate":        1e-4,
+        "nb_epochs":            100,
+        "warmup_epochs":        3,
+        "ignore_thresh":        0.5,
+        "gpus":                 "0,1",
+
+        "grid_scales":          [1,1,1],
+        "obj_scale":            5,
+        "noobj_scale":          1,
+        "xywh_scale":           1,
+        "class_scale":          1,
+
+        "tensorboard_dir":      "logs",
+        "saved_weights_name":   "weights/openimg_600c.h5",
+        "debug":                true
+    },
+
+    "valid": {
+        "valid_image_folder":   "/data1/openimages/600c_bbox/images/validation/",
+        "valid_annot_folder":   "/data1/openimages/600c_bbox/annotation/",
+	"train_annot_file":     "validation-annotations-bbox.csv",
+        "cache_name":           "openimg_val.pkl",
+        "valid_times":          1
+    }
+}
diff --git a/config.json b/config/config_sample.json
similarity index 84%
rename from config.json
rename to config/config_sample.json
index 31722ac92..b2830a32a 100644
--- a/config.json
+++ b/config/config_sample.json
@@ -7,9 +7,9 @@
     },
 
     "train": {
-        "train_image_folder":   "/home/andy/Desktop/github/kangaroo/images/",
+        "train_image_folder":   "/data1/openimages/600c_bbox/images/train/",
         "train_annot_folder":   "/home/andy/Desktop/github/kangaroo/annots/",
-        "cache_name":           "kangaroo_train.pkl",
+        "cache_name":           "cache/kangaroo_train.pkl",
 
         "train_times":          8,
         "batch_size":           16,
@@ -26,7 +26,7 @@
         "class_scale":          1,
 
         "tensorboard_dir":      "logs",
-        "saved_weights_name":   "kangaroo.h5",
+        "saved_weights_name":   "weights/kangaroo.h5",
         "debug":                true
     },
 
diff --git a/dataset/__init__.py b/dataset/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dataset/csv2xml.py b/dataset/csv2xml.py
new file mode 100644
index 000000000..91fb0dc0e
--- /dev/null
+++ b/dataset/csv2xml.py
@@ -0,0 +1,104 @@
+'''
+### CSV Format ###
+ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
+000026e7ee790996,freeform,/m/07j7r,1,0.071905,0.145346,0.206591,0.391306,0,1,1,0,0
+### File Format ###
+/data/
+  - /images/
+    - /train/
+    - /validation/
+    - /test/
+  - /annotations/
+    - train-annotations-bbox.csv
+    - validation-annotations-bbox.csv
+    - test-annotations-bbox.csv
+    - class-descriptions-boxable.csv  
+'''
+import numpy as np
+import os
+import pickle
+import pandas
+from lxml.etree import Element, SubElement, tostring
+from xml.dom.minidom import parseString
+from PIL import Image
+
+def _read_image_size(path):
+    '''Return (width, height) of the image file at path.'''
+    with Image.open(path) as im:
+        return im.size
+
+
+def _to_pixels(fraction, extent):
+    '''Scale a normalised (0..1) coordinate to an integer pixel position.'''
+    return int(round(float(fraction) * extent))
+
+
+def parse_tfrecord_annotation(ann_file, img_dir, lablefile, cache_name, labels=[]):
+    '''
+    Parse an Open Images bounding-box CSV into per-image annotation dicts.
+
+    ann_file: /data/annotations/train-annotations-bbox.csv
+    img_dir: /data/images/train/ (images are read from <img_dir>/<ImageID>.jpg)
+    lablefile: class description file; accepted for interface compatibility
+        but not read here, so object names are the raw LabelName ids
+    cache_name: pickle file the parsed result is loaded from / written to
+    labels: if non-empty, only objects whose name is listed are kept
+
+    Returns (all_insts, seen_labels): a list of dicts with the keys
+    'filename', 'width', 'height' and 'object' (dicts with 'name', 'xmin',
+    'ymin', 'xmax', 'ymax' in pixels), and a dict mapping each label to the
+    number of boxes seen for it. Raises whatever pandas raises if ann_file
+    cannot be read.
+    '''
+    if os.path.exists(cache_name):
+        # NOTE: unpickling can execute code; only load cache files you created
+        with open(cache_name, 'rb') as handle:
+            cache = pickle.load(handle)
+        return cache['all_insts'], cache['seen_labels']
+
+    # column positions in the annotation CSV (see the module docstring)
+    COL_IMAGE_ID, COL_LABEL = 0, 2
+    COL_XMIN, COL_XMAX, COL_YMIN, COL_YMAX = 4, 5, 6, 7
+
+    try:
+        # the first CSV line is the header; .values yields one array per box
+        rows = pandas.read_csv(ann_file).values
+    except Exception as e:
+        print(e)
+        print('Cannot read annotation file: ' + ann_file)
+        raise
+
+    imgs = {}
+    seen_labels = {}
+    for row in rows:
+        image_id, name = row[COL_IMAGE_ID], row[COL_LABEL]
+        if image_id not in imgs:
+            # first box of this image: record its path and pixel size
+            path = os.path.join(img_dir, image_id + '.jpg')
+            width, height = _read_image_size(path)
+            imgs[image_id] = {'filename': path, 'width': width,
+                              'height': height, 'object': []}
+        img = imgs[image_id]
+
+        # count every label, including the ones filtered out below
+        seen_labels[name] = seen_labels.get(name, 0) + 1
+        if len(labels) > 0 and name not in labels:
+            continue
+        # boxes are stored normalised to the image size; convert to pixels
+        img['object'].append({
+            'name': name,
+            'xmin': _to_pixels(row[COL_XMIN], img['width']),
+            'ymin': _to_pixels(row[COL_YMIN], img['height']),
+            'xmax': _to_pixels(row[COL_XMAX], img['width']),
+            'ymax': _to_pixels(row[COL_YMAX], img['height']),
+        })
+
+    # keep only images that still have at least one object
+    all_insts = [img for img in imgs.values() if len(img['object']) > 0]
+
+    # persist the result so the next call can skip the CSV and image reads
+    cache = {'all_insts': all_insts, 'seen_labels': seen_labels}
+    with open(cache_name, 'wb') as handle:
+        pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+    return all_insts, seen_labels
\ No newline at end of file
diff --git a/dataset/openimg.py b/dataset/openimg.py
new file mode 100644
index 000000000..bc60356a7
--- /dev/null
+++ b/dataset/openimg.py
@@ -0,0 +1,90 @@
+'''
+### CSV Format ###
+ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
+000026e7ee790996,freeform,/m/07j7r,1,0.071905,0.145346,0.206591,0.391306,0,1,1,0,0
+### File Format ###
+/data/
+  - /images/
+    - /train/
+    - /validation/
+    - /test/
+  - /annotations/
+    - train-annotations-bbox.csv
+    - validation-annotations-bbox.csv
+    - test-annotations-bbox.csv
+    - class-descriptions-boxable.csv  
+'''
+import numpy as np
+import os
+import xml.etree.ElementTree as ET
+import pickle
+import numpy as np
+import os, sys
+import pickle
+import pandas
+from lxml.etree import Element, SubElement, tostring
+from xml.dom.minidom import parseString
+from PIL import Image
+import glob
+
+def parse_openimg_annotation(ann_file, img_dir, lable_map, cache_name, labels=[]):
+    '''
+    Parse an Open Images bounding-box CSV into per-image annotation dicts.
+
+    ann_file: annotation CSV laid out as shown in the module docstring
+    img_dir: folder holding <ImageID>.jpg (any other extension as fallback)
+    lable_map: header-less CSV mapping label ids to readable names
+    cache_name: pickle file the result is loaded from if present, else written
+    labels: if non-empty, only objects with one of these names are kept
+
+    Returns (all_insts, seen_labels): the images that have at least one kept
+    object, and the number of boxes seen per label name.
+    '''
+    if os.path.exists(cache_name):
+        with open(cache_name, 'rb') as handle:
+            cache = pickle.load(handle)
+        return cache['all_insts'], cache['seen_labels']
+
+    try:
+        # the annotation file can be very large, so it is streamed in chunks
+        img_csv = pandas.read_csv(ann_file, sep=',', header=None, skiprows=1, chunksize=100000, dtype=str)
+        label_csv = pandas.read_csv(lable_map, sep=',', header=None)
+    except Exception as e:
+        print(e)
+        print('Cannot read annotation files: ' + ann_file + ', ' + lable_map)
+        raise
+    label_map = {str(row[0]): str(row[1]) for row in label_csv.values}
+    seen_labels = {}
+    imgs = {}
+    for chunk in img_csv:
+        for row in chunk.values:
+            iid = str(row[0])
+            if iid not in imgs:
+                imgs[iid] = {'object': [], 'filename': os.path.join(img_dir, iid + '.jpg')}
+                try:
+                    im = Image.open(imgs[iid]['filename'])
+                except (IOError, OSError):
+                    # not stored as .jpg: use whichever file is named after the id
+                    imgs[iid]['filename'] = glob.glob(os.path.join(img_dir, iid) + '.*')[0]
+                    im = Image.open(imgs[iid]['filename'])
+                imgs[iid]['width'], imgs[iid]['height'] = im.size
+                im.close()
+            img = imgs[iid]
+            label_name = label_map[str(row[2])]
+            seen_labels[label_name] = seen_labels.get(label_name, 0) + 1
+            if len(labels) > 0 and label_name not in labels:
+                continue
+            # columns 4-7 are XMin, XMax, YMin, YMax, normalised to the image size
+            img['object'].append({
+                'name': label_name,
+                'xmin': int(round(float(row[4]) * img['width'])),
+                'xmax': int(round(float(row[5]) * img['width'])),
+                'ymin': int(round(float(row[6]) * img['height'])),
+                'ymax': int(round(float(row[7]) * img['height']))})
+
+    all_insts = [img for img in imgs.values() if len(img['object']) > 0]
+    cache = {'all_insts': all_insts, 'seen_labels': seen_labels}
+    with open(cache_name, 'wb') as handle:
+        pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+    return all_insts, seen_labels
diff --git a/voc.py b/dataset/voc.py
similarity index 100%
rename from voc.py
rename to dataset/voc.py
diff --git a/demo/yolo3_cam.py b/demo/yolo3_cam.py
new file mode 100644
index 000000000..04f2e17cc
--- /dev/null
+++ b/demo/yolo3_cam.py
@@ -0,0 +1,90 @@
+import argparse
+import os
+
+import struct
+import cv2
+import numpy as np
+import sys
+ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))  # parent of this script's directory, independent of the CWD
+sys.path.append(ROOT_DIR)
+from utils.weightreader import WeightReader
+from utils.bbox import BoundBox
+from utils.tools import preprocess_input, decode_netout
+from utils.tools import correct_yolo_boxes, do_nms, draw_boxes
+from model.yolo3 import make_yolov3_model
+
+np.set_printoptions(threshold=sys.maxsize)  # never summarize printed arrays; NaN is rejected by current NumPy
+os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+
+# detection parameters: network input size, objectness / NMS thresholds, one anchor list per network output, class names
+net_h, net_w = 416, 416
+obj_thresh, nms_thresh = 0.7, 0.7
+anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
+labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
+            "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
+            "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
+            "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
+            "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
+            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
+            "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
+            "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
+            "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
+            "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
+
+def post_stream(yolos, boxes, image):
+    """Turn raw network outputs into detections drawn on `image`; `boxes` is extended in place."""
+    frame_h, frame_w, _ = image.shape
+    # decode each network output with the anchor list at the same index
+    for scale_idx, scale_out in enumerate(yolos):
+        boxes.extend(decode_netout(scale_out[0], anchors[scale_idx], obj_thresh, net_h, net_w))
+
+    # correct the sizes of the bounding boxes
+    correct_yolo_boxes(boxes, frame_h, frame_w, net_h, net_w)
+    # suppress non-maximal boxes
+    do_nms(boxes, nms_thresh)
+    # draw the boxes using labels and hand back whatever draw_boxes returns
+    return draw_boxes(image, boxes, labels, obj_thresh)
+
+def _main_(args):
+    """Run YOLOv3 on frames from capture device 0 and show them until 'q' is pressed or no frame arrives."""
+    weights_path = args.weights
+
+    # make the yolov3 model to predict 80 classes on COCO
+    yolov3 = make_yolov3_model()
+
+    # load the weights trained on COCO into the model
+    weight_reader = WeightReader(weights_path)
+    weight_reader.load_weights(yolov3)
+
+    cap = cv2.VideoCapture(0)
+    cap.set(3, 1280) # set the Horizontal resolution
+    cap.set(4, 720)  # set the vertical resolution
+    try:
+        while True:
+            # Capture frame-by-frame; read() reports failure through its first value
+            grabbed, image = cap.read()
+            if not grabbed:
+                break
+            # preprocess the frame, run the prediction, then draw the detections
+            new_image = preprocess_input(image, net_h, net_w)
+            yolos = yolov3.predict(new_image)
+            frame = post_stream(yolos, [], image)
+            cv2.imshow('Yolo3', frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+    finally:
+        # always give the camera and the window back, even when a frame fails
+        cap.release()
+        cv2.destroyAllWindows()
+
+if __name__ == '__main__':
+    # script entry point: the weights path is the only command-line option
+    argparser = argparse.ArgumentParser(
+    description='test yolov3 network with coco weights')
+    argparser.add_argument(
+        '-w',
+        '--weights',
+        help='path to weights file')
+    args = argparser.parse_args()
+    _main_(args)
diff --git a/demo/yolo3_one_file_to_detect_them_all.py b/demo/yolo3_one_file_to_detect_them_all.py
new file mode 100644
index 000000000..fee5610db
--- /dev/null
+++ b/demo/yolo3_one_file_to_detect_them_all.py
@@ -0,0 +1,88 @@
+import argparse
+import os
+import numpy as np
+from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
+from keras.layers.merge import add, concatenate
+from keras.models import Model
+import struct
+import cv2
+import sys
+ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))  # parent of this script's directory, independent of the CWD
+sys.path.append(ROOT_DIR)
+from utils.weightreader import WeightReader
+from utils.bbox import BoundBox
+from utils.tools import preprocess_input, decode_netout
+from utils.tools import correct_yolo_boxes, do_nms, draw_boxes
+from model.yolo3 import make_yolov3_model
+
+np.set_printoptions(threshold=sys.maxsize)  # never summarize printed arrays; NaN is rejected by current NumPy
+os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+
+# command-line interface, built at import time and parsed only in the __main__ guard
+argparser = argparse.ArgumentParser(
+    description='test yolov3 network with coco weights')
+argparser.add_argument(
+    '-w',
+    '--weights',
+    help='path to weights file')
+
+argparser.add_argument(
+    '-i',
+    '--image',
+    help='path to image file')
+
+def _main_(args):
+    """Detect objects in args.image and write '<base>_detected<ext>' next to it; raises IOError if unreadable."""
+    weights_path = args.weights
+    image_path   = args.image
+
+    # set some parameters
+    net_h, net_w = 416, 416
+    obj_thresh, nms_thresh = 0.5, 0.45
+    anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
+    labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
+            "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
+            "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
+            "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
+            "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
+            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
+            "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
+            "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
+            "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
+            "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
+
+    # make the yolov3 model to predict 80 classes on COCO
+    yolov3 = make_yolov3_model()
+
+    # load the weights trained on COCO into the model
+    weight_reader = WeightReader(weights_path)
+    weight_reader.load_weights(yolov3)
+
+    # preprocess the image; cv2.imread returns None instead of raising on failure
+    image = cv2.imread(image_path)
+    if image is None:
+        raise IOError('cannot read image file: %s' % image_path)
+    image_h, image_w, _ = image.shape
+    new_image = preprocess_input(image, net_h, net_w)
+
+    # run the prediction and decode each network output with its anchor list
+    yolos = yolov3.predict(new_image)
+    boxes = []
+    for i in range(len(yolos)):
+        boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, net_h, net_w)
+
+    # correct the sizes of the bounding boxes
+    correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
+    # suppress non-maximal boxes
+    do_nms(boxes, nms_thresh)
+    # draw bounding boxes on the image using labels
+    draw_boxes(image, boxes, labels, obj_thresh)
+    # write the image with bounding boxes to file; splitext keeps extensions of
+    # any length intact (slicing off 4 characters mangled e.g. '.jpeg')
+    base, ext = os.path.splitext(image_path)
+    cv2.imwrite(base + '_detected' + ext, (image).astype('uint8'))
+
+if __name__ == '__main__':  # parse the command line only when run as a script
+    args = argparser.parse_args()
+    _main_(args)
diff --git a/model/__init__.py b/model/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/yolo.py b/model/yolo.py
similarity index 100%
rename from yolo.py
rename to model/yolo.py
diff --git a/model/yolo3.py b/model/yolo3.py
new file mode 100644
index 000000000..bd02427af
--- /dev/null
+++ b/model/yolo3.py
@@ -0,0 +1,619 @@
+import os
+import multiprocessing
+import numpy as np
+from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
+from keras.layers.merge import add, concatenate
+from keras.models import Model
+
+import keras
+import keras.backend as K
+import keras.layers as KL
+import keras.engine as KE
+import keras.models as KM
+from keras.optimizers import Adam
+
+
+def _conv_block(inp, convs, skip=True):
+    """Stack the conv layers described by `convs` on `inp`, optionally with a residual add.
+
+    With `skip`, the tensor fed to the second-to-last conv is added to the final output.
+    """
+    x = inp
+    residual_pos = len(convs) - 2
+    for position, spec in enumerate(convs):
+        if skip and position == residual_pos:
+            skip_connection = x
+        idx, strided = str(spec['layer_idx']), spec['stride'] > 1
+        if strided:
+            x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
+        x = Conv2D(spec['filter'], spec['kernel'], strides=spec['stride'],
+                   padding='valid' if strided else 'same', # the explicit padding above replaces 'same'
+                   name='conv_' + idx, use_bias=not spec['bnorm'])(x)
+        if spec['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + idx)(x)
+        if spec['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + idx)(x)
+
+    return add([skip_connection, x]) if skip else x
+
+def conv_block(inp, convs, skip=True):
+    """Stack the conv layers described by `convs` on `inp`, optionally with a residual add.
+
+    Delegates to `_conv_block` so that both names build the same graph. The
+    former copy of the loop called `add([skip_connection, x])` on every
+    iteration, which raised on an unbound `skip_connection` whenever the
+    residual source had not been captured yet: always with skip=False, and on
+    the first iteration for more than two convs.
+
+    Args:
+        inp: input tensor.
+        convs: list of dicts with the keys 'filter', 'kernel', 'stride', 'bnorm',
+            'leaky' and 'layer_idx', one per conv layer, in order.
+        skip: when true, add the tensor that entered the second-to-last conv to
+            the output of the last one.
+
+    Returns:
+        The output tensor of the block.
+    """
+    return _conv_block(inp, convs, skip=skip)
+
+def _interval_overlap(interval_a, interval_b):
+    """Return the length of the overlap between two 1-D intervals, 0 if they are disjoint.
+
+    Each interval is a (start, end) pair.
+    """
+    a_start, a_end = interval_a
+    b_start, b_end = interval_b
+    overlap_end = min(a_end, b_end)
+
+    if b_start < a_start:
+        # b begins first: they overlap only if b reaches a's start
+        return 0 if b_end < a_start else overlap_end - a_start
+    # a begins first (or together): they overlap only if a reaches b's start
+    return 0 if a_end < b_start else overlap_end - b_start
+
+
+def _sigmoid(x):  # logistic function 1 / (1 + e^-x), computed with np.exp
+    return 1. / (1. + np.exp(-x))
+
+def bbox_iou(box1, box2):
+    """Return the intersection-over-union of two boxes exposing xmin/xmax/ymin/ymax.
+
+    Returns 0.0 when the union area is zero (both boxes degenerate) instead of dividing by zero.
+    """
+    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
+    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
+    intersect = intersect_w * intersect_h
+    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
+    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
+    union = w1*h1 + w2*h2 - intersect
+    return float(intersect) / union if union != 0 else 0.0
+
+class YoloLayer(KE.Layer):
+    """Keras layer that computes the YOLOv3 training loss for one network output."""
+    # NOTE(review): `tf` is used throughout but is not among the imports visible at the top of this module - confirm it is imported.
+    def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, 
+                    grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, 
+                    **kwargs):
+        # make the model settings persistent
+        self.ignore_thresh  = ignore_thresh
+        self.warmup_batches = warmup_batches
+        self.anchors        = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2])
+        self.grid_scale     = grid_scale
+        self.obj_scale      = obj_scale
+        self.noobj_scale    = noobj_scale
+        self.xywh_scale     = xywh_scale
+        self.class_scale    = class_scale        
+
+        # make a persistent mesh grid
+        max_grid_h, max_grid_w = max_grid
+        cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)))
+        cell_y = tf.transpose(cell_x, (0,2,1,3,4))
+        self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])
+        super(YoloLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        super(YoloLayer, self).build(input_shape)  # Be sure to call this somewhere!
+
+    def call(self, x):
+        """Return the per-sample loss for x = [input_image, y_pred, y_true, true_boxes]."""
+        input_image, y_pred, y_true, true_boxes = x
+        # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
+        y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))
+        
+        # initialize the masks
+        object_mask     = tf.expand_dims(y_true[..., 4], 4)
+
+        # the variable to keep track of number of batches processed
+        batch_seen = tf.Variable(0.)        
+
+        # compute grid factor and net factor
+        grid_h      = tf.shape(y_true)[1]
+        grid_w      = tf.shape(y_true)[2]
+        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])
+
+        net_h       = tf.shape(input_image)[1]
+        net_w       = tf.shape(input_image)[2]            
+        net_factor  = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
+        
+        """
+        Adjust prediction
+        """
+        pred_box_xy    = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
+        pred_box_wh    = y_pred[..., 2:4]                                                       # t_wh
+        pred_box_conf  = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4)                          # adjust confidence
+        pred_box_class = y_pred[..., 5:]                                                        # adjust class probabilities      
+
+        """
+        Adjust ground truth
+        """
+        true_box_xy    = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
+        true_box_wh    = y_true[..., 2:4] # t_wh
+        true_box_conf  = tf.expand_dims(y_true[..., 4], 4)
+        true_box_class = tf.argmax(y_true[..., 5:], -1)         
+
+        """
+        Compare each predicted box to all true boxes
+        """        
+        # initially, drag all objectness of all boxes to 0
+        conf_delta  = pred_box_conf - 0 
+
+        # then, ignore the boxes which have good overlap with some true box
+        true_xy = true_boxes[..., 0:2] / grid_factor
+        true_wh = true_boxes[..., 2:4] / net_factor
+        
+        true_wh_half = true_wh / 2.
+        true_mins    = true_xy - true_wh_half
+        true_maxes   = true_xy + true_wh_half
+        
+        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
+        pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
+        
+        pred_wh_half = pred_wh / 2.
+        pred_mins    = pred_xy - pred_wh_half
+        pred_maxes   = pred_xy + pred_wh_half    
+
+        intersect_mins  = tf.maximum(pred_mins,  true_mins)
+        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
+
+        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
+        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
+        
+        true_areas = true_wh[..., 0] * true_wh[..., 1]
+        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
+
+        union_areas = pred_areas + true_areas - intersect_areas
+        iou_scores  = tf.truediv(intersect_areas, union_areas)
+
+        best_ious   = tf.reduce_max(iou_scores, axis=4)        
+        conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4)
+
+        """
+        Compute some online statistics
+        """            
+        true_xy = true_box_xy / grid_factor
+        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor
+
+        true_wh_half = true_wh / 2.
+        true_mins    = true_xy - true_wh_half
+        true_maxes   = true_xy + true_wh_half
+
+        pred_xy = pred_box_xy / grid_factor
+        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor 
+        
+        pred_wh_half = pred_wh / 2.
+        pred_mins    = pred_xy - pred_wh_half
+        pred_maxes   = pred_xy + pred_wh_half      
+
+        intersect_mins  = tf.maximum(pred_mins,  true_mins)
+        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
+        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
+        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
+        
+        true_areas = true_wh[..., 0] * true_wh[..., 1]
+        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
+
+        union_areas = pred_areas + true_areas - intersect_areas
+        iou_scores  = tf.truediv(intersect_areas, union_areas)
+        iou_scores  = object_mask * tf.expand_dims(iou_scores, 4)
+        
+        count       = tf.reduce_sum(object_mask)
+        count_noobj = tf.reduce_sum(1 - object_mask)
+        detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5)
+        class_mask  = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4)
+        recall50    = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask  * class_mask) / (count + 1e-3)
+        recall75    = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask  * class_mask) / (count + 1e-3)    
+        avg_iou     = tf.reduce_sum(iou_scores) / (count + 1e-3)
+        avg_obj     = tf.reduce_sum(pred_box_conf  * object_mask)  / (count + 1e-3)
+        avg_noobj   = tf.reduce_sum(pred_box_conf  * (1-object_mask))  / (count_noobj + 1e-3)
+        avg_cat     = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) 
+
+        """
+        Warm-up training
+        """
+        batch_seen = tf.assign_add(batch_seen, 1.)
+        
+        true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), 
+                              lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), 
+                                       true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), 
+                                       tf.ones_like(object_mask)],
+                              lambda: [true_box_xy, 
+                                       true_box_wh,
+                                       object_mask])
+
+        """
+        Compare each true box to all anchor boxes
+        """      
+        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
+        wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale
+
+        xy_delta    = xywh_mask   * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale
+        wh_delta    = xywh_mask   * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale
+        conf_delta  = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale
+        class_delta = object_mask * \
+                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
+                      self.class_scale
+
+        loss_xy    = tf.reduce_sum(tf.square(xy_delta),       list(range(1,5)))
+        loss_wh    = tf.reduce_sum(tf.square(wh_delta),       list(range(1,5)))
+        loss_conf  = tf.reduce_sum(tf.square(conf_delta),     list(range(1,5)))
+        loss_class = tf.reduce_sum(class_delta,               list(range(1,5)))
+
+        loss = loss_xy + loss_wh + loss_conf + loss_class
+
+        loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000)
+        loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000)
+        loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
+        loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
+        loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
+        loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)   
+        loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)     
+        loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), 
+                                       tf.reduce_sum(loss_wh), 
+                                       tf.reduce_sum(loss_conf), 
+                                       tf.reduce_sum(loss_class)],  message='loss xy, wh, conf, class: \t',   summarize=1000)   
+
+
+        return loss*self.grid_scale
+
+    def compute_output_shape(self, input_shape):
+        return [(None, 1)]
+
+
+def make_yolov3_model():
+    """Build the YOLOv3 graph and return a Keras Model mapping an image to the outputs yolo_82, yolo_94 and yolo_106."""
+    input_image = Input(shape=(None, None, 3))
+    # Layer  0 => 4
+    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
+                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
+                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
+                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
+
+    # Layer  5 => 8
+    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
+                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
+                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
+
+    # Layer  9 => 11
+    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
+                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
+
+    # Layer 12 => 15
+    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
+                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
+                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
+
+    # Layer 16 => 36
+    for i in range(7):
+        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
+                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
+    # kept for the concatenation after the second upsampling
+    skip_36 = x
+        
+    # Layer 37 => 40
+    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
+                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
+                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
+
+    # Layer 41 => 61
+    for i in range(7):
+        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
+                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
+    # kept for the concatenation after the first upsampling
+    skip_61 = x
+        
+    # Layer 62 => 65
+    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
+                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
+                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
+
+    # Layer 66 => 74
+    for i in range(3):
+        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
+                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
+        
+    # Layer 75 => 79
+    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
+                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
+                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
+                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
+                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)
+
+    # Layer 80 => 82
+    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
+                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)
+
+    # Layer 83 => 86
+    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
+    x = UpSampling2D(2)(x)
+    x = concatenate([x, skip_61])
+
+    # Layer 87 => 91
+    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
+                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
+                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
+                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
+                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)
+
+    # Layer 92 => 94
+    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
+                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)
+
+    # Layer 95 => 98
+    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
+    x = UpSampling2D(2)(x)
+    x = concatenate([x, skip_36])
+
+    # Layer 99 => 106
+    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
+                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
+                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
+                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
+                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
+                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
+                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)
+
+    model = Model(input_image, [yolo_82, yolo_94, yolo_106])    
+    return model
+
+def dummy_loss(y_true, y_pred):  # y_true is unused: the value depends on y_pred alone
+    return tf.sqrt(tf.reduce_sum(y_pred))
+
+class YOLO3():
+    """
+    Encapsulates the YOLO3 model functionality.
+    The actual Keras model is in the keras_model property.
+    """
+    def __init__(self, mode, config, model_dir):
+        """
+        mode: Either "training" or "inference"
+        config: A Sub-class of the Config class
+        model_dir: Directory to save training logs and trained weights
+        """
+        assert mode in ['training', 'inference']
+        self.mode = mode
+        self.config = config
+        self.model_dir = model_dir
+        self.set_log_dir()
+        self.keras_model = self.build(mode=mode, config=config)
+    
+    def build(self, mode, config):
+        """
+        Build YOLO3 architecture.
+        mode: Either ["training", "inference"]
+        Config: {nb_class,anchors,max_box_per_image, 
+            max_grid,batch_size,warmup_batches,ignore_thresh,
+            grid_scales,obj_scale,noobj_scale,xywh_scale,class_scale}
+        """
+        assert mode in ['training', 'inference']
+
+        input_image = KL.Input(shape=(None, None, 3)) # net_h, net_w, 3
+        true_boxes  = KL.Input(
+            shape=(1, 1, 1,config.max_box_per_image, 4))
+        true_yolo_1 = KL.Input(
+            shape=(None, None, len(config.anchors)//6, 4+1+config.nb_class))
+        # grid_h, grid_w, nb_anchor, 5+nb_class
+        true_yolo_2 = KL.Input(
+            shape=(None, None, len(config.anchors)//6, 4+1+config.nb_class))
+        # grid_h, grid_w, nb_anchor, 5+nb_class
+        true_yolo_3 = KL.Input(
+            shape=(None, None, len(config.anchors)//6, 4+1+config.nb_class))
+        # grid_h, grid_w, nb_anchor, 5+nb_class
+        # Layer  0 => 4
+        x = _conv_block(input_image, [
+            {'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
+            {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
+            {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
+            {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
+        # Layer  5 => 8
+        x = _conv_block(x, [
+            {'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
+            {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
+            {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
+        # Layer  9 => 11
+        x = _conv_block(x, [
+            {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
+            {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
+        # Layer 12 => 15
+        x = _conv_block(x, [
+            {'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
+            {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
+            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
+        # Layer 16 => 36
+        for i in range(7):
+            x = _conv_block(x, [
+                {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
+                {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
+        skip_36 = x
+        # Layer 37 => 40
+        x = _conv_block(x, [
+            {'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
+            {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
+            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
+        # Layer 41 => 61
+        for i in range(7):
+            x = _conv_block(x, [
+                {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
+                {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
+        skip_61 = x
+        # Layer 62 => 65
+        x = _conv_block(x, [
+            {'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
+            {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
+            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
+        # Layer 66 => 74
+        for i in range(3):
+            x = _conv_block(x, [
+                {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
+                {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
+        # Layer 75 => 79
+        x = _conv_block(x, [
+                {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
+                {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
+                {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
+                {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
+                {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}
+            ], do_skip=False)
+        # Layer 80 => 82
+        pred_yolo_1 = _conv_block(x, [
+                {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
+                {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}
+            ], do_skip=False)
+        # YOLO Layer
+        loss_yolo_1 = YoloLayer(
+            anchors[12:], 
+            [1*num for num in max_grid], 
+            batch_size, 
+            warmup_batches, 
+            ignore_thresh, 
+            grid_scales[0],
+            obj_scale,
+            noobj_scale,
+            xywh_scale,
+            class_scale)(
+                [input_image, pred_yolo_1, true_yolo_1, true_boxes])
+        # Layer 83 => 86
+        x = _conv_block(x, [
+                {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}
+                ], do_skip=False)
+        x = UpSampling2D(2)(x)
+        x = concatenate([x, skip_61])
+
+        # Layer 87 => 91
+        x = _conv_block(x, [
+                {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
+                {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
+                {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
+                {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
+                {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}
+                ], do_skip=False)
+
+        # Layer 92 => 94
+        pred_yolo_2 = _conv_block(x, [
+                {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
+                {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}
+            ], do_skip=False)
+        loss_yolo_2 = YoloLayer(
+            anchors[6:12], 
+            [2*num for num in max_grid], 
+            batch_size, 
+            warmup_batches, 
+            ignore_thresh, 
+            grid_scales[1],
+            obj_scale,
+            noobj_scale,
+            xywh_scale,
+            class_scale)(
+                [input_image, pred_yolo_2, true_yolo_2, true_boxes])
+        # Layer 95 => 98
+        x = _conv_block(x, [
+                {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}
+            ], do_skip=False)
+        x = UpSampling2D(2)(x)
+        x = concatenate([x, skip_36])
+        # Layer 99 => 106
+        pred_yolo_3 = _conv_block(x, [
+                {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
+                {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
+                {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
+                {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
+                {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
+                {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
+                {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}
+            ], do_skip=False)
+        loss_yolo_3 = YoloLayer(
+            anchors[:6], 
+            [4*num for num in max_grid], 
+            batch_size, 
+            warmup_batches, 
+            ignore_thresh, 
+            grid_scales[2],
+            obj_scale,
+            noobj_scale,
+            xywh_scale,
+            class_scale)(
+                [input_image, pred_yolo_3, true_yolo_3, true_boxes]) 
+
+        if mode == "training":
+            model = KM.Model(
+                [input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3],
+                [loss_yolo_1, loss_yolo_2, loss_yolo_3])
+        elif mode == "inference":
+            model = KM.Model(
+                input_image,
+                [pred_yolo_1, pred_yolo_2, pred_yolo_3])
+        # Muti-GPU Model Build Here
+        if config.GPU_COUNT > 1:
+            pass
+        return model
+
+    def compile(self, lr):  # configure self.model for training with Adam at learning rate `lr`
+        assert self.mode in ['training']  # refuses to compile unless the mode is "training"
+        optimizer = Adam(lr=lr, clipnorm=0.001)  # clipnorm: Keras clips gradients whose norm exceeds this value
+        self.model.compile(loss=self.dummy_loss, optimizer=optimizer)
+
+    def train(self, train_dataset, val_dataset, 
+        learning_rate, epochs, layers,augmentation=None):
+        """Compile the model, then fit it with TensorBoard and weight-checkpoint callbacks."""
+        assert self.mode == "training", "train"
+        callbacks = [
+            keras.callbacks.TensorBoard(
+                log_dir=self.log_dir, histogram_freq=0, write_graph=True, write_images=False),
+            keras.callbacks.ModelCheckpoint(
+                self.checkpoint_path, verbose=0, save_weights_only=True)
+        ]
+        # NOTE(review): data_generator is not implemented yet, so train_generator (used
+        # below) is never assigned; the disabled sketch is kept as the string that follows.
+        '''
+        train_generator = data_generator(train_dataset,
+            self.config, shuffle=True,
+            augmentation=augmentation, batch_size=self.config.BATCH_SIZE)
+        val_generator = data_generator(val_dataset,
+            self.config, shuffle=True, batch_size=self.config.BATCH_SIZE)
+        '''
+        # NOTE(review): the learning_rate argument is ignored; the config value is used instead.
+        self.compile(self.config.learning_rate)
+        # compare with ==: `is` tests identity, which matches equal strings only by accident
+        if os.name == 'nt':
+            workers = 0
+        else:
+            workers = multiprocessing.cpu_count()
+        # NOTE(review): `config` is not defined in this scope, and compile() configures
+        # self.model while the fit below runs on self.keras_model -- confirm both.
+        self.keras_model.fit_generator(
+            train_generator, 
+            steps_per_epoch = len(train_generator) * config['train']['train_times'], 
+            epochs = config['train']['nb_epochs'] + config['train']['warmup_epochs'], 
+            verbose = 2 if config['train']['debug'] else 1,
+            callbacks = callbacks,
+            workers = workers,
+            max_queue_size = 100,
+            use_multiprocessing=True)
+        self.epoch = max(self.epoch, epochs)
+
+    def detect(self):  # placeholder: no detection logic is implemented yet
+        pass
+    def trainingModel(self):  # placeholder: empty stub, does nothing
+        pass
+    def inferenceModel(self):  # placeholder: empty stub, does nothing
+        pass
+    @staticmethod  # compile() hands self.dummy_loss to Keras, which calls it as loss(y_true, y_pred)
+    def dummy_loss(y_true, y_pred): return tf.sqrt(tf.reduce_sum(y_pred))  # y_true is not used
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..0eb506f9c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,30 @@
+absl-py==0.2.0
+astor==0.6.2
+bleach==1.5.0
+certifi==2018.4.16
+cycler==0.10.0
+gast==0.2.0
+grpcio==1.11.0
+h5py==2.7.1
+html5lib==0.9999999
+Keras==2.1.5
+kiwisolver==1.0.1
+Markdown==2.6.11
+matplotlib==2.2.2
+numpy==1.14.2
+opencv-python==3.4.0.12
+pandas==0.23.0
+Pillow==5.1.0
+protobuf==3.5.2.post1
+pyparsing==2.2.0
+python-dateutil==2.7.2
+pytz==2018.4
+PyYAML==3.12
+scipy==1.0.1
+seaborn==0.8.1
+selenium==3.12.0
+six==1.11.0
+tensorboard==1.7.0
+tensorflow==1.7.0
+termcolor==1.1.0
+Werkzeug==0.14.1
diff --git a/train.py b/train.py
index 491289da9..85f9fcdd2 100644
--- a/train.py
+++ b/train.py
@@ -4,14 +4,14 @@
 import os
 import numpy as np
 import json
-from voc import parse_voc_annotation
-from yolo import create_yolov3_model, dummy_loss
+from .dataset.voc import parse_voc_annotation
+from .model.yolo import create_yolov3_model, dummy_loss
 from generator import BatchGenerator
-from utils.utils import normalize, evaluate, makedirs
+from .utils.utils import normalize, evaluate, makedirs
 from keras.callbacks import EarlyStopping, ReduceLROnPlateau
 from keras.optimizers import Adam
 from callbacks import CustomModelCheckpoint, CustomTensorBoard
-from utils.multi_gpu_model import multi_gpu_model
+from .utils.multi_gpu_model import multi_gpu_model
 import tensorflow as tf
 import keras
 from keras.models import load_model
diff --git a/train_openimg.py b/train_openimg.py
new file mode 100644
index 000000000..b772002ac
--- /dev/null
+++ b/train_openimg.py
@@ -0,0 +1,282 @@
+#! /usr/bin/env python
+
+import argparse
+import os
+import numpy as np
+import json
+from .dataset.openimg import parse_openimg_annotation
+from .model.yolo import create_yolov3_model, dummy_loss
+from generator import BatchGenerator
+from .utils.utils import normalize, evaluate, makedirs
+from keras.callbacks import EarlyStopping, ReduceLROnPlateau
+from keras.optimizers import Adam
+from callbacks import CustomModelCheckpoint, CustomTensorBoard
+from .utils.multi_gpu_model import multi_gpu_model
+import tensorflow as tf
+import keras
+from keras.models import load_model
+
+def create_training_instances(
+    train_annot_file,
+    train_image_folder,
+    label_map,
+    train_cache,
+    valid_annot_folder,
+    valid_image_folder,
+    valid_cache,
+    labels,
+):
+    """Parse the annotations; return (train_ints, valid_ints, sorted labels, max_box_per_image)."""
+    train_ints, train_labels = parse_openimg_annotation(train_annot_file, train_image_folder, label_map, train_cache, labels)
+
+    # parse annotations of the validation set, if any, otherwise split the training set
+    if os.path.exists(valid_annot_folder):
+        valid_ints, _ = parse_openimg_annotation(valid_annot_folder, valid_image_folder, label_map, valid_cache, labels)
+    else:
+        print("valid_annot_folder not exists. Spliting the trainining set.")
+        # shuffle with a fixed seed so the 80/20 split is reproducible, then reseed
+        train_valid_split = int(0.8*len(train_ints))
+        np.random.seed(0)
+        np.random.shuffle(train_ints)
+        np.random.seed()
+
+        valid_ints = train_ints[train_valid_split:]
+        train_ints = train_ints[:train_valid_split]
+
+    # compare the seen labels with the given labels in config.json
+    if len(labels) > 0:
+        overlap_labels = set(labels).intersection(set(train_labels.keys()))
+
+        print('Seen labels: \t'  + str(train_labels) + '\n')
+        print('Given labels: \t' + str(labels))
+
+        # return four Nones (same arity as the success path) if some given label is not in the dataset
+        if len(overlap_labels) < len(labels):
+            print('Some labels have no annotations! Please revise the list of labels in the config.json.')
+            return None, None, None, None
+    else:
+        print('No labels are provided. Train on all seen labels.')
+        print(train_labels)
+        labels = train_labels.keys()
+
+    max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)])
+
+    return train_ints, valid_ints, sorted(labels), max_box_per_image
+
+def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save):
+    """Create the TensorBoard log directory and build the list of training callbacks.
+
+    Returns, in this order: early stopping, checkpointing of ``model_to_save``
+    to ``saved_weights_name``, learning-rate reduction on plateau, and
+    TensorBoard logging. The first three all monitor the training ``loss``.
+    """
+    makedirs(tensorboard_logs)
+
+    # stop after 5 epochs without a loss improvement of at least 0.01
+    stopper = EarlyStopping(
+        monitor='loss', min_delta=0.01, patience=5, mode='min', verbose=1)
+
+    # configured with save_best_only and a period of 1
+    saver = CustomModelCheckpoint(
+        model_to_save=model_to_save,
+        filepath=saved_weights_name,  # + '{epoch:02d}.h5'
+        monitor='loss',
+        verbose=1,
+        save_best_only=True,
+        mode='min',
+        period=1)
+
+    # scale the learning rate by 0.1 when the loss has plateaued for 2 epochs
+    lr_reducer = ReduceLROnPlateau(
+        monitor='loss', mode='min',
+        factor=0.1, patience=2,
+        epsilon=0.01, cooldown=0, min_lr=0,
+        verbose=1)
+
+    # graph and image summaries are both switched on
+    board = CustomTensorBoard(
+        log_dir=tensorboard_logs, write_graph=True, write_images=True)
+
+    return [stopper, saver, lr_reducer, board]
+
+def create_model(
+    nb_class, 
+    anchors, 
+    max_box_per_image, 
+    max_grid, batch_size, 
+    warmup_batches, 
+    ignore_thresh, 
+    multi_gpu, 
+    saved_weights_name, 
+    lr,
+    grid_scales,
+    obj_scale,
+    noobj_scale,
+    xywh_scale,
+    class_scale  
+):
+    """Build the YOLOv3 training and inference models and compile the training one.
+
+    With more than one GPU the template model is created under the CPU device
+    with the batch size divided by the GPU count, then wrapped by
+    ``multi_gpu_model``; otherwise the template model itself is trained.
+
+    Returns:
+        (train_model, infer_model): the compiled model to fit, and the second
+        model returned by ``create_yolov3_model``.
+    """
+    # keyword arguments shared by both construction paths; only batch_size differs
+    shared_kwargs = dict(
+        nb_class            = nb_class,
+        anchors             = anchors,
+        max_box_per_image   = max_box_per_image,
+        max_grid            = max_grid,
+        warmup_batches      = warmup_batches,
+        ignore_thresh       = ignore_thresh,
+        grid_scales         = grid_scales,
+        obj_scale           = obj_scale,
+        noobj_scale         = noobj_scale,
+        xywh_scale          = xywh_scale,
+        class_scale         = class_scale
+    )
+    use_replicas = multi_gpu > 1
+
+    if use_replicas:
+        with tf.device('/cpu:0'):
+            template_model, infer_model = create_yolov3_model(
+                batch_size=batch_size//multi_gpu, **shared_kwargs)
+    else:
+        template_model, infer_model = create_yolov3_model(
+            batch_size=batch_size, **shared_kwargs)
+
+    # load the pretrained weight if exists, otherwise load the backend weight only
+    if os.path.exists(saved_weights_name):
+        print("\nLoading pretrained weights.\n")
+        template_model.load_weights(saved_weights_name)
+    else:
+        template_model.load_weights("backend.h5", by_name=True)
+
+    # wrap the template for several GPUs when requested, otherwise train it directly
+    train_model = multi_gpu_model(template_model, gpus=multi_gpu) if use_replicas else template_model
+
+    # Adam with gradient-norm clipping
+    optimizer = Adam(lr=lr, clipnorm=0.001)
+    train_model.compile(loss=dummy_loss, optimizer=optimizer)
+
+    return train_model, infer_model
+
+def _main_(args):  # train a YOLOv3 model from the JSON config at args.conf, then print per-class AP and mAP
+    config_path = args.conf
+    # the configuration file is read whole and parsed as JSON
+    with open(config_path) as config_buffer:    
+        config = json.loads(config_buffer.read())
+
+    ###############################
+    #   Parse the annotations 
+    ###############################
+    train_ints, valid_ints, labels, max_box_per_image = create_training_instances(
+        config['train']['train_annot_file'],
+        config['train']['train_image_folder'],
+        config['train']['label_map'],
+        config['train']['cache_name'],
+        config['valid']['valid_annot_file'],
+        config['valid']['valid_image_folder'],
+        config['valid']['cache_name'],
+        config['model']['labels']
+    )
+    print('\nTraining on: \t' + str(labels) + '\n')
+
+    ###############################
+    #   Create the generators 
+    ###############################    
+    train_generator = BatchGenerator(
+        instances           = train_ints, 
+        anchors             = config['model']['anchors'],   
+        labels              = labels,        
+        downsample          = 32, # ratio between network input's size and network output's size, 32 for YOLOv3
+        max_box_per_image   = max_box_per_image,
+        batch_size          = config['train']['batch_size'],
+        min_net_size        = config['model']['min_input_size'],
+        max_net_size        = config['model']['max_input_size'],   
+        shuffle             = True, 
+        jitter              = 0.3, 
+        norm                = normalize
+    )
+    # the validation generator uses the same settings except that jitter is 0.0
+    valid_generator = BatchGenerator(
+        instances           = valid_ints, 
+        anchors             = config['model']['anchors'],   
+        labels              = labels,        
+        downsample          = 32, # ratio between network input's size and network output's size, 32 for YOLOv3
+        max_box_per_image   = max_box_per_image,
+        batch_size          = config['train']['batch_size'],
+        min_net_size        = config['model']['min_input_size'],
+        max_net_size        = config['model']['max_input_size'],   
+        shuffle             = True, 
+        jitter              = 0.0, 
+        norm                = normalize
+    )
+
+    ###############################
+    #   Create the model 
+    ###############################
+    if os.path.exists(config['train']['saved_weights_name']):  # saved weights already exist: no warm-up epochs
+        config['train']['warmup_epochs'] = 0
+    warmup_batches = config['train']['warmup_epochs'] * (config['train']['train_times']*len(train_generator))   
+    # 'gpus' is a comma-separated string; the number of entries is the GPU count
+    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
+    multi_gpu = len(config['train']['gpus'].split(','))
+
+    train_model, infer_model = create_model(
+        nb_class            = len(labels), 
+        anchors             = config['model']['anchors'], 
+        max_box_per_image   = max_box_per_image, 
+        max_grid            = [config['model']['max_input_size'], config['model']['max_input_size']], 
+        batch_size          = config['train']['batch_size'], 
+        warmup_batches      = warmup_batches,
+        ignore_thresh       = config['train']['ignore_thresh'],
+        multi_gpu           = multi_gpu,
+        saved_weights_name  = config['train']['saved_weights_name'],
+        lr                  = config['train']['learning_rate'],
+        grid_scales         = config['train']['grid_scales'],
+        obj_scale           = config['train']['obj_scale'],
+        noobj_scale         = config['train']['noobj_scale'],
+        xywh_scale          = config['train']['xywh_scale'],
+        class_scale         = config['train']['class_scale'],
+    )
+
+    ###############################
+    #   Kick off the training
+    ###############################
+    callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model)
+    # infer_model is what the checkpoint callback receives as model_to_save
+    train_model.fit_generator(
+        generator        = train_generator, 
+        steps_per_epoch  = len(train_generator) * config['train']['train_times'], 
+        epochs           = config['train']['nb_epochs'] + config['train']['warmup_epochs'], 
+        verbose          = 2 if config['train']['debug'] else 1,
+        callbacks        = callbacks, 
+        workers          = 4,
+        max_queue_size   = 8
+    )
+
+    # make a GPU version of infer_model for evaluation
+    if multi_gpu > 1:
+        infer_model = load_model(config['train']['saved_weights_name'])
+
+    ###############################
+    #   Run the evaluation
+    ###############################   
+    # compute mAP for all the classes
+    average_precisions = evaluate(infer_model, valid_generator)
+
+    # print the score (the keys of average_precisions index into labels)
+    for label, average_precision in average_precisions.items():
+        print(labels[label] + ': {:.4f}'.format(average_precision))
+    print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))           
+
+if __name__ == '__main__':  # command-line entry point
+    argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset')
+    argparser.add_argument('-c', '--conf', help='path to configuration file')   
+    # the parsed namespace is passed on; _main_ reads args.conf from it
+    args = argparser.parse_args()
+    _main_(args)
diff --git a/utils/tools.py b/utils/tools.py
new file mode 100644
index 000000000..a5e852b09
--- /dev/null
+++ b/utils/tools.py
@@ -0,0 +1,345 @@
+import cv2
+import numpy as np
+import os
+from .bbox import BoundBox, bbox_iou
+from scipy.special import expit
+
+def _sigmoid(x):  # thin wrapper: delegates to scipy.special.expit (the logistic sigmoid)
+    return expit(x)
+
+def makedirs(path):
+    """Create `path` with its parents; an OSError is swallowed only if `path` is a directory."""
+    try: os.makedirs(path)
+    except OSError:
+        already_there = os.path.isdir(path)
+        if not already_there: raise
+
+def evaluate(model, 
+             generator, 
+             iou_threshold=0.5,
+             obj_thresh=0.5,
+             nms_thresh=0.45,
+             net_h=416,
+             net_w=416,
+             save_path=None):
+    """ Evaluate a given dataset using a given model.
+    code originally from https://github.com/fizyr/keras-retinanet
+
+    # Arguments
+        model           : The model to evaluate.
+        generator       : The generator that represents the dataset to evaluate.
+        iou_threshold   : The threshold used to consider when a detection is positive or negative.
+        obj_thresh      : The threshold used to distinguish between object and non-object
+        nms_thresh      : The threshold used to determine whether two detections are duplicates
+        net_h           : The height of the input image to the model, higher value results in better accuracy
+        net_w           : The width of the input image to the model
+        save_path       : Accepted for compatibility; not used by this function.
+    # Returns
+        A dict mapping each class index to its average precision.
+    """    
+    # gather all detections and annotations
+    all_detections     = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+    all_annotations    = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
+
+    for i in range(generator.size()):
+        raw_image = [generator.load_image(i)]
+
+        # make the boxes and the labels
+        pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]
+
+        score = np.array([box.get_score() for box in pred_boxes])
+        pred_labels = np.array([box.label for box in pred_boxes])        
+        
+        if len(pred_boxes) > 0:
+            pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes]) 
+        else:
+            pred_boxes = np.array([[]])  # empty 2-D placeholder so the indexing below still works
+        
+        # sort the boxes and the labels according to scores
+        score_sort = np.argsort(-score)
+        pred_labels = pred_labels[score_sort]
+        pred_boxes  = pred_boxes[score_sort]
+        
+        # copy detections to all_detections
+        for label in range(generator.num_classes()):
+            all_detections[i][label] = pred_boxes[pred_labels == label, :]
+
+        annotations = generator.load_annotation(i)
+        
+        # copy annotations to all_annotations (column 4 is compared to the class index; columns 0-3 are kept)
+        for label in range(generator.num_classes()):
+            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
+
+    # compute mAP by comparing all detections and all annotations
+    average_precisions = {}
+    
+    for label in range(generator.num_classes()):
+        false_positives = np.zeros((0,))
+        true_positives  = np.zeros((0,))
+        scores          = np.zeros((0,))
+        num_annotations = 0.0
+
+        for i in range(generator.size()):
+            detections           = all_detections[i][label]
+            annotations          = all_annotations[i][label]
+            num_annotations     += annotations.shape[0]
+            detected_annotations = []
+
+            for d in detections:
+                scores = np.append(scores, d[4])
+                # with no annotation of this class in the image, every detection is a false positive
+                if annotations.shape[0] == 0:
+                    false_positives = np.append(false_positives, 1)
+                    true_positives  = np.append(true_positives, 0)
+                    continue
+                # pick the annotation with the largest overlap with this detection
+                overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
+                assigned_annotation = np.argmax(overlaps, axis=1)
+                max_overlap         = overlaps[0, assigned_annotation]
+                # an annotation can be claimed once; a later detection of it counts as a false positive
+                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
+                    false_positives = np.append(false_positives, 0)
+                    true_positives  = np.append(true_positives, 1)
+                    detected_annotations.append(assigned_annotation)
+                else:
+                    false_positives = np.append(false_positives, 1)
+                    true_positives  = np.append(true_positives, 0)
+
+        # no annotations -> AP for this class is 0 (is this correct?)
+        if num_annotations == 0:
+            average_precisions[label] = 0
+            continue
+
+        # sort by score
+        indices         = np.argsort(-scores)
+        false_positives = false_positives[indices]
+        true_positives  = true_positives[indices]
+
+        # compute false positives and true positives
+        false_positives = np.cumsum(false_positives)
+        true_positives  = np.cumsum(true_positives)
+
+        # compute recall and precision
+        recall    = true_positives / num_annotations
+        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
+
+        # compute average precision
+        average_precision  = compute_ap(recall, precision)  
+        average_precisions[label] = average_precision
+
+    return average_precisions    
+
+def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
+    """Undo the letterbox in place: map box corners from network-input fractions to image pixels."""
+    if (float(net_w)/image_w) < (float(net_h)/image_h):
+        new_w = net_w
+        new_h = (image_h*net_w)/image_w
+    else:
+        new_h = net_h  # height-bound case fills the network height (net_w here broke non-square inputs)
+        new_w = (image_w*net_h)/image_h
+    # offset and scale of the embedded image are the same for every box: compute them once
+    x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
+    y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
+    for box in boxes:
+        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
+        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
+        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
+        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
+        
+def do_nms(boxes, nms_thresh):
+    """Per-class non-maximum suppression, in place: zero classes[c] of boxes overlapping a better one."""
+    if len(boxes) == 0:
+        return
+
+    for cls in range(len(boxes[0].classes)):
+        # box indices ordered by descending score for this class
+        order = np.argsort([-candidate.classes[cls] for candidate in boxes])
+        for rank, keep_idx in enumerate(order):
+            kept = boxes[keep_idx]
+            # a box whose score for this class is zero does not suppress anything
+            if kept.classes[cls] == 0:
+                continue
+
+            for other_idx in order[rank + 1:]:
+                other = boxes[other_idx]
+                # an IoU at or above the threshold suppresses the lower-ranked box
+                if bbox_iou(kept, other) >= nms_thresh:
+                    other.classes[cls] = 0
+
+def decode_netout(netout, anchors, obj_thresh, net_h, net_w):  # turn one raw output grid into a list of BoundBox
+    grid_h, grid_w = netout.shape[:2]
+    nb_box = 3  # boxes per grid cell; anchors is read as (width, height) pairs, one pair per box
+    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
+    nb_class = netout.shape[-1] - 5  # NOTE(review): computed but not used below
+
+    boxes = []
+    # NOTE(review): reshape usually returns a view, so these writes may also change the caller's array -- confirm
+    netout[..., :2]  = _sigmoid(netout[..., :2])
+    netout[..., 4]   = _sigmoid(netout[..., 4])
+    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
+    netout[..., 5:] *= netout[..., 5:] > obj_thresh
+
+    for i in range(grid_h*grid_w):
+        row = i // grid_w
+        col = i % grid_w
+        
+        for b in range(nb_box):
+            # index 4 holds the objectness score
+            objectness = netout[row, col, b, 4]
+            
+            if(objectness <= obj_thresh): continue
+            
+            # first 4 elements are x, y, w, and h
+            x, y, w, h = netout[row,col,b,:4]
+
+            x = (col + x) / grid_w # center position, unit: image width
+            y = (row + y) / grid_h # center position, unit: image height
+            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
+            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height  
+            
+            # last elements are class probabilities
+            classes = netout[row,col,b,5:]
+            # corners are derived from the centre and the size
+            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
+
+            boxes.append(box)
+
+    return boxes
+
+def preprocess_input(image, net_h, net_w):
+    """Letterbox `image` into a (1, net_h, net_w, 3) array: channels reversed, divided by 255, padded with 0.5."""
+    src_h, src_w, _ = image.shape
+
+    # fit the binding dimension exactly and scale the other one with integer division
+    if (float(net_w)/src_w) < (float(net_h)/src_h):
+        fit_w, fit_h = net_w, (src_h * net_w)//src_w
+    else:
+        fit_w, fit_h = (src_w * net_h)//src_h, net_h
+
+    # cv2.resize takes its target size as (width, height)
+    scaled = cv2.resize(image[:,:,::-1]/255., (fit_w, fit_h))
+
+    # paste the resized picture into the middle of a canvas filled with 0.5
+    top, bottom = (net_h-fit_h)//2, (net_h+fit_h)//2
+    left, right = (net_w-fit_w)//2, (net_w+fit_w)//2
+    canvas = np.ones((net_h, net_w, 3)) * 0.5
+    canvas[top:bottom, left:right, :] = scaled
+
+    return np.expand_dims(canvas, 0)
+
+def normalize(image):  # divide by 255.; presumably maps 8-bit pixel values into [0, 1] -- confirm input range
+    return image/255.
+       
+def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
+    """Run `model` on a list of raw images; return one list of decoded, NMS-filtered boxes per image."""
+    nb_images           = len(images)
+    batch_input         = np.zeros((nb_images, net_h, net_w, 3))
+
+    # preprocess the input
+    for i in range(nb_images):
+        batch_input[i] = preprocess_input(images[i], net_h, net_w)        
+
+    # run the prediction
+    batch_output = model.predict_on_batch(batch_input)
+    batch_boxes  = [None]*nb_images
+
+    for i in range(nb_images):
+        yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
+        boxes = []
+
+        # decode the output of the network
+        for j in range(len(yolos)):
+            yolo_anchors = anchors[(2-j)*6:(3-j)*6] # config['model']['anchors']
+            boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)
+        # correct the sizes of the bounding boxes with this image's own dimensions, not the first image's
+        image_h, image_w, _ = images[i].shape
+        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
+
+        # suppress non-maximal boxes
+        do_nms(boxes, nms_thresh)        
+           
+        batch_boxes[i] = boxes
+
+    return batch_boxes        
+
+def compute_overlap(a, b):
+    """
+    Pairwise intersection-over-union of two sets of boxes (only columns 0-3 are read).
+    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+    Parameters
+    ----------
+    a: (N, 4) ndarray of float
+    b: (K, 4) ndarray of float
+    Returns
+    -------
+    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
+    """
+    # corners of `a` as column vectors so they broadcast against the rows of `b`
+    a_x1, a_y1 = a[:, 0][:, np.newaxis], a[:, 1][:, np.newaxis]
+    a_x2, a_y2 = a[:, 2][:, np.newaxis], a[:, 3][:, np.newaxis]
+    b_x1, b_y1, b_x2, b_y2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3]
+    # overlap extents, clamped at zero for disjoint boxes
+    inter_w = np.maximum(np.minimum(a_x2, b_x2) - np.maximum(a_x1, b_x1), 0)
+    inter_h = np.maximum(np.minimum(a_y2, b_y2) - np.maximum(a_y1, b_y1), 0)
+    intersection = inter_w * inter_h
+
+    # union = area(a) + area(b) - intersection, floored at eps so the division is safe
+    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
+    area_b = (b_x2 - b_x1) * (b_y2 - b_y1)
+    union = np.maximum(area_a + area_b - intersection, np.finfo(float).eps)
+    return intersection / union
+    
+def compute_ap(recall, precision):
+    """ Area under the precision envelope of a precision/recall curve.
+    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+
+    # Arguments
+        recall:    The recall curve (list).
+        precision: The precision curve (list).
+    # Returns
+        The average precision as computed in py-faster-rcnn.
+    """
+    # pad both curves with sentinel values so the first and last segments are closed
+    padded_recall    = np.concatenate(([0.], recall, [1.]))
+    padded_precision = np.concatenate(([0.], precision, [0.]))
+
+    # precision envelope: each entry becomes the maximum of itself and everything
+    # to its right, i.e. a running maximum taken from the end of the array
+    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]
+
+    # indices where recall changes value; only those segments have non-zero width
+    steps = np.where(padded_recall[1:] != padded_recall[:-1])[0]
+
+    # sum of (recall step width) * (envelope precision at the right end of the step)
+    widths = padded_recall[steps + 1] - padded_recall[steps]
+    heights = envelope[steps + 1]
+    ap = np.sum(widths * heights)
+    return ap
+
+def _softmax(x, axis=-1):
+    # softmax along `axis`; the maximum is subtracted before exponentiating
+    exps = np.exp(x - np.amax(x, axis, keepdims=True))
+    total = exps.sum(axis, keepdims=True)
+    return exps / total
+
+def draw_boxes(image, boxes, labels, obj_thresh):
+    """Draw a green rectangle and caption on `image` for every box with a class score above obj_thresh."""
+    green = (0,255,0)
+    for box in boxes:
+        # names of all classes above the threshold, in label order
+        hits = []
+        for idx, name in enumerate(labels):
+            if box.classes[idx] > obj_thresh:
+                hits.append(name)
+                print(name + ': ' + str(box.classes[idx]*100) + '%')
+
+        # nothing cleared the threshold: this box is not drawn
+        if not hits:
+            continue
+
+        cv2.rectangle(image, (box.xmin,box.ymin), (box.xmax,box.ymax), green, 3)
+        caption = ''.join(hits) + ' ' + str(box.get_score())
+        cv2.putText(image, caption, (box.xmin, box.ymin - 13),
+                    cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0], green, 2)
+
+    return image
\ No newline at end of file
diff --git a/utils/weightreader.py b/utils/weightreader.py
new file mode 100644
index 000000000..02c4de8c3
--- /dev/null
+++ b/utils/weightreader.py
@@ -0,0 +1,69 @@
+import argparse
+import os
+import numpy as np
+from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
+from keras.layers.merge import add, concatenate
+from keras.models import Model
+import struct
+import cv2
+
+
+class WeightReader:
+    """Sequential reader over the float32 payload of a binary weights file."""
+
+    def __init__(self, weight_file):
+        """Read `weight_file`, skip its version header, keep the rest as float32."""
+        with open(weight_file, 'rb') as w_f:
+            # header starts with three native int32: major, minor, revision
+            major, minor, _revision = struct.unpack('3i', w_f.read(12))
+
+            # skip the rest of the header: 8 bytes for newer versions, else 4
+            if (major*10 + minor) >= 2 and major < 1000 and minor < 1000:
+                w_f.read(8)
+            else:
+                w_f.read(4)
+
+            binary = w_f.read()
+
+        self.offset = 0
+        self.all_weights = np.frombuffer(binary, dtype='float32')
+
+    def read_bytes(self, size):
+        """Return the next `size` float32 values (not bytes) and advance the offset.
+
+        Raises ValueError on overrun instead of silently returning a short slice.
+        """
+        size = int(size)
+        if self.offset + size > len(self.all_weights):
+            raise ValueError("weight file exhausted: need %d values at offset %d, have %d"
+                             % (size, self.offset, len(self.all_weights)))
+        self.offset = self.offset + size
+        return self.all_weights[self.offset-size:self.offset]
+
+    def load_weights(self, model):
+        """Fill the `conv_<i>` / `bnorm_<i>` layers of `model` (i in 0..105) in file order."""
+        for i in range(106):
+            try:
+                # guard only the lookup so later shape errors are not reported as "no layer"
+                conv_layer = model.get_layer('conv_' + str(i))
+            except ValueError:
+                print("no convolution #" + str(i))
+                continue
+            print("loading weights of convolution #" + str(i))
+
+            if i not in [81, 93, 105]:  # indices hard-coded as having no bnorm layer
+                norm_layer = model.get_layer('bnorm_' + str(i))
+                size = np.prod(norm_layer.get_weights()[0].shape)
+                beta, gamma, mean, var = (self.read_bytes(size) for _ in range(4))
+                norm_layer.set_weights([gamma, beta, mean, var])
+
+            shapes = [w.shape for w in conv_layer.get_weights()]
+            # file order is bias (when the layer has one), then the kernel
+            bias = [self.read_bytes(np.prod(shapes[1]))] if len(shapes) > 1 else []
+            kernel = self.read_bytes(np.prod(shapes[0])).reshape(list(reversed(shapes[0])))
+            conv_layer.set_weights([kernel.transpose([2,3,1,0])] + bias)
+
+    def reset(self):
+        """Rewind to the first weight."""
+        self.offset = 0
+
diff --git a/yolo3_one_file_to_detect_them_all.py b/yolo3_one_file_to_detect_them_all.py
deleted file mode 100644
index 17ca64e22..000000000
--- a/yolo3_one_file_to_detect_them_all.py
+++ /dev/null
@@ -1,434 +0,0 @@
-import argparse
-import os
-import numpy as np
-from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
-from keras.layers.merge import add, concatenate
-from keras.models import Model
-import struct
-import cv2
-
-np.set_printoptions(threshold=np.nan)
-os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"]="0"
-
-argparser = argparse.ArgumentParser(
-    description='test yolov3 network with coco weights')
-
-argparser.add_argument(
-    '-w',
-    '--weights',
-    help='path to weights file')
-
-argparser.add_argument(
-    '-i',
-    '--image',
-    help='path to image file')
-
-class WeightReader:
-    def __init__(self, weight_file):
-        with open(weight_file, 'rb') as w_f:
-            major,    = struct.unpack('i', w_f.read(4))
-            minor,    = struct.unpack('i', w_f.read(4))
-            revision, = struct.unpack('i', w_f.read(4))
-
-            if (major*10 + minor) >= 2 and major < 1000 and minor < 1000:
-                w_f.read(8)
-            else:
-                w_f.read(4)
-
-            transpose = (major > 1000) or (minor > 1000)
-            
-            binary = w_f.read()
-
-        self.offset = 0
-        self.all_weights = np.frombuffer(binary, dtype='float32')
-        
-    def read_bytes(self, size):
-        self.offset = self.offset + size
-        return self.all_weights[self.offset-size:self.offset]
-
-    def load_weights(self, model):
-        for i in range(106):
-            try:
-                conv_layer = model.get_layer('conv_' + str(i))
-                print("loading weights of convolution #" + str(i))
-
-                if i not in [81, 93, 105]:
-                    norm_layer = model.get_layer('bnorm_' + str(i))
-
-                    size = np.prod(norm_layer.get_weights()[0].shape)
-
-                    beta  = self.read_bytes(size) # bias
-                    gamma = self.read_bytes(size) # scale
-                    mean  = self.read_bytes(size) # mean
-                    var   = self.read_bytes(size) # variance            
-
-                    weights = norm_layer.set_weights([gamma, beta, mean, var])  
-
-                if len(conv_layer.get_weights()) > 1:
-                    bias   = self.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
-                    kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
-                    
-                    kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
-                    kernel = kernel.transpose([2,3,1,0])
-                    conv_layer.set_weights([kernel, bias])
-                else:
-                    kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
-                    kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
-                    kernel = kernel.transpose([2,3,1,0])
-                    conv_layer.set_weights([kernel])
-            except ValueError:
-                print("no convolution #" + str(i))     
-    
-    def reset(self):
-        self.offset = 0
-
-class BoundBox:
-    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
-        self.xmin = xmin
-        self.ymin = ymin
-        self.xmax = xmax
-        self.ymax = ymax
-        
-        self.objness = objness
-        self.classes = classes
-
-        self.label = -1
-        self.score = -1
-
-    def get_label(self):
-        if self.label == -1:
-            self.label = np.argmax(self.classes)
-        
-        return self.label
-    
-    def get_score(self):
-        if self.score == -1:
-            self.score = self.classes[self.get_label()]
-            
-        return self.score
-
-def _conv_block(inp, convs, skip=True):
-    x = inp
-    count = 0
-    
-    for conv in convs:
-        if count == (len(convs) - 2) and skip:
-            skip_connection = x
-        count += 1
-        
-        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
-        x = Conv2D(conv['filter'], 
-                   conv['kernel'], 
-                   strides=conv['stride'], 
-                   padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top
-                   name='conv_' + str(conv['layer_idx']), 
-                   use_bias=False if conv['bnorm'] else True)(x)
-        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
-        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
-
-    return add([skip_connection, x]) if skip else x
-
-def _interval_overlap(interval_a, interval_b):
-    x1, x2 = interval_a
-    x3, x4 = interval_b
-
-    if x3 < x1:
-        if x4 < x1:
-            return 0
-        else:
-            return min(x2,x4) - x1
-    else:
-        if x2 < x3:
-             return 0
-        else:
-            return min(x2,x4) - x3          
-
-def _sigmoid(x):
-    return 1. / (1. + np.exp(-x))
-
-def bbox_iou(box1, box2):
-    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
-    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
-    
-    intersect = intersect_w * intersect_h
-
-    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
-    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
-    
-    union = w1*h1 + w2*h2 - intersect
-    
-    return float(intersect) / union
-
-def make_yolov3_model():
-    input_image = Input(shape=(None, None, 3))
-
-    # Layer  0 => 4
-    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
-                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
-                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
-                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
-
-    # Layer  5 => 8
-    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
-                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
-                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
-
-    # Layer  9 => 11
-    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
-                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
-
-    # Layer 12 => 15
-    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
-                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
-                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
-
-    # Layer 16 => 36
-    for i in range(7):
-        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
-                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
-        
-    skip_36 = x
-        
-    # Layer 37 => 40
-    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
-                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
-                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
-
-    # Layer 41 => 61
-    for i in range(7):
-        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
-                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
-        
-    skip_61 = x
-        
-    # Layer 62 => 65
-    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
-                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
-                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
-
-    # Layer 66 => 74
-    for i in range(3):
-        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
-                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
-        
-    # Layer 75 => 79
-    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
-                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
-                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
-                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
-                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)
-
-    # Layer 80 => 82
-    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
-                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)
-
-    # Layer 83 => 86
-    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
-    x = UpSampling2D(2)(x)
-    x = concatenate([x, skip_61])
-
-    # Layer 87 => 91
-    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
-                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
-                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
-                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
-                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)
-
-    # Layer 92 => 94
-    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
-                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)
-
-    # Layer 95 => 98
-    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
-    x = UpSampling2D(2)(x)
-    x = concatenate([x, skip_36])
-
-    # Layer 99 => 106
-    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
-                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
-                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
-                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
-                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
-                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
-                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)
-
-    model = Model(input_image, [yolo_82, yolo_94, yolo_106])    
-    return model
-
-def preprocess_input(image, net_h, net_w):
-    new_h, new_w, _ = image.shape
-
-    # determine the new size of the image
-    if (float(net_w)/new_w) < (float(net_h)/new_h):
-        new_h = (new_h * net_w)/new_w
-        new_w = net_w
-    else:
-        new_w = (new_w * net_h)/new_h
-        new_h = net_h
-
-    # resize the image to the new size
-    resized = cv2.resize(image[:,:,::-1]/255., (int(new_w), int(new_h)))
-
-    # embed the image into the standard letter box
-    new_image = np.ones((net_h, net_w, 3)) * 0.5
-    new_image[int((net_h-new_h)//2):int((net_h+new_h)//2), int((net_w-new_w)//2):int((net_w+new_w)//2), :] = resized
-    new_image = np.expand_dims(new_image, 0)
-
-    return new_image
-
-def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w):
-    grid_h, grid_w = netout.shape[:2]
-    nb_box = 3
-    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
-    nb_class = netout.shape[-1] - 5
-
-    boxes = []
-
-    netout[..., :2]  = _sigmoid(netout[..., :2])
-    netout[..., 4:]  = _sigmoid(netout[..., 4:])
-    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
-    netout[..., 5:] *= netout[..., 5:] > obj_thresh
-
-    for i in range(grid_h*grid_w):
-        row = i / grid_w
-        col = i % grid_w
-        
-        for b in range(nb_box):
-            # 4th element is objectness score
-            objectness = netout[int(row)][int(col)][b][4]
-            #objectness = netout[..., :4]
-            
-            if(objectness.all() <= obj_thresh): continue
-            
-            # first 4 elements are x, y, w, and h
-            x, y, w, h = netout[int(row)][int(col)][b][:4]
-
-            x = (col + x) / grid_w # center position, unit: image width
-            y = (row + y) / grid_h # center position, unit: image height
-            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
-            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height  
-            
-            # last elements are class probabilities
-            classes = netout[int(row)][col][b][5:]
-            
-            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
-            #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes)
-
-            boxes.append(box)
-
-    return boxes
-
-def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
-    if (float(net_w)/image_w) < (float(net_h)/image_h):
-        new_w = net_w
-        new_h = (image_h*net_w)/image_w
-    else:
-        new_h = net_w
-        new_w = (image_w*net_h)/image_h
-        
-    for i in range(len(boxes)):
-        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
-        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
-        
-        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
-        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
-        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
-        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
-        
-def do_nms(boxes, nms_thresh):
-    if len(boxes) > 0:
-        nb_class = len(boxes[0].classes)
-    else:
-        return
-        
-    for c in range(nb_class):
-        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
-
-        for i in range(len(sorted_indices)):
-            index_i = sorted_indices[i]
-
-            if boxes[index_i].classes[c] == 0: continue
-
-            for j in range(i+1, len(sorted_indices)):
-                index_j = sorted_indices[j]
-
-                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
-                    boxes[index_j].classes[c] = 0
-                    
-def draw_boxes(image, boxes, labels, obj_thresh):
-    for box in boxes:
-        label_str = ''
-        label = -1
-        
-        for i in range(len(labels)):
-            if box.classes[i] > obj_thresh:
-                label_str += labels[i]
-                label = i
-                print(labels[i] + ': ' + str(box.classes[i]*100) + '%')
-                
-        if label >= 0:
-            cv2.rectangle(image, (box.xmin,box.ymin), (box.xmax,box.ymax), (0,255,0), 3)
-            cv2.putText(image, 
-                        label_str + ' ' + str(box.get_score()), 
-                        (box.xmin, box.ymin - 13), 
-                        cv2.FONT_HERSHEY_SIMPLEX, 
-                        1e-3 * image.shape[0], 
-                        (0,255,0), 2)
-        
-    return image      
-
-def _main_(args):
-    weights_path = args.weights
-    image_path   = args.image
-
-    # set some parameters
-    net_h, net_w = 416, 416
-    obj_thresh, nms_thresh = 0.5, 0.45
-    anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
-    labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
-              "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
-              "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
-              "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
-              "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
-              "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
-              "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
-              "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
-              "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
-              "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
-
-    # make the yolov3 model to predict 80 classes on COCO
-    yolov3 = make_yolov3_model()
-
-    # load the weights trained on COCO into the model
-    weight_reader = WeightReader(weights_path)
-    weight_reader.load_weights(yolov3)
-
-    # preprocess the image
-    image = cv2.imread(image_path)
-    image_h, image_w, _ = image.shape
-    new_image = preprocess_input(image, net_h, net_w)
-
-    # run the prediction
-    yolos = yolov3.predict(new_image)
-    boxes = []
-
-    for i in range(len(yolos)):
-        # decode the output of the network
-        boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w)
-
-    # correct the sizes of the bounding boxes
-    correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
-
-    # suppress non-maximal boxes
-    do_nms(boxes, nms_thresh)     
-
-    # draw bounding boxes on the image using labels
-    draw_boxes(image, boxes, labels, obj_thresh) 
- 
-    # write the image with bounding boxes to file
-    cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], (image).astype('uint8')) 
-
-if __name__ == '__main__':
-    args = argparser.parse_args()
-    _main_(args)