Orthographic camera (#172)

leopoldo-zugasti · web-flow · commit 3c1b9e6ba5fe · 2021-06-22T14:26:59.000-04:00
* Add orthographic projection

* Update plotting functions to handle orthographic option

* Add tests for orthographic projection and 3D bounding box plotting
diff --git a/datasetinsights/datasets/synthetic.py b/datasetinsights/datasets/synthetic.py
@@ -27,13 +27,15 @@ def read_bounding_box_3d(annotation, label_mappings=None):
 
     for b in annotation:
         label_id = b["label_id"]
-        translation = b["translation"]
-        translation = [translation["x"], translation["y"], translation["z"]]
-        size = b["size"]
-        size = [size["x"], size["y"], size["z"]]
+        translation = (
+            b["translation"]["x"],
+            b["translation"]["y"],
+            b["translation"]["z"],
+        )
+        size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
         rotation = b["rotation"]
         rotation = Quaternion(
-            b=rotation["x"], c=rotation["y"], d=rotation["z"], a=rotation["w"]
+            x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
         )
 
         if label_mappings and label_id not in label_mappings:
diff --git a/datasetinsights/stats/visualization/bbox3d_plot.py b/datasetinsights/stats/visualization/bbox3d_plot.py
@@ -80,7 +80,7 @@ def _add_single_bbox3d_on_image(
 
 
 def add_single_bbox3d_on_image(
-    image, box, proj, color=None, box_line_width=2,
+    image, box, proj, color=None, box_line_width=2, orthographic=False,
 ):
     """" Add single 3D bounding box on a given image.
 
@@ -91,6 +91,7 @@ def add_single_bbox3d_on_image(
         color(tuple): RGBA color of the bounding box. Defaults to None. If
         color = None the the tuple of [0, 255, 0, 255] (Green) will be used.
         box_line_width (int): line width of the bounding boxes. Defaults to 2.
+        orthographic (bool): true if proj is orthographic, else perspective
     """
     img_height, img_width, _ = image.shape
 
@@ -104,14 +105,20 @@ def add_single_bbox3d_on_image(
     bur = box.front_right_top_pt
     blr = box.front_right_bottom_pt
 
-    fll_raster = _project_pt_to_pixel_location(fll, proj, img_height, img_width)
-    ful_raster = _project_pt_to_pixel_location(ful, proj, img_height, img_width)
-    fur_raster = _project_pt_to_pixel_location(fur, proj, img_height, img_width)
-    flr_raster = _project_pt_to_pixel_location(flr, proj, img_height, img_width)
-    bll_raster = _project_pt_to_pixel_location(bll, proj, img_height, img_width)
-    bul_raster = _project_pt_to_pixel_location(bul, proj, img_height, img_width)
-    bur_raster = _project_pt_to_pixel_location(bur, proj, img_height, img_width)
-    blr_raster = _project_pt_to_pixel_location(blr, proj, img_height, img_width)
+    project_pt_to_pixel = (
+        _project_pt_to_pixel_location_orthographic
+        if orthographic
+        else _project_pt_to_pixel_location
+    )
+
+    fll_raster = project_pt_to_pixel(fll, proj, img_height, img_width)
+    ful_raster = project_pt_to_pixel(ful, proj, img_height, img_width)
+    fur_raster = project_pt_to_pixel(fur, proj, img_height, img_width)
+    flr_raster = project_pt_to_pixel(flr, proj, img_height, img_width)
+    bll_raster = project_pt_to_pixel(bll, proj, img_height, img_width)
+    bul_raster = project_pt_to_pixel(bul, proj, img_height, img_width)
+    bur_raster = project_pt_to_pixel(bur, proj, img_height, img_width)
+    blr_raster = project_pt_to_pixel(blr, proj, img_height, img_width)
 
     _add_single_bbox3d_on_image(
         image,
@@ -129,7 +136,7 @@ def add_single_bbox3d_on_image(
 
 
 def _project_pt_to_pixel_location(pt, projection, img_height, img_width):
-    """ Projects a 3D coordinate into a pixel location.
+    """ Projects a 3D coordinate into a pixel location from a perspective camera.
 
     Applies the passed in projection matrix to project a point from the camera's
     coordinate space into pixel space.
@@ -161,3 +168,43 @@ def _project_pt_to_pixel_location(pt, projection, img_height, img_width):
             int((_pt[1] * img_height) / 2.0 + (img_height * 0.5)),
         ]
     )
+
+
+def _project_pt_to_pixel_location_orthographic(
+    pt, projection, img_height, img_width
+):
+    """ Projects a 3D point to a pixel location for an orthographic camera.
+
+    Scales the point by the diagonal of the passed in projection matrix,
+    negating the 'y' term, then maps the resulting x and y values so that
+    -1 lands on pixel 0 and +1 lands on the image width/height.
+
+    For a description of the math used in this method, see:
+    https://www.scratchapixel.com/lessons/3d-basic-rendering/perspective-and-
+    orthographic-projection-matrix/projection-matrix-introduction
+
+    Args:
+        pt (numpy array): The 3D point to project.
+        projection (numpy 2D array): The camera's 3x3 projection matrix. Only
+            its diagonal entries are used.
+        img_height (int): The height of the image in pixels.
+        img_width (int): The width of the image in pixels.
+
+    Returns:
+        numpy array: a one-dimensional array with two values (x and y)
+        representing a point's pixel coordinate in an image.
+    """
+    # The 'y' component needs to be flipped because of how Unity works
+    projection = numpy.array(
+        [
+            [projection[0][0], 0, 0],
+            [0, -projection[1][1], 0],
+            [0, 0, projection[2][2]],
+        ]
+    )
+    temp = projection.dot(pt)
+
+    # Return a numpy array, as documented, rather than a plain list.
+    x_pixel = int((temp[0] + 1) * 0.5 * img_width)
+    y_pixel = int((temp[1] + 1) * 0.5 * img_height)
+    return numpy.array([x_pixel, y_pixel])
diff --git a/datasetinsights/stats/visualization/plots.py b/datasetinsights/stats/visualization/plots.py
@@ -78,7 +78,7 @@ def _process_label(bbox, label_mappings=None):
         return label
 
 
-def plot_bboxes3d(image, bboxes, projection, colors=None):
+def plot_bboxes3d(image, bboxes, projection, colors=None, orthographic=False):
     """ Plot an image with 3D bounding boxes
 
     Currently this method should only be used for ground truth images, and
@@ -92,6 +92,7 @@ def plot_bboxes3d(image, bboxes, projection, colors=None):
         captured the ground truth.
         colors (list): a color list for boxes. Defaults to none. If
         colors = None, it will default to coloring all boxes green.
+        orthographic (bool): True if projection is orthographic, not perspective
 
     Returns:
         PIL image: a PIL image with bounding boxes drawn on it.
@@ -101,7 +102,9 @@ def plot_bboxes3d(image, bboxes, projection, colors=None):
 
     for i, box in enumerate(bboxes):
         color = colors[i] if colors else None
-        add_single_bbox3d_on_image(np_image, box, projection, color)
+        add_single_bbox3d_on_image(
+            np_image, box, projection, color, orthographic=orthographic
+        )
 
     return Image.fromarray(np_image)
 
diff --git a/tests/test_bbox.py b/tests/test_bbox.py
@@ -3,6 +3,7 @@
 from datasetinsights.io.bbox import BBox2D, BBox3D, group_bbox2d_per_label
 from datasetinsights.stats.visualization.bbox3d_plot import (
     _project_pt_to_pixel_location,
+    _project_pt_to_pixel_location_orthographic,
 )
 
 
@@ -59,3 +60,28 @@ def test_project_pt_to_pixel_location():
     pixel_loc = _project_pt_to_pixel_location(pt, proj, img_height, img_width)
     assert pixel_loc[0] == 320
     assert pixel_loc[1] == 240
+
+
+def test_project_pt_to_pixel_location_orthographic():
+    # identity projection: the origin lands on the image center
+    pt = [0, 0, 0]
+    proj = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+    img_height = 480
+    img_width = 640
+
+    pixel_loc = _project_pt_to_pixel_location_orthographic(
+        pt, proj, img_height, img_width
+    )
+    assert pixel_loc[0] == 320
+    assert pixel_loc[1] == 240
+
+    # more interesting case: off-center in x and y; the y axis is flipped
+    pt = [0.3, 0.5, 0]
+    proj = numpy.array([[0.08951352, 0, 0], [0, 0.2, 0], [0, 0, -0.0020006]])
+
+    pixel_loc = _project_pt_to_pixel_location_orthographic(
+        pt, proj, img_height, img_width
+    )
+    x_proj, y_proj = proj[0][0] * pt[0], -proj[1][1] * pt[1]
+    assert pixel_loc[0] == int((x_proj + 1) * 0.5 * img_width)  # 328
+    assert pixel_loc[1] == int((y_proj + 1) * 0.5 * img_height)  # 216
diff --git a/tests/test_visual.py b/tests/test_visual.py
@@ -5,9 +5,10 @@
 import pandas as pd
 import pytest
 from PIL import Image
+from pyquaternion import Quaternion
 from pytest import approx
 
-from datasetinsights.io.bbox import BBox2D
+from datasetinsights.io.bbox import BBox2D, BBox3D
 from datasetinsights.stats.visualization.bbox2d_plot import (
     _COLOR_NAME_TO_RGB,
     _add_label_on_image,
@@ -21,6 +22,7 @@
     model_performance_box_plot,
     model_performance_comparison_box_plot,
     plot_bboxes,
+    plot_bboxes3d,
     plot_keypoints,
 )
 
@@ -265,6 +267,47 @@ def test_add_single_bbox_on_image(mock, get_image_and_bbox):
     )
 
 
+def test_plot_bboxes3d():
+    """Every box is drawn and the orthographic flag is forwarded."""
+    cur_dir = pathlib.Path(__file__).parent.absolute()
+    img = Image.open(
+        str(cur_dir / "mock_data" / "simrun" / "captures" / "camera_000.png")
+    )
+    # one unit box at the origin per label, each rotated about a different axis
+    axes = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
+    boxes = [
+        BBox3D(
+            label=label,
+            translation=(0, 0, 0),
+            size=(1, 1, 1),
+            rotation=Quaternion(x=x, y=y, z=z, w=0),
+            sample_token=0,
+            score=1,
+        )
+        for label, (x, y, z) in enumerate(axes, start=1)
+    ]
+    projection = [[0.08951352, 0, 0], [0, 0.2, 0], [0, 0, -0.0020006]]
+    colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
+
+    target = (
+        "datasetinsights.stats.visualization.plots.add_single_bbox3d_on_image"
+    )
+
+    # the drawing helper is mocked, so only the dispatch is checked here
+    with patch(target) as mock:
+        plot_bboxes3d(img, boxes, projection, colors)
+        assert mock.call_count == len(boxes)
+        # perspective is the default projection mode
+        for _, kwargs in mock.call_args_list:
+            assert kwargs["orthographic"] is False
+
+    with patch(target) as mock:
+        plot_bboxes3d(img, boxes, projection, colors, orthographic=True)
+        assert mock.call_count == len(boxes)
+        # the orthographic flag must reach every per-box draw call
+        for _, kwargs in mock.call_args_list:
+            assert kwargs["orthographic"] is True
+
 @pytest.fixture
 def test_template():
     """prepare a fake template"""