Skip to content
This repository was archived by the owner on Dec 19, 2024. It is now read-only.

Commit 3c1b9e6

Browse files
Orthographic camera (#172)
* Add orthographic projection * Update plotting functions to handle orthographic option * Added testing
1 parent 37310fa commit 3c1b9e6

File tree

5 files changed

+139
-18
lines changed

5 files changed

+139
-18
lines changed

datasetinsights/datasets/synthetic.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,15 @@ def read_bounding_box_3d(annotation, label_mappings=None):
2727

2828
for b in annotation:
2929
label_id = b["label_id"]
30-
translation = b["translation"]
31-
translation = [translation["x"], translation["y"], translation["z"]]
32-
size = b["size"]
33-
size = [size["x"], size["y"], size["z"]]
30+
translation = (
31+
b["translation"]["x"],
32+
b["translation"]["y"],
33+
b["translation"]["z"],
34+
)
35+
size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
3436
rotation = b["rotation"]
3537
rotation = Quaternion(
36-
b=rotation["x"], c=rotation["y"], d=rotation["z"], a=rotation["w"]
38+
x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
3739
)
3840

3941
if label_mappings and label_id not in label_mappings:

datasetinsights/stats/visualization/bbox3d_plot.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def _add_single_bbox3d_on_image(
8080

8181

8282
def add_single_bbox3d_on_image(
83-
image, box, proj, color=None, box_line_width=2,
83+
image, box, proj, color=None, box_line_width=2, orthographic=False,
8484
):
8585
""" Add single 3D bounding box on a given image.
8686
@@ -91,6 +91,7 @@ def add_single_bbox3d_on_image(
9191
color(tuple): RGBA color of the bounding box. Defaults to None. If
9292
color = None the tuple of [0, 255, 0, 255] (Green) will be used.
9393
box_line_width (int): line width of the bounding boxes. Defaults to 2.
94+
orthographic (bool): True if proj is an orthographic projection matrix, False if it is a perspective one. Defaults to False.
9495
"""
9596
img_height, img_width, _ = image.shape
9697

@@ -104,14 +105,20 @@ def add_single_bbox3d_on_image(
104105
bur = box.front_right_top_pt
105106
blr = box.front_right_bottom_pt
106107

107-
fll_raster = _project_pt_to_pixel_location(fll, proj, img_height, img_width)
108-
ful_raster = _project_pt_to_pixel_location(ful, proj, img_height, img_width)
109-
fur_raster = _project_pt_to_pixel_location(fur, proj, img_height, img_width)
110-
flr_raster = _project_pt_to_pixel_location(flr, proj, img_height, img_width)
111-
bll_raster = _project_pt_to_pixel_location(bll, proj, img_height, img_width)
112-
bul_raster = _project_pt_to_pixel_location(bul, proj, img_height, img_width)
113-
bur_raster = _project_pt_to_pixel_location(bur, proj, img_height, img_width)
114-
blr_raster = _project_pt_to_pixel_location(blr, proj, img_height, img_width)
108+
project_pt_to_pixel = (
109+
_project_pt_to_pixel_location_orthographic
110+
if orthographic
111+
else _project_pt_to_pixel_location
112+
)
113+
114+
fll_raster = project_pt_to_pixel(fll, proj, img_height, img_width)
115+
ful_raster = project_pt_to_pixel(ful, proj, img_height, img_width)
116+
fur_raster = project_pt_to_pixel(fur, proj, img_height, img_width)
117+
flr_raster = project_pt_to_pixel(flr, proj, img_height, img_width)
118+
bll_raster = project_pt_to_pixel(bll, proj, img_height, img_width)
119+
bul_raster = project_pt_to_pixel(bul, proj, img_height, img_width)
120+
bur_raster = project_pt_to_pixel(bur, proj, img_height, img_width)
121+
blr_raster = project_pt_to_pixel(blr, proj, img_height, img_width)
115122

116123
_add_single_bbox3d_on_image(
117124
image,
@@ -129,7 +136,7 @@ def add_single_bbox3d_on_image(
129136

130137

131138
def _project_pt_to_pixel_location(pt, projection, img_height, img_width):
132-
""" Projects a 3D coordinate into a pixel location.
139+
""" Projects a 3D coordinate into a pixel location from a perspective camera.
133140
134141
Applies the passed in projection matrix to project a point from the camera's
135142
coordinate space into pixel space.
@@ -161,3 +168,43 @@ def _project_pt_to_pixel_location(pt, projection, img_height, img_width):
161168
int((_pt[1] * img_height) / 2.0 + (img_height * 0.5)),
162169
]
163170
)
171+
172+
173+
def _project_pt_to_pixel_location_orthographic(
174+
pt, projection, img_height, img_width
175+
):
176+
""" Projects a 3D coordinate into a pixel location from an orthographic camera.
177+
178+
Applies the passed in projection matrix to project a point from the
179+
camera's coordinate space into pixel space.
180+
181+
For a description of the math used in this method, see:
182+
https://www.scratchapixel.com/lessons/3d-basic-rendering/perspective-and-
183+
orthographic-projection-matrix/projection-matrix-introduction
184+
185+
Args:
186+
pt (numpy array): The 3D point to project.
187+
projection (numpy 2D array): The camera's 3x3 projection matrix.
188+
img_height (int): The height of the image in pixels.
189+
img_width (int): The width of the image in pixels.
190+
191+
Returns:
192+
list: a list with two int values (x and y)
193+
representing a point's pixel coordinate in an image.
194+
"""
195+
196+
# The 'y' component needs to be flipped because of how Unity works
197+
projection = numpy.array(
198+
[
199+
[projection[0][0], 0, 0],
200+
[0, -projection[1][1], 0],
201+
[0, 0, projection[2][2]],
202+
]
203+
)
204+
temp = projection.dot(pt)
205+
206+
pixel = [
207+
int((temp[0] + 1) * 0.5 * img_width),
208+
int((temp[1] + 1) * 0.5 * img_height),
209+
]
210+
return pixel

datasetinsights/stats/visualization/plots.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def _process_label(bbox, label_mappings=None):
7878
return label
7979

8080

81-
def plot_bboxes3d(image, bboxes, projection, colors=None):
81+
def plot_bboxes3d(image, bboxes, projection, colors=None, orthographic=False):
8282
""" Plot an image with 3D bounding boxes
8383
8484
Currently this method should only be used for ground truth images, and
@@ -92,6 +92,7 @@ def plot_bboxes3d(image, bboxes, projection, colors=None):
9292
captured the ground truth.
9393
colors (list): a color list for boxes. Defaults to none. If
9494
colors = None, it will default to coloring all boxes green.
95+
orthographic (bool): True if projection is an orthographic projection matrix, False if it is a perspective one. Defaults to False.
9596
9697
Returns:
9798
PIL image: a PIL image with bounding boxes drawn on it.
@@ -101,7 +102,9 @@ def plot_bboxes3d(image, bboxes, projection, colors=None):
101102

102103
for i, box in enumerate(bboxes):
103104
color = colors[i] if colors else None
104-
add_single_bbox3d_on_image(np_image, box, projection, color)
105+
add_single_bbox3d_on_image(
106+
np_image, box, projection, color, orthographic=orthographic
107+
)
105108

106109
return Image.fromarray(np_image)
107110

tests/test_bbox.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from datasetinsights.io.bbox import BBox2D, BBox3D, group_bbox2d_per_label
44
from datasetinsights.stats.visualization.bbox3d_plot import (
55
_project_pt_to_pixel_location,
6+
_project_pt_to_pixel_location_orthographic,
67
)
78

89

@@ -59,3 +60,28 @@ def test_project_pt_to_pixel_location():
5960
pixel_loc = _project_pt_to_pixel_location(pt, proj, img_height, img_width)
6061
assert pixel_loc[0] == 320
6162
assert pixel_loc[1] == 240
63+
64+
65+
def test_project_pt_to_pixel_location_orthographic():
66+
pt = [0, 0, 0]
67+
proj = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
68+
img_height = 480
69+
img_width = 640
70+
71+
pixel_loc = _project_pt_to_pixel_location_orthographic(
72+
pt, proj, img_height, img_width
73+
)
74+
assert pixel_loc[0] == 320
75+
assert pixel_loc[1] == 240
76+
77+
# more interesting case
78+
pt = [0.3, 0, 0]
79+
proj = numpy.array([[0.08951352, 0, 0], [0, 0.2, 0], [0, 0, -0.0020006]])
80+
81+
pixel_loc = _project_pt_to_pixel_location_orthographic(
82+
pt, proj, img_height, img_width
83+
)
84+
assert pixel_loc[0] == int(
85+
(proj[0][0] * pt[0] + 1) * 0.5 * img_width
86+
) # 328
87+
assert pixel_loc[1] == img_height // 2

tests/test_visual.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
import pandas as pd
66
import pytest
77
from PIL import Image
8+
from pyquaternion import Quaternion
89
from pytest import approx
910

10-
from datasetinsights.io.bbox import BBox2D
11+
from datasetinsights.io.bbox import BBox2D, BBox3D
1112
from datasetinsights.stats.visualization.bbox2d_plot import (
1213
_COLOR_NAME_TO_RGB,
1314
_add_label_on_image,
@@ -21,6 +22,7 @@
2122
model_performance_box_plot,
2223
model_performance_comparison_box_plot,
2324
plot_bboxes,
25+
plot_bboxes3d,
2426
plot_keypoints,
2527
)
2628

@@ -265,6 +267,47 @@ def test_add_single_bbox_on_image(mock, get_image_and_bbox):
265267
)
266268

267269

270+
def test_plot_bboxes3d():
271+
cur_dir = pathlib.Path(__file__).parent.absolute()
272+
img = Image.open(
273+
str(cur_dir / "mock_data" / "simrun" / "captures" / "camera_000.png")
274+
)
275+
boxes = [
276+
BBox3D(
277+
label=1,
278+
translation=(0, 0, 0),
279+
size=(1, 1, 1),
280+
rotation=Quaternion(x=1, y=0, z=0, w=0),
281+
sample_token=0,
282+
score=1,
283+
),
284+
BBox3D(
285+
label=2,
286+
translation=(0, 0, 0),
287+
size=(1, 1, 1),
288+
rotation=Quaternion(x=0, y=1, z=0, w=0),
289+
sample_token=0,
290+
score=1,
291+
),
292+
BBox3D(
293+
label=3,
294+
translation=(0, 0, 0),
295+
size=(1, 1, 1),
296+
rotation=Quaternion(x=0, y=0, z=1, w=0),
297+
sample_token=0,
298+
score=1,
299+
),
300+
]
301+
projection = [[0.08951352, 0, 0], [0, 0.2, 0], [0, 0, -0.0020006]]
302+
colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
303+
304+
with patch(
305+
"datasetinsights.stats.visualization.plots.add_single_bbox3d_on_image"
306+
) as mock:
307+
plot_bboxes3d(img, boxes, projection, colors)
308+
assert mock.call_count == len(boxes)
309+
310+
268311
@pytest.fixture
269312
def test_template():
270313
"""prepare a fake template"""

0 commit comments

Comments
 (0)