Skip to content

Commit f6885dc

Browse files
authored
Revert Tinygrad (commaai#34243)
* Revert "dmonitoringmodeld: use cl transform (commaai#34235)" — this reverts commit 684b0b9.
* Revert "load model before calling convert_fp16_to_fp32" — this reverts commit 31606a7.
* Revert "bump tinygrad" — this reverts commit 44f58ff.
* Revert "Tinygrad runner (commaai#34171)" — this reverts commit 7b5a4fb.
* Allow init buffer
* typo
1 parent 4c27878 commit f6885dc

39 files changed: +1547 / -368 lines changed

common/transformations/model.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22

33
from openpilot.common.transformations.orientation import rot_from_euler
4-
from openpilot.common.transformations.camera import get_view_frame_from_calib_frame, view_frame_from_device_frame, _ar_ox_fisheye
4+
from openpilot.common.transformations.camera import get_view_frame_from_calib_frame, view_frame_from_device_frame
55

66
# segnet
77
SEGNET_SIZE = (512, 384)
@@ -39,13 +39,6 @@
3939
[0.0, sbigmodel_fl, 0.5 * (256 + MEDMODEL_CY)],
4040
[0.0, 0.0, 1.0]])
4141

42-
DM_INPUT_SIZE = (1440, 960)
43-
dmonitoringmodel_fl = _ar_ox_fisheye.focal_length
44-
dmonitoringmodel_intrinsics = np.array([
45-
[dmonitoringmodel_fl, 0.0, DM_INPUT_SIZE[0]/2],
46-
[0.0, dmonitoringmodel_fl, DM_INPUT_SIZE[1]/2 - (_ar_ox_fisheye.height - DM_INPUT_SIZE[1])/2],
47-
[0.0, 0.0, 1.0]])
48-
4942
bigmodel_frame_from_calib_frame = np.dot(bigmodel_intrinsics,
5043
get_view_frame_from_calib_frame(0, 0, 0, 0))
5144

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ dependencies = [
4242

4343
# modeld
4444
"onnx >= 1.14.0",
45-
"onnxruntime >=1.16.3",
45+
"onnxruntime >=1.16.3; platform_system == 'Linux' and platform_machine == 'aarch64'",
46+
"onnxruntime-gpu >=1.16.3; platform_system == 'Linux' and platform_machine == 'x86_64'",
4647

4748
# logging
4849
"pyzmq",

release/release_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
"tools/joystick/",
5555
"tools/longitudinal_maneuvers/",
5656

57-
"tinygrad_repo/examples/openpilot/compile3.py",
57+
"tinygrad_repo/openpilot/compile2.py",
5858
"tinygrad_repo/extra/onnx.py",
5959
"tinygrad_repo/extra/onnx_ops.py",
6060
"tinygrad_repo/extra/thneed.py",

selfdrive/modeld/SConscript

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,20 @@ common_src = [
1313
"transforms/transform.cc",
1414
]
1515

16+
thneed_src_common = [
17+
"thneed/thneed_common.cc",
18+
"thneed/serialize.cc",
19+
]
20+
21+
thneed_src_qcom = thneed_src_common + ["thneed/thneed_qcom2.cc"]
22+
thneed_src_pc = thneed_src_common + ["thneed/thneed_pc.cc"]
23+
thneed_src = thneed_src_qcom if arch == "larch64" else thneed_src_pc
24+
25+
# SNPE except on Mac and ARM Linux
26+
snpe_lib = []
27+
if arch != "Darwin" and arch != "aarch64":
28+
common_src += ['runners/snpemodel.cc']
29+
snpe_lib += ['SNPE']
1630

1731
# OpenCL is a framework on Mac
1832
if arch == "Darwin":
@@ -31,7 +45,11 @@ snpe_rpath_pc = f"{Dir('#').abspath}/third_party/snpe/x86_64-linux-clang"
3145
snpe_rpath = lenvCython['RPATH'] + [snpe_rpath_qcom if arch == "larch64" else snpe_rpath_pc]
3246

3347
cython_libs = envCython["LIBS"] + libs
48+
snpemodel_lib = lenv.Library('snpemodel', ['runners/snpemodel.cc'])
3449
commonmodel_lib = lenv.Library('commonmodel', common_src)
50+
51+
lenvCython.Program('runners/runmodel_pyx.so', 'runners/runmodel_pyx.pyx', LIBS=cython_libs, FRAMEWORKS=frameworks)
52+
lenvCython.Program('runners/snpemodel_pyx.so', 'runners/snpemodel_pyx.pyx', LIBS=[snpemodel_lib, snpe_lib, *cython_libs], FRAMEWORKS=frameworks, RPATH=snpe_rpath)
3553
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)
3654

3755
tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath)]
@@ -41,17 +59,20 @@ fn = File("models/supercombo").abspath
4159
cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
4260
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd)
4361

44-
# Compile tinygrad model
45-
# TODO this is all super hacky
62+
# Build thneed model
63+
if arch == "larch64" or GetOption('pc_thneed'):
64+
tinygrad_opts = []
65+
if not GetOption('pc_thneed'):
66+
# use FLOAT16 on device for speed + don't cache the CL kernels for space
67+
tinygrad_opts += ["FLOAT16=1", "PYOPENCL_NO_CACHE=1"]
68+
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn}.onnx {fn}.thneed"
4669

47-
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
48-
if arch == 'larch64':
49-
device_string = 'QCOM=1'
50-
else:
51-
device_string = 'CLANG=1 IMAGE=0'
70+
lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd)
5271

53-
for model_name in ['supercombo', 'dmonitoring_model']:
54-
fn = File(f"models/{model_name}").abspath
55-
cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
56-
lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)
72+
fn_dm = File("models/dmonitoring_model").abspath
73+
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn_dm}.onnx {fn_dm}.thneed"
74+
lenv.Command(fn_dm + ".thneed", [fn_dm + ".onnx"] + tinygrad_files, cmd)
5775

76+
thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'OpenCL', 'dl'])
77+
thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc'])
78+
lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'OpenCL'])

selfdrive/modeld/dmonitoringmodeld

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
#!/usr/bin/env bash
22

33
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
4+
cd "$DIR/../../"
5+
6+
if [ -f "$DIR/libthneed.so" ]; then
7+
export LD_PRELOAD="$DIR/libthneed.so"
8+
fi
9+
410
exec "$DIR/dmonitoringmodeld.py" "$@"

selfdrive/modeld/dmonitoringmodeld.py

Lines changed: 26 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,8 @@
11
#!/usr/bin/env python3
22
import os
3-
from openpilot.system.hardware import TICI
4-
## TODO this is hack
5-
if TICI:
6-
from tinygrad.tensor import Tensor
7-
from tinygrad.dtype import dtypes
8-
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
9-
os.environ['QCOM'] = '1'
10-
else:
11-
from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
123
import gc
134
import math
145
import time
15-
import pickle
166
import ctypes
177
import numpy as np
188
from pathlib import Path
@@ -23,20 +13,21 @@
2313
from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
2414
from openpilot.common.swaglog import cloudlog
2515
from openpilot.common.realtime import set_realtime_priority
26-
from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE
27-
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
28-
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame
16+
from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
17+
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext
2918
from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid
3019

31-
MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE
3220
CALIB_LEN = 3
21+
MODEL_WIDTH = 1440
22+
MODEL_HEIGHT = 960
3323
FEATURE_LEN = 512
3424
OUTPUT_SIZE = 84 + FEATURE_LEN
3525

3626
PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
3727
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
38-
MODEL_PATH = Path(__file__).parent / 'models/dmonitoring_model.onnx'
39-
MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'
28+
MODEL_PATHS = {
29+
ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed',
30+
ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx'}
4031

4132
class DriverStateResult(ctypes.Structure):
4233
_fields_ = [
@@ -67,42 +58,33 @@ class DMonitoringModelResult(ctypes.Structure):
6758
class ModelState:
6859
inputs: dict[str, np.ndarray]
6960
output: np.ndarray
61+
model: ModelRunner
7062

7163
def __init__(self, cl_ctx):
7264
assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
65+
self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
66+
self.inputs = {
67+
'input_img': np.zeros(MODEL_HEIGHT * MODEL_WIDTH, dtype=np.uint8),
68+
'calib': np.zeros(CALIB_LEN, dtype=np.float32)}
7369

74-
self.frame = MonitoringModelFrame(cl_ctx)
75-
self.numpy_inputs = {
76-
'calib': np.zeros((1, CALIB_LEN), dtype=np.float32),
77-
}
70+
self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx)
71+
self.model.addInput("input_img", None)
72+
self.model.addInput("calib", self.inputs['calib'])
7873

79-
if TICI:
80-
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
81-
with open(MODEL_PKL_PATH, "rb") as f:
82-
self.model_run = pickle.load(f)
83-
else:
84-
self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)
74+
def run(self, buf:VisionBuf, calib:np.ndarray) -> tuple[np.ndarray, float]:
75+
self.inputs['calib'][:] = calib
8576

86-
def run(self, buf:VisionBuf, calib:np.ndarray, transform:np.ndarray) -> tuple[np.ndarray, float]:
87-
self.numpy_inputs['calib'][0,:] = calib
77+
v_offset = buf.height - MODEL_HEIGHT
78+
h_offset = (buf.width - MODEL_WIDTH) // 2
79+
buf_data = buf.data.reshape(-1, buf.stride)
80+
input_data = self.inputs['input_img'].reshape(MODEL_HEIGHT, MODEL_WIDTH)
81+
input_data[:] = buf_data[v_offset:v_offset+MODEL_HEIGHT, h_offset:h_offset+MODEL_WIDTH]
8882

83+
self.model.setInputBuffer("input_img", self.inputs['input_img'].view(np.float32))
8984
t1 = time.perf_counter()
90-
91-
input_img_cl = self.frame.prepare(buf, transform.flatten())
92-
if TICI:
93-
# The imgs tensors are backed by opencl memory, only need init once
94-
if 'input_img' not in self.tensor_inputs:
95-
self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8)
96-
else:
97-
self.numpy_inputs['input_img'] = self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT))
98-
99-
if TICI:
100-
output = self.model_run(**self.tensor_inputs).numpy().flatten()
101-
else:
102-
output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
103-
85+
self.model.execute()
10486
t2 = time.perf_counter()
105-
return output, t2 - t1
87+
return self.output, t2 - t1
10688

10789

10890
def fill_driver_state(msg, ds_result: DriverStateResult):
@@ -155,23 +137,18 @@ def main():
155137
pm = PubMaster(["driverStateV2"])
156138

157139
calib = np.zeros(CALIB_LEN, dtype=np.float32)
158-
model_transform = None
159140

160141
while True:
161142
buf = vipc_client.recv()
162143
if buf is None:
163144
continue
164145

165-
if model_transform is None:
166-
cam = _os_fisheye if buf.width == _os_fisheye.width else _ar_ox_fisheye
167-
model_transform = np.linalg.inv(np.dot(dmonitoringmodel_intrinsics, np.linalg.inv(cam.intrinsics))).astype(np.float32)
168-
169146
sm.update(0)
170147
if sm.updated["liveCalibration"]:
171148
calib[:] = np.array(sm["liveCalibration"].rpyCalib)
172149

173150
t1 = time.perf_counter()
174-
model_output, gpu_execution_time = model.run(buf, calib, model_transform)
151+
model_output, gpu_execution_time = model.run(buf, calib)
175152
t2 = time.perf_counter()
176153

177154
pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time))

0 commit comments

Comments
 (0)