|
1 | 1 | #!/usr/bin/env python3 |
2 | 2 | import os |
3 | | -from openpilot.system.hardware import TICI |
4 | | -## TODO this is hack |
5 | | -if TICI: |
6 | | - from tinygrad.tensor import Tensor |
7 | | - from tinygrad.dtype import dtypes |
8 | | - from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address |
9 | | - os.environ['QCOM'] = '1' |
10 | | -else: |
11 | | - from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner |
12 | 3 | import gc |
13 | 4 | import math |
14 | 5 | import time |
15 | | -import pickle |
16 | 6 | import ctypes |
17 | 7 | import numpy as np |
18 | 8 | from pathlib import Path |
|
23 | 13 | from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf |
24 | 14 | from openpilot.common.swaglog import cloudlog |
25 | 15 | from openpilot.common.realtime import set_realtime_priority |
26 | | -from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE |
27 | | -from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye |
28 | | -from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame |
| 16 | +from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime |
| 17 | +from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext |
29 | 18 | from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid |
30 | 19 |
|
# Calibration vector length (filled from liveCalibration's rpyCalib: roll/pitch/yaw)
CALIB_LEN = 3
# Model input image size in pixels (single-channel, cropped from the camera buffer)
MODEL_WIDTH = 1440
MODEL_HEIGHT = 960
# Size of the feature section at the tail of the output vector
# (presumably temporal features fed back elsewhere — not visible in this file)
FEATURE_LEN = 512
OUTPUT_SIZE = 84 + FEATURE_LEN

PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
# Env flag; truthy when raw predictions should be published (consumed elsewhere in this file)
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
# One model file per supported runtime backend; ModelRunner picks the appropriate one.
MODEL_PATHS = {
  ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed',
  ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx',
}
40 | 31 |
|
41 | 32 | class DriverStateResult(ctypes.Structure): |
42 | 33 | _fields_ = [ |
@@ -67,42 +58,33 @@ class DMonitoringModelResult(ctypes.Structure): |
class ModelState:
  """Owns the driver-monitoring model runner plus its input/output buffers."""
  inputs: dict[str, np.ndarray]
  output: np.ndarray
  model: ModelRunner

  def __init__(self, cl_ctx):
    # The ctypes result struct must overlay the raw float32 output vector exactly.
    assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
    self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
    self.inputs = {
      'input_img': np.zeros(MODEL_HEIGHT * MODEL_WIDTH, dtype=np.uint8),
      'calib': np.zeros(CALIB_LEN, dtype=np.float32),
    }

    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx)
    self.model.addInput("input_img", None)
    self.model.addInput("calib", self.inputs['calib'])

  def run(self, buf: VisionBuf, calib: np.ndarray) -> tuple[np.ndarray, float]:
    """Crop `buf` into the model input, execute the model, and return
    (raw output vector, model execution time in seconds)."""
    self.inputs['calib'][:] = calib

    # Crop is bottom-aligned vertically and centered horizontally within the frame.
    top = buf.height - MODEL_HEIGHT
    left = (buf.width - MODEL_WIDTH) // 2
    rows = buf.data.reshape(-1, buf.stride)
    self.inputs['input_img'].reshape(MODEL_HEIGHT, MODEL_WIDTH)[:] = \
      rows[top:top + MODEL_HEIGHT, left:left + MODEL_WIDTH]

    # The runner takes a float32 buffer; this reinterprets the uint8 bytes
    # in place (no conversion) — MODEL_HEIGHT*MODEL_WIDTH is divisible by 4.
    self.model.setInputBuffer("input_img", self.inputs['input_img'].view(np.float32))
    start = time.perf_counter()
    self.model.execute()
    end = time.perf_counter()
    return self.output, end - start
106 | 88 |
|
107 | 89 |
|
108 | 90 | def fill_driver_state(msg, ds_result: DriverStateResult): |
@@ -155,23 +137,18 @@ def main(): |
155 | 137 | pm = PubMaster(["driverStateV2"]) |
156 | 138 |
|
157 | 139 | calib = np.zeros(CALIB_LEN, dtype=np.float32) |
158 | | - model_transform = None |
159 | 140 |
|
160 | 141 | while True: |
161 | 142 | buf = vipc_client.recv() |
162 | 143 | if buf is None: |
163 | 144 | continue |
164 | 145 |
|
165 | | - if model_transform is None: |
166 | | - cam = _os_fisheye if buf.width == _os_fisheye.width else _ar_ox_fisheye |
167 | | - model_transform = np.linalg.inv(np.dot(dmonitoringmodel_intrinsics, np.linalg.inv(cam.intrinsics))).astype(np.float32) |
168 | | - |
169 | 146 | sm.update(0) |
170 | 147 | if sm.updated["liveCalibration"]: |
171 | 148 | calib[:] = np.array(sm["liveCalibration"].rpyCalib) |
172 | 149 |
|
173 | 150 | t1 = time.perf_counter() |
174 | | - model_output, gpu_execution_time = model.run(buf, calib, model_transform) |
| 151 | + model_output, gpu_execution_time = model.run(buf, calib) |
175 | 152 | t2 = time.perf_counter() |
176 | 153 |
|
177 | 154 | pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time)) |
|
0 commit comments