# export_tsc.py
# Forked from pbridger/deepstream-video-pipeline
# 63 lines (49 loc) · 2.36 KB
import argparse
import importlib
import subprocess
import sys
from pathlib import Path

import torch
def parse_args():
    """Parse command-line options for the TorchScript export script.

    Returns:
        argparse.Namespace with the tracing batch/height/width dimensions,
        the target device string, and the three module names that the
        script imports dynamically.
    """
    a = argparse.ArgumentParser(
        description='Export the TorchScript part of an SSD300 pipeline, one traced copy per GPU.',
    )
    a.add_argument('--batch-dim', type=int, default=8, help='batch size of the tracing input')
    a.add_argument('--height-dim', type=int, default=300, help='height of the tracing input')
    a.add_argument('--width-dim', type=int, default=300, help='width of the tracing input')
    a.add_argument(
        '--device',
        default='cuda' if torch.cuda.is_available() else 'cpu',
        help='device used for the sanity-check forward pass',
    )
    # These are mandatory: the script immediately calls
    # importlib.import_module() on each value, and a missing option would
    # otherwise surface as a confusing TypeError instead of a usage error.
    a.add_argument('--ssd-module-name', type=str, required=True)
    a.add_argument('--trt-module-name', type=str, required=True)
    a.add_argument('--tsc-module-name', type=str, required=True)
    return a.parse_args()
if __name__ == '__main__':
    args = parse_args()

    # Dynamically import the three pipeline pieces named on the command line.
    ds_ssd300 = importlib.import_module(args.ssd_module_name)
    ds_trt = importlib.import_module(args.trt_module_name)
    ds_tsc = importlib.import_module(args.tsc_module_name)

    # Honor the --device flag (previously hard-coded to 'cuda:0', which
    # silently ignored the parsed option).
    device = torch.device(args.device)
    threshold = 0.4
    model_precision = 'fp16'

    # Random NCHW batch scaled to pixel-like magnitudes, cast to fp16 to
    # match model_precision; used only as a tracing/sanity input.
    image_nchw = (
        torch.randn((args.batch_dim, 3, args.height_dim, args.width_dim)) * 255
    ).to(device, torch.float16)

    tensorrt_model = ds_trt.TensorRTPart(
        ds_ssd300.SSD300(threshold, model_precision, args.batch_dim)
    ).to(device)

    # Sanity test: run the TensorRT half once and adapt its output to the
    # batch layout the TorchScript half expects.
    intermediate_result = tensorrt_model(image_nchw)
    if tensorrt_model.creates_dummy_dim:
        # Drop the dummy leading dim; the TorchScript part sees the full batch.
        intermediate_result = tuple(r.squeeze(0) for r in intermediate_result)
        tsc_batch_dim = args.batch_dim
    else:
        # Take the first element only; the TorchScript part is traced per-item.
        intermediate_result = tuple(r[0] for r in intermediate_result)
        tsc_batch_dim = 1

    torchscript_model = ds_tsc.TorchScriptPart(
        ds_ssd300.SSD300(threshold, model_precision, tsc_batch_dim)
    ).to(device)
    print(torchscript_model(*intermediate_result))

    # torch.jit.save() does not create parent directories; make sure the
    # output directory exists before the trace loop.
    Path('checkpoints').mkdir(exist_ok=True)

    with torch.jit.optimized_execution(should_optimize=True):
        # Trace one device-pinned copy of the model per GPU. NOTE(review):
        # the GPU count is hard-coded to 2 here, as in the original —
        # presumably matching the deployment machine; confirm before reuse.
        for gpu_id in range(2):
            traced_module = torch.jit.trace(
                torchscript_model.to(torch.device('cuda', gpu_id)),
                tuple(r.to(torch.device('cuda', gpu_id)) for r in intermediate_result),
            )
            traced_module.save(f'checkpoints/{args.tsc_module_name}.tsc.pth.{gpu_id}')