|
 from modules.load_state import load_state


-def convert_to_trt(net, output_name):
+def convert_to_trt(net, output_name, height, width):
     net.eval()
-    input = torch.randn(1, 3, 256, 448).cuda()
+    input = torch.randn(1, 3, height, width).cuda()
     net_trt = torch2trt(net, [input])
     torch.save(net_trt.state_dict(), output_name)
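torch2trt serializes the built engine as an ordinary PyTorch state dict, so the converted model is loaded back through torch2trt's TRTModule rather than the original network class. A minimal sketch of the round trip (the file name is just this script's default output, and the input shape must match the one the engine was built with):

    import torch
    from torch2trt import TRTModule

    # Load the serialized TensorRT engine saved by convert_to_trt().
    net_trt = TRTModule()
    net_trt.load_state_dict(torch.load('human-pose-estimation-3d-trt.pth'))

    # The engine is fixed-shape: it only accepts the exact size it was built with.
    dummy = torch.randn(1, 3, 256, 448).cuda()
    out = net_trt(dummy)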
|
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
+    parser.add_argument('--height', type=int, default=256, help='network input height')
+    parser.add_argument('--width', type=int, default=448, help='network input width')
     parser.add_argument('--output-name', type=str, default='human-pose-estimation-3d-trt.pth',
                         help='name of output model in TensorRT format')
     args = parser.parse_args()
+    print('TensorRT does not support dynamic network input size reshape.\n'
+          'Make sure you have set proper network input height, width. If not, there will be no detections.\n'
+          'Default values work for a usual video with 16:9 aspect ratio (1280x720, 1920x1080).\n'
+          'You can check the network input size with \'print(scaled_img.shape)\' in demo.py')
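The warning above is the point of this change: the engine bakes in one input size, so the height and width passed here must match what demo.py actually feeds the network. A sketch of how that size can be derived, under the assumption that demo.py scales each frame to a fixed input height and then trims the width down to a multiple of the network stride (base_height=256 and stride=8 are assumptions; they are consistent with 448 being the expected width for 16:9 sources):

    def network_input_size(frame_height, frame_width, base_height=256, stride=8):
        # Hypothetical helper mirroring the assumed preprocessing in demo.py:
        # scale the frame so its height equals base_height, then trim the
        # width down to a multiple of the network stride.
        scale = base_height / frame_height
        width = int(frame_width * scale)
        return base_height, width - width % stride

    # Both common 16:9 resolutions map to the 256x448 default:
    print(network_input_size(720, 1280))   # -> (256, 448)
    print(network_input_size(1080, 1920))  # -> (256, 448)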
|
     net = PoseEstimationWithMobileNet().cuda()
     checkpoint = torch.load(args.checkpoint_path)
     load_state(net, checkpoint)

-    convert_to_trt(net, args.output_name)
+    convert_to_trt(net, args.output_name, args.height, args.width)
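With the new flags, a source with a different aspect ratio just needs the matching size passed explicitly. For a 640x480 (4:3) video, the sketch above gives 256x336, so a hypothetical invocation (script path and checkpoint name assumed) would be:

    python scripts/convert_to_trt.py --checkpoint-path human-pose-estimation-3d.pth --height 256 --width 336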