Skip to content

Commit e838803

Browse files
Fixed 4k images with many people not rendering skeletons
1 parent 98e53de commit e838803

File tree

13 files changed

+91
-92
lines changed

13 files changed

+91
-92
lines changed

doc/release_notes.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,10 @@ OpenPose Library - Release Notes
407407
4. Natural sort now works properly with filenames containing numbers longer than the limit of an int.
408408
5. Optionally auto-generated bin folder only contains the required DLLs (depending on the CMake configuration), instead of all of them.
409409
6. When WrapperStructFace and WrapperStructHand are not called and configured for Wrapper, setting body to CPU rendering was not working.
410-
7. Skelton rendering: All or some skeletons were not properly displayed or completely missing on images with many people.
410+
7. Skeleton rendering bugs:
411+
1. All or some skeletons were not properly displayed or completely missing on images with many people (e.g., videos with about 32 people).
412+
2. All or some skeletons were not properly displayed or completely missing on images where the product of the number of people and the image resolution was too large (e.g., videos with about 32 people at 4k resolution).
413+
3. Flag `output_resolution` was not working with GPU resize; rendering is redirected to CPU in those cases.
411414
4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin:
412415

413416

include/openpose/face/renderFace.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ namespace op
1010
Array<float>& frameArray, const Array<float>& faceKeypoints, const float renderThreshold);
1111

1212
void renderFaceKeypointsGpu(
13-
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const Point<int>& frameSize,
13+
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const Point<unsigned int>& frameSize,
1414
const float* const facePtr, const int numberPeople, const float renderThreshold,
1515
const float alphaColorToAdd = FACE_DEFAULT_ALPHA_KEYPOINT);
1616
}

include/openpose/gpu/cuda.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace op
1919
}
2020

2121
OP_API void getNumberCudaThreadsAndBlocks(
22-
dim3& numberCudaThreads, dim3& numberCudaBlocks, const Point<int>& frameSize);
22+
dim3& numberCudaThreads, dim3& numberCudaBlocks, const Point<unsigned int>& frameSize);
2323

2424
template <typename T>
2525
void reorderAndNormalize(

include/openpose/hand/renderHand.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ namespace op
1010
Array<float>& frameArray, const std::array<Array<float>, 2>& handKeypoints, const float renderThreshold);
1111

1212
void renderHandKeypointsGpu(
13-
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const Point<int>& frameSize,
13+
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const Point<unsigned int>& frameSize,
1414
const float* const handsPtr, const int numberHands, const float renderThreshold,
1515
const float alphaColorToAdd = HAND_DEFAULT_ALPHA_KEYPOINT);
1616
}

include/openpose/pose/renderPose.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,32 @@ namespace op
1313

1414
void renderPoseKeypointsGpu(
1515
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const PoseModel poseModel,
16-
const int numberPeople, const Point<int>& frameSize, const float* const posePtr,
16+
const int numberPeople, const Point<unsigned int>& frameSize, const float* const posePtr,
1717
const float renderThreshold, const bool googlyEyes = false, const bool blendOriginalFrame = true,
1818
const float alphaBlending = POSE_DEFAULT_ALPHA_KEYPOINT);
1919

2020
void renderPoseHeatMapGpu(
21-
float* frame, const Point<int>& frameSize, const float* const heatMapPtr, const Point<int>& heatMapSize,
21+
float* frame, const Point<unsigned int>& frameSize, const float* const heatMapPtr, const Point<int>& heatMapSize,
2222
const float scaleToKeepRatio, const unsigned int part,
2323
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
2424

2525
void renderPoseHeatMapsGpu(
26-
float* frame, const PoseModel poseModel, const Point<int>& frameSize, const float* const heatMapPtr,
26+
float* frame, const PoseModel poseModel, const Point<unsigned int>& frameSize, const float* const heatMapPtr,
2727
const Point<int>& heatMapSize, const float scaleToKeepRatio,
2828
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
2929

3030
void renderPosePAFGpu(
31-
float* framePtr, const PoseModel poseModel, const Point<int>& frameSize, const float* const heatMapPtr,
31+
float* framePtr, const PoseModel poseModel, const Point<unsigned int>& frameSize, const float* const heatMapPtr,
3232
const Point<int>& heatMapSize, const float scaleToKeepRatio, const int part,
3333
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
3434

3535
void renderPosePAFsGpu(
36-
float* framePtr, const PoseModel poseModel, const Point<int>& frameSize, const float* const heatMapPtr,
36+
float* framePtr, const PoseModel poseModel, const Point<unsigned int>& frameSize, const float* const heatMapPtr,
3737
const Point<int>& heatMapSize, const float scaleToKeepRatio,
3838
const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
3939

4040
void renderPoseDistanceGpu(
41-
float* framePtr, const Point<int>& frameSize, const float* const heatMapPtr, const Point<int>& heatMapSize,
41+
float* framePtr, const Point<unsigned int>& frameSize, const float* const heatMapPtr, const Point<int>& heatMapSize,
4242
const float scaleToKeepRatio, const unsigned int part, const float alphaBlending = POSE_DEFAULT_ALPHA_HEAT_MAP);
4343
}
4444

include/openpose_private/utilities/render.hu

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
namespace op
55
{
66
__inline__ __device__ void getBoundingBoxPerPerson(
7-
float* maxPtr, float* minPtr, float* scalePtr,const int targetWidth, const int targetHeight,
7+
float* maxPtr, float* minPtr, float* scalePtr,const unsigned int targetWidth, const unsigned int targetHeight,
88
const float* const keypointsPtr, const int numberPeople, const int numberParts, const float threshold)
99
{
10-
const auto globalIdx = threadIdx.x;
10+
const unsigned long globalIdx = threadIdx.x;
1111

1212
// Fill shared parameters
1313
if (globalIdx < numberPeople)
@@ -18,10 +18,10 @@ namespace op
1818
float maxValueY = 0.f;
1919
for (auto part = 0 ; part < numberParts ; part++)
2020
{
21-
const auto index = 3 * (globalIdx*numberParts + part);
22-
const auto x = keypointsPtr[index];
23-
const auto y = keypointsPtr[index+1];
24-
const auto score = keypointsPtr[index+2];
21+
const unsigned long index = 3u * (globalIdx*numberParts + part);
22+
const float x = keypointsPtr[index];
23+
const float y = keypointsPtr[index+1];
24+
const float score = keypointsPtr[index+2];
2525
if (score > threshold)
2626
{
2727
if (x < minValueX)
@@ -62,7 +62,7 @@ namespace op
6262
__inline__ __device__ void renderKeypoints(
6363
float* targetPtr, float* sharedMaxs, float* sharedMins, float* sharedScaleF, const float* const maxPtr,
6464
const float* const minPtr, const float* const scalePtr, const int globalIdx, const int x, const int y,
65-
const int targetWidth, const int targetHeight, const float* const keypointsPtr,
65+
const unsigned int targetWidth, const unsigned int targetHeight, const float* const keypointsPtr,
6666
const unsigned int* const partPairsPtr, const int numberPeople, const int numberParts,
6767
const int numberPartPairs, const float* const rgbColorsPtr, const int numberColors, const float radius,
6868
const float lineWidth, const float* const keypointScalePtr, const int numberScales, const float threshold,
@@ -82,7 +82,7 @@ namespace op
8282
// Fill each (x,y) target pixel
8383
if (x < targetWidth && y < targetHeight)
8484
{
85-
const auto baseIndex = 3*(y * targetWidth + x);
85+
const unsigned long baseIndex = 3u*(y * (unsigned long)targetWidth + x);
8686
float b = targetPtr[baseIndex];
8787
float g = targetPtr[baseIndex+1];
8888
float r = targetPtr[baseIndex+2];
@@ -208,7 +208,7 @@ namespace op
208208

209209
__inline__ __device__ void renderKeypointsOld(
210210
float* targetPtr, float2* sharedMaxs, float2* sharedMins, float* sharedScaleF, const int globalIdx,
211-
const int x, const int y, const int targetWidth, const int targetHeight, const float* const keypointsPtr,
211+
const int x, const int y, const unsigned int targetWidth, const unsigned int targetHeight, const float* const keypointsPtr,
212212
const unsigned int* const partPairsPtr, const int numberPeople, const int numberParts,
213213
const int numberPartPairs, const float* const rgbColorsPtr, const int numberColors, const float radius,
214214
const float lineWidth, const float* const keypointScalePtr, const int numberScales, const float threshold,
@@ -224,10 +224,10 @@ namespace op
224224
float maxValueY = 0.f;
225225
for (auto part = 0 ; part < numberParts ; part++)
226226
{
227-
const auto index = 3 * (globalIdx*numberParts + part);
228-
const auto x = keypointsPtr[index];
229-
const auto y = keypointsPtr[index+1];
230-
const auto score = keypointsPtr[index+2];
227+
const unsigned long index = 3u * (((unsigned long)globalIdx)*numberParts + part);
228+
const float x = keypointsPtr[index];
229+
const float y = keypointsPtr[index+1];
230+
const float score = keypointsPtr[index+2];
231231
if (score > threshold)
232232
{
233233
if (x < minValueX)
@@ -263,7 +263,7 @@ namespace op
263263
// Fill each (x,y) target pixel
264264
if (x < targetWidth && y < targetHeight)
265265
{
266-
const auto baseIndex = 3*(y * targetWidth + x);
266+
const unsigned long baseIndex = 3u*(y * (unsigned long)targetWidth + x);
267267
float b = targetPtr[baseIndex];
268268
float g = targetPtr[baseIndex+1];
269269
float r = targetPtr[baseIndex+2];

src/openpose/face/faceGpuRenderer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ namespace op
8383
// I prefer std::round(T&) over positiveIntRound(T) for std::atomic
8484
const auto elementRendered = spElementToRender->load();
8585
const auto numberPeople = faceKeypoints.getSize(0);
86-
const Point<int> frameSize{outputData.getSize(1), outputData.getSize(0)};
86+
const Point<unsigned int> frameSize{(unsigned int)outputData.getSize(1), (unsigned int)outputData.getSize(0)};
8787
if (numberPeople > 0 && elementRendered == 0)
8888
{
8989
// Draw faceKeypoints

src/openpose/face/renderFace.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ namespace op
4646
}
4747

4848
void renderFaceKeypointsGpu(
49-
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const Point<int>& frameSize,
49+
float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const Point<unsigned int>& frameSize,
5050
const float* const facePtr, const int numberPeople, const float renderThreshold, const float alphaColorToAdd)
5151
{
5252
try

src/openpose/gpu/cuda.cpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,11 @@
88
namespace op
99
{
1010
#ifdef USE_CUDA
11-
#ifdef DNDEBUG
12-
#define base 32
13-
#else
14-
#define base 64
15-
#endif
16-
const dim3 THREADS_PER_BLOCK_TINY{base, base, 1}; // 32 |64
17-
const dim3 THREADS_PER_BLOCK_SMALL{2*base, 2*base, 1}; // 64 |128
18-
const dim3 THREADS_PER_BLOCK_MEDIUM{4*base, 4*base, 1}; // 128|256
19-
const dim3 THREADS_PER_BLOCK_BIG{8*base, 8*base, 1}; // 256|512
20-
const dim3 THREADS_PER_BLOCK_HUGE{16*base, 16*base, 1}; // 512|1024
11+
const dim3 THREADS_PER_BLOCK_TINY {32, 32, 1};
12+
const dim3 THREADS_PER_BLOCK_SMALL {64, 64, 1};
13+
const dim3 THREADS_PER_BLOCK_MEDIUM{128, 128, 1};
14+
const dim3 THREADS_PER_BLOCK_BIG {256, 256, 1};
15+
const dim3 THREADS_PER_BLOCK_HUGE {256, 256, 1};
2116
#endif
2217

2318
void cudaCheck(const int line, const std::string& function, const std::string& file)
@@ -65,7 +60,7 @@ namespace op
6560
}
6661
}
6762

68-
void getNumberCudaThreadsAndBlocks(dim3& numberCudaThreads, dim3& numberCudaBlocks, const Point<int>& frameSize)
63+
void getNumberCudaThreadsAndBlocks(dim3& numberCudaThreads, dim3& numberCudaBlocks, const Point<unsigned int>& frameSize)
6964
{
7065
try
7166
{
@@ -93,8 +88,8 @@ namespace op
9388
else
9489
numberCudaThreads = THREADS_PER_BLOCK_TINY;
9590
// numberCudaBlocks
96-
numberCudaBlocks = dim3{getNumberCudaBlocks((unsigned int)frameSize.x, numberCudaThreads.x),
97-
getNumberCudaBlocks((unsigned int)frameSize.y, numberCudaThreads.y),
91+
numberCudaBlocks = dim3{getNumberCudaBlocks(frameSize.x, numberCudaThreads.x),
92+
getNumberCudaBlocks(frameSize.y, numberCudaThreads.y),
9893
numberCudaThreads.z};
9994
#else
10095
UNUSED(numberCudaThreads);

src/openpose/hand/handGpuRenderer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ namespace op
8484
// I prefer std::round(T&) over positiveIntRound(T) for std::atomic
8585
const auto elementRendered = spElementToRender->load();
8686
const auto numberPeople = handKeypoints[0].getSize(0);
87-
const Point<int> frameSize{outputData.getSize(1), outputData.getSize(0)};
87+
const Point<unsigned int> frameSize{(unsigned int)outputData.getSize(1), (unsigned int)outputData.getSize(0)};
8888
// GPU rendering
8989
if (numberPeople > 0 && elementRendered == 0)
9090
{

0 commit comments

Comments
 (0)