Skip to content

Commit 6fcfa33

Browse files
authored
Merge pull request #139 from hutingh/fix_resize_gpu
Fix resize gpu
2 parents cf4ca8f + d6154fb commit 6fcfa33

File tree

5 files changed

+78
-16
lines changed

5 files changed

+78
-16
lines changed

compute/image/src/gpu/mali/cl/kernel_option/resize_opt.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,14 @@ inline EE set_resize_opt_mali(ResizeParamSpec p,
1212
char *kernelName,
1313
KernelOpt *kernelOpt)
1414
{
15-
#ifdef _USE_FP16
16-
kernelOpt->kernelDataType = DT_F16;
17-
#else
18-
kernelOpt->kernelDataType = DT_F32;
19-
#endif
15+
DataType dt = idt;
16+
if (bytesOf(odt) > bytesOf(idt)) {
17+
dt = odt;
18+
}
19+
if (bytesOf(dt) < 2) {
20+
dt = DT_F32;
21+
}
22+
kernelOpt->kernelDataType = dt;
2023
char *opt = kernelOpt->option;
2124
std::string source;
2225
if (p.mode == RESIZE_NEAREST) {
@@ -55,9 +58,13 @@ inline EE set_resize_opt_mali(ResizeParamSpec p,
5558
} else {
5659
CHECK_STATUS(add_macro(opt, "USE_NCHW"));
5760
}
61+
if (odt == DT_U8) {
62+
CHECK_STATUS(add_macro(opt, "OUTPUT_UCHAR"));
63+
}
5864
std::string idtName = gcl_get_type(idt);
5965
std::string odtName = gcl_get_type(odt);
6066
CHECK_STATUS(add_macro(opt, "IT", idtName));
67+
CHECK_STATUS(add_macro(opt, "IT4", idtName + "4"));
6168
CHECK_STATUS(add_macro(opt, "OT", odtName));
6269
CHECK_STATUS(add_macro_type(opt, kernelOpt->kernelDataType));
6370
CHECK_STATUS(add_macro_io(opt, inputMemType, outputMemType));

compute/image/src/gpu/mali/cl/resize_bilinear.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
}
3636
#endif
3737

38-
#if OT == uchar
39-
#define func convert_uchar_sat
38+
#if defined(OUTPUT_UCHAR)
39+
#define func convert_uchar_sat_rte
4040
#else
4141
#define func
4242
#endif
@@ -80,8 +80,8 @@ __kernel void KERNEL_NAME(const int iw_str,
8080
tblr.y = min(tblr.x + 1, iw - 1); // R
8181
tblr.z = max(0, (int)floor(iy)); // T
8282
tblr.w = min(tblr.z + 1, ih - 1); // B
83-
T dif1 = ix - tblr.x; // C-L
84-
T dif2 = iy - tblr.z; // C-T
83+
T dif1 = ix - (float)tblr.x; // C-L
84+
T dif2 = iy - (float)tblr.z; // C-T
8585

8686
#if defined(USE_NCHW) || defined(USE_NHWC)
8787
int x = (idz * ih_str + tblr.z) * iw_str + tblr.x + i_off; // TL_off

compute/image/src/gpu/mali/fp16/resize_mali_fp16.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,22 @@ inline EE resize_core_mali_fp16(GCLHandle_t handle,
3030
GCLMem_t output)
3131
{
3232
DataType idt, odt;
33+
DataFormat idf;
3334
U32 iw, ih, ic, in;
3435
U32 ow, oh, oc, on;
35-
tensorSelectGet(inputDesc, &idt, NULL, &in, &ic, &ih, &iw);
36+
tensorSelectGet(inputDesc, &idt, &idf, &in, &ic, &ih, &iw);
3637
tensorSelectGet(outputDesc, &odt, NULL, &on, &oc, &oh, &ow);
3738

3839
U32 iw_str, ih_str, iw_off, ih_off, i_off;
3940
U32 ow_str, oh_str, ow_off, oh_off, o_off;
4041
get_gclmem_dim(input->desc, &iw_str, &ih_str, NULL, &iw_off, &ih_off);
4142
get_gclmem_dim(output->desc, &ow_str, &oh_str, NULL, &ow_off, &oh_off);
43+
if (iw_str < iw) {
44+
ih_str = ih;
45+
iw_str = iw;
46+
oh_str = oh;
47+
ow_str = ow;
48+
}
4249
cl_mem inbuf = input->mem;
4350
cl_mem outbuf = output->mem;
4451
GCLMemType inputMemType = input->desc.memType;
@@ -53,9 +60,9 @@ inline EE resize_core_mali_fp16(GCLHandle_t handle,
5360
U32 dim = 3;
5461
U32 gs[3] = {ow, oh, 0};
5562
U32 ls[3] = {0, 0, 0};
56-
if (input->desc.df == DF_NCHWC4) {
63+
if (idf == DF_NCHWC4) {
5764
gs[2] = (oc + 3) / 4 * on;
58-
} else if (input->desc.df == DF_NHWC) {
65+
} else if (idf == DF_NHWC) {
5966
gs[2] = on;
6067
} else {
6168
gs[2] = oc * on;
@@ -64,8 +71,8 @@ inline EE resize_core_mali_fp16(GCLHandle_t handle,
6471
Kernel kernel;
6572
KernelOpt kernelOpt;
6673
char kernelName[128];
67-
CHECK_STATUS(set_resize_opt_mali(
68-
p, input->desc.df, idt, odt, inputMemType, outputMemType, kernelName, &kernelOpt));
74+
CHECK_STATUS(
75+
set_resize_opt_mali(p, idf, idt, odt, inputMemType, outputMemType, kernelName, &kernelOpt));
6976
CHECK_STATUS(gcl_create_kernel(handle, kernelName, &kernel, &kernelOpt));
7077
CHECK_STATUS(gcl_set_kernelArgs(kernel, iw_str, ih_str, i_off, iw, ih, ow_str, oh_str, o_off,
7178
ow, oh, r0_w, r0_h, r1_w, r1_h, inbuf, outbuf));

compute/image/src/resize.cpp

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@
2828
#include "cpu/x86/image_x86.h"
2929
#endif
3030

31+
// Heuristic check for a descriptor that is tagged DF_NCHW but whose shape
// looks channel-last.
//
// Returns true only when all of the following hold:
//   - desc.df is DF_NCHW,
//   - desc.dims[0] is exactly 3,
//   - desc.dims[1] and desc.dims[2] are both greater than 3.
//
// NOTE(review): presumably dims[0] is the innermost axis, so a value of 3
// there suggests interleaved 3-channel (e.g. RGB) data - confirm against the
// TensorDesc dimension-ordering convention.
static bool is_implicit_nhwc(TensorDesc desc)
{
    if (desc.df != DF_NCHW) {
        return false;
    }
    return desc.dims[0] == 3 && desc.dims[1] > 3 && desc.dims[2] > 3;
}
38+
3139
// params is a pointer to either the target size or the resize ratios
3240
// When paramDT specifies DT_U32, params should point to target sizes (height and width)
3341
// When paramDT specifies DT_F32, params should point to resize ratios
@@ -37,10 +45,18 @@ EE resize_infer_output_size_cpu(TensorDesc inputDesc, ResizeParamSpec p, TensorD
3745
DataFormat idf, odf;
3846
U32 in, ic, ih, iw = 1;
3947
U32 oh, ow = 1;
48+
bool nhwc = false;
4049
if (tensorIs3d(inputDesc)) {
4150
CHECK_STATUS(tensor3dGet(inputDesc, &idt, &idf, &in, &ic, &ih));
4251
} else if (tensorIs4d(inputDesc)) {
4352
CHECK_STATUS(tensor4dGet(inputDesc, &idt, &idf, &in, &ic, &ih, &iw));
53+
nhwc = is_implicit_nhwc(inputDesc);
54+
if (nhwc) {
55+
int t = iw;
56+
iw = ih;
57+
ih = ic;
58+
ic = t;
59+
}
4460
} else {
4561
UNI_ERROR_LOG("can support to resize %d-dim tensor.\n", inputDesc.nDims);
4662
}
@@ -77,7 +93,11 @@ EE resize_infer_output_size_cpu(TensorDesc inputDesc, ResizeParamSpec p, TensorD
7793
if (tensorIs3d(inputDesc)) {
7894
*outputDesc = tensor3df(idt, odf, in, ic, oh);
7995
} else if (tensorIs4d(inputDesc)) {
80-
*outputDesc = tensor4df(idt, odf, in, ic, oh, ow);
96+
if (nhwc) {
97+
*outputDesc = tensor4df(idt, odf, in, oh, ow, ic);
98+
} else {
99+
*outputDesc = tensor4df(idt, odf, in, ic, oh, ow);
100+
}
81101
}
82102
return SUCCESS;
83103
}
@@ -201,6 +221,34 @@ EE resize_nearest(TensorDesc inputDesc,
201221
return ret;
202222
}
203223

224+
// Rewrites an "implicit NHWC" input/output descriptor pair in place so that
// both are explicitly tagged DF_NHWC.
//
// The rewrite happens only when is_implicit_nhwc(inputDesc) holds and the
// input and output agree on dims[0]. For each descriptor, dims[0] is removed
// from the front, the following dimensions are shifted down by one slot, and
// the removed value is stored at index nDims - 2; dims[nDims - 1] is left
// untouched.
//
// @param inputDesc   input descriptor, modified in place when the rewrite applies
// @param outputDesc  output descriptor, modified in place when the rewrite applies
// @return true if both descriptors were rewritten, false if they were left as-is
static bool update(TensorDesc &inputDesc, TensorDesc &outputDesc)
{
    bool ret = false;
    if (is_implicit_nhwc(inputDesc) && inputDesc.dims[0] == outputDesc.dims[0]) {
        TensorDesc desc0 = inputDesc;
        U32 v = inputDesc.dims[0];
        // The shift must start at i = 1: with an unsigned index, i = 0 would
        // make dims[i - 1] wrap around and write far outside the array.
        for (U32 i = 1; i < inputDesc.nDims; i++) {
            inputDesc.dims[i - 1] = inputDesc.dims[i];
        }
        inputDesc.dims[inputDesc.nDims - 2] = v;
        // The permuted dims describe channel-last data, so the layout tag has
        // to change with them (keeping DF_NCHW here would contradict the new
        // dims and disagree with the output descriptor below).
        inputDesc.df = DF_NHWC;

        TensorDesc desc1 = outputDesc;
        v = outputDesc.dims[0];
        for (U32 i = 1; i < outputDesc.nDims; i++) {
            outputDesc.dims[i - 1] = outputDesc.dims[i];
        }
        outputDesc.dims[outputDesc.nDims - 2] = v;
        outputDesc.df = DF_NHWC;

        UNI_DEBUG_LOG("change input from %s -> %s.\n", tensorDesc2Str(desc0).c_str(),
            tensorDesc2Str(inputDesc).c_str());
        UNI_DEBUG_LOG("change output from %s -> %s.\n", tensorDesc2Str(desc1).c_str(),
            tensorDesc2Str(outputDesc).c_str());
        ret = true;
    }
    return ret;
}
251+
204252
EE resize(
205253
Tensor inputTensor, ResizeParamSpec p, Tensor tmpTensor, Tensor outputTensor, ArchInfo_t archInfo)
206254
{
@@ -210,6 +258,7 @@ EE resize(
210258
TensorDesc outputDesc = outputTensor.get_desc();
211259
void *output = get_ptr_from_tensor(outputTensor, arch);
212260
void *tmp = get_ptr_from_tensor(tmpTensor, arch);
261+
update(inputDesc, outputDesc);
213262

214263
if (inputDesc.nDims == 3) {
215264
for (int i = inputDesc.nDims; i > 0; i--) {

inference/engine/src/bolt_c_simplify.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ int FreeTensor(
141141
c, h, w, dt, df, data);
142142
int ret = 0;
143143
if (num > 0) {
144-
FreeTensorDesc(num, name, n, c, h, w, dt, df);
145144
for (int i = 0; i < num; i++) {
146145
UNI_FREE(data[i]);
147146
}

0 commit comments

Comments
 (0)