Skip to content

Commit

Permalink
Merge pull request #2583 from alibaba/feature/bugfix
Browse files Browse the repository at this point in the history
Feature/bugfix
  • Loading branch information
jxt1234 authored Sep 7, 2023
2 parents 9e3cc72 + 8514c07 commit 32f72f4
Show file tree
Hide file tree
Showing 46 changed files with 125 additions and 167 deletions.
18 changes: 6 additions & 12 deletions source/backend/opencl/core/ImageBufferConvertor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ bool convertNCHWBufferToImage(const Tensor *input, Tensor *output, cl::Kernel &b
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
runtime->pushEvent({"inputFormatTransform", event});
#endif
return true;
}
Expand Down Expand Up @@ -111,8 +110,7 @@ bool convertNHWCBufferToImage(const Tensor *input, Tensor *output, cl::Kernel &b
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
runtime->pushEvent({"inputFormatTransform", event});
#endif
return true;
}
Expand Down Expand Up @@ -167,8 +165,7 @@ bool convertImageToNCHWBuffer(const Tensor *input, Tensor *output, cl::Kernel &i
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
runtime->pushEvent({"outputFormatTransform", event});
#endif
return true;
}
Expand Down Expand Up @@ -220,8 +217,7 @@ bool convertNC4HW4BufferToImage(const Tensor *input, Tensor *output, cl::Kernel
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us inputFormatTransform\n",costTime);
runtime->pushEvent({"inputFormatTransform", event});
#endif
return true;
}
Expand Down Expand Up @@ -285,8 +281,7 @@ bool convertImageToNC4HW4Buffer(const Tensor *input, Tensor *output, cl::Kernel
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
runtime->pushEvent({"outputFormatTransform", event});
#endif
return true;
}
Expand Down Expand Up @@ -341,8 +336,7 @@ bool convertImageToNHWCBuffer(const Tensor *input, Tensor *output, cl::Kernel &i
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = (int)runtime->getCostTime(&event);
MNN_PRINT("kernel cost:%d us outputFormatTransform\n",costTime);
runtime->pushEvent({"outputFormatTransform", event});
#endif

return true;
Expand Down
9 changes: 3 additions & 6 deletions source/backend/opencl/core/OpenCLBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,14 +514,15 @@ void OpenCLBackend::onResizeEnd() {

// Resets the runtime's queue counter and kernel-time accumulator, then calls
// clearRecord() and clearEvent() on the runtime.
void OpenCLBackend::onExecuteBegin() const {
mOpenCLRuntime->mQueueCount = 0;
// Zero the accumulated kernel time (clearEvent() below also resets it).
mOpenCLRuntime->mKernelTime = 0;
mOpenCLRuntime->clearRecord();
// Empty the list of queued profiling events.
mOpenCLRuntime->clearEvent();
}

// Resets the queue counter, calls clearRecord() and enqeueRecord() on the
// runtime, and finally reports the timings of the queued profiling events.
void OpenCLBackend::onExecuteEnd() const {
mOpenCLRuntime->mQueueCount = 0;
mOpenCLRuntime->clearRecord();
mOpenCLRuntime->enqeueRecord();
// Prints per-kernel and total times; the body of printEventTime() is compiled
// only when ENABLE_OPENCL_TIME_PROFILER is defined.
mOpenCLRuntime->printEventTime();
}


Expand Down Expand Up @@ -698,7 +699,7 @@ void OpenCLBackend::copyFromDevice(const Tensor* srcTensor, const Tensor* dstTen
mOpenCLRuntime->clearRecord();
//Convert format
mCLRuntime->convertFromDevice(srcTensor, (const Tensor*)&interTensor, data_format, false);

mOpenCLRuntime->printEventTime();

#ifdef ENABLE_OPENCL_TIME_PROFILER
mOpenCLRuntime->commandQueue().finish();
Expand Down Expand Up @@ -743,10 +744,6 @@ void OpenCLBackend::copyFromDevice(const Tensor* srcTensor, const Tensor* dstTen
hostPtr = nullptr;
}
}

#ifdef ENABLE_OPENCL_TIME_PROFILER
MNN_PRINT("total kernel time:%d us\n", (int)mOpenCLRuntime->mKernelTime);
#endif
}


Expand Down
21 changes: 21 additions & 0 deletions source/backend/opencl/core/runtime/OpenCLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ OpenCLRuntime::~OpenCLRuntime() {
#ifdef LOG_VERBOSE
MNN_PRINT("start ~OpenCLRuntime !\n");
#endif
clearEvent();
releaseRecord();
mBuildProgramMap.clear();
mRecordings.clear();
Expand Down Expand Up @@ -779,4 +780,24 @@ void OpenCLRuntime::releaseRecord(){
}
#endif
}

/**
 * Waits for every queued profiling event, prints each kernel's duration in
 * microseconds together with its label, prints the accumulated total, and
 * then empties the event queue.
 *
 * The whole body is compiled only when ENABLE_OPENCL_TIME_PROFILER is
 * defined; otherwise the function does nothing. Nothing is printed when no
 * events are queued.
 */
void OpenCLRuntime::printEventTime(){
#ifdef ENABLE_OPENCL_TIME_PROFILER
    if(mEvents.empty()){
        return;
    }
    for(auto& record : mEvents){
        auto& event = record.second;
        cl_int res = event.wait();
        MNN_CHECK_CL_SUCCESS(res, "clEvent");
        if(res != CL_SUCCESS){
            // The profiling counters of an event that failed to complete are
            // not valid, so keep it out of the per-kernel and total figures.
            continue;
        }
        auto startNanos = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
        auto stopNanos = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
        // Profiling timestamps are in nanoseconds; report microseconds.
        auto kernelTime = static_cast<unsigned int>((stopNanos - startNanos) / 1000.0);
        mKernelTime += kernelTime;
        // %u matches the unsigned value (the previous %d did not).
        MNN_PRINT("kernel time = %u us %s\n", kernelTime, record.first.c_str());
    }
    mEvents.clear();
    // Cast explicitly so the argument always matches the %u conversion.
    MNN_PRINT("total kernel time = %u us\n", static_cast<unsigned int>(mKernelTime));
#endif
}
} // namespace MNN
9 changes: 9 additions & 0 deletions source/backend/opencl/core/runtime/OpenCLRuntime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ class OpenCLRuntime {
// Returns a copy of the stored device name (mDeviceName).
std::string getDeviceName() {
return mDeviceName;
}
// Queues a (label, event) pair; printEventTime() later waits on the event and
// prints its duration next to the label.
void pushEvent(std::pair<std::string, cl::Event> data) {
    // `data` is already this function's own copy: move it into the vector
    // instead of copying the string and the event handle a second time.
    mEvents.push_back(std::move(data));
}
void printEventTime();
// Discards every queued profiling event and resets the accumulated kernel
// time to zero.
void clearEvent(){
    mEvents.clear();
    mKernelTime = 0;
}
uint64_t maxAllocSize() const;
void setCommandQueueProfileEnable();
void setCommandQueueProfileDisable();
Expand Down Expand Up @@ -181,6 +189,7 @@ class OpenCLRuntime {
GpuType mGpuType;
MaliAr mMaliAr;
float mCLVersion = 1.0f;
std::vector<std::pair<std::string, cl::Event>> mEvents;

#ifdef MNN_OPENCL_SVM_ENABLE
cl_device_svm_capabilities mSvmCapabilities;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ ErrorCode ArgMaxBufExecution::onExecute(const std::vector<Tensor*>& inputs, cons
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ArgMax\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ArgMax", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
8 changes: 1 addition & 7 deletions source/backend/opencl/execution/buffer/CastBufExecution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ ErrorCode CastBufExecution::onResize(const std::vector<Tensor*>& inputs, const s
Tensor* output = outputs[0];
auto openCLBackend = static_cast<OpenCLBackend*>(backend());
auto runtime = openCLBackend->getOpenCLRuntime();
#ifdef MNN_SUPPORT_INTEL_SUBGROUP
if (runtime->isSupportedIntelSubgroup()) {
return SubgrouponResize(inputs, outputs);
}
#endif /* MNN_SUPPORT_INTEL_SUBGROUP */
mKernel = runtime->buildKernel("cast_buf", "cast_buf", mBuildOptions);
mMaxWorkGroupSize = static_cast<uint32_t>(runtime->getMaxWorkGroupSize(mKernel));

Expand Down Expand Up @@ -75,8 +70,7 @@ ErrorCode CastBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Cast\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Cast", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
3 changes: 1 addition & 2 deletions source/backend/opencl/execution/buffer/ConvBufExecution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,8 +674,7 @@ ErrorCode ConvBufExecution::onExecute(const std::vector<Tensor *> &inputs, const
#ifdef ENABLE_OPENCL_TIME_PROFILER
cl::Event event;
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ConvBuf2D\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvBuf2D", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand Down
21 changes: 3 additions & 18 deletions source/backend/opencl/execution/buffer/ConvBufWinograd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,9 +570,6 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
auto input = inputs[0];
auto output = outputs[0];

#ifdef ENABLE_OPENCL_TIME_PROFILER
int costTime = 0;
#endif
for (int b = 0; b < input->batch(); ++b) {
int index = b;
/*Source Transform*/
Expand All @@ -581,10 +578,7 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
cl::Event event;
runKernel2D(mSourceTransform[index], mGWS_S[index], mLWS_S[index],
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime0;
MNN_PRINT("kernel cost:%d us ConvWino0\n",costTime0);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino0", event});
#else
runKernel2D(mSourceTransform[index], mGWS_S[index], mLWS_S[index],
mOpenCLBackend->getOpenCLRuntime());
Expand All @@ -600,10 +594,7 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
} else {
runKernel2D(mMatMul[index], mGWS_M[index], mLWS_M[index], mOpenCLBackend->getOpenCLRuntime(), &event);
}

int costTime1 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime1;
MNN_PRINT("kernel cost:%d us ConvWino1\n",costTime1);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino1", event});
#else
if (mUseSubgroup) {
run3DKernelDefault(mMatMul[index], mGWS_M[index], mLWS_M[index], mOpenCLBackend->getOpenCLRuntime());
Expand All @@ -619,19 +610,13 @@ ErrorCode ConvBufWinograd::onExecute(const std::vector<Tensor*>& inputs, const s
cl::Event event;
runKernel2D(mDestTransform[index], mGWS_D[index], mLWS_D[index],
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime2 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
costTime += costTime2;
MNN_PRINT("kernel cost:%d us ConvWino2\n",costTime2);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvWino2", event});
#else
runKernel2D(mDestTransform[index], mGWS_D[index], mLWS_D[index],
mOpenCLBackend->getOpenCLRuntime());
#endif
}
}
#ifdef ENABLE_OPENCL_TIME_PROFILER
MNN_PRINT("kernel cost:%d us ConvWino total\n",costTime);
#endif

return NO_ERROR;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,7 @@ ErrorCode ConvSubgroupBuf::onExecute(const std::vector<Tensor *> &inputs, const

cl::Event event;
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime0 = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ConvSubgroup transe\n", costTime0);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvSubgroup", event});
#else
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand All @@ -407,8 +406,7 @@ ErrorCode ConvSubgroupBuf::onExecute(const std::vector<Tensor *> &inputs, const
#ifdef ENABLE_OPENCL_TIME_PROFILER
cl::Event event;
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us ConvSubgroupBuf2D\n", costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"ConvSubgroupBuf2D", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand Down
4 changes: 2 additions & 2 deletions source/backend/opencl/execution/buffer/DeconvBufExecution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@ ErrorCode DeconvBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
mOpenCLBackend->getOpenCLRuntime(),
&event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DeconvBuf\n",costTime);

mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DeconvBuf", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,7 @@ ErrorCode DepthwiseConvBufExecution::onExecute(const std::vector<Tensor *> &inpu
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(),
&event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConvBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvBuf", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ ErrorCode DepthwiseConvSubgroupBufExecution::onExecute(const std::vector<Tensor

run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConvSubgroup transe\n", costTime);

mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvSubgroup transe", event});
#else
run3DKernelDefault(mTranseKernel, mTranseGlobalWorkSize, mTranseLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());
Expand All @@ -274,8 +274,7 @@ ErrorCode DepthwiseConvSubgroupBufExecution::onExecute(const std::vector<Tensor
mOpenCLBackend->getOpenCLRuntime(),
&event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us DepthwiseConvSubgroupBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"DepthwiseConvSubgroupBuf", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,8 @@ ErrorCode GridSampleBufExecution::onExecute(const std::vector<Tensor *> &inputs,
cl::Event event;
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us GridSample\n", costTime);

mOpenCLBackend->getOpenCLRuntime()->pushEvent({"GridSample", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@ ErrorCode Interp3DBufExecution::onExecute(const std::vector<Tensor *> &inputs, c
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Interp\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ ErrorCode InterpBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Interp\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Interp", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,7 @@ ErrorCode LayerNormBufExecution::onExecute(const std::vector<Tensor *> &inputs,
run3DKernelDefault(mKernel, mGWS, mLWS,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us LayerNormBuf\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"LayerNormBuf", event});
#else
run3DKernelDefault(mKernel, mGWS, mLWS, mOpenCLBackend->getOpenCLRuntime());
#endif
Expand Down
5 changes: 2 additions & 3 deletions source/backend/opencl/execution/buffer/MatmulBufExecution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,8 @@ ErrorCode MatMulBufExecution::onExecute(const std::vector<Tensor *> &inputs, con
#ifdef ENABLE_OPENCL_TIME_PROFILER
cl::Event event;
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us MatmulBuf\n",costTime);

mOpenCLBackend->getOpenCLRuntime()->pushEvent({"MatmulBuf", event});
#else
runKernel2D(mKernel, mGlobalWorkSize, mLocalWorkSize, runtime, nullptr);
#endif
Expand Down
3 changes: 1 addition & 2 deletions source/backend/opencl/execution/buffer/PoolBufExecution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,7 @@ ErrorCode PoolBufExecution::onExecute(const std::vector<Tensor *> &inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Pooling\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Pooling", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
3 changes: 1 addition & 2 deletions source/backend/opencl/execution/buffer/RangeBufExecution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ ErrorCode RangeBufExecution::onExecute(const std::vector<Tensor*>& inputs, const
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime(), &event);

int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Range\n",costTime);
mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Range", event});
#else
run3DKernelDefault(mKernel, mGlobalWorkSize, mLocalSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ ErrorCode ReductionBufExecution::onExecute(const std::vector<Tensor *> &inputs,
cl::Event event;
run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime(), &event);
int costTime = (int)mOpenCLBackend->getOpenCLRuntime()->getCostTime(&event);
MNN_PRINT("kernel cost:%d us Reduct1D\n",costTime);

mOpenCLBackend->getOpenCLRuntime()->pushEvent({"Reduct1D", event});
#else
run3DKernelDefault(mReduct1DKernel, mGlobalWorkSize, mLocalWorkSize,
mOpenCLBackend->getOpenCLRuntime());
Expand Down
Loading

0 comments on commit 32f72f4

Please sign in to comment.