Skip to content

Commit 39930a3

Browse files
committed
*Refactoring of the SynetConvolution16bNhwcSpecV0 class.
1 parent 70e6c83 commit 39930a3

4 files changed

+14 -3 lines changed

src/Simd/SimdAvx2SynetConvolution16b.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ namespace Simd
3333
ConvParam param(batch, conv, compatibility);
3434
if (!param.Valid(SimdTensorData32f, SimdTensorData16b))
3535
return NULL;
36-
if (SynetConvolution16bNhwcSpecV1::Preferable(param))
37-
return new Avx2::SynetConvolution16bNhwcSpecV1(param);
36+
//if (SynetConvolution16bNhwcSpecV1::Preferable(param))
37+
// return new Avx2::SynetConvolution16bNhwcSpecV1(param);
3838
if (SynetConvolution16bNhwcSpecV0::Preferable(param))
3939
return new Avx2::SynetConvolution16bNhwcSpecV0(param);
4040
if (SynetConvolution16bNhwcGemm::Preferable(param))

src/Simd/SimdBaseSynetConvolution16bNhwcSpecV0.cpp

+8
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ namespace Simd
6767
a.K = p.kernelX * p.kernelY;
6868

6969
a.macroC = Simd::RestrictRange(AlignLo(L1 / a.microD / a.K / 2, a.microC), a.microC, a.srcC);
70+
a.macroO = a.macroC * a.K / a.microC;
7071
a.batch = 1;
7172
size_t bufSize = a.srcC * a.srcH * a.srcW * 2;
7273
if (bufSize * 2 <= L2 && p.batch > 1)
@@ -84,6 +85,13 @@ namespace Simd
8485

8586
_stepS = p.srcH * p.srcW * p.srcC * a.batch * _elemS;
8687
_stepD = p.dstH * p.dstW * p.dstC * a.batch * _elemD;
88+
89+
int dX = (int)a.microC, dY = (int)a.srcW * dX, dC = dY * int(a.srcH * a.batch);
90+
_offset.Resize(DivHi(a.srcC, a.microC) * a.K);
91+
for (size_t c = 0, offsS = 0, i = 0; c < a.srcC; c += dX, offsS += dC)
92+
for (size_t y = 0, offsY = offsS; y < p.kernelY; y += 1, offsY += dY)
93+
for (size_t offsX = offsY, endX = offsY + p.kernelX * dX; offsX < endX; offsX += dX, i++)
94+
_offset[i] = (int)offsX;
8795
}
8896

8997
size_t SynetConvolution16bNhwcSpecV0::ExternalBufferSize() const

src/Simd/SimdSynetConvolution16b.h

+2 -1
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ namespace Simd
177177
{
178178
size_t batch, srcC, srcH, srcW, dstC, K;
179179
size_t F, microD, microS, microC;
180-
size_t macroD, macroH, macroC, numH;
180+
size_t macroD, macroH, macroC, numH, macroO;
181181
size_t bufS, bufD, elem;
182182
Array32i offs;
183183
};
@@ -194,6 +194,7 @@ namespace Simd
194194
void Forward(const uint8_t* src, uint16_t* buf, float* sum, uint8_t* dst);
195195

196196
AlgParam _alg;
197+
Array32i _offset;
197198
PreprocessPtr _preprocess;
198199
ConvolutionPtr _convolution;
199200
PostprocessPtr _postprocess;

src/Test/TestSynetConvolution16b.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -363,7 +363,9 @@ namespace Test
363363
#endif
364364
#if 1
365365
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 56, 48, 48, 56, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
366+
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 64, 48, 48, 56, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
366367
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 112, 24, 24, 112, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
368+
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 128, 24, 24, 112, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
367369
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 224, 12, 12, 224, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
368370
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 448, 6, 6, 448, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
369371
result = result && SynetConvolution16bForwardAutoTest(eps, Param(10, 448, 6, 6, 448, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);

0 commit comments

Comments
 (0)