Skip to content

Commit

Permalink
+add AVX-512BW kernels Convolution32fNhwcDepthwise_k7p3d1s1w4, Convolution32fNhwcDepthwise_k7p3d1s1w6, Convolution32fNhwcDepthwise_k7p3d1s1w8 for framework SynetMergedConvolution32f.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Oct 15, 2024
1 parent 3a1747a commit 93b800a
Show file tree
Hide file tree
Showing 3 changed files with 405 additions and 7 deletions.
3 changes: 3 additions & 0 deletions docs/2024.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ <h5>New features</h5>
<li>AVX-512BW kernel Convolution32fNhwcDepthwise_k7p3d1s1w8 for class SynetConvolution32fNhwcDepthwise.</li>
<li>AMX-BF16 kernel DepthwiseConvolution_k7p3d1s1w6 for class SynetMergedConvolution16b.</li>
<li>AMX-BF16 kernel DepthwiseConvolution_k7p3d1s1w8 for class SynetMergedConvolution16b.</li>
<li>AVX-512BW kernel Convolution32fNhwcDepthwise_k7p3d1s1w4 for framework SynetMergedConvolution32f.</li>
<li>AVX-512BW kernel Convolution32fNhwcDepthwise_k7p3d1s1w6 for framework SynetMergedConvolution32f.</li>
<li>AVX-512BW kernel Convolution32fNhwcDepthwise_k7p3d1s1w8 for framework SynetMergedConvolution32f.</li>
</ul>
<h5>Improving</h5>
<ul>
Expand Down
12 changes: 6 additions & 6 deletions src/Simd/SimdAvx512bwSynetConvolution32fNhwcDepthwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,7 @@ namespace Simd
{
assert(p.IsKernel(7) && p.IsPad(3) && p.IsStride(1) && p.IsDilation(1) && Aligned(p.srcW, 4));

size_t dstC = p.dstC, dstCF = AlignLo(p.dstC, F), dstW = p.dstW, srcH = p.srcH, end = dstW - 4;
size_t dstC = p.dstC, dstW = p.dstW, srcH = p.srcH, end = dstW - 4;
__m512 s0, s1, w0, w1, w2, w3, w4, w5, w6, d0, d1, d2, d3, _params[2];
_params[0] = _mm512_set1_ps(params[0]);
if (type == SimdConvolutionActivationRestrictRange ||
Expand All @@ -902,7 +902,7 @@ namespace Simd
{
for (size_t dc = 0; dc < dstC; dc += F)
{
__mmask16 tail = dc < dstCF ? __mmask16(-1) : TailMask16(dstC - dc);
__mmask16 tail = TailMask16(dstC - dc);
if (type == SimdConvolutionActivationPrelu)
_params[0] = _mm512_maskz_loadu_ps(tail, params + dc);
d0 = bias ? _mm512_maskz_loadu_ps(tail, bias + dc) : _mm512_setzero_ps();
Expand Down Expand Up @@ -990,7 +990,7 @@ namespace Simd
{
assert(p.IsKernel(7) && p.IsPad(3) && p.IsStride(1) && p.IsDilation(1) && AlignedAny(p.srcW, 6));

size_t dstC = p.dstC, dstCF = AlignLo(p.dstC, F), dstW = p.dstW, srcH = p.srcH, end = dstW - 6;
size_t dstC = p.dstC, dstW = p.dstW, srcH = p.srcH, end = dstW - 6;
__m512 s0, s1, w0, w1, w2, w3, w4, w5, w6, d0, d1, d2, d3, d4, d5, _params[2];
_params[0] = _mm512_set1_ps(params[0]);
if (type == SimdConvolutionActivationRestrictRange ||
Expand All @@ -1003,7 +1003,7 @@ namespace Simd
{
for (size_t dc = 0; dc < dstC; dc += F)
{
__mmask16 tail = dc < dstCF ? __mmask16(-1) : TailMask16(dstC - dc);
__mmask16 tail = TailMask16(dstC - dc);
if (type == SimdConvolutionActivationPrelu)
_params[0] = _mm512_maskz_loadu_ps(tail, params + dc);
d0 = bias ? _mm512_maskz_loadu_ps(tail, bias + dc) : _mm512_setzero_ps();
Expand Down Expand Up @@ -1112,7 +1112,7 @@ namespace Simd
{
assert(p.IsKernel(7) && p.IsPad(3) && p.IsStride(1) && p.IsDilation(1) && Aligned(p.srcW, 8));

size_t dstC = p.dstC, dstCF = AlignLo(p.dstC, F), dstW = p.dstW, srcH = p.srcH, end = dstW - 8;
size_t dstC = p.dstC, dstW = p.dstW, srcH = p.srcH, end = dstW - 8;
__m512 s0, s1, w0, w1, w2, w3, w4, w5, w6, d0, d1, d2, d3, d4, d5, d6, d7, _params[2];
_params[0] = _mm512_set1_ps(params[0]);
if (type == SimdConvolutionActivationRestrictRange ||
Expand All @@ -1125,7 +1125,7 @@ namespace Simd
{
for (size_t dc = 0; dc < dstC; dc += F)
{
__mmask16 tail = dc < dstCF ? __mmask16(-1) : TailMask16(dstC - dc);
__mmask16 tail = TailMask16(dstC - dc);
if (type == SimdConvolutionActivationPrelu)
_params[0] = _mm512_maskz_loadu_ps(tail, params + dc);
d0 = bias ? _mm512_maskz_loadu_ps(tail, bias + dc) : _mm512_setzero_ps();
Expand Down
Loading

0 comments on commit 93b800a

Please sign in to comment.