Skip to content

Commit

Permalink
*refactoring of AVX2 optimizations of SynetMergedConvolution32f.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Oct 14, 2024
1 parent 690e34e commit 1585ff6
Show file tree
Hide file tree
Showing 12 changed files with 1,467 additions and 2,164 deletions.
7 changes: 4 additions & 3 deletions prj/vs2019/Avx2.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution16bDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution16bInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution16bOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCd.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCdc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32f.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8i.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8iDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8iInput.cpp" />
Expand Down
21 changes: 12 additions & 9 deletions prj/vs2019/Avx2.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,6 @@
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetConvolution8iDirect.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCdc.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCd.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDc.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8i.cpp">
<Filter>Avx2</Filter>
</ClCompile>
Expand Down Expand Up @@ -394,6 +385,18 @@
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetConvolution16bNhwcDeptwise.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32f.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDepthwise.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fInput.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fOutput.cpp">
<Filter>Avx2</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Avx2">
Expand Down
7 changes: 4 additions & 3 deletions prj/vs2022/Avx2.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution16bDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution16bInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution16bOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCd.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCdc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDc.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32f.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fInput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fOutput.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8i.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8iDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8iInput.cpp" />
Expand Down
21 changes: 12 additions & 9 deletions prj/vs2022/Avx2.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,6 @@
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetConvolution8iDirect.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCdc.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fCd.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDc.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution8i.cpp">
<Filter>Avx2</Filter>
</ClCompile>
Expand Down Expand Up @@ -394,6 +385,18 @@
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetConvolution16bNhwcDeptwise.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32f.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fDepthwise.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fInput.cpp">
<Filter>Avx2</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx2SynetMergedConvolution32fOutput.cpp">
<Filter>Avx2</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Avx2">
Expand Down
96 changes: 96 additions & 0 deletions src/Simd/SimdAvx2SynetMergedConvolution32f.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Simd Library (http://ermig1979.github.io/Simd).
*
* Copyright (c) 2011-2024 Yermalayeu Ihar.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "Simd/SimdSynetMergedConvolution32f.h"
#include "Simd/SimdSynetConvolution32fCommon.h"
#include "Simd/SimdUpdate.h"
#include "Simd/SimdCpu.h"

namespace Simd
{
#if defined(SIMD_AVX2_ENABLE) && defined(SIMD_SYNET_ENABLE)
namespace Avx2
{
// AVX2 constructor of the three-stage merged convolution.
// Builds on the SSE4.1 implementation and then wires the three entries of p.conv:
// conv[0] via SetInput, conv[1] via SetDepthwise and conv[2] via SetOutput.
SynetMergedConvolution32fCdc::SynetMergedConvolution32fCdc(const MergConvParam& p)
    : Sse41::SynetMergedConvolution32fCdc(p)
{
    // Values handed to SetSize: the three Base::AlgCache* results plus F
    // (presumably the AVX2 float vector width - TODO confirm).
    const auto cacheL1 = Base::AlgCacheL1();
    const auto cacheL2 = Base::AlgCacheL2();
    const auto cacheL3 = Base::AlgCacheL3();
    SetSize(cacheL1, cacheL2, cacheL3, F);

    // One _convolution slot per stage, in stage order.
    const auto& stages = p.conv;
    SetInput(stages[0], _convolution + 0);
    SetDepthwise(stages[1], false, _convolution + 1);
    SetOutput(stages[2], _convolution + 2);
}

//-------------------------------------------------------------------------------------------------

// AVX2 constructor of the two-stage merged convolution whose stages are wired
// through SetInput (conv[0]) and SetDepthwise (conv[1]).
// Builds on the SSE4.1 implementation.
SynetMergedConvolution32fCd::SynetMergedConvolution32fCd(const MergConvParam& p)
    : Sse41::SynetMergedConvolution32fCd(p)
{
    // Values handed to SetSize: the three Base::AlgCache* results plus F
    // (presumably the AVX2 float vector width - TODO confirm).
    const auto cacheL1 = Base::AlgCacheL1();
    const auto cacheL2 = Base::AlgCacheL2();
    const auto cacheL3 = Base::AlgCacheL3();
    SetSize(cacheL1, cacheL2, cacheL3, F);

    // NOTE(review): this constructor reads the stage descriptions from the _param member
    // rather than from the argument p, unlike the sibling constructors in this file.
    const auto& stages = _param.conv;
    SetInput(stages[0], _convolution + 0);
    // The depthwise stage is passed 'true' here; the sibling constructors pass 'false'.
    SetDepthwise(stages[1], true, _convolution + 1);
}

//-------------------------------------------------------------------------------------------------

// AVX2 constructor of the two-stage merged convolution whose stages are wired
// through SetDepthwise (conv[0]) and SetOutput (conv[1]).
// Builds on the SSE4.1 implementation.
SynetMergedConvolution32fDc::SynetMergedConvolution32fDc(const MergConvParam& p)
    : Sse41::SynetMergedConvolution32fDc(p)
{
    // Values handed to SetSize: the three Base::AlgCache* results plus F
    // (presumably the AVX2 float vector width - TODO confirm).
    const auto cacheL1 = Base::AlgCacheL1();
    const auto cacheL2 = Base::AlgCacheL2();
    const auto cacheL3 = Base::AlgCacheL3();
    SetSize(cacheL1, cacheL2, cacheL3, F);

    // One _convolution slot per stage, in stage order.
    const auto& stages = p.conv;
    SetDepthwise(stages[0], false, _convolution + 0);
    SetOutput(stages[1], _convolution + 1);
}

//-------------------------------------------------------------------------------------------------

// Factory for the 32-bit float merged convolution at the AVX2 level.
//
// Builds a MergConvParam from the arguments (with SimdSynetCompatibilityDefault) and
// returns NULL when it is not valid for SimdTensorData32f. Otherwise the first variant
// whose Preferable() accepts the parameters is chosen, tested in the order Cdc, Cd, Dc.
// Within a variant the SSE4.1 class is used when the relevant dstC is below the
// F / HF thresholds, and the AVX2 class otherwise. If no variant is preferable,
// Base::SynetMergedConvolution32f is used.
//
// The returned object is allocated with new; releasing it is up to the caller
// (the matching release routine is not visible in this file).
void* SynetMergedConvolution32fInit(size_t batch, const SimdConvolutionParameters* convs, size_t count, SimdBool add)
{
    MergConvParam param(batch, convs, count, add, SimdSynetCompatibilityDefault);
    if (!param.Valid(SimdTensorData32f))
        return NULL;

    if (SynetMergedConvolution32fCdc::Preferable(param))
    {
        // The AVX2 path is taken only when the last stage has at least F output channels.
        if (param.conv[2].dstC >= F)
            return new Avx2::SynetMergedConvolution32fCdc(param);
        return new Sse41::SynetMergedConvolution32fCdc(param);
    }

    if (SynetMergedConvolution32fCd::Preferable(param))
    {
        // Same rule, applied to the second (last) stage.
        if (param.conv[1].dstC >= F)
            return new Avx2::SynetMergedConvolution32fCd(param);
        return new Sse41::SynetMergedConvolution32fCd(param);
    }

    if (SynetMergedConvolution32fDc::Preferable(param))
    {
        // Both stages must be wide enough: at least F for conv[0] and at least HF for conv[1].
        if (param.conv[0].dstC >= F && param.conv[1].dstC >= HF)
            return new Avx2::SynetMergedConvolution32fDc(param);
        return new Sse41::SynetMergedConvolution32fDc(param);
    }

    // No specialised variant applies.
    return new Base::SynetMergedConvolution32f(param);
}
}
#endif
}
Loading

0 comments on commit 1585ff6

Please sign in to comment.