diff --git a/kernel/x86_64/sbgemv_n.c b/kernel/x86_64/sbgemv_n.c index b2d4eb74f1..08ccace617 100644 --- a/kernel/x86_64/sbgemv_n.c +++ b/kernel/x86_64/sbgemv_n.c @@ -28,9 +28,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -//#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) -//#include "sbgemv_n_microk_cooperlake.c" -//#endif +#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) +#include "sbgemv_n_microk_cooperlake.c" +#endif #define ALIGN64_ALLOC(alloc_size, TYPE, ptr_align, ptr) \ ptr = (TYPE *) malloc(sizeof(TYPE)*alloc_size + 63); \ diff --git a/kernel/x86_64/sbgemv_n_microk_cooperlake_template.c b/kernel/x86_64/sbgemv_n_microk_cooperlake_template.c index 4711e9720c..ab22e08488 100644 --- a/kernel/x86_64/sbgemv_n_microk_cooperlake_template.c +++ b/kernel/x86_64/sbgemv_n_microk_cooperlake_template.c @@ -231,7 +231,7 @@ static int sbgemv_kernel_32xN_lda_direct(BLASLONG m, BLASLONG n, float alpha, bf accum512_8 = _mm512_permutex2var_ps(accum512_0, idx_base_0, accum512_1); accum512_9 = _mm512_permutex2var_ps(accum512_0, idx_base_1, accum512_1); - if ((m-tag_m_32x) > 16) { + if ((m-tag_m_32x) >= 16) { STORE16_COMPLETE_RESULT(accum512_8, y+tag_m_32x+0) STORE16_MASK_COMPLETE_RESULT(accum512_9, y+tag_m_32x+16, store_tail_mask) } else {