Skip to content

Commit 983f28b

Browse files
authored
Merge pull request #9723 from tvegas1/sse_no_mempcy_v1.16.x-r5
UCT/IB/MLX5: Prevent compiler to replace SSE instructions by memmove() - v1.16.x
2 parents 10d785d + ded36cb commit 983f28b

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

src/uct/ib/mlx5/ib_mlx5.inl

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -504,16 +504,20 @@ size_t uct_ib_mlx5_set_data_seg_iov(uct_ib_mlx5_txwq_t *txwq,
504504
static UCS_F_ALWAYS_INLINE void uct_ib_mlx5_bf_copy_bb(void * restrict dst,
505505
void * restrict src)
506506
{
507-
#if defined( __SSE4_2__)
508-
UCS_WORD_COPY(__m128i, dst, __m128i, src, MLX5_SEND_WQE_BB);
509-
#elif defined(__ARM_NEON)
507+
#if defined(__ARM_NEON)
510508
UCS_WORD_COPY(int16x8_t, dst, int16x8_t, src, MLX5_SEND_WQE_BB);
511509
#else
510+
#if defined(__SSE4_2__)
511+
typedef __m128i uct_ib_mlx5_send_wqe_bb_block_t;
512+
#else
513+
typedef uint8_t uct_ib_mlx5_send_wqe_bb_block_t;
514+
#endif
515+
/* Prevent the compiler from replacing this copy with memmove() */
512516
typedef struct {
513-
uint8_t data[MLX5_SEND_WQE_BB];
517+
uct_ib_mlx5_send_wqe_bb_block_t
518+
data[MLX5_SEND_WQE_BB / sizeof(uct_ib_mlx5_send_wqe_bb_block_t)];
514519
} UCS_S_PACKED uct_ib_mlx5_send_wqe_bb_t;
515520

516-
/* Prevent the compiler to replace by memmove() */
517521
UCS_WORD_COPY(uct_ib_mlx5_send_wqe_bb_t, dst,
518522
uct_ib_mlx5_send_wqe_bb_t, src,
519523
MLX5_SEND_WQE_BB);

0 commit comments

Comments
 (0)