Skip to content

Commit 2c5ff00

Browse files
author
Fomenko, Evarist M
committed
cpu: concat: use memcpy for fast copy
Playing with pointers and `pragma omp simd` can be dangerous, especially if the data alignment is not consistent with its size. :) This closes #668
1 parent f5c0afe commit 2c5ff00

File tree

1 file changed

+3
-21
lines changed

1 file changed

+3
-21
lines changed

src/cpu/simple_concat.cpp

Lines changed: 3 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
* limitations under the License.
1515
*******************************************************************************/
1616

17+
#include <cstring>
18+
1719
#include "dnnl_thread.hpp"
1820

1921
#include "simple_concat.hpp"
@@ -97,27 +99,7 @@ status_t simple_concat_t<data_type>::execute(const exec_ctx_t &ctx) const {
9799
const data_t *i = &iptrs[a][in_off];
98100
data_t *o = &optrs[a][out_off];
99101
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
100-
// The code below performs data copying: o[e] = i[e]
101-
// and uses a workaround to make GNU compilers optimize it
102-
uint8_t *ptro = reinterpret_cast<uint8_t *>(o);
103-
const uint8_t *ptri = reinterpret_cast<const uint8_t *>(i);
104-
const dim_t main_part = (nelems_to_copy[a] * sizeof(data_t))
105-
/ sizeof(uint32_t);
106-
const dim_t tail_part = (nelems_to_copy[a] * sizeof(data_t))
107-
% sizeof(uint32_t);
108-
109-
PRAGMA_OMP_SIMD()
110-
for (dim_t e = 0; e < main_part; ++e) {
111-
*(reinterpret_cast<uint32_t *>(ptro))
112-
= *(reinterpret_cast<const uint32_t *>(ptri));
113-
ptro += sizeof(uint32_t);
114-
ptri += sizeof(uint32_t);
115-
}
116-
for (dim_t e = 0; e < tail_part; ++e) {
117-
*ptro = *ptri;
118-
++ptro;
119-
++ptri;
120-
}
102+
std::memcpy(o, i, nelems_to_copy[a] * sizeof(data_t));
121103
#else
122104
PRAGMA_OMP_SIMD()
123105
for (dim_t e = 0; e < nelems_to_copy[a]; ++e) o[e] = i[e];

0 commit comments

Comments
 (0)