Skip to content

Commit 436f4df

Browse files
committed
src: cpu: rnn: remove macro in omp simd loop
This is to work around a compiler bug (affects icc 19.1).
1 parent 4d6a926 commit 436f4df

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

src/cpu/rnn/ref_rnn.hpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,32 @@ namespace dnnl {
3636
namespace impl {
3737
namespace cpu {
3838

39+
namespace {
40+
template <typename gates_t, typename acc_t>
41+
// The loop body needs to be put in a function as some versions of icc have
42+
// an issue with lambdas & macros inside omp simd loops
43+
inline void body_loop(int i, int k, const gates_t *ws_gates, acc_t *diff_bias,
44+
const rnn_utils::rnn_conf_t &rnn) {
45+
for (int j = 0; j < rnn.mb; j++)
46+
diff_bias[i * rnn.dhc + k]
47+
+= ws_gates[j * rnn.scratch_gates_ld + i * rnn.dhc + k];
48+
}
49+
} // namespace
50+
3951
template <typename gates_t, typename acc_t>
4052
void gates_reduction(const rnn_utils::rnn_conf_t &rnn, const gates_t *ws_gates_,
4153
acc_t *diff_bias_) {
4254

43-
// The loop body needs to be inlined as some versions of icc have
44-
// an issue with lambdas inside omp simd loops
45-
#define body_loop(i, k) \
46-
for (int j = 0; j < rnn.mb; j++) \
47-
diff_bias_[i * rnn.dhc + k] \
48-
+= ws_gates_[j * rnn.scratch_gates_ld + i * rnn.dhc + k];
49-
5055
// @todo block k on simd-width to enable vectorization in
5156
// parallel_nd path
5257
#if DNNL_CPU_RUNTIME == DNNL_RUNTIME_OMP && _OPENMP >= 201307
5358
#pragma omp parallel for simd collapse(2)
5459
for (int i = 0; i < rnn.n_gates; i++)
5560
for (int k = 0; k < rnn.dhc; k++)
56-
body_loop(i, k);
61+
body_loop(i, k, ws_gates_, diff_bias_, rnn);
5762
#else
58-
parallel_nd(rnn.n_gates, rnn.dhc, [&](int i, int k) { body_loop(i, k); });
63+
parallel_nd(rnn.n_gates, rnn.dhc,
64+
[&](int i, int k) { body_loop(i, k, ws_gates_, diff_bias_, rnn); });
5965
#endif
6066

6167
#undef body_loop

0 commit comments

Comments
 (0)