Skip to content

Commit a603593

Browse files
committed
src: cpu: rnn: remove lambda call in omp simd loop
This is to work around a compiler bug (affects icc17 and icc19.1).
1 parent 0405c9a commit a603593

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

src/cpu/rnn/ref_rnn.hpp

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,26 @@ namespace cpu {
3939
template <typename src_data_t, typename acc_data_t>
4040
void gates_reduction(const rnn_utils::rnn_conf_t &rnn,
4141
const src_data_t *ws_gates_, acc_data_t *diff_bias_) {
42-
auto body = [&](int i, int k) {
43-
for (int j = 0; j < rnn.mb; j++)
44-
diff_bias_[i * rnn.dic + k]
45-
+= ws_gates_[j * rnn.gates_ws_ld + i * rnn.dic + k];
46-
};
4742

48-
// @todo block k on simd-width
49-
#if DNNL_CPU_RUNTIME == DNNL_RUNTIME_OMP \
50-
&& _OPENMP >= 201307 /* icc 17.0 has a problem with simd collapse */ \
51-
&& !((defined __INTEL_COMPILER) && (__INTEL_COMPILER == 1700))
43+
// The loop body needs to be inlined as some versions of icc have
44+
// an issue with lambdas inside omp simd loops
45+
#define body_loop(i, k) \
46+
for (int j = 0; j < rnn.mb; j++) \
47+
diff_bias_[i * rnn.dic + k] \
48+
+= ws_gates_[j * rnn.gates_ws_ld + i * rnn.dic + k];
49+
50+
// @todo block k on simd-width to enable vectorization in
51+
// parallel_nd path
52+
#if DNNL_CPU_RUNTIME == DNNL_RUNTIME_OMP && _OPENMP >= 201307
5253
#pragma omp parallel for simd collapse(2)
5354
for (int i = 0; i < rnn.n_gates; i++)
5455
for (int k = 0; k < rnn.dic; k++)
55-
body(i, k);
56+
body_loop(i, k);
5657
#else
57-
parallel_nd(rnn.n_gates, rnn.dic, body);
58+
parallel_nd(rnn.n_gates, rnn.dic, [&](int i, int k) { body_loop(i, k); });
5859
#endif
60+
61+
#undef body_loop
5962
}
6063

6164
template <prop_kind_t aprop, impl::data_type_t src_type,

0 commit comments

Comments
 (0)