Skip to content

Commit 789dac1

Browse files
committed
xe: jit: codegen: limit comparison binary op SIMD
These operations use a flag register, which forces SIMD16. This is problematic when padding to 16 elements exceeds the size of the allocated register buffer.
1 parent 9377075 commit 789dac1

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

src/gpu/intel/jit/codegen/codegen.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -999,6 +999,12 @@ class expr_evaluator_t : public ir_visitor_t {
999999
auto src0_op = eval(obj.a, a_out_op);
10001000
auto src1_op = eval(obj.b, b_out_op);
10011001

1002+
if ((src0_op.is_reg_buf_data()
1003+
&& src0_op.reg_buf_data().hs() != 0)
1004+
|| (src1_op.is_reg_buf_data()
1005+
&& src1_op.reg_buf_data().hs() != 0))
1006+
mod.setExecSize(obj.type.elems());
1007+
10021008
ebinary(obj, mod, dst_op, src0_op, src1_op);
10031009
break;
10041010
}

0 commit comments

Comments
 (0)