Commit cf16f00

abadams authored and alexreinking committed
Don't emit aligned loads to unaligned addresses (#6047)
* Don't emit aligned loads to unaligned addresses

Fixes #6046

(cherry picked from commit 626c34a)
1 parent 4f87171 commit cf16f00
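
For context: Halide tracks pointer alignment as a ModulusRemainder pair,
meaning the address is known to be congruent to remainder modulo modulus.
The bug fixed here is that after rewriting the base of a strided load, the
tracked remainder could drift outside the canonical range [0, modulus), so
codegen could emit an aligned dense load for an address that was not
actually aligned. Below is a minimal standalone sketch of the invariant the
fix restores (types simplified to int; the real struct uses int64_t, and
shift_base is an illustrative helper, not Halide's API):

    #include <cassert>

    // Simplified stand-in for Halide's ModulusRemainder: the tracked
    // address satisfies addr == remainder (mod modulus).
    struct ModulusRemainder {
        int modulus;
        int remainder;  // canonical form: 0 <= remainder < modulus
    };

    // Always-non-negative mod (for positive modulus), mirroring the
    // behavior of Halide's mod_imp as used in this commit.
    int mod_imp(int a, int m) {
        int r = a % m;
        return r < 0 ? r + m : r;
    }

    // Rewriting base -> base - offset shifts the congruence class. The
    // buggy code did `align.remainder -= offset`, which can leave the
    // remainder negative or >= modulus, breaking later alignment checks.
    ModulusRemainder shift_base(ModulusRemainder align, int offset) {
        align.remainder = mod_imp(align.remainder - offset, align.modulus);
        return align;
    }

    int main() {
        ModulusRemainder align{16, 2};  // base == 2 (mod 16)
        align = shift_base(align, 5);   // (2 - 5) mod 16 == 13
        assert(align.remainder == 13);  // the buggy update would store -3
        return 0;
    }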

File tree

2 files changed, +35 -3 lines

src/CodeGen_LLVM.cpp

Lines changed: 12 additions & 3 deletions
@@ -1915,8 +1915,12 @@ void CodeGen_LLVM::visit(const Load *op) {
         // Try to rewrite strided loads as shuffles of dense loads,
         // aligned to the stride. This makes adjacent strided loads
         // share the same underlying dense loads.
-        ModulusRemainder align = op->alignment;
         Expr base = ramp->base;
+        // The variable align will track the alignment of the
+        // base. Every time we change base, we also need to update
+        // align.
+        ModulusRemainder align = op->alignment;
+
         int aligned_stride = gcd(stride->value, align.modulus);
         int offset = 0;
         if (aligned_stride == stride->value) {
@@ -1930,7 +1934,7 @@ void CodeGen_LLVM::visit(const Load *op) {
 
         if (offset) {
             base = simplify(base - offset);
-            align.remainder -= offset;
+            align.remainder = mod_imp(align.remainder - offset, align.modulus);
         }
 
         // We want to load a few more bytes than the original load did.
@@ -1947,6 +1951,11 @@ void CodeGen_LLVM::visit(const Load *op) {
 
         int slice_lanes = native_vector_bits() / op->type.bits();
 
+        // We're going to add multiples of slice_lanes to base in
+        // the loop below, so reduce alignment modulo slice_lanes.
+        align.modulus = gcd(align.modulus, slice_lanes);
+        align.remainder = mod_imp(align.remainder, align.modulus);
+
         // We need to slice the result in to native vector lanes, otherwise
         // LLVM misses optimizations like using ldN on ARM.
         vector<Value *> results;
@@ -1957,7 +1966,7 @@ void CodeGen_LLVM::visit(const Load *op) {
             Expr slice_base = simplify(base + load_base_i);
 
             Value *load_i = codegen_dense_vector_load(op->type.with_lanes(load_lanes_i), op->name, slice_base,
-                                                      op->image, op->param, op->alignment, nullptr, false);
+                                                      op->image, op->param, align, nullptr, false);
 
             SmallVector<Constant *, 256> constants;
             for (int j = 0; j < lanes_i; j++) {
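
The hunk before the slicing loop deliberately weakens the tracked
alignment: the loop adds multiples of slice_lanes to base, and adding
arbitrary multiples of slice_lanes preserves only the congruence modulo
gcd(modulus, slice_lanes). A hedged sketch of that reduction (same
simplified types as the sketch above; reduce_for_step is illustrative,
not Halide's API):

    #include <cassert>
    #include <numeric>  // std::gcd

    struct ModulusRemainder {
        int modulus;
        int remainder;  // 0 <= remainder < modulus
    };

    int mod_imp(int a, int m) {
        int r = a % m;
        return r < 0 ? r + m : r;
    }

    // Adding arbitrary multiples of `step` to the base keeps only the
    // congruence modulo gcd(modulus, step), so the tracked alignment is
    // reduced before describing the per-slice dense loads.
    ModulusRemainder reduce_for_step(ModulusRemainder align, int step) {
        align.modulus = std::gcd(align.modulus, step);
        align.remainder = mod_imp(align.remainder, align.modulus);
        return align;
    }

    int main() {
        // base == 35 (mod 64); the loop steps base by multiples of 16.
        ModulusRemainder align = reduce_for_step({64, 35}, 16);
        assert(align.modulus == 16 && align.remainder == 3);
        return 0;
    }

This is also why the last hunk passes the updated align, rather than the
stale op->alignment, to codegen_dense_vector_load: by that point the base
has been both shifted and sliced.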

test/correctness/align_bounds.cpp

Lines changed: 23 additions & 0 deletions
@@ -197,6 +197,29 @@ int main(int argc, char **argv) {
         }
     }
 
+    // Try a case where aligning a buffer means that strided loads can
+    // do dense aligned loads and then shuffle. This used to trigger a
+    // bug in codegen.
+    {
+        Func f, g;
+        Var x;
+
+        f(x) = x;
+
+        // Do strided loads of every possible alignment
+        Expr e = 0;
+        for (int i = -32; i <= 32; i++) {
+            e += f(3 * x + i);
+        }
+        g(x) = e;
+
+        f.compute_root();
+        g.bound(x, 0, 1024).vectorize(x, 16, TailStrategy::RoundUp);
+
+        // Just check if it crashes
+        g.realize({1024});
+    }
+
     printf("Success!\n");
     return 0;
 }
