Skip to content

Commit 7c1d517

Browse files
anoopkg6 and anoopkg6
authored
[SystemZ] Enable rematerialization for scalar loads (llvm#179838)
We can avoid the unnecessary spill by marking loads as rematerializable and just directly loading from where the argument was originally passed on the stack. TargetInstrInfo::isReMaterializableImpl checks to make sure that any loads are MI.isDereferenceableInvariantLoad(), so we should be able to move the load down to the remat site. Related: [llvm#166774](llvm#166774). Co-authored-by: anoopkg6 <anoopkg6@github.com>
1 parent 4f92cf9 commit 7c1d517

File tree

6 files changed

+185
-79
lines changed

6 files changed

+185
-79
lines changed

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ defm LoadStoreF128 : MVCLoadStore<load, f128, MVCImm, 15>;
156156
// Load instructions
157157
//===----------------------------------------------------------------------===//
158158

159-
let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
159+
let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1,
160+
isReMaterializable = 1 in {
160161
let isCodeGenOnly = 1 in
161162
// Reload f16 from 4-byte spill slot.
162163
defm LE16 : UnaryRXPair<"le", 0x78, 0xED64, z_load, FP16, 4>;

llvm/lib/Target/SystemZ/SystemZInstrInfo.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,8 @@ def LLGFI : InstAlias<"llgfi\t$R1, $RI1", (LLILF GR64:$R1, imm64lf32:$RI1)>;
461461
def LLGHI : InstAlias<"llghi\t$R1, $RI1", (LLILL GR64:$R1, imm64ll16:$RI1)>;
462462

463463
// Register loads.
464-
let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
464+
let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1,
465+
isReMaterializable = 1 in {
465466
// Expands to L, LY or LFH, depending on the choice of register.
466467
def LMux : UnaryRXYPseudo<"l", z_load, GRX32, 4>,
467468
Requires<[FeatureHighWord]>;
@@ -482,7 +483,7 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
482483
def LTG : UnaryRXY<"ltg", 0xE302, z_load, GR64, 8>;
483484
}
484485

485-
let canFoldAsLoad = 1 in {
486+
let canFoldAsLoad = 1, isReMaterializable =1 in {
486487
def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_z_load, GR32>;
487488
def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_z_load, GR64>;
488489
}

llvm/lib/Target/SystemZ/SystemZInstrVector.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ let Predicates = [FeatureVector] in {
145145
// LEY and LDY offer full 20-bit displacement fields. It's often better
146146
// to use those instructions rather than force a 20-bit displacement
147147
// into a GPR temporary.
148-
let mayLoad = 1, SimpleBDXLoad = 1, canFoldAsLoad = 1 in {
148+
let mayLoad = 1, SimpleBDXLoad = 1, canFoldAsLoad = 1,
149+
isReMaterializable = 1 in {
149150
def VL16 : UnaryAliasVRX<z_load, v16hb, bdxaddr12pair>;
150151
def VL32 : UnaryAliasVRX<z_load, v32sb, bdxaddr12pair>;
151152
def VL64 : UnaryAliasVRX<z_load, v64db, bdxaddr12pair>;

llvm/test/CodeGen/SystemZ/builtin-setjmp-spills.ll

Lines changed: 43 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; Simulate register pressure around setjmp call and check all virtual registers
3-
; are saved to stack before setjmp call and restored from stack after the call.
4-
; And these registers are not live across the setjmp call.
2+
; Simulate register pressure around setjmp call.
3+
; With load rematerialization (PR #179838), it is no longer necessary to check if
4+
; all virtual registers are saved to stack before setjmp call because of load
5+
; rematerialization of the global variables.
56
; setjmp storing to jmp_buf.
67
; Return address in slot 2.
78
; Stack Pointer in slot 4.
@@ -47,16 +48,16 @@ define signext i32 @func() {
4748
; CHECK-NEXT: .cfi_offset %r13, -56
4849
; CHECK-NEXT: .cfi_offset %r14, -48
4950
; CHECK-NEXT: .cfi_offset %r15, -40
50-
; CHECK-NEXT: aghi %r15, -384
51-
; CHECK-NEXT: .cfi_def_cfa_offset 544
52-
; CHECK-NEXT: std %f8, 376(%r15) # 8-byte Spill
53-
; CHECK-NEXT: std %f9, 368(%r15) # 8-byte Spill
54-
; CHECK-NEXT: std %f10, 360(%r15) # 8-byte Spill
55-
; CHECK-NEXT: std %f11, 352(%r15) # 8-byte Spill
56-
; CHECK-NEXT: std %f12, 344(%r15) # 8-byte Spill
57-
; CHECK-NEXT: std %f13, 336(%r15) # 8-byte Spill
58-
; CHECK-NEXT: std %f14, 328(%r15) # 8-byte Spill
59-
; CHECK-NEXT: std %f15, 320(%r15) # 8-byte Spill
51+
; CHECK-NEXT: aghi %r15, -64
52+
; CHECK-NEXT: .cfi_def_cfa_offset 224
53+
; CHECK-NEXT: std %f8, 56(%r15) # 8-byte Spill
54+
; CHECK-NEXT: std %f9, 48(%r15) # 8-byte Spill
55+
; CHECK-NEXT: std %f10, 40(%r15) # 8-byte Spill
56+
; CHECK-NEXT: std %f11, 32(%r15) # 8-byte Spill
57+
; CHECK-NEXT: std %f12, 24(%r15) # 8-byte Spill
58+
; CHECK-NEXT: std %f13, 16(%r15) # 8-byte Spill
59+
; CHECK-NEXT: std %f14, 8(%r15) # 8-byte Spill
60+
; CHECK-NEXT: std %f15, 0(%r15) # 8-byte Spill
6061
; CHECK-NEXT: .cfi_offset %f8, -168
6162
; CHECK-NEXT: .cfi_offset %f9, -176
6263
; CHECK-NEXT: .cfi_offset %f10, -184
@@ -67,64 +68,44 @@ define signext i32 @func() {
6768
; CHECK-NEXT: .cfi_offset %f15, -224
6869
; CHECK-NEXT: lgrl %r1, t@GOT
6970
; CHECK-NEXT: lgrl %r2, s@GOT
70-
; CHECK-NEXT: stg %r1, 312(%r15) # 8-byte Spill
7171
; CHECK-NEXT: mvhi 0(%r1), 1
7272
; CHECK-NEXT: lgrl %r1, r@GOT
7373
; CHECK-NEXT: lgrl %r3, q@GOT
74-
; CHECK-NEXT: stg %r2, 304(%r15) # 8-byte Spill
7574
; CHECK-NEXT: mvhi 0(%r2), 1
7675
; CHECK-NEXT: lgrl %r2, p@GOT
77-
; CHECK-NEXT: stg %r1, 296(%r15) # 8-byte Spill
7876
; CHECK-NEXT: mvhi 0(%r1), 1
79-
; CHECK-NEXT: stg %r3, 288(%r15) # 8-byte Spill
8077
; CHECK-NEXT: mvhi 0(%r3), 1
8178
; CHECK-NEXT: lgrl %r1, o@GOT
82-
; CHECK-NEXT: stg %r2, 280(%r15) # 8-byte Spill
8379
; CHECK-NEXT: mvhi 0(%r2), 1
8480
; CHECK-NEXT: lgrl %r2, n@GOT
8581
; CHECK-NEXT: lgrl %r3, m@GOT
86-
; CHECK-NEXT: stg %r1, 272(%r15) # 8-byte Spill
8782
; CHECK-NEXT: mvhi 0(%r1), 1
8883
; CHECK-NEXT: lgrl %r1, l@GOT
89-
; CHECK-NEXT: stg %r2, 264(%r15) # 8-byte Spill
9084
; CHECK-NEXT: mvhi 0(%r2), 1
91-
; CHECK-NEXT: stg %r3, 256(%r15) # 8-byte Spill
9285
; CHECK-NEXT: mvhi 0(%r3), 1
9386
; CHECK-NEXT: lgrl %r2, k@GOT
94-
; CHECK-NEXT: stg %r1, 248(%r15) # 8-byte Spill
9587
; CHECK-NEXT: mvhi 0(%r1), 1
9688
; CHECK-NEXT: lgrl %r1, j@GOT
9789
; CHECK-NEXT: lgrl %r3, i@GOT
98-
; CHECK-NEXT: stg %r2, 240(%r15) # 8-byte Spill
9990
; CHECK-NEXT: mvhi 0(%r2), 1
10091
; CHECK-NEXT: lgrl %r2, h@GOT
101-
; CHECK-NEXT: stg %r1, 232(%r15) # 8-byte Spill
10292
; CHECK-NEXT: mvhi 0(%r1), 1
103-
; CHECK-NEXT: stg %r3, 224(%r15) # 8-byte Spill
10493
; CHECK-NEXT: mvhi 0(%r3), 1
10594
; CHECK-NEXT: lgrl %r1, g@GOT
106-
; CHECK-NEXT: stg %r2, 216(%r15) # 8-byte Spill
10795
; CHECK-NEXT: mvhi 0(%r2), 1
10896
; CHECK-NEXT: lgrl %r2, f@GOT
10997
; CHECK-NEXT: lgrl %r3, e@GOT
110-
; CHECK-NEXT: stg %r1, 208(%r15) # 8-byte Spill
11198
; CHECK-NEXT: mvhi 0(%r1), 1
11299
; CHECK-NEXT: lgrl %r1, d@GOT
113-
; CHECK-NEXT: stg %r2, 200(%r15) # 8-byte Spill
114100
; CHECK-NEXT: mvhi 0(%r2), 1
115-
; CHECK-NEXT: stg %r3, 192(%r15) # 8-byte Spill
116101
; CHECK-NEXT: mvhi 0(%r3), 1
117102
; CHECK-NEXT: lgrl %r2, c@GOT
118-
; CHECK-NEXT: stg %r1, 184(%r15) # 8-byte Spill
119103
; CHECK-NEXT: mvhi 0(%r1), 1
120104
; CHECK-NEXT: lgrl %r3, b@GOT
121105
; CHECK-NEXT: lgrl %r4, a@GOT
122-
; CHECK-NEXT: stg %r2, 176(%r15) # 8-byte Spill
123106
; CHECK-NEXT: mvhi 0(%r2), 1
124107
; CHECK-NEXT: lgrl %r1, buf@GOT
125-
; CHECK-NEXT: stg %r3, 168(%r15) # 8-byte Spill
126108
; CHECK-NEXT: mvhi 0(%r3), 1
127-
; CHECK-NEXT: stg %r4, 160(%r15) # 8-byte Spill
128109
; CHECK-NEXT: mvhi 0(%r4), 1
129110
; CHECK-NEXT: larl %r0, .LBB0_2
130111
; CHECK-NEXT: stg %r0, 8(%r1)
@@ -136,56 +117,56 @@ define signext i32 @func() {
136117
; CHECK-NEXT: # %entry
137118
; CHECK-NEXT: lhi %r0, 1
138119
; CHECK-NEXT: .LBB0_3: # %entry
139-
; CHECK-NEXT: lg %r1, 160(%r15) # 8-byte Reload
120+
; CHECK-NEXT: lgrl %r1, a@GOT
140121
; CHECK-NEXT: a %r0, 0(%r1)
141-
; CHECK-NEXT: lg %r1, 168(%r15) # 8-byte Reload
122+
; CHECK-NEXT: lgrl %r1, b@GOT
142123
; CHECK-NEXT: a %r0, 0(%r1)
143-
; CHECK-NEXT: lg %r1, 176(%r15) # 8-byte Reload
124+
; CHECK-NEXT: lgrl %r1, c@GOT
144125
; CHECK-NEXT: a %r0, 0(%r1)
145-
; CHECK-NEXT: lg %r1, 184(%r15) # 8-byte Reload
126+
; CHECK-NEXT: lgrl %r1, d@GOT
146127
; CHECK-NEXT: a %r0, 0(%r1)
147-
; CHECK-NEXT: lg %r1, 192(%r15) # 8-byte Reload
128+
; CHECK-NEXT: lgrl %r1, e@GOT
148129
; CHECK-NEXT: a %r0, 0(%r1)
149-
; CHECK-NEXT: lg %r1, 200(%r15) # 8-byte Reload
130+
; CHECK-NEXT: lgrl %r1, f@GOT
150131
; CHECK-NEXT: a %r0, 0(%r1)
151-
; CHECK-NEXT: lg %r1, 208(%r15) # 8-byte Reload
132+
; CHECK-NEXT: lgrl %r1, g@GOT
152133
; CHECK-NEXT: a %r0, 0(%r1)
153-
; CHECK-NEXT: lg %r1, 216(%r15) # 8-byte Reload
134+
; CHECK-NEXT: lgrl %r1, h@GOT
154135
; CHECK-NEXT: a %r0, 0(%r1)
155-
; CHECK-NEXT: lg %r1, 224(%r15) # 8-byte Reload
136+
; CHECK-NEXT: lgrl %r1, i@GOT
156137
; CHECK-NEXT: a %r0, 0(%r1)
157-
; CHECK-NEXT: lg %r1, 232(%r15) # 8-byte Reload
138+
; CHECK-NEXT: lgrl %r1, j@GOT
158139
; CHECK-NEXT: a %r0, 0(%r1)
159-
; CHECK-NEXT: lg %r1, 240(%r15) # 8-byte Reload
140+
; CHECK-NEXT: lgrl %r1, k@GOT
160141
; CHECK-NEXT: a %r0, 0(%r1)
161-
; CHECK-NEXT: lg %r1, 248(%r15) # 8-byte Reload
142+
; CHECK-NEXT: lgrl %r1, l@GOT
162143
; CHECK-NEXT: a %r0, 0(%r1)
163-
; CHECK-NEXT: lg %r1, 256(%r15) # 8-byte Reload
144+
; CHECK-NEXT: lgrl %r1, m@GOT
164145
; CHECK-NEXT: a %r0, 0(%r1)
165-
; CHECK-NEXT: lg %r1, 264(%r15) # 8-byte Reload
146+
; CHECK-NEXT: lgrl %r1, n@GOT
166147
; CHECK-NEXT: a %r0, 0(%r1)
167-
; CHECK-NEXT: lg %r1, 272(%r15) # 8-byte Reload
148+
; CHECK-NEXT: lgrl %r1, o@GOT
168149
; CHECK-NEXT: a %r0, 0(%r1)
169-
; CHECK-NEXT: lg %r1, 280(%r15) # 8-byte Reload
150+
; CHECK-NEXT: lgrl %r1, p@GOT
170151
; CHECK-NEXT: a %r0, 0(%r1)
171-
; CHECK-NEXT: lg %r1, 288(%r15) # 8-byte Reload
152+
; CHECK-NEXT: lgrl %r1, q@GOT
172153
; CHECK-NEXT: a %r0, 0(%r1)
173-
; CHECK-NEXT: lg %r1, 296(%r15) # 8-byte Reload
154+
; CHECK-NEXT: lgrl %r1, r@GOT
174155
; CHECK-NEXT: a %r0, 0(%r1)
175-
; CHECK-NEXT: lg %r1, 304(%r15) # 8-byte Reload
156+
; CHECK-NEXT: lgrl %r1, s@GOT
176157
; CHECK-NEXT: a %r0, 0(%r1)
177-
; CHECK-NEXT: lg %r1, 312(%r15) # 8-byte Reload
158+
; CHECK-NEXT: lgrl %r1, t@GOT
178159
; CHECK-NEXT: a %r0, 0(%r1)
179160
; CHECK-NEXT: lgfr %r2, %r0
180-
; CHECK-NEXT: ld %f8, 376(%r15) # 8-byte Reload
181-
; CHECK-NEXT: ld %f9, 368(%r15) # 8-byte Reload
182-
; CHECK-NEXT: ld %f10, 360(%r15) # 8-byte Reload
183-
; CHECK-NEXT: ld %f11, 352(%r15) # 8-byte Reload
184-
; CHECK-NEXT: ld %f12, 344(%r15) # 8-byte Reload
185-
; CHECK-NEXT: ld %f13, 336(%r15) # 8-byte Reload
186-
; CHECK-NEXT: ld %f14, 328(%r15) # 8-byte Reload
187-
; CHECK-NEXT: ld %f15, 320(%r15) # 8-byte Reload
188-
; CHECK-NEXT: lmg %r6, %r15, 432(%r15)
161+
; CHECK-NEXT: ld %f8, 56(%r15) # 8-byte Reload
162+
; CHECK-NEXT: ld %f9, 48(%r15) # 8-byte Reload
163+
; CHECK-NEXT: ld %f10, 40(%r15) # 8-byte Reload
164+
; CHECK-NEXT: ld %f11, 32(%r15) # 8-byte Reload
165+
; CHECK-NEXT: ld %f12, 24(%r15) # 8-byte Reload
166+
; CHECK-NEXT: ld %f13, 16(%r15) # 8-byte Reload
167+
; CHECK-NEXT: ld %f14, 8(%r15) # 8-byte Reload
168+
; CHECK-NEXT: ld %f15, 0(%r15) # 8-byte Reload
169+
; CHECK-NEXT: lmg %r6, %r15, 112(%r15)
189170
; CHECK-NEXT: br %r14
190171
entry:
191172
store i32 1, ptr @t, align 4

llvm/test/CodeGen/SystemZ/fp-move-02.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -147,18 +147,18 @@ define void @f10(double %extra) {
147147
; CHECK-NEXT: .cfi_offset %r13, -56
148148
; CHECK-NEXT: .cfi_offset %r14, -48
149149
; CHECK-NEXT: .cfi_offset %r15, -40
150-
; CHECK-NEXT: aghi %r15, -184
151-
; CHECK-NEXT: .cfi_def_cfa_offset 344
150+
; CHECK-NEXT: aghi %r15, -176
151+
; CHECK-NEXT: .cfi_def_cfa_offset 336
152152
; CHECK-NEXT: lgrl %r1, dptr@GOT
153153
; CHECK-NEXT: ldr %f1, %f0
154154
; CHECK-NEXT: adb %f1, 0(%r1)
155155
; CHECK-NEXT: ldr %f2, %f0
156156
; CHECK-NEXT: adb %f2, 0(%r1)
157157
; CHECK-NEXT: ldr %f3, %f0
158158
; CHECK-NEXT: adb %f3, 0(%r1)
159-
; CHECK-NEXT: std %f1, 176(%r15) # 8-byte Spill
160-
; CHECK-NEXT: std %f2, 168(%r15) # 8-byte Spill
161-
; CHECK-NEXT: std %f3, 160(%r15) # 8-byte Spill
159+
; CHECK-NEXT: std %f1, 168(%r15) # 8-byte Spill
160+
; CHECK-NEXT: std %f2, 160(%r15) # 8-byte Spill
161+
; CHECK-NEXT: lgdr %r11, %f3
162162
; CHECK-NEXT: ldr %f1, %f0
163163
; CHECK-NEXT: adb %f1, 0(%r1)
164164
; CHECK-NEXT: ldr %f2, %f0
@@ -176,28 +176,28 @@ define void @f10(double %extra) {
176176
; CHECK-NEXT: ldr %f2, %f0
177177
; CHECK-NEXT: adb %f2, 0(%r1)
178178
; CHECK-NEXT: adb %f0, 0(%r1)
179-
; CHECK-NEXT: lgrl %r6, iptr@GOT
180-
; CHECK-NEXT: lgdr %r13, %f1
181-
; CHECK-NEXT: lgdr %r12, %f2
182-
; CHECK-NEXT: lgdr %r11, %f0
179+
; CHECK-NEXT: lgdr %r6, %f1
180+
; CHECK-NEXT: lgdr %r13, %f2
181+
; CHECK-NEXT: lgdr %r12, %f0
183182
; CHECK-NEXT: .LBB9_1: # %loop
184183
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
185184
; CHECK-NEXT: brasl %r14, foo@PLT
186185
; CHECK-NEXT: lgr %r0, %r2
187-
; CHECK-NEXT: og %r0, 176(%r15) # 8-byte Folded Reload
188186
; CHECK-NEXT: og %r0, 168(%r15) # 8-byte Folded Reload
189187
; CHECK-NEXT: og %r0, 160(%r15) # 8-byte Folded Reload
188+
; CHECK-NEXT: ogr %r0, %r11
190189
; CHECK-NEXT: ogr %r0, %r10
191190
; CHECK-NEXT: ogr %r0, %r9
192191
; CHECK-NEXT: ogr %r0, %r8
193192
; CHECK-NEXT: ogr %r0, %r7
193+
; CHECK-NEXT: ogr %r0, %r6
194194
; CHECK-NEXT: ogr %r0, %r13
195195
; CHECK-NEXT: ogr %r0, %r12
196-
; CHECK-NEXT: ogr %r0, %r11
197-
; CHECK-NEXT: stg %r0, 0(%r6)
196+
; CHECK-NEXT: lgrl %r1, iptr@GOT
197+
; CHECK-NEXT: stg %r0, 0(%r1)
198198
; CHECK-NEXT: cgijlh %r2, 1, .LBB9_1
199199
; CHECK-NEXT: # %bb.2: # %exit
200-
; CHECK-NEXT: lmg %r6, %r15, 232(%r15)
200+
; CHECK-NEXT: lmg %r6, %r15, 224(%r15)
201201
; CHECK-NEXT: br %r14
202202
entry:
203203
%double0 = load volatile double, ptr@dptr

0 commit comments

Comments
 (0)