Skip to content

Commit c880e19

Browse files
Combine (shl (and x, imm1), imm2) to (shl x, imm2)
1 parent fcfddf3 commit c880e19

39 files changed

+1524
-1810
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,11 @@ class CombinerHelper {
399399
void applyCombineTruncOfExt(MachineInstr &MI,
400400
std::pair<Register, unsigned> &MatchInfo);
401401

402+
/// Transform (shl (and x, imm1), imm2) to (shl x, imm2)
403+
/// if (~imm1 << imm2) = 0
404+
bool matchCombineShlOfAnd(MachineInstr &MI, Register &Reg);
405+
void applyCombineShlOfAnd(MachineInstr &MI, Register &Reg);
406+
402407
/// Transform trunc (shl x, K) to shl (trunc x), K
403408
/// if K < VT.getScalarSizeInBits().
404409
///

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,15 @@ def trunc_ext_fold: GICombineRule <
822822
(apply [{ Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
823823
>;
824824

825+
// Drop a constant mask that the following shift makes redundant:
826+
// Fold (shl (and x, imm1), imm2) -> (shl x, imm2) when (~imm1 << imm2) == 0.
// The rule roots on G_SHL; $matchinfo carries the register x that the match
// helper captures and the apply helper installs as the shifted operand.
827+
def shl_and_fold: GICombineRule <
828+
(defs root:$root, register_matchinfo:$matchinfo),
829+
(match (wip_match_opcode G_SHL):$root,
830+
[{ return Helper.matchCombineShlOfAnd(*${root}, ${matchinfo}); }]),
831+
(apply [{ Helper.applyCombineShlOfAnd(*${root}, ${matchinfo}); }])
832+
>;
833+
825834
// Under certain conditions, transform:
826835
// trunc (shl x, K) -> shl (trunc x), K//
827836
// trunc ([al]shr x, K) -> (trunc ([al]shr (trunc x), K))
@@ -1588,7 +1597,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
15881597
bitcast_bitcast_fold, fptrunc_fpext_fold,
15891598
right_identity_neg_zero_fp,
15901599
right_identity_neg_one_fp,
1591-
combine_inttoptr_constant]>;
1600+
combine_inttoptr_constant, shl_and_fold]>;
15921601

15931602
def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
15941603
overlapping_and, mulo_by_2, mulo_by_0,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,6 +2553,42 @@ void CombinerHelper::applyCombineTruncOfExt(
25532553
MI.eraseFromParent();
25542554
}
25552555

2556+
/// Match a G_SHL whose shifted operand is a single-use G_AND with a constant
/// mask, where every bit the mask clears is discarded by the shift anyway.
///
/// \param MI   The G_SHL root instruction.
/// \param Reg  Receives the non-constant G_AND operand (%x below). It is
///             written by the pattern matcher, so it is only meaningful when
///             this function returns true.
/// \returns true if the G_SHL can read %x directly instead of the G_AND.
bool CombinerHelper::matchCombineShlOfAnd(MachineInstr &MI, Register &Reg) {
  // We're trying to match the following pattern:
  //   %t = G_AND %x, imm1
  //   %root = G_SHL %t, imm2
  //   -->
  //   %root = G_SHL %x, imm2
  // Where (~imm1 << imm2) = 0 in the bit width of the shifted value.
  assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected a G_SHL");
  const Register DstReg = MI.getOperand(0).getReg();

  // Try to match shl (and x, imm1), imm2
  int64_t ShiftImm, AndImm;
  if (!mi_match(DstReg, MRI,
                m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(Reg), m_ICst(AndImm))),
                       m_ICst(ShiftImm))))
    return false;

  // Only look at the type once the pattern is known to match, and use the
  // per-element width because the mask arithmetic below is per element.
  // NOTE(review): both immediates arrive as int64_t, so widths above 64 bits
  // are presumably never seen here; the guard keeps `64 - Size` well defined
  // regardless.
  const Register SrcReg = MI.getOperand(1).getReg();
  const unsigned Size = MRI.getType(SrcReg).getScalarSizeInBits();
  if (Size == 0 || Size > 64)
    return false;

  // A negative or too-large shift amount would make the C++ shift below
  // undefined. Leave such shifts alone instead of reasoning about them.
  if (ShiftImm < 0 || static_cast<uint64_t>(ShiftImm) >= Size)
    return false;

  // The AND only matters if it clears a bit that survives the shift. Work on
  // unsigned values: left-shifting a negative signed integer is undefined
  // behaviour in C++17, and ~AndImm is negative for any ordinary mask.
  const uint64_t Mask = ~0ULL >> (64 - Size);
  const uint64_t ClearedBits = ~static_cast<uint64_t>(AndImm) & Mask;
  return ((ClearedBits << ShiftImm) & Mask) == 0;
}
2584+
2585+
/// Rewire the matched G_SHL so that it shifts \p Reg directly.
///
/// \param MI   The G_SHL root instruction; it is modified in place.
/// \param Reg  The register to install as the shifted operand.
///
/// The G_AND that previously fed the shift is not touched here.
void CombinerHelper::applyCombineShlOfAnd(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected a G_SHL");

  // Operand 1 of the G_SHL is the value being shifted.
  auto &ShiftedValue = MI.getOperand(1);

  // Bracket the in-place edit with observer notifications.
  Observer.changingInstr(MI);
  ShiftedValue.setReg(Reg);
  Observer.changedInstr(MI);
}
2591+
25562592
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
25572593
const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
25582594
const unsigned TruncSize = TruncTy.getScalarSizeInBits();
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
8+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
9+
10+
---
11+
name: test_combine_shl_of_and_I16_shift_8
12+
legalized: true
13+
tracksRegLiveness: true
14+
body: |
15+
bb.0.entry:
16+
liveins: $w0
17+
; CHECK-LABEL: name: test_combine_shl_of_and_I16_shift_8
18+
; CHECK: liveins: $w0
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
21+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
22+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
23+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
24+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SHL]](s16)
25+
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
26+
%0:_(s32) = COPY $w0
27+
%1:_(s16) = G_CONSTANT i16 8
28+
%2:_(s16) = G_CONSTANT i16 255
29+
%3:_(s16) = G_TRUNC %0
30+
%4:_(s16) = G_AND %3, %2
31+
%5:_(s16) = G_SHL %4, %1
32+
%6:_(s32) = G_SEXT %5
33+
$w0 = COPY %6(s32)
34+
...
35+
---
36+
# Negative test case: mask 15 clears bits 4-15, but a shift by 4 only discards bits 12-15, so the G_AND must stay.
37+
name: test_combine_shl_of_and_I16_shift_4_neg
38+
legalized: true
39+
tracksRegLiveness: true
40+
body: |
41+
bb.0.entry:
42+
liveins: $w0
43+
; CHECK-LABEL: name: test_combine_shl_of_and_I16_shift_4_neg
44+
; CHECK: liveins: $w0
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
47+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
48+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
49+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
50+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
51+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND]], [[C]](s16)
52+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SHL]](s16)
53+
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
54+
%0:_(s32) = COPY $w0
55+
%1:_(s16) = G_CONSTANT i16 4
56+
%2:_(s16) = G_CONSTANT i16 15
57+
%3:_(s16) = G_TRUNC %0
58+
%4:_(s16) = G_AND %3, %2
59+
%5:_(s16) = G_SHL %4, %1
60+
%6:_(s32) = G_SEXT %5
61+
$w0 = COPY %6(s32)
62+
...
63+
---
64+
name: test_combine_shl_of_and_I32_shift_16
65+
legalized: true
66+
tracksRegLiveness: true
67+
body: |
68+
bb.0.entry:
69+
liveins: $w0
70+
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_16
71+
; CHECK: liveins: $w0
72+
; CHECK-NEXT: {{ $}}
73+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
74+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
75+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
76+
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
77+
%0:_(s32) = COPY $w0
78+
%1:_(s32) = G_CONSTANT i32 16
79+
%2:_(s32) = G_CONSTANT i32 65535
80+
%3:_(s32) = G_AND %0, %2
81+
%4:_(s32) = G_SHL %3, %1
82+
$w0 = COPY %4(s32)
83+
...
84+
---
85+
name: test_combine_shl_of_and_I32_shift_24
86+
legalized: true
87+
tracksRegLiveness: true
88+
body: |
89+
bb.0.entry:
90+
liveins: $w0
91+
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_24
92+
; CHECK: liveins: $w0
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
95+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
96+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
97+
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
98+
%0:_(s32) = COPY $w0
99+
%1:_(s32) = G_CONSTANT i32 24
100+
%2:_(s32) = G_CONSTANT i32 16777215
101+
%3:_(s32) = G_AND %0, %2
102+
%4:_(s32) = G_SHL %3, %1
103+
$w0 = COPY %4(s32)
104+
...
105+
---
106+
# Negative test case: mask 255 clears bits 8-31, but a shift by 8 only discards bits 24-31, so the G_AND must stay.
107+
name: test_combine_shl_of_and_I32_shift_8_neg
108+
legalized: true
109+
tracksRegLiveness: true
110+
body: |
111+
bb.0.entry:
112+
liveins: $w0
113+
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_8_neg
114+
; CHECK: liveins: $w0
115+
; CHECK-NEXT: {{ $}}
116+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
117+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
118+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
119+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
120+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
121+
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
122+
%0:_(s32) = COPY $w0
123+
%1:_(s32) = G_CONSTANT i32 8
124+
%2:_(s32) = G_CONSTANT i32 255
125+
%3:_(s32) = G_AND %0, %2
126+
%4:_(s32) = G_SHL %3, %1
127+
$w0 = COPY %4(s32)
128+
...
129+
---
130+
# Negative test case: mask 255 clears bits 8-31, but a shift by 16 only discards bits 16-31, so the G_AND must stay.
131+
name: test_combine_shl_of_and_I32_shift_16_neg
132+
legalized: true
133+
tracksRegLiveness: true
134+
body: |
135+
bb.0.entry:
136+
liveins: $w0
137+
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_16_neg
138+
; CHECK: liveins: $w0
139+
; CHECK-NEXT: {{ $}}
140+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
141+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
142+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
143+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
144+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
145+
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
146+
%0:_(s32) = COPY $w0
147+
%1:_(s32) = G_CONSTANT i32 16
148+
%2:_(s32) = G_CONSTANT i32 255
149+
%3:_(s32) = G_AND %0, %2
150+
%4:_(s32) = G_SHL %3, %1
151+
$w0 = COPY %4(s32)
152+
...

llvm/test/CodeGen/AIE/aie2/bfloat16_to_float.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ define dso_local noundef float @bfloat16_to_float_test(%class.bfloat16 %bf.coerc
1414
; CHECK: .p2align 4
1515
; CHECK-NEXT: // %bb.0: // %entry
1616
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
17-
; CHECK-NEXT: nop // Delay Slot 5
17+
; CHECK-NEXT: nopx // Delay Slot 5
1818
; CHECK-NEXT: nop // Delay Slot 4
1919
; CHECK-NEXT: nop // Delay Slot 3
20-
; CHECK-NEXT: mova r0, #16; extend.u16 r1, r1 // Delay Slot 2
20+
; CHECK-NEXT: mova r0, #16 // Delay Slot 2
2121
; CHECK-NEXT: lshl r0, r1, r0 // Delay Slot 1
2222
entry:
2323
%bf.coerce.fca.0.extract = extractvalue %class.bfloat16 %bf.coerce, 0

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -262,11 +262,10 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
262262
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
263263
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_splat:
264264
; GFX7: ; %bb.0:
265-
; GFX7-NEXT: s_sub_i32 s1, s1, 64
266265
; GFX7-NEXT: s_sub_i32 s0, s0, 64
267-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
268-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
269266
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
267+
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
268+
; GFX7-NEXT: s_add_i32 s1, s1, 0xffc00000
270269
; GFX7-NEXT: s_or_b32 s0, s0, s1
271270
; GFX7-NEXT: ; return to shader part epilog
272271
;
@@ -304,11 +303,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
304303
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
305304
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_lo:
306305
; GFX7: ; %bb.0:
307-
; GFX7-NEXT: s_add_i32 s1, s1, 4
308306
; GFX7-NEXT: s_sub_i32 s0, s0, 64
309-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
310-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
311307
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
308+
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
309+
; GFX7-NEXT: s_add_i32 s1, s1, 0x40000
312310
; GFX7-NEXT: s_or_b32 s0, s0, s1
313311
; GFX7-NEXT: ; return to shader part epilog
314312
;
@@ -346,11 +344,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
346344
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
347345
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_hi:
348346
; GFX7: ; %bb.0:
349-
; GFX7-NEXT: s_sub_i32 s1, s1, 64
350347
; GFX7-NEXT: s_add_i32 s0, s0, 4
351-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
352-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
353348
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
349+
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
350+
; GFX7-NEXT: s_add_i32 s1, s1, 0xffc00000
354351
; GFX7-NEXT: s_or_b32 s0, s0, s1
355352
; GFX7-NEXT: ; return to shader part epilog
356353
;
@@ -388,9 +385,8 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
388385
define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
389386
; GFX7-LABEL: s_add_v2i16:
390387
; GFX7: ; %bb.0:
391-
; GFX7-NEXT: s_add_i32 s1, s1, s3
392388
; GFX7-NEXT: s_add_i32 s0, s0, s2
393-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
389+
; GFX7-NEXT: s_add_i32 s1, s1, s3
394390
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
395391
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
396392
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -439,9 +435,8 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
439435
; GFX7-NEXT: s_or_b32 s0, s1, s0
440436
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
441437
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
442-
; GFX7-NEXT: s_add_i32 s1, s1, s3
443438
; GFX7-NEXT: s_add_i32 s0, s0, s2
444-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
439+
; GFX7-NEXT: s_add_i32 s1, s1, s3
445440
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
446441
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
447442
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -495,9 +490,8 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
495490
; GFX7-NEXT: s_or_b32 s2, s3, s2
496491
; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000
497492
; GFX7-NEXT: s_lshr_b32 s3, s2, 16
498-
; GFX7-NEXT: s_add_i32 s1, s1, s3
499493
; GFX7-NEXT: s_add_i32 s0, s0, s2
500-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
494+
; GFX7-NEXT: s_add_i32 s1, s1, s3
501495
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
502496
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
503497
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -556,11 +550,10 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
556550
; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000
557551
; GFX7-NEXT: s_lshr_b32 s2, s0, 16
558552
; GFX7-NEXT: s_lshr_b32 s3, s1, 16
559-
; GFX7-NEXT: s_add_i32 s2, s2, s3
560553
; GFX7-NEXT: s_add_i32 s0, s0, s1
561-
; GFX7-NEXT: s_and_b32 s1, s2, 0xffff
554+
; GFX7-NEXT: s_add_i32 s2, s2, s3
562555
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
563-
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
556+
; GFX7-NEXT: s_lshl_b32 s1, s2, 16
564557
; GFX7-NEXT: s_or_b32 s0, s0, s1
565558
; GFX7-NEXT: ; return to shader part epilog
566559
;

llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -688,11 +688,9 @@ define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
688688
define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
689689
; GFX6-LABEL: s_andn2_v3i16:
690690
; GFX6: ; %bb.0:
691-
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
692691
; GFX6-NEXT: s_mov_b32 s0, -1
693692
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
694693
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
695-
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
696694
; GFX6-NEXT: s_mov_b32 s1, 0xffff
697695
; GFX6-NEXT: s_or_b32 s6, s5, s6
698696
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
@@ -741,11 +739,9 @@ define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1
741739
define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
742740
; GFX6-LABEL: s_andn2_v3i16_commute:
743741
; GFX6: ; %bb.0:
744-
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
745742
; GFX6-NEXT: s_mov_b32 s0, -1
746743
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
747744
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
748-
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
749745
; GFX6-NEXT: s_mov_b32 s1, 0xffff
750746
; GFX6-NEXT: s_or_b32 s6, s5, s6
751747
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
@@ -794,17 +790,15 @@ define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inr
794790
define amdgpu_ps { i48, i48 } @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
795791
; GFX6-LABEL: s_andn2_v3i16_multi_use:
796792
; GFX6: ; %bb.0:
797-
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
798793
; GFX6-NEXT: s_mov_b32 s0, -1
799794
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
800795
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
801796
; GFX6-NEXT: s_mov_b32 s1, 0xffff
802797
; GFX6-NEXT: s_or_b32 s6, s5, s6
803798
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
804799
; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
805-
; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
806800
; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
807-
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
801+
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
808802
; GFX6-NEXT: s_or_b32 s0, s0, s1
809803
; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
810804
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
@@ -866,10 +860,8 @@ define <3 x i16> @v_andn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) {
866860
; GFX6-LABEL: v_andn2_v3i16:
867861
; GFX6: ; %bb.0:
868862
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
870863
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
871864
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
872-
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
873865
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
874866
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
875867
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1

0 commit comments

Comments
 (0)