Combine (shl (and x, imm1), imm2) to (shl x, imm2) where (~imm1 << imm2) = 0
abhinay-anubola committed Jan 7, 2025
1 parent d0eaceb commit 6829573
Showing 39 changed files with 1,521 additions and 1,810 deletions.
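Worked example of the condition, using the constants from the new AArch64 MIR tests: for a 16-bit value with imm1 = 0xff and imm2 = 8, ~imm1 = 0xff00 within 16 bits and 0xff00 << 8 = 0 modulo 2^16, so every bit the AND clears is discarded by the shift and the AND can be dropped. With imm1 = 0xf and imm2 = 4 instead, (~imm1 << imm2) = 0xff00 != 0 modulo 2^16, so the AND still affects the result and must stay.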
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -399,6 +399,11 @@ class CombinerHelper {
void applyCombineTruncOfExt(MachineInstr &MI,
std::pair<Register, unsigned> &MatchInfo);

/// Transform (shl (and x, imm1), imm2) to (shl x, imm2)
/// if (~imm1 << imm2) = 0
bool matchCombineShlOfAnd(MachineInstr &MI, Register &Reg);
void applyCombineShlOfAnd(MachineInstr &MI, Register &Reg);

/// Transform trunc (shl x, K) to shl (trunc x), K
/// if K < VT.getScalarSizeInBits().
///
11 changes: 10 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -822,6 +822,15 @@ def trunc_ext_fold: GICombineRule <
(apply [{ Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
>;

// Under certain conditions, transform:
// (shl (and x, imm1), imm2) -> (shl x, imm2), when (~imm1 << imm2) = 0
def shl_and_fold: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHL):$root,
[{ return Helper.matchCombineShlOfAnd(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShlOfAnd(*${root}, ${matchinfo}); }])
>;

// Under certain conditions, transform:
// trunc (shl x, K) -> shl (trunc x), K
// trunc ([al]shr x, K) -> (trunc ([al]shr (trunc x), K))
@@ -1588,7 +1597,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
bitcast_bitcast_fold, fptrunc_fpext_fold,
right_identity_neg_zero_fp,
right_identity_neg_one_fp,
combine_inttoptr_constant]>;
combine_inttoptr_constant, shl_and_fold]>;

def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
overlapping_and, mulo_by_2, mulo_by_0,
33 changes: 33 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2553,6 +2553,39 @@ void CombinerHelper::applyCombineTruncOfExt(
MI.eraseFromParent();
}

bool CombinerHelper::matchCombineShlOfAnd(MachineInstr &MI, Register &Reg) {
// We're trying to match the following pattern:
// %t = G_AND %x, imm1
// %root = G_SHL %t, imm2
// -->
// %root = G_SHL %x, imm2
// Where (~imm1 << imm2) = 0
assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected a G_SHL");
const Register DstReg = MI.getOperand(0).getReg();
const Register SrcReg = MI.getOperand(1).getReg();
const LLT SrcTy = MRI.getType(SrcReg);
const unsigned Size = SrcTy.getSizeInBits();

// Try to match shl (and x, imm1), imm2
int64_t ShiftImm, AndImm;
if (!mi_match(DstReg, MRI,
m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(Reg), m_ICst(AndImm))),
m_ICst(ShiftImm))))
return false;
// The AND is redundant when every bit it clears (i.e. every zero bit of
// AndImm within the type) is shifted out by ShiftImm. If the AND clears a
// bit that survives the shift, it cannot be removed.
uint64_t Mask = ~0ULL >> (64 - Size);
// Do the check in unsigned arithmetic so the left shift is well defined.
return !((~static_cast<uint64_t>(AndImm) << ShiftImm) & Mask);
}

void CombinerHelper::applyCombineShlOfAnd(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected a G_SHL");
Observer.changingInstr(MI);
MI.getOperand(1).setReg(Reg);
Observer.changedInstr(MI);
}

static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
const unsigned TruncSize = TruncTy.getScalarSizeInBits();
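As a side note, the bit-level check performed by matchCombineShlOfAnd can be reproduced as a small standalone C++ sketch and verified at compile time against the constants used in the new MIR tests below; the helper name andIsRedundantAfterShl is illustrative only and is not part of this patch:

#include <cstdint>

// True when an AND with `AndImm` followed by a left shift by `ShiftImm` on a
// `Size`-bit value makes the AND redundant, i.e. every bit the AND clears is
// shifted out of the type.
constexpr bool andIsRedundantAfterShl(uint64_t AndImm, unsigned ShiftImm,
                                      unsigned Size) {
  uint64_t Mask = ~0ULL >> (64 - Size); // low `Size` bits of a 64-bit word
  return ((~AndImm << ShiftImm) & Mask) == 0;
}

// Positive tests: the mask only clears bits that the shift discards.
static_assert(andIsRedundantAfterShl(0xFF, 8, 16), "s16: and 0xff, shl 8");
static_assert(andIsRedundantAfterShl(0xFFFF, 16, 32), "s32: and 0xffff, shl 16");
// Negative tests: the mask clears bits that survive the shift.
static_assert(!andIsRedundantAfterShl(0xF, 4, 16), "s16: and 0xf, shl 4");
static_assert(!andIsRedundantAfterShl(0xFF, 16, 32), "s32: and 0xff, shl 16");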
152 changes: 152 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-shl-and.mir
@@ -0,0 +1,152 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s

---
name: test_combine_shl_of_and_I16_shift_8
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I16_shift_8
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SHL]](s16)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
%0:_(s32) = COPY $w0
%1:_(s16) = G_CONSTANT i16 8
%2:_(s16) = G_CONSTANT i16 255
%3:_(s16) = G_TRUNC %0
%4:_(s16) = G_AND %3, %2
%5:_(s16) = G_SHL %4, %1
%6:_(s32) = G_SEXT %5
$w0 = COPY %6(s32)
...
---
# Negative test case: the shift amount (4) does not push out all of the bits cleared by the mask (0xf), so (~imm1 << imm2) != 0 and the AND must stay.
name: test_combine_shl_of_and_I16_shift_4_neg
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I16_shift_4_neg
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND]], [[C]](s16)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SHL]](s16)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
%0:_(s32) = COPY $w0
%1:_(s16) = G_CONSTANT i16 4
%2:_(s16) = G_CONSTANT i16 15
%3:_(s16) = G_TRUNC %0
%4:_(s16) = G_AND %3, %2
%5:_(s16) = G_SHL %4, %1
%6:_(s32) = G_SEXT %5
$w0 = COPY %6(s32)
...
---
name: test_combine_shl_of_and_I32_shift_16
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_16
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 16
%2:_(s32) = G_CONSTANT i32 65535
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
---
name: test_combine_shl_of_and_I32_shift_24
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_24
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 24
%2:_(s32) = G_CONSTANT i32 16777215
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
---
# Negative test case: the shift amount (8) does not push out all of the bits cleared by the mask (0xff) in 32 bits, so (~imm1 << imm2) != 0 and the AND must stay.
name: test_combine_shl_of_and_I32_shift_8_neg
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_8_neg
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 8
%2:_(s32) = G_CONSTANT i32 255
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
---
# Negative test case: the AND and SHL cannot be combined because imm1 (255) and imm2 (16) do not satisfy (~imm1 << imm2) = 0.
name: test_combine_shl_of_and_I32_shift_16_neg
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_16_neg
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 16
%2:_(s32) = G_CONSTANT i32 255
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
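The new test can be exercised in isolation with lit; from a configured build tree, something along these lines should work (the exact path to llvm-lit depends on the build directory):

build/bin/llvm-lit llvm/test/CodeGen/AArch64/GlobalISel/combine-shl-and.mir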
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/bfloat16_to_float.ll
@@ -14,10 +14,10 @@ define dso_local noundef float @bfloat16_to_float_test(%class.bfloat16 %bf.coerc
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mova r0, #16; extend.u16 r1, r1 // Delay Slot 2
; CHECK-NEXT: mova r0, #16 // Delay Slot 2
; CHECK-NEXT: lshl r0, r1, r0 // Delay Slot 1
entry:
%bf.coerce.fca.0.extract = extractvalue %class.bfloat16 %bf.coerce, 0
29 changes: 11 additions & 18 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -262,11 +262,10 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_splat:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_sub_i32 s1, s1, 64
; GFX7-NEXT: s_sub_i32 s0, s0, 64
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, 0xffc00000
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -304,11 +303,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_lo:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_add_i32 s1, s1, 4
; GFX7-NEXT: s_sub_i32 s0, s0, 64
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, 0x40000
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -346,11 +344,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_hi:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_sub_i32 s1, s1, 64
; GFX7-NEXT: s_add_i32 s0, s0, 4
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, 0xffc00000
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -388,9 +385,8 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX7-LABEL: s_add_v2i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_add_i32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -439,9 +435,8 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_add_i32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -495,9 +490,8 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
; GFX7-NEXT: s_or_b32 s2, s3, s2
; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000
; GFX7-NEXT: s_lshr_b32 s3, s2, 16
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_add_i32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -556,11 +550,10 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000
; GFX7-NEXT: s_lshr_b32 s2, s0, 16
; GFX7-NEXT: s_lshr_b32 s3, s1, 16
; GFX7-NEXT: s_add_i32 s2, s2, s3
; GFX7-NEXT: s_add_i32 s0, s0, s1
; GFX7-NEXT: s_and_b32 s1, s2, 0xffff
; GFX7-NEXT: s_add_i32 s2, s2, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_lshl_b32 s1, s2, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
10 changes: 1 addition & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -688,11 +688,9 @@ define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
@@ -741,11 +739,9 @@ define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1
define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16_commute:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
@@ -794,17 +790,15 @@ define amdgpu_ps i48 @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inr
define amdgpu_ps { i48, i48 } @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16_multi_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
@@ -866,10 +860,8 @@ define <3 x i16> @v_andn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) {
; GFX6-LABEL: v_andn2_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1