Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Clang][BasicAA][AIE2] Enable full PHI AA for AIE #103

Merged
merged 1 commit into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/AIE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ void AIEToolChain::addClangTargetOptions(
// Make sure to perform most optimizations before mandatory inlinings,
// otherwise noalias attributes can get lost and hurt AA results.
CC1Args.append({"-mllvm", "-mandatory-inlining-before-opt=false"});

// Perform complete AA analysis on phi nodes.
CC1Args.append({"-mllvm", "-basic-aa-full-phi-analysis=true"});

// Extend the max limit of the search depth in BasicAA
CC1Args.append({"-mllvm", "-basic-aa-max-lookup-search-depth=10"});
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we (i.e., in the average AIE application) wouldn't really notice it if we made it 20 (or 50?).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the statistics, I saw this limit being exhausted for one benchmark.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So 20 would be better?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @martien-de-jong, I believe that 10 is enough for now, considering that our benchmarks are quite complex.

}

// Avoid using newer dwarf versions, as the simulator doesn't understand newer
Expand Down
19 changes: 14 additions & 5 deletions llvm/lib/Analysis/BasicAliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
// affiliates
//
//===----------------------------------------------------------------------===//
//
// This file defines the primary stateless implementation of the
Expand Down Expand Up @@ -68,20 +71,26 @@ using namespace llvm;
static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
cl::init(true));

/// Enable full analysis of PHI nodes. Disabled by default; opt in with
/// -basic-aa-full-phi-analysis. In aliasPHI() it takes effect only together
/// with -basic-aa-recphi: when both are set, the special handling of incoming
/// values that are themselves PHI nodes (which turns conservative once more
/// than one PHI input is seen, to bound compile time) is skipped.
static cl::opt<bool> EnableFullPHIAnalysis("basic-aa-full-phi-analysis",
cl::Hidden, cl::init(false));

andcarminati marked this conversation as resolved.
Show resolved Hide resolved
static cl::opt<bool> EnableSeparateStorageAnalysis("basic-aa-separate-storage",
cl::Hidden, cl::init(true));

// The max limit of the search depth in DecomposeGEPExpression() and
// getUnderlyingObject(). Exposed as the hidden command-line option
// -basic-aa-max-lookup-search-depth so the limit can be changed without
// rebuilding; defaults to 6.
static cl::opt<unsigned>
MaxLookupSearchDepth("basic-aa-max-lookup-search-depth", cl::Hidden,
cl::init(6));

/// SearchLimitReached / SearchTimes shows how often the limit on
/// decomposing GEPs is reached. It will affect the precision
/// of basic alias analysis.
STATISTIC(SearchLimitReached, "Number of times the limit to "
"decompose GEPs is reached");
STATISTIC(SearchTimes, "Number of times a GEP is decomposed");

// The max limit of the search depth in DecomposeGEPExpression() and
// getUnderlyingObject().
static const unsigned MaxLookupSearchDepth = 6;

bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// We don't care if this analysis itself is preserved, it has no state. But
Expand Down Expand Up @@ -1389,7 +1398,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
if (PV1 == PN)
continue;

if (isa<PHINode>(PV1)) {
if (!(EnableRecPhiAnalysis && EnableFullPHIAnalysis) && isa<PHINode>(PV1)) {
if (OnePhi && OnePhi != PV1) {
// To control potential compile time explosion, we choose to be
// conservative when we have more than one Phi input. It is important
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Analysis/BasicAA/phi-aa.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -basic-aa-full-phi-analysis \
; RUN: -disable-output 2>&1 | FileCheck %s --check-prefix=FULL-PHI

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

Expand Down Expand Up @@ -85,6 +88,9 @@ declare void @inc(ptr)
; CHECK: MayAlias: i32* %val1, i32* @Y
; CHECK: MayAlias: i32* %val2, i32* @Y
; CHECK: MayAlias: i32* %val3, i32* @Y
; FULL-PHI: NoAlias: i32* %val1, i32* @Y
; FULL-PHI: NoAlias: i32* %val2, i32* @Y
; FULL-PHI: NoAlias: i32* %val3, i32* @Y
define void @loop_phi_chain(i32 %a, i32 %b, i32 %c) {
entry:
br label %loop1
Expand Down
126 changes: 126 additions & 0 deletions llvm/test/Analysis/BasicAA/phi-full-aa-diamond.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
;
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
;
; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -basic-aa-full-phi-analysis \
; RUN: -disable-output 2>&1 | FileCheck %s --check-prefix=FULL-PHI

@X = common global i32 0
@Y = common global i32 0

; The goal of this set of tests is to test the differences in the behavior of
; the BasicAA with and without full phi analysis in a dual diamond scenario.


; Test 1: The basic shape of the test is the following (simplified, shown
; without phis):

; if (...) {
; if (...) {
; %P = getelementptr @X ...
; } else {
; %P = getelementptr @X ...
; }
; } else {
; %P = getelementptr @X ...
; }
gbossu marked this conversation as resolved.
Show resolved Hide resolved
;
; // Use of %P and @Y
;

; CHECK: MayAlias: i32* %P, i32* @Y
; FULL-PHI: NoAlias: i32* %P, i32* @Y
define void @test1(i32 %cond) nounwind {
; Nested ("dual") diamond in which every incoming path yields a pointer based
; on @X, so %P can never point to @Y.
entry:
  %"alloca point" = bitcast i32 0 to i32
  %tmp = icmp ne i32 %cond, 0
  br i1 %tmp, label %bbtrue, label %bbfalse

bbtrue:
  ; The inner diamond branches on its own condition. Branching on %tmp again
  ; would leave %tmp1 dead and make %bbtruefalse unreachable, because %tmp is
  ; always true when control reaches this block.
  %tmp1 = icmp ne i32 %cond, 2
  br i1 %tmp1, label %bbtruetrue, label %bbtruefalse

bbtruetrue:
  %p1 = getelementptr i32, ptr @X, i64 0
  br label %bbtrueend

bbtruefalse:
  %p2 = getelementptr i32, ptr @X, i64 0
  br label %bbtrueend

bbtrueend:
  ; Join of the inner diamond.
  %p3 = phi ptr [ %p1, %bbtruetrue ], [ %p2, %bbtruefalse ]
  br label %bblast

bbfalse:
  %p4 = getelementptr i32, ptr @X, i64 0
  br label %bblast

bblast:
  ; Join of the outer diamond: one incoming value (%p3) is itself a phi.
  %P = phi ptr [ %p3, %bbtrueend ], [ %p4, %bbfalse ]
  ; Use of %P and @Y.
  %tmp2 = load i32, ptr @Y, align 4
  store i32 123, ptr %P, align 4
  %tmp3 = load i32, ptr @Y, align 4
  br label %return

return:
  ret void
}


; Test 2: The basic shape of the test is the following (simplified, shown
; without phis):

; if (...) {
; if (...) {
; %P = getelementptr @Y ... (cause of MayAlias)
; } else {
; %P = getelementptr @X ...
; }
; } else {
; %P = getelementptr @X ...
; }
;
; // Use of %P and @Y
;

; CHECK: MayAlias: i32* %P, i32* @Y
; FULL-PHI: MayAlias: i32* %P, i32* @Y
define void @test2(i32 %cond) nounwind {
; Nested ("dual") diamond in which one inner path yields a pointer based on
; @Y, so %P may point to @Y regardless of how phis are analysed.
entry:
  %"alloca point" = bitcast i32 0 to i32
  %tmp = icmp ne i32 %cond, 0
  br i1 %tmp, label %bbtrue, label %bbfalse

bbtrue:
  ; The inner diamond branches on its own condition. Branching on %tmp again
  ; would leave %tmp1 dead and make %bbtruefalse unreachable, because %tmp is
  ; always true when control reaches this block.
  %tmp1 = icmp ne i32 %cond, 2
  br i1 %tmp1, label %bbtruetrue, label %bbtruefalse

bbtruetrue:
  ; The only path based on @Y: the cause of the MayAlias result.
  %p1 = getelementptr i32, ptr @Y, i64 0
  br label %bbtrueend

bbtruefalse:
  %p2 = getelementptr i32, ptr @X, i64 0
  br label %bbtrueend

bbtrueend:
  ; Join of the inner diamond.
  %p3 = phi ptr [ %p1, %bbtruetrue ], [ %p2, %bbtruefalse ]
  br label %bblast

bbfalse:
  %p4 = getelementptr i32, ptr @X, i64 0
  br label %bblast

bblast:
  ; Join of the outer diamond: one incoming value (%p3) is itself a phi.
  %P = phi ptr [ %p3, %bbtrueend ], [ %p4, %bbfalse ]
  ; Use of %P and @Y.
  %tmp2 = load i32, ptr @Y, align 4
  store i32 123, ptr %P, align 4
  %tmp3 = load i32, ptr @Y, align 4
  br label %return

return:
  ret void
}
Loading