Skip to content

Commit 8327c2c

Browse files
authored
LAA: fix logic for MaxTargetVectorWidth (#125487)
Use the fixed-width vector register size when scalable vectorization is not enabled (as reported by TargetTransformInfo::enableScalableVectorization). This improves results on targets that have scalable vector registers but should not use them.
1 parent 6a59d60 commit 8327c2c

File tree

3 files changed

+299
-124
lines changed

3 files changed

+299
-124
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2999,20 +2999,12 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
29992999
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
30003000
PtrRtChecking(nullptr), TheLoop(L) {
30013001
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3002-
if (TTI) {
3003-
TypeSize FixedWidth =
3004-
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
3005-
if (FixedWidth.isNonZero()) {
3006-
// Scale the vector width by 2 as rough estimate to also consider
3007-
// interleaving.
3008-
MaxTargetVectorWidthInBits = FixedWidth.getFixedValue() * 2;
3009-
}
3002+
if (TTI && !TTI->enableScalableVectorization())
3003+
// Scale the vector width by 2 as rough estimate to also consider
3004+
// interleaving.
3005+
MaxTargetVectorWidthInBits =
3006+
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
30103007

3011-
TypeSize ScalableWidth =
3012-
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector);
3013-
if (ScalableWidth.isNonZero())
3014-
MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3015-
}
30163008
DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
30173009
MaxTargetVectorWidthInBits);
30183010
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);

llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -passes='print<access-info>' -disable-output < %s 2>&1 | FileCheck %s
23

34
; This is the test case from PR26314.
4-
; When we were retrying dependence checking with memchecks only,
5-
; the loop-invariant access in the inner loop was incorrectly determined to be wrapping
5+
; When we were retrying dependence checking with memchecks only,
6+
; the loop-invariant access in the inner loop was incorrectly determined to be wrapping
67
; because it was not strided in the inner loop.
7-
8+
89
; #define Z 32
910
; typedef struct s {
1011
; int v1[Z];
@@ -21,19 +22,52 @@
2122
; }
2223
; }
2324

24-
; CHECK: function 'Test':
25-
; CHECK: .inner:
26-
; CHECK-NEXT: Memory dependences are safe with run-time checks
27-
; CHECK-NEXT: Dependences:
28-
; CHECK-NEXT: Run-time memory checks:
29-
; CHECK: Check 0:
30-
; CHECK: Check 1:
31-
3225
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3326

3427
%struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] }
3528

3629
define void @Test(ptr nocapture %obj, i64 %z) #0 {
30+
; CHECK-LABEL: 'Test'
31+
; CHECK-NEXT: .inner:
32+
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 2048 bits with run-time checks
33+
; CHECK-NEXT: Dependences:
34+
; CHECK-NEXT: Run-time memory checks:
35+
; CHECK-NEXT: Check 0:
36+
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
37+
; CHECK-NEXT: %6 = getelementptr inbounds %struct.s, ptr %obj, i64 0, i32 2, i64 %i, i64 %j
38+
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
39+
; CHECK-NEXT: %2 = getelementptr inbounds %struct.s, ptr %obj, i64 0, i32 0, i64 %j
40+
; CHECK-NEXT: Check 1:
41+
; CHECK-NEXT: Comparing group ([[GRP1]]):
42+
; CHECK-NEXT: %6 = getelementptr inbounds %struct.s, ptr %obj, i64 0, i32 2, i64 %i, i64 %j
43+
; CHECK-NEXT: Against group ([[GRP3:0x[0-9a-f]+]]):
44+
; CHECK-NEXT: %1 = getelementptr inbounds %struct.s, ptr %obj, i64 0, i32 1, i64 %i
45+
; CHECK-NEXT: Grouped accesses:
46+
; CHECK-NEXT: Group [[GRP1]]:
47+
; CHECK-NEXT: (Low: {(256 + %obj)<nuw>,+,128}<nuw><%.outer.preheader> High: {(256 + (4 * %z) + %obj),+,128}<nw><%.outer.preheader>)
48+
; CHECK-NEXT: Member: {{\{\{}}(256 + %obj)<nuw>,+,128}<nuw><%.outer.preheader>,+,4}<nuw><%.inner>
49+
; CHECK-NEXT: Group [[GRP2]]:
50+
; CHECK-NEXT: (Low: %obj High: ((4 * %z) + %obj))
51+
; CHECK-NEXT: Member: {%obj,+,4}<nuw><%.inner>
52+
; CHECK-NEXT: Group [[GRP3]]:
53+
; CHECK-NEXT: (Low: {(128 + %obj)<nuw>,+,4}<nuw><%.outer.preheader> High: {(132 + %obj),+,4}<nw><%.outer.preheader>)
54+
; CHECK-NEXT: Member: {(128 + %obj)<nuw>,+,4}<nuw><%.outer.preheader>
55+
; CHECK-EMPTY:
56+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
57+
; CHECK-NEXT: SCEV assumptions:
58+
; CHECK-EMPTY:
59+
; CHECK-NEXT: Expressions re-written:
60+
; CHECK-NEXT: .outer.preheader:
61+
; CHECK-NEXT: Report: loop is not the innermost loop
62+
; CHECK-NEXT: Dependences:
63+
; CHECK-NEXT: Run-time memory checks:
64+
; CHECK-NEXT: Grouped accesses:
65+
; CHECK-EMPTY:
66+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
67+
; CHECK-NEXT: SCEV assumptions:
68+
; CHECK-EMPTY:
69+
; CHECK-NEXT: Expressions re-written:
70+
;
3771
br label %.outer.preheader
3872

3973

@@ -44,7 +78,7 @@ define void @Test(ptr nocapture %obj, i64 %z) #0 {
4478

4579
.exit:
4680
ret void
47-
81+
4882
.outer:
4983
%i.next = add nuw nsw i64 %i, 1
5084
%exitcond.outer = icmp eq i64 %i.next, 32
@@ -59,7 +93,7 @@ define void @Test(ptr nocapture %obj, i64 %z) #0 {
5993
%6 = getelementptr inbounds %struct.s, ptr %obj, i64 0, i32 2, i64 %i, i64 %j
6094
%7 = load i32, ptr %6
6195
%8 = add nsw i32 %5, %7
62-
store i32 %8, ptr %6
96+
store i32 %8, ptr %6
6397
%j.next = add nuw nsw i64 %j, 1
6498
%exitcond.inner = icmp eq i64 %j.next, %z
6599
br i1 %exitcond.inner, label %.outer, label %.inner

0 commit comments

Comments
 (0)