Skip to content

Commit a800533

Browse files
committed
[LAA] Remove loop-invariant check added in 234cc40.
234cc40 introduced a loop-invariance check to limit the compile-time impact of the newly added checks. This patch removes the restriction and avoids extra compile-time impact by sinking the check to exits where we would return an unknown dependence. This notably reduces the number of times the extra checks are executed while not missing out on any improvements from them. https://llvm-compile-time-tracker.com/compare.php?from=33e7cd6ff23f6c904314d17c68dc58168fd32d09&to=7c55e66d4f31ce8262b90c119a8e84e1f9515ff1&stat=instructions:u
1 parent b0fbfbb commit a800533

File tree

4 files changed

+62
-40
lines changed

4 files changed

+62
-40
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,27 +1937,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19371937
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
19381938
<< ": " << *Dist << "\n");
19391939

1940-
// Check if we can prove that Sink only accesses memory after Src's end or
1941-
// vice versa. At the moment this is limited to cases where either source or
1942-
// sink are loop invariant to avoid compile-time increases. This is not
1943-
// required for correctness.
1944-
if (SE.isLoopInvariant(Src, InnermostLoop) ||
1945-
SE.isLoopInvariant(Sink, InnermostLoop)) {
1946-
const auto &[SrcStart, SrcEnd] =
1947-
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
1948-
const auto &[SinkStart, SinkEnd] =
1949-
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
1950-
if (!isa<SCEVCouldNotCompute>(SrcStart) &&
1951-
!isa<SCEVCouldNotCompute>(SrcEnd) &&
1952-
!isa<SCEVCouldNotCompute>(SinkStart) &&
1953-
!isa<SCEVCouldNotCompute>(SinkEnd)) {
1954-
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1955-
return MemoryDepChecker::Dependence::NoDep;
1956-
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1957-
return MemoryDepChecker::Dependence::NoDep;
1958-
}
1959-
}
1960-
19611940
// Need accesses with constant strides and the same direction for further
19621941
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
19631942
// similar code or pointer arithmetic that could wrap in the address space.
@@ -2003,12 +1982,45 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20031982
const MemAccessInfo &B, unsigned BIdx) {
20041983
assert(AIdx < BIdx && "Must pass arguments in program order");
20051984

1985+
// Check if we can prove that Sink only accesses memory after Src's end or
1986+
// vice versa. The helper is used to perform the checks only on the exit paths
1987+
// where it helps to improve the analysis result.
1988+
auto CheckCompletelyBeforeOrAfter = [&]() {
1989+
auto *APtr = A.getPointer();
1990+
auto *BPtr = B.getPointer();
1991+
1992+
Type *ATy = getLoadStoreType(InstMap[AIdx]);
1993+
Type *BTy = getLoadStoreType(InstMap[BIdx]);
1994+
1995+
const SCEV *Src = PSE.getSCEV(APtr);
1996+
const SCEV *Sink = PSE.getSCEV(BPtr);
1997+
1998+
const auto &[SrcStart, SrcEnd] =
1999+
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
2000+
if (isa<SCEVCouldNotCompute>(SrcStart) || isa<SCEVCouldNotCompute>(SrcEnd))
2001+
return false;
2002+
2003+
const auto &[SinkStart, SinkEnd] =
2004+
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
2005+
if (isa<SCEVCouldNotCompute>(SinkStart) ||
2006+
isa<SCEVCouldNotCompute>(SinkEnd))
2007+
return false;
2008+
2009+
auto &SE = *PSE.getSE();
2010+
return SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart) ||
2011+
SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2012+
};
2013+
20062014
// Get the dependence distance, stride, type size and what access writes for
20072015
// the dependence between A and B.
20082016
auto Res =
20092017
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2010-
if (std::holds_alternative<Dependence::DepType>(Res))
2018+
if (std::holds_alternative<Dependence::DepType>(Res)) {
2019+
if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2020+
CheckCompletelyBeforeOrAfter())
2021+
return Dependence::NoDep;
20112022
return std::get<Dependence::DepType>(Res);
2023+
}
20122024

20132025
auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20142026
std::get<DepDistanceStrideAndSizeInfo>(Res);
@@ -2017,6 +2029,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20172029
std::optional<uint64_t> CommonStride =
20182030
StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
20192031
if (isa<SCEVCouldNotCompute>(Dist)) {
2032+
if (CheckCompletelyBeforeOrAfter())
2033+
return Dependence::NoDep;
2034+
20202035
// TODO: Relax requirement that there is a common stride to retry with
20212036
// non-constant distance dependencies.
20222037
FoundNonConstantDistanceDependence |= CommonStride.has_value();
@@ -2068,6 +2083,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20682083
// Write to the same location with the same size.
20692084
return Dependence::Forward;
20702085
}
2086+
assert(!CheckCompletelyBeforeOrAfter() &&
2087+
"unexpectedly proved no dependence");
20712088
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
20722089
"different type sizes\n");
20732090
return Dependence::Unknown;
@@ -2089,6 +2106,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20892106
// did not set it when strides were different but there is no inherent
20902107
// reason to.
20912108
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2109+
if (CheckCompletelyBeforeOrAfter())
2110+
return Dependence::NoDep;
20922111
return Dependence::Unknown;
20932112
}
20942113
if (!HasSameSize ||
@@ -2108,6 +2127,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21082127
// Below we only handle strictly positive distances.
21092128
if (MinDistance <= 0) {
21102129
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2130+
if (CheckCompletelyBeforeOrAfter())
2131+
return Dependence::NoDep;
2132+
21112133
return Dependence::Unknown;
21122134
}
21132135

@@ -2124,13 +2146,18 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21242146
}
21252147

21262148
if (!HasSameSize) {
2149+
if (CheckCompletelyBeforeOrAfter())
2150+
return Dependence::NoDep;
21272151
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
21282152
"different type sizes\n");
21292153
return Dependence::Unknown;
21302154
}
21312155

2132-
if (!CommonStride)
2156+
if (!CommonStride) {
2157+
if (CheckCompletelyBeforeOrAfter())
2158+
return Dependence::NoDep;
21332159
return Dependence::Unknown;
2160+
}
21342161

21352162
// Bail out early if passed-in parameters make vectorization not feasible.
21362163
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -2178,6 +2205,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21782205
// dependence distance and the distance may be larger at runtime (and safe
21792206
// for vectorization). Classify it as Unknown, so we re-try with runtime
21802207
// checks.
2208+
//
2209+
if (CheckCompletelyBeforeOrAfter())
2210+
return Dependence::NoDep;
2211+
21812212
return Dependence::Unknown;
21822213
}
21832214
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
@@ -2190,6 +2221,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21902221
if (MinDistanceNeeded > MinDepDistBytes) {
21912222
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
21922223
<< MinDistanceNeeded << " size in bytes\n");
2224+
assert(!CheckCompletelyBeforeOrAfter() &&
2225+
"unexpectedly proved no dependence");
21932226
return Dependence::Backward;
21942227
}
21952228

@@ -2237,6 +2270,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22372270
// For non-constant distances, we checked the lower bound of the dependence
22382271
// distance and the distance may be larger at runtime (and safe for
22392272
// vectorization). Classify it as Unknown, so we re-try with runtime checks.
2273+
assert(!CheckCompletelyBeforeOrAfter() &&
2274+
"unexpectedly proved no dependence");
22402275
return Dependence::Unknown;
22412276
}
22422277

llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,8 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
130130
; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence'
131131
; CHECK-NEXT: loop:
132132
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
133-
; CHECK-NEXT: Unknown data dependence.
133+
; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
134134
; CHECK-NEXT: Dependences:
135-
; CHECK-NEXT: Unknown:
136-
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
137-
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
138-
; CHECK-EMPTY:
139-
; CHECK-NEXT: Unknown:
140-
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
141-
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
142-
; CHECK-EMPTY:
143135
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
144136
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
145137
; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8

llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,8 @@ exit:
4545
define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
4646
; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
4747
; CHECK-NEXT: loop:
48-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
49-
; CHECK-NEXT: Unknown data dependence.
48+
; CHECK-NEXT: Memory dependences are safe
5049
; CHECK-NEXT: Dependences:
51-
; CHECK-NEXT: Unknown:
52-
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
53-
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
54-
; CHECK-EMPTY:
5550
; CHECK-NEXT: Run-time memory checks:
5651
; CHECK-NEXT: Grouped accesses:
5752
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/global_alias.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ for.end: ; preds = %for.body
503503
; return Foo.A[a];
504504
; }
505505
; CHECK-LABEL: define i32 @mayAlias01(
506-
; CHECK-NOT: add nsw <4 x i32>
506+
; CHECK: add nsw <4 x i32>
507507
; CHECK: ret
508508

509509
define i32 @mayAlias01(i32 %a) nounwind {
@@ -536,7 +536,7 @@ for.end: ; preds = %for.body
536536
; return Foo.A[a];
537537
; }
538538
; CHECK-LABEL: define i32 @mayAlias02(
539-
; CHECK-NOT: add nsw <4 x i32>
539+
; CHECK: add nsw <4 x i32>
540540
; CHECK: ret
541541

542542
define i32 @mayAlias02(i32 %a) nounwind {

0 commit comments

Comments
 (0)