Skip to content

Commit d43a809

Browse files
committed
Revert "[LAA] Remove loop-invariant check added in 234cc40."
This reverts commit a800533. The new asserts exposed an underlying issue where the expanded bounds could wrap, causing the parts of the code to incorrectly determine that accesses do not overlap. Reproducer below based on @mstorsjo's test case. opt -passes='print<access-info>' target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @j(ptr %P, i32 %x, i32 %y) { entry: %gep.P.4 = getelementptr inbounds nuw i8, ptr %P, i32 4 %gep.P.8 = getelementptr inbounds nuw i8, ptr %P, i32 8 br label %loop loop: %1 = phi i32 [ %x, %entry ], [ %sel, %loop.latch ] %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop.latch ] %gep.iv = getelementptr inbounds i64, ptr %gep.P.8, i32 %iv %l = load i32, ptr %gep.iv, align 4 %c.1 = icmp eq i32 %l, 3 br i1 %c.1, label %loop.latch, label %if.then if.then: ; preds = %for.body store i64 0, ptr %gep.iv, align 4 %l.2 = load i32, ptr %gep.P.4 br label %loop.latch loop.latch: %sel = phi i32 [ %l.2, %if.then ], [ %1, %loop ] %iv.next = add nsw i32 %iv, 1 %c.2 = icmp slt i32 %iv.next, %sel br i1 %c.2, label %loop, label %exit exit: %res = phi i32 [ %iv.next, %loop.latch ] ret i32 %res }
1 parent 7c188ab commit d43a809

File tree

4 files changed

+40
-62
lines changed

4 files changed

+40
-62
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 23 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,6 +1937,27 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19371937
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
19381938
<< ": " << *Dist << "\n");
19391939

1940+
// Check if we can prove that Sink only accesses memory after Src's end or
1941+
// vice versa. At the moment this is limited to cases where either source or
1942+
// sink are loop invariant to avoid compile-time increases. This is not
1943+
// required for correctness.
1944+
if (SE.isLoopInvariant(Src, InnermostLoop) ||
1945+
SE.isLoopInvariant(Sink, InnermostLoop)) {
1946+
const auto &[SrcStart, SrcEnd] =
1947+
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
1948+
const auto &[SinkStart, SinkEnd] =
1949+
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
1950+
if (!isa<SCEVCouldNotCompute>(SrcStart) &&
1951+
!isa<SCEVCouldNotCompute>(SrcEnd) &&
1952+
!isa<SCEVCouldNotCompute>(SinkStart) &&
1953+
!isa<SCEVCouldNotCompute>(SinkEnd)) {
1954+
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1955+
return MemoryDepChecker::Dependence::NoDep;
1956+
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1957+
return MemoryDepChecker::Dependence::NoDep;
1958+
}
1959+
}
1960+
19401961
// Need accesses with constant strides and the same direction for further
19411962
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
19421963
// similar code or pointer arithmetic that could wrap in the address space.
@@ -1982,45 +2003,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
19822003
const MemAccessInfo &B, unsigned BIdx) {
19832004
assert(AIdx < BIdx && "Must pass arguments in program order");
19842005

1985-
// Check if we can prove that Sink only accesses memory after Src's end or
1986-
// vice versa. The helper is used to perform the checks only on the exit paths
1987-
// where it helps to improve the analysis result.
1988-
auto CheckCompletelyBeforeOrAfter = [&]() {
1989-
auto *APtr = A.getPointer();
1990-
auto *BPtr = B.getPointer();
1991-
1992-
Type *ATy = getLoadStoreType(InstMap[AIdx]);
1993-
Type *BTy = getLoadStoreType(InstMap[BIdx]);
1994-
1995-
const SCEV *Src = PSE.getSCEV(APtr);
1996-
const SCEV *Sink = PSE.getSCEV(BPtr);
1997-
1998-
const auto &[SrcStart, SrcEnd] =
1999-
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
2000-
if (isa<SCEVCouldNotCompute>(SrcStart) || isa<SCEVCouldNotCompute>(SrcEnd))
2001-
return false;
2002-
2003-
const auto &[SinkStart, SinkEnd] =
2004-
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
2005-
if (isa<SCEVCouldNotCompute>(SinkStart) ||
2006-
isa<SCEVCouldNotCompute>(SinkEnd))
2007-
return false;
2008-
2009-
auto &SE = *PSE.getSE();
2010-
return SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart) ||
2011-
SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2012-
};
2013-
20142006
// Get the dependence distance, stride, type size and what access writes for
20152007
// the dependence between A and B.
20162008
auto Res =
20172009
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2018-
if (std::holds_alternative<Dependence::DepType>(Res)) {
2019-
if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2020-
CheckCompletelyBeforeOrAfter())
2021-
return Dependence::NoDep;
2010+
if (std::holds_alternative<Dependence::DepType>(Res))
20222011
return std::get<Dependence::DepType>(Res);
2023-
}
20242012

20252013
auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20262014
std::get<DepDistanceStrideAndSizeInfo>(Res);
@@ -2029,9 +2017,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20292017
std::optional<uint64_t> CommonStride =
20302018
StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
20312019
if (isa<SCEVCouldNotCompute>(Dist)) {
2032-
if (CheckCompletelyBeforeOrAfter())
2033-
return Dependence::NoDep;
2034-
20352020
// TODO: Relax requirement that there is a common stride to retry with
20362021
// non-constant distance dependencies.
20372022
FoundNonConstantDistanceDependence |= CommonStride.has_value();
@@ -2083,8 +2068,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20832068
// Write to the same location with the same size.
20842069
return Dependence::Forward;
20852070
}
2086-
assert(!CheckCompletelyBeforeOrAfter() &&
2087-
"unexpectedly proved no dependence");
20882071
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
20892072
"different type sizes\n");
20902073
return Dependence::Unknown;
@@ -2106,8 +2089,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21062089
// did not set it when strides were different but there is no inherent
21072090
// reason to.
21082091
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2109-
if (CheckCompletelyBeforeOrAfter())
2110-
return Dependence::NoDep;
21112092
return Dependence::Unknown;
21122093
}
21132094
if (!HasSameSize ||
@@ -2127,9 +2108,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21272108
// Below we only handle strictly positive distances.
21282109
if (MinDistance <= 0) {
21292110
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2130-
if (CheckCompletelyBeforeOrAfter())
2131-
return Dependence::NoDep;
2132-
21332111
return Dependence::Unknown;
21342112
}
21352113

@@ -2146,18 +2124,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21462124
}
21472125

21482126
if (!HasSameSize) {
2149-
if (CheckCompletelyBeforeOrAfter())
2150-
return Dependence::NoDep;
21512127
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
21522128
"different type sizes\n");
21532129
return Dependence::Unknown;
21542130
}
21552131

2156-
if (!CommonStride) {
2157-
if (CheckCompletelyBeforeOrAfter())
2158-
return Dependence::NoDep;
2132+
if (!CommonStride)
21592133
return Dependence::Unknown;
2160-
}
21612134

21622135
// Bail out early if passed-in parameters make vectorization not feasible.
21632136
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -2205,10 +2178,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22052178
// dependence distance and the distance may be larger at runtime (and safe
22062179
// for vectorization). Classify it as Unknown, so we re-try with runtime
22072180
// checks.
2208-
//
2209-
if (CheckCompletelyBeforeOrAfter())
2210-
return Dependence::NoDep;
2211-
22122181
return Dependence::Unknown;
22132182
}
22142183
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
@@ -2221,8 +2190,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22212190
if (MinDistanceNeeded > MinDepDistBytes) {
22222191
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
22232192
<< MinDistanceNeeded << " size in bytes\n");
2224-
assert(!CheckCompletelyBeforeOrAfter() &&
2225-
"unexpectedly proved no dependence");
22262193
return Dependence::Backward;
22272194
}
22282195

@@ -2270,8 +2237,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22702237
// For non-constant distances, we checked the lower bound of the dependence
22712238
// distance and the distance may be larger at runtime (and safe for
22722239
// vectorization). Classify it as Unknown, so we re-try with runtime checks.
2273-
assert(!CheckCompletelyBeforeOrAfter() &&
2274-
"unexpectedly proved no dependence");
22752240
return Dependence::Unknown;
22762241
}
22772242

llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,16 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
130130
; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence'
131131
; CHECK-NEXT: loop:
132132
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
133-
; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
133+
; CHECK-NEXT: Unknown data dependence.
134134
; CHECK-NEXT: Dependences:
135+
; CHECK-NEXT: Unknown:
136+
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
137+
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
138+
; CHECK-EMPTY:
139+
; CHECK-NEXT: Unknown:
140+
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
141+
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
142+
; CHECK-EMPTY:
135143
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
136144
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
137145
; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8

llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,13 @@ exit:
4545
define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
4646
; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
4747
; CHECK-NEXT: loop:
48-
; CHECK-NEXT: Memory dependences are safe
48+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
49+
; CHECK-NEXT: Unknown data dependence.
4950
; CHECK-NEXT: Dependences:
51+
; CHECK-NEXT: Unknown:
52+
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
53+
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
54+
; CHECK-EMPTY:
5055
; CHECK-NEXT: Run-time memory checks:
5156
; CHECK-NEXT: Grouped accesses:
5257
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/global_alias.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ for.end: ; preds = %for.body
503503
; return Foo.A[a];
504504
; }
505505
; CHECK-LABEL: define i32 @mayAlias01(
506-
; CHECK: add nsw <4 x i32>
506+
; CHECK-NOT: add nsw <4 x i32>
507507
; CHECK: ret
508508

509509
define i32 @mayAlias01(i32 %a) nounwind {
@@ -536,7 +536,7 @@ for.end: ; preds = %for.body
536536
; return Foo.A[a];
537537
; }
538538
; CHECK-LABEL: define i32 @mayAlias02(
539-
; CHECK: add nsw <4 x i32>
539+
; CHECK-NOT: add nsw <4 x i32>
540540
; CHECK: ret
541541

542542
define i32 @mayAlias02(i32 %a) nounwind {

0 commit comments

Comments
 (0)