Skip to content

Commit 933f492

Browse files
authored
[LAA] Support different strides & non constant dep distances using SCEV. (#88039)
Extend LoopAccessAnalysis to support different strides and, as a consequence, non-constant distances between dependences, using SCEV to reason about the direction of the dependence. In multiple places, logic to rule out dependences using the stride has been updated to only be used if StrideA == StrideB, i.e. there's a common stride. We now also may bail out at multiple places where we may have to set FoundNonConstantDistanceDependence. This is done when we need to bail out and the distance is not constant, to preserve the original behavior. Fixes #87336. PR: #88039
1 parent f2d9950 commit 933f492

File tree

4 files changed

+102
-59
lines changed

4 files changed

+102
-59
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 91 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
19201920
namespace {
19211921
struct DepDistanceStrideAndSizeInfo {
19221922
const SCEV *Dist;
1923-
uint64_t Stride;
1923+
uint64_t StrideA;
1924+
uint64_t StrideB;
19241925
uint64_t TypeByteSize;
19251926
bool AIsWrite;
19261927
bool BIsWrite;
19271928

1928-
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride,
1929-
uint64_t TypeByteSize, bool AIsWrite,
1930-
bool BIsWrite)
1931-
: Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize),
1932-
AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
1929+
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
1930+
uint64_t StrideB, uint64_t TypeByteSize,
1931+
bool AIsWrite, bool BIsWrite)
1932+
: Dist(Dist), StrideA(StrideA), StrideB(StrideB),
1933+
TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
19331934
};
19341935
} // namespace
19351936

1936-
// Get the dependence distance, stride, type size and whether it is a write for
1937+
// Get the dependence distance, strides, type size and whether it is a write for
19371938
// the dependence between A and B. Returns a DepType, if we can prove there's
19381939
// no dependence or the analysis fails. Outlined to lambda to limit the scope
19391940
// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
@@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize(
19951996
InnermostLoop))
19961997
return MemoryDepChecker::Dependence::IndirectUnsafe;
19971998

1998-
// Need accesses with constant stride. We don't want to vectorize
1999-
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
2000-
// in the address space.
2001-
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1999+
// Need accesses with constant strides and the same direction. We don't want
2000+
// to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
2001+
// could wrap in the address space.
2002+
if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
2003+
(StrideAPtr < 0 && StrideBPtr > 0)) {
20022004
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
20032005
return MemoryDepChecker::Dependence::Unknown;
20042006
}
@@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize(
20082010
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
20092011
if (!HasSameSize)
20102012
TypeByteSize = 0;
2011-
uint64_t Stride = std::abs(StrideAPtr);
2012-
return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite,
2013-
BIsWrite);
2013+
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
2014+
std::abs(StrideBPtr), TypeByteSize,
2015+
AIsWrite, BIsWrite);
20142016
}
20152017

20162018
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
@@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20282030
if (std::holds_alternative<Dependence::DepType>(Res))
20292031
return std::get<Dependence::DepType>(Res);
20302032

2031-
const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
2033+
const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20322034
std::get<DepDistanceStrideAndSizeInfo>(Res);
20332035
bool HasSameSize = TypeByteSize > 0;
20342036

2037+
std::optional<uint64_t> CommonStride =
2038+
StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
2039+
if (isa<SCEVCouldNotCompute>(Dist)) {
2040+
// TODO: Relax requirement that there is a common stride to retry with
2041+
// non-constant distance dependencies.
2042+
FoundNonConstantDistanceDependence |= !!CommonStride;
2043+
LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
2044+
return Dependence::Unknown;
2045+
}
2046+
20352047
ScalarEvolution &SE = *PSE.getSE();
20362048
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
2049+
20372050
// If the distance between the accesses is larger than their absolute stride
20382051
// multiplied by the backedge taken count, the accesses are independent, i.e.
20392052
// they are far enough apart that accesses won't access the same location
20402053
// across all loop iterations.
2041-
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
2054+
if (HasSameSize && CommonStride &&
20422055
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
2043-
Stride, TypeByteSize))
2056+
*CommonStride, TypeByteSize))
20442057
return Dependence::NoDep;
20452058

20462059
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
2047-
if (!C) {
2048-
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
2049-
FoundNonConstantDistanceDependence = true;
2050-
return Dependence::Unknown;
2051-
}
20522060

2053-
const APInt &Val = C->getAPInt();
2054-
int64_t Distance = Val.getSExtValue();
2055-
2056-
// If the distance between accesses and their strides are known constants,
2057-
// check whether the accesses interlace each other.
2058-
if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
2059-
areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
2060-
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2061-
return Dependence::NoDep;
2061+
// Attempt to prove strided accesses independent.
2062+
if (C) {
2063+
const APInt &Val = C->getAPInt();
2064+
int64_t Distance = Val.getSExtValue();
2065+
2066+
// If the distance between accesses and their strides are known constants,
2067+
// check whether the accesses interlace each other.
2068+
if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 &&
2069+
HasSameSize &&
2070+
areStridedAccessesIndependent(std::abs(Distance), *CommonStride,
2071+
TypeByteSize)) {
2072+
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2073+
return Dependence::NoDep;
2074+
}
20622075
}
20632076

20642077
// Negative distances are not plausible dependencies.
2065-
if (Val.isNegative()) {
2078+
if (SE.isKnownNonPositive(Dist)) {
2079+
if (SE.isKnownNonNegative(Dist)) {
2080+
if (HasSameSize) {
2081+
// Write to the same location with the same size.
2082+
return Dependence::Forward;
2083+
} else {
2084+
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2085+
"different type sizes\n");
2086+
return Dependence::Unknown;
2087+
}
2088+
}
2089+
20662090
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
20672091
// Check if the first access writes to a location that is read in a later
20682092
// iteration, where the distance between them is not a multiple of a vector
@@ -2071,34 +2095,55 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20712095
// NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
20722096
// couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
20732097
// forward dependency will allow vectorization using any width.
2074-
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2075-
(!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(),
2076-
TypeByteSize))) {
2077-
LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2078-
return Dependence::ForwardButPreventsForwarding;
2098+
2099+
if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2100+
if (!C) {
2101+
// TODO: FoundNonConstantDistanceDependence is used as a necessary
2102+
// condition to consider retrying with runtime checks. Historically, we
2103+
// did not set it when strides were different but there is no inherent
2104+
// reason to.
2105+
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2106+
return Dependence::Unknown;
2107+
}
2108+
if (!HasSameSize ||
2109+
couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(),
2110+
TypeByteSize)) {
2111+
LLVM_DEBUG(
2112+
dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2113+
return Dependence::ForwardButPreventsForwarding;
2114+
}
20792115
}
20802116

20812117
LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
20822118
return Dependence::Forward;
20832119
}
20842120

2085-
// Write to the same location with the same size.
2086-
if (Val == 0) {
2087-
if (HasSameSize)
2088-
return Dependence::Forward;
2089-
LLVM_DEBUG(
2090-
dbgs() << "LAA: Zero dependence difference but different type sizes\n");
2121+
if (!C) {
2122+
// TODO: FoundNonConstantDistanceDependence is used as a necessary condition
2123+
// to consider retrying with runtime checks. Historically, we did not set it
2124+
// when strides were different but there is no inherent reason to.
2125+
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2126+
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
20912127
return Dependence::Unknown;
20922128
}
20932129

2094-
assert(Val.isStrictlyPositive() && "Expect a positive value");
2130+
if (!SE.isKnownPositive(Dist))
2131+
return Dependence::Unknown;
20952132

20962133
if (!HasSameSize) {
20972134
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
20982135
"different type sizes\n");
20992136
return Dependence::Unknown;
21002137
}
21012138

2139+
// The logic below currently only supports StrideA == StrideB, i.e. there's a
2140+
// common stride.
2141+
if (!CommonStride)
2142+
return Dependence::Unknown;
2143+
2144+
const APInt &Val = C->getAPInt();
2145+
int64_t Distance = Val.getSExtValue();
2146+
21022147
// Bail out early if passed-in parameters make vectorization not feasible.
21032148
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
21042149
VectorizerParams::VectorizationFactor : 1);
@@ -2134,7 +2179,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21342179
// the minimum distance needed is 28, which is greater than distance. It is
21352180
// not safe to do vectorization.
21362181
uint64_t MinDistanceNeeded =
2137-
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
2182+
TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize;
21382183
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
21392184
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
21402185
<< Distance << '\n');
@@ -2183,7 +2228,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21832228

21842229
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
21852230
// since there is a backwards dependency.
2186-
uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
2231+
uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride));
21872232
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
21882233
<< " with max VF = " << MaxVF << '\n');
21892234
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate {
126126

127127
// We don't need to check non-wrapping here because forward/backward
128128
// dependence wouldn't be valid if these weren't monotonic accesses.
129-
auto *Dist = cast<SCEVConstant>(
129+
auto *Dist = dyn_cast<SCEVConstant>(
130130
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
131+
if (!Dist)
132+
return false;
131133
const APInt &Val = Dist->getAPInt();
132134
return Val == TypeByteSize * StrideLoad;
133135
}

llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
66
define void @forward_dep_known_safe_due_to_backedge_taken_count(ptr %A) {
77
; CHECK-LABEL: 'forward_dep_known_safe_due_to_backedge_taken_count'
88
; CHECK-NEXT: loop:
9-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
10-
; CHECK-NEXT: Unknown data dependence.
9+
; CHECK-NEXT: Memory dependences are safe
1110
; CHECK-NEXT: Dependences:
12-
; CHECK-NEXT: Unknown:
11+
; CHECK-NEXT: Forward:
1312
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
1413
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
1514
; CHECK-EMPTY:
@@ -44,10 +43,9 @@ exit:
4443
define void @forward_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) {
4544
; CHECK-LABEL: 'forward_dep_not_known_safe_due_to_backedge_taken_count'
4645
; CHECK-NEXT: loop:
47-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
48-
; CHECK-NEXT: Unknown data dependence.
46+
; CHECK-NEXT: Memory dependences are safe
4947
; CHECK-NEXT: Dependences:
50-
; CHECK-NEXT: Unknown:
48+
; CHECK-NEXT: Forward:
5149
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
5250
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
5351
; CHECK-EMPTY:

llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ declare void @llvm.assume(i1)
88
define void @different_non_constant_strides_known_forward(ptr %A) {
99
; CHECK-LABEL: 'different_non_constant_strides_known_forward'
1010
; CHECK-NEXT: loop:
11-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
12-
; CHECK-NEXT: Unknown data dependence.
11+
; CHECK-NEXT: Memory dependences are safe
1312
; CHECK-NEXT: Dependences:
14-
; CHECK-NEXT: Unknown:
13+
; CHECK-NEXT: Forward:
1514
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
1615
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
1716
; CHECK-EMPTY:
@@ -45,10 +44,9 @@ exit:
4544
define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) {
4645
; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3'
4746
; CHECK-NEXT: loop:
48-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
49-
; CHECK-NEXT: Unknown data dependence.
47+
; CHECK-NEXT: Memory dependences are safe
5048
; CHECK-NEXT: Dependences:
51-
; CHECK-NEXT: Unknown:
49+
; CHECK-NEXT: Forward:
5250
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
5351
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
5452
; CHECK-EMPTY:

0 commit comments

Comments
 (0)