Skip to content

Commit fcec466

Browse files
committed
[LAA] Support different strides & non constant dep distances using SCEV. llvm#88039
1 parent d5f2753 commit fcec466

File tree

3 files changed

+95
-53
lines changed

3 files changed

+95
-53
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 88 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
19201920
namespace {
19211921
struct DepDistanceStrideAndSizeInfo {
19221922
const SCEV *Dist;
1923-
uint64_t Stride;
1923+
uint64_t StrideA;
1924+
uint64_t StrideB;
19241925
uint64_t TypeByteSize;
19251926
bool AIsWrite;
19261927
bool BIsWrite;
19271928

1928-
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride,
1929-
uint64_t TypeByteSize, bool AIsWrite,
1930-
bool BIsWrite)
1931-
: Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize),
1932-
AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
1929+
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
1930+
uint64_t StrideB, uint64_t TypeByteSize,
1931+
bool AIsWrite, bool BIsWrite)
1932+
: Dist(Dist), StrideA(StrideA), StrideB(StrideB),
1933+
TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
19331934
};
19341935
} // namespace
19351936

1936-
// Get the dependence distance, stride, type size and whether it is a write for
1937+
// Get the dependence distance, strides, type size and whether it is a write for
19371938
// the dependence between A and B. Returns a DepType, if we can prove there's
19381939
// no dependence or the analysis fails. Outlined to lambda to limit the scope
19391940
// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
@@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize(
19951996
InnermostLoop))
19961997
return MemoryDepChecker::Dependence::IndirectUnsafe;
19971998

1998-
// Need accesses with constant stride. We don't want to vectorize
1999-
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
2000-
// in the address space.
2001-
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1999+
// Need accesses with constant strides and the same direction. We don't want
2000+
// to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
2001+
// could wrap in the address space.
2002+
if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
2003+
(StrideAPtr < 0 && StrideBPtr > 0)) {
20022004
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
20032005
return MemoryDepChecker::Dependence::Unknown;
20042006
}
@@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize(
20082010
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
20092011
if (!HasSameSize)
20102012
TypeByteSize = 0;
2011-
uint64_t Stride = std::abs(StrideAPtr);
2012-
return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite,
2013-
BIsWrite);
2013+
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
2014+
std::abs(StrideBPtr), TypeByteSize,
2015+
AIsWrite, BIsWrite);
20142016
}
20152017

20162018
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
@@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20282030
if (std::holds_alternative<Dependence::DepType>(Res))
20292031
return std::get<Dependence::DepType>(Res);
20302032

2031-
const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
2033+
const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20322034
std::get<DepDistanceStrideAndSizeInfo>(Res);
20332035
bool HasSameSize = TypeByteSize > 0;
20342036

2037+
std::optional<uint64_t> CommonStride =
2038+
StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
2039+
if (isa<SCEVCouldNotCompute>(Dist)) {
2040+
// TODO: Relax requirement that there is a common stride to retry with
2041+
// non-constant distance dependencies.
2042+
FoundNonConstantDistanceDependence |= !!CommonStride;
2043+
LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
2044+
return Dependence::Unknown;
2045+
}
2046+
20352047
ScalarEvolution &SE = *PSE.getSE();
20362048
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
2049+
20372050
// If the distance between the accesses is larger than their absolute stride
20382051
// multiplied by the backedge taken count, the accesses are independent, i.e.
20392052
// they are far enough apart that accesses won't access the same location
20402053
// across all loop iterations.
2041-
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
2054+
if (HasSameSize && CommonStride &&
20422055
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
2043-
Stride, TypeByteSize))
2056+
*CommonStride, TypeByteSize))
20442057
return Dependence::NoDep;
20452058

20462059
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
2047-
if (!C) {
2048-
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
2049-
FoundNonConstantDistanceDependence = true;
2050-
return Dependence::Unknown;
2051-
}
20522060

2053-
const APInt &Val = C->getAPInt();
2054-
int64_t Distance = Val.getSExtValue();
2055-
2056-
// If the distance between accesses and their strides are known constants,
2057-
// check whether the accesses interlace each other.
2058-
if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
2059-
areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
2060-
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2061-
return Dependence::NoDep;
2061+
// Attempt to prove strided accesses independent.
2062+
if (C) {
2063+
const APInt &Val = C->getAPInt();
2064+
int64_t Distance = Val.getSExtValue();
2065+
2066+
// If the distance between accesses and their strides are known constants,
2067+
// check whether the accesses interlace each other.
2068+
if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 &&
2069+
HasSameSize &&
2070+
areStridedAccessesIndependent(std::abs(Distance), *CommonStride,
2071+
TypeByteSize)) {
2072+
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2073+
return Dependence::NoDep;
2074+
}
20622075
}
20632076

20642077
// Negative distances are not plausible dependencies.
2065-
if (Val.isNegative()) {
2078+
if (SE.isKnownNonPositive(Dist)) {
2079+
if (SE.isKnownNonNegative(Dist)) {
2080+
if (HasSameSize) {
2081+
// Write to the same location with the same size.
2082+
return Dependence::Forward;
2083+
} else {
2084+
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2085+
"different type sizes\n");
2086+
return Dependence::Unknown;
2087+
}
2088+
}
2089+
20662090
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
20672091
// Check if the first access writes to a location that is read in a later
20682092
// iteration, where the distance between them is not a multiple of a vector
@@ -2071,34 +2095,52 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20712095
// NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
20722096
// couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
20732097
// forward dependency will allow vectorization using any width.
2074-
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2075-
(!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(),
2076-
TypeByteSize))) {
2077-
LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2078-
return Dependence::ForwardButPreventsForwarding;
2098+
2099+
if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2100+
if (!C) {
2101+
// TODO: Relax requirement that there is a common stride to retry with
2102+
// non-constant distance dependencies.
2103+
FoundNonConstantDistanceDependence |= !!CommonStride;
2104+
return Dependence::Unknown;
2105+
}
2106+
if (!HasSameSize ||
2107+
couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(),
2108+
TypeByteSize)) {
2109+
LLVM_DEBUG(
2110+
dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2111+
return Dependence::ForwardButPreventsForwarding;
2112+
}
20792113
}
20802114

20812115
LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
20822116
return Dependence::Forward;
20832117
}
20842118

2085-
// Write to the same location with the same size.
2086-
if (Val == 0) {
2087-
if (HasSameSize)
2088-
return Dependence::Forward;
2089-
LLVM_DEBUG(
2090-
dbgs() << "LAA: Zero dependence difference but different type sizes\n");
2119+
if (!C) {
2120+
// TODO: Relax requirement that there is a common stride to retry with
2121+
// non-constant distance dependencies.
2122+
FoundNonConstantDistanceDependence |= !!CommonStride;
2123+
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
20912124
return Dependence::Unknown;
20922125
}
20932126

2094-
assert(Val.isStrictlyPositive() && "Expect a positive value");
2127+
if (!SE.isKnownPositive(Dist))
2128+
return Dependence::Unknown;
20952129

20962130
if (!HasSameSize) {
20972131
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
20982132
"different type sizes\n");
20992133
return Dependence::Unknown;
21002134
}
21012135

2136+
// The logic below currently only supports StrideA == StrideB, i.e. there's a
2137+
// common stride.
2138+
if (!CommonStride)
2139+
return Dependence::Unknown;
2140+
2141+
const APInt &Val = C->getAPInt();
2142+
int64_t Distance = Val.getSExtValue();
2143+
21022144
// Bail out early if passed-in parameters make vectorization not feasible.
21032145
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
21042146
VectorizerParams::VectorizationFactor : 1);
@@ -2134,7 +2176,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21342176
// the minimum distance needed is 28, which is greater than distance. It is
21352177
// not safe to do vectorization.
21362178
uint64_t MinDistanceNeeded =
2137-
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
2179+
TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize;
21382180
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
21392181
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
21402182
<< Distance << '\n');
@@ -2183,7 +2225,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21832225

21842226
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
21852227
// since there is a backwards dependency.
2186-
uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
2228+
uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride));
21872229
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
21882230
<< " with max VF = " << MaxVF << '\n');
21892231
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate {
126126

127127
// We don't need to check non-wrapping here because forward/backward
128128
// dependence wouldn't be valid if these weren't monotonic accesses.
129-
auto *Dist = cast<SCEVConstant>(
129+
auto *Dist = dyn_cast<SCEVConstant>(
130130
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
131+
if (!Dist)
132+
return false;
131133
const APInt &Val = Dist->getAPInt();
132134
return Val == TypeByteSize * StrideLoad;
133135
}

llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ declare void @llvm.assume(i1)
88
define void @different_non_constant_strides_known_forward(ptr %A) {
99
; CHECK-LABEL: 'different_non_constant_strides_known_forward'
1010
; CHECK-NEXT: loop:
11-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
12-
; CHECK-NEXT: Unknown data dependence.
11+
; CHECK-NEXT: Memory dependences are safe
1312
; CHECK-NEXT: Dependences:
14-
; CHECK-NEXT: Unknown:
13+
; CHECK-NEXT: Forward:
1514
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
1615
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
1716
; CHECK-EMPTY:
@@ -45,10 +44,9 @@ exit:
4544
define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) {
4645
; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3'
4746
; CHECK-NEXT: loop:
48-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
49-
; CHECK-NEXT: Unknown data dependence.
47+
; CHECK-NEXT: Memory dependences are safe
5048
; CHECK-NEXT: Dependences:
51-
; CHECK-NEXT: Unknown:
49+
; CHECK-NEXT: Forward:
5250
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
5351
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
5452
; CHECK-EMPTY:

0 commit comments

Comments
 (0)