@@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
1920
1920
namespace {
1921
1921
struct DepDistanceStrideAndSizeInfo {
1922
1922
const SCEV *Dist;
1923
- uint64_t Stride;
1923
+ uint64_t StrideA;
1924
+ uint64_t StrideB;
1924
1925
uint64_t TypeByteSize;
1925
1926
bool AIsWrite;
1926
1927
bool BIsWrite;
1927
1928
1928
- DepDistanceStrideAndSizeInfo (const SCEV *Dist, uint64_t Stride ,
1929
- uint64_t TypeByteSize, bool AIsWrite ,
1930
- bool BIsWrite)
1931
- : Dist(Dist), Stride(Stride ), TypeByteSize(TypeByteSize ),
1932
- AIsWrite (AIsWrite), BIsWrite(BIsWrite) {}
1929
+ DepDistanceStrideAndSizeInfo (const SCEV *Dist, uint64_t StrideA ,
1930
+ uint64_t StrideB, uint64_t TypeByteSize ,
1931
+ bool AIsWrite, bool BIsWrite)
1932
+ : Dist(Dist), StrideA(StrideA ), StrideB(StrideB ),
1933
+ TypeByteSize (TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
1933
1934
};
1934
1935
} // namespace
1935
1936
1936
- // Get the dependence distance, stride , type size and whether it is a write for
1937
+ // Get the dependence distance, strides , type size and whether it is a write for
1937
1938
// the dependence between A and B. Returns a DepType, if we can prove there's
1938
1939
// no dependence or the analysis fails. Outlined to lambda to limit the scope
1939
1940
// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
@@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize(
1995
1996
InnermostLoop))
1996
1997
return MemoryDepChecker::Dependence::IndirectUnsafe;
1997
1998
1998
- // Need accesses with constant stride. We don't want to vectorize
1999
- // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
2000
- // in the address space.
2001
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1999
+ // Need accesses with constant strides and the same direction. We don't want
2000
+ // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
2001
+ // could wrap in the address space.
2002
+ if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0 ) ||
2003
+ (StrideAPtr < 0 && StrideBPtr > 0 )) {
2002
2004
LLVM_DEBUG (dbgs () << " Pointer access with non-constant stride\n " );
2003
2005
return MemoryDepChecker::Dependence::Unknown;
2004
2006
}
@@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize(
2008
2010
DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
2009
2011
if (!HasSameSize)
2010
2012
TypeByteSize = 0 ;
2011
- uint64_t Stride = std::abs (StrideAPtr);
2012
- return DepDistanceStrideAndSizeInfo (Dist, Stride, TypeByteSize, AIsWrite ,
2013
- BIsWrite);
2013
+ return DepDistanceStrideAndSizeInfo (Dist, std::abs (StrideAPtr),
2014
+ std::abs (StrideBPtr), TypeByteSize ,
2015
+ AIsWrite, BIsWrite);
2014
2016
}
2015
2017
2016
2018
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent (
@@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2028
2030
if (std::holds_alternative<Dependence::DepType>(Res))
2029
2031
return std::get<Dependence::DepType>(Res);
2030
2032
2031
- const auto &[Dist, Stride , TypeByteSize, AIsWrite, BIsWrite] =
2033
+ const auto &[Dist, StrideA, StrideB , TypeByteSize, AIsWrite, BIsWrite] =
2032
2034
std::get<DepDistanceStrideAndSizeInfo>(Res);
2033
2035
bool HasSameSize = TypeByteSize > 0 ;
2034
2036
2037
+ std::optional<uint64_t > CommonStride =
2038
+ StrideA == StrideB ? std::make_optional (StrideA) : std::nullopt;
2039
+ if (isa<SCEVCouldNotCompute>(Dist)) {
2040
+ // TODO: Relax requirement that there is a common stride to retry with
2041
+ // non-constant distance dependencies.
2042
+ FoundNonConstantDistanceDependence |= !!CommonStride;
2043
+ LLVM_DEBUG (dbgs () << " LAA: Dependence because of uncomputable distance.\n " );
2044
+ return Dependence::Unknown;
2045
+ }
2046
+
2035
2047
ScalarEvolution &SE = *PSE.getSE ();
2036
2048
auto &DL = InnermostLoop->getHeader ()->getModule ()->getDataLayout ();
2049
+
2037
2050
// If the distance between the accesses is larger than their absolute stride
2038
2051
// multiplied by the backedge taken count, the accesses are independent, i.e.
2039
2052
// they are far enough apart that accesses won't access the same location
2040
2053
// across all loop iterations.
2041
- if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
2054
+ if (HasSameSize && CommonStride &&
2042
2055
isSafeDependenceDistance (DL, SE, *(PSE.getBackedgeTakenCount ()), *Dist,
2043
- Stride , TypeByteSize))
2056
+ *CommonStride , TypeByteSize))
2044
2057
return Dependence::NoDep;
2045
2058
2046
2059
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
2047
- if (!C) {
2048
- LLVM_DEBUG (dbgs () << " LAA: Dependence because of non-constant distance\n " );
2049
- FoundNonConstantDistanceDependence = true ;
2050
- return Dependence::Unknown;
2051
- }
2052
2060
2053
- const APInt &Val = C->getAPInt ();
2054
- int64_t Distance = Val.getSExtValue ();
2055
-
2056
- // If the distance between accesses and their strides are known constants,
2057
- // check whether the accesses interlace each other.
2058
- if (std::abs (Distance) > 0 && Stride > 1 && HasSameSize &&
2059
- areStridedAccessesIndependent (std::abs (Distance), Stride, TypeByteSize)) {
2060
- LLVM_DEBUG (dbgs () << " LAA: Strided accesses are independent\n " );
2061
- return Dependence::NoDep;
2061
+ // Attempt to prove strided accesses independent.
2062
+ if (C) {
2063
+ const APInt &Val = C->getAPInt ();
2064
+ int64_t Distance = Val.getSExtValue ();
2065
+
2066
+ // If the distance between accesses and their strides are known constants,
2067
+ // check whether the accesses interlace each other.
2068
+ if (std::abs (Distance) > 0 && CommonStride && *CommonStride > 1 &&
2069
+ HasSameSize &&
2070
+ areStridedAccessesIndependent (std::abs (Distance), *CommonStride,
2071
+ TypeByteSize)) {
2072
+ LLVM_DEBUG (dbgs () << " LAA: Strided accesses are independent\n " );
2073
+ return Dependence::NoDep;
2074
+ }
2062
2075
}
2063
2076
2064
2077
// Negative distances are not plausible dependencies.
2065
- if (Val.isNegative ()) {
2078
+ if (SE.isKnownNonPositive (Dist)) {
2079
+ if (SE.isKnownNonNegative (Dist)) {
2080
+ if (HasSameSize) {
2081
+ // Write to the same location with the same size.
2082
+ return Dependence::Forward;
2083
+ } else {
2084
+ LLVM_DEBUG (dbgs () << " LAA: possibly zero dependence difference but "
2085
+ " different type sizes\n " );
2086
+ return Dependence::Unknown;
2087
+ }
2088
+ }
2089
+
2066
2090
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2067
2091
// Check if the first access writes to a location that is read in a later
2068
2092
// iteration, where the distance between them is not a multiple of a vector
@@ -2071,34 +2095,52 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2071
2095
// NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2072
2096
// couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2073
2097
// forward dependency will allow vectorization using any width.
2074
- if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2075
- (!HasSameSize || couldPreventStoreLoadForward (Val.abs ().getZExtValue (),
2076
- TypeByteSize))) {
2077
- LLVM_DEBUG (dbgs () << " LAA: Forward but may prevent st->ld forwarding\n " );
2078
- return Dependence::ForwardButPreventsForwarding;
2098
+
2099
+ if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2100
+ if (!C) {
2101
+ // TODO: Relax requirement that there is a common stride to retry with
2102
+ // non-constant distance dependencies.
2103
+ FoundNonConstantDistanceDependence |= !!CommonStride;
2104
+ return Dependence::Unknown;
2105
+ }
2106
+ if (!HasSameSize ||
2107
+ couldPreventStoreLoadForward (C->getAPInt ().abs ().getZExtValue (),
2108
+ TypeByteSize)) {
2109
+ LLVM_DEBUG (
2110
+ dbgs () << " LAA: Forward but may prevent st->ld forwarding\n " );
2111
+ return Dependence::ForwardButPreventsForwarding;
2112
+ }
2079
2113
}
2080
2114
2081
2115
LLVM_DEBUG (dbgs () << " LAA: Dependence is negative\n " );
2082
2116
return Dependence::Forward;
2083
2117
}
2084
2118
2085
- // Write to the same location with the same size.
2086
- if (Val == 0 ) {
2087
- if (HasSameSize)
2088
- return Dependence::Forward;
2089
- LLVM_DEBUG (
2090
- dbgs () << " LAA: Zero dependence difference but different type sizes\n " );
2119
+ if (!C) {
2120
+ // TODO: Relax requirement that there is a common stride to retry with
2121
+ // non-constant distance dependencies.
2122
+ FoundNonConstantDistanceDependence |= !!CommonStride;
2123
+ LLVM_DEBUG (dbgs () << " LAA: Dependence because of non-constant distance\n " );
2091
2124
return Dependence::Unknown;
2092
2125
}
2093
2126
2094
- assert (Val.isStrictlyPositive () && " Expect a positive value" );
2127
+ if (!SE.isKnownPositive (Dist))
2128
+ return Dependence::Unknown;
2095
2129
2096
2130
if (!HasSameSize) {
2097
2131
LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
2098
2132
" different type sizes\n " );
2099
2133
return Dependence::Unknown;
2100
2134
}
2101
2135
2136
+ // The logic below currently only supports StrideA == StrideB, i.e. there's a
2137
+ // common stride.
2138
+ if (!CommonStride)
2139
+ return Dependence::Unknown;
2140
+
2141
+ const APInt &Val = C->getAPInt ();
2142
+ int64_t Distance = Val.getSExtValue ();
2143
+
2102
2144
// Bail out early if passed-in parameters make vectorization not feasible.
2103
2145
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2104
2146
VectorizerParams::VectorizationFactor : 1 );
@@ -2134,7 +2176,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2134
2176
// the minimum distance needed is 28, which is greater than distance. It is
2135
2177
// not safe to do vectorization.
2136
2178
uint64_t MinDistanceNeeded =
2137
- TypeByteSize * Stride * (MinNumIter - 1 ) + TypeByteSize;
2179
+ TypeByteSize * (*CommonStride) * (MinNumIter - 1 ) + TypeByteSize;
2138
2180
if (MinDistanceNeeded > static_cast <uint64_t >(Distance)) {
2139
2181
LLVM_DEBUG (dbgs () << " LAA: Failure because of positive distance "
2140
2182
<< Distance << ' \n ' );
@@ -2183,7 +2225,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2183
2225
2184
2226
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2185
2227
// since there is a backwards dependency.
2186
- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride );
2228
+ uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride) );
2187
2229
LLVM_DEBUG (dbgs () << " LAA: Positive distance " << Val.getSExtValue ()
2188
2230
<< " with max VF = " << MaxVF << ' \n ' );
2189
2231
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8 ;
0 commit comments