@@ -1795,8 +1795,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1795
1795
// / }
1796
1796
static bool isSafeDependenceDistance (const DataLayout &DL, ScalarEvolution &SE,
1797
1797
const SCEV &MaxBTC, const SCEV &Dist,
1798
- uint64_t MaxStride,
1799
- uint64_t TypeByteSize) {
1798
+ uint64_t MaxStride) {
1800
1799
1801
1800
// If we can prove that
1802
1801
// (**) |Dist| > MaxBTC * Step
@@ -1815,8 +1814,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1815
1814
// will be executed only if LoopCount >= VF, proving distance >= LoopCount
1816
1815
// also guarantees that distance >= VF.
1817
1816
//
1818
- const uint64_t ByteStride = MaxStride * TypeByteSize;
1819
- const SCEV *Step = SE.getConstant (MaxBTC.getType (), ByteStride);
1817
+ const SCEV *Step = SE.getConstant (MaxBTC.getType (), MaxStride);
1820
1818
const SCEV *Product = SE.getMulExpr (&MaxBTC, Step);
1821
1819
1822
1820
const SCEV *CastedDist = &Dist;
@@ -1860,25 +1858,23 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1860
1858
if (Distance % TypeByteSize)
1861
1859
return false ;
1862
1860
1863
- uint64_t ScaledDist = Distance / TypeByteSize;
1864
-
1865
- // No dependence if the scaled distance is not multiple of the stride.
1861
+ // No dependence if the distance is not a multiple of the stride.
1866
1862
// E.g.
1867
1863
// for (i = 0; i < 1024 ; i += 4)
1868
1864
// A[i+2] = A[i] + 1;
1869
1865
//
1870
- // Two accesses in memory (scaled distance is 2, stride is 4):
1866
+ // Two accesses in memory (distance is 2, stride is 4):
1871
1867
// | A[0] | | | | A[4] | | | |
1872
1868
// | | | A[2] | | | | A[6] | |
1873
1869
//
1874
1870
// E.g.
1875
1871
// for (i = 0; i < 1024 ; i += 3)
1876
1872
// A[i+4] = A[i] + 1;
1877
1873
//
1878
- // Two accesses in memory (scaled distance is 4, stride is 3):
1874
+ // Two accesses in memory (distance is 4, stride is 3):
1879
1875
// | A[0] | | | A[3] | | | A[6] | | |
1880
1876
// | | | | | A[4] | | | A[7] | |
1881
- return ScaledDist % Stride;
1877
+ return Distance % Stride;
1882
1878
}
1883
1879
1884
1880
std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1987,25 +1983,27 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
1987
1983
return MemoryDepChecker::Dependence::Unknown;
1988
1984
}
1989
1985
1990
- uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1991
- bool HasSameSize =
1992
- DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1993
- if (!HasSameSize)
1994
- TypeByteSize = 0 ;
1986
+ TypeSize AStoreSz = DL.getTypeStoreSize (ATy),
1987
+ BStoreSz = DL.getTypeStoreSize (BTy);
1988
+
1989
+ // If store sizes are not the same, set TypeByteSize to zero, so we can check
1990
+ // it in the caller.
1991
+ uint64_t ASz = DL.getTypeAllocSize (ATy), BSz = DL.getTypeAllocSize (BTy),
1992
+ TypeByteSize = AStoreSz == BStoreSz ? BSz : 0 ;
1995
1993
1996
- StrideAPtrInt = std::abs (StrideAPtrInt);
1997
- StrideBPtrInt = std::abs (StrideBPtrInt);
1994
+ uint64_t StrideAScaled = std::abs (StrideAPtrInt) * ASz ;
1995
+ uint64_t StrideBScaled = std::abs (StrideBPtrInt) * BSz ;
1998
1996
1999
- uint64_t MaxStride = std::max (StrideAPtrInt, StrideBPtrInt );
1997
+ uint64_t MaxStride = std::max (StrideAScaled, StrideBScaled );
2000
1998
2001
1999
std::optional<uint64_t > CommonStride;
2002
- if (StrideAPtrInt == StrideBPtrInt )
2003
- CommonStride = StrideAPtrInt ;
2000
+ if (StrideAScaled == StrideBScaled )
2001
+ CommonStride = StrideAScaled ;
2004
2002
2005
2003
// TODO: Historically, we don't retry with runtime checks unless the
2006
2004
// (unscaled) strides are the same. Fix this once the condition for runtime
2007
2005
// checks in isDependent is fixed.
2008
- bool ShouldRetryWithRuntimeCheck = CommonStride. has_value () ;
2006
+ bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt ;
2009
2007
2010
2008
return DepDistanceStrideAndSizeInfo (Dist, MaxStride, CommonStride,
2011
2009
ShouldRetryWithRuntimeCheck, TypeByteSize,
@@ -2045,9 +2043,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2045
2043
// upper bound of the number of iterations), the accesses are independent, i.e.
2046
2044
// they are far enough apart that accesses won't access the same location
2047
2045
// across all loop iterations.
2048
- if (HasSameSize && isSafeDependenceDistance (
2049
- DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()),
2050
- *Dist, MaxStride, TypeByteSize ))
2046
+ if (HasSameSize &&
2047
+ isSafeDependenceDistance (
2048
+ DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()), *Dist, MaxStride))
2051
2049
return Dependence::NoDep;
2052
2050
2053
2051
const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
@@ -2151,8 +2149,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2151
2149
2152
2150
// It's not vectorizable if the distance is smaller than the minimum distance
2153
2151
// needed for a vectorized/unrolled version. Vectorizing one iteration in
2154
- // front needs TypeByteSize * Stride . Vectorizing the last iteration needs
2155
- // TypeByteSize (No need to plus the last gap distance).
2152
+ // front needs CommonStride. Vectorizing the last iteration needs TypeByteSize
2153
+ // (No need to add the last gap distance).
2156
2154
//
2157
2155
// E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2158
2156
// foo(int *A) {
@@ -2179,8 +2177,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2179
2177
// We know that Dist is positive, but it may not be constant. Use the signed
2180
2178
// minimum for computations below, as this ensures we compute the closest
2181
2179
// possible dependence distance.
2182
- uint64_t MinDistanceNeeded =
2183
- TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2180
+ uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2184
2181
if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
2185
2182
if (!ConstDist) {
2186
2183
// For non-constant distances, we checked the lower bound of the
@@ -2236,7 +2233,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2236
2233
2237
2234
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2238
2235
// since there is a backwards dependency.
2239
- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * * CommonStride) ;
2236
+ uint64_t MaxVF = MinDepDistBytes / * CommonStride;
2240
2237
LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
2241
2238
<< " with max VF = " << MaxVF << ' \n ' );
2242
2239
0 commit comments