@@ -1881,70 +1881,90 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
1881
1881
unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
1882
1882
const DenseMap<Value *, SmallVector<const Value *, 16 >>
1883
1883
&UnderlyingObjects) {
1884
- assert (AIdx < BIdx && " Must pass arguments in program order" );
1885
-
1886
- auto [APtr, AIsWrite] = A;
1887
- auto [BPtr, BIsWrite] = B;
1888
- Type *ATy = getLoadStoreType (InstMap[AIdx]);
1889
- Type *BTy = getLoadStoreType (InstMap[BIdx]);
1890
-
1891
- // Two reads are independent.
1892
- if (!AIsWrite && !BIsWrite)
1893
- return Dependence::NoDep;
1894
-
1895
- // We cannot check pointers in different address spaces.
1896
- if (APtr->getType ()->getPointerAddressSpace () !=
1897
- BPtr->getType ()->getPointerAddressSpace ())
1898
- return Dependence::Unknown;
1899
-
1900
- int64_t StrideAPtr =
1901
- getPtrStride (PSE, ATy, APtr, InnermostLoop, Strides, true ).value_or (0 );
1902
- int64_t StrideBPtr =
1903
- getPtrStride (PSE, BTy, BPtr, InnermostLoop, Strides, true ).value_or (0 );
1904
-
1905
- const SCEV *Src = PSE.getSCEV (APtr);
1906
- const SCEV *Sink = PSE.getSCEV (BPtr);
1884
+ ScalarEvolution &SE = *PSE.getSE ();
1885
+ auto &DL = InnermostLoop->getHeader ()->getModule ()->getDataLayout ();
1907
1886
1908
- // If the induction step is negative we have to invert source and sink of the
1909
- // dependence.
1910
- if (StrideAPtr < 0 ) {
1911
- std::swap (APtr, BPtr);
1912
- std::swap (ATy, BTy);
1913
- std::swap (Src, Sink);
1914
- std::swap (AIsWrite, BIsWrite);
1915
- std::swap (AIdx, BIdx);
1916
- std::swap (StrideAPtr, StrideBPtr);
1917
- }
1887
+ // Get the dependence distance, stride, type size and whether it is a write for
1888
+ // the dependence between A and B. Returns a DepType, if we can prove there's
1889
+ // no dependence or the analysis fails. Outlined to lambda to limit the scope
1890
+ // of various temporary variables, like A/BPtr, StrideA/BPtr and others.
1891
+ auto Res =
1892
+ [&]() -> std::variant<
1893
+ MemoryDepChecker::Dependence::DepType,
1894
+ std::tuple<const SCEV *, uint64_t , uint64_t , bool , bool >> {
1895
+ auto [APtr, AIsWrite] = A;
1896
+ auto [BPtr, BIsWrite] = B;
1897
+
1898
+ // Two reads are independent.
1899
+ if (!AIsWrite && !BIsWrite)
1900
+ return Dependence::NoDep;
1901
+
1902
+ assert (AIdx < BIdx && " Must pass arguments in program order" );
1903
+ Type *ATy = getLoadStoreType (InstMap[AIdx]);
1904
+ Type *BTy = getLoadStoreType (InstMap[BIdx]);
1905
+
1906
+ // We cannot check pointers in different address spaces.
1907
+ if (APtr->getType ()->getPointerAddressSpace () !=
1908
+ BPtr->getType ()->getPointerAddressSpace ())
1909
+ return Dependence::Unknown;
1910
+
1911
+ int64_t StrideAPtr =
1912
+ getPtrStride (PSE, ATy, APtr, InnermostLoop, Strides, true ).value_or (0 );
1913
+ int64_t StrideBPtr =
1914
+ getPtrStride (PSE, BTy, BPtr, InnermostLoop, Strides, true ).value_or (0 );
1915
+
1916
+ const SCEV *Src = PSE.getSCEV (APtr);
1917
+ const SCEV *Sink = PSE.getSCEV (BPtr);
1918
+
1919
+ // If the induction step is negative we have to invert source and sink of
1920
+ // the dependence.
1921
+ if (StrideAPtr < 0 ) {
1922
+ std::swap (APtr, BPtr);
1923
+ std::swap (ATy, BTy);
1924
+ std::swap (Src, Sink);
1925
+ std::swap (AIsWrite, BIsWrite);
1926
+ std::swap (AIdx, BIdx);
1927
+ std::swap (StrideAPtr, StrideBPtr);
1928
+ }
1918
1929
1919
- ScalarEvolution &SE = *PSE.getSE ();
1920
- const SCEV *Dist = SE.getMinusSCEV (Sink, Src);
1921
-
1922
- LLVM_DEBUG (dbgs () << " LAA: Src Scev: " << *Src << " Sink Scev: " << *Sink
1923
- << " (Induction step: " << StrideAPtr << " )\n " );
1924
- LLVM_DEBUG (dbgs () << " LAA: Distance for " << *InstMap[AIdx] << " to "
1925
- << *InstMap[BIdx] << " : " << *Dist << " \n " );
1926
-
1927
- // Needs accesses where the addresses of the accessed underlying objects do
1928
- // not change within the loop.
1929
- if (isLoopVariantIndirectAddress (UnderlyingObjects.find (APtr)->second , SE,
1930
- InnermostLoop) ||
1931
- isLoopVariantIndirectAddress (UnderlyingObjects.find (BPtr)->second , SE,
1932
- InnermostLoop))
1933
- return Dependence::IndirectUnsafe;
1934
-
1935
- // Need accesses with constant stride. We don't want to vectorize
1936
- // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1937
- // the address space.
1938
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1939
- LLVM_DEBUG (dbgs () << " Pointer access with non-constant stride\n " );
1940
- return Dependence::Unknown;
1941
- }
1930
+ const SCEV *Dist = SE.getMinusSCEV (Sink, Src);
1931
+
1932
+ LLVM_DEBUG (dbgs () << " LAA: Src Scev: " << *Src << " Sink Scev: " << *Sink
1933
+ << " (Induction step: " << StrideAPtr << " )\n " );
1934
+ LLVM_DEBUG (dbgs () << " LAA: Distance for " << *InstMap[AIdx] << " to "
1935
+ << *InstMap[BIdx] << " : " << *Dist << " \n " );
1936
+
1937
+ // Needs accesses where the addresses of the accessed underlying objects do
1938
+ // not change within the loop.
1939
+ if (isLoopVariantIndirectAddress (UnderlyingObjects.find (APtr)->second , SE,
1940
+ InnermostLoop) ||
1941
+ isLoopVariantIndirectAddress (UnderlyingObjects.find (BPtr)->second , SE,
1942
+ InnermostLoop))
1943
+ return Dependence::IndirectUnsafe;
1944
+
1945
+ // Need accesses with constant stride. We don't want to vectorize
1946
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
1947
+ // in the address space.
1948
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1949
+ LLVM_DEBUG (dbgs () << " Pointer access with non-constant stride\n " );
1950
+ return Dependence::Unknown;
1951
+ }
1942
1952
1943
- auto &DL = InnermostLoop->getHeader ()->getModule ()->getDataLayout ();
1944
- uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1945
- bool HasSameSize =
1946
- DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1947
- uint64_t Stride = std::abs (StrideAPtr);
1953
+ uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1954
+ bool HasSameSize =
1955
+ DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1956
+ if (!HasSameSize)
1957
+ TypeByteSize = 0 ;
1958
+ uint64_t Stride = std::abs (StrideAPtr);
1959
+ return std::make_tuple (Dist, Stride, TypeByteSize, AIsWrite, BIsWrite);
1960
+ }();
1961
+
1962
+ if (std::holds_alternative<Dependence::DepType>(Res))
1963
+ return std::get<Dependence::DepType>(Res);
1964
+
1965
+ const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
1966
+ std::get<std::tuple<const SCEV *, uint64_t , uint64_t , bool , bool >>(Res);
1967
+ bool HasSameSize = TypeByteSize > 0 ;
1948
1968
1949
1969
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
1950
1970
isSafeDependenceDistance (DL, SE, *(PSE.getBackedgeTakenCount ()), *Dist,
0 commit comments