-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LAA] Support different strides & non constant dep distances using SCEV. #88039
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
110e5ea
95c329b
d13ddf8
2e5a807
bcb8853
53a1ea4
0accad1
2cee777
0a1509f
a9e7fa0
a695f67
7bec335
8bae450
e2e4615
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects, | |
namespace { | ||
struct DepDistanceStrideAndSizeInfo { | ||
const SCEV *Dist; | ||
uint64_t Stride; | ||
uint64_t StrideA; | ||
uint64_t StrideB; | ||
uint64_t TypeByteSize; | ||
bool AIsWrite; | ||
bool BIsWrite; | ||
|
||
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride, | ||
uint64_t TypeByteSize, bool AIsWrite, | ||
bool BIsWrite) | ||
: Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize), | ||
AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} | ||
DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA, | ||
uint64_t StrideB, uint64_t TypeByteSize, | ||
bool AIsWrite, bool BIsWrite) | ||
: Dist(Dist), StrideA(StrideA), StrideB(StrideB), | ||
TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} | ||
}; | ||
} // namespace | ||
|
||
// Get the dependence distance, stride, type size and whether it is a write for | ||
// Get the dependence distance, strides, type size and whether it is a write for | ||
// the dependence between A and B. Returns a DepType, if we can prove there's | ||
// no dependence or the analysis fails. Outlined to lambda to limit he scope | ||
// of various temporary variables, like A/BPtr, StrideA/BPtr and others. | ||
|
@@ -1998,7 +1999,7 @@ getDependenceDistanceStrideAndSize( | |
// Need accesses with constant stride. We don't want to vectorize | ||
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap | ||
// in the address space. | ||
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { | ||
if (!StrideAPtr || !StrideBPtr) { | ||
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); | ||
return MemoryDepChecker::Dependence::Unknown; | ||
} | ||
|
@@ -2008,9 +2009,9 @@ getDependenceDistanceStrideAndSize( | |
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); | ||
if (!HasSameSize) | ||
TypeByteSize = 0; | ||
uint64_t Stride = std::abs(StrideAPtr); | ||
return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite, | ||
BIsWrite); | ||
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), | ||
std::abs(StrideBPtr), TypeByteSize, | ||
AIsWrite, BIsWrite); | ||
} | ||
|
||
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( | ||
|
@@ -2028,68 +2029,105 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( | |
if (std::holds_alternative<Dependence::DepType>(Res)) | ||
return std::get<Dependence::DepType>(Res); | ||
|
||
const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] = | ||
const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = | ||
std::get<DepDistanceStrideAndSizeInfo>(Res); | ||
bool HasSameSize = TypeByteSize > 0; | ||
|
||
uint64_t CommonStride = StrideA == StrideB ? StrideA : 0; | ||
if (isa<SCEVCouldNotCompute>(Dist)) { | ||
FoundNonConstantDistanceDependence |= CommonStride; | ||
LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); | ||
return Dependence::Unknown; | ||
} | ||
|
||
ScalarEvolution &SE = *PSE.getSE(); | ||
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); | ||
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize && | ||
if (HasSameSize && CommonStride && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, I plan to address this separately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Follow-up now available: #90036 |
||
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist, | ||
Stride, TypeByteSize)) | ||
CommonStride, TypeByteSize)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment the comment in the function implies that it assumes equal strides which is why I left it as-is for now, to limit the scope of the initial patch. It tries to prove A similar reasoning can be applied at the end of the function too, which also is currently guarded by having a common stride. I am also planning on addressing that separately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a TODO? |
||
return Dependence::NoDep; | ||
|
||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); | ||
if (!C) { | ||
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); | ||
FoundNonConstantDistanceDependence = true; | ||
return Dependence::Unknown; | ||
} | ||
|
||
const APInt &Val = C->getAPInt(); | ||
int64_t Distance = Val.getSExtValue(); | ||
|
||
// Attempt to prove strided accesses independent. | ||
if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize && | ||
areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { | ||
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); | ||
return Dependence::NoDep; | ||
if (C) { | ||
const APInt &Val = C->getAPInt(); | ||
int64_t Distance = Val.getSExtValue(); | ||
|
||
if (std::abs(Distance) > 0 && CommonStride > 1 && HasSameSize && | ||
areStridedAccessesIndependent(std::abs(Distance), CommonStride, | ||
TypeByteSize)) { | ||
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); | ||
return Dependence::NoDep; | ||
} | ||
|
||
// Write to the same location with the same size. | ||
if (C->isZero()) { | ||
if (HasSameSize) | ||
return Dependence::Forward; | ||
LLVM_DEBUG( | ||
dbgs() | ||
<< "LAA: Zero dependence difference but different type sizes\n"); | ||
return Dependence::Unknown; | ||
} | ||
} | ||
|
||
// Negative distances are not plausible dependencies. | ||
if (Val.isNegative()) { | ||
if (SE.isKnownNonPositive(Dist)) { | ||
if (!SE.isKnownNegative(Dist) && !HasSameSize) { | ||
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " | ||
"different type sizes\n"); | ||
return Dependence::Unknown; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My impression is that most of these There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've merged the code from above that handles C == 0 with the code here. The main reason we need to update The latest version of the patch limits setting |
||
|
||
bool IsTrueDataDependence = (AIsWrite && !BIsWrite); | ||
// There is no need to update MaxSafeVectorWidthInBits after call to | ||
// couldPreventStoreLoadForward, even if it changed MinDepDistBytes, | ||
// since a forward dependency will allow vectorization using any width. | ||
if (IsTrueDataDependence && EnableForwardingConflictDetection && | ||
(!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(), | ||
TypeByteSize))) { | ||
LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); | ||
return Dependence::ForwardButPreventsForwarding; | ||
if (IsTrueDataDependence && EnableForwardingConflictDetection) { | ||
if (!C) { | ||
FoundNonConstantDistanceDependence |= CommonStride; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there is no common stride ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the reason for only setting it for I added a TODO to relax this requirement, but would like to keep it in the patch to limit the impact. |
||
return Dependence::Unknown; | ||
} | ||
if (!HasSameSize || | ||
couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(), | ||
TypeByteSize)) { | ||
LLVM_DEBUG( | ||
dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); | ||
return Dependence::ForwardButPreventsForwarding; | ||
} | ||
} | ||
|
||
LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n"); | ||
return Dependence::Forward; | ||
} | ||
|
||
// Write to the same location with the same size. | ||
if (Val == 0) { | ||
if (HasSameSize) | ||
return Dependence::Forward; | ||
LLVM_DEBUG( | ||
dbgs() << "LAA: Zero dependence difference but different type sizes\n"); | ||
if (!SE.isKnownPositive(Dist)) { | ||
FoundNonConstantDistanceDependence |= !C && CommonStride; | ||
return Dependence::Unknown; | ||
} | ||
|
||
assert(Val.isStrictlyPositive() && "Expect a positive value"); | ||
|
||
if (!HasSameSize) { | ||
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " | ||
"different type sizes\n"); | ||
FoundNonConstantDistanceDependence |= !C && CommonStride; | ||
return Dependence::Unknown; | ||
} | ||
|
||
if (!C) { | ||
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); | ||
FoundNonConstantDistanceDependence |= CommonStride; | ||
return Dependence::Unknown; | ||
} | ||
|
||
// The logic below currently only supports StrideA == StrideB, i.e. there's a | ||
// common stride. | ||
if (!CommonStride) | ||
return Dependence::Unknown; | ||
Comment on lines
+2141
to
+2142
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Many checks above also test ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could only find the one guarding |
||
|
||
const APInt &Val = C->getAPInt(); | ||
int64_t Distance = Val.getSExtValue(); | ||
|
||
// Bail out early if passed-in parameters make vectorization not feasible. | ||
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? | ||
VectorizerParams::VectorizationFactor : 1); | ||
|
@@ -2125,7 +2163,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( | |
// the minimum distance needed is 28, which is greater than distance. It is | ||
// not safe to do vectorization. | ||
uint64_t MinDistanceNeeded = | ||
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; | ||
TypeByteSize * CommonStride * (MinNumIter - 1) + TypeByteSize; | ||
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) { | ||
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " | ||
<< Distance << '\n'); | ||
|
@@ -2174,7 +2212,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( | |
|
||
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits | ||
// since there is a backwards dependency. | ||
uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride); | ||
uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * CommonStride); | ||
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() | ||
<< " with max VF = " << MaxVF << '\n'); | ||
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate { | |
|
||
// We don't need to check non-wrapping here because forward/backward | ||
// dependence wouldn't be valid if these weren't monotonic accesses. | ||
auto *Dist = cast<SCEVConstant>( | ||
auto *Dist = dyn_cast<SCEVConstant>( | ||
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); | ||
if (!Dist) | ||
return false; | ||
Comment on lines
+129
to
+132
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this related? LAA doesn't seem to be involved. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the pass uses LoopAccessInfo in |
||
const APInt &Val = Dist->getAPInt(); | ||
return Val == TypeByteSize * StrideLoad; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't
0
a valid stride (i.e. indexed by a constant)?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment, both strides need to be != 0 I think (
getDependenceDistanceStrideAndSize
returnsUnknown
dependence if any stride is == 0).But using std::optional makes this future proof (I am planning to also remove that restriction as follow up), updated, thanks!