-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV]Enable non-power-of-2 store-load forwarding distance in predicated DataWithEVL vectorization mode #100755
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4c16b87
beed690
254d2eb
9ae7d31
03ac1b2
54e527b
0e03238
24f751e
24d478e
41834f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1756,7 +1756,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, | |
// Maximum vector factor. | ||
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 = | ||
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize, | ||
MaxStoreLoadForwardSafeDistanceInBits); | ||
MaxPowerOf2StoreLoadForwardSafeDistanceInBits); | ||
uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0; | ||
|
||
// Compute the smallest VF at which the store and load would be misaligned. | ||
for (uint64_t VF = 2 * TypeByteSize; | ||
|
@@ -1768,24 +1769,61 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, | |
break; | ||
} | ||
} | ||
// RISCV VLA supports non-power-2 vector factor. So, we iterate in a | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this needed for correctness for RISCV? If not, it can be done separately, as this adds some extra complexity. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Initially, we can support only power-of-2. I can split this patch into 2 sub-patches. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes please, this would hopefully help to make the patch simpler. Also, is this relevant for cores supporting EVL? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The initial patch with only power-of-2 support is already committed; this one adds non-power-of-2 support. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Right.
|
||
// backward order to find largest VF, which allows aligned stores-loads or | ||
// the number of iterations between conflicting memory addresses is not less | ||
// than 8 (NumItersForStoreLoadThroughMemory). | ||
if (AllowNonPow2Deps) { | ||
MaxVFWithoutSLForwardIssuesNonPowerOf2 = | ||
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize, | ||
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits); | ||
|
||
for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2; | ||
VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) { | ||
if (Distance % VF == 0 || | ||
Distance / VF >= NumItersForStoreLoadThroughMemory) { | ||
uint64_t GCD = | ||
isSafeForAnyStoreLoadForwardDistances() | ||
? VF | ||
: std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, | ||
VF); | ||
MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) { | ||
if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize && | ||
MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) { | ||
LLVM_DEBUG( | ||
dbgs() << "LAA: Distance " << Distance | ||
<< " that could cause a store-load forwarding conflict\n"); | ||
return true; | ||
} | ||
|
||
// Handle non-power-2 store-load forwarding distance, power-of-2 distance can | ||
// be calculated. | ||
if (AllowNonPow2Deps && CommonStride && | ||
MaxVFWithoutSLForwardIssuesNonPowerOf2 < | ||
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits && | ||
MaxVFWithoutSLForwardIssuesNonPowerOf2 != | ||
VectorizerParams::MaxVectorWidth * TypeByteSize) { | ||
uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride; | ||
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; | ||
MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits = | ||
std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits); | ||
} | ||
|
||
if (CommonStride && | ||
MaxVFWithoutSLForwardIssuesPowerOf2 < | ||
MaxStoreLoadForwardSafeDistanceInBits && | ||
MaxPowerOf2StoreLoadForwardSafeDistanceInBits && | ||
MaxVFWithoutSLForwardIssuesPowerOf2 != | ||
VectorizerParams::MaxVectorWidth * TypeByteSize) { | ||
uint64_t MaxVF = | ||
bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride); | ||
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; | ||
MaxStoreLoadForwardSafeDistanceInBits = | ||
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits); | ||
MaxPowerOf2StoreLoadForwardSafeDistanceInBits = | ||
std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits); | ||
} | ||
return false; | ||
} | ||
|
@@ -2250,7 +2288,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, | |
return Dependence::Unknown; | ||
} | ||
|
||
MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits); | ||
if (!AllowNonPow2Deps) | ||
MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits); | ||
return Dependence::BackwardVectorizable; | ||
} | ||
|
||
|
@@ -2984,8 +3023,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, | |
MaxTargetVectorWidthInBits = | ||
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; | ||
|
||
DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides, | ||
MaxTargetVectorWidthInBits); | ||
DepChecker = std::make_unique<MemoryDepChecker>( | ||
*PSE, L, SymbolicStrides, MaxTargetVectorWidthInBits, | ||
TTI && TTI->hasActiveVectorLength(0, nullptr, Align())); | ||
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE); | ||
if (canAnalyzeLoop()) | ||
CanVecMem = analyzeLoop(AA, LI, TLI, DT); | ||
|
@@ -2999,7 +3039,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { | |
OS << " with a maximum safe vector width of " | ||
<< DC.getMaxSafeVectorWidthInBits() << " bits"; | ||
if (!DC.isSafeForAnyStoreLoadForwardDistances()) { | ||
uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits(); | ||
uint64_t SLDist = DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits(); | ||
if (SLDist == std::numeric_limits<uint64_t>::max()) | ||
SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits(); | ||
OS << ", with a maximum safe store-load forward width of " << SLDist | ||
<< " bits"; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is confusing: in general, doesn't any target support non-power-of-2 dependence distances, as long as the distance is >= VF?