-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor #94647
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3030189
7eacaf2
fefedb9
0a0025b
3e84d16
bf274dc
703eecc
a5972c6
b3c81f9
09012f4
0a739e9
944dfc3
6296be1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -262,6 +262,18 @@ class AMDGPUInformationCache : public InformationCache { | |
return !HasAperture && (Access & ADDR_SPACE_CAST); | ||
} | ||
|
||
bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) { | ||
SmallPtrSet<const Constant *, 8> Visited; | ||
uint8_t Access = getConstantAccess(C, Visited); | ||
|
||
if (Access & ADDR_SPACE_CAST) | ||
if (const auto *CE = dyn_cast<ConstantExpr>(C)) | ||
if (CE->getOperand(0)->getType()->getPointerAddressSpace() == | ||
AMDGPUAS::PRIVATE_ADDRESS) | ||
return true; | ||
return false; | ||
} | ||
|
||
private: | ||
/// Used to determine if the Constant needs the queue pointer. | ||
DenseMap<const Constant *, uint8_t> ConstantStatus; | ||
|
@@ -525,6 +537,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { | |
if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV)) | ||
removeAssumedBits(COMPLETION_ACTION); | ||
|
||
if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A)) | ||
removeAssumedBits(FLAT_SCRATCH_INIT); | ||
|
||
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED | ||
: ChangeStatus::UNCHANGED; | ||
} | ||
|
@@ -683,6 +698,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { | |
return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, | ||
UsedAssumedInformation); | ||
} | ||
|
||
// Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is | ||
// not to be set. | ||
bool needFlatScratchInit(Attributor &A) { | ||
assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set | ||
|
||
// Check all AddrSpaceCast instructions. FlatScratchInit is needed if | ||
// there is a cast from PRIVATE_ADDRESS. | ||
auto AddrSpaceCastNotFromPrivate = [](Instruction &I) { | ||
return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() != | ||
AMDGPUAS::PRIVATE_ADDRESS; | ||
}; | ||
|
||
bool UsedAssumedInformation = false; | ||
if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this, | ||
{Instruction::AddrSpaceCast}, | ||
UsedAssumedInformation)) | ||
return true; | ||
|
||
// Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions | ||
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); | ||
|
||
Function *F = getAssociatedFunction(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to merge this into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that is not ideal, because |
||
for (Instruction &I : instructions(F)) { | ||
for (const Use &U : I.operands()) { | ||
if (const auto *C = dyn_cast<Constant>(U)) { | ||
if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C)) | ||
return true; | ||
} | ||
} | ||
} | ||
|
||
// Finally check callees. | ||
|
||
// This is called on each callee; false means callee shouldn't have | ||
// no-flat-scratch-init. | ||
auto CheckForNoFlatScratchInit = [&](Instruction &I) { | ||
const auto &CB = cast<CallBase>(I); | ||
Comment on lines
+737
to
+738
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would hope FroAllCallLikeInstructions would have a CallBase typed argument to begin with |
||
const Function *Callee = CB.getCalledFunction(); | ||
|
||
// Callee == 0 for inline asm or indirect call with known callees. | ||
// In the latter case, updateImpl() already checked the callees and we | ||
// know their FLAT_SCRATCH_INIT bit is set. | ||
// If function has indirect call with unknown callees, the bit is | ||
// already removed in updateImpl() and execution won't reach here. | ||
if (!Callee) | ||
return true; | ||
jwanggit86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return Callee->getIntrinsicID() != | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIUC, this attribute should propagate from callee to caller, so you will need to check all function calls, and ask Attributor whether the callee needs it or not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Callees are already checked at the beginning of |
||
Intrinsic::amdgcn_addrspacecast_nonnull; | ||
}; | ||
|
||
UsedAssumedInformation = false; | ||
// If any callee is false (i.e. need FlatScratchInit), | ||
// checkForAllCallLikeInstructions returns false, in which case this | ||
// function returns true. | ||
return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should look more like how the queue pointer is handled. This is just a slightly more complicated version of checkForQueuePtr. The instruction walk you put in initialize should be handled by checkForAllInstructions looking for addrspacecast There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doing it like QueuePtr is certainly more "canonical'. Seems more preferable than doing it in |
||
UsedAssumedInformation); | ||
} | ||
}; | ||
|
||
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't this handle the call case instead of a separate checkForAllCallLikeInstructions?
Alternatively, we should finally add the nonnull flag to addrspacecast