Skip to content

Commit 6798dc4

Browse files
committed
Previous code was incorrect for indirect calls of known callees.
1 parent 0ec30ae commit 6798dc4

File tree

5 files changed

+205
-182
lines changed

5 files changed

+205
-182
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -692,20 +692,23 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
692692
// Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
693693
// not to be set.
694694
bool needFlatScratchInit(Attributor &A) {
695+
assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
696+
695697
// This is called on each callee; false means callee shouldn't have
696698
// no-flat-scratch-init.
697699
auto CheckForNoFlatScratchInit = [&](Instruction &I) {
698700
const auto &CB = cast<CallBase>(I);
699701
const Function *Callee = CB.getCalledFunction();
700-
if (!Callee) // indirect call
701-
return CB.isInlineAsm();
702702

703-
if (Callee->isIntrinsic())
704-
return Callee->getIntrinsicID() != Intrinsic::amdgcn_addrspacecast_nonnull;
703+
if (Callee && Callee->isIntrinsic())
704+
return Callee->getIntrinsicID() !=
705+
Intrinsic::amdgcn_addrspacecast_nonnull;
705706

706-
const auto *CalleeInfo = A.getAAFor<AAAMDAttributes>(
707-
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
708-
return CalleeInfo && CalleeInfo->isAssumed(FLAT_SCRATCH_INIT);
707+
// Return true for all other cases, including (1) inline asm, (2) direct
708+
// call, and (3) indirect call with known callees. For (2) and (3),
709+
// updateImpl() already checked the callees and we know their
710+
// FLAT_SCRATCH_INIT bit is set.
711+
return true;
709712
};
710713

711714
bool UsedAssumedInformation = false;

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
229229

230230
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
231231
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
232-
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR7:[0-9]+]] {
232+
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
233233
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
234234
; CHECK-NEXT: call void [[FPTR]]()
235235
; CHECK-NEXT: ret void
@@ -249,7 +249,6 @@ attributes #0 = { "amdgpu-no-agpr" }
249249
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
250250
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
251251
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
252-
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
253252
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
254253
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
255254
; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }

0 commit comments

Comments
 (0)