Skip to content

Commit 46097b1

Browse files
committed
Previous code was incorrect for indirect calls of known callees.
1 parent ffc131e commit 46097b1

File tree

5 files changed

+205
-182
lines changed

5 files changed

+205
-182
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -691,20 +691,23 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
691691
// Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
692692
// not to be set.
693693
bool needFlatScratchInit(Attributor &A) {
694+
assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
695+
694696
// This is called on each callee; false means callee shouldn't have
695697
// no-flat-scratch-init.
696698
auto CheckForNoFlatScratchInit = [&](Instruction &I) {
697699
const auto &CB = cast<CallBase>(I);
698700
const Function *Callee = CB.getCalledFunction();
699-
if (!Callee) // indirect call
700-
return CB.isInlineAsm();
701701

702-
if (Callee->isIntrinsic())
703-
return Callee->getIntrinsicID() != Intrinsic::amdgcn_addrspacecast_nonnull;
702+
if (Callee && Callee->isIntrinsic())
703+
return Callee->getIntrinsicID() !=
704+
Intrinsic::amdgcn_addrspacecast_nonnull;
704705

705-
const auto *CalleeInfo = A.getAAFor<AAAMDAttributes>(
706-
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
707-
return CalleeInfo && CalleeInfo->isAssumed(FLAT_SCRATCH_INIT);
706+
// Return true for all other cases, including (1)inline asm, (2)direct
707+
// call, and (3)indirect call with known callees. For (2) and (3)
708+
// updateImpl() already checked the callees and we know their
709+
// FLAT_SCRATCH_INIT bit is set.
710+
return true;
708711
};
709712

710713
bool UsedAssumedInformation = false;

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
229229

230230
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
231231
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
232-
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR7:[0-9]+]] {
232+
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
233233
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
234234
; CHECK-NEXT: call void [[FPTR]]()
235235
; CHECK-NEXT: ret void
@@ -249,7 +249,6 @@ attributes #0 = { "amdgpu-no-agpr" }
249249
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
250250
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
251251
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
252-
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
253252
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
254253
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
255254
; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }

0 commit comments

Comments
 (0)