Skip to content

Commit b28a77a

Browse files
committed
[AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor
The AMDGPUAnnotateKernelFeatures pass infers the "amdgpu-calls" and "amdgpu-stack-objects" attributes, which are used to infer whether we need to initialize flat scratch. This is, however, not precise. Instead, we should use AMDGPUAttributor and infer amdgpu-no-flat-scratch-init on kernels.
1 parent c546578 commit b28a77a

File tree

56 files changed

+10689
-357
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+10689
-357
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
3030
AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
3131
AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
3232
AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
33+
AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
3334

3435
#undef AMDGPU_ATTRIBUTE

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,19 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
433433
indicatePessimisticFixpoint();
434434
return;
435435
}
436+
437+
bool HasAllocaOrASCast = false;
438+
for (BasicBlock &BB : *F) {
439+
for (Instruction &I : BB) {
440+
if (isa<AllocaInst>(I) || isa<AddrSpaceCastInst>(I)) {
441+
HasAllocaOrASCast = true;
442+
removeAssumedBits(FLAT_SCRATCH_INIT);
443+
break;
444+
}
445+
}
446+
if (HasAllocaOrASCast)
447+
break;
448+
}
436449
}
437450

438451
ChangeStatus updateImpl(Attributor &A) override {
@@ -519,6 +532,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
519532
if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
520533
removeAssumedBits(COMPLETION_ACTION);
521534

535+
if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
536+
removeAssumedBits(FLAT_SCRATCH_INIT);
537+
522538
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
523539
: ChangeStatus::UNCHANGED;
524540
}
@@ -677,6 +693,33 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
677693
return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
678694
UsedAssumedInformation);
679695
}
696+
697+
// Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
698+
// not to be set. Only the call-like instructions visited below are examined.
699+
bool needFlatScratchInit(Attributor &A) {
700+
// Predicate run on each visited call-like instruction. Returning true means
701+
// the call does not stand in the way of no-flat-scratch-init; false means it does.
702+
auto CheckForNoFlatScratchInit = [&](Instruction &I) {
703+
const auto &CB = cast<CallBase>(I);
704+
const Value *CalleeOp = CB.getCalledOperand();
705+
const Function *Callee = dyn_cast<Function>(CalleeOp);
706+
if (!Callee) // called operand is not a Function: only inline asm passes
707+
return CB.isInlineAsm();
708+
709+
// Calls to intrinsics always pass the check.
if (Callee->isIntrinsic())
710+
return true;
711+
712+
// Any other direct callee passes only if its AAAMDAttributes is available
// and still assumes the FLAT_SCRATCH_INIT bit.
const auto *CalleeInfo = A.getAAFor<AAAMDAttributes>(
713+
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
714+
return CalleeInfo && CalleeInfo->isAssumed(FLAT_SCRATCH_INIT);
715+
};
716+
717+
bool UsedAssumedInformation = false;
718+
// If the predicate is false for any call (i.e. it needs FlatScratchInit),
719+
// checkForAllCallLikeInstructions returns false, hence the negation below.
720+
return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
721+
UsedAssumedInformation);
722+
}
680723
};
681724

682725
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,12 +1040,8 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
10401040
const CallingConv::ID CC = F.getCallingConv();
10411041
const bool IsKernel =
10421042
CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
1043-
// FIXME: Should have analysis or something rather than attribute to detect
1044-
// calls.
1045-
const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
1046-
// FIXME: This attribute is a hack, we just need an analysis on the function
1047-
// to look for allocas.
1048-
const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
1043+
const bool NoFlatScratchInit =
1044+
F.hasFnAttribute("amdgpu-no-flat-scratch-init");
10491045

10501046
if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
10511047
KernargSegmentPtr = true;
@@ -1073,7 +1069,7 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
10731069
// lowering.
10741070
if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
10751071
(IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
1076-
(HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
1072+
(!NoFlatScratchInit || ST.enableFlatScratch()) &&
10771073
!ST.flatScratchIsArchitected()) {
10781074
FlatScratchInit = true;
10791075
}

llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
1212
; GFX8V4: ; %bb.0:
1313
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
1414
; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
15-
; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
15+
; GFX8V4-NEXT: s_add_i32 s8, s8, s11
16+
; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
17+
; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s9
1618
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
1719
; GFX8V4-NEXT: s_mov_b32 s4, s0
1820
; GFX8V4-NEXT: s_mov_b32 s5, s3
@@ -23,6 +25,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
2325
; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
2426
; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
2527
; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
28+
; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
2629
; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
2730
; GFX8V4-NEXT: flat_store_dword v[0:1], v2
2831
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
@@ -37,7 +40,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
3740
; GFX8V5: ; %bb.0:
3841
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3942
; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xc8
40-
; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
43+
; GFX8V5-NEXT: s_add_i32 s6, s6, s9
44+
; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
45+
; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s7
4146
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
4247
; GFX8V5-NEXT: s_mov_b32 s4, s0
4348
; GFX8V5-NEXT: s_mov_b32 s5, s2
@@ -47,6 +52,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
4752
; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
4853
; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
4954
; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[2:3], 0
55+
; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
5056
; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
5157
; GFX8V5-NEXT: flat_store_dword v[0:1], v2
5258
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
@@ -60,9 +66,10 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
6066
; GFX9V4-LABEL: addrspacecast:
6167
; GFX9V4: ; %bb.0:
6268
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
69+
; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s6, s9
70+
; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
6371
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
6472
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
65-
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
6673
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
6774
; GFX9V4-NEXT: s_mov_b32 s2, s0
6875
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
@@ -71,6 +78,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
7178
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
7279
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
7380
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
81+
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
7482
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
7583
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
7684
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
@@ -84,9 +92,10 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
8492
; GFX9V5-LABEL: addrspacecast:
8593
; GFX9V5: ; %bb.0:
8694
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
95+
; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s6, s9
96+
; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
8797
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
8898
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
89-
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
9099
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
91100
; GFX9V5-NEXT: s_mov_b32 s2, s0
92101
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
@@ -95,6 +104,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
95104
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
96105
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
97106
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
107+
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
98108
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
99109
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
100110
; GFX9V5-NEXT: s_waitcnt vmcnt(0)

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,9 @@ attributes #1 = { nounwind }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235235
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238-
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238+
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
239239
;.
240240
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
241241
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118118
; CHECK-SAME: ) #[[ATTR4]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]]
119+
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138138
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]]
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -229,7 +229,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
229229

230230
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
231231
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
232-
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
232+
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR7:[0-9]+]] {
233233
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
234234
; CHECK-NEXT: call void [[FPTR]]()
235235
; CHECK-NEXT: ret void
@@ -242,14 +242,15 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
242242

243243
attributes #0 = { "amdgpu-no-agpr" }
244244
;.
245-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
246-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
247-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
245+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
246+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
247+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
248248
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
249249
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
250-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
250+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
251251
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
252-
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
253-
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
254-
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
252+
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
253+
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
254+
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
255+
; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }
255256
;.

0 commit comments

Comments
 (0)