Skip to content

Commit 66d27ed

Browse files
committed
rebase and resolve review comments
1 parent 5b7ec58 commit 66d27ed

17 files changed

+117
-193
lines changed

builtin.diff

Lines changed: 0 additions & 84 deletions
This file was deleted.

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx13-load-mcast.cl

Lines changed: 0 additions & 14 deletions
This file was deleted.

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -903,5 +903,4 @@ void test_set_fpenv(unsigned long env) {
903903

904904
// CHECK-DAG: [[$GRID_RANGE]] = !{i32 1, i32 0}
905905
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
906-
// CHECK-SPIRV-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }
907-
// CHECK-AMDGCN-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) "amdgpu-waves-per-eu"="4,10" }
906+
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,10 +1324,14 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13241324
/// have a determined flat workgroup size.
13251325
static void updateWavesPerEU(Module &M, TargetMachine &TM) {
13261326
for (Function &F : M) {
1327+
if (F.isDeclaration())
1328+
continue;
1329+
13271330
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
13281331

1329-
auto FlatWgrpSizeAttr =
1330-
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1332+
std::optional<std::pair<unsigned, std::optional<unsigned>>>
1333+
FlatWgrpSizeAttr =
1334+
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
13311335

13321336
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
13331337
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
@@ -1346,7 +1350,7 @@ static void updateWavesPerEU(Module &M, TargetMachine &TM) {
13461350
// Compute the range from flat workgroup size. `getWavesPerEU` will also
13471351
// account for the 'amdgpu-waves-per-eu' attribute.
13481352
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1349-
ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize));
1353+
ST.getWavesPerEU(F, {MinFlatWgrpSize, MaxFlatWgrpSize});
13501354

13511355
// For the lower bound, we have to "tighten" it.
13521356
Min = std::max(Min, MinFromFlatWgrpSize);
@@ -1444,6 +1448,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14441448

14451449
bool Changed = A.run() == ChangeStatus::CHANGED;
14461450

1451+
// We only update the waves-per-eu attribute at the final stage to avoid
1452+
// setting it with intermediate values.
14471453
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
14481454
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
14491455
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,15 @@ AMDGPUSubtarget::getWavesPerEU(const Function &F) const {
216216
return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
217217
}
218218

219+
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
220+
const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
221+
// Minimum number of bytes allocated in the LDS.
222+
unsigned LDSBytes = AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
223+
{0, UINT32_MAX}, true)
224+
.first;
225+
return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
226+
}
227+
219228
std::pair<unsigned, unsigned>
220229
AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
221230
unsigned LDSBytes, const Function &F) const {

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ class AMDGPUSubtarget {
108108
/// size, register usage, and/or lds usage.
109109
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110110

111+
/// Overload which uses the specified values for the flat work group sizes,
112+
/// rather than querying the function itself. \p FlatWorkGroupSizes should
113+
/// correspond to the function's value for getFlatWorkGroupSizes.
114+
std::pair<unsigned, unsigned>
115+
getWavesPerEU(const Function &F,
116+
std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
117+
111118
/// Overload which uses the specified values for the flat workgroup sizes and
112119
/// LDS space rather than querying the function itself. \p FlatWorkGroupSizes
113120
/// should correspond to the function's value for getFlatWorkGroupSizes and \p

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
169169

170170
;.
171171
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
172-
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
173-
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
172+
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
173+
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
174174
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ declare void @unknown()
105105

106106
define amdgpu_kernel void @kernel_calls_extern() {
107107
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
108-
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
108+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
109109
; CHECK-NEXT: call void @unknown()
110110
; CHECK-NEXT: ret void
111111
;
@@ -115,8 +115,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
115115

116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118-
; CHECK-SAME: ) #[[ATTR2]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
118+
; CHECK-SAME: ) #[[ATTR3]] {
119+
; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
125125

126126
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
127127
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
128-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
128+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
129129
; CHECK-NEXT: call void [[INDIRECT]]()
130130
; CHECK-NEXT: ret void
131131
;
@@ -135,8 +135,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
135135

136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
138+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR7]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -252,13 +252,14 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
252252
}
253253

254254

255-
attributes #0 = { "amdgpu-no-agpr" }
255+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
256256
;.
257257
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258258
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" }
261-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" }
262-
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" }
263-
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" }
259+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
261+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
262+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
263+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
264+
; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" }
264265
;.

0 commit comments

Comments
 (0)