Skip to content

Commit 4616368

Browse files
authored
[AMDGPU] Allow WorkgroupID intrinsics in amdgpu_gfx functions (#89773)
With GFX12 architected SGPRs the workgroup ids are trivially available in any function called from a compute entrypoint.
1 parent 788d159 commit 4616368

File tree

4 files changed

+56
-20
lines changed

4 files changed

+56
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4248,7 +4248,8 @@ bool AMDGPULegalizerInfo::loadInputValue(
42484248
AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
42494249
const ArgDescriptor WorkGroupIDZ =
42504250
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
4251-
if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
4251+
if (ST.hasArchitectedSGPRs() &&
4252+
(AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
42524253
switch (ArgType) {
42534254
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
42544255
Arg = &WorkGroupIDX;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,7 +2124,8 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
21242124
AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
21252125
const ArgDescriptor WorkGroupIDZ =
21262126
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
2127-
if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
2127+
if (Subtarget->hasArchitectedSGPRs() &&
2128+
(AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
21282129
switch (PVID) {
21292130
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
21302131
Reg = &WorkGroupIDX;
@@ -2798,7 +2799,9 @@ SDValue SITargetLowering::LowerFormalArguments(
27982799
(void)UserSGPRInfo;
27992800
if (!Subtarget->enableFlatScratch())
28002801
assert(!UserSGPRInfo.hasFlatScratchInit());
2801-
if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
2802+
if ((CallConv != CallingConv::AMDGPU_CS &&
2803+
CallConv != CallingConv::AMDGPU_Gfx) ||
2804+
!Subtarget->hasArchitectedSGPRs())
28022805
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
28032806
!Info->hasWorkGroupIDZ());
28042807
}

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
110110
}
111111

112112
if (!AMDGPU::isGraphics(CC) ||
113-
(CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
113+
((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_Gfx) &&
114+
ST.hasArchitectedSGPRs())) {
114115
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
115116
WorkGroupIDX = true;
116117

llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3-
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
4-
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s
5-
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s
2+
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
4+
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
5+
; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
66
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
77
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
88

@@ -156,10 +156,37 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
156156
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157157
; GFX9-NEXT: s_setpc_b64 s[30:31]
158158
;
159-
; GFX9ARCH-LABEL: workgroup_ids_gfx:
160-
; GFX9ARCH: ; %bb.0:
161-
; GFX9ARCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162-
; GFX9ARCH-NEXT: s_setpc_b64 s[30:31]
159+
; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
160+
; GFX9ARCH-SDAG: ; %bb.0:
161+
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162+
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v6, ttmp9
163+
; GFX9ARCH-SDAG-NEXT: s_and_b32 s34, ttmp7, 0xffff
164+
; GFX9ARCH-SDAG-NEXT: global_store_dword v[0:1], v6, off
165+
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
166+
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34
167+
; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s34, ttmp7, 16
168+
; GFX9ARCH-SDAG-NEXT: global_store_dword v[2:3], v0, off
169+
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
170+
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34
171+
; GFX9ARCH-SDAG-NEXT: global_store_dword v[4:5], v0, off
172+
; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
173+
; GFX9ARCH-SDAG-NEXT: s_setpc_b64 s[30:31]
174+
;
175+
; GFX9ARCH-GISEL-LABEL: workgroup_ids_gfx:
176+
; GFX9ARCH-GISEL: ; %bb.0:
177+
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178+
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v6, ttmp9
179+
; GFX9ARCH-GISEL-NEXT: s_and_b32 s34, ttmp7, 0xffff
180+
; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s35, ttmp7, 16
181+
; GFX9ARCH-GISEL-NEXT: global_store_dword v[0:1], v6, off
182+
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
183+
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s34
184+
; GFX9ARCH-GISEL-NEXT: global_store_dword v[2:3], v0, off
185+
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
186+
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s35
187+
; GFX9ARCH-GISEL-NEXT: global_store_dword v[4:5], v0, off
188+
; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
189+
; GFX9ARCH-GISEL-NEXT: s_setpc_b64 s[30:31]
163190
;
164191
; GFX12-LABEL: workgroup_ids_gfx:
165192
; GFX12: ; %bb.0:
@@ -168,6 +195,18 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
168195
; GFX12-NEXT: s_wait_samplecnt 0x0
169196
; GFX12-NEXT: s_wait_bvhcnt 0x0
170197
; GFX12-NEXT: s_wait_kmcnt 0x0
198+
; GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
199+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
200+
; GFX12-NEXT: v_dual_mov_b32 v6, ttmp9 :: v_dual_mov_b32 v7, s0
201+
; GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
202+
; GFX12-NEXT: v_mov_b32_e32 v8, s1
203+
; GFX12-NEXT: s_wait_storecnt 0x0
204+
; GFX12-NEXT: global_store_b32 v[0:1], v6, off scope:SCOPE_SYS
205+
; GFX12-NEXT: s_wait_storecnt 0x0
206+
; GFX12-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SYS
207+
; GFX12-NEXT: s_wait_storecnt 0x0
208+
; GFX12-NEXT: global_store_b32 v[4:5], v8, off scope:SCOPE_SYS
209+
; GFX12-NEXT: s_wait_storecnt 0x0
171210
; GFX12-NEXT: s_setpc_b64 s[30:31]
172211
%id.x = call i32 @llvm.amdgcn.workgroup.id.x()
173212
%id.y = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -177,11 +216,3 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
177216
store volatile i32 %id.z, ptr addrspace(1) %outz
178217
ret void
179218
}
180-
181-
declare i32 @llvm.amdgcn.workgroup.id.x()
182-
declare i32 @llvm.amdgcn.workgroup.id.y()
183-
declare i32 @llvm.amdgcn.workgroup.id.z()
184-
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
185-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
186-
; GFX9-GISEL: {{.*}}
187-
; GFX9-SDAG: {{.*}}

0 commit comments

Comments
 (0)