Skip to content

Commit 2171f04

Browse files
committed
[AMDGPU] Extend WorkGroupID* codegen for compute shaders
Currently, codegen support for the llvm.amdgcn.workgroup.id* intrinsics is enabled only for compute kernels. This patch additionally enables their selection for compute shaders on subtargets that have architected SGPRs. Differential Revision: https://reviews.llvm.org/D145045
1 parent 7d4a799 commit 2171f04

File tree

3 files changed

+51
-6
lines changed

3 files changed

+51
-6
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2423,11 +2423,14 @@ SDValue SITargetLowering::LowerFormalArguments(
24232423

24242424
if (IsGraphics) {
24252425
assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
2426-
(!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
2427-
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
2428-
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
2429-
!Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
2430-
!Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
2426+
!Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() &&
2427+
!Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
2428+
!Info->hasWorkItemIDZ());
2429+
if (!Subtarget->enableFlatScratch())
2430+
assert(!Info->hasFlatScratchInit());
2431+
if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
2432+
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
2433+
!Info->hasWorkGroupIDZ());
24312434
}
24322435

24332436
if (CallConv == CallingConv::AMDGPU_PS) {

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
119119
else if (ST.isMesaGfxShader(F))
120120
ImplicitBufferPtr = true;
121121

122-
if (!AMDGPU::isGraphics(CC)) {
122+
if (!AMDGPU::isGraphics(CC) ||
123+
(CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
123124
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
124125
WorkGroupIDX = true;
125126

@@ -128,7 +129,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
128129

129130
if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
130131
WorkGroupIDZ = true;
132+
}
131133

134+
if (!AMDGPU::isGraphics(CC)) {
132135
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
133136
WorkItemIDX = true;
134137

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
4+
; Compute-shader (amdgpu_cs) entry point that reads the X, Y and Z workgroup IDs
; through the llvm.amdgcn.workgroup.id.* intrinsics, packs them into a <3 x i32>
; and writes that vector out with a raw buffer store.
; The CHECK lines below expect X to be taken from ttmp9, Y from the low 16 bits
; of ttmp7 (s_and_b32 ..., 0xffff) and Z from the high 16 bits of ttmp7
; (s_lshr_b32 ..., 16), for both the SelectionDAG and GlobalISel RUN lines.
define amdgpu_cs void @_amdgpu_cs_main() {
5+
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
6+
; GFX9-SDAG: ; %bb.0: ; %.entry
7+
; GFX9-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
8+
; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
9+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
10+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
11+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s2
12+
; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
13+
; GFX9-SDAG-NEXT: s_endpgm
14+
;
15+
; GFX9-GISEL-LABEL: _amdgpu_cs_main:
16+
; GFX9-GISEL: ; %bb.0: ; %.entry
17+
; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
18+
; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
19+
; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
20+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
21+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
22+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
23+
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
24+
; GFX9-GISEL-NEXT: s_endpgm
25+
.entry:
26+
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
27+
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
28+
%idz = call i32 @llvm.amdgcn.workgroup.id.z()
29+
%ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
30+
%ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
31+
%ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
32+
call void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32> %ielemz, <4 x i32> undef, i32 0, i32 0, i32 0)
33+
ret void
34+
}
35+
36+
declare i32 @llvm.amdgcn.workgroup.id.x()
37+
declare i32 @llvm.amdgcn.workgroup.id.y()
38+
declare i32 @llvm.amdgcn.workgroup.id.z()
39+
declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg)

0 commit comments

Comments
 (0)