Skip to content

Commit f898161

Browse files
authored
[AMDGPU] Fix image_msaa_load waitcnt insertion for pre-gfx12 (#90710)
#90201 made some fixes for gfx12 image_msaa_load waitcnt insertion. That fix might break in some situations for pre-gfx12; this change fixes that by explicitly checking for VSAMPLE, which always requires an s_wait_samplecnt, and leaves the previous logic intact for non-gfx12.
1 parent 5fb1e28 commit f898161

File tree

2 files changed

+30
-6
lines changed

2 files changed

+30
-6
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,12 @@ VmemType getVmemType(const MachineInstr &Inst) {
187187
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
188188
const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
189189
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
190-
// The test for MSAA here is because gfx12+ image_msaa_load is actually
191-
// encoded as VSAMPLE and requires the appropriate s_waitcnt variant for that.
192-
// Pre-gfx12 doesn't care since all vmem types result in the same s_waitcnt.
193-
return BaseInfo->BVH ? VMEM_BVH
194-
: BaseInfo->Sampler || BaseInfo->MSAA ? VMEM_SAMPLER
195-
: VMEM_NOSAMPLER;
190+
// We have to make an additional check for isVSAMPLE here since some
191+
// instructions don't have a sampler, but are still classified as sampler
192+
// instructions for the purposes of e.g. waitcnt.
193+
return BaseInfo->BVH ? VMEM_BVH
194+
: (BaseInfo->Sampler || SIInstrInfo::isVSAMPLE(Inst)) ? VMEM_SAMPLER
195+
: VMEM_NOSAMPLER;
196196
}
197197

198198
unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX11 %s
3+
4+
---
5+
name: sample_load_msaa
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
10+
11+
; GFX11-LABEL: name: sample_load_msaa
12+
; GFX11: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
13+
; GFX11-NEXT: {{ $}}
14+
; GFX11-NEXT: S_WAITCNT 0
15+
; GFX11-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V1_gfx11 killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
16+
; GFX11-NEXT: S_WAITCNT 1015
17+
; GFX11-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_MSAA_LOAD_V4_V2_gfx11 killed renamable $vgpr4_vgpr5, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 4, 7, -1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
18+
; GFX11-NEXT: S_WAITCNT 1015
19+
; GFX11-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3
20+
renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V1_gfx11 killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
21+
renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_MSAA_LOAD_V4_V2_gfx11 killed renamable $vgpr4_vgpr5, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 4, 7, -1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
22+
SI_RETURN_TO_EPILOG killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3
23+
24+
...

0 commit comments

Comments
 (0)