Skip to content

[AMDGPU] Update EXECZ retention in SIPreEmitPeephole for GFX10/12 #97676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
}
}

bool isWaitcnt(unsigned Opcode) const {
switch (getNonSoftWaitcntOpcode(Opcode)) {
case AMDGPU::S_WAITCNT:
case AMDGPU::S_WAITCNT_VSCNT:
case AMDGPU::S_WAITCNT_VMCNT:
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAIT_LOADCNT:
case AMDGPU::S_WAIT_LOADCNT_DSCNT:
case AMDGPU::S_WAIT_STORECNT:
case AMDGPU::S_WAIT_STORECNT_DSCNT:
case AMDGPU::S_WAIT_SAMPLECNT:
case AMDGPU::S_WAIT_BVHCNT:
case AMDGPU::S_WAIT_EXPCNT:
case AMDGPU::S_WAIT_DSCNT:
case AMDGPU::S_WAIT_KMCNT:
case AMDGPU::S_WAIT_IDLE:
return true;
default:
return false;
}
}

bool isVGPRCopy(const MachineInstr &MI) const {
assert(isCopyInstr(MI));
Register Dest = MI.getOperand(0).getReg();
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(

// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
return true;

++NumInstr;
Expand Down
186 changes: 186 additions & 0 deletions llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s

---
name: skip_waitcnt_vscnt
body: |
; CHECK-LABEL: name: skip_waitcnt_vscnt
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: S_WAITCNT_VSCNT $sgpr_null, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
S_CBRANCH_EXECZ %bb.2, implicit $exec

bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
S_WAITCNT_VSCNT $sgpr_null, 0

bb.2:
S_ENDPGM 0
...

---
name: skip_waitcnt_expcnt
body: |
; CHECK-LABEL: name: skip_waitcnt_expcnt
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
S_CBRANCH_EXECZ %bb.2, implicit $exec

bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
S_WAITCNT_EXPCNT $sgpr_null, 0

bb.2:
S_ENDPGM 0
...

---
name: skip_waitcnt_vmcnt
body: |
; CHECK-LABEL: name: skip_waitcnt_vmcnt
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: S_WAITCNT_VMCNT $sgpr_null, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
S_CBRANCH_EXECZ %bb.2, implicit $exec

bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
S_WAITCNT_VMCNT $sgpr_null, 0

bb.2:
S_ENDPGM 0
...

---
name: skip_waitcnt_lgkmcnt
body: |
; CHECK-LABEL: name: skip_waitcnt_lgkmcnt
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: S_WAITCNT_LGKMCNT $sgpr_null, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
S_CBRANCH_EXECZ %bb.2, implicit $exec

bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
S_WAITCNT_LGKMCNT $sgpr_null, 0

bb.2:
S_ENDPGM 0
...

---
name: skip_wait_idle
body: |
; CHECK-LABEL: name: skip_wait_idle
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: S_WAIT_IDLE
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
S_CBRANCH_EXECZ %bb.2, implicit $exec

bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
S_WAIT_IDLE

bb.2:
S_ENDPGM 0
...

---
name: skip_bvh
body: |
; CHECK-LABEL: name: skip_bvh
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: V_NOP_e32 implicit $exec
; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
S_CBRANCH_EXECZ %bb.2, implicit $exec

bb.1:
successors: %bb.2
V_NOP_e32 implicit $exec
$vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)

bb.2:
S_ENDPGM 0
...
Loading
Loading