Skip to content

Commit 5752242

Browse files
committed
merge consecutive wait_alu instructions
1 parent d6c0839 commit 5752242

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,21 @@ class AMDGPUWaitSGPRHazards {
164164
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
165165
}
166166

167+
/// Combine two S_WAITCNT_DEPCTR immediates into one.
///
/// The result starts as the bitwise AND of \p Mask1 and \p Mask2. The VmVsrc,
/// VaSdst and VaVdst fields are then overwritten with the minimum of the
/// values decoded from the two inputs.
///
/// NOTE(review): a bitwise AND equals a per-field minimum only for fields that
/// are a single bit wide; confirm that every DepCtr field not re-encoded below
/// is single-bit (or otherwise safe to AND).
///
/// \returns the merged immediate.
unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
168+
unsigned Mask = Mask1 & Mask2;
169+
// Re-encode these fields explicitly with the smaller decoded value from the
// two inputs, replacing whatever the AND above left in them.
// Presumably a lower count is the stricter wait -- TODO confirm.
170+
Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
171+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
172+
AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
173+
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
174+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
175+
AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
176+
Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
177+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
178+
AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
179+
return Mask;
180+
}
181+
167182
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
168183
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
169184

@@ -362,6 +377,13 @@ class AMDGPUWaitSGPRHazards {
362377
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
363378
}
364379
if (Emit) {
380+
if (MI != MI->getParent()->begin()) {
381+
MachineInstr &PrevMI = *std::prev(MI);
382+
if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
383+
Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
384+
PrevMI.eraseFromParent();
385+
}
386+
}
365387
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
366388
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
367389
.addImm(Mask);
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s
3+
# The input already has an S_WAITCNT_DEPCTR (immediate 65530) between the
# V_CMP and the V_CNDMASK. After the amdgpu-wait-sgpr-hazards pass runs, the
# FileCheck assertions expect exactly one S_WAITCNT_DEPCTR (immediate 61946)
# between those two instructions.
4+
5+
---
6+
name: merge_consecutive_wait_alus
7+
exposesReturnsTwice: false
8+
legalized: false
9+
regBankSelected: false
10+
selected: false
11+
failedISel: false
12+
tracksRegLiveness: true
13+
body: |
14+
bb.0:
15+
liveins: $vgpr0
16+
17+
; CHECK-LABEL: name: merge_consecutive_wait_alus
18+
; CHECK: liveins: $vgpr0
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
21+
; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
22+
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
23+
renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
24+
S_WAITCNT_DEPCTR 65530
25+
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
26+
...
27+
28+
29+
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
30+
# CHECK: {{.*}}

0 commit comments

Comments
 (0)