Skip to content

Commit 65ade6d

Browse files
[AMDGPU] Merge consecutive wait_alu instruction (llvm#128916)
1 parent adb44ed commit 65ade6d

File tree

4 files changed

+179
-4
lines changed

4 files changed

+179
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,46 @@ class AMDGPUWaitSGPRHazards {
164164
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
165165
}
166166

167+
unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
168+
unsigned Mask = 0xffff;
169+
Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
170+
Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
171+
AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
172+
Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
173+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
174+
AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
175+
Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
176+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
177+
AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
178+
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
179+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
180+
AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
181+
Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
182+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
183+
AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
184+
Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
185+
Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
186+
AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
187+
Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
188+
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
189+
AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
190+
return Mask;
191+
}
192+
193+
/// Try to fold a wait with immediate \p Mask into an already existing
/// S_WAITCNT_DEPCTR located right before \p MI (debug instructions are
/// skipped when looking backwards).
///
/// \returns true if the preceding S_WAITCNT_DEPCTR had its immediate replaced
/// by the merge of its old value and \p Mask; false if \p MI is the first
/// instruction of its block or the preceding instruction is not an
/// S_WAITCNT_DEPCTR, in which case nothing is modified.
bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                              unsigned Mask) {
  auto *Parent = MI->getParent();
  const auto Begin = Parent->instr_begin();

  if (MI != Begin) {
    auto Prev = prev_nodbg(MI, Begin);
    if (Prev->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
      // Reuse the existing wait instead of emitting a second one.
      auto &Imm = Prev->getOperand(0);
      Imm.setImm(mergeMasks(Mask, Imm.getImm()));
      return true;
    }
  }
  return false;
}
206+
167207
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
168208
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
169209

@@ -362,10 +402,12 @@ class AMDGPUWaitSGPRHazards {
362402
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
363403
}
364404
if (Emit) {
365-
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
366-
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
367-
.addImm(Mask);
368-
updateGetPCBundle(NewMI);
405+
if (!mergeConsecutiveWaitAlus(MI, Mask)) {
406+
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
407+
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
408+
.addImm(Mask);
409+
updateGetPCBundle(NewMI);
410+
}
369411
Emitted = true;
370412
}
371413
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; }
164164
/// \returns SaSdst bit shift
165165
inline unsigned getSaSdstBitShift() { return 0; }
166166

167+
/// \returns VaSsrc width
168+
inline unsigned getVaSsrcBitWidth() { return 1; }
169+
170+
/// \returns VaSsrc bit shift
171+
inline unsigned getVaSsrcBitShift() { return 8; }
172+
173+
/// \returns HoldCnt width
174+
inline unsigned getHoldCntWidth() { return 1; }
175+
176+
/// \returns HoldCnt bit shift
177+
inline unsigned getHoldCntBitShift() { return 7; }
178+
167179
} // end anonymous namespace
168180

169181
namespace llvm {
@@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) {
17401752
return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
17411753
}
17421754

1755+
unsigned decodeFieldVaSsrc(unsigned Encoded) {
1756+
return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
1757+
}
1758+
1759+
unsigned decodeFieldHoldCnt(unsigned Encoded) {
1760+
return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
1761+
}
1762+
17431763
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
17441764
return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
17451765
}
@@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) {
17801800
return encodeFieldVaVcc(0xffff, VaVcc);
17811801
}
17821802

1803+
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
1804+
return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
1805+
}
1806+
1807+
unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
1808+
return encodeFieldVaSsrc(0xffff, VaSsrc);
1809+
}
1810+
1811+
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
1812+
return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
1813+
}
1814+
1815+
unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
1816+
return encodeFieldHoldCnt(0xffff, HoldCnt);
1817+
}
1818+
17831819
} // namespace DepCtr
17841820

17851821
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,12 @@ unsigned decodeFieldVaSdst(unsigned Encoded);
11801180
/// \returns Decoded VaVcc from given immediate \p Encoded.
11811181
unsigned decodeFieldVaVcc(unsigned Encoded);
11821182

1183+
/// \returns Decoded VaSsrc from given immediate \p Encoded.
1184+
unsigned decodeFieldVaSsrc(unsigned Encoded);
1185+
1186+
/// \returns Decoded HoldCnt from given immediate \p Encoded.
1187+
unsigned decodeFieldHoldCnt(unsigned Encoded);
1188+
11831189
/// \returns \p VmVsrc as an encoded Depctr immediate.
11841190
unsigned encodeFieldVmVsrc(unsigned VmVsrc);
11851191

@@ -1210,6 +1216,18 @@ unsigned encodeFieldVaVcc(unsigned VaVcc);
12101216
/// \returns \p Encoded combined with encoded \p VaVcc.
12111217
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
12121218

1219+
/// \returns \p HoldCnt as an encoded Depctr immediate.
1220+
unsigned encodeFieldHoldCnt(unsigned HoldCnt);
1221+
1222+
/// \returns \p Encoded combined with encoded \p HoldCnt.
/// The first argument is the existing immediate and the second is the new
/// field value, matching the definition and the other two-argument
/// encodeField* declarations.
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt);
1224+
1225+
/// \returns \p VaSsrc as an encoded Depctr immediate.
1226+
unsigned encodeFieldVaSsrc(unsigned VaSsrc);
1227+
1228+
/// \returns \p Encoded combined with encoded \p VaSsrc.
1229+
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1230+
12131231
} // namespace DepCtr
12141232

12151233
namespace Exp {
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s
3+
4+
5+
---
6+
name: merge_consecutive_wait_alus
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0
10+
11+
; CHECK-LABEL: name: merge_consecutive_wait_alus
12+
; CHECK: liveins: $vgpr0
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
15+
; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
16+
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
17+
renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
18+
S_WAITCNT_DEPCTR 65530
19+
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
20+
...
21+
---
22+
name: merge_consecutive_wait_alus_two_bb
23+
body: |
24+
; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb
25+
; CHECK: bb.0:
26+
; CHECK-NEXT: successors: %bb.1(0x80000000)
27+
; CHECK-NEXT: liveins: $vgpr0
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
30+
; CHECK-NEXT: S_WAITCNT_DEPCTR 65530
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: bb.1:
33+
; CHECK-NEXT: liveins: $sgpr0
34+
; CHECK-NEXT: {{ $}}
35+
; CHECK-NEXT: S_WAITCNT_DEPCTR 61951
36+
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
37+
bb.0:
38+
liveins: $vgpr0
39+
40+
renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
41+
S_WAITCNT_DEPCTR 65530
42+
43+
bb.1:
44+
liveins: $sgpr0
45+
46+
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
47+
...
48+
---
49+
name: meta_instructions
50+
machineFunctionInfo:
51+
body: |
52+
bb.0:
53+
; CHECK-LABEL: name: meta_instructions
54+
; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
55+
; CHECK-NEXT: S_WAITCNT_DEPCTR 65530
56+
; CHECK-NEXT: SCHED_BARRIER 0
57+
; CHECK-NEXT: S_WAITCNT_DEPCTR 61951
58+
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
59+
renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
60+
S_WAITCNT_DEPCTR 65530
61+
SCHED_BARRIER 0
62+
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
63+
...
64+
---
65+
name: debug_instruction
66+
machineFunctionInfo:
67+
body: |
68+
bb.0:
69+
; CHECK-LABEL: name: debug_instruction
70+
; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
71+
; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
72+
; CHECK-NEXT: DBG_VALUE $sgpr0
73+
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
74+
renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
75+
S_WAITCNT_DEPCTR 65530
76+
DBG_VALUE $sgpr0
77+
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
78+
...
79+

0 commit comments

Comments
 (0)