Skip to content

Commit 1b17ae2

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Handle vcmpx+permlane gfx950 hazard (llvm#117286)
Confusingly, this is a different hazard from the existing vcmpx+permlane hazard on gfx10, which is controlled by a subtarget feature.
1 parent d90707c commit 1b17ae2

File tree

3 files changed

+175
-4
lines changed

3 files changed

+175
-4
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,11 @@ static bool isPermlane(const MachineInstr &MI) {
166166
Opcode == AMDGPU::V_PERMLANE64_B32 ||
167167
Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
168168
Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
169-
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
169+
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 ||
170+
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
171+
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
172+
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
173+
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
170174
}
171175

172176
static bool isLdsDma(const MachineInstr &MI) {
@@ -393,6 +397,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
393397
SIInstrInfo::isDS(*MI))
394398
return std::max(WaitStates, checkMAILdStHazards(MI));
395399

400+
if (ST.hasGFX950Insts() && isPermlane(*MI))
401+
return std::max(WaitStates, checkPermlaneHazards(MI));
402+
396403
return WaitStates;
397404
}
398405

@@ -1196,16 +1203,21 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
11961203
fixVALUMaskWriteHazard(MI);
11971204
}
11981205

1206+
/// Returns true if \p MI is a vector compare that modifies EXEC.
///
/// An instruction qualifies when \p TII reports it as VOPC, or when it is
/// flagged as a compare and \p TII reports it as VOP3 or SDWA; in either case
/// it must also modify AMDGPU::EXEC according to \p TRI.
///
/// \param TII Instruction info used for the VOPC / VOP3 / SDWA queries.
/// \param TRI Register info handed to the EXEC-modification query.
/// \param MI  Instruction to classify.
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
1207+
const MachineInstr &MI) {
1208+
// Shared predicate: both fixVcmpxPermlaneHazards and checkPermlaneHazards
// use it to recognize the EXEC-writing compare that precedes a permlane.
return (TII.isVOPC(MI) ||
1209+
(MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)))) &&
1210+
MI.modifiesRegister(AMDGPU::EXEC, &TRI);
1211+
}
1212+
11991213
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
12001214
if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
12011215
return false;
12021216

12031217
const SIInstrInfo *TII = ST.getInstrInfo();
12041218
const SIRegisterInfo *TRI = ST.getRegisterInfo();
12051219
auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
1206-
return (TII->isVOPC(MI) ||
1207-
((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
1208-
MI.modifiesRegister(AMDGPU::EXEC, TRI);
1220+
return isVCmpXWritesExec(*TII, *TRI, MI);
12091221
};
12101222

12111223
auto IsExpiredFn = [](const MachineInstr &MI, int) {
@@ -2525,6 +2537,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
25252537
return WaitStatesNeeded;
25262538
}
25272539

2540+
/// Computes the wait states still required before a permlane instruction
/// because of an earlier vector compare that writes EXEC.
///
/// PreEmitNoopsCommon calls this only when the subtarget has GFX950
/// instructions and the instruction is a permlane; it combines the result
/// with its running WaitStates via std::max.
///
/// \param MI The permlane instruction being emitted. It is not read in this
///           body; the lambda below declares its own MI parameter.
/// \returns NumWaitStates (4) minus the value reported by getWaitStatesSince
///          for the EXEC-writing-compare predicate. Presumably this is <= 0
///          when no padding is needed -- confirm against getWaitStatesSince.
int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
2541+
// Guard against confusing this with the hazard handled by
// fixVcmpxPermlaneHazards, which is gated on hasVcmpxPermlaneHazard().
assert(!ST.hasVcmpxPermlaneHazard() &&
2542+
"this is a different vcmpx+permlane hazard");
2543+
const SIRegisterInfo *TRI = ST.getRegisterInfo();
2544+
const SIInstrInfo *TII = ST.getInstrInfo();
2545+
2546+
// Predicate handed to getWaitStatesSince: matches a vector compare that
// modifies EXEC (see isVCmpXWritesExec).
auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) {
2547+
return isVCmpXWritesExec(*TII, *TRI, MI);
2548+
};
2549+
2550+
// Four wait states are required between the compare and the permlane; the
// same value is passed as the second argument to getWaitStatesSince.
const int NumWaitStates = 4;
2551+
return NumWaitStates - getWaitStatesSince(IsVCmpXWritesExecFn, NumWaitStates);
2552+
}
2553+
25282554
static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
25292555
// 2 pass -> 4
25302556
// 4 pass -> 6

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
129129
int checkMFMAPadding(MachineInstr *MI);
130130
int checkMAIVALUHazards(MachineInstr *MI);
131131
int checkMAILdStHazards(MachineInstr *MI);
132+
int checkPermlaneHazards(MachineInstr *MI);
132133

133134
public:
134135
GCNHazardRecognizer(const MachineFunction &MF);
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
2+
3+
---
4+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1
5+
# GCN: V_CMPX_EQ_I32_e32
6+
# GCN-NEXT: S_NOP 3
7+
# GCN-NEXT: V_PERMLANE
8+
name: vcmpx_vopc_write_exec_permlane16_swap_vop1
9+
body: |
10+
bb.0:
11+
liveins: $vgpr0, $vgpr1
12+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
13+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
14+
...
15+
16+
---
17+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1
18+
# GCN: V_CMPX_EQ_I32_e64
19+
# GCN-NEXT: S_NOP 3
20+
# GCN-NEXT: V_PERMLANE
21+
name: vcmpx_vop3_write_exec_permlane16_swap_vop1
22+
body: |
23+
bb.0:
24+
liveins: $vgpr0, $vgpr1
25+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
26+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
27+
...
28+
29+
---
30+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3
31+
# GCN: V_CMPX_EQ_I32_e32
32+
# GCN-NEXT: S_NOP 3
33+
# GCN-NEXT: V_PERMLANE
34+
name: vcmpx_vopc_write_exec_permlane16_swap_vop3
35+
body: |
36+
bb.0:
37+
liveins: $vgpr0, $vgpr1
38+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
39+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
40+
...
41+
42+
---
43+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3
44+
# GCN: V_CMPX_EQ_I32_e64
45+
# GCN-NEXT: S_NOP 3
46+
# GCN-NEXT: V_PERMLANE
47+
name: vcmpx_vop3_write_exec_permlane16_swap_vop3
48+
body: |
49+
bb.0:
50+
liveins: $vgpr0, $vgpr1
51+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
52+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
53+
...
54+
55+
---
56+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1
57+
# GCN: V_CMPX_EQ_I32_e32
58+
# GCN-NEXT: S_NOP 3
59+
# GCN-NEXT: V_PERMLANE
60+
name: vcmpx_vopc_write_exec_permlane32_swap_vop1
61+
body: |
62+
bb.0:
63+
liveins: $vgpr0, $vgpr1
64+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
65+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
66+
...
67+
68+
---
69+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1
70+
# GCN: V_CMPX_EQ_I32_e64
71+
# GCN-NEXT: S_NOP 3
72+
# GCN-NEXT: V_PERMLANE
73+
name: vcmpx_vop3_write_exec_permlane32_swap_vop1
74+
body: |
75+
bb.0:
76+
liveins: $vgpr0, $vgpr1
77+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
78+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
79+
...
80+
81+
---
82+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3
83+
# GCN: V_CMPX_EQ_I32_e32
84+
# GCN-NEXT: S_NOP 3
85+
# GCN-NEXT: V_PERMLANE
86+
name: vcmpx_vopc_write_exec_permlane32_swap_vop3
87+
body: |
88+
bb.0:
89+
liveins: $vgpr0, $vgpr1
90+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
91+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
92+
...
93+
94+
---
95+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3
96+
# GCN: V_CMPX_EQ_I32_e64
97+
# GCN-NEXT: S_NOP 3
98+
# GCN-NEXT: V_PERMLANE
99+
name: vcmpx_vop3_write_exec_permlane32_swap_vop3
100+
body: |
101+
bb.0:
102+
liveins: $vgpr0, $vgpr1
103+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
104+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
105+
...
106+
107+
---
108+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
109+
# GCN: V_CMPX_EQ_I32_e32
110+
# GCN-NEXT: V_MOV_B32
111+
# GCN-NEXT: V_MOV_B32
112+
# GCN-NEXT: V_MOV_B32
113+
# GCN-NEXT: V_MOV_B32
114+
# GCN-NEXT: V_PERMLANE
115+
name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
116+
body: |
117+
bb.0:
118+
liveins: $vgpr0, $vgpr1
119+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
120+
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
121+
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
122+
$vgpr4 = V_MOV_B32_e32 0, implicit $exec
123+
$vgpr5 = V_MOV_B32_e32 0, implicit $exec
124+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
125+
...
126+
127+
---
128+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
129+
# GCN: V_CMPX_EQ_I32_e32
130+
# GCN-NEXT: V_MOV_B32
131+
# GCN-NEXT: V_MOV_B32
132+
# GCN-NEXT: V_MOV_B32
133+
# GCN-NEXT: S_NOP 0
134+
# GCN-NEXT: V_PERMLANE
135+
name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
136+
body: |
137+
bb.0:
138+
liveins: $vgpr0, $vgpr1
139+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
140+
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
141+
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
142+
$vgpr4 = V_MOV_B32_e32 0, implicit $exec
143+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
144+
...

0 commit comments

Comments
 (0)