Skip to content

Commit c3fe5ad

Browse files
authored
AMDGPU: Handle vcmpx+permlane gfx950 hazard (#117286)
Confusingly, this is a different hazard from the gfx10 vcmpx+permlane hazard, which is the one guarded by a subtarget feature.
1 parent 3db4f5b commit c3fe5ad

File tree

3 files changed

+175
-4
lines changed

3 files changed

+175
-4
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) {
168168
Opcode == AMDGPU::V_PERMLANE64_B32 ||
169169
Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
170170
Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
171-
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
171+
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 ||
172+
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
173+
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
174+
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
175+
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
172176
}
173177

174178
static bool isLdsDma(const MachineInstr &MI) {
@@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
395399
SIInstrInfo::isDS(*MI))
396400
return std::max(WaitStates, checkMAILdStHazards(MI));
397401

402+
if (ST.hasGFX950Insts() && isPermlane(*MI))
403+
return std::max(WaitStates, checkPermlaneHazards(MI));
404+
398405
return WaitStates;
399406
}
400407

@@ -1200,16 +1207,21 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12001207
fixRequiredExportPriority(MI);
12011208
}
12021209

1210+
/// Returns true if \p MI is a vector compare that writes EXEC.
///
/// An instruction matches when it is either VOPC-encoded, or is a compare in
/// the VOP3 or SDWA encoding, and in both cases modifies the EXEC register
/// according to \p TRI.
///
/// \param TII instruction info used for the encoding queries.
/// \param TRI register info passed to the EXEC modification query.
/// \param MI  the instruction to classify.
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
1211+
const MachineInstr &MI) {
1212+
return (TII.isVOPC(MI) ||
1213+
(MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)))) &&
1214+
MI.modifiesRegister(AMDGPU::EXEC, &TRI);
1215+
}
1216+
12031217
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
12041218
if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
12051219
return false;
12061220

12071221
const SIInstrInfo *TII = ST.getInstrInfo();
12081222
const SIRegisterInfo *TRI = ST.getRegisterInfo();
12091223
auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
1210-
return (TII->isVOPC(MI) ||
1211-
((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
1212-
MI.modifiesRegister(AMDGPU::EXEC, TRI);
1224+
return isVCmpXWritesExec(*TII, *TRI, MI);
12131225
};
12141226

12151227
auto IsExpiredFn = [](const MachineInstr &MI, int) {
@@ -2529,6 +2541,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
25292541
return WaitStatesNeeded;
25302542
}
25312543

2544+
/// Computes the wait states still owed before a permlane instruction because
/// of a preceding vcmpx that writes EXEC.
///
/// \p MI is not inspected here; the caller in PreEmitNoopsCommon only invokes
/// this for permlane instructions on subtargets with GFX950 instructions.
///
/// \returns NumWaitStates (4) minus the value reported by getWaitStatesSince
/// for the vcmpx-writes-EXEC predicate, searching at most NumWaitStates back.
/// NOTE(review): getWaitStatesSince is defined elsewhere; whether this result
/// can be negative depends on it - the caller combines it with std::max.
int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
2545+
// The hazard behind ST.hasVcmpxPermlaneHazard() is handled by
// fixVcmpxPermlaneHazards; this function must not run on such subtargets.
assert(!ST.hasVcmpxPermlaneHazard() &&
2546+
"this is a different vcmpx+permlane hazard");
2547+
const SIRegisterInfo *TRI = ST.getRegisterInfo();
2548+
const SIInstrInfo *TII = ST.getInstrInfo();
2549+
2550+
auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) {
2551+
return isVCmpXWritesExec(*TII, *TRI, MI);
2552+
};
2553+
2554+
// Required distance, in wait states, between the vcmpx and the permlane.
const int NumWaitStates = 4;
2555+
return NumWaitStates - getWaitStatesSince(IsVCmpXWritesExecFn, NumWaitStates);
2556+
}
2557+
25322558
static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
25332559
// 2 pass -> 4
25342560
// 4 pass -> 6

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
134134
int checkMFMAPadding(MachineInstr *MI);
135135
int checkMAIVALUHazards(MachineInstr *MI);
136136
int checkMAILdStHazards(MachineInstr *MI);
137+
int checkPermlaneHazards(MachineInstr *MI);
137138

138139
public:
139140
GCNHazardRecognizer(const MachineFunction &MF);
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
2+
3+
---
4+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1
5+
# GCN: V_CMPX_EQ_I32_e32
6+
# GCN-NEXT: S_NOP 3
7+
# GCN-NEXT: V_PERMLANE
8+
name: vcmpx_vopc_write_exec_permlane16_swap_vop1
9+
body: |
10+
bb.0:
11+
liveins: $vgpr0, $vgpr1
12+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
13+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
14+
...
15+
16+
---
17+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1
18+
# GCN: V_CMPX_EQ_I32_e64
19+
# GCN-NEXT: S_NOP 3
20+
# GCN-NEXT: V_PERMLANE
21+
name: vcmpx_vop3_write_exec_permlane16_swap_vop1
22+
body: |
23+
bb.0:
24+
liveins: $vgpr0, $vgpr1
25+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
26+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
27+
...
28+
29+
---
30+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3
31+
# GCN: V_CMPX_EQ_I32_e32
32+
# GCN-NEXT: S_NOP 3
33+
# GCN-NEXT: V_PERMLANE
34+
name: vcmpx_vopc_write_exec_permlane16_swap_vop3
35+
body: |
36+
bb.0:
37+
liveins: $vgpr0, $vgpr1
38+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
39+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
40+
...
41+
42+
---
43+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3
44+
# GCN: V_CMPX_EQ_I32_e64
45+
# GCN-NEXT: S_NOP 3
46+
# GCN-NEXT: V_PERMLANE
47+
name: vcmpx_vop3_write_exec_permlane16_swap_vop3
48+
body: |
49+
bb.0:
50+
liveins: $vgpr0, $vgpr1
51+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
52+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
53+
...
54+
55+
---
56+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1
57+
# GCN: V_CMPX_EQ_I32_e32
58+
# GCN-NEXT: S_NOP 3
59+
# GCN-NEXT: V_PERMLANE
60+
name: vcmpx_vopc_write_exec_permlane32_swap_vop1
61+
body: |
62+
bb.0:
63+
liveins: $vgpr0, $vgpr1
64+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
65+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
66+
...
67+
68+
---
69+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1
70+
# GCN: V_CMPX_EQ_I32_e64
71+
# GCN-NEXT: S_NOP 3
72+
# GCN-NEXT: V_PERMLANE
73+
name: vcmpx_vop3_write_exec_permlane32_swap_vop1
74+
body: |
75+
bb.0:
76+
liveins: $vgpr0, $vgpr1
77+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
78+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
79+
...
80+
81+
---
82+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3
83+
# GCN: V_CMPX_EQ_I32_e32
84+
# GCN-NEXT: S_NOP 3
85+
# GCN-NEXT: V_PERMLANE
86+
name: vcmpx_vopc_write_exec_permlane32_swap_vop3
87+
body: |
88+
bb.0:
89+
liveins: $vgpr0, $vgpr1
90+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
91+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
92+
...
93+
94+
---
95+
# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3
96+
# GCN: V_CMPX_EQ_I32_e64
97+
# GCN-NEXT: S_NOP 3
98+
# GCN-NEXT: V_PERMLANE
99+
name: vcmpx_vop3_write_exec_permlane32_swap_vop3
100+
body: |
101+
bb.0:
102+
liveins: $vgpr0, $vgpr1
103+
$exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
104+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
105+
...
106+
107+
---
108+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
109+
# GCN: V_CMPX_EQ_I32_e32
110+
# GCN-NEXT: V_MOV_B32
111+
# GCN-NEXT: V_MOV_B32
112+
# GCN-NEXT: V_MOV_B32
113+
# GCN-NEXT: V_MOV_B32
114+
# GCN-NEXT: V_PERMLANE
115+
name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
116+
body: |
117+
bb.0:
118+
liveins: $vgpr0, $vgpr1
119+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
120+
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
121+
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
122+
$vgpr4 = V_MOV_B32_e32 0, implicit $exec
123+
$vgpr5 = V_MOV_B32_e32 0, implicit $exec
124+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
125+
...
126+
127+
---
128+
# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
129+
# GCN: V_CMPX_EQ_I32_e32
130+
# GCN-NEXT: V_MOV_B32
131+
# GCN-NEXT: V_MOV_B32
132+
# GCN-NEXT: V_MOV_B32
133+
# GCN-NEXT: S_NOP 0
134+
# GCN-NEXT: V_PERMLANE
135+
name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
136+
body: |
137+
bb.0:
138+
liveins: $vgpr0, $vgpr1
139+
V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
140+
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
141+
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
142+
$vgpr4 = V_MOV_B32_e32 0, implicit $exec
143+
renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
144+
...

0 commit comments

Comments
 (0)