Skip to content

Commit cbe6447

Browse files
committed
Unrevert "[AMDGPU] Do not ignore exec use where exec is read as data"
This patch should not have been reverted, as it is required for correctness: it prevents MachineLICM from hoisting some VALU compare instructions. Also unrevert the follow-up "[AMDGPU] Allow hoisting of some VALU compare instructions". Change-Id: I14469655f7d0539ddf4a6f2940de4a8b668881cb
1 parent 26cf8aa commit cbe6447

File tree

3 files changed

+200
-31
lines changed

3 files changed

+200
-31
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,46 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(
147147
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
148148
}
149149

150+
// Returns true if the scalar result of a VALU instruction depends on exec.
151+
static bool resultDependsOnExec(const MachineInstr &MI) {
152+
// Ignore comparisons which are only used masked with exec.
153+
// This allows some hoisting/sinking of VALU comparisons.
154+
if (MI.isCompare()) {
155+
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
156+
Register DstReg = MI.getOperand(0).getReg();
157+
if (!DstReg.isVirtual())
158+
return true;
159+
for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) {
160+
switch (Use.getOpcode()) {
161+
case AMDGPU::S_AND_SAVEEXEC_B32:
162+
case AMDGPU::S_AND_SAVEEXEC_B64:
163+
break;
164+
case AMDGPU::S_AND_B32:
165+
case AMDGPU::S_AND_B64:
166+
if (!Use.readsRegister(AMDGPU::EXEC))
167+
return true;
168+
break;
169+
default:
170+
return true;
171+
}
172+
}
173+
return false;
174+
}
175+
176+
switch (MI.getOpcode()) {
177+
default:
178+
break;
179+
case AMDGPU::V_READFIRSTLANE_B32:
180+
return true;
181+
}
182+
183+
return false;
184+
}
185+
150186
bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
151187
// Any implicit use of exec by VALU is not a real register read.
152188
return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
153-
isVALU(*MO.getParent());
189+
isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
154190
}
155191

156192
bool SIInstrInfo::isSafeToSink(MachineInstr &MI,
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: hoist_move
6+
tracksRegLiveness: true
7+
body: |
8+
; GCN-LABEL: name: hoist_move
9+
; GCN: bb.0:
10+
; GCN-NEXT: successors: %bb.1(0x80000000)
11+
; GCN-NEXT: {{ $}}
12+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
13+
; GCN-NEXT: S_BRANCH %bb.1
14+
; GCN-NEXT: {{ $}}
15+
; GCN-NEXT: bb.1:
16+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
17+
; GCN-NEXT: {{ $}}
18+
; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
19+
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
20+
; GCN-NEXT: S_BRANCH %bb.2
21+
; GCN-NEXT: {{ $}}
22+
; GCN-NEXT: bb.2:
23+
; GCN-NEXT: S_ENDPGM 0
24+
bb.0:
25+
S_BRANCH %bb.1
26+
27+
bb.1:
28+
%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
29+
$exec = S_OR_B64 $exec, 1, implicit-def $scc
30+
S_CBRANCH_EXECNZ %bb.1, implicit $exec
31+
S_BRANCH %bb.2
32+
33+
bb.2:
34+
S_ENDPGM 0
35+
...
36+
---
37+
name: no_hoist_cmp
38+
tracksRegLiveness: true
39+
body: |
40+
; GCN-LABEL: name: no_hoist_cmp
41+
; GCN: bb.0:
42+
; GCN-NEXT: successors: %bb.1(0x80000000)
43+
; GCN-NEXT: {{ $}}
44+
; GCN-NEXT: S_BRANCH %bb.1
45+
; GCN-NEXT: {{ $}}
46+
; GCN-NEXT: bb.1:
47+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
48+
; GCN-NEXT: {{ $}}
49+
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
50+
; GCN-NEXT: $exec = S_OR_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
51+
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
52+
; GCN-NEXT: S_BRANCH %bb.2
53+
; GCN-NEXT: {{ $}}
54+
; GCN-NEXT: bb.2:
55+
; GCN-NEXT: S_ENDPGM 0
56+
bb.0:
57+
S_BRANCH %bb.1
58+
59+
bb.1:
60+
%0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
61+
$exec = S_OR_B64 $exec, %0:sreg_64, implicit-def $scc
62+
S_CBRANCH_EXECNZ %bb.1, implicit $exec
63+
S_BRANCH %bb.2
64+
65+
bb.2:
66+
S_ENDPGM 0
67+
...
68+
---
69+
name: allowable_hoist_cmp
70+
tracksRegLiveness: true
71+
body: |
72+
; GCN-LABEL: name: allowable_hoist_cmp
73+
; GCN: bb.0:
74+
; GCN-NEXT: successors: %bb.1(0x80000000)
75+
; GCN-NEXT: {{ $}}
76+
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
77+
; GCN-NEXT: S_BRANCH %bb.1
78+
; GCN-NEXT: {{ $}}
79+
; GCN-NEXT: bb.1:
80+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
81+
; GCN-NEXT: {{ $}}
82+
; GCN-NEXT: $exec = S_AND_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
83+
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
84+
; GCN-NEXT: S_BRANCH %bb.2
85+
; GCN-NEXT: {{ $}}
86+
; GCN-NEXT: bb.2:
87+
; GCN-NEXT: S_ENDPGM 0
88+
bb.0:
89+
S_BRANCH %bb.1
90+
91+
bb.1:
92+
%0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
93+
$exec = S_AND_B64 $exec, %0:sreg_64, implicit-def $scc
94+
S_CBRANCH_EXECNZ %bb.1, implicit $exec
95+
S_BRANCH %bb.2
96+
97+
bb.2:
98+
S_ENDPGM 0
99+
...
100+
---
101+
name: no_hoist_readfirstlane
102+
tracksRegLiveness: true
103+
body: |
104+
; GCN-LABEL: name: no_hoist_readfirstlane
105+
; GCN: bb.0:
106+
; GCN-NEXT: successors: %bb.1(0x80000000)
107+
; GCN-NEXT: {{ $}}
108+
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
109+
; GCN-NEXT: S_BRANCH %bb.1
110+
; GCN-NEXT: {{ $}}
111+
; GCN-NEXT: bb.1:
112+
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
113+
; GCN-NEXT: {{ $}}
114+
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF]], implicit $exec
115+
; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
116+
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
117+
; GCN-NEXT: S_BRANCH %bb.2
118+
; GCN-NEXT: {{ $}}
119+
; GCN-NEXT: bb.2:
120+
; GCN-NEXT: S_ENDPGM 0
121+
bb.0:
122+
%0:vgpr_32 = IMPLICIT_DEF
123+
S_BRANCH %bb.1
124+
125+
bb.1:
126+
%1:sgpr_32 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
127+
$exec = S_OR_B64 $exec, 1, implicit-def $scc
128+
S_CBRANCH_EXECNZ %bb.1, implicit $exec
129+
S_BRANCH %bb.2
130+
131+
bb.2:
132+
S_ENDPGM 0
133+
...

llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -110,81 +110,81 @@ define void @my_func(i32 %0) {
110110
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111111
; GCN-NEXT: s_mov_b64 s[4:5], 0
112112
; GCN-NEXT: s_load_dword s10, s[4:5], 0x0
113-
; GCN-NEXT: s_mov_b64 s[6:7], -1
113+
; GCN-NEXT: s_mov_b64 s[8:9], -1
114114
; GCN-NEXT: s_waitcnt lgkmcnt(0)
115115
; GCN-NEXT: s_cmp_lt_i32 s10, 1
116-
; GCN-NEXT: s_mov_b64 s[8:9], 0
116+
; GCN-NEXT: s_mov_b64 s[6:7], 0
117117
; GCN-NEXT: s_cbranch_scc1 .LBB0_7
118118
; GCN-NEXT: ; %bb.1: ; %LeafBlock1
119119
; GCN-NEXT: s_cmp_lg_u32 s10, 1
120-
; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0
120+
; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
121121
; GCN-NEXT: s_mov_b64 vcc, exec
122122
; GCN-NEXT: s_cbranch_execz .LBB0_8
123123
; GCN-NEXT: .LBB0_2: ; %Flow11
124-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
124+
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
125125
; GCN-NEXT: .LBB0_3: ; %do.body
126126
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
127127
; GCN-NEXT: .LBB0_4: ; %Flow17
128-
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
128+
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
129129
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
130130
; GCN-NEXT: ; %bb.5: ; %UnifiedUnreachableBlock
131131
; GCN-NEXT: ; divergent unreachable
132132
; GCN-NEXT: ; %bb.6: ; %UnifiedReturnBlock
133133
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
134134
; GCN-NEXT: s_setpc_b64 s[30:31]
135135
; GCN-NEXT: .LBB0_7: ; %Flow
136-
; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7]
136+
; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9]
137137
; GCN-NEXT: s_cbranch_vccnz .LBB0_2
138138
; GCN-NEXT: .LBB0_8: ; %LeafBlock
139139
; GCN-NEXT: s_cmp_eq_u32 s10, 0
140140
; GCN-NEXT: s_cbranch_scc1 .LBB0_10
141141
; GCN-NEXT: ; %bb.9:
142-
; GCN-NEXT: s_mov_b64 s[8:9], -1
143-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
142+
; GCN-NEXT: s_mov_b64 s[6:7], -1
143+
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
144144
; GCN-NEXT: s_cbranch_execnz .LBB0_3
145145
; GCN-NEXT: s_branch .LBB0_4
146146
; GCN-NEXT: .LBB0_10: ; %NodeBlock7
147147
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
148-
; GCN-NEXT: s_mov_b64 s[10:11], 0
149148
; GCN-NEXT: s_mov_b64 s[8:9], 0
149+
; GCN-NEXT: s_mov_b64 s[6:7], 0
150150
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
151-
; GCN-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
151+
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
152152
; GCN-NEXT: ; %bb.11: ; %LeafBlock5
153-
; GCN-NEXT: s_mov_b64 s[8:9], exec
154-
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v0
155-
; GCN-NEXT: s_and_b64 s[10:11], s[4:5], exec
156-
; GCN-NEXT: ; implicit-def: $vgpr0
153+
; GCN-NEXT: s_mov_b64 s[6:7], exec
154+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
155+
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
157156
; GCN-NEXT: ; %bb.12: ; %Flow13
158-
; GCN-NEXT: s_andn2_saveexec_b64 s[12:13], s[6:7]
157+
; GCN-NEXT: s_andn2_saveexec_b64 s[10:11], s[4:5]
159158
; GCN-NEXT: ; %bb.13: ; %LeafBlock3
160-
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
161-
; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0
159+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
160+
; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
161+
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
162162
; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
163-
; GCN-NEXT: s_andn2_b64 s[10:11], s[10:11], exec
164-
; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec
165163
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec
166-
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[6:7]
167-
; GCN-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5]
164+
; GCN-NEXT: s_and_b64 s[12:13], vcc, exec
165+
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5]
166+
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
168167
; GCN-NEXT: ; %bb.14: ; %Flow14
169-
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
168+
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
170169
; GCN-NEXT: s_mov_b64 s[4:5], 0
171-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[10:11]
170+
; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[8:9]
172171
; GCN-NEXT: s_cbranch_execz .LBB0_18
173172
; GCN-NEXT: ; %bb.15: ; %LeafBlock9
174-
; GCN-NEXT: s_mov_b64 s[10:11], -1
173+
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
174+
; GCN-NEXT: s_mov_b64 s[8:9], -1
175175
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
176176
; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i
177177
; GCN-NEXT: s_mov_b64 s[4:5], exec
178-
; GCN-NEXT: s_xor_b64 s[10:11], exec, -1
178+
; GCN-NEXT: s_xor_b64 s[8:9], exec, -1
179179
; GCN-NEXT: ; %bb.17: ; %Flow16
180180
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
181181
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec
182-
; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
183-
; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec
184-
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
182+
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
183+
; GCN-NEXT: s_and_b64 s[8:9], s[8:9], exec
184+
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
185185
; GCN-NEXT: .LBB0_18: ; %Flow15
186-
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
187-
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
186+
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
187+
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
188188
; GCN-NEXT: s_cbranch_execnz .LBB0_3
189189
; GCN-NEXT: s_branch .LBB0_4
190190
entry:

0 commit comments

Comments
 (0)