Skip to content

Commit 64599ac

Browse files
committed
[MachineSink] Don't reject sinking because of dead def in isProfitableToSinkTo().
An instruction should be sunk (if otherwise legal and profitable) regardless of whether it has a dead def of a physreg. Physreg defs are checked in other places, and sinking is only done with dead defs of regs that are not live into the target MBB. Differential Revision: https://reviews.llvm.org/D150447 Reviewed By: sebastian-ne, arsenm
1 parent a27fd12 commit 64599ac

File tree

8 files changed

+87
-32
lines changed

8 files changed

+87
-32
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -806,12 +806,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
806806
continue;
807807

808808
if (Reg.isPhysical()) {
809-
if (MO.isUse() &&
810-
(MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
811-
continue;
812-
813-
// Don't handle non-constant and non-ignorable physical register.
814-
return false;
809+
// Don't handle non-constant and non-ignorable physical register uses.
810+
if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
811+
return false;
812+
continue;
815813
}
816814

817815
// Users for the defs are all dominated by SuccToSinkTo.

llvm/test/CodeGen/AMDGPU/collapse-endcf.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,8 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
861861
; GCN-NEXT: .LBB5_2: ; %bb10
862862
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
863863
; GCN-NEXT: s_or_b64 exec, exec, s[14:15]
864+
; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
865+
; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
864866
; GCN-NEXT: s_mov_b64 s[6:7], 0
865867
; GCN-NEXT: s_andn2_b64 exec, exec, s[12:13]
866868
; GCN-NEXT: s_cbranch_execz .LBB5_7
@@ -873,12 +875,10 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
873875
; GCN-NEXT: ; %bb.4: ; %bb2
874876
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
875877
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
876-
; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
877878
; GCN-NEXT: s_mov_b32 s9, s8
878879
; GCN-NEXT: s_mov_b32 s10, s8
879880
; GCN-NEXT: s_mov_b32 s11, s8
880881
; GCN-NEXT: v_mov_b32_e32 v0, s8
881-
; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
882882
; GCN-NEXT: v_mov_b32_e32 v1, s9
883883
; GCN-NEXT: v_mov_b32_e32 v2, s10
884884
; GCN-NEXT: v_mov_b32_e32 v3, s11

llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,26 @@ define void @needs_and(i32 %arg) {
1010
; GCN-LABEL: needs_and:
1111
; GCN: ; %bb.0: ; %entry
1212
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13-
; GCN-NEXT: s_mov_b32 s8, 1
13+
; GCN-NEXT: s_mov_b32 s10, 1
1414
; GCN-NEXT: s_mov_b64 s[6:7], 0
1515
; GCN-NEXT: s_branch .LBB0_2
1616
; GCN-NEXT: .LBB0_1: ; %endif
1717
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
18-
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
19-
; GCN-NEXT: s_add_i32 s8, s8, 1
18+
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
19+
; GCN-NEXT: s_and_b64 s[4:5], exec, vcc
20+
; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
21+
; GCN-NEXT: s_add_i32 s10, s10, 1
2022
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
2123
; GCN-NEXT: s_cbranch_execz .LBB0_4
2224
; GCN-NEXT: .LBB0_2: ; %loop
2325
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
24-
; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0
25-
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0
26-
; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
27-
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
26+
; GCN-NEXT: v_cmp_gt_u32_e64 s[4:5], s10, v0
27+
; GCN-NEXT: v_cmp_le_u32_e32 vcc, s10, v0
28+
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
2829
; GCN-NEXT: s_cbranch_execz .LBB0_1
2930
; GCN-NEXT: ; %bb.3: ; %then
3031
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
31-
; GCN-NEXT: s_nop 0
32+
; GCN-NEXT: s_nop 1
3233
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
3334
; GCN-NEXT: s_branch .LBB0_1
3435
; GCN-NEXT: .LBB0_4: ; %loopexit
@@ -107,13 +108,13 @@ define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
107108
; GCN-NEXT: .LBB2_1: ; %endif
108109
; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
109110
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
111+
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
112+
; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
110113
; GCN-NEXT: s_add_i32 s10, s10, 1
111114
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
112115
; GCN-NEXT: s_cbranch_execz .LBB2_4
113116
; GCN-NEXT: .LBB2_2: ; %loop
114117
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
115-
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
116-
; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
117118
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0
118119
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
119120
; GCN-NEXT: s_cbranch_execz .LBB2_1

llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i
1313
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
1414
; GCN-NEXT: s_waitcnt_depctr 0xffe3
1515
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s8
16+
; GCN-NEXT: s_and_b32 s8, exec_lo, s6
17+
; GCN-NEXT: s_or_b32 s7, s8, s7
1618
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
1719
; GCN-NEXT: s_cbranch_execz .LBB0_5
1820
; GCN-NEXT: .LBB0_2: ; %bb
1921
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
20-
; GCN-NEXT: s_and_b32 s8, exec_lo, s6
21-
; GCN-NEXT: s_or_b32 s7, s8, s7
2222
; GCN-NEXT: s_and_saveexec_b32 s8, vcc_lo
2323
; GCN-NEXT: s_cbranch_execz .LBB0_1
2424
; GCN-NEXT: ; %bb.3: ; %bb1
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z15 -O3 -run-pass=machine-sink %s -o - \
2+
# RUN: -verify-machineinstrs | FileCheck %s
3+
#
4+
# Test that the AGHIK can be sunk into %bb.4. It has a def of CC, but it is dead.
5+
6+
--- |
7+
define void @fun() { ret void }
8+
...
9+
10+
# CHECK-LABEL: bb.4:
11+
# CHECK: %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
12+
# CHECK-NEXT: CGHI %1, 0, implicit-def $cc
13+
# CHECK-NEXT: BRC 14, 6, %bb.1, implicit $cc
14+
# CHECK-NEXT: J %bb.5
15+
16+
17+
---
18+
name: fun
19+
alignment: 16
20+
tracksRegLiveness: true
21+
registers:
22+
- { id: 0, class: gr64bit }
23+
- { id: 1, class: gr64bit }
24+
- { id: 2, class: grx32bit }
25+
- { id: 3, class: gr64bit }
26+
frameInfo:
27+
maxAlignment: 1
28+
machineFunctionInfo: {}
29+
body: |
30+
bb.0:
31+
32+
%2:grx32bit = LHIMux 0
33+
%3:gr64bit = IMPLICIT_DEF
34+
35+
bb.1:
36+
37+
%0:gr64bit = PHI %3, %bb.0, %1, %bb.4
38+
39+
bb.2:
40+
41+
%1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
42+
CHIMux %2, 0, implicit-def $cc
43+
BRC 14, 6, %bb.4, implicit $cc
44+
J %bb.3
45+
46+
bb.3:
47+
48+
bb.4:
49+
50+
CGHI %1, 0, implicit-def $cc
51+
BRC 14, 6, %bb.1, implicit $cc
52+
J %bb.5
53+
54+
bb.5:
55+
Return
56+
57+
...

llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,17 +102,17 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
102102
; CHECK-NEXT: jns .LBB0_20
103103
; CHECK-NEXT: .LBB0_5: # %a50b
104104
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
105-
; CHECK-NEXT: shrl $31, %r9d
106105
; CHECK-NEXT: movl %eax, %r10d
107106
; CHECK-NEXT: orl %esi, %r10d
108107
; CHECK-NEXT: jns .LBB0_26
109108
; CHECK-NEXT: .LBB0_6: # %a57b
110109
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
111-
; CHECK-NEXT: shrl $31, %r10d
110+
; CHECK-NEXT: shrl $31, %r9d
112111
; CHECK-NEXT: testb %r9b, %r9b
113112
; CHECK-NEXT: je .LBB0_30
114113
; CHECK-NEXT: .LBB0_7: # %a66b
115114
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
115+
; CHECK-NEXT: shrl $31, %r10d
116116
; CHECK-NEXT: testb %r10b, %r10b
117117
; CHECK-NEXT: jne .LBB0_8
118118
; CHECK-NEXT: .p2align 4, 0x90

llvm/test/CodeGen/X86/dag-update-nodetomatch.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,8 @@ define void @_Z2x6v() local_unnamed_addr {
196196
; CHECK-NEXT: ja .LBB1_14
197197
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
198198
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
199-
; CHECK-NEXT: leaq -4(%rcx), %r8
200-
; CHECK-NEXT: movq %r8, %r11
201-
; CHECK-NEXT: shrq $2, %r11
202-
; CHECK-NEXT: btl $2, %r8d
199+
; CHECK-NEXT: leaq -4(%rcx), %r11
200+
; CHECK-NEXT: btl $2, %r11d
203201
; CHECK-NEXT: jb .LBB1_8
204202
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
205203
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
@@ -208,12 +206,12 @@ define void @_Z2x6v() local_unnamed_addr {
208206
; CHECK-NEXT: movdqu %xmm0, (%r13,%rbp,8)
209207
; CHECK-NEXT: movdqu %xmm0, 16(%r13,%rbp,8)
210208
; CHECK-NEXT: movl $4, %r10d
211-
; CHECK-NEXT: testq %r11, %r11
209+
; CHECK-NEXT: shrq $2, %r11
212210
; CHECK-NEXT: jne .LBB1_11
213211
; CHECK-NEXT: jmp .LBB1_13
214212
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
215213
; CHECK-NEXT: xorl %r10d, %r10d
216-
; CHECK-NEXT: testq %r11, %r11
214+
; CHECK-NEXT: shrq $2, %r11
217215
; CHECK-NEXT: je .LBB1_13
218216
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
219217
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1

llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,11 @@ zero:
6060
; CHECK: JMP_1 %bb.4
6161
; CHECK: bb.4
6262
; CHECK: bb.5
63-
; CHECK: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
64-
; CHECK-LV: %3:gr64 = COPY killed %10
65-
; CHECK-LIS: %3:gr64 = COPY %10
66-
; CHECK-LV: TEST64rr killed %1, %1, implicit-def $eflags
63+
; CHECK: %3:gr64 = COPY %10
64+
; CHECK-LV: %4:gr64 = COPY killed %10
65+
; CHECK-LV: %4:gr64 = nuw ADD64ri8 %4, 8, implicit-def dead $eflags
66+
; CHECK-LIS: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
67+
; CHECK: TEST64rr killed %1, %1, implicit-def $eflags
6768
; CHECK: JCC_1 %bb.1, 5, implicit killed $eflags
6869
; CHECK: JMP_1 %bb.6
6970
define void @test2(i8 addrspace(1)* %this, i32 %0, i32 addrspace(1)* %p0, i8 addrspace(1)* %p1) gc "statepoint-example" personality i32* ()* @fake_personality_function {

0 commit comments

Comments
 (0)