Skip to content

Commit d7ee99a

Browse files
[MachineSink] Clear kill flags of sunk addressing mode registers (#75072)
When doing sink-and-fold, MachineSink clears the "killed" flags of the operands of the sunk (and deleted) instruction. However, this is not always sufficient. In some cases the new load/store instruction is created with operands other than the ones present in the deleted instruction. One such example is folding the zero-extension of a 32-bit word into a memory load on AArch64. The zero-extension is represented by a pair of instructions - a `MOV` (i.e. `ORRWrs`) followed by a `SUBREG_TO_REG`. The `SUBREG_TO_REG` is deleted (it is the sunk instruction), but the new load instruction uses a register that is marked "killed" in the `MOV`, so that kill flag is no longer correct. To fix this, clear the "killed" flags of the registers participating in the addressing mode.
1 parent 96ab8ef commit d7ee99a

File tree

2 files changed

+209
-5
lines changed

2 files changed

+209
-5
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -500,11 +500,6 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
500500
return false;
501501

502502
// Now we know we can fold the instruction in all its users.
503-
if (UsedRegA)
504-
MRI->clearKillFlags(UsedRegA);
505-
if (UsedRegB)
506-
MRI->clearKillFlags(UsedRegB);
507-
508503
for (auto &[SinkDst, MaybeAM] : SinkInto) {
509504
MachineInstr *New = nullptr;
510505
LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
@@ -527,9 +522,25 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
527522
New = &*std::prev(InsertPt);
528523
if (!New->getDebugLoc())
529524
New->setDebugLoc(SinkDst->getDebugLoc());
525+
526+
// The operand registers of the "sunk" instruction have their live range
527+
// extended and their kill flags may no longer be correct. Conservatively
528+
// clear the kill flags.
529+
if (UsedRegA)
530+
MRI->clearKillFlags(UsedRegA);
531+
if (UsedRegB)
532+
MRI->clearKillFlags(UsedRegB);
530533
} else {
531534
// Fold instruction into the addressing mode of a memory instruction.
532535
New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);
536+
537+
// The registers of the addressing mode may have their live range extended
538+
// and their kill flags may no longer be correct. Conservatively clear the
539+
// kill flags.
540+
if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
541+
MRI->clearKillFlags(R);
542+
if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
543+
MRI->clearKillFlags(R);
533544
}
534545
LLVM_DEBUG(dbgs() << "yielding"; New->dump());
535546
// Clear the StoreInstrCache, since we may invalidate it by erasing.
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s
3+
4+
# Test that the "killed" flags are cleared in the ORRWrs and SUBSWrr instructions
5+
# in @f and @g, respectively
6+
7+
--- |
8+
source_filename = "crash.ll"
9+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
10+
target triple = "aarch64-linux"
11+
12+
define i32 @f(ptr %image, i32 %i) {
13+
entry:
14+
%add = add i32 %i, 1
15+
%idx = zext i32 %add to i64
16+
br label %A
17+
18+
A: ; preds = %B, %A, %entry
19+
%sunkaddr = getelementptr i8, ptr %image, i64 %idx
20+
%0 = load i8, ptr %sunkaddr, align 1
21+
%cmp153 = icmp eq i8 %0, 0
22+
br i1 %cmp153, label %B, label %A
23+
24+
B: ; preds = %A
25+
store i32 0, ptr %image, align 1
26+
br label %A
27+
}
28+
29+
define i32 @g(i32 %i, i32 %j) {
30+
entry:
31+
%add = add i32 %i, %j
32+
%neg = sub i32 0, %i
33+
br label %A
34+
35+
A: ; preds = %B, %A, %entry
36+
%0 = call i8 @h(i32 %add)
37+
%c = icmp eq i8 %0, 0
38+
br i1 %c, label %B, label %A
39+
40+
B: ; preds = %A
41+
%1 = call i8 @h(i32 %neg)
42+
br label %A
43+
}
44+
45+
declare i8 @h(i32)
46+
47+
...
48+
---
49+
name: f
50+
alignment: 4
51+
tracksRegLiveness: true
52+
registers:
53+
- { id: 0, class: gpr64, preferred-register: '' }
54+
- { id: 1, class: gpr64common, preferred-register: '' }
55+
- { id: 2, class: gpr32common, preferred-register: '' }
56+
- { id: 3, class: gpr32common, preferred-register: '' }
57+
- { id: 4, class: gpr32, preferred-register: '' }
58+
- { id: 5, class: gpr32, preferred-register: '' }
59+
- { id: 6, class: gpr32, preferred-register: '' }
60+
liveins:
61+
- { reg: '$x0', virtual-reg: '%1' }
62+
- { reg: '$w1', virtual-reg: '%2' }
63+
body: |
64+
; CHECK-LABEL: name: f
65+
; CHECK: bb.0.entry:
66+
; CHECK-NEXT: successors: %bb.1(0x80000000)
67+
; CHECK-NEXT: liveins: $x0, $w1
68+
; CHECK-NEXT: {{ $}}
69+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w1
70+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
71+
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 1, 0
72+
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWri]], 0
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: bb.1.A:
75+
; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000)
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[LDRBBroW:%[0-9]+]]:gpr32 = LDRBBroW [[COPY1]], [[ADDWri]], 0, 0 :: (load (s8) from %ir.sunkaddr)
78+
; CHECK-NEXT: CBNZW killed [[LDRBBroW]], %bb.1
79+
; CHECK-NEXT: B %bb.2
80+
; CHECK-NEXT: {{ $}}
81+
; CHECK-NEXT: bb.2.B:
82+
; CHECK-NEXT: successors: %bb.1(0x80000000)
83+
; CHECK-NEXT: {{ $}}
84+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
85+
; CHECK-NEXT: STRWui [[COPY2]], [[COPY1]], 0 :: (store (s32) into %ir.image, align 1)
86+
; CHECK-NEXT: B %bb.1
87+
bb.0.entry:
88+
successors: %bb.1(0x80000000)
89+
liveins: $x0, $w1
90+
91+
%2:gpr32common = COPY $w1
92+
%1:gpr64common = COPY $x0
93+
%3:gpr32common = ADDWri %2, 1, 0
94+
%4:gpr32 = ORRWrs $wzr, killed %3, 0
95+
%0:gpr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32
96+
97+
bb.1.A:
98+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
99+
100+
%5:gpr32 = LDRBBroX %1, %0, 0, 0 :: (load (s8) from %ir.sunkaddr)
101+
CBNZW killed %5, %bb.1
102+
B %bb.2
103+
104+
bb.2.B:
105+
successors: %bb.1(0x80000000)
106+
107+
%6:gpr32 = COPY $wzr
108+
STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1)
109+
B %bb.1
110+
...
111+
---
112+
name: g
113+
alignment: 4
114+
registers:
115+
- { id: 0, class: gpr32all, preferred-register: '' }
116+
- { id: 1, class: gpr32all, preferred-register: '' }
117+
- { id: 2, class: gpr32, preferred-register: '' }
118+
- { id: 3, class: gpr32, preferred-register: '' }
119+
- { id: 4, class: gpr32, preferred-register: '' }
120+
- { id: 5, class: gpr32, preferred-register: '' }
121+
- { id: 6, class: gpr32, preferred-register: '' }
122+
- { id: 7, class: gpr32, preferred-register: '' }
123+
- { id: 8, class: gpr32common, preferred-register: '' }
124+
- { id: 9, class: gpr32all, preferred-register: '' }
125+
liveins:
126+
- { reg: '$w0', virtual-reg: '%2' }
127+
- { reg: '$w1', virtual-reg: '%3' }
128+
body: |
129+
; CHECK-LABEL: name: g
130+
; CHECK: bb.0.entry:
131+
; CHECK-NEXT: successors: %bb.1(0x80000000)
132+
; CHECK-NEXT: liveins: $w0, $w1
133+
; CHECK-NEXT: {{ $}}
134+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
135+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
136+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
137+
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY2]], [[COPY1]], implicit-def dead $nzcv
138+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[SUBSWrr]]
139+
; CHECK-NEXT: {{ $}}
140+
; CHECK-NEXT: bb.1.A:
141+
; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000)
142+
; CHECK-NEXT: {{ $}}
143+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
144+
; CHECK-NEXT: $w0 = ADDWrr [[COPY1]], [[COPY]]
145+
; CHECK-NEXT: BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
146+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
147+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $w0
148+
; CHECK-NEXT: $wzr = ANDSWri [[COPY4]], 7, implicit-def $nzcv
149+
; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
150+
; CHECK-NEXT: B %bb.2
151+
; CHECK-NEXT: {{ $}}
152+
; CHECK-NEXT: bb.2.B:
153+
; CHECK-NEXT: successors: %bb.1(0x80000000)
154+
; CHECK-NEXT: {{ $}}
155+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
156+
; CHECK-NEXT: $w0 = COPY [[COPY3]]
157+
; CHECK-NEXT: BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
158+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
159+
; CHECK-NEXT: B %bb.1
160+
bb.0.entry:
161+
successors: %bb.1(0x80000000)
162+
liveins: $w0, $w1
163+
164+
%3:gpr32 = COPY $w1
165+
%2:gpr32 = COPY $w0
166+
%4:gpr32 = ADDWrr %2, killed %3
167+
%0:gpr32all = COPY %4
168+
%5:gpr32 = COPY $wzr
169+
%6:gpr32 = SUBSWrr %5, killed %2, implicit-def dead $nzcv
170+
%1:gpr32all = COPY %6
171+
172+
bb.1.A:
173+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
174+
175+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
176+
$w0 = COPY %0
177+
BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
178+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
179+
%7:gpr32 = COPY $w0
180+
$wzr = ANDSWri %7, 7, implicit-def $nzcv
181+
Bcc 1, %bb.1, implicit $nzcv
182+
B %bb.2
183+
184+
bb.2.B:
185+
successors: %bb.1(0x80000000)
186+
187+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
188+
$w0 = COPY %1
189+
BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
190+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
191+
B %bb.1
192+
193+
...

0 commit comments

Comments
 (0)