Skip to content

Commit 31f02ac

Browse files
committed
[ARM] Use mov operand if the mov cannot be moved while tail predicating
In some cases the instruction that sets up the element count for a tail-predicated loop cannot be moved before the dlstp, which stops tail predication entirely. When that instruction is a mov, this patch checks whether the mov's source operand holds the same value at the insertion point and, if so, uses that operand instead. Differential Revision: https://reviews.llvm.org/D86087
1 parent cc98a0f commit 31f02ac

File tree

3 files changed

+375
-12
lines changed

3 files changed

+375
-12
lines changed

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ namespace {
226226
MachineInstr *Dec = nullptr;
227227
MachineInstr *End = nullptr;
228228
MachineInstr *VCTP = nullptr;
229+
MachineOperand TPNumElements;
229230
SmallPtrSet<MachineInstr*, 4> SecondaryVCTPs;
230231
VPTBlock *CurrentBlock = nullptr;
231232
SetVector<MachineInstr*> CurrentPredicate;
@@ -239,7 +240,8 @@ namespace {
239240
LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
240241
ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI,
241242
const ARMBaseInstrInfo &TII)
242-
: ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII) {
243+
: ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII),
244+
TPNumElements(MachineOperand::CreateImm(0)) {
243245
MF = ML.getHeader()->getParent();
244246
if (auto *MBB = ML.getLoopPreheader())
245247
Preheader = MBB;
@@ -291,11 +293,10 @@ namespace {
291293

292294
SmallVectorImpl<VPTBlock> &getVPTBlocks() { return VPTBlocks; }
293295

294-
// Return the loop iteration count, or the number of elements if we're tail
295-
// predicating.
296-
MachineOperand &getCount() {
297-
return IsTailPredicationLegal() ?
298-
VCTP->getOperand(1) : Start->getOperand(0);
296+
// Return the operand for the loop start instruction. This will be the loop
297+
// iteration count, or the number of elements if we're tail predicating.
298+
MachineOperand &getLoopStartOperand() {
299+
return IsTailPredicationLegal() ? TPNumElements : Start->getOperand(0);
299300
}
300301

301302
unsigned getStartOpcode() const {
@@ -453,7 +454,8 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
453454
// of the iteration count, to the loop start instruction. The number of
454455
// elements is provided to the vctp instruction, so we need to check that
455456
// we can use this register at InsertPt.
456-
Register NumElements = VCTP->getOperand(1).getReg();
457+
TPNumElements = VCTP->getOperand(1);
458+
Register NumElements = TPNumElements.getReg();
457459

458460
// If the register is defined within loop, then we can't perform TP.
459461
// TODO: Check whether this is just a mov of a register that would be
@@ -466,9 +468,8 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
466468
// The element count register may be defined after InsertPt, in which case we
467469
// need to try to move either InsertPt or the def so that the [w|d]lstp can
468470
// use the value.
469-
// TODO: On failing to move an instruction, check if the count is provided by
470-
// a mov and whether we can use the mov operand directly.
471471
MachineBasicBlock *InsertBB = StartInsertPt->getParent();
472+
472473
if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) {
473474
if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) {
474475
if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) {
@@ -482,9 +483,21 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
482483
StartInsertPt);
483484
LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
484485
} else {
485-
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop "
486-
<< "start instruction.\n");
487-
return false;
486+
// If we fail to move an instruction and the element count is provided
487+
// by a mov, use the mov operand if it will have the same value at the
488+
// insertion point.
489+
MachineOperand Operand = ElemDef->getOperand(1);
490+
if (isMovRegOpcode(ElemDef->getOpcode()) &&
491+
RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) ==
492+
RDA.getUniqueReachingMIDef(StartInsertPt, Operand.getReg())) {
493+
TPNumElements = Operand;
494+
NumElements = TPNumElements.getReg();
495+
} else {
496+
LLVM_DEBUG(dbgs()
497+
<< "ARM Loops: Unable to move element count to loop "
498+
<< "start instruction.\n");
499+
return false;
500+
}
488501
}
489502
}
490503
}
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops -tail-predication=enabled %s -o - | FileCheck %s
3+
4+
--- |
5+
define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) #0 {
6+
entry:
7+
%0 = add i32 %blockSize, 3
8+
%1 = icmp slt i32 %blockSize, 4
9+
%smin = select i1 %1, i32 %blockSize, i32 4
10+
%2 = sub i32 %0, %smin
11+
%3 = lshr i32 %2, 2
12+
%4 = add nuw nsw i32 %3, 1
13+
%5 = icmp slt i32 %blockSize, 4
14+
%smin3 = select i1 %5, i32 %blockSize, i32 4
15+
%6 = sub i32 %0, %smin3
16+
%7 = lshr i32 %6, 2
17+
%8 = add nuw nsw i32 %7, 1
18+
call void @llvm.set.loop.iterations.i32(i32 %8)
19+
br label %do.body.i
20+
21+
do.body.i: ; preds = %do.body.i, %entry
22+
%blkCnt.0.i = phi i32 [ %13, %do.body.i ], [ %blockSize, %entry ]
23+
%sumVec.0.i = phi <4 x float> [ %12, %do.body.i ], [ zeroinitializer, %entry ]
24+
%pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ]
25+
%9 = phi i32 [ %8, %entry ], [ %14, %do.body.i ]
26+
%pSrc.addr.0.i2 = bitcast float* %pSrc.addr.0.i to <4 x float>*
27+
%10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i)
28+
%11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer)
29+
%12 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %11, <4 x i1> %10, <4 x float> %sumVec.0.i)
30+
%add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4
31+
%13 = add i32 %blkCnt.0.i, -4
32+
%14 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1)
33+
%15 = icmp ne i32 %14, 0
34+
br i1 %15, label %do.body.i, label %arm_mean_f32_mve.exit
35+
36+
arm_mean_f32_mve.exit: ; preds = %do.body.i
37+
%16 = extractelement <4 x float> %12, i32 3
38+
%add2.i.i = fadd fast float %16, %16
39+
%conv.i = uitofp i32 %blockSize to float
40+
%div.i = fdiv fast float %add2.i.i, %conv.i
41+
%17 = bitcast float %div.i to i32
42+
%18 = insertelement <4 x i32> undef, i32 %17, i64 0
43+
%19 = shufflevector <4 x i32> %18, <4 x i32> undef, <4 x i32> zeroinitializer
44+
%20 = bitcast <4 x i32> %19 to <4 x float>
45+
call void @llvm.set.loop.iterations.i32(i32 %4)
46+
br label %do.body
47+
48+
do.body: ; preds = %do.body, %arm_mean_f32_mve.exit
49+
%blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %26, %do.body ]
50+
%sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %25, %do.body ]
51+
%pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ]
52+
%21 = phi i32 [ %4, %arm_mean_f32_mve.exit ], [ %27, %do.body ]
53+
%pSrc.addr.01 = bitcast float* %pSrc.addr.0 to <4 x float>*
54+
%22 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
55+
%23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer)
56+
%24 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %23, <4 x float> %20, <4 x i1> %22, <4 x float> undef)
57+
%25 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %24, <4 x float> %24, <4 x float> %sumVec.0, <4 x i1> %22)
58+
%add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
59+
%26 = add i32 %blkCnt.0, -4
60+
%27 = call i32 @llvm.loop.decrement.reg.i32(i32 %21, i32 1)
61+
%28 = icmp ne i32 %27, 0
62+
br i1 %28, label %do.body, label %do.end
63+
64+
do.end: ; preds = %do.body
65+
%29 = extractelement <4 x float> %25, i32 3
66+
%add2.i = fadd fast float %29, %29
67+
%sub2 = add i32 %blockSize, -1
68+
%conv = uitofp i32 %sub2 to float
69+
%div = fdiv fast float %add2.i, %conv
70+
store float %div, float* %pResult, align 4
71+
ret void
72+
}
73+
74+
; Function Attrs: nounwind readnone
75+
declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
76+
77+
; Function Attrs: nounwind readnone
78+
declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1
79+
80+
; Function Attrs: nounwind readnone
81+
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
82+
83+
; Function Attrs: argmemonly nounwind readonly willreturn
84+
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
85+
86+
; Function Attrs: nounwind readnone
87+
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
88+
89+
; Function Attrs: noduplicate nounwind
90+
declare void @llvm.set.loop.iterations.i32(i32) #3
91+
92+
; Function Attrs: noduplicate nounwind
93+
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #3
94+
95+
attributes #0 = { "target-features"="+mve.fp" }
96+
attributes #1 = { nounwind readnone "target-features"="+mve.fp" }
97+
attributes #2 = { argmemonly nounwind readonly willreturn "target-features"="+mve.fp" }
98+
attributes #3 = { noduplicate nounwind }
99+
100+
...
101+
---
102+
name: arm_var_f32_mve
103+
alignment: 2
104+
exposesReturnsTwice: false
105+
legalized: false
106+
regBankSelected: false
107+
selected: false
108+
failedISel: false
109+
tracksRegLiveness: true
110+
hasWinCFI: false
111+
registers: []
112+
liveins:
113+
- { reg: '$r0', virtual-reg: '' }
114+
- { reg: '$r1', virtual-reg: '' }
115+
- { reg: '$r2', virtual-reg: '' }
116+
frameInfo:
117+
isFrameAddressTaken: false
118+
isReturnAddressTaken: false
119+
hasStackMap: false
120+
hasPatchPoint: false
121+
stackSize: 8
122+
offsetAdjustment: 0
123+
maxAlignment: 4
124+
adjustsStack: false
125+
hasCalls: false
126+
stackProtector: ''
127+
maxCallFrameSize: 0
128+
cvBytesOfCalleeSavedRegisters: 0
129+
hasOpaqueSPAdjustment: false
130+
hasVAStart: false
131+
hasMustTailInVarArgFunc: false
132+
localFrameSize: 0
133+
savePoint: ''
134+
restorePoint: ''
135+
fixedStack: []
136+
stack:
137+
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
138+
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
139+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
140+
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
141+
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
142+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
143+
callSites: []
144+
constants: []
145+
machineFunctionInfo: {}
146+
body: |
147+
; CHECK-LABEL: name: arm_var_f32_mve
148+
; CHECK: bb.0.entry:
149+
; CHECK: successors: %bb.1(0x80000000)
150+
; CHECK: liveins: $lr, $r0, $r1, $r2, $r4
151+
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
152+
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
153+
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
154+
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
155+
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
156+
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
157+
; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
158+
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
159+
; CHECK: bb.1.do.body.i:
160+
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
161+
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r12
162+
; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
163+
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
164+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
165+
; CHECK: bb.2.arm_mean_f32_mve.exit:
166+
; CHECK: successors: %bb.3(0x80000000)
167+
; CHECK: liveins: $q0, $r0, $r1, $r2
168+
; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
169+
; CHECK: $lr = MVE_DLSTP_32 $r1
170+
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0
171+
; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
172+
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
173+
; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
174+
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
175+
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
176+
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
177+
; CHECK: bb.3.do.body:
178+
; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000)
179+
; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3
180+
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
181+
; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4)
182+
; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2
183+
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg
184+
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3
185+
; CHECK: bb.4.do.end:
186+
; CHECK: liveins: $q0, $r1, $r2
187+
; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg
188+
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0
189+
; CHECK: $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg
190+
; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg
191+
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg
192+
; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult)
193+
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
194+
bb.0.entry:
195+
successors: %bb.1(0x80000000)
196+
liveins: $r0, $r1, $r2, $r4, $lr
197+
198+
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
199+
frame-setup CFI_INSTRUCTION def_cfa_offset 8
200+
frame-setup CFI_INSTRUCTION offset $lr, -4
201+
frame-setup CFI_INSTRUCTION offset $r4, -8
202+
$r3 = tMOVr $r1, 14 /* CC::al */, $noreg
203+
tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
204+
t2IT 10, 8, implicit-def $itstate
205+
renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
206+
renamable $r12 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
207+
renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg
208+
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
209+
renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
210+
renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
211+
$r3 = tMOVr $r1, 14 /* CC::al */, $noreg
212+
$r12 = tMOVr $r0, 14 /* CC::al */, $noreg
213+
t2DoLoopStart renamable $lr
214+
$r4 = tMOVr $lr, 14 /* CC::al */, $noreg
215+
216+
bb.1.do.body.i:
217+
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
218+
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12
219+
220+
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
221+
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
222+
renamable $lr = t2LoopDec killed renamable $lr, 1
223+
MVE_VPST 4, implicit $vpr
224+
renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
225+
renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, renamable $q0
226+
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
227+
tB %bb.2, 14 /* CC::al */, $noreg
228+
229+
bb.2.arm_mean_f32_mve.exit:
230+
successors: %bb.3(0x80000000)
231+
liveins: $q0, $r0, $r1, $r2, $r4
232+
233+
$s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
234+
$lr = tMOVr $r4, 14 /* CC::al */, $noreg
235+
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
236+
t2DoLoopStart killed $r4
237+
renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
238+
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
239+
renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
240+
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
241+
renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
242+
$r3 = tMOVr $r1, 14 /* CC::al */, $noreg
243+
244+
bb.3.do.body:
245+
successors: %bb.3(0x7c000000), %bb.4(0x04000000)
246+
liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3
247+
248+
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
249+
renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
250+
renamable $lr = t2LoopDec killed renamable $lr, 1
251+
MVE_VPST 2, implicit $vpr
252+
renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.01, align 4)
253+
renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2
254+
renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, renamable $q2, 1, killed renamable $vpr
255+
t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr
256+
tB %bb.4, 14 /* CC::al */, $noreg
257+
258+
bb.4.do.end:
259+
liveins: $q0, $r1, $r2
260+
261+
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg
262+
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
263+
$s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg
264+
renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg
265+
renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg
266+
VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult)
267+
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
268+
269+
...

0 commit comments

Comments
 (0)