Skip to content

Commit 835104a

Browse files
committed
[LSR] Drop potentially invalid nowrap flags when switching to post-inc IV (PR46943)
When LSR converts a branch on the pre-inc IV into a branch on the post-inc IV, the nowrap flags on the addition may no longer be valid. Previously, a poison result of the addition might have been ignored, in which case the program was well defined. After branching on the post-inc IV, we might be branching on poison, which is undefined behavior. Fix this by discarding nowrap flags which are not present on the SCEV expression. Nowrap flags on the SCEV expression are proven by SCEV to always hold, independently of how the expression will be used. This is essentially the same fix we applied to IndVars LFTR, which also performs this kind of pre-inc to post-inc conversion. I believe a similar problem can also exist for getelementptr inbounds, but I was not able to come up with a problematic test case. The inbounds case would have to be addressed differently anyway (as SCEV does not track this property). Fixes https://bugs.llvm.org/show_bug.cgi?id=46943. Differential Revision: https://reviews.llvm.org/D95286
1 parent 15141cd commit 835104a

File tree

6 files changed

+257
-274
lines changed

6 files changed

+257
-274
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,6 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
14401440
assert(LatchBlock && "PostInc mode requires a unique loop latch!");
14411441
Result = PN->getIncomingValueForBlock(LatchBlock);
14421442

1443+
// We might be introducing a new use of the post-inc IV that is not poison
1444+
// safe, in which case we should drop poison generating flags. Only keep
1445+
// those flags for which SCEV has proven that they always hold.
1446+
if (isa<OverflowingBinaryOperator>(Result)) {
1447+
auto *I = cast<Instruction>(Result);
1448+
if (!S->hasNoUnsignedWrap())
1449+
I->setHasNoUnsignedWrap(false);
1450+
if (!S->hasNoSignedWrap())
1451+
I->setHasNoSignedWrap(false);
1452+
}
1453+
14431454
// For an expansion to use the postinc form, the client must call
14441455
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
14451456
// or dominated by IVIncInsertPos.

llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
2727
; CHECK-NEXT: beq .LBB0_4
2828
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
2929
; CHECK-NEXT: subs r5, r3, #1
30-
; CHECK-NEXT: and r7, r3, #3
30+
; CHECK-NEXT: and lr, r3, #3
3131
; CHECK-NEXT: cmp r5, #3
3232
; CHECK-NEXT: bhs .LBB0_6
3333
; CHECK-NEXT: @ %bb.3:
34-
; CHECK-NEXT: mov.w r12, #0
34+
; CHECK-NEXT: movs r3, #0
3535
; CHECK-NEXT: b .LBB0_8
3636
; CHECK-NEXT: .LBB0_4: @ %vector.ph
3737
; CHECK-NEXT: mov.w r12, #0
@@ -46,44 +46,40 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
4646
; CHECK-NEXT: letp lr, .LBB0_5
4747
; CHECK-NEXT: b .LBB0_11
4848
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new
49-
; CHECK-NEXT: bic r3, r3, #3
50-
; CHECK-NEXT: movs r5, #1
51-
; CHECK-NEXT: subs r3, #4
52-
; CHECK-NEXT: mov.w r12, #0
53-
; CHECK-NEXT: add.w lr, r5, r3, lsr #2
49+
; CHECK-NEXT: sub.w r12, r3, lr
50+
; CHECK-NEXT: movs r4, #0
5451
; CHECK-NEXT: movs r3, #0
55-
; CHECK-NEXT: dls lr, lr
5652
; CHECK-NEXT: .LBB0_7: @ %for.body
5753
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
58-
; CHECK-NEXT: adds r4, r1, r3
59-
; CHECK-NEXT: adds r5, r2, r3
60-
; CHECK-NEXT: adds r6, r0, r3
61-
; CHECK-NEXT: adds r3, #16
62-
; CHECK-NEXT: vldr s0, [r4]
63-
; CHECK-NEXT: add.w r12, r12, #4
64-
; CHECK-NEXT: vldr s2, [r5]
54+
; CHECK-NEXT: adds r5, r1, r4
55+
; CHECK-NEXT: adds r6, r2, r4
56+
; CHECK-NEXT: adds r7, r0, r4
57+
; CHECK-NEXT: adds r3, #4
58+
; CHECK-NEXT: vldr s0, [r5]
59+
; CHECK-NEXT: adds r4, #16
60+
; CHECK-NEXT: vldr s2, [r6]
61+
; CHECK-NEXT: cmp r12, r3
6562
; CHECK-NEXT: vmul.f32 s0, s2, s0
66-
; CHECK-NEXT: vstr s0, [r6]
67-
; CHECK-NEXT: vldr s0, [r4, #4]
68-
; CHECK-NEXT: vldr s2, [r5, #4]
63+
; CHECK-NEXT: vstr s0, [r7]
64+
; CHECK-NEXT: vldr s0, [r5, #4]
65+
; CHECK-NEXT: vldr s2, [r6, #4]
6966
; CHECK-NEXT: vmul.f32 s0, s2, s0
70-
; CHECK-NEXT: vstr s0, [r6, #4]
71-
; CHECK-NEXT: vldr s0, [r4, #8]
72-
; CHECK-NEXT: vldr s2, [r5, #8]
67+
; CHECK-NEXT: vstr s0, [r7, #4]
68+
; CHECK-NEXT: vldr s0, [r5, #8]
69+
; CHECK-NEXT: vldr s2, [r6, #8]
7370
; CHECK-NEXT: vmul.f32 s0, s2, s0
74-
; CHECK-NEXT: vstr s0, [r6, #8]
75-
; CHECK-NEXT: vldr s0, [r4, #12]
76-
; CHECK-NEXT: vldr s2, [r5, #12]
71+
; CHECK-NEXT: vstr s0, [r7, #8]
72+
; CHECK-NEXT: vldr s0, [r5, #12]
73+
; CHECK-NEXT: vldr s2, [r6, #12]
7774
; CHECK-NEXT: vmul.f32 s0, s2, s0
78-
; CHECK-NEXT: vstr s0, [r6, #12]
79-
; CHECK-NEXT: le lr, .LBB0_7
75+
; CHECK-NEXT: vstr s0, [r7, #12]
76+
; CHECK-NEXT: bne .LBB0_7
8077
; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup.loopexit.unr-lcssa
81-
; CHECK-NEXT: wls lr, r7, .LBB0_11
78+
; CHECK-NEXT: wls lr, lr, .LBB0_11
8279
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
83-
; CHECK-NEXT: add.w r1, r1, r12, lsl #2
84-
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
85-
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
86-
; CHECK-NEXT: mov lr, r7
80+
; CHECK-NEXT: add.w r1, r1, r3, lsl #2
81+
; CHECK-NEXT: add.w r2, r2, r3, lsl #2
82+
; CHECK-NEXT: add.w r0, r0, r3, lsl #2
8783
; CHECK-NEXT: .LBB0_10: @ %for.body.epil
8884
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
8985
; CHECK-NEXT: vldr s0, [r1]

llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll

Lines changed: 69 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,58 +1459,53 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
14591459
; CHECK-NEXT: cbz r2, .LBB9_3
14601460
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
14611461
; CHECK-NEXT: subs r3, r2, #1
1462-
; CHECK-NEXT: and r5, r2, #3
1462+
; CHECK-NEXT: and lr, r2, #3
1463+
; CHECK-NEXT: vldr s0, .LCPI9_0
14631464
; CHECK-NEXT: cmp r3, #3
14641465
; CHECK-NEXT: bhs .LBB9_4
14651466
; CHECK-NEXT: @ %bb.2:
1466-
; CHECK-NEXT: vldr s0, .LCPI9_0
1467-
; CHECK-NEXT: mov.w r12, #0
1467+
; CHECK-NEXT: movs r2, #0
14681468
; CHECK-NEXT: b .LBB9_6
14691469
; CHECK-NEXT: .LBB9_3:
14701470
; CHECK-NEXT: vldr s0, .LCPI9_0
14711471
; CHECK-NEXT: b .LBB9_9
14721472
; CHECK-NEXT: .LBB9_4: @ %for.body.preheader.new
1473-
; CHECK-NEXT: bic r2, r2, #3
1474-
; CHECK-NEXT: movs r3, #1
1475-
; CHECK-NEXT: subs r2, #4
1476-
; CHECK-NEXT: vldr s0, .LCPI9_0
1477-
; CHECK-NEXT: mov.w r12, #0
1478-
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
1473+
; CHECK-NEXT: sub.w r12, r2, lr
14791474
; CHECK-NEXT: movs r3, #0
1480-
; CHECK-NEXT: dls lr, lr
1475+
; CHECK-NEXT: movs r2, #0
14811476
; CHECK-NEXT: .LBB9_5: @ %for.body
14821477
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
1483-
; CHECK-NEXT: adds r4, r0, r3
1484-
; CHECK-NEXT: adds r2, r1, r3
1485-
; CHECK-NEXT: vldr.16 s2, [r2, #6]
1486-
; CHECK-NEXT: vldr.16 s4, [r4, #6]
1487-
; CHECK-NEXT: vldr.16 s6, [r4, #4]
1488-
; CHECK-NEXT: vldr.16 s8, [r4, #2]
1478+
; CHECK-NEXT: adds r5, r0, r3
1479+
; CHECK-NEXT: adds r4, r1, r3
1480+
; CHECK-NEXT: vldr.16 s2, [r4, #6]
1481+
; CHECK-NEXT: vldr.16 s4, [r5, #6]
1482+
; CHECK-NEXT: vldr.16 s6, [r5, #4]
1483+
; CHECK-NEXT: vldr.16 s8, [r5, #2]
14891484
; CHECK-NEXT: vmul.f16 s2, s4, s2
1490-
; CHECK-NEXT: vldr.16 s4, [r2, #4]
1491-
; CHECK-NEXT: vldr.16 s10, [r4]
1485+
; CHECK-NEXT: vldr.16 s4, [r4, #4]
1486+
; CHECK-NEXT: vldr.16 s10, [r5]
14921487
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
14931488
; CHECK-NEXT: vmul.f16 s4, s6, s4
1494-
; CHECK-NEXT: vldr.16 s6, [r2, #2]
1489+
; CHECK-NEXT: vldr.16 s6, [r4, #2]
14951490
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
1496-
; CHECK-NEXT: adds r3, #8
1491+
; CHECK-NEXT: adds r2, #4
14971492
; CHECK-NEXT: vmul.f16 s6, s8, s6
1498-
; CHECK-NEXT: vldr.16 s8, [r2]
1493+
; CHECK-NEXT: vldr.16 s8, [r4]
14991494
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
1500-
; CHECK-NEXT: add.w r12, r12, #4
1495+
; CHECK-NEXT: adds r3, #8
15011496
; CHECK-NEXT: vmul.f16 s8, s10, s8
1497+
; CHECK-NEXT: cmp r12, r2
15021498
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
15031499
; CHECK-NEXT: vadd.f32 s0, s0, s8
15041500
; CHECK-NEXT: vadd.f32 s0, s0, s6
15051501
; CHECK-NEXT: vadd.f32 s0, s0, s4
15061502
; CHECK-NEXT: vadd.f32 s0, s0, s2
1507-
; CHECK-NEXT: le lr, .LBB9_5
1503+
; CHECK-NEXT: bne .LBB9_5
15081504
; CHECK-NEXT: .LBB9_6: @ %for.cond.cleanup.loopexit.unr-lcssa
1509-
; CHECK-NEXT: wls lr, r5, .LBB9_9
1505+
; CHECK-NEXT: wls lr, lr, .LBB9_9
15101506
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
1511-
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
1512-
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
1513-
; CHECK-NEXT: mov lr, r5
1507+
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
1508+
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
15141509
; CHECK-NEXT: .LBB9_8: @ %for.body.epil
15151510
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
15161511
; CHECK-NEXT: vldr.16 s2, [r1]
@@ -1616,58 +1611,53 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
16161611
; CHECK-NEXT: cbz r2, .LBB10_3
16171612
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
16181613
; CHECK-NEXT: subs r3, r2, #1
1619-
; CHECK-NEXT: and r5, r2, #3
1614+
; CHECK-NEXT: and lr, r2, #3
1615+
; CHECK-NEXT: vldr s0, .LCPI10_0
16201616
; CHECK-NEXT: cmp r3, #3
16211617
; CHECK-NEXT: bhs .LBB10_4
16221618
; CHECK-NEXT: @ %bb.2:
1623-
; CHECK-NEXT: vldr s0, .LCPI10_0
1624-
; CHECK-NEXT: mov.w r12, #0
1619+
; CHECK-NEXT: movs r2, #0
16251620
; CHECK-NEXT: b .LBB10_6
16261621
; CHECK-NEXT: .LBB10_3:
16271622
; CHECK-NEXT: vldr s0, .LCPI10_0
16281623
; CHECK-NEXT: b .LBB10_9
16291624
; CHECK-NEXT: .LBB10_4: @ %for.body.preheader.new
1630-
; CHECK-NEXT: bic r2, r2, #3
1631-
; CHECK-NEXT: movs r3, #1
1632-
; CHECK-NEXT: subs r2, #4
1633-
; CHECK-NEXT: vldr s0, .LCPI10_0
1634-
; CHECK-NEXT: mov.w r12, #0
1635-
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
1625+
; CHECK-NEXT: sub.w r12, r2, lr
16361626
; CHECK-NEXT: movs r3, #0
1637-
; CHECK-NEXT: dls lr, lr
1627+
; CHECK-NEXT: movs r2, #0
16381628
; CHECK-NEXT: .LBB10_5: @ %for.body
16391629
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
1640-
; CHECK-NEXT: adds r4, r0, r3
1641-
; CHECK-NEXT: adds r2, r1, r3
1642-
; CHECK-NEXT: vldr.16 s2, [r2, #6]
1643-
; CHECK-NEXT: vldr.16 s4, [r4, #6]
1644-
; CHECK-NEXT: vldr.16 s6, [r4, #4]
1645-
; CHECK-NEXT: vldr.16 s8, [r4, #2]
1630+
; CHECK-NEXT: adds r5, r0, r3
1631+
; CHECK-NEXT: adds r4, r1, r3
1632+
; CHECK-NEXT: vldr.16 s2, [r4, #6]
1633+
; CHECK-NEXT: vldr.16 s4, [r5, #6]
1634+
; CHECK-NEXT: vldr.16 s6, [r5, #4]
1635+
; CHECK-NEXT: vldr.16 s8, [r5, #2]
16461636
; CHECK-NEXT: vadd.f16 s2, s4, s2
1647-
; CHECK-NEXT: vldr.16 s4, [r2, #4]
1648-
; CHECK-NEXT: vldr.16 s10, [r4]
1637+
; CHECK-NEXT: vldr.16 s4, [r4, #4]
1638+
; CHECK-NEXT: vldr.16 s10, [r5]
16491639
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
16501640
; CHECK-NEXT: vadd.f16 s4, s6, s4
1651-
; CHECK-NEXT: vldr.16 s6, [r2, #2]
1641+
; CHECK-NEXT: vldr.16 s6, [r4, #2]
16521642
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
1653-
; CHECK-NEXT: adds r3, #8
1643+
; CHECK-NEXT: adds r2, #4
16541644
; CHECK-NEXT: vadd.f16 s6, s8, s6
1655-
; CHECK-NEXT: vldr.16 s8, [r2]
1645+
; CHECK-NEXT: vldr.16 s8, [r4]
16561646
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
1657-
; CHECK-NEXT: add.w r12, r12, #4
1647+
; CHECK-NEXT: adds r3, #8
16581648
; CHECK-NEXT: vadd.f16 s8, s10, s8
1649+
; CHECK-NEXT: cmp r12, r2
16591650
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
16601651
; CHECK-NEXT: vadd.f32 s0, s0, s8
16611652
; CHECK-NEXT: vadd.f32 s0, s0, s6
16621653
; CHECK-NEXT: vadd.f32 s0, s0, s4
16631654
; CHECK-NEXT: vadd.f32 s0, s0, s2
1664-
; CHECK-NEXT: le lr, .LBB10_5
1655+
; CHECK-NEXT: bne .LBB10_5
16651656
; CHECK-NEXT: .LBB10_6: @ %for.cond.cleanup.loopexit.unr-lcssa
1666-
; CHECK-NEXT: wls lr, r5, .LBB10_9
1657+
; CHECK-NEXT: wls lr, lr, .LBB10_9
16671658
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
1668-
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
1669-
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
1670-
; CHECK-NEXT: mov lr, r5
1659+
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
1660+
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
16711661
; CHECK-NEXT: .LBB10_8: @ %for.body.epil
16721662
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
16731663
; CHECK-NEXT: vldr.16 s2, [r1]
@@ -1773,65 +1763,60 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
17731763
; CHECK-NEXT: cbz r2, .LBB11_3
17741764
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
17751765
; CHECK-NEXT: subs r3, r2, #1
1776-
; CHECK-NEXT: and r6, r2, #3
1766+
; CHECK-NEXT: and lr, r2, #3
1767+
; CHECK-NEXT: vldr s0, .LCPI11_0
17771768
; CHECK-NEXT: cmp r3, #3
17781769
; CHECK-NEXT: bhs .LBB11_4
17791770
; CHECK-NEXT: @ %bb.2:
1780-
; CHECK-NEXT: vldr s0, .LCPI11_0
1781-
; CHECK-NEXT: mov.w r12, #0
1771+
; CHECK-NEXT: movs r2, #0
17821772
; CHECK-NEXT: b .LBB11_6
17831773
; CHECK-NEXT: .LBB11_3:
17841774
; CHECK-NEXT: vldr s0, .LCPI11_0
17851775
; CHECK-NEXT: b .LBB11_9
17861776
; CHECK-NEXT: .LBB11_4: @ %for.body.preheader.new
1787-
; CHECK-NEXT: bic r2, r2, #3
1788-
; CHECK-NEXT: movs r3, #1
1789-
; CHECK-NEXT: subs r2, #4
1790-
; CHECK-NEXT: vldr s0, .LCPI11_0
1791-
; CHECK-NEXT: mov.w r12, #0
1792-
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
1777+
; CHECK-NEXT: sub.w r12, r2, lr
17931778
; CHECK-NEXT: adds r3, r1, #4
1794-
; CHECK-NEXT: dls lr, lr
1795-
; CHECK-NEXT: adds r2, r0, #4
1779+
; CHECK-NEXT: adds r4, r0, #4
1780+
; CHECK-NEXT: movs r2, #0
17961781
; CHECK-NEXT: .LBB11_5: @ %for.body
17971782
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
1798-
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
1799-
; CHECK-NEXT: vldr.16 s2, [r2, #2]
1800-
; CHECK-NEXT: add.w r12, r12, #4
1801-
; CHECK-NEXT: vmov s4, r4
1802-
; CHECK-NEXT: ldrsh r4, [r3], #8
1783+
; CHECK-NEXT: ldrsh.w r5, [r3, #2]
1784+
; CHECK-NEXT: vldr.16 s2, [r4, #2]
1785+
; CHECK-NEXT: adds r2, #4
1786+
; CHECK-NEXT: cmp r12, r2
1787+
; CHECK-NEXT: vmov s4, r5
1788+
; CHECK-NEXT: ldrsh r5, [r3], #8
18031789
; CHECK-NEXT: vcvt.f16.s32 s4, s4
1804-
; CHECK-NEXT: ldrsh r5, [r3, #-10]
1790+
; CHECK-NEXT: ldrsh r6, [r3, #-10]
18051791
; CHECK-NEXT: vmul.f16 s2, s2, s4
1806-
; CHECK-NEXT: vmov s6, r4
1807-
; CHECK-NEXT: vldr.16 s4, [r2]
1792+
; CHECK-NEXT: vmov s6, r5
1793+
; CHECK-NEXT: vldr.16 s4, [r4]
18081794
; CHECK-NEXT: vcvt.f16.s32 s6, s6
1809-
; CHECK-NEXT: ldrsh r4, [r3, #-12]
1795+
; CHECK-NEXT: ldrsh r5, [r3, #-12]
18101796
; CHECK-NEXT: vmul.f16 s4, s4, s6
1811-
; CHECK-NEXT: vmov s8, r5
1812-
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
1797+
; CHECK-NEXT: vmov s8, r6
1798+
; CHECK-NEXT: vldr.16 s6, [r4, #-2]
18131799
; CHECK-NEXT: vcvt.f16.s32 s8, s8
1814-
; CHECK-NEXT: vmov s10, r4
1800+
; CHECK-NEXT: vmov s10, r5
18151801
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
18161802
; CHECK-NEXT: vmul.f16 s6, s6, s8
1817-
; CHECK-NEXT: vldr.16 s8, [r2, #-4]
1803+
; CHECK-NEXT: vldr.16 s8, [r4, #-4]
18181804
; CHECK-NEXT: vcvt.f16.s32 s10, s10
18191805
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
18201806
; CHECK-NEXT: vmul.f16 s8, s8, s10
18211807
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
18221808
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
1823-
; CHECK-NEXT: adds r2, #8
1809+
; CHECK-NEXT: add.w r4, r4, #8
18241810
; CHECK-NEXT: vadd.f32 s0, s0, s8
18251811
; CHECK-NEXT: vadd.f32 s0, s0, s6
18261812
; CHECK-NEXT: vadd.f32 s0, s0, s4
18271813
; CHECK-NEXT: vadd.f32 s0, s0, s2
1828-
; CHECK-NEXT: le lr, .LBB11_5
1814+
; CHECK-NEXT: bne .LBB11_5
18291815
; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa
1830-
; CHECK-NEXT: wls lr, r6, .LBB11_9
1816+
; CHECK-NEXT: wls lr, lr, .LBB11_9
18311817
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
1832-
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
1833-
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
1834-
; CHECK-NEXT: mov lr, r6
1818+
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
1819+
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
18351820
; CHECK-NEXT: .LBB11_8: @ %for.body.epil
18361821
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
18371822
; CHECK-NEXT: ldrsh r2, [r1], #2

0 commit comments

Comments
 (0)