Skip to content

Commit d69033d

Browse files
committed
[SCEVExpander] Fix GEP IV inc reuse logic for opaque pointers
Instead of checking the pointer type, check the source element type of the GEP. Previously we ended up reusing GEP increments that were not in expanded form, thus not respecting LSR's choice of representation. The change in 2011-10-06-ReusePhi.ll recovers a regression that appeared when converting that test to opaque pointers. The changes in various Thumb tests now compute the step outside the loop instead of using add.w inside the loop; computing the step outside the loop is LSR's preferred representation for this target.
1 parent 7724c4b commit d69033d

File tree

7 files changed

+124
-132
lines changed

7 files changed

+124
-132
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -999,15 +999,8 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
999999
// allow any kind of GEP as long as it can be hoisted.
10001000
continue;
10011001
}
1002-
// This must be a pointer addition of constants (pretty), which is already
1003-
// handled, or some number of address-size elements (ugly). Ugly geps
1004-
// have 2 operands. i1* is used by the expander to represent an
1005-
// address-size element.
1006-
if (IncV->getNumOperands() != 2)
1007-
return nullptr;
1008-
unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
1009-
if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
1010-
&& IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
1002+
// GEPs produced by SCEVExpander use i8 element type.
1003+
if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
10111004
return nullptr;
10121005
break;
10131006
}

llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,30 +60,31 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
6060
; CHECK-NEXT: cmp r1, #1
6161
; CHECK-NEXT: it lt
6262
; CHECK-NEXT: bxlt lr
63-
; CHECK-NEXT: .LBB1_1:
64-
; CHECK-NEXT: .save {r7, lr}
65-
; CHECK-NEXT: push {r7, lr}
63+
; CHECK-NEXT: .LBB1_1: @ %for.body.preheader
64+
; CHECK-NEXT: .save {r4, lr}
65+
; CHECK-NEXT: push {r4, lr}
66+
; CHECK-NEXT: lsl.w r12, r2, #2
6667
; CHECK-NEXT: vmov.i32 q0, #0x0
6768
; CHECK-NEXT: b .LBB1_2
6869
; CHECK-NEXT: .LBB1_2: @ %for.body
6970
; CHECK-NEXT: @ =>This Loop Header: Depth=1
7071
; CHECK-NEXT: @ Child Loop BB1_4 Depth 2
71-
; CHECK-NEXT: mov r12, r0
72+
; CHECK-NEXT: mov r4, r0
7273
; CHECK-NEXT: wlstp.8 lr, r2, .LBB1_3
7374
; CHECK-NEXT: b .LBB1_4
7475
; CHECK-NEXT: .LBB1_3: @ %for.body
7576
; CHECK-NEXT: @ in Loop: Header=BB1_2 Depth=1
76-
; CHECK-NEXT: add.w r0, r0, r2, lsl #2
77+
; CHECK-NEXT: add r0, r12
7778
; CHECK-NEXT: subs r1, #1
7879
; CHECK-NEXT: beq .LBB1_5
7980
; CHECK-NEXT: b .LBB1_2
8081
; CHECK-NEXT: .LBB1_4: @ Parent Loop BB1_2 Depth=1
8182
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
82-
; CHECK-NEXT: vstrb.8 q0, [r12], #16
83+
; CHECK-NEXT: vstrb.8 q0, [r4], #16
8384
; CHECK-NEXT: letp lr, .LBB1_4
8485
; CHECK-NEXT: b .LBB1_3
8586
; CHECK-NEXT: .LBB1_5:
86-
; CHECK-NEXT: pop.w {r7, lr}
87+
; CHECK-NEXT: pop.w {r4, lr}
8788
; CHECK-NEXT: bx lr
8889
entry:
8990
%cmp5 = icmp sgt i32 %n, 0

llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,12 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture
3939
; CHECK-NEXT: subs r3, #8
4040
; CHECK-NEXT: movs r4, #1
4141
; CHECK-NEXT: vdup.16 q0, r5
42-
; CHECK-NEXT: vdup.16 q5, r6
42+
; CHECK-NEXT: lsls r1, r1, #1
4343
; CHECK-NEXT: add.w r3, r4, r3, lsr #3
4444
; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill
4545
; CHECK-NEXT: vmov.i16 q0, #0xf800
4646
; CHECK-NEXT: movs r4, #0
47+
; CHECK-NEXT: vdup.16 q5, r6
4748
; CHECK-NEXT: vmov.i16 q7, #0x78
4849
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
4950
; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill
@@ -95,7 +96,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture
9596
; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
9697
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
9798
; CHECK-NEXT: adds r4, #1
98-
; CHECK-NEXT: add.w r0, r0, r1, lsl #1
99+
; CHECK-NEXT: add r0, r1
99100
; CHECK-NEXT: cmp r4, r12
100101
; CHECK-NEXT: bne .LBB0_3
101102
; CHECK-NEXT: .LBB0_6:
@@ -219,6 +220,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias noc
219220
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
220221
; CHECK-NEXT: vdup.16 q0, r5
221222
; CHECK-NEXT: rsb.w r3, r7, #256
223+
; CHECK-NEXT: lsls r7, r1, #1
222224
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
223225
; CHECK-NEXT: vdup.16 q0, r6
224226
; CHECK-NEXT: vmov.i16 q2, #0xf8
@@ -265,8 +267,8 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias noc
265267
; CHECK-NEXT: letp lr, .LBB1_4
266268
; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
267269
; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1
268-
; CHECK-NEXT: add.w r0, r0, r1, lsl #1
269270
; CHECK-NEXT: adds r4, #1
271+
; CHECK-NEXT: add r0, r7
270272
; CHECK-NEXT: cmp r4, r12
271273
; CHECK-NEXT: bne .LBB1_3
272274
; CHECK-NEXT: @ %bb.6:

llvm/test/CodeGen/Thumb2/mve-float16regloops.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -663,34 +663,35 @@ for.cond.cleanup: ; preds = %vector.body, %entry
663663
define dso_local void @test_nested(ptr noalias nocapture %pInT1, ptr noalias nocapture readonly %pOutT1, ptr noalias nocapture readonly %pPRT_in, ptr noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr {
664664
; CHECK-LABEL: test_nested:
665665
; CHECK: @ %bb.0: @ %for.body.us.preheader
666-
; CHECK-NEXT: .save {r4, r5, r7, lr}
667-
; CHECK-NEXT: push {r4, r5, r7, lr}
666+
; CHECK-NEXT: .save {r4, r5, r6, lr}
667+
; CHECK-NEXT: push {r4, r5, r6, lr}
668668
; CHECK-NEXT: ldrd lr, r12, [sp, #16]
669+
; CHECK-NEXT: lsl.w r3, r12, #1
669670
; CHECK-NEXT: .LBB14_1: @ %for.body.us
670671
; CHECK-NEXT: @ =>This Loop Header: Depth=1
671672
; CHECK-NEXT: @ Child Loop BB14_2 Depth 2
672-
; CHECK-NEXT: ldrh r3, [r1]
673+
; CHECK-NEXT: ldrh r4, [r1]
674+
; CHECK-NEXT: mov r5, r2
675+
; CHECK-NEXT: mov r6, r12
676+
; CHECK-NEXT: vdup.16 q0, r4
673677
; CHECK-NEXT: mov r4, r0
674-
; CHECK-NEXT: mov r5, r12
675-
; CHECK-NEXT: vdup.16 q0, r3
676-
; CHECK-NEXT: add.w r3, r2, r12, lsl #1
677678
; CHECK-NEXT: .LBB14_2: @ %vector.body
678679
; CHECK-NEXT: @ Parent Loop BB14_1 Depth=1
679680
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
680-
; CHECK-NEXT: vldrw.u32 q1, [r2], #16
681+
; CHECK-NEXT: vldrw.u32 q1, [r5], #16
681682
; CHECK-NEXT: vldrw.u32 q2, [r4]
682-
; CHECK-NEXT: subs r5, #8
683+
; CHECK-NEXT: subs r6, #8
683684
; CHECK-NEXT: vfms.f16 q2, q1, q0
684685
; CHECK-NEXT: vstrb.8 q2, [r4], #16
685686
; CHECK-NEXT: bne .LBB14_2
686687
; CHECK-NEXT: @ %bb.3: @ %for.cond6.for.end_crit_edge.us
687688
; CHECK-NEXT: @ in Loop: Header=BB14_1 Depth=1
688-
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
689+
; CHECK-NEXT: add r0, r3
690+
; CHECK-NEXT: add r2, r3
689691
; CHECK-NEXT: adds r1, #2
690-
; CHECK-NEXT: mov r2, r3
691692
; CHECK-NEXT: le lr, .LBB14_1
692693
; CHECK-NEXT: @ %bb.4: @ %for.end14
693-
; CHECK-NEXT: pop {r4, r5, r7, pc}
694+
; CHECK-NEXT: pop {r4, r5, r6, pc}
694695
for.body.us.preheader:
695696
%cmp = icmp sgt i32 %numRows, 0
696697
tail call void @llvm.assume(i1 %cmp)

llvm/test/CodeGen/Thumb2/mve-float32regloops.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -663,34 +663,35 @@ for.cond.cleanup: ; preds = %vector.body, %entry
663663
define dso_local void @test_nested(ptr noalias nocapture %pInT1, ptr noalias nocapture readonly %pOutT1, ptr noalias nocapture readonly %pPRT_in, ptr noalias nocapture readnone %pPRT_pDst, i32 %numRows, i32 %numCols, i32 %l) local_unnamed_addr {
664664
; CHECK-LABEL: test_nested:
665665
; CHECK: @ %bb.0: @ %for.body.us.preheader
666-
; CHECK-NEXT: .save {r4, r5, r7, lr}
667-
; CHECK-NEXT: push {r4, r5, r7, lr}
666+
; CHECK-NEXT: .save {r4, r5, r6, lr}
667+
; CHECK-NEXT: push {r4, r5, r6, lr}
668668
; CHECK-NEXT: ldrd lr, r12, [sp, #16]
669+
; CHECK-NEXT: lsl.w r3, r12, #2
669670
; CHECK-NEXT: .LBB14_1: @ %for.body.us
670671
; CHECK-NEXT: @ =>This Loop Header: Depth=1
671672
; CHECK-NEXT: @ Child Loop BB14_2 Depth 2
672-
; CHECK-NEXT: ldr r3, [r1]
673+
; CHECK-NEXT: ldr r4, [r1]
674+
; CHECK-NEXT: mov r5, r2
675+
; CHECK-NEXT: mov r6, r12
676+
; CHECK-NEXT: vdup.32 q0, r4
673677
; CHECK-NEXT: mov r4, r0
674-
; CHECK-NEXT: mov r5, r12
675-
; CHECK-NEXT: vdup.32 q0, r3
676-
; CHECK-NEXT: add.w r3, r2, r12, lsl #2
677678
; CHECK-NEXT: .LBB14_2: @ %vector.body
678679
; CHECK-NEXT: @ Parent Loop BB14_1 Depth=1
679680
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
680-
; CHECK-NEXT: vldrw.u32 q1, [r2], #16
681+
; CHECK-NEXT: vldrw.u32 q1, [r5], #16
681682
; CHECK-NEXT: vldrw.u32 q2, [r4]
682-
; CHECK-NEXT: subs r5, #4
683+
; CHECK-NEXT: subs r6, #4
683684
; CHECK-NEXT: vfms.f32 q2, q1, q0
684685
; CHECK-NEXT: vstrb.8 q2, [r4], #16
685686
; CHECK-NEXT: bne .LBB14_2
686687
; CHECK-NEXT: @ %bb.3: @ %for.cond6.for.end_crit_edge.us
687688
; CHECK-NEXT: @ in Loop: Header=BB14_1 Depth=1
688-
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
689+
; CHECK-NEXT: add r0, r3
690+
; CHECK-NEXT: add r2, r3
689691
; CHECK-NEXT: adds r1, #4
690-
; CHECK-NEXT: mov r2, r3
691692
; CHECK-NEXT: le lr, .LBB14_1
692693
; CHECK-NEXT: @ %bb.4: @ %for.end14
693-
; CHECK-NEXT: pop {r4, r5, r7, pc}
694+
; CHECK-NEXT: pop {r4, r5, r6, pc}
694695
for.body.us.preheader:
695696
%cmp = icmp sgt i32 %numRows, 0
696697
tail call void @llvm.assume(i1 %cmp)

0 commit comments

Comments
 (0)