Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 7c4e3ce

Browse files
committed
[ARM][ParallelDSP] Fix pointer operand reordering
While combining two loads into a single load, we often need to reorder the pointer operands for the new load. This reordering was broken in cases where the pointer was built up by a chain of values.

Differential Revision: https://reviews.llvm.org/D65193

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366881 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 097fe86 commit 7c4e3ce

File tree

2 files changed

+86
-2
lines changed

2 files changed

+86
-2
lines changed

lib/Target/ARM/ARMParallelDSP.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,8 +761,8 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
761761
return;
762762

763763
Source->moveBefore(Sink);
764-
for (auto &U : Source->uses())
765-
MoveBefore(Source, U.getUser());
764+
for (auto &Op : Source->operands())
765+
MoveBefore(Op, Source);
766766
};
767767

768768
// Insert the load at the point of the original dominating load.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; RUN: opt -mtriple=thumbv7-unknown-linux-android -arm-parallel-dsp -S %s -o - | FileCheck %s
2+
3+
; CHECK-LABEL: undef_no_return
4+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %incdec.ptr21 to i32*
5+
; CHECK: [[LOAD_A:%[^ ]+]] = load i32, i32* [[CAST_A]], align 2
6+
; CHECK: %uglygep15 = getelementptr i8, i8* undef, i32 undef
7+
; CHECK: [[GEP8:%[^ ]+]] = getelementptr i8, i8* undef, i32 undef
8+
; CHECK: [[CAST_GEP8:%[^ ]+]] = bitcast i8* [[GEP8]] to i16*
9+
; CHECK: [[GEP16:%[^ ]+]] = getelementptr i16, i16* [[CAST_GEP8]], i32 6
10+
; CHECK: [[CAST_GEP16:%[^ ]+]] = bitcast i16* [[GEP16]] to i32*
11+
; CHECK: [[LOAD_UNDEF:%[^ ]+]] = load i32, i32* [[CAST_GEP16]], align 2
12+
; CHECK: call i32 @llvm.arm.smladx(i32 [[LOAD_A]], i32 [[LOAD_UNDEF]], i32 undef)
13+
define void @undef_no_return(i16* %a) {
14+
entry:
15+
%incdec.ptr21 = getelementptr inbounds i16, i16* %a, i32 3
16+
%incdec.ptr29 = getelementptr inbounds i16, i16* %a, i32 4
17+
br label %for.body
18+
19+
for.body:
20+
%0 = load i16, i16* %incdec.ptr21, align 2
21+
%conv25 = sext i16 %0 to i32
22+
%uglygep15 = getelementptr i8, i8* undef, i32 undef
23+
%uglygep1516 = bitcast i8* %uglygep15 to i16*
24+
%scevgep17 = getelementptr i16, i16* %uglygep1516, i32 7
25+
%1 = load i16, i16* %scevgep17, align 2
26+
%conv31 = sext i16 %1 to i32
27+
%2 = load i16, i16* %incdec.ptr29, align 2
28+
%conv33 = sext i16 %2 to i32
29+
%uglygep12 = getelementptr i8, i8* undef, i32 undef
30+
%uglygep1213 = bitcast i8* %uglygep12 to i16*
31+
%scevgep14 = getelementptr i16, i16* %uglygep1213, i32 6
32+
%3 = load i16, i16* %scevgep14, align 2
33+
%conv39 = sext i16 %3 to i32
34+
%mul.i287.neg.neg = mul nsw i32 %conv31, %conv25
35+
%mul.i283.neg.neg = mul nsw i32 %conv39, %conv33
36+
%reass.add408 = add i32 undef, %mul.i287.neg.neg
37+
%reass.add409 = add i32 %reass.add408, %mul.i283.neg.neg
38+
br label %for.body
39+
}
40+
41+
; CHECK-LABEL: return
42+
; CHECK: phi i32 [ %N, %entry ]
43+
; CHECK: [[ACC:%[^ ]+]] = phi i32 [ 0, %entry ], [ [[ACC_NEXT:%[^ ]+]], %for.body ]
44+
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %incdec.ptr21 to i32*
45+
; CHECK: [[LOAD_A:%[^ ]+]] = load i32, i32* [[CAST_A]], align 2
46+
; CHECK: [[GEP8:%[^ ]+]] = getelementptr i8, i8* %b, i32 0
47+
; CHECK: [[CAST_GEP8:%[^ ]+]] = bitcast i8* [[GEP8]] to i16*
48+
; CHECK: [[GEP16:%[^ ]+]] = getelementptr i16, i16* [[CAST_GEP8]], i32 %iv
49+
; CHECK: [[CAST_GEP16:%[^ ]+]] = bitcast i16* [[GEP16]] to i32*
50+
; CHECK: [[LOAD_B:%[^ ]+]] = load i32, i32* [[CAST_GEP16]], align 2
51+
; CHECK: [[ACC_NEXT]] = call i32 @llvm.arm.smladx(i32 [[LOAD_A]], i32 [[LOAD_B]], i32 [[ACC]])
52+
define i32 @return(i16* %a, i8* %b, i32 %N) {
53+
entry:
54+
%incdec.ptr21 = getelementptr inbounds i16, i16* %a, i32 3
55+
%incdec.ptr29 = getelementptr inbounds i16, i16* %a, i32 4
56+
br label %for.body
57+
58+
for.body:
59+
%iv = phi i32 [ %N, %entry ], [ %iv.next, %for.body ]
60+
%acc = phi i32 [ 0, %entry ], [ %reass.add409, %for.body ]
61+
%0 = load i16, i16* %incdec.ptr21, align 2
62+
%conv25 = sext i16 %0 to i32
63+
%uglygep15 = getelementptr i8, i8* %b, i32 0
64+
%uglygep1516 = bitcast i8* %uglygep15 to i16*
65+
%b.idx = add nuw nsw i32 %iv, 1
66+
%scevgep17 = getelementptr i16, i16* %uglygep1516, i32 %b.idx
67+
%scevgep14 = getelementptr i16, i16* %uglygep1516, i32 %iv
68+
%1 = load i16, i16* %scevgep17, align 2
69+
%conv31 = sext i16 %1 to i32
70+
%2 = load i16, i16* %incdec.ptr29, align 2
71+
%conv33 = sext i16 %2 to i32
72+
%3 = load i16, i16* %scevgep14, align 2
73+
%conv39 = sext i16 %3 to i32
74+
%mul.i287.neg.neg = mul nsw i32 %conv31, %conv25
75+
%mul.i283.neg.neg = mul nsw i32 %conv39, %conv33
76+
%reass.add408 = add i32 %acc, %mul.i287.neg.neg
77+
%reass.add409 = add i32 %reass.add408, %mul.i283.neg.neg
78+
%iv.next = add nuw nsw i32 %iv, -1
79+
%cmp = icmp ne i32 %iv.next, 0
80+
br i1 %cmp, label %for.body, label %exit
81+
82+
exit:
83+
ret i32 %reass.add409
84+
}

0 commit comments

Comments
 (0)