Skip to content

Commit fbf4ac5

Browse files
committed
Address review comments and add complex version
1 parent 77fe860 commit fbf4ac5

File tree

2 files changed

+199
-109
lines changed

2 files changed

+199
-109
lines changed
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN-VW1-IL4
3+
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN-VW4-IL1
4+
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=CHECK-REV-MIN-VW4-IL2
5+
6+
; This test case is extracted from rnflow (fortran) benchmark in polyhedron benchmark suite.
7+
; The function minlst primarily takes two indices (i.e. range), scans backwards in the range
8+
; and returns the firstIV of the minimum value.
9+
10+
define i32 @minlst(i32 %first_index, i32 %last_index, ptr %array) {
11+
; CHECK-REV-MIN-VW1-IL4-LABEL: define i32 @minlst(
12+
; CHECK-REV-MIN-VW1-IL4-SAME: i32 [[FIRST_INDEX:%.*]], i32 [[LAST_INDEX:%.*]], ptr [[ARRAY:%.*]]) {
13+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[ENTRY:.*]]:
14+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[FIRST_INDEX_SEXT:%.*]] = sext i32 [[FIRST_INDEX]] to i64
15+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_NEG:%.*]] = sub i32 0, [[LAST_INDEX]]
16+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_NEG_SEXT:%.*]] = sext i32 [[LAST_INDEX_NEG]] to i64
17+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[ADD:%.*]] = add nsw i64 [[FIRST_INDEX_SEXT]], [[LAST_INDEX_NEG_SEXT]]
18+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[DIFF:%.*]] = sub nsw i64 0, [[ADD]]
19+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[FIRST_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -8
20+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[SECOND_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -4
21+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[EARLY_EXIT_COND:%.*]] = icmp slt i64 [[ADD]], 0
22+
; CHECK-REV-MIN-VW1-IL4-NEXT: br i1 [[EARLY_EXIT_COND]], label %[[LOOP_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
23+
; CHECK-REV-MIN-VW1-IL4: [[LOOP_PREHEADER]]:
24+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_SEXT:%.*]] = sext i32 [[LAST_INDEX]] to i64
25+
; CHECK-REV-MIN-VW1-IL4-NEXT: br label %[[LOOP:.*]]
26+
; CHECK-REV-MIN-VW1-IL4: [[LOOP]]:
27+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX_SEXT]], %[[LOOP_PREHEADER]] ]
28+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[DIFF]], %[[LOOP_PREHEADER]] ]
29+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[INDEX:%.*]] = phi i32 [ [[SELECT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX]], %[[LOOP_PREHEADER]] ]
30+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
31+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD1_PTR:%.*]] = getelementptr float, ptr [[FIRST_PTR]], i64 [[IV]]
32+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD1:%.*]] = load float, ptr [[LOAD1_PTR]], align 4
33+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[INDEX_SEXT:%.*]] = sext i32 [[INDEX]] to i64
34+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD2_PTR:%.*]] = getelementptr float, ptr [[SECOND_PTR]], i64 [[INDEX_SEXT]]
35+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOAD2:%.*]] = load float, ptr [[LOAD2_PTR]], align 4
36+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[CMP:%.*]] = fcmp contract olt float [[LOAD1]], [[LOAD2]]
37+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc nsw i64 [[IV_NEXT]] to i32
38+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[SELECT]] = select i1 [[CMP]], i32 [[IV_NEXT_TRUNC]], i32 [[INDEX]]
39+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[DEC]] = add nsw i64 [[DEC_IV]], -1
40+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LOOP_COND:%.*]] = icmp sgt i64 [[DEC_IV]], 1
41+
; CHECK-REV-MIN-VW1-IL4-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]]
42+
; CHECK-REV-MIN-VW1-IL4: [[__CRIT_EDGE_LOOPEXIT:.*:]]
43+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], %[[LOOP]] ]
44+
; CHECK-REV-MIN-VW1-IL4-NEXT: br [[DOT_CRIT_EDGE]]
45+
; CHECK-REV-MIN-VW1-IL4: [[__CRIT_EDGE:.*:]]
46+
; CHECK-REV-MIN-VW1-IL4-NEXT: [[LAST_INDEX_RET:%.*]] = phi i32 [ [[LAST_INDEX]], %[[ENTRY]] ], [ [[SELECT_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
47+
; CHECK-REV-MIN-VW1-IL4-NEXT: ret i32 [[LAST_INDEX_RET]]
48+
;
49+
; CHECK-REV-MIN-VW4-IL1-LABEL: define i32 @minlst(
50+
; CHECK-REV-MIN-VW4-IL1-SAME: i32 [[FIRST_INDEX:%.*]], i32 [[LAST_INDEX:%.*]], ptr [[ARRAY:%.*]]) {
51+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[ENTRY:.*]]:
52+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[FIRST_INDEX_SEXT:%.*]] = sext i32 [[FIRST_INDEX]] to i64
53+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_NEG:%.*]] = sub i32 0, [[LAST_INDEX]]
54+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_NEG_SEXT:%.*]] = sext i32 [[LAST_INDEX_NEG]] to i64
55+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[ADD:%.*]] = add nsw i64 [[FIRST_INDEX_SEXT]], [[LAST_INDEX_NEG_SEXT]]
56+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[DIFF:%.*]] = sub nsw i64 0, [[ADD]]
57+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[FIRST_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -8
58+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[SECOND_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -4
59+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[EARLY_EXIT_COND:%.*]] = icmp slt i64 [[ADD]], 0
60+
; CHECK-REV-MIN-VW4-IL1-NEXT: br i1 [[EARLY_EXIT_COND]], label %[[LOOP_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
61+
; CHECK-REV-MIN-VW4-IL1: [[LOOP_PREHEADER]]:
62+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_SEXT:%.*]] = sext i32 [[LAST_INDEX]] to i64
63+
; CHECK-REV-MIN-VW4-IL1-NEXT: br label %[[LOOP:.*]]
64+
; CHECK-REV-MIN-VW4-IL1: [[LOOP]]:
65+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX_SEXT]], %[[LOOP_PREHEADER]] ]
66+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[DIFF]], %[[LOOP_PREHEADER]] ]
67+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[INDEX:%.*]] = phi i32 [ [[SELECT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX]], %[[LOOP_PREHEADER]] ]
68+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
69+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD1_PTR:%.*]] = getelementptr float, ptr [[FIRST_PTR]], i64 [[IV]]
70+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD1:%.*]] = load float, ptr [[LOAD1_PTR]], align 4
71+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[INDEX_SEXT:%.*]] = sext i32 [[INDEX]] to i64
72+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD2_PTR:%.*]] = getelementptr float, ptr [[SECOND_PTR]], i64 [[INDEX_SEXT]]
73+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOAD2:%.*]] = load float, ptr [[LOAD2_PTR]], align 4
74+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[CMP:%.*]] = fcmp contract olt float [[LOAD1]], [[LOAD2]]
75+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc nsw i64 [[IV_NEXT]] to i32
76+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[SELECT]] = select i1 [[CMP]], i32 [[IV_NEXT_TRUNC]], i32 [[INDEX]]
77+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[DEC]] = add nsw i64 [[DEC_IV]], -1
78+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LOOP_COND:%.*]] = icmp sgt i64 [[DEC_IV]], 1
79+
; CHECK-REV-MIN-VW4-IL1-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]]
80+
; CHECK-REV-MIN-VW4-IL1: [[__CRIT_EDGE_LOOPEXIT:.*:]]
81+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], %[[LOOP]] ]
82+
; CHECK-REV-MIN-VW4-IL1-NEXT: br [[DOT_CRIT_EDGE]]
83+
; CHECK-REV-MIN-VW4-IL1: [[__CRIT_EDGE:.*:]]
84+
; CHECK-REV-MIN-VW4-IL1-NEXT: [[LAST_INDEX_RET:%.*]] = phi i32 [ [[LAST_INDEX]], %[[ENTRY]] ], [ [[SELECT_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
85+
; CHECK-REV-MIN-VW4-IL1-NEXT: ret i32 [[LAST_INDEX_RET]]
86+
;
87+
; CHECK-REV-MIN-VW4-IL2-LABEL: define i32 @minlst(
88+
; CHECK-REV-MIN-VW4-IL2-SAME: i32 [[FIRST_INDEX:%.*]], i32 [[LAST_INDEX:%.*]], ptr [[ARRAY:%.*]]) {
89+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[ENTRY:.*]]:
90+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[FIRST_INDEX_SEXT:%.*]] = sext i32 [[FIRST_INDEX]] to i64
91+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_NEG:%.*]] = sub i32 0, [[LAST_INDEX]]
92+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_NEG_SEXT:%.*]] = sext i32 [[LAST_INDEX_NEG]] to i64
93+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[ADD:%.*]] = add nsw i64 [[FIRST_INDEX_SEXT]], [[LAST_INDEX_NEG_SEXT]]
94+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[DIFF:%.*]] = sub nsw i64 0, [[ADD]]
95+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[FIRST_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -8
96+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[SECOND_PTR:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 -4
97+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[EARLY_EXIT_COND:%.*]] = icmp slt i64 [[ADD]], 0
98+
; CHECK-REV-MIN-VW4-IL2-NEXT: br i1 [[EARLY_EXIT_COND]], label %[[LOOP_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
99+
; CHECK-REV-MIN-VW4-IL2: [[LOOP_PREHEADER]]:
100+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_SEXT:%.*]] = sext i32 [[LAST_INDEX]] to i64
101+
; CHECK-REV-MIN-VW4-IL2-NEXT: br label %[[LOOP:.*]]
102+
; CHECK-REV-MIN-VW4-IL2: [[LOOP]]:
103+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX_SEXT]], %[[LOOP_PREHEADER]] ]
104+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[DIFF]], %[[LOOP_PREHEADER]] ]
105+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[INDEX:%.*]] = phi i32 [ [[SELECT:%.*]], %[[LOOP]] ], [ [[LAST_INDEX]], %[[LOOP_PREHEADER]] ]
106+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
107+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD1_PTR:%.*]] = getelementptr float, ptr [[FIRST_PTR]], i64 [[IV]]
108+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD1:%.*]] = load float, ptr [[LOAD1_PTR]], align 4
109+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[INDEX_SEXT:%.*]] = sext i32 [[INDEX]] to i64
110+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD2_PTR:%.*]] = getelementptr float, ptr [[SECOND_PTR]], i64 [[INDEX_SEXT]]
111+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOAD2:%.*]] = load float, ptr [[LOAD2_PTR]], align 4
112+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[CMP:%.*]] = fcmp contract olt float [[LOAD1]], [[LOAD2]]
113+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc nsw i64 [[IV_NEXT]] to i32
114+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[SELECT]] = select i1 [[CMP]], i32 [[IV_NEXT_TRUNC]], i32 [[INDEX]]
115+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[DEC]] = add nsw i64 [[DEC_IV]], -1
116+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LOOP_COND:%.*]] = icmp sgt i64 [[DEC_IV]], 1
117+
; CHECK-REV-MIN-VW4-IL2-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]]
118+
; CHECK-REV-MIN-VW4-IL2: [[__CRIT_EDGE_LOOPEXIT:.*:]]
119+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], %[[LOOP]] ]
120+
; CHECK-REV-MIN-VW4-IL2-NEXT: br [[DOT_CRIT_EDGE]]
121+
; CHECK-REV-MIN-VW4-IL2: [[__CRIT_EDGE:.*:]]
122+
; CHECK-REV-MIN-VW4-IL2-NEXT: [[LAST_INDEX_RET:%.*]] = phi i32 [ [[LAST_INDEX]], %[[ENTRY]] ], [ [[SELECT_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
123+
; CHECK-REV-MIN-VW4-IL2-NEXT: ret i32 [[LAST_INDEX_RET]]
124+
;
125+
entry:
126+
%first_index_sext = sext i32 %first_index to i64
127+
%last_index_neg = sub i32 0, %last_index
128+
%last_index_neg_sext = sext i32 %last_index_neg to i64
129+
%add = add nsw i64 %first_index_sext, %last_index_neg_sext
130+
%diff = sub nsw i64 0, %add
131+
%first_ptr = getelementptr i8, ptr %array, i64 -8
132+
%second_ptr = getelementptr i8, ptr %array, i64 -4
133+
%early_exit_cond = icmp slt i64 %add, 0
134+
br i1 %early_exit_cond, label %loop.preheader, label %._crit_edge
135+
136+
loop.preheader: ; preds = %entry
137+
%last_index_sext = sext i32 %last_index to i64
138+
br label %loop
139+
140+
loop: ; preds = %loop.preheader, %loop
141+
%iv = phi i64 [%iv.next, %loop], [ %last_index_sext, %loop.preheader ]
142+
%dec_iv = phi i64 [ %dec, %loop ], [ %diff, %loop.preheader ]
143+
%index = phi i32 [ %select, %loop ], [ %last_index, %loop.preheader ]
144+
%iv.next = add nsw i64 %iv, -1
145+
%load1_ptr = getelementptr float, ptr %first_ptr, i64 %iv
146+
%load1 = load float, ptr %load1_ptr, align 4
147+
%index_sext = sext i32 %index to i64
148+
%load2_ptr = getelementptr float, ptr %second_ptr, i64 %index_sext
149+
%load2 = load float, ptr %load2_ptr, align 4
150+
%cmp = fcmp contract olt float %load1, %load2
151+
%iv.next.trunc = trunc nsw i64 %iv.next to i32
152+
%select = select i1 %cmp, i32 %iv.next.trunc, i32 %index
153+
%dec = add nsw i64 %dec_iv, -1
154+
%loop_cond = icmp sgt i64 %dec_iv, 1
155+
br i1 %loop_cond, label %loop, label %._crit_edge
156+
157+
._crit_edge: ; preds = %loop, %entry
158+
%last_index_ret = phi i32 [ %select, %loop ], [ %last_index, %entry ]
159+
ret i32 %last_index_ret
160+
}

0 commit comments

Comments
 (0)