Skip to content

Commit 21d27b3

Browse files
committed
[LoopUnroll] Add tests for loop unrolling on Apple platforms.
Add first set of tests where runtime unrolling can be highly beneficial on Apple Silicon CPUs.
1 parent 637a1ae commit 21d27b3

File tree

1 file changed

+283
-0
lines changed

1 file changed

+283
-0
lines changed
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p loop-unroll -mcpu=apple-m1 -S %s | FileCheck --check-prefix=APPLE %s
3+
; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
4+
; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
5+
; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
6+
; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
;
; The four opt invocations for apple-m1 through apple-m4 share the APPLE check
; prefix, so all of them must produce the same output. The cortex-a57
; invocation is verified separately under the OTHER check prefix.
7+
8+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
9+
target triple = "arm64-apple-macosx15.0.0"
10+
11+
; Small single-block counted loop. Each iteration loads a float from %src at
; element index (%iv * %scale) and stores it to %dst at element index %iv.
; %iv starts at 0 and the loop exits once %iv.next equals %N.
;
; What the autogenerated assertions below expect:
;  - APPLE check prefix: the loop is left as a single, non-unrolled loop.
;  - OTHER check prefix: the loop is runtime-unrolled by a factor of 2, with a
;    single remainder iteration that runs when (%N & 1) is non-zero.
define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
12+
; APPLE-LABEL: define void @small_load_store_loop(
13+
; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
14+
; APPLE-NEXT: [[ENTRY:.*]]:
15+
; APPLE-NEXT: br label %[[LOOP:.*]]
16+
; APPLE: [[LOOP]]:
17+
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP]] ]
18+
; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL]], [[SCALE]]
19+
; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
20+
; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
21+
; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL]]
22+
; APPLE-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4
23+
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
24+
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
25+
; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP]]
26+
; APPLE: [[EXIT]]:
27+
; APPLE-NEXT: ret void
28+
;
29+
; OTHER-LABEL: define void @small_load_store_loop(
30+
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
31+
; OTHER-NEXT: [[ENTRY:.*]]:
32+
; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
33+
; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
34+
; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
35+
; OTHER-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
36+
; OTHER: [[ENTRY_NEW]]:
37+
; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
38+
; OTHER-NEXT: br label %[[LOOP:.*]]
39+
; OTHER: [[LOOP]]:
40+
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
41+
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
42+
; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
43+
; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
44+
; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
45+
; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
46+
; OTHER-NEXT: store float [[L]], ptr [[GEP_DST]], align 4
47+
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
48+
; OTHER-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
49+
; OTHER-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
50+
; OTHER-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
51+
; OTHER-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
52+
; OTHER-NEXT: store float [[L_1]], ptr [[GEP_DST_1]], align 4
53+
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
54+
; OTHER-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
55+
; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
56+
; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
57+
; OTHER: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
58+
; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
59+
; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]]
60+
; OTHER: [[EXIT_UNR_LCSSA]]:
61+
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
62+
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
63+
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
64+
; OTHER: [[LOOP_EPIL_PREHEADER]]:
65+
; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
66+
; OTHER: [[LOOP_EPIL]]:
67+
; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
68+
; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
69+
; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
70+
; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
71+
; OTHER-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4
72+
; OTHER-NEXT: br label %[[EXIT]]
73+
; OTHER: [[EXIT]]:
74+
; OTHER-NEXT: ret void
75+
;
76+
entry:
77+
br label %loop
78+
79+
loop:
80+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
81+
; The source index is the induction variable multiplied by the runtime
; value %scale; the destination index is the induction variable itself.
%scaled.iv = mul nuw nsw i64 %iv, %scale
82+
%gep.src = getelementptr inbounds float, ptr %src, i64 %scaled.iv
83+
%l = load float, ptr %gep.src, align 4
84+
%gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
85+
store float %l, ptr %gep.dst, align 4
86+
%iv.next = add nuw nsw i64 %iv, 1
87+
; Exit test is an equality compare against %N, evaluated at the bottom of
; the loop, so the body always runs at least once.
%ec = icmp eq i64 %iv.next, %N
88+
br i1 %ec, label %exit, label %loop
89+
90+
exit:
91+
ret void
92+
}
93+
94+
; External constant tables read by @early_continue_dep_on_load_large: a byte
; loaded from @A is sign-extended and used as the index into both @B and @C.
@A = external constant [9 x i8], align 1
95+
@B = external constant [8 x i32], align 4
96+
@C = external constant [8 x i32], align 4
97+
98+
; Multi-block loop over elements of type { i32, i8, i8, [2 x i8] } at %p.1,
; with %iv starting at 1 and the loop exiting once %iv.next equals %N.
;
; Each iteration loads the element's i32 field and branches straight to the
; latch (an early continue) unless that value is greater than %t.1. Otherwise
; it optionally looks up values through @A, @B and @C, uses them to form two
; indices into %p.2, and conditionally stores 1 or 2 to the byte at offset 5
; of the current element.
;
; The autogenerated assertions below expect the same non-unrolled loop for
; both the APPLE and the OTHER check prefixes.
define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x, i32 %width, i32 %t.1, i32 %t.2) {
99+
; APPLE-LABEL: define void @early_continue_dep_on_load_large(
100+
; APPLE-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] {
101+
; APPLE-NEXT: [[ENTRY:.*]]:
102+
; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
103+
; APPLE: [[LOOP_HEADER]]:
104+
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_LATCH:.*]] ]
105+
; APPLE-NEXT: [[GEP_EPIL:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_EPIL]]
106+
; APPLE-NEXT: [[L_1_EPIL:%.*]] = load i32, ptr [[GEP_EPIL]], align 4
107+
; APPLE-NEXT: [[CMP6_NOT_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_1]]
108+
; APPLE-NEXT: br i1 [[CMP6_NOT_EPIL]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
109+
; APPLE: [[THEN]]:
110+
; APPLE-NEXT: [[GEP_4_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 4
111+
; APPLE-NEXT: [[L_2_EPIL:%.*]] = load i8, ptr [[GEP_4_EPIL]], align 4
112+
; APPLE-NEXT: [[OR_COND_EPIL:%.*]] = icmp ugt i8 [[L_2_EPIL]], 7
113+
; APPLE-NEXT: br i1 [[OR_COND_EPIL]], label %[[MERGE:.*]], label %[[ELSE:.*]]
114+
; APPLE: [[ELSE]]:
115+
; APPLE-NEXT: [[CONV_I_EPIL:%.*]] = zext nneg i8 [[L_2_EPIL]] to i64
116+
; APPLE-NEXT: [[ARRAYIDX_I_EPIL:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_EPIL]]
117+
; APPLE-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX_I_EPIL]], align 1
118+
; APPLE-NEXT: [[IDXPROM_I_EPIL:%.*]] = sext i8 [[TMP27]] to i64
119+
; APPLE-NEXT: [[ARRAYIDX_I37_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_EPIL]]
120+
; APPLE-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX_I37_EPIL]], align 4
121+
; APPLE-NEXT: [[ARRAYIDX_I42_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_EPIL]]
122+
; APPLE-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX_I42_EPIL]], align 4
123+
; APPLE-NEXT: br label %[[MERGE]]
124+
; APPLE: [[MERGE]]:
125+
; APPLE-NEXT: [[RETVAL_0_I3851_EPIL:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP28]], %[[ELSE]] ]
126+
; APPLE-NEXT: [[RETVAL_0_I43_EPIL:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP29]], %[[ELSE]] ]
127+
; APPLE-NEXT: [[ADD14_EPIL:%.*]] = add nsw i32 [[RETVAL_0_I43_EPIL]], [[X]]
128+
; APPLE-NEXT: [[MUL15_EPIL:%.*]] = mul nsw i32 [[ADD14_EPIL]], [[WIDTH]]
129+
; APPLE-NEXT: [[TMP30:%.*]] = trunc nuw nsw i64 [[IV_EPIL]] to i32
130+
; APPLE-NEXT: [[ADD16_EPIL:%.*]] = add nsw i32 [[RETVAL_0_I3851_EPIL]], [[TMP30]]
131+
; APPLE-NEXT: [[ADD17_EPIL:%.*]] = add nsw i32 [[ADD16_EPIL]], [[MUL15_EPIL]]
132+
; APPLE-NEXT: [[IDXPROM18_EPIL:%.*]] = sext i32 [[ADD17_EPIL]] to i64
133+
; APPLE-NEXT: [[ARRAYIDX19_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_EPIL]]
134+
; APPLE-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX19_EPIL]], align 4
135+
; APPLE-NEXT: [[SUB_EPIL:%.*]] = sub nsw i32 [[X]], [[RETVAL_0_I43_EPIL]]
136+
; APPLE-NEXT: [[MUL21_EPIL:%.*]] = mul nsw i32 [[SUB_EPIL]], [[WIDTH]]
137+
; APPLE-NEXT: [[SUB22_EPIL:%.*]] = sub i32 [[TMP30]], [[RETVAL_0_I3851_EPIL]]
138+
; APPLE-NEXT: [[ADD23_EPIL:%.*]] = add nsw i32 [[SUB22_EPIL]], [[MUL21_EPIL]]
139+
; APPLE-NEXT: [[IDXPROM24_EPIL:%.*]] = sext i32 [[ADD23_EPIL]] to i64
140+
; APPLE-NEXT: [[ARRAYIDX25_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_EPIL]]
141+
; APPLE-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX25_EPIL]], align 4
142+
; APPLE-NEXT: [[CMP27_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[TMP31]]
143+
; APPLE-NEXT: [[CMP28_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[TMP32]]
144+
; APPLE-NEXT: [[AND34_EPIL:%.*]] = and i1 [[CMP27_EPIL]], [[CMP28_EPIL]]
145+
; APPLE-NEXT: br i1 [[AND34_EPIL]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]]
146+
; APPLE: [[STORE_RES]]:
147+
; APPLE-NEXT: [[CMP32_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_2]]
148+
; APPLE-NEXT: [[GEP_5_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 5
149+
; APPLE-NEXT: [[RES_EPIL:%.*]] = select i1 [[CMP32_EPIL]], i8 1, i8 2
150+
; APPLE-NEXT: store i8 [[RES_EPIL]], ptr [[GEP_5_EPIL]], align 1
151+
; APPLE-NEXT: br label %[[LOOP_LATCH]]
152+
; APPLE: [[LOOP_LATCH]]:
153+
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
154+
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
155+
; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
156+
; APPLE: [[EXIT]]:
157+
; APPLE-NEXT: ret void
158+
;
159+
; OTHER-LABEL: define void @early_continue_dep_on_load_large(
160+
; OTHER-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] {
161+
; OTHER-NEXT: [[ENTRY:.*]]:
162+
; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
163+
; OTHER: [[LOOP_HEADER]]:
164+
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
165+
; OTHER-NEXT: [[GEP:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV]]
166+
; OTHER-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP]], align 4
167+
; OTHER-NEXT: [[CMP6_NOT:%.*]] = icmp sgt i32 [[L_1]], [[T_1]]
168+
; OTHER-NEXT: br i1 [[CMP6_NOT]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
169+
; OTHER: [[THEN]]:
170+
; OTHER-NEXT: [[GEP_4:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4
171+
; OTHER-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_4]], align 4
172+
; OTHER-NEXT: [[OR_COND:%.*]] = icmp ugt i8 [[L_2]], 7
173+
; OTHER-NEXT: br i1 [[OR_COND]], label %[[MERGE:.*]], label %[[ELSE:.*]]
174+
; OTHER: [[ELSE]]:
175+
; OTHER-NEXT: [[CONV_I:%.*]] = zext nneg i8 [[L_2]] to i64
176+
; OTHER-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I]]
177+
; OTHER-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I]], align 1
178+
; OTHER-NEXT: [[IDXPROM_I:%.*]] = sext i8 [[TMP0]] to i64
179+
; OTHER-NEXT: [[ARRAYIDX_I37:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I]]
180+
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I37]], align 4
181+
; OTHER-NEXT: [[ARRAYIDX_I42:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I]]
182+
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_I42]], align 4
183+
; OTHER-NEXT: br label %[[MERGE]]
184+
; OTHER: [[MERGE]]:
185+
; OTHER-NEXT: [[RETVAL_0_I3851:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP1]], %[[ELSE]] ]
186+
; OTHER-NEXT: [[RETVAL_0_I43:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
187+
; OTHER-NEXT: [[ADD14:%.*]] = add nsw i32 [[RETVAL_0_I43]], [[X]]
188+
; OTHER-NEXT: [[MUL15:%.*]] = mul nsw i32 [[ADD14]], [[WIDTH]]
189+
; OTHER-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[IV]] to i32
190+
; OTHER-NEXT: [[ADD16:%.*]] = add nsw i32 [[RETVAL_0_I3851]], [[TMP3]]
191+
; OTHER-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD16]], [[MUL15]]
192+
; OTHER-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64
193+
; OTHER-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18]]
194+
; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4
195+
; OTHER-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[RETVAL_0_I43]]
196+
; OTHER-NEXT: [[MUL21:%.*]] = mul nsw i32 [[SUB]], [[WIDTH]]
197+
; OTHER-NEXT: [[SUB22:%.*]] = sub i32 [[TMP3]], [[RETVAL_0_I3851]]
198+
; OTHER-NEXT: [[ADD23:%.*]] = add nsw i32 [[SUB22]], [[MUL21]]
199+
; OTHER-NEXT: [[IDXPROM24:%.*]] = sext i32 [[ADD23]] to i64
200+
; OTHER-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24]]
201+
; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX25]], align 4
202+
; OTHER-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[L_1]], [[TMP4]]
203+
; OTHER-NEXT: [[CMP28:%.*]] = icmp sgt i32 [[L_1]], [[TMP5]]
204+
; OTHER-NEXT: [[AND34:%.*]] = and i1 [[CMP27]], [[CMP28]]
205+
; OTHER-NEXT: br i1 [[AND34]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]]
206+
; OTHER: [[STORE_RES]]:
207+
; OTHER-NEXT: [[CMP32:%.*]] = icmp sgt i32 [[L_1]], [[T_2]]
208+
; OTHER-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 5
209+
; OTHER-NEXT: [[RES:%.*]] = select i1 [[CMP32]], i8 1, i8 2
210+
; OTHER-NEXT: store i8 [[RES]], ptr [[GEP_5]], align 1
211+
; OTHER-NEXT: br label %[[LOOP_LATCH]]
212+
; OTHER: [[LOOP_LATCH]]:
213+
; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
214+
; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
215+
; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
216+
; OTHER: [[EXIT]]:
217+
; OTHER-NEXT: ret void
218+
;
219+
entry:
220+
br label %loop.header
221+
222+
; Load the i32 field of element %iv of %p.1; skip to the latch unless it is
; greater than %t.1 (signed compare).
loop.header:
223+
%iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
224+
%gep = getelementptr { i32, i8, i8, [2 x i8] }, ptr %p.1, i64 %iv
225+
%l.1 = load i32, ptr %gep, align 4
226+
%c.1 = icmp sgt i32 %l.1, %t.1
227+
br i1 %c.1, label %then, label %loop.latch
228+
229+
; Load the byte at offset 4 of the current element. Values above 7 (unsigned)
; bypass the table lookups, so both phis in %merge receive 0.
then:
230+
%gep.4 = getelementptr inbounds nuw i8, ptr %gep, i64 4
231+
%l.2 = load i8, ptr %gep.4, align 4
232+
%c.2 = icmp ugt i8 %l.2, 7
233+
br i1 %c.2, label %merge, label %else
234+
235+
; Chained lookups: the byte indexes @A, and the sign-extended result indexes
; both @B and @C.
else:
236+
%conv.i = zext nneg i8 %l.2 to i64
237+
%gep.A = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 %conv.i
238+
%l.3 = load i8, ptr %gep.A, align 1
239+
%idxprom.i = sext i8 %l.3 to i64
240+
%gep.B = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 %idxprom.i
241+
%l.4 = load i32, ptr %gep.B, align 4
242+
%gep.C = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 %idxprom.i
243+
%l.5 = load i32, ptr %gep.C, align 4
244+
br label %merge
245+
246+
; With a = %merge.1, b = %merge.2 and i = trunc(%iv), load from %p.2 at
; indices (a + i) + (b + %x) * %width and (i - a) + (%x - b) * %width.
; Continue to %store.res only if %l.1 is greater than both loaded values.
merge:
247+
%merge.1 = phi i32 [ 0, %then ], [ %l.4, %else ]
248+
%merge.2 = phi i32 [ 0, %then ], [ %l.5, %else ]
249+
%add14 = add nsw i32 %merge.2, %x
250+
%mul15 = mul nsw i32 %add14, %width
251+
%12 = trunc nuw nsw i64 %iv to i32
252+
%add16 = add nsw i32 %merge.1, %12
253+
%add17 = add nsw i32 %add16, %mul15
254+
%idxprom18 = sext i32 %add17 to i64
255+
%gep.p.2 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %p.2, i64 %idxprom18
256+
%l.6 = load i32, ptr %gep.p.2, align 4
257+
%sub = sub nsw i32 %x, %merge.2
258+
%mul21 = mul nsw i32 %sub, %width
259+
%sub22 = sub i32 %12, %merge.1
260+
%add23 = add nsw i32 %sub22, %mul21
261+
%idxprom24 = sext i32 %add23 to i64
262+
%gep.p2.1 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %p.2, i64 %idxprom24
263+
%l.7 = load i32, ptr %gep.p2.1, align 4
264+
%c.3 = icmp sgt i32 %l.1, %l.6
265+
%c.4 = icmp sgt i32 %l.1, %l.7
266+
%and34 = and i1 %c.3, %c.4
267+
br i1 %and34, label %store.res, label %loop.latch
268+
269+
; Store 1 if %l.1 is greater than %t.2, otherwise 2, to the byte at offset 5
; of the current element.
store.res:
270+
%c.5 = icmp sgt i32 %l.1, %t.2
271+
%gep.5 = getelementptr inbounds nuw i8, ptr %gep, i64 5
272+
%res = select i1 %c.5, i8 1, i8 2
273+
store i8 %res, ptr %gep.5, align 1
274+
br label %loop.latch
275+
276+
; Common latch reached from %loop.header, %merge and %store.res.
loop.latch:
277+
%iv.next = add nuw nsw i64 %iv, 1
278+
%ec = icmp eq i64 %iv.next, %N
279+
br i1 %ec, label %exit, label %loop.header
280+
281+
exit:
282+
ret void
283+
}

0 commit comments

Comments
 (0)