Skip to content

Commit aaa16eb

Browse files
committed
[LV][AArch64] Add test for scalar interleaving with predication. NFC
1 parent be7c865 commit aaa16eb

File tree

1 file changed

+248
-0
lines changed

1 file changed

+248
-0
lines changed
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -loop-vectorize -S -o - < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5+
target triple = "aarch64-arm-none-eabi"
6+
7+
; This test is not vectorized on AArch64 due to requiring predicated loads.
8+
; It should also not be interleaved as the predicated interleaving will just
9+
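; create less efficient code. The autogenerated assertions below record the
; current behaviour, in which the inner loop is still interleaved by 2 using
; scalar predicated loads.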
10+
11+
; @arm_correlate_f16 reads two half-precision input buffers and stores one
; fast-math multiply-accumulate sum through the destination pointer per
; outer-loop iteration.
;   %pSrcA / %srcALen - first input pointer and the i32 value used as an
;                       unsigned bound on its load index
;   %pSrcB / %srcBLen - second input pointer and its i32 bound
;   %pDst             - output pointer (only stored through)
; The outer loop exits once its counter equals srcALen + srcBLen - 1. The inner
; loop only loads when both index bounds hold, so its loads are conditional.
define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noundef %srcALen, half* nocapture noundef readonly %pSrcB, i32 noundef %srcBLen, half* nocapture noundef writeonly %pDst) {
12+
; CHECK-LABEL: @arm_correlate_f16(
13+
; CHECK-NEXT: entry:
14+
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[SRCBLEN:%.*]], -1
15+
; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[SUB]] to i64
16+
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds half, half* [[PSRCB:%.*]], i64 [[IDX_EXT]]
17+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SRCALEN:%.*]], -2
18+
; CHECK-NEXT: [[SUB1:%.*]] = add i32 [[ADD]], [[SRCBLEN]]
19+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SRCALEN]], [[SRCBLEN]]
20+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
21+
; CHECK: if.then:
22+
; CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[SRCALEN]], [[SRCBLEN]]
23+
; CHECK-NEXT: [[IDX_EXT3:%.*]] = zext i32 [[SUB2]] to i64
24+
; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds half, half* [[PDST:%.*]], i64 [[IDX_EXT3]]
25+
; CHECK-NEXT: br label [[IF_END12:%.*]]
26+
; CHECK: if.else:
27+
; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[SRCALEN]], [[SRCBLEN]]
28+
; CHECK-NEXT: br i1 [[CMP5]], label [[IF_THEN6:%.*]], label [[IF_END12]]
29+
; CHECK: if.then6:
30+
; CHECK-NEXT: [[SUB7:%.*]] = add i32 [[SRCALEN]], -1
31+
; CHECK-NEXT: [[IDX_EXT8:%.*]] = zext i32 [[SUB7]] to i64
32+
; CHECK-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds half, half* [[PSRCA:%.*]], i64 [[IDX_EXT8]]
33+
; CHECK-NEXT: [[IDX_EXT10:%.*]] = zext i32 [[SUB1]] to i64
34+
; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds half, half* [[PDST]], i64 [[IDX_EXT10]]
35+
; CHECK-NEXT: br label [[IF_END12]]
36+
; CHECK: if.end12:
37+
; CHECK-NEXT: [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN]], [[IF_THEN]] ], [ [[SRCBLEN]], [[IF_THEN6]] ], [ [[SRCALEN]], [[IF_ELSE]] ]
38+
; CHECK-NEXT: [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_THEN6]] ], [ [[SRCBLEN]], [[IF_ELSE]] ]
39+
; CHECK-NEXT: [[PDST_ADDR_0:%.*]] = phi half* [ [[ADD_PTR4]], [[IF_THEN]] ], [ [[ADD_PTR11]], [[IF_THEN6]] ], [ [[PDST]], [[IF_ELSE]] ]
40+
; CHECK-NEXT: [[PIN1_0:%.*]] = phi half* [ [[PSRCA]], [[IF_THEN]] ], [ [[PSRCB]], [[IF_THEN6]] ], [ [[PSRCA]], [[IF_ELSE]] ]
41+
; CHECK-NEXT: [[PIN2_0:%.*]] = phi half* [ [[ADD_PTR]], [[IF_THEN]] ], [ [[ADD_PTR9]], [[IF_THEN6]] ], [ [[ADD_PTR]], [[IF_ELSE]] ]
42+
; CHECK-NEXT: [[CMP27:%.*]] = phi i64 [ 1, [[IF_THEN]] ], [ -1, [[IF_THEN6]] ], [ 1, [[IF_ELSE]] ]
43+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SRCBLEN]], [[SRCALEN]]
44+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1
45+
; CHECK-NEXT: br label [[FOR_COND14_PREHEADER:%.*]]
46+
; CHECK: for.cond14.preheader:
47+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 1, [[IF_END12]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_END:%.*]] ]
48+
; CHECK-NEXT: [[I_077:%.*]] = phi i32 [ 0, [[IF_END12]] ], [ [[INC33:%.*]], [[FOR_END]] ]
49+
; CHECK-NEXT: [[PDST_ADDR_176:%.*]] = phi half* [ [[PDST_ADDR_0]], [[IF_END12]] ], [ [[PDST_ADDR_2:%.*]], [[FOR_END]] ]
50+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[INDVARS_IV]], 2
51+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
52+
; CHECK: vector.scevcheck:
53+
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[I_077]])
54+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
55+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
56+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[I_077]], [[MUL_RESULT]]
57+
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], [[I_077]]
58+
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
59+
; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
60+
; CHECK: vector.ph:
61+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[INDVARS_IV]], 2
62+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[INDVARS_IV]], [[N_MOD_VF]]
63+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
64+
; CHECK: vector.body:
65+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE9:%.*]] ]
66+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi half [ 0xH0000, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_LOAD_CONTINUE9]] ]
67+
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi half [ 0xH0000, [[VECTOR_PH]] ], [ [[PREDPHI10:%.*]], [[PRED_LOAD_CONTINUE9]] ]
68+
; CHECK-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
69+
; CHECK-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 1
70+
; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[I_077]], [[INDUCTION]]
71+
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[I_077]], [[INDUCTION2]]
72+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP5]], [[SRCBLEN_ADDR_0]]
73+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP6]], [[SRCBLEN_ADDR_0]]
74+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i32 [[INDUCTION]], [[SRCALEN_ADDR_0]]
75+
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[INDUCTION2]], [[SRCALEN_ADDR_0]]
76+
; CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP7]]
77+
; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP8]]
78+
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
79+
; CHECK: pred.load.if:
80+
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[INDUCTION]] to i64
81+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, half* [[PIN1_0]], i64 [[TMP13]]
82+
; CHECK-NEXT: [[TMP15:%.*]] = load half, half* [[TMP14]], align 2
83+
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
84+
; CHECK: pred.load.continue:
85+
; CHECK-NEXT: [[TMP16:%.*]] = phi half [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_LOAD_IF]] ]
86+
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
87+
; CHECK: pred.load.if4:
88+
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[INDUCTION2]] to i64
89+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, half* [[PIN1_0]], i64 [[TMP17]]
90+
; CHECK-NEXT: [[TMP19:%.*]] = load half, half* [[TMP18]], align 2
91+
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]]
92+
; CHECK: pred.load.continue5:
93+
; CHECK-NEXT: [[TMP20:%.*]] = phi half [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], [[PRED_LOAD_IF4]] ]
94+
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
95+
; CHECK: pred.load.if6:
96+
; CHECK-NEXT: [[TMP21:%.*]] = sub nsw i32 0, [[TMP5]]
97+
; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[TMP21]] to i64
98+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds half, half* [[PIN2_0]], i64 [[TMP22]]
99+
; CHECK-NEXT: [[TMP24:%.*]] = load half, half* [[TMP23]], align 2
100+
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE7]]
101+
; CHECK: pred.load.continue7:
102+
; CHECK-NEXT: [[TMP25:%.*]] = phi half [ poison, [[PRED_LOAD_CONTINUE5]] ], [ [[TMP24]], [[PRED_LOAD_IF6]] ]
103+
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9]]
104+
; CHECK: pred.load.if8:
105+
; CHECK-NEXT: [[TMP26:%.*]] = sub nsw i32 0, [[TMP6]]
106+
; CHECK-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
107+
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds half, half* [[PIN2_0]], i64 [[TMP27]]
108+
; CHECK-NEXT: [[TMP29:%.*]] = load half, half* [[TMP28]], align 2
109+
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE9]]
110+
; CHECK: pred.load.continue9:
111+
; CHECK-NEXT: [[TMP30:%.*]] = phi half [ poison, [[PRED_LOAD_CONTINUE7]] ], [ [[TMP29]], [[PRED_LOAD_IF8]] ]
112+
; CHECK-NEXT: [[TMP31:%.*]] = fmul fast half [[TMP25]], [[TMP16]]
113+
; CHECK-NEXT: [[TMP32:%.*]] = fmul fast half [[TMP30]], [[TMP20]]
114+
; CHECK-NEXT: [[TMP33:%.*]] = fadd fast half [[TMP31]], [[VEC_PHI]]
115+
; CHECK-NEXT: [[TMP34:%.*]] = fadd fast half [[TMP32]], [[VEC_PHI3]]
116+
; CHECK-NEXT: [[TMP35:%.*]] = xor i1 [[TMP11]], true
117+
; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP12]], true
118+
; CHECK-NEXT: [[PREDPHI]] = select i1 [[TMP35]], half [[VEC_PHI]], half [[TMP33]]
119+
; CHECK-NEXT: [[PREDPHI10]] = select i1 [[TMP36]], half [[VEC_PHI3]], half [[TMP34]]
120+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
121+
; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
122+
; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
123+
; CHECK: middle.block:
124+
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast half [[PREDPHI10]], [[PREDPHI]]
125+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[INDVARS_IV]], [[N_VEC]]
126+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[SCALAR_PH]]
127+
; CHECK: scalar.ph:
128+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_COND14_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
129+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ 0xH0000, [[VECTOR_SCEVCHECK]] ], [ 0xH0000, [[FOR_COND14_PREHEADER]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
130+
; CHECK-NEXT: br label [[FOR_BODY16:%.*]]
131+
; CHECK: for.body16:
132+
; CHECK-NEXT: [[J_074:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
133+
; CHECK-NEXT: [[SUM_073:%.*]] = phi half [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_1:%.*]], [[FOR_INC]] ]
134+
; CHECK-NEXT: [[SUB17:%.*]] = sub i32 [[I_077]], [[J_074]]
135+
; CHECK-NEXT: [[CMP18:%.*]] = icmp ult i32 [[SUB17]], [[SRCBLEN_ADDR_0]]
136+
; CHECK-NEXT: [[CMP19:%.*]] = icmp ult i32 [[J_074]], [[SRCALEN_ADDR_0]]
137+
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP19]], [[CMP18]]
138+
; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN20:%.*]], label [[FOR_INC]]
139+
; CHECK: if.then20:
140+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[J_074]] to i64
141+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, half* [[PIN1_0]], i64 [[IDXPROM]]
142+
; CHECK-NEXT: [[TMP38:%.*]] = load half, half* [[ARRAYIDX]], align 2
143+
; CHECK-NEXT: [[SUB22:%.*]] = sub nsw i32 0, [[SUB17]]
144+
; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[SUB22]] to i64
145+
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds half, half* [[PIN2_0]], i64 [[IDXPROM23]]
146+
; CHECK-NEXT: [[TMP39:%.*]] = load half, half* [[ARRAYIDX24]], align 2
147+
; CHECK-NEXT: [[MUL:%.*]] = fmul fast half [[TMP39]], [[TMP38]]
148+
; CHECK-NEXT: [[ADD25:%.*]] = fadd fast half [[MUL]], [[SUM_073]]
149+
; CHECK-NEXT: br label [[FOR_INC]]
150+
; CHECK: for.inc:
151+
; CHECK-NEXT: [[SUM_1]] = phi half [ [[ADD25]], [[IF_THEN20]] ], [ [[SUM_073]], [[FOR_BODY16]] ]
152+
; CHECK-NEXT: [[INC]] = add nuw i32 [[J_074]], 1
153+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[INDVARS_IV]]
154+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY16]], !llvm.loop [[LOOP2:![0-9]+]]
155+
; CHECK: for.end:
156+
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi half [ [[SUM_1]], [[FOR_INC]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
157+
; CHECK-NEXT: [[PDST_ADDR_2]] = getelementptr inbounds half, half* [[PDST_ADDR_176]], i64 [[CMP27]]
158+
; CHECK-NEXT: store half [[SUM_1_LCSSA]], half* [[PDST_ADDR_176]], align 2
159+
; CHECK-NEXT: [[INC33]] = add nuw i32 [[I_077]], 1
160+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
161+
; CHECK-NEXT: [[EXITCOND78_NOT:%.*]] = icmp eq i32 [[INC33]], [[TMP1]]
162+
; CHECK-NEXT: br i1 [[EXITCOND78_NOT]], label [[FOR_END34:%.*]], label [[FOR_COND14_PREHEADER]]
163+
; CHECK: for.end34:
164+
; CHECK-NEXT: ret void
165+
;
166+
; Precompute values the later blocks select between: a pointer to element
; srcBLen-1 of pSrcB, and srcALen + srcBLen - 2 (the pDst offset used in
; if.then6). Then branch on srcALen > srcBLen (unsigned).
entry:
167+
%sub = add i32 %srcBLen, -1
168+
%idx.ext = zext i32 %sub to i64
169+
%add.ptr = getelementptr inbounds half, half* %pSrcB, i64 %idx.ext
170+
%add = add i32 %srcALen, -2
171+
%sub1 = add i32 %add, %srcBLen
172+
%cmp = icmp ugt i32 %srcALen, %srcBLen
173+
br i1 %cmp, label %if.then, label %if.else
174+
175+
; srcALen > srcBLen: the destination starts at pDst + (srcALen - srcBLen).
if.then: ; preds = %entry
176+
%sub2 = sub i32 %srcALen, %srcBLen
177+
%idx.ext3 = zext i32 %sub2 to i64
178+
%add.ptr4 = getelementptr inbounds half, half* %pDst, i64 %idx.ext3
179+
br label %if.end12
180+
181+
; srcALen <= srcBLen: only the strictly-smaller case goes through if.then6.
if.else: ; preds = %entry
182+
%cmp5 = icmp ult i32 %srcALen, %srcBLen
183+
br i1 %cmp5, label %if.then6, label %if.end12
184+
185+
; srcALen < srcBLen: form the pointer to element srcALen-1 of pSrcA and the
; destination start pDst + (srcALen + srcBLen - 2); the phis in if.end12 swap
; the two inputs for this path.
if.then6: ; preds = %if.else
186+
%sub7 = add i32 %srcALen, -1
187+
%idx.ext8 = zext i32 %sub7 to i64
188+
%add.ptr9 = getelementptr inbounds half, half* %pSrcA, i64 %idx.ext8
189+
%idx.ext10 = zext i32 %sub1 to i64
190+
%add.ptr11 = getelementptr inbounds half, half* %pDst, i64 %idx.ext10
191+
br label %if.end12
192+
193+
; Merge the three paths. Coming from if.then6 the two bounds and the two input
; pointers are swapped and %cmp27 is -1; on the other paths %cmp27 is 1.
; Despite its name, %cmp27 is an i64 step that for.end uses to advance the
; destination pointer. %1 = srcALen + srcBLen - 1 is the outer-loop exit value.
if.end12: ; preds = %if.else, %if.then6, %if.then
194+
%srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.then6 ], [ %srcALen, %if.else ]
195+
%srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.then6 ], [ %srcBLen, %if.else ]
196+
%pDst.addr.0 = phi half* [ %add.ptr4, %if.then ], [ %add.ptr11, %if.then6 ], [ %pDst, %if.else ]
197+
%pIn1.0 = phi half* [ %pSrcA, %if.then ], [ %pSrcB, %if.then6 ], [ %pSrcA, %if.else ]
198+
%pIn2.0 = phi half* [ %add.ptr, %if.then ], [ %add.ptr9, %if.then6 ], [ %add.ptr, %if.else ]
199+
%cmp27 = phi i64 [ 1, %if.then ], [ -1, %if.then6 ], [ 1, %if.else ]
200+
%0 = add i32 %srcBLen, %srcALen
201+
%1 = add i32 %0, -1
202+
br label %for.cond14.preheader
203+
204+
; Outer loop header. %i.077 counts up from 0; %indvars.iv counts up from 1 and
; is the exit value of the inner loop's index.
for.cond14.preheader: ; preds = %if.end12, %for.end
205+
%indvars.iv = phi i32 [ 1, %if.end12 ], [ %indvars.iv.next, %for.end ]
206+
%i.077 = phi i32 [ 0, %if.end12 ], [ %inc33, %for.end ]
207+
%pDst.addr.176 = phi half* [ %pDst.addr.0, %if.end12 ], [ %pDst.addr.2, %for.end ]
208+
br label %for.body16
209+
210+
; Inner loop. %j.074 is the index and %sum.073 the running sum, starting at
; 0xH0000. The loads in if.then20 run only when both j < %srcALen.addr.0 and
; (i - j) < %srcBLen.addr.0 hold as unsigned compares.
for.body16: ; preds = %for.cond14.preheader, %for.inc
211+
%j.074 = phi i32 [ 0, %for.cond14.preheader ], [ %inc, %for.inc ]
212+
%sum.073 = phi half [ 0xH0000, %for.cond14.preheader ], [ %sum.1, %for.inc ]
213+
%sub17 = sub i32 %i.077, %j.074
214+
%cmp18 = icmp ult i32 %sub17, %srcBLen.addr.0
215+
%cmp19 = icmp ult i32 %j.074, %srcALen.addr.0
216+
%or.cond = and i1 %cmp19, %cmp18
217+
br i1 %or.cond, label %if.then20, label %for.inc
218+
219+
; Guarded loads: %pIn1.0[j] and %pIn2.0[-(i - j)], then sum += product using
; fast-math fmul/fadd.
if.then20: ; preds = %for.body16
220+
%idxprom = zext i32 %j.074 to i64
221+
%arrayidx = getelementptr inbounds half, half* %pIn1.0, i64 %idxprom
222+
%2 = load half, half* %arrayidx, align 2
223+
%sub22 = sub nsw i32 0, %sub17
224+
%idxprom23 = sext i32 %sub22 to i64
225+
%arrayidx24 = getelementptr inbounds half, half* %pIn2.0, i64 %idxprom23
226+
%3 = load half, half* %arrayidx24, align 2
227+
%mul = fmul fast half %3, %2
228+
%add25 = fadd fast half %mul, %sum.073
229+
br label %for.inc
230+
231+
; Pick the updated or unchanged sum, increment j, and leave the inner loop once
; the incremented index equals %indvars.iv.
for.inc: ; preds = %for.body16, %if.then20
232+
%sum.1 = phi half [ %add25, %if.then20 ], [ %sum.073, %for.body16 ]
233+
%inc = add nuw i32 %j.074, 1
234+
%exitcond = icmp eq i32 %inc, %indvars.iv
235+
br i1 %exitcond, label %for.end, label %for.body16
236+
237+
; Store the sum to the current destination, step the destination pointer by
; %cmp27 (+1 or -1), and advance both outer-loop counters.
for.end: ; preds = %for.inc
238+
%sum.1.lcssa = phi half [ %sum.1, %for.inc ]
239+
%pDst.addr.2 = getelementptr inbounds half, half* %pDst.addr.176, i64 %cmp27
240+
store half %sum.1.lcssa, half* %pDst.addr.176, align 2
241+
%inc33 = add nuw i32 %i.077, 1
242+
%indvars.iv.next = add i32 %indvars.iv, 1
243+
%exitcond78.not = icmp eq i32 %inc33, %1
244+
br i1 %exitcond78.not, label %for.end34, label %for.cond14.preheader
245+
246+
for.end34: ; preds = %for.end
247+
ret void
248+
}

0 commit comments

Comments
 (0)