Skip to content

Commit b19c99f

Browse files
committed
Simplify complex-deinterleaving-cdot.ll
1 parent 1bf2f2e commit b19c99f

File tree

1 file changed

+39
-151
lines changed

1 file changed

+39
-151
lines changed

llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

Lines changed: 39 additions & 151 deletions
Original file line number | Diff line number | Diff line change
@@ -4,167 +4,55 @@
44
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
55
target triple = "aarch64-none-unknown-elf"
66

7-
define i32 @cdotp(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) {
7+
define i32 @cdotp() {
88
; CHECK-LABEL: define i32 @cdotp(
9-
; CHECK-SAME: ptr nocapture noundef readonly [[A:%.*]], ptr nocapture noundef readonly [[B:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
1010
; CHECK-NEXT: [[ENTRY:.*]]:
11-
; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
12-
; CHECK-NEXT: br i1 [[CMP28_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
13-
; CHECK: [[FOR_BODY_PREHEADER]]:
14-
; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
15-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
16-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
17-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
18-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
19-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
20-
; CHECK: [[VECTOR_PH]]:
21-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
22-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
23-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
24-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
25-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
26-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
2711
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2812
; CHECK: [[VECTOR_BODY]]:
29-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
30-
; CHECK-NEXT: [[TMP11:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
31-
; CHECK-NEXT: [[INDEX_I:%.*]] = shl nuw nsw i64 [[INDEX]], 1
32-
; CHECK-NEXT: [[A_PTR:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX_I]]
33-
; CHECK-NEXT: [[A_LOAD:%.*]] = load <vscale x 32 x i8>, ptr [[A_PTR]], align 32
34-
; CHECK-NEXT: [[B_PTR:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX_I]]
35-
; CHECK-NEXT: [[B_LOAD:%.*]] = load <vscale x 32 x i8>, ptr [[B_PTR]], align 32
36-
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[A_LOAD]], i64 0)
37-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[B_LOAD]], i64 0)
38-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[A_LOAD]], i64 16)
39-
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[B_LOAD]], i64 16)
40-
; CHECK-NEXT: [[VEC_PHI:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[TMP11]], i64 0)
41-
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[TMP11]], i64 4)
42-
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i32 0)
43-
; CHECK-NEXT: [[TMP21:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> [[TMP13]], <vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i32 0)
44-
; CHECK-NEXT: [[TMP22:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP10]], i64 0)
45-
; CHECK-NEXT: [[TMP20]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP21]], i64 4)
46-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
47-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
48-
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
13+
; CHECK-NEXT: [[TMP0:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
14+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer))
15+
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[TMP1]], i64 0)
16+
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[TMP1]], i64 0)
17+
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[TMP1]], i64 16)
18+
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[TMP1]], i64 16)
19+
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[TMP0]], i64 0)
20+
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[TMP0]], i64 4)
21+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> [[TMP6]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i32 0)
22+
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cdot.nxv4i32(<vscale x 4 x i32> [[TMP7]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i32 0)
23+
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP8]], i64 0)
24+
; CHECK-NEXT: [[TMP11]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP9]], i64 4)
25+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
4926
; CHECK: [[MIDDLE_BLOCK]]:
50-
; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP20]])
51-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
52-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
53-
; CHECK: [[SCALAR_PH]]:
54-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
55-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
56-
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
57-
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT]]:
58-
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], %[[FOR_BODY]] ], [ [[TMP23]], %[[MIDDLE_BLOCK]] ]
59-
; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
60-
; CHECK: [[FOR_COND_CLEANUP]]:
61-
; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUB_LCSSA]], %[[FOR_COND_CLEANUP_LOOPEXIT]] ]
62-
; CHECK-NEXT: ret i32 [[RES_0_LCSSA]]
63-
; CHECK: [[FOR_BODY]]:
64-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
65-
; CHECK-NEXT: [[RES_030:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUB]], %[[FOR_BODY]] ]
66-
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
67-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
68-
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
69-
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP15]] to i32
70-
; CHECK-NEXT: [[TMP16:%.*]] = or disjoint i64 [[TMP14]], 1
71-
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP16]]
72-
; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
73-
; CHECK-NEXT: [[CONV5:%.*]] = sext i8 [[TMP17]] to i32
74-
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
75-
; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1
76-
; CHECK-NEXT: [[CONV10:%.*]] = sext i8 [[TMP18]] to i32
77-
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP16]]
78-
; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX14]], align 1
79-
; CHECK-NEXT: [[CONV15:%.*]] = sext i8 [[TMP19]] to i32
80-
; CHECK-NEXT: [[MUL16:%.*]] = mul nsw i32 [[CONV10]], [[CONV]]
81-
; CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[MUL16]], [[RES_030]]
82-
; CHECK-NEXT: [[MUL18:%.*]] = mul nsw i32 [[CONV15]], [[CONV5]]
83-
; CHECK-NEXT: [[SUB]] = sub i32 [[ADD17]], [[MUL18]]
84-
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
85-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
86-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]]
27+
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP11]])
28+
; CHECK-NEXT: ret i32 [[TMP12]]
8729
;
8830
entry:
89-
%cmp28.not = icmp ult i32 %N, 2
90-
br i1 %cmp28.not, label %for.cond.cleanup, label %for.body.preheader
91-
for.body.preheader: ; preds = %entry
92-
%div27 = lshr i32 %N, 1
93-
%wide.trip.count = zext nneg i32 %div27 to i64
94-
%0 = call i64 @llvm.vscale.i64()
95-
%1 = mul i64 %0, 16
96-
%min.iters.check = icmp ult i64 %wide.trip.count, %1
97-
br i1 %min.iters.check, label %scalar.ph, label %vector.ph
98-
vector.ph: ; preds = %for.body.preheader
99-
%2 = call i64 @llvm.vscale.i64()
100-
%3 = mul i64 %2, 16
101-
%n.mod.vf = urem i64 %wide.trip.count, %3
102-
%n.vec = sub i64 %wide.trip.count, %n.mod.vf
103-
%4 = call i64 @llvm.vscale.i64()
104-
%5 = mul i64 %4, 16
10531
br label %vector.body
106-
vector.body:
107-
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
108-
%vec.phi = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ %partial.reduce.sub, %vector.body ]
109-
%index.i = shl nuw nsw i64 %index, 1
110-
%a.ptr = getelementptr inbounds i8, ptr %a, i64 %index.i
111-
%a.load = load <vscale x 32 x i8>, ptr %a.ptr
112-
%a.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %a.load)
113-
%a.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 0
114-
%a.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 1
115-
%b.ptr = getelementptr inbounds i8, ptr %b, i64 %index.i
116-
%b.load = load <vscale x 32 x i8>, ptr %b.ptr
117-
%b.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %b.load)
118-
%b.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 0
119-
%b.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 1
120-
%a.real.ext = sext <vscale x 16 x i8> %a.real to <vscale x 16 x i32>
121-
%a.imag.ext = sext <vscale x 16 x i8> %a.imag to <vscale x 16 x i32>
122-
%b.real.ext = sext <vscale x 16 x i8> %b.real to <vscale x 16 x i32>
123-
%b.imag.ext = sext <vscale x 16 x i8> %b.imag to <vscale x 16 x i32>
124-
%real.mul = mul nsw <vscale x 16 x i32> %b.real.ext, %a.real.ext
32+
33+
vector.body: ; preds = %vector.body, %entry
34+
%vec.phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %partial.reduce.sub, %vector.body ]
35+
%a.real.ext = sext <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) to <vscale x 16 x i32>
36+
%a.imag.ext = sext <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) to <vscale x 16 x i32>
37+
%b.real.ext = sext <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) to <vscale x 16 x i32>
38+
%b.imag.ext = sext <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) to <vscale x 16 x i32>
39+
%real.mul = mul <vscale x 16 x i32> %b.real.ext, %a.real.ext
12540
%real.mul.reduced = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %vec.phi, <vscale x 16 x i32> %real.mul)
126-
%imag.mul = mul nsw <vscale x 16 x i32> %b.imag.ext, %a.imag.ext
41+
%imag.mul = mul <vscale x 16 x i32> %b.imag.ext, %a.imag.ext
12742
%imag.mul.neg = sub <vscale x 16 x i32> zeroinitializer, %imag.mul
12843
%partial.reduce.sub = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %real.mul.reduced, <vscale x 16 x i32> %imag.mul.neg)
129-
%index.next = add nuw i64 %index, %5
130-
%22 = icmp eq i64 %index.next, %n.vec
131-
br i1 %22, label %middle.block, label %vector.body
44+
br i1 true, label %middle.block, label %vector.body
45+
13246
middle.block: ; preds = %vector.body
133-
%25 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %partial.reduce.sub)
134-
%cmp.n = icmp eq i64 %wide.trip.count, %n.vec
135-
br i1 %cmp.n, label %for.cond.cleanup.loopexit, label %scalar.ph
136-
scalar.ph: ; preds = %middle.block, %for.body.preheader
137-
%bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %for.body.preheader ]
138-
%bc.merge.rdx = phi i32 [ %25, %middle.block ], [ 0, %for.body.preheader ]
139-
br label %for.body
140-
for.cond.cleanup.loopexit: ; preds = %middle.block, %for.body
141-
%sub.lcssa = phi i32 [ %sub, %for.body ], [ %25, %middle.block ]
142-
br label %for.cond.cleanup
143-
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
144-
%res.0.lcssa = phi i32 [ 0, %entry ], [ %sub.lcssa, %for.cond.cleanup.loopexit ]
145-
ret i32 %res.0.lcssa
146-
for.body: ; preds = %scalar.ph, %for.body
147-
%indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %for.body ]
148-
%res.030 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %sub, %for.body ]
149-
%26 = shl nuw nsw i64 %indvars.iv, 1
150-
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %26
151-
%27 = load i8, ptr %arrayidx, align 1
152-
%conv = sext i8 %27 to i32
153-
%28 = or disjoint i64 %26, 1
154-
%arrayidx4 = getelementptr inbounds i8, ptr %a, i64 %28
155-
%29 = load i8, ptr %arrayidx4, align 1
156-
%conv5 = sext i8 %29 to i32
157-
%arrayidx9 = getelementptr inbounds i8, ptr %b, i64 %26
158-
%30 = load i8, ptr %arrayidx9, align 1
159-
%conv10 = sext i8 %30 to i32
160-
%arrayidx14 = getelementptr inbounds i8, ptr %b, i64 %28
161-
%31 = load i8, ptr %arrayidx14, align 1
162-
%conv15 = sext i8 %31 to i32
163-
%mul16 = mul nsw i32 %conv10, %conv
164-
%add17 = add nsw i32 %mul16, %res.030
165-
%mul18 = mul nsw i32 %conv15, %conv5
166-
%sub = sub i32 %add17, %mul18
167-
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
168-
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
169-
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
47+
%0 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %partial.reduce.sub)
48+
ret i32 %0
17049
}
50+
51+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
52+
declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32>, <vscale x 16 x i32>) #0
53+
54+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
55+
declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>) #1
56+
57+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
58+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

0 commit comments

Comments (0)