Skip to content

Commit c03fc92

Browse files
authored
[DirectX] Add support for vector_reduce_add (#117646)
Use of `vector_reduce_add` will make it easier to write more intrinsics in `hlsl_intrinsics.h`.
1 parent 65d2177 commit c03fc92

File tree

2 files changed

+380
-0
lines changed

2 files changed

+380
-0
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,43 @@ static bool isIntrinsicExpansion(Function &F) {
6767
case Intrinsic::dx_sign:
6868
case Intrinsic::dx_step:
6969
case Intrinsic::dx_radians:
70+
case Intrinsic::vector_reduce_add:
71+
case Intrinsic::vector_reduce_fadd:
7072
return true;
7173
}
7274
return false;
7375
}
76+
/// Expand a call to llvm.vector.reduce.add or llvm.vector.reduce.fadd into a
/// linear chain of scalar extractelement + add/fadd instructions, emitted
/// immediately before \p Orig.
///
/// \param Orig        The intrinsic call being expanded. For the fadd form,
///                    operand 0 is the scalar start value and operand 1 is
///                    the vector; for the integer form operand 0 is the vector.
/// \param IntrinsicId Must be Intrinsic::vector_reduce_add or
///                    Intrinsic::vector_reduce_fadd.
/// \returns The scalar value holding the reduction result.
static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
  assert(IntrinsicId == Intrinsic::vector_reduce_add ||
         IntrinsicId == Intrinsic::vector_reduce_fadd);

  IRBuilder<> Builder(Orig);
  bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);

  Value *X = Orig->getOperand(IsFAdd ? 1 : 0);
  // cast<> asserts if the operand is not a fixed-width vector, rather than
  // dereferencing the null pointer an unchecked dyn_cast<> would return.
  auto *XVec = cast<FixedVectorType>(X->getType());
  unsigned XVecSize = XVec->getNumElements();
  Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));

  // Fold the start value into the floating-point reduction. Only a constant
  // start for which isZeroValue() holds is skipped; a non-constant start is
  // unknown at this point and must always be added, otherwise it would be
  // silently dropped from the result.
  if (IsFAdd) {
    Value *StartValue = Orig->getOperand(0);
    auto *StartConst = dyn_cast<Constant>(StartValue);
    if (!StartConst || !StartConst->isZeroValue())
      Sum = Builder.CreateFAdd(Sum, StartValue);
  }

  // Accumulate the remaining vector elements in index order.
  for (unsigned I = 1; I < XVecSize; I++) {
    Value *Elt = Builder.CreateExtractElement(X, I);
    if (IsFAdd)
      Sum = Builder.CreateFAdd(Sum, Elt);
    else
      Sum = Builder.CreateAdd(Sum, Elt);
  }

  return Sum;
}
74107

75108
static Value *expandAbs(CallInst *Orig) {
76109
Value *X = Orig->getOperand(0);
@@ -580,6 +613,10 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
580613
case Intrinsic::dx_radians:
581614
Result = expandRadiansIntrinsic(Orig);
582615
break;
616+
case Intrinsic::vector_reduce_add:
617+
case Intrinsic::vector_reduce_fadd:
618+
Result = expandVecReduceAdd(Orig, IntrinsicId);
619+
break;
583620
}
584621
if (Result) {
585622
Orig->replaceAllUsesWith(Result);
Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
3+
4+
; Make sure llvm.vector.reduce.fadd and llvm.vector.reduce.add calls are expanded into scalar extractelement and fadd/add instructions.
5+
6+
define noundef half @test_length_half2(<2 x half> noundef %p0) {
7+
; CHECK-LABEL: define noundef half @test_length_half2(
8+
; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[P0]], i64 0
11+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[P0]], i64 1
12+
; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
13+
; CHECK-NEXT: ret half [[TMP2]]
14+
;
15+
entry:
16+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %p0)
17+
ret half %rdx.fadd
18+
}
19+
20+
define noundef half @test_length_half2_start1(<2 x half> noundef %p0) {
21+
; CHECK-LABEL: define noundef half @test_length_half2_start1(
22+
; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
23+
; CHECK-NEXT: [[ENTRY:.*:]]
24+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[P0]], i64 0
25+
; CHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH0001
26+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x half> [[P0]], i64 1
27+
; CHECK-NEXT: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
28+
; CHECK-NEXT: ret half [[TMP3]]
29+
;
30+
entry:
31+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0001, <2 x half> %p0)
32+
ret half %rdx.fadd
33+
}
34+
35+
define noundef half @test_length_half3(<3 x half> noundef %p0) {
36+
; CHECK-LABEL: define noundef half @test_length_half3(
37+
; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
38+
; CHECK-NEXT: [[ENTRY:.*:]]
39+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x half> [[P0]], i64 0
40+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x half> [[P0]], i64 1
41+
; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
42+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x half> [[P0]], i64 2
43+
; CHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
44+
; CHECK-NEXT: ret half [[TMP4]]
45+
;
46+
entry:
47+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> %p0)
48+
ret half %rdx.fadd
49+
}
50+
51+
define noundef half @test_length_half4(<4 x half> noundef %p0) {
52+
; CHECK-LABEL: define noundef half @test_length_half4(
53+
; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
54+
; CHECK-NEXT: [[ENTRY:.*:]]
55+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[P0]], i64 0
56+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x half> [[P0]], i64 1
57+
; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
58+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[P0]], i64 2
59+
; CHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
60+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x half> [[P0]], i64 3
61+
; CHECK-NEXT: [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
62+
; CHECK-NEXT: ret half [[TMP6]]
63+
;
64+
entry:
65+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> %p0)
66+
ret half %rdx.fadd
67+
}
68+
69+
define noundef float @test_length_float2(<2 x float> noundef %p0) {
70+
; CHECK-LABEL: define noundef float @test_length_float2(
71+
; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
72+
; CHECK-NEXT: [[ENTRY:.*:]]
73+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[P0]], i64 0
74+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[P0]], i64 1
75+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
76+
; CHECK-NEXT: ret float [[TMP2]]
77+
;
78+
entry:
79+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %p0)
80+
ret float %rdx.fadd
81+
}
82+
83+
define noundef float @test_length_float3(<3 x float> noundef %p0) {
84+
; CHECK-LABEL: define noundef float @test_length_float3(
85+
; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
86+
; CHECK-NEXT: [[ENTRY:.*:]]
87+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[P0]], i64 0
88+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[P0]], i64 1
89+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
90+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[P0]], i64 2
91+
; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
92+
; CHECK-NEXT: ret float [[TMP4]]
93+
;
94+
entry:
95+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> %p0)
96+
ret float %rdx.fadd
97+
}
98+
99+
define noundef float @test_length_float3_start1(<3 x float> noundef %p0) {
100+
; CHECK-LABEL: define noundef float @test_length_float3_start1(
101+
; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
102+
; CHECK-NEXT: [[ENTRY:.*:]]
103+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[P0]], i64 0
104+
; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
105+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x float> [[P0]], i64 1
106+
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
107+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x float> [[P0]], i64 2
108+
; CHECK-NEXT: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
109+
; CHECK-NEXT: ret float [[TMP5]]
110+
;
111+
entry:
112+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v3f32(float 1.000000e+00, <3 x float> %p0)
113+
ret float %rdx.fadd
114+
}
115+
116+
define noundef float @test_length_float4(<4 x float> noundef %p0) {
117+
; CHECK-LABEL: define noundef float @test_length_float4(
118+
; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
119+
; CHECK-NEXT: [[ENTRY:.*:]]
120+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[P0]], i64 0
121+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[P0]], i64 1
122+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
123+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[P0]], i64 2
124+
; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
125+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[P0]], i64 3
126+
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
127+
; CHECK-NEXT: ret float [[TMP6]]
128+
;
129+
entry:
130+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %p0)
131+
ret float %rdx.fadd
132+
}
133+
134+
define noundef double @test_length_double2(<2 x double> noundef %p0) {
135+
; CHECK-LABEL: define noundef double @test_length_double2(
136+
; CHECK-SAME: <2 x double> noundef [[P0:%.*]]) {
137+
; CHECK-NEXT: [[ENTRY:.*:]]
138+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[P0]], i64 0
139+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[P0]], i64 1
140+
; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
141+
; CHECK-NEXT: ret double [[TMP2]]
142+
;
143+
entry:
144+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> %p0)
145+
ret double %rdx.fadd
146+
}
147+
148+
define noundef double @test_length_double3(<3 x double> noundef %p0) {
149+
; CHECK-LABEL: define noundef double @test_length_double3(
150+
; CHECK-SAME: <3 x double> noundef [[P0:%.*]]) {
151+
; CHECK-NEXT: [[ENTRY:.*:]]
152+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[P0]], i64 0
153+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x double> [[P0]], i64 1
154+
; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
155+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x double> [[P0]], i64 2
156+
; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
157+
; CHECK-NEXT: ret double [[TMP4]]
158+
;
159+
entry:
160+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> %p0)
161+
ret double %rdx.fadd
162+
}
163+
164+
define noundef double @test_length_double4(<4 x double> noundef %p0) {
165+
; CHECK-LABEL: define noundef double @test_length_double4(
166+
; CHECK-SAME: <4 x double> noundef [[P0:%.*]]) {
167+
; CHECK-NEXT: [[ENTRY:.*:]]
168+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x double> [[P0]], i64 0
169+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x double> [[P0]], i64 1
170+
; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
171+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[P0]], i64 2
172+
; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
173+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[P0]], i64 3
174+
; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]]
175+
; CHECK-NEXT: ret double [[TMP6]]
176+
;
177+
entry:
178+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %p0)
179+
ret double %rdx.fadd
180+
}
181+
182+
define noundef double @test_length_double4_start1(<4 x double> noundef %p0) {
183+
; CHECK-LABEL: define noundef double @test_length_double4_start1(
184+
; CHECK-SAME: <4 x double> noundef [[P0:%.*]]) {
185+
; CHECK-NEXT: [[ENTRY:.*:]]
186+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x double> [[P0]], i64 0
187+
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
188+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[P0]], i64 1
189+
; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
190+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[P0]], i64 2
191+
; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP3]], [[TMP4]]
192+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[P0]], i64 3
193+
; CHECK-NEXT: [[TMP7:%.*]] = fadd double [[TMP5]], [[TMP6]]
194+
; CHECK-NEXT: ret double [[TMP7]]
195+
;
196+
entry:
197+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v4f64(double 1.000000e+00, <4 x double> %p0)
198+
ret double %rdx.fadd
199+
}
200+
201+
define noundef i16 @test_length_short2(<2 x i16> noundef %p0) {
202+
; CHECK-LABEL: define noundef i16 @test_length_short2(
203+
; CHECK-SAME: <2 x i16> noundef [[P0:%.*]]) {
204+
; CHECK-NEXT: [[ENTRY:.*:]]
205+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i16> [[P0]], i64 0
206+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[P0]], i64 1
207+
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
208+
; CHECK-NEXT: ret i16 [[TMP2]]
209+
;
210+
entry:
211+
%rdx.add = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %p0)
212+
ret i16 %rdx.add
213+
}
214+
215+
; Integer reduction of a <3 x i16> vector via llvm.vector.reduce.add.
define noundef i16 @test_length_short3(<3 x i16> noundef %p0) {
; CHECK-LABEL: define noundef i16 @test_length_short3(
; CHECK-SAME: <3 x i16> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i16> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i16> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
  %rdx.add = call i16 @llvm.vector.reduce.add.v3i16(<3 x i16> %p0)
  ret i16 %rdx.add
}
230+
231+
; Integer reduction of a <4 x i16> vector via llvm.vector.reduce.add.
define noundef i16 @test_length_short4(<4 x i16> noundef %p0) {
; CHECK-LABEL: define noundef i16 @test_length_short4(
; CHECK-SAME: <4 x i16> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[P0]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = add i16 [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret i16 [[TMP6]]
;
entry:
  %rdx.add = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %p0)
  ret i16 %rdx.add
}
248+
249+
define noundef i32 @test_length_int2(<2 x i32> noundef %p0) {
250+
; CHECK-LABEL: define noundef i32 @test_length_int2(
251+
; CHECK-SAME: <2 x i32> noundef [[P0:%.*]]) {
252+
; CHECK-NEXT: [[ENTRY:.*:]]
253+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[P0]], i64 0
254+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[P0]], i64 1
255+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
256+
; CHECK-NEXT: ret i32 [[TMP2]]
257+
;
258+
entry:
259+
%rdx.add = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %p0)
260+
ret i32 %rdx.add
261+
}
262+
263+
; Integer reduction of a <3 x i32> vector via llvm.vector.reduce.add.
define noundef i32 @test_length_int3(<3 x i32> noundef %p0) {
; CHECK-LABEL: define noundef i32 @test_length_int3(
; CHECK-SAME: <3 x i32> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i32> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i32> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i32> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i32 [[TMP4]]
;
entry:
  %rdx.add = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %p0)
  ret i32 %rdx.add
}
278+
279+
; Integer reduction of a <4 x i32> vector via llvm.vector.reduce.add.
define noundef i32 @test_length_int4(<4 x i32> noundef %p0) {
; CHECK-LABEL: define noundef i32 @test_length_int4(
; CHECK-SAME: <4 x i32> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[P0]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
  %rdx.add = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %p0)
  ret i32 %rdx.add
}
296+
297+
define noundef i64 @test_length_int64_2(<2 x i64> noundef %p0) {
298+
; CHECK-LABEL: define noundef i64 @test_length_int64_2(
299+
; CHECK-SAME: <2 x i64> noundef [[P0:%.*]]) {
300+
; CHECK-NEXT: [[ENTRY:.*:]]
301+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[P0]], i64 0
302+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[P0]], i64 1
303+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
304+
; CHECK-NEXT: ret i64 [[TMP2]]
305+
;
306+
entry:
307+
%rdx.add = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %p0)
308+
ret i64 %rdx.add
309+
}
310+
311+
; Integer reduction of a <3 x i64> vector via llvm.vector.reduce.add.
define noundef i64 @test_length_int64_3(<3 x i64> noundef %p0) {
; CHECK-LABEL: define noundef i64 @test_length_int64_3(
; CHECK-SAME: <3 x i64> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i64> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
entry:
  %rdx.add = call i64 @llvm.vector.reduce.add.v3i64(<3 x i64> %p0)
  ret i64 %rdx.add
}
326+
327+
; Integer reduction of a <4 x i64> vector via llvm.vector.reduce.add.
define noundef i64 @test_length_int64_4(<4 x i64> noundef %p0) {
; CHECK-LABEL: define noundef i64 @test_length_int64_4(
; CHECK-SAME: <4 x i64> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[P0]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret i64 [[TMP6]]
;
entry:
  %rdx.add = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %p0)
  ret i64 %rdx.add
}

0 commit comments

Comments
 (0)