@@ -305,3 +305,38 @@ declare void @llvm.assume(i1 noundef) #2
305
305
declare void @llvm.lifetime.end.p0i8 (i64 immarg, i8* nocapture ) #1
306
306
307
307
; Function Attrs: nounwind ssp uwtable mustprogress
308
+
309
; reverse_hadd_v4f32: test input that sums adjacent lane pairs of %a and %b one
; scalar at a time (a0+a1, a2+a3, b0+b1, b2+b3 inserted into lanes 0..3 of an
; undef vector) and then reverses the lane order, so the returned vector is
; <b2+b3, b0+b1, a2+a3, a0+a1>.
; The CHECK lines expect the output to consist only of vector shufflevector and
; vector fadd instructions (no scalar extractelement/insertelement remain).
; NOTE(review): the CHECK block looks autogenerated - presumably it should be
; regenerated by script rather than edited by hand; confirm against the file header.
+ define <4 x float > @reverse_hadd_v4f32 (<4 x float > %a , <4 x float > %b ) {
310
+ ; CHECK-LABEL: @reverse_hadd_v4f32(
311
+ ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
312
+ ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
313
+ ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
314
+ ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[SHIFT1]], [[A]]
315
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 undef, i32 undef, i32 6, i32 0>
316
+ ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
317
+ ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
318
+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 undef, i32 4, i32 2, i32 3>
319
+ ; CHECK-NEXT: [[SHIFT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
320
+ ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT3]], [[B]]
321
+ ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 6, i32 1, i32 2, i32 3>
322
+ ; CHECK-NEXT: ret <4 x float> [[TMP7]]
323
+ ;
; Lane 0 of the vector being built: a[0] + a[1].
324
+ %vecext = extractelement <4 x float > %a , i32 0
325
+ %vecext1 = extractelement <4 x float > %a , i32 1
326
+ %add = fadd float %vecext , %vecext1
327
+ %vecinit = insertelement <4 x float > undef , float %add , i32 0
; Lane 1: a[2] + a[3].
328
+ %vecext2 = extractelement <4 x float > %a , i32 2
329
+ %vecext3 = extractelement <4 x float > %a , i32 3
330
+ %add4 = fadd float %vecext2 , %vecext3
331
+ %vecinit5 = insertelement <4 x float > %vecinit , float %add4 , i32 1
; Lane 2: b[0] + b[1].
332
+ %vecext6 = extractelement <4 x float > %b , i32 0
333
+ %vecext7 = extractelement <4 x float > %b , i32 1
334
+ %add8 = fadd float %vecext6 , %vecext7
335
+ %vecinit9 = insertelement <4 x float > %vecinit5 , float %add8 , i32 2
; Lane 3: b[2] + b[3].
336
+ %vecext10 = extractelement <4 x float > %b , i32 2
337
+ %vecext11 = extractelement <4 x float > %b , i32 3
338
+ %add12 = fadd float %vecext10 , %vecext11
339
+ %vecinit13 = insertelement <4 x float > %vecinit9 , float %add12 , i32 3
; Mask <3,2,1,0> uses only indices 0-3, i.e. only the first operand, so this
; reverses the lanes of %vecinit13; no element of the second operand (%a) is selected.
340
+ %shuffle = shufflevector <4 x float > %vecinit13 , <4 x float > %a , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
341
+ ret <4 x float > %shuffle
342
+ }
0 commit comments