@@ -6624,6 +6624,74 @@ entry:
6624
6624
ret i32 %op.rdx.7
6625
6625
}
6626
6626
6627
; Sums all eight i16 lanes of %a as zero-extended i32 values: the low half
; (lanes 0-3) and the high half (lanes 4-7) are extracted and widened
; separately, added lane-wise, and the four partial sums are then reduced.
; The SD prefixes expect this to fold into a single uaddlv, while the GI
; prefixes currently expect a ushll + uaddw2 + addv sequence.
define i32 @extract_hi_lo(<8 x i16> %a) {
; CHECK-SD-BASE-LABEL: extract_hi_lo:
; CHECK-SD-BASE:       // %bb.0: // %entry
; CHECK-SD-BASE-NEXT:    uaddlv s0, v0.8h
; CHECK-SD-BASE-NEXT:    fmov w0, s0
; CHECK-SD-BASE-NEXT:    ret
;
; CHECK-SD-DOT-LABEL: extract_hi_lo:
; CHECK-SD-DOT:       // %bb.0: // %entry
; CHECK-SD-DOT-NEXT:    uaddlv s0, v0.8h
; CHECK-SD-DOT-NEXT:    fmov w0, s0
; CHECK-SD-DOT-NEXT:    ret
;
; CHECK-GI-BASE-LABEL: extract_hi_lo:
; CHECK-GI-BASE:       // %bb.0: // %entry
; CHECK-GI-BASE-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-GI-BASE-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
; CHECK-GI-BASE-NEXT:    addv s0, v0.4s
; CHECK-GI-BASE-NEXT:    fmov w0, s0
; CHECK-GI-BASE-NEXT:    ret
;
; CHECK-GI-DOT-LABEL: extract_hi_lo:
; CHECK-GI-DOT:       // %bb.0: // %entry
; CHECK-GI-DOT-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-GI-DOT-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
; CHECK-GI-DOT-NEXT:    addv s0, v0.4s
; CHECK-GI-DOT-NEXT:    fmov w0, s0
; CHECK-GI-DOT-NEXT:    ret
entry:
  ; The masks only select lanes from %a, so the second shuffle operand is a
  ; pure placeholder; poison is used instead of the deprecated undef.
  %e1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %e2 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %z1 = zext <4 x i16> %e1 to <4 x i32>
  %z2 = zext <4 x i16> %e2 to <4 x i32>
  %z4 = add <4 x i32> %z1, %z2
  %z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
  ret i32 %z5
}
6664
+
6665
; Extracts the high half of %a (lanes 4-7), zero-extends it to i32, adds the
; widened vector to itself and reduces the result, i.e. twice the sum of the
; four high lanes. The expected assembly uses uaddl2 with the same source
; register for both operands, followed by addv.
define i32 @extract_hi_hi(<8 x i16> %a) {
; CHECK-LABEL: extract_hi_hi:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.4s, v0.8h, v0.8h
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  ; The mask only selects lanes from %a, so the second shuffle operand is a
  ; pure placeholder; poison is used instead of the deprecated undef.
  %e2 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %z2 = zext <4 x i16> %e2 to <4 x i32>
  %z4 = add <4 x i32> %z2, %z2
  %z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
  ret i32 %z5
}
6679
+
6680
; Extracts the low half of %a (lanes 0-3), zero-extends it to i32, adds the
; widened vector to itself and reduces the result, i.e. twice the sum of the
; four low lanes. The expected assembly uses uaddl with the same source
; register for both operands, followed by addv.
define i32 @extract_lo_lo(<8 x i16> %a) {
; CHECK-LABEL: extract_lo_lo:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.4s, v0.4h, v0.4h
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  ; The mask only selects lanes from %a, so the second shuffle operand is a
  ; pure placeholder; poison is used instead of the deprecated undef.
  %e1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z1 = zext <4 x i16> %e1 to <4 x i32>
  %z4 = add <4 x i32> %z1, %z1
  %z5 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z4)
  ret i32 %z5
}
6694
+
6627
6695
declare <8 x i32 > @llvm.abs.v8i32 (<8 x i32 >, i1 immarg) #1
6628
6696
declare i16 @llvm.vector.reduce.add.v16i16 (<16 x i16 >)
6629
6697
declare i16 @llvm.vector.reduce.add.v8i16 (<8 x i16 >)
0 commit comments