@@ -775,6 +775,38 @@ define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
775
775
ret <32 x i8 > %4
776
776
}
777
777
778
; TODO: Concatenating both operands just to form one 256-bit vpaddb is not
; beneficial. %a0 is an operand of both 128-bit adds, so the expected output
; below inserts %xmm0 into the upper lane of its own %ymm0 and %xmm2 into the
; upper lane of %ymm1 before performing a single 256-bit add.
define <32 x i8> @concat_add_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1, <16 x i8> %a2) nounwind {
; CHECK-LABEL: concat_add_unnecessary:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  ; Two independent 128-bit adds that share the left-hand operand %a0.
  %sum.lo = add <16 x i8> %a0, %a1
  %sum.hi = add <16 x i8> %a0, %a2
  ; Identity mask 0..31: elements 0-15 come from %sum.lo, 16-31 from %sum.hi,
  ; i.e. a plain concatenation of the two halves.
  %concat = shufflevector <16 x i8> %sum.lo, <16 x i8> %sum.hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %concat
}
793
+
794
; TODO: Concatenating both operands just to form one 256-bit vpmullw is not
; beneficial. %a0 is an operand of both 128-bit multiplies, so the expected
; output below inserts %xmm0 into the upper lane of its own %ymm0 and %xmm2
; into the upper lane of %ymm1 before performing a single 256-bit multiply.
define <16 x i16> @concat_mul_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
; CHECK-LABEL: concat_mul_unnecessary:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  ; Two independent 128-bit multiplies that share the left-hand operand %a0.
  %prod.lo = mul <8 x i16> %a0, %a1
  %prod.hi = mul <8 x i16> %a0, %a2
  ; Identity mask 0..15: elements 0-7 come from %prod.lo, 8-15 from %prod.hi,
  ; i.e. a plain concatenation of the two halves.
  %concat = shufflevector <8 x i16> %prod.lo, <8 x i16> %prod.hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %concat
}
809
+
778
810
; Not beneficial to concatenate both inputs just to create a 256-bit palignr
779
811
define <32 x i8 > @concat_alignr_unnecessary (<16 x i8 > %a0 , <16 x i8 > noundef %a1 , <16 x i8 > %a2 ) nounwind {
780
812
; CHECK-LABEL: concat_alignr_unnecessary:
0 commit comments