@@ -775,31 +775,27 @@ define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
775
775
ret <32 x i8 > %4
776
776
}
777
777
778
- ; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
778
+ ; It is not beneficial to concatenate both inputs just to create a single 256-bit vpaddb; perform two 128-bit vpaddb ops and concatenate the results instead.
779
779
; Codegen test: computes two independent 128-bit byte adds that share %a0
; (%a0 + %a1 and %a0 + %a2) and returns them concatenated as one <32 x i8>
; value, low half first. The expected assembly below pins how that pattern
; is lowered.
define <32 x i8 > @concat_add_unnecessary (<16 x i8 > %a0 , <16 x i8 > noundef %a1 , <16 x i8 > %a2 ) nounwind {
780
780
; CHECK-LABEL: concat_add_unnecessary:
781
781
; CHECK: # %bb.0:
782
- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
783
- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
784
- ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
785
- ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
786
- ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; Updated expectation: two 128-bit vpaddb ops followed by a single vinserti128
; that joins the halves, replacing the removed sequence above (two vinserti128
; widenings feeding one 256-bit vpaddb).
782
+ ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm1
783
+ ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
784
+ ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
787
785
; The regex on the next line accepts a return mnemonic with an 'l' or 'q'
; suffix - presumably so one expectation serves both 32-bit and 64-bit RUN
; lines (RUN lines are not visible here; confirm).
; CHECK-NEXT: ret{{[l|q]}}
788
786
%lo = add <16 x i8 > %a0 , %a1
789
787
%hi = add <16 x i8 > %a0 , %a2
790
788
; Mask 0..31 takes all 16 lanes of %lo followed by all 16 lanes of %hi,
; i.e. a plain concatenation of the two 128-bit results.
%res = shufflevector <16 x i8 > %lo , <16 x i8 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
791
789
ret <32 x i8 > %res
792
790
}
793
791
794
- ; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
792
+ ; It is not beneficial to concatenate both inputs just to create a single 256-bit vpmullw; perform two 128-bit vpmullw ops and concatenate the results instead.
795
793
define <16 x i16 > @concat_mul_unnecessary (<8 x i16 > %a0 , <8 x i16 > %a1 , <8 x i16 > %a2 ) nounwind {
796
794
; CHECK-LABEL: concat_mul_unnecessary:
797
795
; CHECK: # %bb.0:
798
- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
799
- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
800
- ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
801
- ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
802
- ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
796
+ ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm1
797
+ ; CHECK-NEXT: vpmullw %xmm2, %xmm0, %xmm0
798
+ ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
803
799
; CHECK-NEXT: ret{{[l|q]}}
804
800
%lo = mul <8 x i16 > %a0 , %a1
805
801
%hi = mul <8 x i16 > %a0 , %a2
0 commit comments