@@ -588,20 +588,19 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
588
588
; AVX2-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
589
589
; AVX2-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
590
590
; AVX2-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
591
- ; AVX2-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
592
- ; AVX2-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [5,7,1,3,7,0,0,0]
593
- ; AVX2-FCP-NEXT: vpermd %ymm4, %ymm1, %ymm1
594
- ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5],zero,zero,zero,zero,zero,zero,ymm1[10,11,14,15,2,3,18,19],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
595
- ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
596
- ; AVX2-FCP-NEXT: # ymm5 = mem[0,1,0,1]
597
- ; AVX2-FCP-NEXT: vpermd %ymm0, %ymm5, %ymm5
598
- ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,zero,zero,ymm5[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm5[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
599
- ; AVX2-FCP-NEXT: vpor %ymm5, %ymm1, %ymm1
591
+ ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [1,5,3,7,1,5,3,7]
592
+ ; AVX2-FCP-NEXT: # ymm1 = mem[0,1,0,1]
593
+ ; AVX2-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
594
+ ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
595
+ ; AVX2-FCP-NEXT: vpmovsxbd {{.*#+}} ymm5 = [5,7,1,3,7,0,0,0]
596
+ ; AVX2-FCP-NEXT: vpermd %ymm4, %ymm5, %ymm5
597
+ ; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm5 = ymm5[0,1,4,5],zero,zero,zero,zero,zero,zero,ymm5[10,11,14,15,2,3,18,19],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
598
+ ; AVX2-FCP-NEXT: vpor %ymm1, %ymm5, %ymm1
600
599
; AVX2-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
601
600
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,4,5,6,7,4,5],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[24,25,20,21,22,23,20,21,28,29]
602
601
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm4[0,1,8,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm4[2,3],zero,zero,ymm4[18,19,26,27],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
603
602
; AVX2-FCP-NEXT: vpor %ymm2, %ymm3, %ymm2
604
- ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4, 6,0,2,4 ,6]
603
+ ; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,4,2, 6,0,4,2 ,6]
605
604
; AVX2-FCP-NEXT: # ymm3 = mem[0,1,0,1]
606
605
; AVX2-FCP-NEXT: vpermd %ymm0, %ymm3, %ymm0
607
606
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u,u,u,u,u,u,u,18,19,22,23,26,27,u,u,u,u]
@@ -670,17 +669,16 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
670
669
; AVX512-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm4
671
670
; AVX512-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
672
671
; AVX512-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
673
- ; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
674
- ; AVX512-FCP-NEXT: vpermi2q %ymm5, %ymm0, %ymm1
675
- ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm0 = [0,2,4,6,0,2,4,6]
676
- ; AVX512-FCP-NEXT: # ymm0 = mem[0,1,0,1]
677
- ; AVX512-FCP-NEXT: vpermd %ymm1, %ymm0, %ymm0
678
- ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero
679
- ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
672
+ ; AVX512-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
673
+ ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
674
+ ; AVX512-FCP-NEXT: # ymm1 = mem[0,1,0,1]
675
+ ; AVX512-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
676
+ ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero
677
+ ; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,5,3,7,1,5,3,7]
680
678
; AVX512-FCP-NEXT: # ymm5 = mem[0,1,0,1]
681
- ; AVX512-FCP-NEXT: vpermd %ymm1 , %ymm5, %ymm1
682
- ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
683
- ; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm1 , %zmm0 , %zmm0
679
+ ; AVX512-FCP-NEXT: vpermd %ymm0 , %ymm5, %ymm0
680
+ ; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
681
+ ; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm0 , %zmm1 , %zmm0
684
682
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm4[0,1,8,9],zero,zero,zero,zero,ymm4[u,u,u,u,u,u,2,3],zero,zero,ymm4[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero
685
683
; AVX512-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
686
684
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29]
@@ -753,17 +751,16 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
753
751
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm4
754
752
; AVX512DQ-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
755
753
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
756
- ; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
757
- ; AVX512DQ-FCP-NEXT: vpermi2q %ymm5, %ymm0, %ymm1
758
- ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm0 = [0,2,4,6,0,2,4,6]
759
- ; AVX512DQ-FCP-NEXT: # ymm0 = mem[0,1,0,1]
760
- ; AVX512DQ-FCP-NEXT: vpermd %ymm1, %ymm0, %ymm0
761
- ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero
762
- ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
754
+ ; AVX512DQ-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
755
+ ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
756
+ ; AVX512DQ-FCP-NEXT: # ymm1 = mem[0,1,0,1]
757
+ ; AVX512DQ-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
758
+ ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero
759
+ ; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,5,3,7,1,5,3,7]
763
760
; AVX512DQ-FCP-NEXT: # ymm5 = mem[0,1,0,1]
764
- ; AVX512DQ-FCP-NEXT: vpermd %ymm1 , %ymm5, %ymm1
765
- ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
766
- ; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm1 , %zmm0 , %zmm0
761
+ ; AVX512DQ-FCP-NEXT: vpermd %ymm0 , %ymm5, %ymm0
762
+ ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0 [0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0 [18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
763
+ ; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm0 , %zmm1 , %zmm0
767
764
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm4[0,1,8,9],zero,zero,zero,zero,ymm4[u,u,u,u,u,u,2,3],zero,zero,ymm4[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero
768
765
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
769
766
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29]
0 commit comments