@@ -2837,7 +2837,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2837
2837
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2838
2838
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2839
2839
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2840
- ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2840
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2841
+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2841
2842
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2842
2843
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2843
2844
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2854,7 +2855,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2854
2855
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2855
2856
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2856
2857
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2857
- ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2858
+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2859
+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2858
2860
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2859
2861
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2860
2862
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2870,7 +2872,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2870
2872
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2871
2873
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2872
2874
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2873
- ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2875
+ ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2874
2876
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2875
2877
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
2876
2878
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3098,7 +3100,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3098
3100
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3099
3101
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3100
3102
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3101
- ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3103
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3104
+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3102
3105
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3103
3106
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3104
3107
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3115,7 +3118,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3115
3118
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3116
3119
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3117
3120
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3118
- ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3121
+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3122
+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3119
3123
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3120
3124
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3121
3125
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3131,7 +3135,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3131
3135
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3132
3136
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3133
3137
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3134
- ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3138
+ ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3135
3139
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
3136
3140
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
3137
3141
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3864,11 +3868,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3864
3868
; AVX512F: # %bb.0:
3865
3869
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3866
3870
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3867
- ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3868
3871
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3869
3872
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3873
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3874
+ ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3870
3875
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3871
- ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3876
+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3872
3877
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3873
3878
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3874
3879
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3880,11 +3885,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3880
3885
; AVX512DQ: # %bb.0:
3881
3886
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3882
3887
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3883
- ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3884
3888
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3885
3889
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3890
+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3891
+ ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3886
3892
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3887
- ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3893
+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3888
3894
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3889
3895
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3890
3896
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
0 commit comments