@@ -2573,7 +2573,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
2573
2573
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2574
2574
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
2575
2575
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2576
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2576
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2577
2577
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2578
2578
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2579
2579
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2590,7 +2590,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
2590
2590
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2591
2591
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
2592
2592
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2593
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2593
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2594
2594
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2595
2595
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2596
2596
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2835,7 +2835,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2835
2835
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2836
2836
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2837
2837
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2838
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2838
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2839
2839
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2840
2840
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2841
2841
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2852,7 +2852,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2852
2852
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2853
2853
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2854
2854
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2855
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2855
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2856
2856
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
2857
2857
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
2858
2858
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2868,7 +2868,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
2868
2868
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
2869
2869
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2870
2870
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2871
- ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2871
+ ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2872
2872
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
2873
2873
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
2874
2874
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3096,7 +3096,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3096
3096
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3097
3097
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3098
3098
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3099
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3099
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3100
3100
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3101
3101
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3102
3102
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3113,7 +3113,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3113
3113
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3114
3114
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3115
3115
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3116
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3116
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3117
3117
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3118
3118
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3119
3119
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3129,7 +3129,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
3129
3129
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
3130
3130
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
3131
3131
; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3132
- ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3132
+ ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3133
3133
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
3134
3134
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
3135
3135
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3612,7 +3612,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
3612
3612
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3613
3613
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3614
3614
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
3615
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3615
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3616
3616
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3617
3617
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3618
3618
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3628,7 +3628,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
3628
3628
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3629
3629
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3630
3630
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
3631
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3631
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3632
3632
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3633
3633
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3634
3634
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3864,7 +3864,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3864
3864
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3865
3865
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3866
3866
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3867
- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3867
+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3868
3868
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3869
3869
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3870
3870
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3880,7 +3880,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
3880
3880
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3881
3881
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3882
3882
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3883
- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3883
+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3884
3884
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
3885
3885
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
3886
3886
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
0 commit comments