Skip to content

Commit 8eeeddb

Browse files
authored
[X86] lowerV8I16GeneralSingleInputShuffle - for splat PSHUFW+PSHUFD patterns, widen the splats to encourage combines (#129854)
For vXi16 patterns that lower to splats, ensure that PSHUFW mask splats to the entire LW/HW i64 half and then create a wide PSHUFD mask that splats the whole i64 element - this encourages further combines without depending on any unused elements from undef shuffle mask elements. Fixes #129276
1 parent d6c0839 commit 8eeeddb

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+482
-505
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13776,6 +13776,14 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
1377613776
DWordPairs.resize(2, std::make_pair(-1, -1));
1377713777
int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
1377813778
DWordPairs[1].first, DWordPairs[1].second};
13779+
// For splat, ensure we widen the PSHUFDMask to allow vXi64 folds.
13780+
if (ShuffleVectorSDNode::isSplatMask(PSHUFDMask) &&
13781+
ShuffleVectorSDNode::isSplatMask(PSHUFHalfMask)) {
13782+
int SplatIdx = ShuffleVectorSDNode::getSplatMaskIndex(PSHUFHalfMask);
13783+
std::fill(PSHUFHalfMask, PSHUFHalfMask + 4, SplatIdx);
13784+
PSHUFDMask[0] = PSHUFDMask[2] = DOffset + 0;
13785+
PSHUFDMask[1] = PSHUFDMask[3] = DOffset + 1;
13786+
}
1377913787
if ((NumHToL + NumHToH) == 0)
1378013788
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
1378113789
if ((NumLToL + NumLToH) == 0)

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2363,7 +2363,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
23632363
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
23642364
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
23652365
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2366-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
2366+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
23672367
; SSE2-NEXT: paddb (%rdx), %xmm0
23682368
; SSE2-NEXT: movdqa 16(%rdx), %xmm2
23692369
; SSE2-NEXT: paddb %xmm1, %xmm2
@@ -2384,7 +2384,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
23842384
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
23852385
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
23862386
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2387-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
2387+
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
23882388
; SSE42-NEXT: paddb (%rdx), %xmm0
23892389
; SSE42-NEXT: movdqa 16(%rdx), %xmm2
23902390
; SSE42-NEXT: paddb %xmm1, %xmm2
@@ -2405,7 +2405,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
24052405
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
24062406
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
24072407
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2408-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2408+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
24092409
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
24102410
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
24112411
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
@@ -2497,7 +2497,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
24972497
; SSE2-NEXT: pand %xmm2, %xmm1
24982498
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
24992499
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2500-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2500+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
25012501
; SSE2-NEXT: pandn %xmm0, %xmm2
25022502
; SSE2-NEXT: por %xmm1, %xmm2
25032503
; SSE2-NEXT: paddb (%rdx), %xmm2
@@ -2762,7 +2762,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
27622762
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,0,255,255,255,255,255,0,255,255,255]
27632763
; SSE2-NEXT: pand %xmm2, %xmm1
27642764
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2765-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2765+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
27662766
; SSE2-NEXT: pandn %xmm0, %xmm2
27672767
; SSE2-NEXT: por %xmm1, %xmm2
27682768
; SSE2-NEXT: paddb (%rdx), %xmm2
@@ -2783,7 +2783,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
27832783
; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
27842784
; SSE42-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
27852785
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2786-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
2786+
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
27872787
; SSE42-NEXT: paddb (%rdx), %xmm0
27882788
; SSE42-NEXT: movdqa 16(%rdx), %xmm2
27892789
; SSE42-NEXT: paddb %xmm1, %xmm2
@@ -2802,7 +2802,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28022802
; AVX-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28032803
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28042804
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2805-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2805+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
28062806
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
28072807
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
28082808
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
@@ -3549,7 +3549,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
35493549
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,0,65535]
35503550
; SSE2-NEXT: pand %xmm2, %xmm1
35513551
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3552-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3552+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
35533553
; SSE2-NEXT: pandn %xmm0, %xmm2
35543554
; SSE2-NEXT: por %xmm1, %xmm2
35553555
; SSE2-NEXT: paddb (%rdx), %xmm2
@@ -3568,7 +3568,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
35683568
; SSE42-NEXT: paddb 48(%rsi), %xmm1
35693569
; SSE42-NEXT: paddb (%rsi), %xmm0
35703570
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3571-
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3571+
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
35723572
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
35733573
; SSE42-NEXT: paddb (%rdx), %xmm1
35743574
; SSE42-NEXT: movdqa 16(%rdx), %xmm2
@@ -3586,7 +3586,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
35863586
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
35873587
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
35883588
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3589-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3589+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
35903590
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
35913591
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
35923592
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
@@ -4951,7 +4951,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
49514951
; SSE-NEXT: movdqa (%rdi), %xmm0
49524952
; SSE-NEXT: paddb (%rsi), %xmm0
49534953
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
4954-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
4954+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
49554955
; SSE-NEXT: movdqa 16(%rdx), %xmm1
49564956
; SSE-NEXT: paddb %xmm0, %xmm1
49574957
; SSE-NEXT: movdqa (%rdx), %xmm2
@@ -4970,7 +4970,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
49704970
; AVX-NEXT: vmovdqa (%rdi), %xmm0
49714971
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
49724972
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
4973-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
4973+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
49744974
; AVX-NEXT: vpaddb 48(%rdx), %xmm0, %xmm1
49754975
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
49764976
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm3

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1837,7 +1837,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
18371837
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
18381838
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
18391839
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1840-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
1840+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
18411841
; SSE2-NEXT: paddb (%rsi), %xmm0
18421842
; SSE2-NEXT: movdqa 16(%rsi), %xmm2
18431843
; SSE2-NEXT: paddb %xmm1, %xmm2
@@ -1856,7 +1856,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
18561856
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
18571857
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
18581858
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1859-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
1859+
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
18601860
; SSE42-NEXT: paddb (%rsi), %xmm0
18611861
; SSE42-NEXT: movdqa 16(%rsi), %xmm2
18621862
; SSE42-NEXT: paddb %xmm1, %xmm2
@@ -1875,9 +1875,9 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
18751875
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
18761876
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
18771877
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
1878-
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1878+
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
18791879
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1880-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1880+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
18811881
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
18821882
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
18831883
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -1954,7 +1954,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
19541954
; SSE2-NEXT: pand %xmm1, %xmm2
19551955
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
19561956
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1957-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1957+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
19581958
; SSE2-NEXT: pandn %xmm0, %xmm1
19591959
; SSE2-NEXT: por %xmm2, %xmm1
19601960
; SSE2-NEXT: paddb (%rsi), %xmm1
@@ -2174,7 +2174,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
21742174
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
21752175
; SSE2-NEXT: pand %xmm0, %xmm1
21762176
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
2177-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
2177+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
21782178
; SSE2-NEXT: pandn %xmm2, %xmm0
21792179
; SSE2-NEXT: por %xmm1, %xmm0
21802180
; SSE2-NEXT: paddb (%rsi), %xmm0
@@ -2192,7 +2192,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
21922192
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
21932193
; SSE42-NEXT: palignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
21942194
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
2195-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
2195+
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
21962196
; SSE42-NEXT: movdqa 16(%rsi), %xmm2
21972197
; SSE42-NEXT: paddb %xmm1, %xmm2
21982198
; SSE42-NEXT: paddb (%rsi), %xmm0
@@ -2208,9 +2208,9 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22082208
; AVX-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
22092209
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
22102210
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
2211-
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
2211+
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
22122212
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2213-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2213+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
22142214
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
22152215
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
22162216
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
@@ -2835,7 +2835,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
28352835
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
28362836
; SSE2-NEXT: pand %xmm0, %xmm1
28372837
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
2838-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
2838+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
28392839
; SSE2-NEXT: pandn %xmm2, %xmm0
28402840
; SSE2-NEXT: por %xmm1, %xmm0
28412841
; SSE2-NEXT: paddb (%rsi), %xmm0
@@ -2850,7 +2850,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
28502850
; SSE42-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
28512851
; SSE42: # %bb.0:
28522852
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
2853-
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2853+
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
28542854
; SSE42-NEXT: movdqa 48(%rdi), %xmm1
28552855
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
28562856
; SSE42-NEXT: paddb (%rsi), %xmm1
@@ -2865,7 +2865,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
28652865
; AVX-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
28662866
; AVX: # %bb.0:
28672867
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
2868-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2868+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
28692869
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
28702870
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
28712871
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
@@ -3913,7 +3913,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
39133913
; SSE-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
39143914
; SSE: # %bb.0:
39153915
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
3916-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3916+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
39173917
; SSE-NEXT: movdqa 16(%rsi), %xmm1
39183918
; SSE-NEXT: paddb %xmm0, %xmm1
39193919
; SSE-NEXT: movdqa (%rsi), %xmm2
@@ -3930,7 +3930,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
39303930
; AVX-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
39313931
; AVX: # %bb.0:
39323932
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
3933-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3933+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
39343934
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
39353935
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
39363936
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3

llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,15 +1899,15 @@ define <4 x i64> @test_mm256_set1_epi16(i16 %a0) nounwind {
18991899
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
19001900
; X86-NEXT: vmovd %eax, %xmm0
19011901
; X86-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1902-
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1902+
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
19031903
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
19041904
; X86-NEXT: retl
19051905
;
19061906
; X64-LABEL: test_mm256_set1_epi16:
19071907
; X64: # %bb.0:
19081908
; X64-NEXT: vmovd %edi, %xmm0
19091909
; X64-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1910-
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1910+
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
19111911
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
19121912
; X64-NEXT: retq
19131913
%res0 = insertelement <16 x i16> undef, i16 %a0, i32 0

llvm/test/CodeGen/X86/avx-splat.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
1717
; CHECK-LABEL: funcB:
1818
; CHECK: # %bb.0: # %entry
1919
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
20-
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
20+
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2121
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2222
; CHECK-NEXT: ret{{[l|q]}}
2323
entry:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
9797
; SSE2-SSSE3: # %bb.0:
9898
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
9999
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
100-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
100+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
101101
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
102102
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
103103
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
@@ -107,7 +107,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
107107
; AVX1: # %bb.0:
108108
; AVX1-NEXT: vmovd %edi, %xmm0
109109
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
110-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
110+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
111111
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
112112
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
113113
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -286,7 +286,7 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
286286
; SSE2-SSSE3: # %bb.0:
287287
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
288288
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
289-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
289+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
290290
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
291291
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
292292
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
@@ -300,7 +300,7 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
300300
; AVX1: # %bb.0:
301301
; AVX1-NEXT: vmovd %edi, %xmm0
302302
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
303-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
303+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
304304
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
305305
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
306306
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
@@ -519,7 +519,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
519519
; SSE2-SSSE3: # %bb.0:
520520
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
521521
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
522-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
522+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
523523
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
524524
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
525525
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
@@ -528,7 +528,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
528528
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
529529
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
530530
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
531-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
531+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
532532
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
533533
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
534534
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
@@ -540,7 +540,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
540540
; AVX1: # %bb.0:
541541
; AVX1-NEXT: vmovd %edi, %xmm1
542542
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
543-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
543+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
544544
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
545545
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
546546
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
@@ -550,7 +550,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
550550
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
551551
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
552552
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
553-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
553+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
554554
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
555555
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
556556
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3

0 commit comments

Comments (0)