Skip to content

Commit 7a1aa16

Browse files
committed
Avoid arm regressions
1 parent 932a1b6 commit 7a1aa16

12 files changed

+688
-662
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23885,9 +23885,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
2388523885
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
2388623886
TLI.isTypeLegal(VT.getVectorElementType()) &&
2388723887
// VecIn[1].hasOneUse() &&
23888-
NumExtracts == 1
23889-
//&& TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
23890-
)
23888+
NumExtracts == 1 && TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
2389123889
return SDValue();
2389223890

2389323891
unsigned MaxIndex = 0;

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 126 additions & 114 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 65 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3129,8 +3129,8 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31293129
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
31303130
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],mem[1,2,3,4,5],xmm1[6],mem[7]
31313131
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
3132-
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
31333132
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3133+
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
31343134
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm0
31353135
; AVX-NEXT: vpaddb 16(%rsi), %xmm2, %xmm2
31363136
; AVX-NEXT: vmovdqa %xmm2, 16(%rdx)
@@ -3141,14 +3141,13 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
31413141
; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
31423142
; AVX2: # %bb.0:
31433143
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
3144-
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
31453144
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm1
3146-
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3147-
; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
3148-
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3149-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3150-
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
3151-
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
3145+
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],mem[1,2,3,4,5],xmm1[6],mem[7]
3146+
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3147+
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3148+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3149+
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
3150+
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
31523151
; AVX2-NEXT: vzeroupper
31533152
; AVX2-NEXT: retq
31543153
;
@@ -3234,13 +3233,17 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
32343233
; AVX-LABEL: vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3:
32353234
; AVX: # %bb.0:
32363235
; AVX-NEXT: vmovdqa (%rdi), %xmm0
3237-
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
3238-
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3239-
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
3236+
; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
3237+
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],mem[1,2,3,4,5,6,7]
3238+
; AVX-NEXT: vpaddb (%rsi), %xmm2, %xmm2
3239+
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3240+
; AVX-NEXT: vmovdqa (%rdi), %xmm3
3241+
; AVX-NEXT: vpaddb 32(%rsi), %xmm3, %xmm3
32403242
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
32413243
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3242-
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3243-
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
3244+
; AVX-NEXT: vmovdqa %xmm3, 32(%rdx)
3245+
; AVX-NEXT: vmovdqa %xmm1, 48(%rdx)
3246+
; AVX-NEXT: vmovdqa %xmm2, (%rdx)
32443247
; AVX-NEXT: retq
32453248
;
32463249
; AVX2-LABEL: vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3:
@@ -3516,16 +3519,16 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
35163519
; SSE42-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
35173520
; SSE42: # %bb.0:
35183521
; SSE42-NEXT: movdqa (%rdi), %xmm0
3519-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
3520-
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
3521-
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
3522-
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5],xmm0[6,7]
3523-
; SSE42-NEXT: paddb (%rsi), %xmm0
3524-
; SSE42-NEXT: paddb 16(%rsi), %xmm2
3525-
; SSE42-NEXT: paddb 32(%rsi), %xmm1
3526-
; SSE42-NEXT: movdqa %xmm1, 32(%rdx)
3527-
; SSE42-NEXT: movdqa %xmm2, 16(%rdx)
3528-
; SSE42-NEXT: movdqa %xmm0, (%rdx)
3522+
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
3523+
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3,4,5],xmm1[6,7]
3524+
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
3525+
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3526+
; SSE42-NEXT: paddb (%rsi), %xmm1
3527+
; SSE42-NEXT: paddb 16(%rsi), %xmm0
3528+
; SSE42-NEXT: paddb 32(%rsi), %xmm2
3529+
; SSE42-NEXT: movdqa %xmm2, 32(%rdx)
3530+
; SSE42-NEXT: movdqa %xmm0, 16(%rdx)
3531+
; SSE42-NEXT: movdqa %xmm1, (%rdx)
35293532
; SSE42-NEXT: retq
35303533
;
35313534
; AVX-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
@@ -3534,8 +3537,8 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
35343537
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],mem[2,3,4,5,6,7]
35353538
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
35363539
; AVX-NEXT: vbroadcastss (%rdi), %xmm2
3540+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
35373541
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3538-
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
35393542
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm0
35403543
; AVX-NEXT: vpaddb 16(%rsi), %xmm2, %xmm2
35413544
; AVX-NEXT: vmovdqa %xmm2, 16(%rdx)
@@ -3546,10 +3549,10 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
35463549
; AVX2-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
35473550
; AVX2: # %bb.0:
35483551
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm0
3552+
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = mem[0,0,1,1]
35493553
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5,6,7]
3550-
; AVX2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,5,6,0]
3551-
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
3552-
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm1
3554+
; AVX2-NEXT: vpmovsxbd {{.*#+}} xmm2 = [0,5,6,0]
3555+
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
35533556
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
35543557
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
35553558
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -3631,15 +3634,19 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
36313634
;
36323635
; AVX-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
36333636
; AVX: # %bb.0:
3634-
; AVX-NEXT: vmovdqa 48(%rdi), %xmm0
3635-
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1],xmm0[2,3,4,5,6,7]
3636-
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3637-
; AVX-NEXT: vmovdqa (%rdi), %xmm1
3638-
; AVX-NEXT: vpaddb 32(%rsi), %xmm1, %xmm2
3639-
; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
3640-
; AVX-NEXT: vmovdqa %xmm1, 16(%rdx)
3637+
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3638+
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],mem[1,2,3]
3639+
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3640+
; AVX-NEXT: vmovdqa (%rdi), %xmm2
3641+
; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
3642+
; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
3643+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
3644+
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
36413645
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3642-
; AVX-NEXT: vmovdqa %xmm0, (%rdx)
3646+
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3647+
; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
3648+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
3649+
; AVX-NEXT: vzeroupper
36433650
; AVX-NEXT: retq
36443651
;
36453652
; AVX2-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
@@ -3701,25 +3708,26 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
37013708
define void @vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind {
37023709
; SSE2-LABEL: vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2:
37033710
; SSE2: # %bb.0:
3704-
; SSE2-NEXT: movdqa (%rdi), %xmm0
3711+
; SSE2-NEXT: movaps (%rdi), %xmm0
37053712
; SSE2-NEXT: movaps 48(%rdi), %xmm1
3706-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
37073713
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
3708-
; SSE2-NEXT: paddb 16(%rsi), %xmm2
3714+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3715+
; SSE2-NEXT: paddb 16(%rsi), %xmm0
37093716
; SSE2-NEXT: paddb (%rsi), %xmm1
37103717
; SSE2-NEXT: movdqa %xmm1, (%rdx)
3711-
; SSE2-NEXT: movdqa %xmm2, 16(%rdx)
3718+
; SSE2-NEXT: movdqa %xmm0, 16(%rdx)
37123719
; SSE2-NEXT: retq
37133720
;
37143721
; SSE42-LABEL: vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2:
37153722
; SSE42: # %bb.0:
37163723
; SSE42-NEXT: movdqa (%rdi), %xmm0
3717-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
3718-
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
3719-
; SSE42-NEXT: paddb 16(%rsi), %xmm1
3720-
; SSE42-NEXT: paddb (%rsi), %xmm0
3721-
; SSE42-NEXT: movdqa %xmm0, (%rdx)
3722-
; SSE42-NEXT: movdqa %xmm1, 16(%rdx)
3724+
; SSE42-NEXT: movdqa 48(%rdi), %xmm1
3725+
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
3726+
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
3727+
; SSE42-NEXT: paddb 16(%rsi), %xmm0
3728+
; SSE42-NEXT: paddb (%rsi), %xmm1
3729+
; SSE42-NEXT: movdqa %xmm1, (%rdx)
3730+
; SSE42-NEXT: movdqa %xmm0, 16(%rdx)
37233731
; SSE42-NEXT: retq
37243732
;
37253733
; AVX-LABEL: vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2:
@@ -3812,15 +3820,19 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
38123820
;
38133821
; AVX-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:
38143822
; AVX: # %bb.0:
3815-
; AVX-NEXT: vmovdqa 48(%rdi), %xmm0
3816-
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
3817-
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3818-
; AVX-NEXT: vmovdqa (%rdi), %xmm1
3819-
; AVX-NEXT: vpaddb 32(%rsi), %xmm1, %xmm2
3820-
; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
3821-
; AVX-NEXT: vmovdqa %xmm1, 16(%rdx)
3823+
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
3824+
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
3825+
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3826+
; AVX-NEXT: vmovdqa (%rdi), %xmm2
3827+
; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
3828+
; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
3829+
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
3830+
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
38223831
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3823-
; AVX-NEXT: vmovdqa %xmm0, (%rdx)
3832+
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3833+
; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
3834+
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
3835+
; AVX-NEXT: vzeroupper
38243836
; AVX-NEXT: retq
38253837
;
38263838
; AVX2-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:

llvm/test/CodeGen/X86/buildvec-extract.ll

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
6969
define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
7070
; SSE2-LABEL: extract1_i32_zext_insert0_i64_zero:
7171
; SSE2: # %bb.0:
72-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
73-
; SSE2-NEXT: pxor %xmm0, %xmm0
74-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
72+
; SSE2-NEXT: xorps %xmm1, %xmm1
73+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[1,0]
74+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
7575
; SSE2-NEXT: retq
7676
;
7777
; SSE41-LABEL: extract1_i32_zext_insert0_i64_zero:
@@ -114,9 +114,9 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
114114
define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
115115
; SSE2-LABEL: extract2_i32_zext_insert0_i64_zero:
116116
; SSE2: # %bb.0:
117-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
118-
; SSE2-NEXT: pxor %xmm0, %xmm0
119-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
117+
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
118+
; SSE2-NEXT: xorps %xmm1, %xmm1
119+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
120120
; SSE2-NEXT: retq
121121
;
122122
; SSE41-LABEL: extract2_i32_zext_insert0_i64_zero:
@@ -375,8 +375,7 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
375375
define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
376376
; SSE2-LABEL: extract0_i16_zext_insert0_i64_zero:
377377
; SSE2: # %bb.0:
378-
; SSE2-NEXT: pextrw $0, %xmm0, %eax
379-
; SSE2-NEXT: movd %eax, %xmm0
378+
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
380379
; SSE2-NEXT: retq
381380
;
382381
; SSE41-LABEL: extract0_i16_zext_insert0_i64_zero:
@@ -417,14 +416,14 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
417416
define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
418417
; SSE-LABEL: extract1_i16_zext_insert0_i64_zero:
419418
; SSE: # %bb.0:
420-
; SSE-NEXT: pextrw $1, %xmm0, %eax
421-
; SSE-NEXT: movd %eax, %xmm0
419+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
420+
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
422421
; SSE-NEXT: retq
423422
;
424423
; AVX-LABEL: extract1_i16_zext_insert0_i64_zero:
425424
; AVX: # %bb.0:
426-
; AVX-NEXT: vpextrw $1, %xmm0, %eax
427-
; AVX-NEXT: vmovd %eax, %xmm0
425+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
426+
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
428427
; AVX-NEXT: retq
429428
%e = extractelement <8 x i16> %x, i32 1
430429
%z = zext i16 %e to i64
@@ -453,14 +452,14 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
453452
define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
454453
; SSE-LABEL: extract2_i16_zext_insert0_i64_zero:
455454
; SSE: # %bb.0:
456-
; SSE-NEXT: pextrw $2, %xmm0, %eax
457-
; SSE-NEXT: movd %eax, %xmm0
455+
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
456+
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
458457
; SSE-NEXT: retq
459458
;
460459
; AVX-LABEL: extract2_i16_zext_insert0_i64_zero:
461460
; AVX: # %bb.0:
462-
; AVX-NEXT: vpextrw $2, %xmm0, %eax
463-
; AVX-NEXT: vmovd %eax, %xmm0
461+
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
462+
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
464463
; AVX-NEXT: retq
465464
%e = extractelement <8 x i16> %x, i32 2
466465
%z = zext i16 %e to i64
@@ -487,14 +486,14 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
487486
define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
488487
; SSE-LABEL: extract3_i16_zext_insert0_i64_zero:
489488
; SSE: # %bb.0:
490-
; SSE-NEXT: pextrw $3, %xmm0, %eax
491-
; SSE-NEXT: movd %eax, %xmm0
489+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
490+
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
492491
; SSE-NEXT: retq
493492
;
494493
; AVX-LABEL: extract3_i16_zext_insert0_i64_zero:
495494
; AVX: # %bb.0:
496-
; AVX-NEXT: vpextrw $3, %xmm0, %eax
497-
; AVX-NEXT: vmovd %eax, %xmm0
495+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
496+
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
498497
; AVX-NEXT: retq
499498
%e = extractelement <8 x i16> %x, i32 3
500499
%z = zext i16 %e to i64

llvm/test/CodeGen/X86/movmsk-bittest.ll

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -219,23 +219,14 @@ define i32 @movmsk_sgt_v16i8_15(<16 x i8> %v, i32 %a, i32 %b) {
219219
}
220220

221221
define i32 @movmsk_eq_v4i64_0(<4 x i64> %v, i32 %a, i32 %b) {
222-
; SSE2-LABEL: movmsk_eq_v4i64_0:
223-
; SSE2: # %bb.0:
224-
; SSE2-NEXT: movl %edi, %eax
225-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
226-
; SSE2-NEXT: movmskps %xmm0, %ecx
227-
; SSE2-NEXT: testb $1, %cl
228-
; SSE2-NEXT: cmovel %esi, %eax
229-
; SSE2-NEXT: retq
230-
;
231-
; SSE41-LABEL: movmsk_eq_v4i64_0:
232-
; SSE41: # %bb.0:
233-
; SSE41-NEXT: movl %edi, %eax
234-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
235-
; SSE41-NEXT: movmskps %xmm0, %ecx
236-
; SSE41-NEXT: testb $1, %cl
237-
; SSE41-NEXT: cmovel %esi, %eax
238-
; SSE41-NEXT: retq
222+
; SSE-LABEL: movmsk_eq_v4i64_0:
223+
; SSE: # %bb.0:
224+
; SSE-NEXT: movl %edi, %eax
225+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
226+
; SSE-NEXT: movmskps %xmm0, %ecx
227+
; SSE-NEXT: testb $1, %cl
228+
; SSE-NEXT: cmovel %esi, %eax
229+
; SSE-NEXT: retq
239230
;
240231
; AVX-LABEL: movmsk_eq_v4i64_0:
241232
; AVX: # %bb.0:
@@ -566,3 +557,5 @@ define i32 @movmsk_sgt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
566557

567558
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
568559
; AVX1OR2: {{.*}}
560+
; SSE2: {{.*}}
561+
; SSE41: {{.*}}
Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,21 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck -check-prefixes=CHECK,X32 %s
3-
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck -check-prefixes=CHECK,X64 %s
2+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s
3+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s
44

55
define <4 x i64> @autogen_SD88863() {
6-
; X32-LABEL: autogen_SD88863:
7-
; X32: # %bb.0: # %BB
8-
; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
9-
; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
10-
; X32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
11-
; X32-NEXT: movb $1, %al
12-
; X32-NEXT: .p2align 4
13-
; X32-NEXT: .LBB0_1: # %CF
14-
; X32-NEXT: # =>This Inner Loop Header: Depth=1
15-
; X32-NEXT: testb %al, %al
16-
; X32-NEXT: jne .LBB0_1
17-
; X32-NEXT: # %bb.2: # %CF240
18-
; X32-NEXT: retl
19-
;
20-
; X64-LABEL: autogen_SD88863:
21-
; X64: # %bb.0: # %BB
22-
; X64-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
23-
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
24-
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
25-
; X64-NEXT: movb $1, %al
26-
; X64-NEXT: .p2align 4
27-
; X64-NEXT: .LBB0_1: # %CF
28-
; X64-NEXT: # =>This Inner Loop Header: Depth=1
29-
; X64-NEXT: testb %al, %al
30-
; X64-NEXT: jne .LBB0_1
31-
; X64-NEXT: # %bb.2: # %CF240
32-
; X64-NEXT: retq
6+
; CHECK-LABEL: autogen_SD88863:
7+
; CHECK: # %bb.0: # %BB
8+
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
9+
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
10+
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
11+
; CHECK-NEXT: movb $1, %al
12+
; CHECK-NEXT: .p2align 4
13+
; CHECK-NEXT: .LBB0_1: # %CF
14+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
15+
; CHECK-NEXT: testb %al, %al
16+
; CHECK-NEXT: jne .LBB0_1
17+
; CHECK-NEXT: # %bb.2: # %CF240
18+
; CHECK-NEXT: ret{{[l|q]}}
3319
BB:
3420
%I26 = insertelement <4 x i64> undef, i64 undef, i32 2
3521
br label %CF
@@ -43,5 +29,3 @@ CF:
4329
CF240:
4430
ret <4 x i64> %I68
4531
}
46-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
47-
; CHECK: {{.*}}

0 commit comments

Comments
 (0)