Skip to content

Commit 4b130b8

Browse files
committed
[X86][SSE] SimplifyDemandedVectorEltsForTargetNode - reduce vector width of X86ISD::BLENDI
If we don't need the upper subvector elements of the BLENDI node, then use a smaller vector size. This causes a couple of minor regressions in insertelement-ones.ll, which are more examples of PR26018; given how cheap all-ones generation is, I don't consider that a showstopper, just an annoyance (and there are plenty of other poor codegen cases in that file).
1 parent 3dcc0db commit 4b130b8

14 files changed

+79
-62
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36400,6 +36400,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3640036400
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
3640136401
return TLO.CombineTo(Op, Insert);
3640236402
}
36403+
// Vector blend by immediate.
36404+
case X86ISD::BLENDI: {
36405+
SDLoc DL(Op);
36406+
MVT ExtVT = VT.getSimpleVT();
36407+
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
36408+
ExtSizeInBits / ExtVT.getScalarSizeInBits());
36409+
SDValue Ext0 =
36410+
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
36411+
SDValue Ext1 =
36412+
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
36413+
SDValue ExtOp =
36414+
TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Op.getOperand(2));
36415+
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
36416+
SDValue Insert =
36417+
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
36418+
return TLO.CombineTo(Op, Insert);
36419+
}
3640336420
}
3640436421
}
3640536422

llvm/test/CodeGen/X86/insertelement-ones.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,18 +290,20 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
290290
;
291291
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
292292
; AVX2: # %bb.0:
293-
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
294-
; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
295-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
296-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
293+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
294+
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
295+
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
296+
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
297+
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
297298
; AVX2-NEXT: retq
298299
;
299300
; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
300301
; AVX512F: # %bb.0:
301-
; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
302-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
303-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
304-
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
302+
; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
303+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
304+
; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
305+
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
306+
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
305307
; AVX512F-NEXT: retq
306308
;
307309
; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:

llvm/test/CodeGen/X86/pr31956.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@ target triple = "x86_64-scei-ps4"
99
define <4 x float> @foo() {
1010
; CHECK-LABEL: foo:
1111
; CHECK: # %bb.0: # %entry
12-
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
13-
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
12+
; CHECK-NEXT: vmovaps {{.*}}(%rip), %xmm0
13+
; CHECK-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1414
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
1515
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
16-
; CHECK-NEXT: vzeroupper
1716
; CHECK-NEXT: retq
1817
entry:
1918
%V = load <2 x float>, <2 x float>* @G1, align 8

llvm/test/CodeGen/X86/vector-fshl-256.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,7 +1907,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19071907
; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
19081908
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm2
19091909
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
1910-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1910+
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
19111911
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
19121912
; AVX2-NEXT: retq
19131913
;
@@ -1917,7 +1917,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19171917
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
19181918
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm2
19191919
; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
1920-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1920+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
19211921
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
19221922
; AVX512F-NEXT: retq
19231923
;
@@ -1927,7 +1927,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19271927
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
19281928
; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm2
19291929
; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
1930-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1930+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
19311931
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
19321932
; AVX512VL-NEXT: retq
19331933
;
@@ -1940,7 +1940,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19401940
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
19411941
; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
19421942
; AVX512BW-NEXT: vpor %ymm1, %ymm2, %ymm1
1943-
; AVX512BW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1943+
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
19441944
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
19451945
; AVX512BW-NEXT: retq
19461946
;
@@ -1953,7 +1953,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19531953
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
19541954
; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
19551955
; AVX512VBMI2-NEXT: vpor %ymm1, %ymm2, %ymm1
1956-
; AVX512VBMI2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1956+
; AVX512VBMI2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
19571957
; AVX512VBMI2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
19581958
; AVX512VBMI2-NEXT: retq
19591959
;
@@ -1992,7 +1992,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19921992
; XOPAVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
19931993
; XOPAVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm2
19941994
; XOPAVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
1995-
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1995+
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
19961996
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
19971997
; XOPAVX2-NEXT: retq
19981998
%res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)

llvm/test/CodeGen/X86/vector-fshl-512.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,12 +1078,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
10781078
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm3
10791079
; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm5
10801080
; AVX512F-NEXT: vpor %ymm3, %ymm5, %ymm3
1081-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7],ymm2[8],ymm3[9,10,11,12,13,14,15]
1081+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
10821082
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
10831083
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm1, %ymm1
10841084
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm3
10851085
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
1086-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1086+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
10871087
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
10881088
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
10891089
; AVX512F-NEXT: retq
@@ -1096,12 +1096,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
10961096
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm3
10971097
; AVX512VL-NEXT: vpmullw %ymm4, %ymm2, %ymm5
10981098
; AVX512VL-NEXT: vpor %ymm3, %ymm5, %ymm3
1099-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7],ymm2[8],ymm3[9,10,11,12,13,14,15]
1099+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
11001100
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
11011101
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm1, %ymm1
11021102
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm3
11031103
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
1104-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1104+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
11051105
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
11061106
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
11071107
; AVX512VL-NEXT: retq

llvm/test/CodeGen/X86/vector-fshl-rot-512.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -523,13 +523,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
523523
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
524524
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768>
525525
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
526-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
526+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
527527
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
528528
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
529529
; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
530530
; AVX512F-NEXT: vpor %ymm3, %ymm1, %ymm1
531531
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
532-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
532+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
533533
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
534534
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
535535
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
@@ -541,13 +541,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
541541
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
542542
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768>
543543
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
544-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
544+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
545545
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
546546
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
547547
; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
548548
; AVX512VL-NEXT: vpor %ymm3, %ymm1, %ymm1
549549
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
550-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
550+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
551551
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
552552
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
553553
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0

llvm/test/CodeGen/X86/vector-fshr-256.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,7 +1910,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19101910
; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
19111911
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
19121912
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
1913-
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1913+
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
19141914
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
19151915
; AVX2-NEXT: retq
19161916
;
@@ -1920,7 +1920,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19201920
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
19211921
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0
19221922
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
1923-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1923+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
19241924
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
19251925
; AVX512F-NEXT: retq
19261926
;
@@ -1930,7 +1930,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19301930
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
19311931
; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
19321932
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
1933-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1933+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
19341934
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
19351935
; AVX512VL-NEXT: retq
19361936
;
@@ -1943,7 +1943,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19431943
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = <u,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1>
19441944
; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
19451945
; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
1946-
; AVX512BW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1946+
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
19471947
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
19481948
; AVX512BW-NEXT: retq
19491949
;
@@ -1956,7 +1956,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19561956
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = <u,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1>
19571957
; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
19581958
; AVX512VBMI2-NEXT: vpor %ymm2, %ymm0, %ymm0
1959-
; AVX512VBMI2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1959+
; AVX512VBMI2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
19601960
; AVX512VBMI2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
19611961
; AVX512VBMI2-NEXT: retq
19621962
;
@@ -1995,7 +1995,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
19951995
; XOPAVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
19961996
; XOPAVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
19971997
; XOPAVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
1998-
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1998+
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
19991999
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
20002000
; XOPAVX2-NEXT: retq
20012001
%res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)

llvm/test/CodeGen/X86/vector-fshr-512.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,12 +1066,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
10661066
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm5
10671067
; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2
10681068
; AVX512F-NEXT: vpor %ymm5, %ymm2, %ymm2
1069-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
1069+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
10701070
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
10711071
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm1, %ymm3
10721072
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
10731073
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
1074-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1074+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
10751075
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
10761076
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
10771077
; AVX512F-NEXT: retq
@@ -1084,12 +1084,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
10841084
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm5
10851085
; AVX512VL-NEXT: vpmullw %ymm4, %ymm2, %ymm2
10861086
; AVX512VL-NEXT: vpor %ymm5, %ymm2, %ymm2
1087-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
1087+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
10881088
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
10891089
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm1, %ymm3
10901090
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
10911091
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
1092-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
1092+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
10931093
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
10941094
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
10951095
; AVX512VL-NEXT: retq

llvm/test/CodeGen/X86/vector-fshr-rot-512.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,13 +543,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
543543
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
544544
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
545545
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
546-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
546+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
547547
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
548548
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
549549
; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
550550
; AVX512F-NEXT: vpor %ymm3, %ymm1, %ymm1
551551
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
552-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
552+
; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
553553
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
554554
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
555555
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
@@ -561,13 +561,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
561561
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
562562
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
563563
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
564-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
564+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
565565
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
566566
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
567567
; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
568568
; AVX512VL-NEXT: vpor %ymm3, %ymm1, %ymm1
569569
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
570-
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
570+
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
571571
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
572572
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
573573
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0

llvm/test/CodeGen/X86/vector-shift-ashr-256.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,9 +1184,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
11841184
; AVX2-LABEL: constant_shift_v16i16:
11851185
; AVX2: # %bb.0:
11861186
; AVX2-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm1
1187-
; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1187+
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
11881188
; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
1189-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
1189+
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
11901190
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
11911191
; AVX2-NEXT: retq
11921192
;
@@ -1247,9 +1247,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
12471247
; X32-AVX2-LABEL: constant_shift_v16i16:
12481248
; X32-AVX2: # %bb.0:
12491249
; X32-AVX2-NEXT: vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
1250-
; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
1250+
; X32-AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
12511251
; X32-AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
1252-
; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
1252+
; X32-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
12531253
; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
12541254
; X32-AVX2-NEXT: retl
12551255
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>

0 commit comments

Comments
 (0)