Skip to content

Commit 148a7ae

Browse files
authored
[X86] combineConcatVectorOps - add support for AVX1 and/or/xor/andnp concatenation (#130645)
Limit AVX1-only targets to cases where at least one operand is beneficial to concatenate, but avoid creating unnecessary 256-bit NOT patterns unless doing so allows further folds. There are still a large number of regressions when limiting AVX2+ targets to beneficial concatenation — so added a TODO for now.
1 parent 976e413 commit 148a7ae

21 files changed

+1743
-1811
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58311,11 +58311,21 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5831158311
case ISD::OR:
5831258312
case ISD::XOR:
5831358313
case X86ISD::ANDNP:
58314-
if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
58314+
// TODO: AVX2+ targets should only use CombineSubOperand like AVX1.
58315+
if (!IsSplat && (VT.is256BitVector() ||
5831558316
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
58316-
return DAG.getNode(Op0.getOpcode(), DL, VT,
58317-
ConcatSubOperand(VT, Ops, 0),
58318-
ConcatSubOperand(VT, Ops, 1));
58317+
// Don't concatenate root AVX1 NOT patterns.
58318+
if (Op0.getOpcode() == ISD::XOR && Depth == 0 &&
58319+
!Subtarget.hasInt256() && llvm::all_of(Ops, [](SDValue X) {
58320+
return ISD::isBuildVectorAllOnes(X.getOperand(1).getNode());
58321+
}))
58322+
break;
58323+
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
58324+
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
58325+
if (Concat0 || Concat1 || Subtarget.hasInt256())
58326+
return DAG.getNode(Op0.getOpcode(), DL, VT,
58327+
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
58328+
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
5831958329
}
5832058330
break;
5832158331
case X86ISD::PCMPEQ:

llvm/test/CodeGen/X86/avx-shift.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,11 @@ define <32 x i8> @vshift10(<32 x i8> %a) {
136136
define <32 x i8> @vshift11(<32 x i8> %a) {
137137
; CHECK-LABEL: vshift11:
138138
; CHECK: # %bb.0:
139-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
140-
; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
141-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
142-
; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
139+
; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm1
140+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
143141
; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
144-
; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
145-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
142+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
143+
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
146144
; CHECK-NEXT: retq
147145
%s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
148146
ret <32 x i8> %s
@@ -151,13 +149,11 @@ define <32 x i8> @vshift11(<32 x i8> %a) {
151149
define <32 x i8> @vshift12(<32 x i8> %a) {
152150
; CHECK-LABEL: vshift12:
153151
; CHECK: # %bb.0:
154-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
155-
; CHECK-NEXT: vpsllw $2, %xmm1, %xmm1
156-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
157-
; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
152+
; CHECK-NEXT: vpsllw $2, %xmm0, %xmm1
153+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
158154
; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
159-
; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
160-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
155+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
156+
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
161157
; CHECK-NEXT: retq
162158
%s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
163159
ret <32 x i8> %s

llvm/test/CodeGen/X86/gfni-shifts.ll

Lines changed: 36 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,21 +1232,19 @@ define <32 x i8> @constant_shl_v32i8(<32 x i8> %a) nounwind {
12321232
;
12331233
; GFNIAVX1-LABEL: constant_shl_v32i8:
12341234
; GFNIAVX1: # %bb.0:
1235-
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1236-
; GFNIAVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = [1,4,16,64,128,32,8,2]
1237-
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm3
1238-
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
1239-
; GFNIAVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
1240-
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
1241-
; GFNIAVX1-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1
1235+
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
1236+
; GFNIAVX1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2
1237+
; GFNIAVX1-NEXT: vpsllw $8, %xmm2, %xmm2
1238+
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1239+
; GFNIAVX1-NEXT: vpmaddubsw %xmm1, %xmm3, %xmm1
12421240
; GFNIAVX1-NEXT: vpsllw $8, %xmm1, %xmm1
1243-
; GFNIAVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
1244-
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm2
1245-
; GFNIAVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
1246-
; GFNIAVX1-NEXT: vpmaddubsw %xmm5, %xmm0, %xmm0
1247-
; GFNIAVX1-NEXT: vpsllw $8, %xmm0, %xmm0
1248-
; GFNIAVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
1249-
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1241+
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1242+
; GFNIAVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = [1,4,16,64,128,32,8,2]
1243+
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm3, %xmm3
1244+
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
1245+
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1246+
; GFNIAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1247+
; GFNIAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
12501248
; GFNIAVX1-NEXT: retq
12511249
;
12521250
; GFNIAVX2-LABEL: constant_shl_v32i8:
@@ -2575,33 +2573,31 @@ define <64 x i8> @constant_shl_v64i8(<64 x i8> %a) nounwind {
25752573
;
25762574
; GFNIAVX1-LABEL: constant_shl_v64i8:
25772575
; GFNIAVX1: # %bb.0:
2578-
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2579-
; GFNIAVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = [1,4,16,64,128,32,8,2]
2580-
; GFNIAVX1-NEXT: vpmaddubsw %xmm3, %xmm2, %xmm4
2581-
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
2582-
; GFNIAVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
2583-
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
2584-
; GFNIAVX1-NEXT: vpmaddubsw %xmm6, %xmm2, %xmm2
2585-
; GFNIAVX1-NEXT: vpsllw $8, %xmm2, %xmm2
2586-
; GFNIAVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
2587-
; GFNIAVX1-NEXT: vpmaddubsw %xmm3, %xmm0, %xmm4
2588-
; GFNIAVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
2589-
; GFNIAVX1-NEXT: vpmaddubsw %xmm6, %xmm0, %xmm0
2590-
; GFNIAVX1-NEXT: vpsllw $8, %xmm0, %xmm0
2591-
; GFNIAVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
2592-
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2593-
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2594-
; GFNIAVX1-NEXT: vpmaddubsw %xmm3, %xmm2, %xmm4
2595-
; GFNIAVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
2596-
; GFNIAVX1-NEXT: vpmaddubsw %xmm6, %xmm2, %xmm2
2576+
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
2577+
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm3
2578+
; GFNIAVX1-NEXT: vpsllw $8, %xmm3, %xmm3
2579+
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2580+
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm4, %xmm5
2581+
; GFNIAVX1-NEXT: vpsllw $8, %xmm5, %xmm5
2582+
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
2583+
; GFNIAVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = [1,4,16,64,128,32,8,2]
2584+
; GFNIAVX1-NEXT: vpmaddubsw %xmm5, %xmm4, %xmm4
2585+
; GFNIAVX1-NEXT: vpmaddubsw %xmm5, %xmm0, %xmm0
2586+
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
2587+
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2588+
; GFNIAVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
2589+
; GFNIAVX1-NEXT: vorps %ymm3, %ymm0, %ymm0
2590+
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm3
2591+
; GFNIAVX1-NEXT: vpsllw $8, %xmm3, %xmm3
2592+
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
2593+
; GFNIAVX1-NEXT: vpmaddubsw %xmm2, %xmm6, %xmm2
25972594
; GFNIAVX1-NEXT: vpsllw $8, %xmm2, %xmm2
2598-
; GFNIAVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
2599-
; GFNIAVX1-NEXT: vpmaddubsw %xmm3, %xmm1, %xmm3
2600-
; GFNIAVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
2601-
; GFNIAVX1-NEXT: vpmaddubsw %xmm6, %xmm1, %xmm1
2602-
; GFNIAVX1-NEXT: vpsllw $8, %xmm1, %xmm1
2603-
; GFNIAVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
2604-
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2595+
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
2596+
; GFNIAVX1-NEXT: vpmaddubsw %xmm5, %xmm6, %xmm3
2597+
; GFNIAVX1-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1
2598+
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
2599+
; GFNIAVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
2600+
; GFNIAVX1-NEXT: vorps %ymm2, %ymm1, %ymm1
26052601
; GFNIAVX1-NEXT: retq
26062602
;
26072603
; GFNIAVX2-LABEL: constant_shl_v64i8:

llvm/test/CodeGen/X86/known-bits-vector.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -384,23 +384,19 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
384384
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
385385
; X86-LABEL: knownbits_mask_concat_uitofp:
386386
; X86: # %bb.0:
387-
; X86-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
388-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [131071,131071,131071,131071]
389-
; X86-NEXT: vandps %xmm2, %xmm1, %xmm1
390387
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
391-
; X86-NEXT: vandps %xmm2, %xmm0, %xmm0
388+
; X86-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
392389
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
390+
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
393391
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
394392
; X86-NEXT: retl
395393
;
396394
; X64-LABEL: knownbits_mask_concat_uitofp:
397395
; X64: # %bb.0:
398-
; X64-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
399-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [131071,131071,131071,131071]
400-
; X64-NEXT: vandps %xmm2, %xmm1, %xmm1
401396
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
402-
; X64-NEXT: vandps %xmm2, %xmm0, %xmm0
397+
; X64-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
403398
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
399+
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
404400
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
405401
; X64-NEXT: retq
406402
%1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>

llvm/test/CodeGen/X86/pmul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
959959
;
960960
; AVX512F-LABEL: mul_v64i8:
961961
; AVX512F: # %bb.0: # %entry
962-
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
962+
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
963963
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
964964
; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4
965965
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5

llvm/test/CodeGen/X86/vector-fshl-256.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2270,20 +2270,16 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
22702270
define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
22712271
; AVX1-LABEL: splatconstant_funnnel_v32i8:
22722272
; AVX1: # %bb.0:
2273-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2274-
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
2275-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2276-
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2273+
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
2274+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
22772275
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
2278-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
2279-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2280-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2281-
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
2282-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
2283-
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2276+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2277+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
2278+
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm2
2279+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
22842280
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
2285-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
2286-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2281+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2282+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
22872283
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
22882284
; AVX1-NEXT: retq
22892285
;

llvm/test/CodeGen/X86/vector-fshl-rot-256.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,19 +1623,17 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
16231623
define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
16241624
; AVX1-LABEL: splatconstant_funnnel_v32i8:
16251625
; AVX1: # %bb.0:
1626-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1627-
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
1628-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1629-
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1630-
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
1631-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1632-
; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1633-
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
1634-
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1626+
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
1627+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1628+
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
1629+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1630+
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1631+
; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1
16351632
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
1636-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1637-
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1638-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1633+
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
1634+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1635+
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
1636+
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
16391637
; AVX1-NEXT: retq
16401638
;
16411639
; AVX2-LABEL: splatconstant_funnnel_v32i8:

llvm/test/CodeGen/X86/vector-fshr-256.ll

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,32 +1625,30 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
16251625
; AVX1-NEXT: vpmullw %xmm6, %xmm2, %xmm2
16261626
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
16271627
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
1628-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1629-
; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
1630-
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm7 = [128,32,8,2,128,2,8,32]
1631-
; AVX1-NEXT: vpmaddubsw %xmm7, %xmm4, %xmm8
1632-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm9 = [255,255,255,255,255,255,255,255]
1633-
; AVX1-NEXT: vpand %xmm9, %xmm8, %xmm8
1634-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm10 = [0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64]
1635-
; AVX1-NEXT: vpmaddubsw %xmm10, %xmm4, %xmm4
1636-
; AVX1-NEXT: vpsllw $8, %xmm4, %xmm4
1637-
; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm4
1638-
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
16391628
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15]
16401629
; AVX1-NEXT: vpmullw %xmm5, %xmm3, %xmm3
16411630
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
16421631
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
16431632
; AVX1-NEXT: vpmullw %xmm6, %xmm1, %xmm1
16441633
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
16451634
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
1635+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
1636+
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm2
1637+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64]
1638+
; AVX1-NEXT: vpmaddubsw %xmm3, %xmm2, %xmm4
1639+
; AVX1-NEXT: vpsllw $8, %xmm4, %xmm4
1640+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
16461641
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
1647-
; AVX1-NEXT: vpmaddubsw %xmm7, %xmm0, %xmm3
1648-
; AVX1-NEXT: vpand %xmm3, %xmm9, %xmm3
1649-
; AVX1-NEXT: vpmaddubsw %xmm10, %xmm0, %xmm0
1650-
; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0
1651-
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
1652-
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1653-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1642+
; AVX1-NEXT: vpmaddubsw %xmm3, %xmm0, %xmm3
1643+
; AVX1-NEXT: vpsllw $8, %xmm3, %xmm3
1644+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1645+
; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1
1646+
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = [128,32,8,2,128,2,8,32]
1647+
; AVX1-NEXT: vpmaddubsw %xmm3, %xmm0, %xmm0
1648+
; AVX1-NEXT: vpmaddubsw %xmm3, %xmm2, %xmm2
1649+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1650+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1651+
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
16541652
; AVX1-NEXT: retq
16551653
;
16561654
; AVX2-LABEL: constant_funnnel_v32i8:
@@ -2058,20 +2056,16 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
20582056
define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
20592057
; AVX1-LABEL: splatconstant_funnnel_v32i8:
20602058
; AVX1: # %bb.0:
2061-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2062-
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
2063-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2064-
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2059+
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
2060+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
20652061
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
2066-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
2067-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2068-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2069-
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
2070-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
2071-
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2062+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2063+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
2064+
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm2
2065+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
20722066
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
2073-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
2074-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2067+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2068+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
20752069
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
20762070
; AVX1-NEXT: retq
20772071
;

llvm/test/CodeGen/X86/vector-fshr-rot-256.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1674,19 +1674,17 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
16741674
define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
16751675
; AVX1-LABEL: splatconstant_funnnel_v32i8:
16761676
; AVX1: # %bb.0:
1677-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1678-
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
1679-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1680-
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1681-
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
1682-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1683-
; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
1684-
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
1685-
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
1677+
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
1678+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1679+
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
1680+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
1681+
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1682+
; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1
16861683
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
1687-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1688-
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
1689-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1684+
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
1685+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1686+
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
1687+
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
16901688
; AVX1-NEXT: retq
16911689
;
16921690
; AVX2-LABEL: splatconstant_funnnel_v32i8:

0 commit comments

Comments (0)