Skip to content

Commit 750d009

Browse files
authored
[X86] combineConcatVectorOps - add concatenation handling for BITCAST nodes (#133913)
BITCAST nodes are effectively free, so we should only concatenate them if their inner (source) nodes will themselves concatenate together. This change also exposed a regression in canonicalizeShuffleWithOp, which failed to realize that it could potentially merge shuffles with a CONCAT_VECTORS node.
1 parent 2f41fa3 commit 750d009

13 files changed

+3349
-3468
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41704,6 +41704,7 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4170441704
getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)) ||
4170541705
(Op.getOpcode() == Opc && Op->hasOneUse()) ||
4170641706
(Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) ||
41707+
(Op.getOpcode() == ISD::CONCAT_VECTORS && Op->hasOneUse()) ||
4170741708
(FoldShuf && isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
4170841709
DAG.isSplatValue(Op, /*AllowUndefs*/ false);
4170941710
};
@@ -58134,6 +58135,30 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5813458135

5813558136
unsigned Opcode = Op0.getOpcode();
5813658137
switch (Opcode) {
58138+
case ISD::BITCAST: {
58139+
// TODO: Support AVX1/AVX2 bitcasts.
58140+
SmallVector<SDValue, 4> SubOps;
58141+
for (SDValue SubOp : Ops)
58142+
SubOps.push_back(peekThroughBitcasts(SubOp.getOperand(0)));
58143+
EVT InnerVT = SubOps[0].getValueType();
58144+
unsigned InnerSizeInBits = InnerVT.getScalarSizeInBits();
58145+
if (!IsSplat && InnerVT.isSimple() && InnerVT.isVector() &&
58146+
(Subtarget.hasBWI() ||
58147+
(EltSizeInBits >= 32 && InnerSizeInBits >= 32)) &&
58148+
((VT.is256BitVector() && Subtarget.hasVLX()) ||
58149+
(VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
58150+
llvm::all_of(SubOps, [InnerVT](SDValue Op) {
58151+
return Op.getValueType() == InnerVT;
58152+
})) {
58153+
MVT ConcatSVT = InnerVT.getScalarType().getSimpleVT();
58154+
MVT ConcatVT = MVT::getVectorVT(
58155+
ConcatSVT, VT.getSizeInBits() / ConcatSVT.getSizeInBits());
58156+
if (SDValue ConcatSrc = combineConcatVectorOps(
58157+
DL, ConcatVT, SubOps, DAG, Subtarget, Depth + 1))
58158+
return DAG.getBitcast(VT, ConcatSrc);
58159+
}
58160+
break;
58161+
}
5813758162
case ISD::VECTOR_SHUFFLE: {
5813858163
// TODO: Generalize NumOps support.
5813958164
if (!IsSplat && NumOps == 2 &&

llvm/test/CodeGen/X86/shift-i512.ll

Lines changed: 18 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -123,20 +123,18 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
123123
;
124124
; AVX512VBMI-LABEL: lshr_i512_1:
125125
; AVX512VBMI: # %bb.0:
126-
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
127-
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
128-
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
126+
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
127+
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm2
128+
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm3
129129
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
130130
; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
131-
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
132-
; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
133-
; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
134131
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
135-
; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
136-
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
137-
; AVX512VBMI-NEXT: vpsrlq $1, %xmm1, %xmm1
138-
; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
139-
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
132+
; AVX512VBMI-NEXT: vpsrlq $1, %xmm2, %xmm2
133+
; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
134+
; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
135+
; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
136+
; AVX512VBMI-NEXT: vpshldq $63, %ymm3, %ymm1, %ymm1
137+
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
140138
; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
141139
; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
142140
; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
@@ -238,20 +236,18 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) {
238236
;
239237
; AVX512VBMI-LABEL: ashr_i512_1:
240238
; AVX512VBMI: # %bb.0:
241-
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
242-
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
243-
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
239+
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
240+
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm2
241+
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm3
244242
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
245243
; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
246-
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
247-
; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
248-
; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
249244
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
250-
; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
251-
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
252-
; AVX512VBMI-NEXT: vpsraq $1, %xmm1, %xmm1
253-
; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
254-
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
245+
; AVX512VBMI-NEXT: vpsraq $1, %xmm2, %xmm2
246+
; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
247+
; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
248+
; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
249+
; AVX512VBMI-NEXT: vpshldq $63, %ymm3, %ymm1, %ymm1
250+
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
255251
; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
256252
; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
257253
; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]

llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -297,23 +297,21 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
297297
;
298298
; AVX512BW-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
299299
; AVX512BW: # %bb.0:
300-
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,16,21]
301-
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2
302-
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,30,u,u,u,u,u,u,u,u]
303-
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,29,u,u,u,u,u,u,u,u,u,u,u,u]
304-
; AVX512BW-NEXT: vpermt2d %zmm2, %zmm1, %zmm0
300+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,8,13]
301+
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,29,u,u,u,u,u,u,u,u,u,u,u,u,33,37,41,45,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,49,53,57,62,u,u,u,u,u,u,u,u]
302+
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0
305303
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
306304
; AVX512BW-NEXT: vzeroupper
307305
; AVX512BW-NEXT: retq
308306
;
309307
; AVX512BWVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
310308
; AVX512BWVL: # %bb.0:
311-
; AVX512BWVL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,8,13]
309+
; AVX512BWVL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [8,12,0,5]
312310
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
313-
; AVX512BWVL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,30,u,u,u,u,u,u,u,u]
314-
; AVX512BWVL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,29,u,u,u,u,u,u,u,u,u,u,u,u]
315-
; AVX512BWVL-NEXT: vpermt2d %ymm2, %ymm1, %ymm0
316-
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
311+
; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
312+
; AVX512BWVL-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,30,u,u,u,u,u,u,u,u,33,37,41,45,u,u,u,u,u,u,u,u,u,u,u,u,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u]
313+
; AVX512BWVL-NEXT: vpermd %zmm0, %zmm1, %zmm0
314+
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
317315
; AVX512BWVL-NEXT: vzeroupper
318316
; AVX512BWVL-NEXT: retq
319317
;

llvm/test/CodeGen/X86/vector-fshr-256.ll

Lines changed: 31 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1228,13 +1228,14 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
12281228
;
12291229
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
12301230
; AVX512VBMI2: # %bb.0:
1231-
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
1232-
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1233-
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1234-
; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
1235-
; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1236-
; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
1237-
; AVX512VBMI2-NEXT: vpermt2b %zmm4, %zmm3, %zmm0
1231+
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1232+
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1233+
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
1234+
; AVX512VBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
1235+
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
1236+
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
1237+
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1238+
; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
12381239
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
12391240
; AVX512VBMI2-NEXT: retq
12401241
;
@@ -1251,16 +1252,29 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
12511252
; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
12521253
; AVX512VLBW-NEXT: retq
12531254
;
1254-
; AVX10-LABEL: splatvar_funnnel_v32i8:
1255-
; AVX10: # %bb.0:
1256-
; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1257-
; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1258-
; AVX10-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
1259-
; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1260-
; AVX10-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
1261-
; AVX10-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1262-
; AVX10-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
1263-
; AVX10-NEXT: retq
1255+
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1256+
; AVX512VLVBMI2: # %bb.0:
1257+
; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1258+
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1259+
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95,0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87]
1260+
; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
1261+
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
1262+
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
1263+
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [32,34,36,38,40,42,44,46,0,2,4,6,8,10,12,14,48,50,52,54,56,58,60,62,16,18,20,22,24,26,28,30]
1264+
; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
1265+
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1266+
; AVX512VLVBMI2-NEXT: retq
1267+
;
1268+
; AVX10_256-LABEL: splatvar_funnnel_v32i8:
1269+
; AVX10_256: # %bb.0:
1270+
; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1271+
; AVX10_256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1272+
; AVX10_256-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
1273+
; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1274+
; AVX10_256-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
1275+
; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1276+
; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
1277+
; AVX10_256-NEXT: retq
12641278
;
12651279
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
12661280
; XOPAVX1: # %bb.0:

llvm/test/CodeGen/X86/vector-fshr-rot-256.ll

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -992,25 +992,26 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
992992
;
993993
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
994994
; AVX512VBMI2: # %bb.0:
995-
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
996-
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
995+
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
996+
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
997+
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
998+
; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm3, %zmm0
997999
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
998-
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
999-
; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1000-
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
1001-
; AVX512VBMI2-NEXT: vpermt2b %zmm3, %zmm2, %zmm0
1000+
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
1001+
; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm2, %zmm0
10021002
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
10031003
; AVX512VBMI2-NEXT: retq
10041004
;
10051005
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
10061006
; AVX512VLVBMI2: # %bb.0:
1007-
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1007+
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1008+
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [32,34,36,38,40,42,44,46,0,2,4,6,8,10,12,14,48,50,52,54,56,58,60,62,16,18,20,22,24,26,28,30]
1009+
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1010+
; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm3, %zmm0
10081011
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1009-
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
1010-
; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1011-
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm1
1012-
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1013-
; AVX512VLVBMI2-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
1012+
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
1013+
; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm2, %zmm0
1014+
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
10141015
; AVX512VLVBMI2-NEXT: retq
10151016
;
10161017
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -130,13 +130,14 @@ define void @store_i16_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
130130
;
131131
; AVX512BW-FCP-LABEL: store_i16_stride4_vf2:
132132
; AVX512BW-FCP: # %bb.0:
133-
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
134-
; AVX512BW-FCP-NEXT: vmovdqa (%rdx), %xmm1
135-
; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
136-
; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
137-
; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,8,10,1,3,9,11]
138-
; AVX512BW-FCP-NEXT: vpermi2w %xmm1, %xmm0, %xmm2
139-
; AVX512BW-FCP-NEXT: vmovdqa %xmm2, (%r8)
133+
; AVX512BW-FCP-NEXT: vmovdqa (%rdx), %xmm0
134+
; AVX512BW-FCP-NEXT: vmovdqa (%rcx), %xmm1
135+
; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [8,24,0,16,9,25,1,17]
136+
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm1, %ymm1
137+
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0
138+
; AVX512BW-FCP-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
139+
; AVX512BW-FCP-NEXT: vmovdqa %xmm0, (%r8)
140+
; AVX512BW-FCP-NEXT: vzeroupper
140141
; AVX512BW-FCP-NEXT: retq
141142
;
142143
; AVX512DQ-BW-LABEL: store_i16_stride4_vf2:
@@ -152,13 +153,14 @@ define void @store_i16_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
152153
;
153154
; AVX512DQ-BW-FCP-LABEL: store_i16_stride4_vf2:
154155
; AVX512DQ-BW-FCP: # %bb.0:
155-
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
156-
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdx), %xmm1
157-
; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
158-
; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
159-
; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,8,10,1,3,9,11]
160-
; AVX512DQ-BW-FCP-NEXT: vpermi2w %xmm1, %xmm0, %xmm2
161-
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm2, (%r8)
156+
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdx), %xmm0
157+
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rcx), %xmm1
158+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [8,24,0,16,9,25,1,17]
159+
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm1, %ymm1
160+
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0
161+
; AVX512DQ-BW-FCP-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
162+
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm0, (%r8)
163+
; AVX512DQ-BW-FCP-NEXT: vzeroupper
162164
; AVX512DQ-BW-FCP-NEXT: retq
163165
%in.vec0 = load <2 x i16>, ptr %in.vecptr0, align 64
164166
%in.vec1 = load <2 x i16>, ptr %in.vecptr1, align 64

0 commit comments

Comments
 (0)