Skip to content

Commit 0836965

Browse files
committed
Remove swapped concat handling
1 parent 1070f6b commit 0836965

File tree

2 files changed

+23
-29
lines changed

2 files changed

+23
-29
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -42675,6 +42675,9 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4267542675
return SDValue();
4267642676
}
4267742677
case X86ISD::VPERMV3: {
42678+
MVT WideVT = VT.getDoubleNumVectorElementsVT();
42679+
bool CanConcat = VT.is128BitVector() ||
42680+
(VT.is256BitVector() && Subtarget.useAVX512Regs());
4267842681
SmallVector<SDValue, 2> SrcOps;
4267942682
SmallVector<int, 32> Mask;
4268042683
if (getTargetShuffleMask(N, /*AllowSentinelZero=*/false, SrcOps, Mask)) {
@@ -42712,12 +42715,25 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4271242715
return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
4271342716
N.getOperand(0), Subtarget, DAG);
4271442717
}
42718+
// Combine VPERMV3 to widened VPERMV if the two source operands can be
42719+
// freely concatenated, with a commuted shuffle mask.
42720+
if (CanConcat) {
42721+
if (SDValue ConcatSrc = combineConcatVectorOps(
42722+
DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42723+
Subtarget)) {
42724+
ShuffleVectorSDNode::commuteMask(Mask);
42725+
Mask.append(NumElts, SM_SentinelUndef);
42726+
SDValue Perm =
42727+
lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42728+
DAG.getUNDEF(WideVT), Subtarget, DAG);
42729+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42730+
DAG.getVectorIdxConstant(0, DL));
42731+
}
42732+
}
4271542733
}
4271642734
// Combine VPERMV3 to widened VPERMV if the two source operands can be
4271742735
// freely concatenated.
42718-
MVT WideVT = VT.getDoubleNumVectorElementsVT();
42719-
if (VT.is128BitVector() ||
42720-
(VT.is256BitVector() && Subtarget.useAVX512Regs())) {
42736+
if (CanConcat) {
4272142737
SDValue Ops[] = {N.getOperand(0), N.getOperand(2)};
4272242738
if (SDValue ConcatSrc =
4272342739
combineConcatVectorOps(DL, WideVT, Ops, DAG, Subtarget)) {
@@ -42727,22 +42743,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4272742743
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
4272842744
DAG.getVectorIdxConstant(0, DL));
4272942745
}
42730-
// See if we can concatenate the commuted operands (and then cheaply
42731-
// shuffle them, for constant shuffle masks this should fold away).
42732-
SDValue SwapOps[] = {N.getOperand(2), N.getOperand(0)};
42733-
if (SDValue ConcatSrc =
42734-
combineConcatVectorOps(DL, WideVT, SwapOps, DAG, Subtarget)) {
42735-
SmallVector<int, 16> SwapMask(WideVT.getVectorNumElements());
42736-
std::iota(SwapMask.begin(), SwapMask.begin() + NumElts, NumElts);
42737-
std::iota(SwapMask.begin() + NumElts, SwapMask.end(), 0);
42738-
SDValue Swap = DAG.getVectorShuffle(WideVT, DL, ConcatSrc,
42739-
DAG.getUNDEF(WideVT), SwapMask);
42740-
SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42741-
DL, WideVT.getSizeInBits());
42742-
SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, Swap);
42743-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42744-
DAG.getVectorIdxConstant(0, DL));
42745-
}
4274642746
}
4274742747
return SDValue();
4274842748
}

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vl.ll

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29,21 +29,15 @@ define <4 x double> @concat_vpermv3_ops_vpermv_v4f64(ptr %p0, <4 x i64> %m) {
2929
define <4 x double> @concat_vpermv3_ops_vpermv_swap_v4f64(ptr %p0, <4 x i64> %m) {
3030
; X86-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
3131
; X86: # %bb.0:
32-
; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3332
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
34-
; X86-NEXT: vmovupd (%eax), %zmm1
35-
; X86-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm1[4,5,6,7,0,1,2,3]
36-
; X86-NEXT: vpermpd %zmm1, %zmm0, %zmm0
37-
; X86-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
33+
; X86-NEXT: vmovapd 32(%eax), %ymm1
34+
; X86-NEXT: vpermi2pd (%eax), %ymm1, %ymm0
3835
; X86-NEXT: retl
3936
;
4037
; X64-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
4138
; X64: # %bb.0:
42-
; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
43-
; X64-NEXT: vmovupd (%rdi), %zmm1
44-
; X64-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm1[4,5,6,7,0,1,2,3]
45-
; X64-NEXT: vpermpd %zmm1, %zmm0, %zmm0
46-
; X64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
39+
; X64-NEXT: vmovapd 32(%rdi), %ymm1
40+
; X64-NEXT: vpermi2pd (%rdi), %ymm1, %ymm0
4741
; X64-NEXT: retq
4842
%p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32
4943
%lo = load <4 x double>, ptr %p1, align 32

0 commit comments

Comments
 (0)