Skip to content

Commit afc8ad0

Browse files
committed
[X86] LowerFunnelShift - improve handling of vXi8 constant splat funnel shifts
This patch moves both the promotion to vXi16 shifts and the upper/lower bit masking into LowerFunnelShift for targets that have a bit-select instruction (XOP's VPCMOV and AVX512's VPTERNLOG). This prevents the regressions seen in #89115, which were caused by the masking of vXi8 shifts of the form ((X << V) | (Y >> (8-V))).
1 parent a6a4d4a commit afc8ad0

File tree

5 files changed

+48
-29
lines changed

5 files changed

+48
-29
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29830,6 +29830,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
2983029830
if (VT.isVector()) {
2983129831
APInt APIntShiftAmt;
2983229832
bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
29833+
unsigned NumElts = VT.getVectorNumElements();
2983329834

2983429835
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
2983529836
if (IsFSHR)
@@ -29858,6 +29859,29 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
2985829859
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
2985929860
uint64_t ShXAmt = IsFSHR ? (EltSizeInBits - ShiftAmt) : ShiftAmt;
2986029861
uint64_t ShYAmt = IsFSHR ? ShiftAmt : (EltSizeInBits - ShiftAmt);
29862+
assert((ShXAmt + ShYAmt) == EltSizeInBits && "Illegal funnel shift");
29863+
29864+
if (EltSizeInBits == 8 && ShXAmt > 1 &&
29865+
(Subtarget.hasXOP() || useVPTERNLOG(Subtarget, VT))) {
29866+
// For vXi8 cases on Subtargets that can perform VPCMOV/VPTERNLOG
29867+
// bit-select - lower using vXi16 shifts and then perform the bitmask at
29868+
// the original vector width to handle cases where we split.
29869+
MVT WideVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
29870+
APInt MaskX = APInt::getHighBitsSet(8, 8 - ShXAmt);
29871+
APInt MaskY = APInt::getLowBitsSet(8, 8 - ShYAmt);
29872+
SDValue ShX =
29873+
DAG.getNode(ISD::SHL, DL, WideVT, DAG.getBitcast(WideVT, Op0),
29874+
DAG.getShiftAmountConstant(ShXAmt, WideVT, DL));
29875+
SDValue ShY =
29876+
DAG.getNode(ISD::SRL, DL, WideVT, DAG.getBitcast(WideVT, Op1),
29877+
DAG.getShiftAmountConstant(ShYAmt, WideVT, DL));
29878+
ShX = DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, ShX),
29879+
DAG.getConstant(MaskX, DL, VT));
29880+
ShY = DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, ShY),
29881+
DAG.getConstant(MaskY, DL, VT));
29882+
return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
29883+
}
29884+
2986129885
SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, Op0,
2986229886
DAG.getShiftAmountConstant(ShXAmt, VT, DL));
2986329887
SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Op1,
@@ -29874,7 +29898,6 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
2987429898
return SDValue();
2987529899

2987629900
unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
29877-
unsigned NumElts = VT.getVectorNumElements();
2987829901
MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
2987929902
MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
2988029903

llvm/test/CodeGen/X86/vector-fshl-128.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2453,9 +2453,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
24532453
;
24542454
; XOP-LABEL: splatconstant_funnnel_v16i8:
24552455
; XOP: # %bb.0:
2456-
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2457-
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2458-
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
2456+
; XOP-NEXT: vpsrlw $4, %xmm1, %xmm1
2457+
; XOP-NEXT: vpsllw $4, %xmm0, %xmm0
2458+
; XOP-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0, %xmm0
24592459
; XOP-NEXT: retq
24602460
;
24612461
; X86-SSE2-LABEL: splatconstant_funnnel_v16i8:

llvm/test/CodeGen/X86/vector-fshl-256.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2344,17 +2344,15 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
23442344
;
23452345
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
23462346
; XOPAVX1: # %bb.0:
2347-
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2348-
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
2349-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm2, %xmm2
2350-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm1, %xmm1
2351-
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2352-
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2353-
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
2354-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm2, %xmm2
2355-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm0, %xmm0
2356-
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2357-
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2347+
; XOPAVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
2348+
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2349+
; XOPAVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
2350+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2351+
; XOPAVX1-NEXT: vpsllw $4, %xmm0, %xmm2
2352+
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2353+
; XOPAVX1-NEXT: vpsllw $4, %xmm0, %xmm0
2354+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2355+
; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
23582356
; XOPAVX1-NEXT: retq
23592357
;
23602358
; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:

llvm/test/CodeGen/X86/vector-fshr-128.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2462,9 +2462,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
24622462
;
24632463
; XOP-LABEL: splatconstant_funnnel_v16i8:
24642464
; XOP: # %bb.0:
2465-
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2466-
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2467-
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
2465+
; XOP-NEXT: vpsrlw $4, %xmm1, %xmm1
2466+
; XOP-NEXT: vpsllw $4, %xmm0, %xmm0
2467+
; XOP-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0, %xmm0
24682468
; XOP-NEXT: retq
24692469
;
24702470
; X86-SSE2-LABEL: splatconstant_funnnel_v16i8:

llvm/test/CodeGen/X86/vector-fshr-256.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2145,17 +2145,15 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
21452145
;
21462146
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
21472147
; XOPAVX1: # %bb.0:
2148-
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2149-
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
2150-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm2, %xmm2
2151-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm1, %xmm1
2152-
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2153-
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2154-
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
2155-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm2, %xmm2
2156-
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm0, %xmm0
2157-
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2158-
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2148+
; XOPAVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
2149+
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2150+
; XOPAVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
2151+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2152+
; XOPAVX1-NEXT: vpsllw $4, %xmm0, %xmm2
2153+
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2154+
; XOPAVX1-NEXT: vpsllw $4, %xmm0, %xmm0
2155+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2156+
; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
21592157
; XOPAVX1-NEXT: retq
21602158
;
21612159
; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:

0 commit comments

Comments
 (0)