Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit e01c86d

Browse files
committed
[X86] Stop promoting integer loads to vXi64
Summary: Theoretically this was done to reduce the number of isel patterns that were needed. But it also meant a substantial number of our isel patterns have to match an explicit bitcast. By making the vXi32/vXi16/vXi8 types legal for loads, the DAG combiner should be able to change the load type to remove the bitcast. I had to add some additional plain load instruction patterns and a few other special cases, but overall the isel table has been reduced in size by ~12000 bytes. So it looks like this promotion was hurting us more than helping. I still have one crash in vector-trunc.ll that I'm hoping @RKSimon can help with. It seems to relate to using getTargetConstantFromNode on a load that was shrunk due to an extract_subvector combine after the constant pool entry was created. So we end up decoding more mask elements than the load size. I'm hoping this patch will reduce the number of patterns needed to remove the and/or/xor promotion. Reviewers: RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits, RKSimon Differential Revision: https://reviews.llvm.org/D53306 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@344877 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent de6038d commit e01c86d

15 files changed

+710
-589
lines changed

lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2855,21 +2855,17 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
28552855
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
28562856
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
28572857

2858-
// If there is a load, it will be behind a bitcast. We don't need to check
2859-
// alignment on this load.
2858+
// Try to fold a load. No need to check alignment.
28602859
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2861-
if (MayFoldLoad && N1->getOpcode() == ISD::BITCAST && N1->hasOneUse() &&
2862-
tryFoldLoad(Node, N1.getNode(), N1.getOperand(0), Tmp0, Tmp1, Tmp2,
2863-
Tmp3, Tmp4)) {
2864-
SDValue Load = N1.getOperand(0);
2860+
if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
28652861
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2866-
Load.getOperand(0) };
2862+
N1.getOperand(0) };
28672863
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
28682864
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
28692865
// Update the chain.
2870-
ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
2866+
ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
28712867
// Record the mem-refs
2872-
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
2868+
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
28732869
return CNode;
28742870
}
28752871

@@ -2892,22 +2888,18 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
28922888
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
28932889
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
28942890

2895-
// If there is a load, it will be behind a bitcast. We don't need to check
2896-
// alignment on this load.
2891+
// Try to fold a load. No need to check alignment.
28972892
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2898-
if (MayFoldLoad && N2->getOpcode() == ISD::BITCAST && N2->hasOneUse() &&
2899-
tryFoldLoad(Node, N2.getNode(), N2.getOperand(0), Tmp0, Tmp1, Tmp2,
2900-
Tmp3, Tmp4)) {
2901-
SDValue Load = N2.getOperand(0);
2893+
if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
29022894
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2903-
Load.getOperand(0), InFlag };
2895+
N2.getOperand(0), InFlag };
29042896
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
29052897
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
29062898
InFlag = SDValue(CNode, 3);
29072899
// Update the chain.
2908-
ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
2900+
ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
29092901
// Record the mem-refs
2910-
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
2902+
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
29112903
return CNode;
29122904
}
29132905

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -869,11 +869,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
869869
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
870870
}
871871

872-
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
873-
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
874-
setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
875-
}
876-
877872
// Custom lower v2i64 and v2f64 selects.
878873
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
879874
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
@@ -1178,11 +1173,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
11781173
if (HasInt256)
11791174
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
11801175

1181-
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1182-
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1183-
setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1184-
}
1185-
11861176
if (HasInt256) {
11871177
// Custom legalize 2x32 to get a little better code.
11881178
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
@@ -1419,10 +1409,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
14191409
setOperationAction(ISD::MGATHER, VT, Custom);
14201410
setOperationAction(ISD::MSCATTER, VT, Custom);
14211411
}
1422-
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1423-
setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1424-
}
1425-
14261412
// Need to custom split v32i16/v64i8 bitcasts.
14271413
if (!Subtarget.hasBWI()) {
14281414
setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
@@ -5539,7 +5525,7 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
55395525
if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
55405526
return nullptr;
55415527

5542-
return dyn_cast<Constant>(CNode->getConstVal());
5528+
return CNode->getConstVal();
55435529
}
55445530

55455531
// Extract raw constant bits from constant pools.
@@ -6046,7 +6032,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
60466032
break;
60476033
}
60486034
if (auto *C = getTargetConstantFromNode(MaskNode)) {
6049-
DecodeVPERMILPMask(C, MaskEltSize, Mask);
6035+
DecodeVPERMILPMask(C, MaskEltSize, VT.getSizeInBits(), Mask);
60506036
break;
60516037
}
60526038
return false;
@@ -6063,7 +6049,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
60636049
break;
60646050
}
60656051
if (auto *C = getTargetConstantFromNode(MaskNode)) {
6066-
DecodePSHUFBMask(C, Mask);
6052+
DecodePSHUFBMask(C, VT.getSizeInBits(), Mask);
60676053
break;
60686054
}
60696055
return false;
@@ -6128,7 +6114,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
61286114
break;
61296115
}
61306116
if (auto *C = getTargetConstantFromNode(MaskNode)) {
6131-
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
6117+
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, VT.getSizeInBits(), Mask);
61326118
break;
61336119
}
61346120
}
@@ -6145,7 +6131,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
61456131
break;
61466132
}
61476133
if (auto *C = getTargetConstantFromNode(MaskNode)) {
6148-
DecodeVPPERMMask(C, Mask);
6134+
DecodeVPPERMMask(C, VT.getSizeInBits(), Mask);
61496135
break;
61506136
}
61516137
return false;
@@ -6163,7 +6149,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
61636149
break;
61646150
}
61656151
if (auto *C = getTargetConstantFromNode(MaskNode)) {
6166-
DecodeVPERMVMask(C, MaskEltSize, Mask);
6152+
DecodeVPERMVMask(C, MaskEltSize, VT.getSizeInBits(), Mask);
61676153
break;
61686154
}
61696155
return false;
@@ -6178,7 +6164,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
61786164
SDValue MaskNode = N->getOperand(1);
61796165
unsigned MaskEltSize = VT.getScalarSizeInBits();
61806166
if (auto *C = getTargetConstantFromNode(MaskNode)) {
6181-
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
6167+
DecodeVPERMV3Mask(C, MaskEltSize, VT.getSizeInBits(), Mask);
61826168
break;
61836169
}
61846170
return false;

0 commit comments

Comments
 (0)