@@ -2111,6 +2111,28 @@ SDValue NVPTXTargetLowering::LowerSTACKSAVE(SDValue Op,
2111
2111
return DAG.getMergeValues ({ASC, SDValue (SS.getNode (), 1 )}, DL);
2112
2112
}
2113
2113
2114
+ // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
2115
+ // (see LegalizeDAG.cpp). This is slow and uses local memory.
2116
+ // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
2117
+ SDValue
2118
+ NVPTXTargetLowering::LowerCONCAT_VECTORS (SDValue Op, SelectionDAG &DAG) const {
2119
+ SDNode *Node = Op.getNode ();
2120
+ SDLoc dl (Node);
2121
+ SmallVector<SDValue, 8 > Ops;
2122
+ unsigned NumOperands = Node->getNumOperands ();
2123
+ for (unsigned i = 0 ; i < NumOperands; ++i) {
2124
+ SDValue SubOp = Node->getOperand (i);
2125
+ EVT VVT = SubOp.getNode ()->getValueType (0 );
2126
+ EVT EltVT = VVT.getVectorElementType ();
2127
+ unsigned NumSubElem = VVT.getVectorNumElements ();
2128
+ for (unsigned j = 0 ; j < NumSubElem; ++j) {
2129
+ Ops.push_back (DAG.getNode (ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
2130
+ DAG.getIntPtrConstant (j, dl)));
2131
+ }
2132
+ }
2133
+ return DAG.getBuildVector (Node->getValueType (0 ), dl, Ops);
2134
+ }
2135
+
2114
2136
SDValue NVPTXTargetLowering::LowerBITCAST (SDValue Op, SelectionDAG &DAG) const {
2115
2137
// Handle bitcasting from v2i8 without hitting the default promotion
2116
2138
// strategy which goes through stack memory.
@@ -2824,6 +2846,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2824
2846
return LowerINSERT_VECTOR_ELT (Op, DAG);
2825
2847
case ISD::VECTOR_SHUFFLE:
2826
2848
return LowerVECTOR_SHUFFLE (Op, DAG);
2849
+ case ISD::CONCAT_VECTORS:
2850
+ return LowerCONCAT_VECTORS (Op, DAG);
2827
2851
case ISD::STORE:
2828
2852
return LowerSTORE (Op, DAG);
2829
2853
case ISD::LOAD:
0 commit comments