@@ -508,7 +508,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
508
508
setOperationAction (ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand);
509
509
setOperationAction (ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand);
510
510
511
- // Conversion to/from i8/i8x4 is always legal.
512
511
setOperationAction (ISD::BUILD_VECTOR, MVT::v4i8, Custom);
513
512
setOperationAction (ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
514
513
setOperationAction (ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
@@ -718,8 +717,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
718
717
719
718
// We have some custom DAG combine patterns for these nodes
720
719
setTargetDAGCombine ({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
721
- ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::STORE ,
722
- ISD::UREM, ISD:: VSELECT});
720
+ ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM ,
721
+ ISD::VSELECT});
723
722
724
723
// setcc for f16x2 and bf16x2 needs special handling to prevent
725
724
// legalizer's attempt to scalarize it due to v2i1 not being legal.
@@ -2917,6 +2916,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
2917
2916
DAG.getMemIntrinsicNode (Opcode, DL, DAG.getVTList (MVT::Other), Ops,
2918
2917
MemSD->getMemoryVT (), MemSD->getMemOperand ());
2919
2918
2919
+ // return DCI.CombineTo(N, NewSt, true);
2920
2920
return NewSt;
2921
2921
}
2922
2922
@@ -5557,51 +5557,6 @@ static SDValue PerformLOADCombine(SDNode *N,
5557
5557
DL);
5558
5558
}
5559
5559
5560
- // Lower a v16i8 (or a v8i8) store into a StoreV4 (or StoreV2) operation with
5561
- // i32 operands (the node's only result is the chain) instead of letting it be
5562
- // split into smaller stores during legalization. This is done at dag-combine1
5563
- // time, so that vector operations with i8 elements can be optimised away
5564
- // instead of being needlessly split during legalization, which involves
5565
- // storing to the stack and loading it back. Other types return SDValue().
5566
- static SDValue PerformSTORECombine (SDNode *N,
5567
- TargetLowering::DAGCombinerInfo &DCI) {
5568
- SelectionDAG &DAG = DCI.DAG ;
5569
- StoreSDNode *ST = cast<StoreSDNode>(N);
5570
- EVT VT = ST->getValue ().getValueType ();
5571
- if (VT != MVT::v16i8 && VT != MVT::v8i8)
5572
- return SDValue ();
5573
-
5574
- // Pick the opcode and type: v16i8 -> StoreV4 of v4i32, v8i8 -> StoreV2 of v2i32.
5575
- unsigned Opc = VT == MVT::v16i8 ? NVPTXISD::StoreV4 : NVPTXISD::StoreV2;
5576
- EVT NewVT = VT == MVT::v16i8 ? MVT::v4i32 : MVT::v2i32;
5577
- unsigned NumElts = NewVT.getVectorNumElements ();
5578
-
5579
- // Bitcast the stored value to the new type: v16i8 -> v4i32 (or v8i8 -> v2i32).
5580
- SDValue NewStoreValue = DCI.DAG .getBitcast (NewVT, ST->getValue ());
5581
-
5582
- // Operands for the store: the one stored value is replaced by NumElts scalars.
5583
- SmallVector<SDValue, 8 > Ops;
5584
- Ops.reserve (N->getNumOperands () + NumElts - 1 );
5585
- // Chain value (operand 0 of the original store).
5586
- Ops.push_back (N->ops ().front ());
5587
-
5588
- SDLoc DL (N);
5589
- SmallVector<SDValue> Elts (NumElts);
5590
- // Break v4i32 (or v2i32) into four (or two) i32 elements.
5591
- for (unsigned I = 0 ; I < NumElts; ++I)
5592
- Elts[I] = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, DL,
5593
- NewStoreValue.getValueType ().getVectorElementType (),
5594
- NewStoreValue, DAG.getIntPtrConstant (I, DL));
5595
- Ops.append (Elts.begin (), Elts.end ());
5596
- // Any remaining operands, i.e. operand 2 of the original store onwards.
5597
- Ops.append (N->op_begin () + 2 , N->op_end ());
5598
-
5599
- SDValue NewStore = DAG.getMemIntrinsicNode (Opc, DL, DAG.getVTList (MVT::Other),
5600
- Ops, NewVT, ST->getMemOperand ());
5601
- // Return the new chain (result 0 of the new store node).
5602
- return NewStore.getValue (0 );
5603
- }
5604
-
5605
5560
SDValue NVPTXTargetLowering::PerformDAGCombine (SDNode *N,
5606
5561
DAGCombinerInfo &DCI) const {
5607
5562
CodeGenOptLevel OptLevel = getTargetMachine ().getOptLevel ();
@@ -5623,8 +5578,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
5623
5578
return PerformSETCCCombine (N, DCI, STI.getSmVersion ());
5624
5579
case ISD::LOAD:
5625
5580
return PerformLOADCombine (N, DCI);
5626
- case ISD::STORE:
5627
- return PerformSTORECombine (N, DCI);
5628
5581
case NVPTXISD::StoreRetval:
5629
5582
case NVPTXISD::StoreRetvalV2:
5630
5583
case NVPTXISD::StoreRetvalV4:
0 commit comments