@@ -1355,10 +1355,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
1355
1355
setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356
1356
}
1357
1357
if (Subtarget.hasMMA()) {
1358
- if (Subtarget.isISAFuture())
1358
+ if (Subtarget.isISAFuture()) {
1359
1359
addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360
- else
1360
+ addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1361
+ setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1362
+ setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1363
+ } else {
1361
1364
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1365
+ }
1362
1366
setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363
1367
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1364
1368
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
@@ -11758,6 +11762,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11758
11762
return Op;
11759
11763
}
11760
11764
11765
+ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11766
+ SelectionDAG &DAG) const {
11767
+ SDLoc dl(Op);
11768
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11769
+ SDValue LoadChain = LN->getChain();
11770
+ SDValue BasePtr = LN->getBasePtr();
11771
+ EVT VT = Op.getValueType();
11772
+
11773
+ // Type v1024i1 is used for Dense Math dmr registers.
11774
+ assert(VT == MVT::v1024i1 && "Unsupported type.");
11775
+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11776
+ "Dense Math support required.");
11777
+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11778
+
11779
+ SmallVector<SDValue, 4> Loads;
11780
+ SmallVector<SDValue, 4> LoadChains;
11781
+ SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11782
+ SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11783
+ MachineMemOperand *MMO = LN->getMemOperand();
11784
+ unsigned NumVecs = VT.getSizeInBits() / 256;
11785
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11786
+ MachineMemOperand *NewMMO =
11787
+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11788
+ if (Idx > 0) {
11789
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11790
+ DAG.getConstant(32, dl, BasePtr.getValueType()));
11791
+ LoadOps[2] = BasePtr;
11792
+ }
11793
+ SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11794
+ DAG.getVTList(MVT::v256i1, MVT::Other),
11795
+ LoadOps, MVT::v256i1, NewMMO);
11796
+ LoadChains.push_back(Ld.getValue(1));
11797
+ Loads.push_back(Ld);
11798
+ }
11799
+
11800
+ if (Subtarget.isLittleEndian()) {
11801
+ std::reverse(Loads.begin(), Loads.end());
11802
+ std::reverse(LoadChains.begin(), LoadChains.end());
11803
+ }
11804
+
11805
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11806
+ SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0],
11807
+ Loads[1]),
11808
+ 0);
11809
+ SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11810
+ SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
11811
+ Loads[2], Loads[3]),
11812
+ 0);
11813
+ SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11814
+ SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11815
+ const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11816
+ SDValue Value =
11817
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11818
+
11819
+ SDValue RetOps[] = {Value, TF};
11820
+ return DAG.getMergeValues(RetOps, dl);
11821
+ }
11822
+
11761
11823
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11762
11824
SelectionDAG &DAG) const {
11763
11825
SDLoc dl(Op);
@@ -11766,6 +11828,9 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11766
11828
SDValue BasePtr = LN->getBasePtr();
11767
11829
EVT VT = Op.getValueType();
11768
11830
11831
+ if (VT == MVT::v1024i1)
11832
+ return LowerDMFVectorLoad(Op, DAG);
11833
+
11769
11834
if (VT != MVT::v256i1 && VT != MVT::v512i1)
11770
11835
return Op;
11771
11836
@@ -11803,6 +11868,69 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11803
11868
return DAG.getMergeValues(RetOps, dl);
11804
11869
}
11805
11870
11871
+ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
11872
+ SelectionDAG &DAG) const {
11873
+
11874
+ SDLoc dl(Op);
11875
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11876
+ SDValue StoreChain = SN->getChain();
11877
+ SDValue BasePtr = SN->getBasePtr();
11878
+ SmallVector<SDValue, 4> Values;
11879
+ SmallVector<SDValue, 4> Stores;
11880
+ EVT VT = SN->getValue().getValueType();
11881
+
11882
+ // Type v1024i1 is used for Dense Math dmr registers.
11883
+ assert(VT == MVT::v1024i1 && "Unsupported type.");
11884
+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11885
+ "Dense Math support required.");
11886
+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11887
+
11888
+ SDValue Lo(
11889
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11890
+ Op.getOperand(1),
11891
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
11892
+ 0);
11893
+ SDValue Hi(
11894
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11895
+ Op.getOperand(1),
11896
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
11897
+ 0);
11898
+ EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11899
+ MachineSDNode *ExtNode =
11900
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
11901
+ Values.push_back(SDValue(ExtNode, 0));
11902
+ Values.push_back(SDValue(ExtNode, 1));
11903
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
11904
+ Values.push_back(SDValue(ExtNode, 0));
11905
+ Values.push_back(SDValue(ExtNode, 1));
11906
+
11907
+ if (Subtarget.isLittleEndian())
11908
+ std::reverse(Values.begin(), Values.end());
11909
+
11910
+ SDVTList Tys = DAG.getVTList(MVT::Other);
11911
+ SmallVector<SDValue, 4> Ops{
11912
+ StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
11913
+ Values[0], BasePtr};
11914
+ MachineMemOperand *MMO = SN->getMemOperand();
11915
+ unsigned NumVecs = VT.getSizeInBits() / 256;
11916
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11917
+ MachineMemOperand *NewMMO =
11918
+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11919
+ if (Idx > 0) {
11920
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11921
+ DAG.getConstant(32, dl, BasePtr.getValueType()));
11922
+ Ops[3] = BasePtr;
11923
+ }
11924
+ Ops[2] = Values[Idx];
11925
+ SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11926
+ MVT::v256i1, NewMMO);
11927
+ Stores.push_back(St);
11928
+ }
11929
+
11930
+ SDValue TF = DAG.getTokenFactor(dl, Stores);
11931
+ return TF;
11932
+ }
11933
+
11806
11934
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11807
11935
SelectionDAG &DAG) const {
11808
11936
SDLoc dl(Op);
@@ -11813,6 +11941,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11813
11941
SDValue Value2 = SN->getValue();
11814
11942
EVT StoreVT = Value.getValueType();
11815
11943
11944
+ if (StoreVT == MVT::v1024i1)
11945
+ return LowerDMFVectorStore(Op, DAG);
11946
+
11816
11947
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11817
11948
return Op;
11818
11949
0 commit comments