@@ -1551,6 +1551,8 @@ static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1551
1551
std::tie (Lo, Hi) = DAG.SplitScalar (In, DL, MVT::i64 , MVT::i64 );
1552
1552
}
1553
1553
1554
+ // FIXME: If v2i64 were a legal type, we could use it instead of
1555
+ // Untyped here. This might enable improved folding.
1554
1556
SDNode *Pair = DAG.getMachineNode (SystemZ::PAIR128, DL,
1555
1557
MVT::Untyped, Hi, Lo);
1556
1558
return SDValue (Pair, 0 );
@@ -6247,14 +6249,18 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6247
6249
}
6248
6250
}
6249
6251
6250
- // Manually lower a bitcast to avoid introducing illegal types after type
6251
- // legalization.
6252
6252
static SDValue expandBitCastI128ToF128 (SelectionDAG &DAG, SDValue Src,
6253
- SDValue Chain, const SDLoc &SL) {
6254
- SDValue Hi =
6255
- DAG.getTargetExtractSubreg (SystemZ::subreg_h64, SL, MVT::i64 , Src);
6256
- SDValue Lo =
6257
- DAG.getTargetExtractSubreg (SystemZ::subreg_l64, SL, MVT::i64 , Src);
6253
+ const SDLoc &SL) {
6254
+ // If i128 is legal, just use a normal bitcast.
6255
+ if (DAG.getTargetLoweringInfo ().isTypeLegal (MVT::i128 ))
6256
+ return DAG.getBitcast (MVT::f128 , Src);
6257
+
6258
+ // Otherwise, f128 must live in FP128, so do a partwise move.
6259
+ assert (DAG.getTargetLoweringInfo ().getRepRegClassFor (MVT::f128 ) ==
6260
+ &SystemZ::FP128BitRegClass);
6261
+
6262
+ SDValue Hi, Lo;
6263
+ std::tie (Lo, Hi) = DAG.SplitScalar (Src, SL, MVT::i64 , MVT::i64 );
6258
6264
6259
6265
Hi = DAG.getBitcast (MVT::f64 , Hi);
6260
6266
Lo = DAG.getBitcast (MVT::f64 , Lo);
@@ -6267,24 +6273,24 @@ static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6267
6273
return SDValue (Pair, 0 );
6268
6274
}
6269
6275
6270
- static std::pair<SDValue, SDValue>
6271
- expandBitCastF128ToI128Parts (SelectionDAG &DAG, SDValue Src, const SDLoc &SL) {
6276
// Convert the f128 value Src into an i128 value.
// When i128 is a legal type this is a plain bitcast. Otherwise the two 64-bit
// halves are extracted as f64 via subreg_l64 / subreg_h64, each half is
// bitcast to i64, and the halves are joined with BUILD_PAIR.
// The removed ('-') lines below show the previous shape: a helper returning
// the {Hi, Lo} pair, and a wrapper that combined them with a PAIR128 machine
// node of type Untyped.
+ static SDValue expandBitCastF128ToI128 (SelectionDAG &DAG, SDValue Src,
6277
+ const SDLoc &SL) {
6278
+ // If i128 is legal, just use a normal bitcast.
6279
+ if (DAG.getTargetLoweringInfo ().isTypeLegal (MVT::i128 ))
6280
+ return DAG.getBitcast (MVT::i128 , Src);
6281
+
6282
+ // Otherwise, f128 must live in FP128, so do a partwise move.
6283
+ assert (DAG.getTargetLoweringInfo ().getRepRegClassFor (MVT::f128 ) ==
6284
+ &SystemZ::FP128BitRegClass);
6285
+
6272
6286
// Pull each 64-bit half out of the 128-bit source as an f64 ...
SDValue LoFP =
6273
6287
DAG.getTargetExtractSubreg (SystemZ::subreg_l64, SL, MVT::f64 , Src);
6274
6288
SDValue HiFP =
6275
6289
DAG.getTargetExtractSubreg (SystemZ::subreg_h64, SL, MVT::f64 , Src);
6276
6290
// ... and reinterpret each half as an i64.
SDValue Lo = DAG.getNode (ISD::BITCAST, SL, MVT::i64 , LoFP);
6277
6291
SDValue Hi = DAG.getNode (ISD::BITCAST, SL, MVT::i64 , HiFP);
6278
6292
6279
- return {Hi, Lo};
6280
- }
6281
-
6282
- static SDValue expandBitCastF128ToI128 (SelectionDAG &DAG, SDValue Src,
6283
- const SDLoc &SL) {
6284
-
6285
- auto [Hi, Lo] = expandBitCastF128ToI128Parts (DAG, Src, SL);
6286
- SDNode *Pair = DAG.getMachineNode (SystemZ::PAIR128, SL, MVT::Untyped, Hi, Lo);
6287
- return SDValue (Pair, 0 );
6293
// BUILD_PAIR is given the low half first, then the high half.
+ return DAG.getNode (ISD::BUILD_PAIR, SL, MVT::i128 , Lo, Hi);
6288
6294
}
6289
6295
6290
6296
// Lower operations with invalid operand or result types (currently used
@@ -6302,38 +6308,20 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6302
6308
SDValue Res = DAG.getMemIntrinsicNode (SystemZISD::ATOMIC_LOAD_128,
6303
6309
DL, Tys, Ops, MVT::i128 , MMO);
6304
6310
6305
- EVT VT = N->getValueType (0 );
6306
-
6307
- if (VT == MVT::i128 || isTypeLegal (MVT::i128 )) {
6308
- SDValue Lowered = lowerGR128ToI128 (DAG, Res);
6309
- Results.push_back (DAG.getBitcast (VT, Lowered));
6310
- Results.push_back (Res.getValue (1 ));
6311
- } else {
6312
- // For the f128 case, after type legalization, we cannot produce a bitcast
6313
- // with an illegal type (i.e. i128), so manually lower it.
6314
- //
6315
- // FIXME: Really v2i64 should be legal, and should be used in place of
6316
- // unttyped. Then we could emit the bitcast which will potentially fold
6317
- // into the use.
6318
- SDValue Cast = expandBitCastI128ToF128 (DAG, Res, Res.getValue (1 ), DL);
6319
- Results.push_back (Cast);
6320
- Results.push_back (Res.getValue (1 ));
6321
- }
6322
-
6311
+ SDValue Lowered = lowerGR128ToI128 (DAG, Res);
6312
+ if (N->getValueType (0 ) == MVT::f128 )
6313
+ Lowered = expandBitCastI128ToF128 (DAG, Lowered, DL);
6314
+ Results.push_back (Lowered);
6315
+ Results.push_back (Res.getValue (1 ));
6323
6316
break ;
6324
6317
}
6325
6318
case ISD::ATOMIC_STORE: {
6326
6319
SDLoc DL (N);
6327
6320
SDVTList Tys = DAG.getVTList (MVT::Other);
6328
6321
SDValue Val = N->getOperand (1 );
6329
- EVT VT = Val.getValueType ();
6330
-
6331
- if (VT == MVT::i128 || isTypeLegal (MVT::i128 )) {
6332
- Val = DAG.getBitcast (MVT::i128 , Val);
6333
- Val = lowerI128ToGR128 (DAG, Val);
6334
- } else {
6322
+ if (Val.getValueType () == MVT::f128 )
6335
6323
Val = expandBitCastF128ToI128 (DAG, Val, DL);
6336
- }
6324
+ Val = lowerI128ToGR128 (DAG, Val);
6337
6325
6338
6326
SDValue Ops[] = {N->getOperand (0 ), Val, N->getOperand (2 )};
6339
6327
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand ();
@@ -6370,21 +6358,7 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6370
6358
if (N->getValueType (0 ) == MVT::i128 && Src.getValueType () == MVT::f128 &&
6371
6359
!useSoftFloat ()) {
6372
6360
SDLoc DL (N);
6373
- SDValue Lo, Hi;
6374
- if (getRepRegClassFor (MVT::f128 ) == &SystemZ::VR128BitRegClass) {
6375
- SDValue VecBC = DAG.getNode (ISD::BITCAST, DL, MVT::v2i64, Src);
6376
- Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64 , VecBC,
6377
- DAG.getConstant (1 , DL, MVT::i32 ));
6378
- Hi = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64 , VecBC,
6379
- DAG.getConstant (0 , DL, MVT::i32 ));
6380
- } else {
6381
- // FIXME: Assert should be moved into expandBitCastF128ToI128Parts
6382
- assert (getRepRegClassFor (MVT::f128 ) == &SystemZ::FP128BitRegClass &&
6383
- " Unrecognized register class for f128." );
6384
- std::tie (Hi, Lo) = expandBitCastF128ToI128Parts (DAG, Src, DL);
6385
- }
6386
-
6387
- Results.push_back (DAG.getNode (ISD::BUILD_PAIR, DL, MVT::i128 , Lo, Hi));
6361
+ Results.push_back (expandBitCastF128ToI128 (DAG, Src, DL));
6388
6362
}
6389
6363
break ;
6390
6364
}
@@ -6829,72 +6803,118 @@ SDValue SystemZTargetLowering::combineMERGE(
6829
6803
return SDValue ();
6830
6804
}
6831
6805
6806
// Check whether the value result (result 0) of the load LD is consumed only
// by extractions of its 64-bit halves.
//   - A direct TRUNCATE to i64 is recorded in LoPart.
//   - A TRUNCATE to i64 reached through a single-use SRL by the constant 64
//     is recorded in HiPart.
// Returns false if any other kind of user is found, or if the same half is
// extracted more than once. On a true return, an out-parameter is left as
// nullptr when no extraction of that half was seen.
+ static bool isI128MovedToParts (LoadSDNode *LD, SDNode *&LoPart,
6807
+ SDNode *&HiPart) {
6808
+ LoPart = HiPart = nullptr ;
6809
+
6810
+ // Scan through all users.
6811
+ for (SDNode::use_iterator UI = LD->use_begin (), UIEnd = LD->use_end ();
6812
+ UI != UIEnd; ++UI) {
6813
+ // Skip the uses of the chain.
6814
+ if (UI.getUse ().getResNo () != 0 )
6815
+ continue ;
6816
+
6817
+ // Verify every user is a TRUNCATE to i64 of the low or high half.
6818
+ SDNode *User = *UI;
6819
+ bool IsLoPart = true ;
6820
// A shift right by 64 selects the high half; step through it to its only
// user (hasOneUse() is part of the condition) and classify that user below.
+ if (User->getOpcode () == ISD::SRL &&
6821
+ User->getOperand (1 ).getOpcode () == ISD::Constant &&
6822
+ User->getConstantOperandVal (1 ) == 64 && User->hasOneUse ()) {
6823
+ User = *User->use_begin ();
6824
+ IsLoPart = false ;
6825
+ }
6826
+ if (User->getOpcode () != ISD::TRUNCATE || User->getValueType (0 ) != MVT::i64 )
6827
+ return false ;
6828
+
6829
// Record the extraction; seeing the same half a second time rejects the
// match.
+ if (IsLoPart) {
6830
+ if (LoPart)
6831
+ return false ;
6832
+ LoPart = User;
6833
+ } else {
6834
+ if (HiPart)
6835
+ return false ;
6836
+ HiPart = User;
6837
+ }
6838
+ }
6839
+ return true ;
6840
+ }
6841
+
6842
// Check whether the value result (result 0) of the load LD is consumed only
// by EXTRACT_SUBREG machine nodes that select subreg_l64 or subreg_h64.
// Each such node must itself have exactly one use. The subreg_l64 extraction
// is recorded in LoPart and the subreg_h64 extraction in HiPart.
// Returns false if any other kind of user is found, if a different
// subregister index is used, or if the same half is extracted more than
// once. On a true return, an out-parameter is left as nullptr when no
// extraction of that half was seen.
+ static bool isF128MovedToParts (LoadSDNode *LD, SDNode *&LoPart,
6843
+ SDNode *&HiPart) {
6844
+ LoPart = HiPart = nullptr ;
6845
+
6846
+ // Scan through all users.
6847
+ for (SDNode::use_iterator UI = LD->use_begin (), UIEnd = LD->use_end ();
6848
+ UI != UIEnd; ++UI) {
6849
+ // Skip the uses of the chain.
6850
+ if (UI.getUse ().getResNo () != 0 )
6851
+ continue ;
6852
+
6853
+ // Verify every user is an EXTRACT_SUBREG of the low or high half.
6854
+ SDNode *User = *UI;
6855
+ if (!User->hasOneUse () || !User->isMachineOpcode () ||
6856
+ User->getMachineOpcode () != TargetOpcode::EXTRACT_SUBREG)
6857
+ return false ;
6858
+
6859
// Operand 1 is read as the subregister index; only the two 64-bit halves
// are accepted, and each at most once.
+ switch (User->getConstantOperandVal (1 )) {
6860
+ case SystemZ::subreg_l64:
6861
+ if (LoPart)
6862
+ return false ;
6863
+ LoPart = User;
6864
+ break ;
6865
+ case SystemZ::subreg_h64:
6866
+ if (HiPart)
6867
+ return false ;
6868
+ HiPart = User;
6869
+ break ;
6870
+ default :
6871
+ return false ;
6872
+ }
6873
+ }
6874
+ return true ;
6875
+ }
6876
+
6832
6877
SDValue SystemZTargetLowering::combineLOAD (
6833
6878
SDNode *N, DAGCombinerInfo &DCI) const {
6834
6879
SelectionDAG &DAG = DCI.DAG ;
6835
6880
EVT LdVT = N->getValueType (0 );
6836
6881
SDLoc DL (N);
6837
6882
6838
- // Replace an i128 load that is used solely to move its value into GPRs
6883
+ // Replace a 128-bit load that is used solely to move its value into GPRs
6839
6884
// by separate loads of both halves.
6840
- if (LdVT == MVT::i128 ) {
6841
- LoadSDNode *LD = cast<LoadSDNode>(N);
6842
- if (!LD->isSimple () || !ISD::isNormalLoad (LD))
6843
- return SDValue ();
6844
-
6845
- // Scan through all users.
6846
- SmallVector<std::pair<SDNode *, int >, 2 > Users;
6847
- int UsedElements = 0 ;
6848
- for (SDNode::use_iterator UI = LD->use_begin (), UIEnd = LD->use_end ();
6849
- UI != UIEnd; ++UI) {
6850
- // Skip the uses of the chain.
6851
- if (UI.getUse ().getResNo () != 0 )
6852
- continue ;
6853
-
6854
- // Verify every user is a TRUNCATE to i64 of the low or high half ...
6855
- SDNode *User = *UI;
6856
- int Index = 1 ;
6857
- if (User->getOpcode () == ISD::SRL &&
6858
- User->getOperand (1 ).getOpcode () == ISD::Constant &&
6859
- User->getConstantOperandVal (1 ) == 64 && User->hasOneUse ()) {
6860
- User = *User->use_begin ();
6861
- Index = 0 ;
6885
+ LoadSDNode *LD = cast<LoadSDNode>(N);
6886
+ if (LD->isSimple () && ISD::isNormalLoad (LD)) {
6887
+ SDNode *LoPart, *HiPart;
6888
+ if ((LdVT == MVT::i128 && isI128MovedToParts (LD, LoPart, HiPart)) ||
6889
+ (LdVT == MVT::f128 && isF128MovedToParts (LD, LoPart, HiPart))) {
6890
+ // Rewrite each extraction as an independent load.
6891
+ SmallVector<SDValue, 2 > ArgChains;
6892
+ if (HiPart) {
6893
+ SDValue EltLoad = DAG.getLoad (
6894
+ HiPart->getValueType (0 ), DL, LD->getChain (), LD->getBasePtr (),
6895
+ LD->getPointerInfo (), LD->getOriginalAlign (),
6896
+ LD->getMemOperand ()->getFlags (), LD->getAAInfo ());
6897
+
6898
+ DCI.CombineTo (HiPart, EltLoad, true );
6899
+ ArgChains.push_back (EltLoad.getValue (1 ));
6900
+ }
6901
+ if (LoPart) {
6902
+ SDValue EltLoad = DAG.getLoad (
6903
+ LoPart->getValueType (0 ), DL, LD->getChain (),
6904
+ DAG.getObjectPtrOffset (DL, LD->getBasePtr (), TypeSize::getFixed (8 )),
6905
+ LD->getPointerInfo ().getWithOffset (8 ), LD->getOriginalAlign (),
6906
+ LD->getMemOperand ()->getFlags (), LD->getAAInfo ());
6907
+
6908
+ DCI.CombineTo (LoPart, EltLoad, true );
6909
+ ArgChains.push_back (EltLoad.getValue (1 ));
6862
6910
}
6863
- if (User->getOpcode () != ISD::TRUNCATE ||
6864
- User->getValueType (0 ) != MVT::i64 )
6865
- return SDValue ();
6866
-
6867
- // ... and no half is extracted twice.
6868
- if (UsedElements & (1 << Index))
6869
- return SDValue ();
6870
-
6871
- UsedElements |= 1 << Index;
6872
- Users.push_back (std::make_pair (User, Index));
6873
- }
6874
-
6875
- // Rewrite each extraction as an independent load.
6876
- SmallVector<SDValue, 2 > ArgChains;
6877
- for (auto UserAndIndex : Users) {
6878
- SDNode *User = UserAndIndex.first ;
6879
- unsigned Offset = User->getValueType (0 ).getStoreSize () * UserAndIndex.second ;
6880
- SDValue Ptr =
6881
- DAG.getMemBasePlusOffset (LD->getBasePtr (), TypeSize::getFixed (Offset), DL);
6882
- SDValue EltLoad =
6883
- DAG.getLoad (User->getValueType (0 ), DL, LD->getChain (), Ptr,
6884
- LD->getPointerInfo ().getWithOffset (Offset),
6885
- LD->getOriginalAlign (), LD->getMemOperand ()->getFlags (),
6886
- LD->getAAInfo ());
6887
6911
6888
- DCI.CombineTo (User, EltLoad, true );
6889
- ArgChains.push_back (EltLoad.getValue (1 ));
6912
+ // Collect all chains via TokenFactor.
6913
+ SDValue Chain = DAG.getNode (ISD::TokenFactor, DL, MVT::Other, ArgChains);
6914
+ DAG.ReplaceAllUsesOfValueWith (SDValue (N, 1 ), Chain);
6915
+ DCI.AddToWorklist (Chain.getNode ());
6916
+ return SDValue (N, 0 );
6890
6917
}
6891
-
6892
- // Collect all chains via TokenFactor.
6893
- SDValue Chain = DAG.getNode (ISD::TokenFactor, DL, MVT::Other,
6894
- ArgChains);
6895
- DAG.ReplaceAllUsesOfValueWith (SDValue (N, 1 ), Chain);
6896
- DCI.AddToWorklist (Chain.getNode ());
6897
- return SDValue (N, 0 );
6898
6918
}
6899
6919
6900
6920
if (LdVT.isVector () || LdVT.isInteger ())
@@ -6974,7 +6994,8 @@ static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6974
6994
return true ;
6975
6995
}
6976
6996
6977
- static bool isMovedFromParts (SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6997
+ static bool isI128MovedFromParts (SDValue Val, SDValue &LoPart,
6998
+ SDValue &HiPart) {
6978
6999
if (Val.getOpcode () != ISD::OR || !Val.getNode ()->hasOneUse ())
6979
7000
return false ;
6980
7001
@@ -7001,6 +7022,23 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
7001
7022
return true ;
7002
7023
}
7003
7024
7025
// Check whether Val is a single-use REG_SEQUENCE machine node that assembles
// an FP128Bit register from two parts. Only this exact five-operand layout
// is matched:
//   operand 0: FP128BitRegClassID
//   operand 1: low part,  operand 2: subreg_l64
//   operand 3: high part, operand 4: subreg_h64
// On a match, LoPart and HiPart are set to operands 1 and 3 and true is
// returned. Otherwise false is returned and the out-parameters are not
// written.
+ static bool isF128MovedFromParts (SDValue Val, SDValue &LoPart,
7026
+ SDValue &HiPart) {
7027
+ if (!Val.getNode ()->hasOneUse () || !Val.isMachineOpcode () ||
7028
+ Val.getMachineOpcode () != TargetOpcode::REG_SEQUENCE)
7029
+ return false ;
7030
+
7031
// Require the low-half-first operand layout described above.
+ if (Val->getNumOperands () != 5 ||
7032
+ Val->getOperand (0 )->getAsZExtVal () != SystemZ::FP128BitRegClassID ||
7033
+ Val->getOperand (2 )->getAsZExtVal () != SystemZ::subreg_l64 ||
7034
+ Val->getOperand (4 )->getAsZExtVal () != SystemZ::subreg_h64)
7035
+ return false ;
7036
+
7037
+ LoPart = Val->getOperand (1 );
7038
+ HiPart = Val->getOperand (3 );
7039
+ return true ;
7040
+ }
7041
+
7004
7042
SDValue SystemZTargetLowering::combineSTORE (
7005
7043
SDNode *N, DAGCombinerInfo &DCI) const {
7006
7044
SelectionDAG &DAG = DCI.DAG ;
@@ -7070,10 +7108,11 @@ SDValue SystemZTargetLowering::combineSTORE(
7070
7108
Ops, MemVT, SN->getMemOperand ());
7071
7109
}
7072
7110
7073
- // Transform a store of an i128 moved from GPRs into two separate stores.
7074
- if (MemVT == MVT:: i128 && SN->isSimple () && ISD::isNormalStore (SN)) {
7111
+ // Transform a store of a 128-bit value moved from parts into two stores.
7112
+ if (SN->isSimple () && ISD::isNormalStore (SN)) {
7075
7113
SDValue LoPart, HiPart;
7076
- if (isMovedFromParts (Op1, LoPart, HiPart)) {
7114
+ if ((MemVT == MVT::i128 && isI128MovedFromParts (Op1, LoPart, HiPart)) ||
7115
+ (MemVT == MVT::f128 && isF128MovedFromParts (Op1, LoPart, HiPart))) {
7077
7116
SDLoc DL (SN);
7078
7117
SDValue Chain0 =
7079
7118
DAG.getStore (SN->getChain (), DL, HiPart, SN->getBasePtr (),
0 commit comments