Skip to content

Commit 0a0cac6

Browse files
authored
[SystemZ] Simplify f128 atomic load/store (#90977)
Change definition of expandBitCastI128ToF128 and expandBitCastF128ToI128 to allow for simplified use in atomic load/store. Update logic to split 128-bit loads and stores in DAGCombine to also handle the f128 case where appropriate. This fixes the regressions introduced by recent atomic load/store patches.
1 parent 522b4bf commit 0a0cac6

File tree

5 files changed

+174
-168
lines changed

5 files changed

+174
-168
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 157 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,6 +1551,8 @@ static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
15511551
std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
15521552
}
15531553

1554+
// FIXME: If v2i64 were a legal type, we could use it instead of
1555+
// Untyped here. This might enable improved folding.
15541556
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
15551557
MVT::Untyped, Hi, Lo);
15561558
return SDValue(Pair, 0);
@@ -6247,14 +6249,18 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
62476249
}
62486250
}
62496251

6250-
// Manually lower a bitcast to avoid introducing illegal types after type
6251-
// legalization.
62526252
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6253-
SDValue Chain, const SDLoc &SL) {
6254-
SDValue Hi =
6255-
DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
6256-
SDValue Lo =
6257-
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
6253+
const SDLoc &SL) {
6254+
// If i128 is legal, just use a normal bitcast.
6255+
if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6256+
return DAG.getBitcast(MVT::f128, Src);
6257+
6258+
// Otherwise, f128 must live in FP128, so do a partwise move.
6259+
assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6260+
&SystemZ::FP128BitRegClass);
6261+
6262+
SDValue Hi, Lo;
6263+
std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
62586264

62596265
Hi = DAG.getBitcast(MVT::f64, Hi);
62606266
Lo = DAG.getBitcast(MVT::f64, Lo);
@@ -6267,24 +6273,24 @@ static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
62676273
return SDValue(Pair, 0);
62686274
}
62696275

6270-
static std::pair<SDValue, SDValue>
6271-
expandBitCastF128ToI128Parts(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) {
6276+
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6277+
const SDLoc &SL) {
6278+
// If i128 is legal, just use a normal bitcast.
6279+
if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6280+
return DAG.getBitcast(MVT::i128, Src);
6281+
6282+
// Otherwise, f128 must live in FP128, so do a partwise move.
6283+
assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6284+
&SystemZ::FP128BitRegClass);
6285+
62726286
SDValue LoFP =
62736287
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
62746288
SDValue HiFP =
62756289
DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
62766290
SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
62776291
SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
62786292

6279-
return {Hi, Lo};
6280-
}
6281-
6282-
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6283-
const SDLoc &SL) {
6284-
6285-
auto [Hi, Lo] = expandBitCastF128ToI128Parts(DAG, Src, SL);
6286-
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, SL, MVT::Untyped, Hi, Lo);
6287-
return SDValue(Pair, 0);
6293+
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
62886294
}
62896295

62906296
// Lower operations with invalid operand or result types (currently used
@@ -6302,38 +6308,20 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
63026308
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
63036309
DL, Tys, Ops, MVT::i128, MMO);
63046310

6305-
EVT VT = N->getValueType(0);
6306-
6307-
if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6308-
SDValue Lowered = lowerGR128ToI128(DAG, Res);
6309-
Results.push_back(DAG.getBitcast(VT, Lowered));
6310-
Results.push_back(Res.getValue(1));
6311-
} else {
6312-
// For the f128 case, after type legalization, we cannot produce a bitcast
6313-
// with an illegal type (i.e. i128), so manually lower it.
6314-
//
6315-
// FIXME: Really v2i64 should be legal, and should be used in place of
6316-
// untyped. Then we could emit the bitcast which will potentially fold
6317-
// into the use.
6318-
SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
6319-
Results.push_back(Cast);
6320-
Results.push_back(Res.getValue(1));
6321-
}
6322-
6311+
SDValue Lowered = lowerGR128ToI128(DAG, Res);
6312+
if (N->getValueType(0) == MVT::f128)
6313+
Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6314+
Results.push_back(Lowered);
6315+
Results.push_back(Res.getValue(1));
63236316
break;
63246317
}
63256318
case ISD::ATOMIC_STORE: {
63266319
SDLoc DL(N);
63276320
SDVTList Tys = DAG.getVTList(MVT::Other);
63286321
SDValue Val = N->getOperand(1);
6329-
EVT VT = Val.getValueType();
6330-
6331-
if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6332-
Val = DAG.getBitcast(MVT::i128, Val);
6333-
Val = lowerI128ToGR128(DAG, Val);
6334-
} else {
6322+
if (Val.getValueType() == MVT::f128)
63356323
Val = expandBitCastF128ToI128(DAG, Val, DL);
6336-
}
6324+
Val = lowerI128ToGR128(DAG, Val);
63376325

63386326
SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
63396327
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
@@ -6370,21 +6358,7 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
63706358
if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
63716359
!useSoftFloat()) {
63726360
SDLoc DL(N);
6373-
SDValue Lo, Hi;
6374-
if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6375-
SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6376-
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6377-
DAG.getConstant(1, DL, MVT::i32));
6378-
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6379-
DAG.getConstant(0, DL, MVT::i32));
6380-
} else {
6381-
// FIXME: Assert should be moved into expandBitCastF128ToI128Parts
6382-
assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6383-
"Unrecognized register class for f128.");
6384-
std::tie(Hi, Lo) = expandBitCastF128ToI128Parts(DAG, Src, DL);
6385-
}
6386-
6387-
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6361+
Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
63886362
}
63896363
break;
63906364
}
@@ -6829,72 +6803,118 @@ SDValue SystemZTargetLowering::combineMERGE(
68296803
return SDValue();
68306804
}
68316805

6806+
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6807+
SDNode *&HiPart) {
6808+
LoPart = HiPart = nullptr;
6809+
6810+
// Scan through all users.
6811+
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6812+
UI != UIEnd; ++UI) {
6813+
// Skip the uses of the chain.
6814+
if (UI.getUse().getResNo() != 0)
6815+
continue;
6816+
6817+
// Verify every user is a TRUNCATE to i64 of the low or high half.
6818+
SDNode *User = *UI;
6819+
bool IsLoPart = true;
6820+
if (User->getOpcode() == ISD::SRL &&
6821+
User->getOperand(1).getOpcode() == ISD::Constant &&
6822+
User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6823+
User = *User->use_begin();
6824+
IsLoPart = false;
6825+
}
6826+
if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
6827+
return false;
6828+
6829+
if (IsLoPart) {
6830+
if (LoPart)
6831+
return false;
6832+
LoPart = User;
6833+
} else {
6834+
if (HiPart)
6835+
return false;
6836+
HiPart = User;
6837+
}
6838+
}
6839+
return true;
6840+
}
6841+
6842+
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6843+
SDNode *&HiPart) {
6844+
LoPart = HiPart = nullptr;
6845+
6846+
// Scan through all users.
6847+
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6848+
UI != UIEnd; ++UI) {
6849+
// Skip the uses of the chain.
6850+
if (UI.getUse().getResNo() != 0)
6851+
continue;
6852+
6853+
// Verify every user is an EXTRACT_SUBREG of the low or high half.
6854+
SDNode *User = *UI;
6855+
if (!User->hasOneUse() || !User->isMachineOpcode() ||
6856+
User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
6857+
return false;
6858+
6859+
switch (User->getConstantOperandVal(1)) {
6860+
case SystemZ::subreg_l64:
6861+
if (LoPart)
6862+
return false;
6863+
LoPart = User;
6864+
break;
6865+
case SystemZ::subreg_h64:
6866+
if (HiPart)
6867+
return false;
6868+
HiPart = User;
6869+
break;
6870+
default:
6871+
return false;
6872+
}
6873+
}
6874+
return true;
6875+
}
6876+
68326877
SDValue SystemZTargetLowering::combineLOAD(
68336878
SDNode *N, DAGCombinerInfo &DCI) const {
68346879
SelectionDAG &DAG = DCI.DAG;
68356880
EVT LdVT = N->getValueType(0);
68366881
SDLoc DL(N);
68376882

6838-
// Replace an i128 load that is used solely to move its value into GPRs
6883+
// Replace a 128-bit load that is used solely to move its value into GPRs
68396884
// by separate loads of both halves.
6840-
if (LdVT == MVT::i128) {
6841-
LoadSDNode *LD = cast<LoadSDNode>(N);
6842-
if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6843-
return SDValue();
6844-
6845-
// Scan through all users.
6846-
SmallVector<std::pair<SDNode *, int>, 2> Users;
6847-
int UsedElements = 0;
6848-
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6849-
UI != UIEnd; ++UI) {
6850-
// Skip the uses of the chain.
6851-
if (UI.getUse().getResNo() != 0)
6852-
continue;
6853-
6854-
// Verify every user is a TRUNCATE to i64 of the low or high half ...
6855-
SDNode *User = *UI;
6856-
int Index = 1;
6857-
if (User->getOpcode() == ISD::SRL &&
6858-
User->getOperand(1).getOpcode() == ISD::Constant &&
6859-
User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6860-
User = *User->use_begin();
6861-
Index = 0;
6885+
LoadSDNode *LD = cast<LoadSDNode>(N);
6886+
if (LD->isSimple() && ISD::isNormalLoad(LD)) {
6887+
SDNode *LoPart, *HiPart;
6888+
if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
6889+
(LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
6890+
// Rewrite each extraction as an independent load.
6891+
SmallVector<SDValue, 2> ArgChains;
6892+
if (HiPart) {
6893+
SDValue EltLoad = DAG.getLoad(
6894+
HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
6895+
LD->getPointerInfo(), LD->getOriginalAlign(),
6896+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
6897+
6898+
DCI.CombineTo(HiPart, EltLoad, true);
6899+
ArgChains.push_back(EltLoad.getValue(1));
6900+
}
6901+
if (LoPart) {
6902+
SDValue EltLoad = DAG.getLoad(
6903+
LoPart->getValueType(0), DL, LD->getChain(),
6904+
DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
6905+
LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
6906+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
6907+
6908+
DCI.CombineTo(LoPart, EltLoad, true);
6909+
ArgChains.push_back(EltLoad.getValue(1));
68626910
}
6863-
if (User->getOpcode() != ISD::TRUNCATE ||
6864-
User->getValueType(0) != MVT::i64)
6865-
return SDValue();
6866-
6867-
// ... and no half is extracted twice.
6868-
if (UsedElements & (1 << Index))
6869-
return SDValue();
6870-
6871-
UsedElements |= 1 << Index;
6872-
Users.push_back(std::make_pair(User, Index));
6873-
}
6874-
6875-
// Rewrite each extraction as an independent load.
6876-
SmallVector<SDValue, 2> ArgChains;
6877-
for (auto UserAndIndex : Users) {
6878-
SDNode *User = UserAndIndex.first;
6879-
unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
6880-
SDValue Ptr =
6881-
DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
6882-
SDValue EltLoad =
6883-
DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
6884-
LD->getPointerInfo().getWithOffset(Offset),
6885-
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
6886-
LD->getAAInfo());
68876911

6888-
DCI.CombineTo(User, EltLoad, true);
6889-
ArgChains.push_back(EltLoad.getValue(1));
6912+
// Collect all chains via TokenFactor.
6913+
SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
6914+
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6915+
DCI.AddToWorklist(Chain.getNode());
6916+
return SDValue(N, 0);
68906917
}
6891-
6892-
// Collect all chains via TokenFactor.
6893-
SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
6894-
ArgChains);
6895-
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6896-
DCI.AddToWorklist(Chain.getNode());
6897-
return SDValue(N, 0);
68986918
}
68996919

69006920
if (LdVT.isVector() || LdVT.isInteger())
@@ -6974,7 +6994,8 @@ static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
69746994
return true;
69756995
}
69766996

6977-
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6997+
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
6998+
SDValue &HiPart) {
69786999
if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
69797000
return false;
69807001

@@ -7001,6 +7022,23 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
70017022
return true;
70027023
}
70037024

7025+
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7026+
SDValue &HiPart) {
7027+
if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7028+
Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7029+
return false;
7030+
7031+
if (Val->getNumOperands() != 5 ||
7032+
Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7033+
Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7034+
Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7035+
return false;
7036+
7037+
LoPart = Val->getOperand(1);
7038+
HiPart = Val->getOperand(3);
7039+
return true;
7040+
}
7041+
70047042
SDValue SystemZTargetLowering::combineSTORE(
70057043
SDNode *N, DAGCombinerInfo &DCI) const {
70067044
SelectionDAG &DAG = DCI.DAG;
@@ -7070,10 +7108,11 @@ SDValue SystemZTargetLowering::combineSTORE(
70707108
Ops, MemVT, SN->getMemOperand());
70717109
}
70727110

7073-
// Transform a store of an i128 moved from GPRs into two separate stores.
7074-
if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
7111+
// Transform a store of a 128-bit value moved from parts into two stores.
7112+
if (SN->isSimple() && ISD::isNormalStore(SN)) {
70757113
SDValue LoPart, HiPart;
7076-
if (isMovedFromParts(Op1, LoPart, HiPart)) {
7114+
if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7115+
(MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
70777116
SDLoc DL(SN);
70787117
SDValue Chain0 =
70797118
DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),

llvm/test/CodeGen/SystemZ/atomic-load-08.ll

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,16 @@
1-
; Test long double atomic loads.
1+
; Test long double atomic loads - via i128.
22
;
33
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
44
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
5-
; TODO: Is it worth testing softfp with vector?
65
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s
76

8-
; FIXME: Without vector support, v2i64 should be legal and we should
9-
; introduce a simple bitcast, which could fold into the store use
10-
; and avoid the intermediate f registers.
117
define void @f1(ptr %ret, ptr %src) {
128
; CHECK-LABEL: f1:
139
; CHECK: # %bb.0:
14-
; Z13-NEXT: lpq %r0, 0(%r3)
15-
; Z13-NEXT: stg %r1, 8(%r2)
16-
; Z13-NEXT: stg %r0, 0(%r2)
17-
; Z13-NEXT: br %r14
18-
19-
; BASE: lpq %r0, 0(%r3)
20-
; BASE-NEXT: ldgr %f0, %r0
21-
; BASE-NEXT: ldgr %f2, %r1
22-
; BASE-NEXT: std %f0, 0(%r2)
23-
; BASE-NEXT: std %f2, 8(%r2)
24-
; BASE-NEXT: br %r14
10+
; CHECK-NEXT: lpq %r0, 0(%r3)
11+
; CHECK-NEXT: stg %r1, 8(%r2)
12+
; CHECK-NEXT: stg %r0, 0(%r2)
13+
; CHECK-NEXT: br %r14
2514

2615
; SOFTFP-LABEL: f1:
2716
; SOFTFP: # %bb.0:

0 commit comments

Comments
 (0)