Skip to content

Commit a09b6fd

Browse files
committed
[SystemZ] Simplify f128 atomic load/store
Change definition of expandBitCastI128ToF128 and expandBitCastF128ToI128 to allow for simplified use in atomic load/store. Update logic to split 128-bit loads and stores in DAGCombine to also handle the f128 case where appropriate. This fixes the regressions introduced by recent atomic load/store patches.
1 parent 01e91a2 commit a09b6fd

File tree

5 files changed

+141
-141
lines changed

5 files changed

+141
-141
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 124 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,6 +1551,8 @@ static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
15511551
std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
15521552
}
15531553

1554+
// FIXME: If v2i64 were a legal type, we could use it instead of
1555+
// Untyped here. This might enable improved folding.
15541556
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
15551557
MVT::Untyped, Hi, Lo);
15561558
return SDValue(Pair, 0);
@@ -6247,14 +6249,18 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
62476249
}
62486250
}
62496251

6250-
// Manually lower a bitcast to avoid introducing illegal types after type
6251-
// legalization.
62526252
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6253-
SDValue Chain, const SDLoc &SL) {
6254-
SDValue Hi =
6255-
DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
6256-
SDValue Lo =
6257-
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
6253+
const SDLoc &SL) {
6254+
// If i128 is legal, just use a normal bitcast.
6255+
if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6256+
return DAG.getBitcast(MVT::f128, Src);
6257+
6258+
// Otherwise, f128 must live in FP128, so do a partwise move.
6259+
assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6260+
&SystemZ::FP128BitRegClass);
6261+
6262+
SDValue Hi, Lo;
6263+
std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
62586264

62596265
Hi = DAG.getBitcast(MVT::f64, Hi);
62606266
Lo = DAG.getBitcast(MVT::f64, Lo);
@@ -6267,24 +6273,24 @@ static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
62676273
return SDValue(Pair, 0);
62686274
}
62696275

6270-
static std::pair<SDValue, SDValue>
6271-
expandBitCastF128ToI128Parts(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) {
6276+
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6277+
const SDLoc &SL) {
6278+
// If i128 is legal, just use a normal bitcast.
6279+
if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6280+
return DAG.getBitcast(MVT::i128, Src);
6281+
6282+
// Otherwise, f128 must live in FP128, so do a partwise move.
6283+
assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6284+
&SystemZ::FP128BitRegClass);
6285+
62726286
SDValue LoFP =
62736287
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
62746288
SDValue HiFP =
62756289
DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
62766290
SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
62776291
SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
62786292

6279-
return {Hi, Lo};
6280-
}
6281-
6282-
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6283-
const SDLoc &SL) {
6284-
6285-
auto [Hi, Lo] = expandBitCastF128ToI128Parts(DAG, Src, SL);
6286-
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, SL, MVT::Untyped, Hi, Lo);
6287-
return SDValue(Pair, 0);
6293+
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
62886294
}
62896295

62906296
// Lower operations with invalid operand or result types (currently used
@@ -6302,38 +6308,20 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
63026308
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
63036309
DL, Tys, Ops, MVT::i128, MMO);
63046310

6305-
EVT VT = N->getValueType(0);
6306-
6307-
if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6308-
SDValue Lowered = lowerGR128ToI128(DAG, Res);
6309-
Results.push_back(DAG.getBitcast(VT, Lowered));
6310-
Results.push_back(Res.getValue(1));
6311-
} else {
6312-
// For the f128 case, after type legalization, we cannot produce a bitcast
6313-
// with an illegal type (i.e. i128), so manually lower it.
6314-
//
6315-
// FIXME: Really v2i64 should be legal, and should be used in place of
6316-
// untyped. Then we could emit the bitcast which will potentially fold
6317-
// into the use.
6318-
SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
6319-
Results.push_back(Cast);
6320-
Results.push_back(Res.getValue(1));
6321-
}
6322-
6311+
SDValue Lowered = lowerGR128ToI128(DAG, Res);
6312+
if (N->getValueType(0) == MVT::f128)
6313+
Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6314+
Results.push_back(Lowered);
6315+
Results.push_back(Res.getValue(1));
63236316
break;
63246317
}
63256318
case ISD::ATOMIC_STORE: {
63266319
SDLoc DL(N);
63276320
SDVTList Tys = DAG.getVTList(MVT::Other);
63286321
SDValue Val = N->getOperand(1);
6329-
EVT VT = Val.getValueType();
6330-
6331-
if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6332-
Val = DAG.getBitcast(MVT::i128, Val);
6333-
Val = lowerI128ToGR128(DAG, Val);
6334-
} else {
6322+
if (Val.getValueType() == MVT::f128)
63356323
Val = expandBitCastF128ToI128(DAG, Val, DL);
6336-
}
6324+
Val = lowerI128ToGR128(DAG, Val);
63376325

63386326
SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
63396327
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
@@ -6370,21 +6358,7 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
63706358
if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
63716359
!useSoftFloat()) {
63726360
SDLoc DL(N);
6373-
SDValue Lo, Hi;
6374-
if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6375-
SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6376-
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6377-
DAG.getConstant(1, DL, MVT::i32));
6378-
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6379-
DAG.getConstant(0, DL, MVT::i32));
6380-
} else {
6381-
// FIXME: Assert should be moved into expandBitCastF128ToI128Parts
6382-
assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6383-
"Unrecognized register class for f128.");
6384-
std::tie(Hi, Lo) = expandBitCastF128ToI128Parts(DAG, Src, DL);
6385-
}
6386-
6387-
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6361+
Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
63886362
}
63896363
break;
63906364
}
@@ -6829,47 +6803,87 @@ SDValue SystemZTargetLowering::combineMERGE(
68296803
return SDValue();
68306804
}
68316805

6806+
static bool
6807+
isI128MovedToParts(LoadSDNode *LD,
6808+
SmallVector<std::pair<SDNode *, int>, 2> &Users) {
6809+
// Scan through all users.
6810+
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6811+
UI != UIEnd; ++UI) {
6812+
// Skip the uses of the chain.
6813+
if (UI.getUse().getResNo() != 0)
6814+
continue;
6815+
6816+
// Verify every user is a TRUNCATE to i64 of the low or high half.
6817+
SDNode *User = *UI;
6818+
int Index = 1;
6819+
if (User->getOpcode() == ISD::SRL &&
6820+
User->getOperand(1).getOpcode() == ISD::Constant &&
6821+
User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6822+
User = *User->use_begin();
6823+
Index = 0;
6824+
}
6825+
if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
6826+
return false;
6827+
6828+
Users.push_back(std::make_pair(User, Index));
6829+
}
6830+
return true;
6831+
}
6832+
6833+
static bool
6834+
isF128MovedToParts(LoadSDNode *LD,
6835+
SmallVector<std::pair<SDNode *, int>, 2> &Users) {
6836+
// Scan through all users.
6837+
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6838+
UI != UIEnd; ++UI) {
6839+
// Skip the uses of the chain.
6840+
if (UI.getUse().getResNo() != 0)
6841+
continue;
6842+
6843+
// Verify every user is an EXTRACT_SUBREG of the low or high half.
6844+
SDNode *User = *UI;
6845+
if (!User->hasOneUse() || !User->isMachineOpcode() ||
6846+
User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
6847+
return false;
6848+
6849+
int Index;
6850+
switch (User->getConstantOperandVal(1)) {
6851+
case SystemZ::subreg_l64:
6852+
Index = 1;
6853+
break;
6854+
case SystemZ::subreg_h64:
6855+
Index = 0;
6856+
break;
6857+
default:
6858+
return false;
6859+
}
6860+
6861+
Users.push_back(std::make_pair(User, Index));
6862+
}
6863+
return true;
6864+
}
6865+
68326866
SDValue SystemZTargetLowering::combineLOAD(
68336867
SDNode *N, DAGCombinerInfo &DCI) const {
68346868
SelectionDAG &DAG = DCI.DAG;
68356869
EVT LdVT = N->getValueType(0);
68366870
SDLoc DL(N);
68376871

6838-
// Replace an i128 load that is used solely to move its value into GPRs
6872+
// Replace a 128-bit load that is used solely to move its value into GPRs
68396873
// by separate loads of both halves.
6840-
if (LdVT == MVT::i128) {
6841-
LoadSDNode *LD = cast<LoadSDNode>(N);
6842-
if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6843-
return SDValue();
6874+
LoadSDNode *LD = cast<LoadSDNode>(N);
6875+
SmallVector<std::pair<SDNode *, int>, 2> Users;
6876+
if (LD->isSimple() && ISD::isNormalLoad(LD) &&
6877+
((LdVT == MVT::i128 && isI128MovedToParts(LD, Users)) ||
6878+
(LdVT == MVT::f128 && isF128MovedToParts(LD, Users)))) {
68446879

6845-
// Scan through all users.
6846-
SmallVector<std::pair<SDNode *, int>, 2> Users;
6880+
// Verify that no part is extracted twice.
68476881
int UsedElements = 0;
6848-
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6849-
UI != UIEnd; ++UI) {
6850-
// Skip the uses of the chain.
6851-
if (UI.getUse().getResNo() != 0)
6852-
continue;
6853-
6854-
// Verify every user is a TRUNCATE to i64 of the low or high half ...
6855-
SDNode *User = *UI;
6856-
int Index = 1;
6857-
if (User->getOpcode() == ISD::SRL &&
6858-
User->getOperand(1).getOpcode() == ISD::Constant &&
6859-
User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6860-
User = *User->use_begin();
6861-
Index = 0;
6862-
}
6863-
if (User->getOpcode() != ISD::TRUNCATE ||
6864-
User->getValueType(0) != MVT::i64)
6865-
return SDValue();
6866-
6867-
// ... and no half is extracted twice.
6882+
for (auto UserAndIndex : Users) {
6883+
unsigned Index = UserAndIndex.second;
68686884
if (UsedElements & (1 << Index))
68696885
return SDValue();
6870-
68716886
UsedElements |= 1 << Index;
6872-
Users.push_back(std::make_pair(User, Index));
68736887
}
68746888

68756889
// Rewrite each extraction as an independent load.
@@ -6974,7 +6988,8 @@ static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
69746988
return true;
69756989
}
69766990

6977-
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6991+
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
6992+
SDValue &HiPart) {
69786993
if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
69796994
return false;
69806995

@@ -7001,6 +7016,23 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
70017016
return true;
70027017
}
70037018

7019+
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7020+
SDValue &HiPart) {
7021+
if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7022+
Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7023+
return false;
7024+
7025+
if (Val->getNumOperands() != 5 ||
7026+
Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7027+
Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7028+
Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7029+
return false;
7030+
7031+
LoPart = Val->getOperand(1);
7032+
HiPart = Val->getOperand(3);
7033+
return true;
7034+
}
7035+
70047036
SDValue SystemZTargetLowering::combineSTORE(
70057037
SDNode *N, DAGCombinerInfo &DCI) const {
70067038
SelectionDAG &DAG = DCI.DAG;
@@ -7070,10 +7102,11 @@ SDValue SystemZTargetLowering::combineSTORE(
70707102
Ops, MemVT, SN->getMemOperand());
70717103
}
70727104

7073-
// Transform a store of an i128 moved from GPRs into two separate stores.
7074-
if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
7105+
// Transform a store of a 128-bit value moved from parts into two stores.
7106+
if (SN->isSimple() && ISD::isNormalStore(SN)) {
70757107
SDValue LoPart, HiPart;
7076-
if (isMovedFromParts(Op1, LoPart, HiPart)) {
7108+
if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7109+
(MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
70777110
SDLoc DL(SN);
70787111
SDValue Chain0 =
70797112
DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),

llvm/test/CodeGen/SystemZ/atomic-load-08.ll

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,16 @@
1-
; Test long double atomic loads.
1+
; Test long double atomic loads - via i128.
22
;
33
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
44
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
5-
; TODO: Is it worth testing softfp with vector?
65
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s
76

8-
; FIXME: Without vector support, v2i64 should be legal and we should
9-
; introduce a simple bitcast, which could fold into the store use
10-
; and avoid the intermediate f registers.
117
define void @f1(ptr %ret, ptr %src) {
128
; CHECK-LABEL: f1:
139
; CHECK: # %bb.0:
14-
; Z13-NEXT: lpq %r0, 0(%r3)
15-
; Z13-NEXT: stg %r1, 8(%r2)
16-
; Z13-NEXT: stg %r0, 0(%r2)
17-
; Z13-NEXT: br %r14
18-
19-
; BASE: lpq %r0, 0(%r3)
20-
; BASE-NEXT: ldgr %f0, %r0
21-
; BASE-NEXT: ldgr %f2, %r1
22-
; BASE-NEXT: std %f0, 0(%r2)
23-
; BASE-NEXT: std %f2, 8(%r2)
24-
; BASE-NEXT: br %r14
10+
; CHECK-NEXT: lpq %r0, 0(%r3)
11+
; CHECK-NEXT: stg %r1, 8(%r2)
12+
; CHECK-NEXT: stg %r0, 0(%r2)
13+
; CHECK-NEXT: br %r14
2514

2615
; SOFTFP-LABEL: f1:
2716
; SOFTFP: # %bb.0:

llvm/test/CodeGen/SystemZ/atomic-store-08.ll

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,17 @@
1-
; Test long double atomic stores. The atomic store is converted to i128
1+
; Test long double atomic stores - via i128.
22
;
33
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
44
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
5-
6-
; TODO: Is it worth testing softfp with vector?
75
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s
86

9-
10-
; FIXME: With legal 128-bit operation to bitcast, the base code would
11-
; be the same as z13
127
define void @f1(ptr %dst, ptr %src) {
138
; CHECK-LABEL: f1:
149
; CHECK: # %bb.0:
15-
; Z13-NEXT: lg %r1, 8(%r3)
16-
; Z13-NEXT: lg %r0, 0(%r3)
17-
; Z13-NEXT: stpq %r0, 0(%r2)
18-
; Z13-NEXT: bcr 1{{[45]}}, %r0
19-
; Z13-NEXT: br %r14
20-
21-
; BASE-NEXT: ld %f0, 0(%r3)
22-
; BASE-NEXT: ld %f2, 8(%r3)
23-
; BASE-NEXT: lgdr %r1, %f2
24-
; BASE-NEXT: lgdr %r0, %f0
25-
; BASE-NEXT: stpq %r0, 0(%r2)
26-
; BASE-NEXT: bcr 15, %r0
27-
; BASE-NEXT: br %r14
10+
; CHECK-NEXT: lg %r1, 8(%r3)
11+
; CHECK-NEXT: lg %r0, 0(%r3)
12+
; CHECK-NEXT: stpq %r0, 0(%r2)
13+
; CHECK-NEXT: bcr 1{{[45]}}, %r0
14+
; CHECK-NEXT: br %r14
2815

2916
; SOFTFP-LABEL: f1:
3017
; SOFTFP: # %bb.0:
@@ -99,13 +86,8 @@ define void @f2_fpuse(ptr %dst, ptr %src) {
9986
; CHECK-NEXT: .cfi_def_cfa_offset 336
10087
; CHECK-NEXT: ld %f0, 0(%r3)
10188
; CHECK-NEXT: ld %f2, 8(%r3)
102-
103-
; BASE-NEXT: lgr %r3, %r2
104-
; BASE-NEXT: axbr %f0, %f0
105-
106-
; Z13-NEXT: axbr %f0, %f0
107-
; Z13-NEXT: lgr %r3, %r2
108-
89+
; CHECK-DAG: lgr %r3, %r2
90+
; CHECK-DAG: axbr %f0, %f0
10991
; CHECK-NEXT: la %r4, 160(%r15)
11092
; CHECK-NEXT: lghi %r2, 16
11193
; CHECK-NEXT: lhi %r5, 5

0 commit comments

Comments
 (0)