[SystemZ] Simplify f128 atomic load/store #90977
Conversation
Hi @arsenm, would this implementation conflict with anything you're working on?
isI128MovedToParts(LoadSDNode *LD,
                   SmallVector<std::pair<SDNode *, int>, 2> &Users) {
  // Scan through all users.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
Unbounded user scans are often a bad idea
Good point. I've changed the logic to ensure the loop exits early in case of more than two users.
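For reference, a minimal sketch of that early-exit shape (a hypothetical helper for illustration, not the exact PR code; assumes LLVM's SelectionDAG headers and a caller-supplied classifier):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Scan the value users of N, claiming at most one "low" and one "high" user.
// A second user of the same half makes the pattern unmatchable, so we return
// false immediately instead of walking the rest of an unbounded use list.
static bool movedToTwoParts(SDNode *N, SDNode *&Lo, SDNode *&Hi,
                            bool (*IsLoHalfUse)(SDNode *)) {
  Lo = Hi = nullptr;
  for (auto UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) {
    if (UI.getUse().getResNo() != 0)
      continue; // uses of the chain result don't matter here
    SDNode *User = *UI;
    SDNode *&Slot = IsLoHalfUse(User) ? Lo : Hi;
    if (Slot)
      return false; // duplicate extraction of the same half: bail out early
    Slot = User;
  }
  return true;
}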
✅ With the latest revision this PR passed the C/C++ code formatter.
Change definition of expandBitCastI128ToF128 and expandBitCastF128ToI128 to allow for simplified use in atomic load/store. Update logic to split 128-bit loads and stores in DAGCombine to also handle the f128 case where appropriate. This fixes the regressions introduced by recent atomic load/store patches.
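As a small user-level illustration (not from the PR): on s390x, long double is the 128-bit IEEE format, so plain atomic accesses like the following lower to the f128 ATOMIC_LOAD/ATOMIC_STORE nodes this patch touches.

#include <atomic>

std::atomic<long double> Shared;

// Both operations default to sequentially consistent ordering.
long double loadIt() { return Shared.load(); }
void storeIt(long double V) { Shared.store(V); }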
@llvm/pr-subscribers-backend-systemz

Author: Ulrich Weigand (uweigand)

Changes

Change definition of expandBitCastI128ToF128 and expandBitCastF128ToI128 to allow for simplified use in atomic load/store. Update logic to split 128-bit loads and stores in DAGCombine to also handle the f128 case where appropriate. This fixes the regressions introduced by recent atomic load/store patches.

Full diff: https://github.com/llvm/llvm-project/pull/90977.diff

5 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 5612e4ced4f607..2920c1f02a3146 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1551,6 +1551,8 @@ static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
}
+ // FIXME: If v2i64 were a legal type, we could use it instead of
+ // Untyped here. This might enable improved folding.
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
MVT::Untyped, Hi, Lo);
return SDValue(Pair, 0);
@@ -6247,14 +6249,18 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
}
}
-// Manually lower a bitcast to avoid introducing illegal types after type
-// legalization.
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
- SDValue Chain, const SDLoc &SL) {
- SDValue Hi =
- DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
- SDValue Lo =
- DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
+ const SDLoc &SL) {
+ // If i128 is legal, just use a normal bitcast.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
+ return DAG.getBitcast(MVT::f128, Src);
+
+ // Otherwise, f128 must live in FP128, so do a partwise move.
+ assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
+ &SystemZ::FP128BitRegClass);
+
+ SDValue Hi, Lo;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
Hi = DAG.getBitcast(MVT::f64, Hi);
Lo = DAG.getBitcast(MVT::f64, Lo);
@@ -6267,8 +6273,16 @@ static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
return SDValue(Pair, 0);
}
-static std::pair<SDValue, SDValue>
-expandBitCastF128ToI128Parts(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) {
+static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
+ const SDLoc &SL) {
+ // If i128 is legal, just use a normal bitcast.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
+ return DAG.getBitcast(MVT::i128, Src);
+
+ // Otherwise, f128 must live in FP128, so do a partwise move.
+ assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
+ &SystemZ::FP128BitRegClass);
+
SDValue LoFP =
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
SDValue HiFP =
@@ -6276,15 +6290,7 @@ expandBitCastF128ToI128Parts(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) {
SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
- return {Hi, Lo};
-}
-
-static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
- const SDLoc &SL) {
-
- auto [Hi, Lo] = expandBitCastF128ToI128Parts(DAG, Src, SL);
- SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, SL, MVT::Untyped, Hi, Lo);
- return SDValue(Pair, 0);
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
}
// Lower operations with invalid operand or result types (currently used
@@ -6302,38 +6308,20 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
DL, Tys, Ops, MVT::i128, MMO);
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
- SDValue Lowered = lowerGR128ToI128(DAG, Res);
- Results.push_back(DAG.getBitcast(VT, Lowered));
- Results.push_back(Res.getValue(1));
- } else {
- // For the f128 case, after type legalization, we cannot produce a bitcast
- // with an illegal type (i.e. i128), so manually lower it.
- //
- // FIXME: Really v2i64 should be legal, and should be used in place of
- // unttyped. Then we could emit the bitcast which will potentially fold
- // into the use.
- SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
- Results.push_back(Cast);
- Results.push_back(Res.getValue(1));
- }
-
+ SDValue Lowered = lowerGR128ToI128(DAG, Res);
+ if (N->getValueType(0) == MVT::f128)
+ Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
+ Results.push_back(Lowered);
+ Results.push_back(Res.getValue(1));
break;
}
case ISD::ATOMIC_STORE: {
SDLoc DL(N);
SDVTList Tys = DAG.getVTList(MVT::Other);
SDValue Val = N->getOperand(1);
- EVT VT = Val.getValueType();
-
- if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
- Val = DAG.getBitcast(MVT::i128, Val);
- Val = lowerI128ToGR128(DAG, Val);
- } else {
+ if (Val.getValueType() == MVT::f128)
Val = expandBitCastF128ToI128(DAG, Val, DL);
- }
+ Val = lowerI128ToGR128(DAG, Val);
SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
@@ -6370,21 +6358,7 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
!useSoftFloat()) {
SDLoc DL(N);
- SDValue Lo, Hi;
- if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
- SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
- Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
- DAG.getConstant(1, DL, MVT::i32));
- Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
- DAG.getConstant(0, DL, MVT::i32));
- } else {
- // FIXME: Assert should be moved into expandBitCastF128ToI128Parts
- assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
- "Unrecognized register class for f128.");
- std::tie(Hi, Lo) = expandBitCastF128ToI128Parts(DAG, Src, DL);
- }
-
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
+ Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
}
break;
}
@@ -6829,72 +6803,118 @@ SDValue SystemZTargetLowering::combineMERGE(
return SDValue();
}
+static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
+ SDNode *&HiPart) {
+ LoPart = HiPart = nullptr;
+
+ // Scan through all users.
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ // Verify every user is a TRUNCATE to i64 of the low or high half.
+ SDNode *User = *UI;
+ bool IsLoPart = true;
+ if (User->getOpcode() == ISD::SRL &&
+ User->getOperand(1).getOpcode() == ISD::Constant &&
+ User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
+ User = *User->use_begin();
+ IsLoPart = false;
+ }
+ if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
+ return false;
+
+ if (IsLoPart) {
+ if (LoPart)
+ return false;
+ LoPart = User;
+ } else {
+ if (HiPart)
+ return false;
+ HiPart = User;
+ }
+ }
+ return true;
+}
+
+static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
+ SDNode *&HiPart) {
+ LoPart = HiPart = nullptr;
+
+ // Scan through all users.
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ // Verify every user is an EXTRACT_SUBREG of the low or high half.
+ SDNode *User = *UI;
+ if (!User->hasOneUse() || !User->isMachineOpcode() ||
+ User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+ return false;
+
+ switch (User->getConstantOperandVal(1)) {
+ case SystemZ::subreg_l64:
+ if (LoPart)
+ return false;
+ LoPart = User;
+ break;
+ case SystemZ::subreg_h64:
+ if (HiPart)
+ return false;
+ HiPart = User;
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
SDValue SystemZTargetLowering::combineLOAD(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT LdVT = N->getValueType(0);
SDLoc DL(N);
- // Replace an i128 load that is used solely to move its value into GPRs
+ // Replace a 128-bit load that is used solely to move its value into GPRs
// by separate loads of both halves.
- if (LdVT == MVT::i128) {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (!LD->isSimple() || !ISD::isNormalLoad(LD))
- return SDValue();
-
- // Scan through all users.
- SmallVector<std::pair<SDNode *, int>, 2> Users;
- int UsedElements = 0;
- for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
- UI != UIEnd; ++UI) {
- // Skip the uses of the chain.
- if (UI.getUse().getResNo() != 0)
- continue;
-
- // Verify every user is a TRUNCATE to i64 of the low or high half ...
- SDNode *User = *UI;
- int Index = 1;
- if (User->getOpcode() == ISD::SRL &&
- User->getOperand(1).getOpcode() == ISD::Constant &&
- User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
- User = *User->use_begin();
- Index = 0;
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->isSimple() && ISD::isNormalLoad(LD)) {
+ SDNode *LoPart, *HiPart;
+ if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
+ (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
+ // Rewrite each extraction as an independent load.
+ SmallVector<SDValue, 2> ArgChains;
+ if (HiPart) {
+ SDValue EltLoad = DAG.getLoad(
+ HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LD->getOriginalAlign(),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ DCI.CombineTo(HiPart, EltLoad, true);
+ ArgChains.push_back(EltLoad.getValue(1));
+ }
+ if (LoPart) {
+ SDValue EltLoad = DAG.getLoad(
+ LoPart->getValueType(0), DL, LD->getChain(),
+ DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
+ LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ DCI.CombineTo(LoPart, EltLoad, true);
+ ArgChains.push_back(EltLoad.getValue(1));
}
- if (User->getOpcode() != ISD::TRUNCATE ||
- User->getValueType(0) != MVT::i64)
- return SDValue();
-
- // ... and no half is extracted twice.
- if (UsedElements & (1 << Index))
- return SDValue();
-
- UsedElements |= 1 << Index;
- Users.push_back(std::make_pair(User, Index));
- }
-
- // Rewrite each extraction as an independent load.
- SmallVector<SDValue, 2> ArgChains;
- for (auto UserAndIndex : Users) {
- SDNode *User = UserAndIndex.first;
- unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
- SDValue Ptr =
- DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
- SDValue EltLoad =
- DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
- LD->getAAInfo());
- DCI.CombineTo(User, EltLoad, true);
- ArgChains.push_back(EltLoad.getValue(1));
+ // Collect all chains via TokenFactor.
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ DCI.AddToWorklist(Chain.getNode());
+ return SDValue(N, 0);
}
-
- // Collect all chains via TokenFactor.
- SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- ArgChains);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
- DCI.AddToWorklist(Chain.getNode());
- return SDValue(N, 0);
}
if (LdVT.isVector() || LdVT.isInteger())
@@ -6974,7 +6994,8 @@ static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
return true;
}
-static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
+static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
+ SDValue &HiPart) {
if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
return false;
@@ -7001,6 +7022,23 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
return true;
}
+static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
+ SDValue &HiPart) {
+ if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
+ Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
+ return false;
+
+ if (Val->getNumOperands() != 5 ||
+ Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
+ Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
+ Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
+ return false;
+
+ LoPart = Val->getOperand(1);
+ HiPart = Val->getOperand(3);
+ return true;
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -7070,10 +7108,11 @@ SDValue SystemZTargetLowering::combineSTORE(
Ops, MemVT, SN->getMemOperand());
}
- // Transform a store of an i128 moved from GPRs into two separate stores.
- if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
+ // Transform a store of a 128-bit value moved from parts into two stores.
+ if (SN->isSimple() && ISD::isNormalStore(SN)) {
SDValue LoPart, HiPart;
- if (isMovedFromParts(Op1, LoPart, HiPart)) {
+ if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
+ (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
SDLoc DL(SN);
SDValue Chain0 =
DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll
index 15ff2daa0a3846..90d4214037d222 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll
@@ -1,27 +1,16 @@
-; Test long double atomic loads.
+; Test long double atomic loads - via i128.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
-; TODO: Is it worth testing softfp with vector?
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s
-; FIXME: Without vector support, v2i64 should be legal and we should
-; introduce a simple bitcast, which could fold into the store use
-; avoid the intermediate f registers.
define void @f1(ptr %ret, ptr %src) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
-; Z13-NEXT: lpq %r0, 0(%r3)
-; Z13-NEXT: stg %r1, 8(%r2)
-; Z13-NEXT: stg %r0, 0(%r2)
-; Z13-NEXT: br %r14
-
-; BASE: lpq %r0, 0(%r3)
-; BASE-NEXT: ldgr %f0, %r0
-; BASE-NEXT: ldgr %f2, %r1
-; BASE-NEXT: std %f0, 0(%r2)
-; BASE-NEXT: std %f2, 8(%r2)
-; BASE-NEXT: br %r14
+; CHECK-NEXT: lpq %r0, 0(%r3)
+; CHECK-NEXT: stg %r1, 8(%r2)
+; CHECK-NEXT: stg %r0, 0(%r2)
+; CHECK-NEXT: br %r14
; SOFTFP-LABEL: f1:
; SOFTFP: # %bb.0:
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-08.ll b/llvm/test/CodeGen/SystemZ/atomic-store-08.ll
index 422dcb63b531fe..57f1319365c4fd 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-08.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-08.ll
@@ -1,30 +1,17 @@
-; Test long double atomic stores. The atomic store is converted to i128
+; Test long double atomic stores - via i128.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
-
-; TODO: Is it worth testing softfp with vector?
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s
-
-; FIXME: With legal 128-bit operation to bitcast, the base code would
-; be the same as z13
define void @f1(ptr %dst, ptr %src) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
-; Z13-NEXT: lg %r1, 8(%r3)
-; Z13-NEXT: lg %r0, 0(%r3)
-; Z13-NEXT: stpq %r0, 0(%r2)
-; Z13-NEXT: bcr 1{{[45]}}, %r0
-; Z13-NEXT: br %r14
-
-; BASE-NEXT: ld %f0, 0(%r3)
-; BASE-NEXT: ld %f2, 8(%r3)
-; BASE-NEXT: lgdr %r1, %f2
-; BASE-NEXT: lgdr %r0, %f0
-; BASE-NEXT: stpq %r0, 0(%r2)
-; BASE-NEXT: bcr 15, %r0
-; BASE-NEXT: br %r14
+; CHECK-NEXT: lg %r1, 8(%r3)
+; CHECK-NEXT: lg %r0, 0(%r3)
+; CHECK-NEXT: stpq %r0, 0(%r2)
+; CHECK-NEXT: bcr 1{{[45]}}, %r0
+; CHECK-NEXT: br %r14
; SOFTFP-LABEL: f1:
; SOFTFP: # %bb.0:
@@ -99,13 +86,8 @@ define void @f2_fpuse(ptr %dst, ptr %src) {
; CHECK-NEXT: .cfi_def_cfa_offset 336
; CHECK-NEXT: ld %f0, 0(%r3)
; CHECK-NEXT: ld %f2, 8(%r3)
-
-; BASE-NEXT: lgr %r3, %r2
-; BASE-NEXT: axbr %f0, %f0
-
-; Z13-NEXT: axbr %f0, %f0
-; Z13-NEXT: lgr %r3, %r2
-
+; CHECK-DAG: lgr %r3, %r2
+; CHECK-DAG: axbr %f0, %f0
; CHECK-NEXT: la %r4, 160(%r15)
; CHECK-NEXT: lghi %r2, 16
; CHECK-NEXT: lhi %r5, 5
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
index 3c8ea19f86f860..21e7c6e586dfe4 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
@@ -20,10 +20,8 @@ define void @f1(ptr %ret, ptr %src, ptr %b) {
; CHECK: std [[FSL]], 176(%r15)
; CHECK: std [[FSH]], 184(%r15)
; CHECK: brasl %r14, fmaxl@PLT
-; CHECK: ld [[FL:%f[0-9]+]], 192(%r15)
-; CHECK: ld [[FH:%f[0-9]+]], 200(%r15)
-; CHECK: lgdr [[RH:%r[0-9]+]], [[FH]]
-; CHECK: lgdr [[RL:%r[0-9]+]], [[FL]]
+; CHECK: lg [[RH:%r[0-9]+]], 200(%r15)
+; CHECK: lg [[RL:%r[0-9]+]], 192(%r15)
; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
; CHECK: lgdr [[RSL:%r[0-9]+]], [[FSL]]
; CHECK: cdsg [[RSL]], [[RL]], 0([[SRC]])
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
index dfa2cc021d1667..1c6f8e20aa4f8d 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
@@ -20,10 +20,8 @@ define void @f1(ptr %ret, ptr %src, ptr %b) {
; CHECK: std [[FSL]], 176(%r15)
; CHECK: std [[FSH]], 184(%r15)
; CHECK: brasl %r14, fminl@PLT
-; CHECK: ld [[FL:%f[0-9]+]], 192(%r15)
-; CHECK: ld [[FH:%f[0-9]+]], 200(%r15)
-; CHECK: lgdr [[RH:%r[0-9]+]], [[FH]]
-; CHECK: lgdr [[RL:%r[0-9]+]], [[FL]]
+; CHECK: lg [[RH:%r[0-9]+]], 200(%r15)
+; CHECK: lg [[RL:%r[0-9]+]], 192(%r15)
; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
; CHECK: lgdr [[RSL:%r[0-9]+]], [[FSL]]
; CHECK: cdsg [[RSL]], [[RL]], 0([[SRC]])
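For readers unfamiliar with the five-operand shape checked in isF128MovedFromParts above: a REG_SEQUENCE machine node takes a register-class ID followed by (value, subreg-index) pairs. A minimal sketch (assuming LLVM's SelectionDAG API plus the SystemZ target headers; illustrative only, not part of the patch) of building the form that matcher recognizes:

#include "llvm/CodeGen/SelectionDAG.h"
// Also requires SystemZ's generated register headers for the
// FP128BitRegClassID and subreg_* constants.
using namespace llvm;

static SDValue buildF128FromParts(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Lo, SDValue Hi) {
  // Operand 0: register class ID; operands 1/3: the i64 halves;
  // operands 2/4: the subregister indices they are placed into.
  SDValue Ops[] = {
      DAG.getTargetConstant(SystemZ::FP128BitRegClassID, DL, MVT::i32),
      Lo, DAG.getTargetConstant(SystemZ::subreg_l64, DL, MVT::i32),
      Hi, DAG.getTargetConstant(SystemZ::subreg_h64, DL, MVT::i32)};
  SDNode *N =
      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f128, Ops);
  return SDValue(N, 0);
}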