-
Notifications
You must be signed in to change notification settings - Fork 14.3k
SystemZ: Stop casting fp typed atomic loads in the IR #90768
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-systemz Author: Matt Arsenault (arsenm) ChangesshouldCastAtomicLoadInIR is a hack that should be removed. Simple bitcasting of operations should be in the domain of ordinary type legalization and does not need to be done in the IR. This introduces a code quality regression due to the hack currently used to avoid using 128-bit values in the case where the floating point value is ultimately used as an integer. This would be avoidable if there were always a legal 128-bit type (like v2i64). This is a pretty niche situation so I assume it's not important. I implemented about 85% of the work necessary to make v2i64 legal, but it was taking too long and I lack the necessary familiarity with systemz to complete it. I've pushed it here for someone to pick up: Full diff: https://github.com/llvm/llvm-project/pull/90768.diff 2 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2da4431cf077eb..e62d742b221f53 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -294,6 +294,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// the atomic operations in order to exploit SystemZ instructions.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
// Mark sign/zero extending atomic loads as legal, which will make
// DAGCombiner fold extensions into atomic loads if possible.
@@ -935,9 +936,6 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
- // Lower fp128 the same way as i128.
- if (LI->getType()->isFP128Ty())
- return AtomicExpansionKind::CastToInteger;
return AtomicExpansionKind::None;
}
@@ -4550,7 +4548,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
- assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
+ assert(
+ (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
+ "Only custom lowering i128 or f128.");
// Use same code to handle both legal and non-legal i128 types.
SmallVector<SDValue, 2> Results;
LowerOperationWrapper(Node, Results, DAG);
@@ -6249,6 +6249,41 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
}
}
+// Manually lower a bitcast to avoid introducing illegal types after type
+// legalization.
+static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
+ SDValue Chain, const SDLoc &SL) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const DataLayout &DL = DAG.getDataLayout();
+
+ assert(DL.isBigEndian());
+
+ Align F128Align = DL.getPrefTypeAlign(Type::getFP128Ty(*DAG.getContext()));
+ SDValue StackTemp = DAG.CreateStackTemporary(MVT::f128, F128Align.value());
+ int FI = cast<FrameIndexSDNode>(StackTemp)->getIndex();
+ Align A = MF.getFrameInfo().getObjectAlign(FI);
+
+ SDValue Hi =
+ DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
+ SDValue Lo =
+ DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
+
+ TypeSize Offset = MVT(MVT::i64).getStoreSize();
+ SDValue StackTempOffset = DAG.getObjectPtrOffset(SL, StackTemp, Offset);
+
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+
+ SDValue StoreHi = DAG.getStore(Chain, SL, Lo, StackTempOffset, PtrInfo, A);
+ SDValue StoreLo =
+ DAG.getStore(Chain, SL, Hi, StackTemp, PtrInfo.getWithOffset(Offset),
+ commonAlignment(A, Offset));
+
+ SDValue StoreChain =
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other, {StoreLo, StoreHi});
+
+ return DAG.getLoad(MVT::f128, SL, StoreChain, StackTemp, PtrInfo, A);
+}
+
// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).
void
@@ -6263,8 +6298,25 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
DL, Tys, Ops, MVT::i128, MMO);
- Results.push_back(lowerGR128ToI128(DAG, Res));
- Results.push_back(Res.getValue(1));
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
+ SDValue Lowered = lowerGR128ToI128(DAG, Res);
+ Results.push_back(DAG.getBitcast(VT, Lowered));
+ Results.push_back(Res.getValue(1));
+ } else {
+ // For the f128 case, after type legalization, we cannot produce a bitcast
+ // with an illegal type (i.e. i128), so introduce a stack store and reload
+ //
+ // FIXME: Really v2i64 should be legal, and should be used in place of
+ // unttyped. Then we could emit the bitcast which will potentially avoid
+ // the stack usage after combining.
+ SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
+ Results.push_back(Cast);
+ Results.push_back(Cast.getValue(1));
+ }
+
break;
}
case ISD::ATOMIC_STORE: {
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll
index 83050ef87591ae..94b8595bef2933 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll
@@ -5,13 +5,26 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
+; FIXME: Without vector support, v2i64 should be legal and we should
+; introduce a simple bitcast instead of the stack temporary store and
+; reload
define void @f1(ptr %ret, ptr %src) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
-; CHECK-NEXT: lpq %r0, 0(%r3)
-; CHECK-NEXT: stg %r1, 8(%r2)
-; CHECK-NEXT: stg %r0, 0(%r2)
-; CHECK-NEXT: br %r14
+; Z13-NEXT: lpq %r0, 0(%r3)
+; Z13-NEXT: stg %r1, 8(%r2)
+; Z13-NEXT: stg %r0, 0(%r2)
+; Z13-NEXT: br %r14
+
+; BASE: aghi %r15, -176
+; BASE: lpq %r0, 0(%r3)
+; BASE-NEXT: stg %r1, 168(%r15)
+; BASE-NEXT: stg %r0, 160(%r15)
+; BASE-NEXT: ld %f0, 160(%r15)
+; BASE-NEXT: ld %f2, 168(%r15)
+; BASE-NEXT: std %f0, 0(%r2)
+; BASE-NEXT: std %f2, 8(%r2)
+; BASE-NEXT: aghi %r15, 176
%val = load atomic fp128, ptr %src seq_cst, align 16
store fp128 %val, ptr %ret, align 8
ret void
|
shouldCastAtomicLoadInIR is a hack that should be removed. Simple bitcasting of operations should be in the domain of ordinary type legalization and does not need to be done in the IR. This introduces a code quality regression due to the hack currently used to avoid using 128-bit values in the case where the floating point value is ultimately used as an integer. This would be avoidable if there were always a legal 128-bit type (like v2i64). This is a pretty niche situation so I assume it's not important. I implemented about 85% of the work necessary to make v2i64 legal, but it was taking too long and I lack the necessary familiarity with systemz to complete it. I've pushed it here for someone to pick up: https://github.com/arsenm/llvm-project/pull/new/systemz-legal-v2i64
b235a86
to
ed79692
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
; BASE-NEXT: ldgr %f0, %r0 | ||
; BASE-NEXT: ldgr %f2, %r1 | ||
; BASE-NEXT: std %f0, 0(%r2) | ||
; BASE-NEXT: std %f2, 8(%r2) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be nice to short-circuit the LDGR/STD to just an STG here. We actually do this already for spills in foldMemoryOperandImpl
, but not for regular stores. But this can be done separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This also should go away with the legal v2i64 fix I posted
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This version LGTM with the comment changes mentioned inline.
shouldCastAtomicLoadInIR is a hack that should be removed. Simple bitcasting of operations should be in the domain of ordinary type legalization and does not need to be done in the IR.
This introduces a code quality regression due to the hack currently used to avoid using 128-bit values in the case where the floating point value is ultimately used as an integer. This would be avoidable if there were always a legal 128-bit type (like v2i64). This is a pretty niche situation so I assume it's not important.
I implemented about 85% of the work necessary to make v2i64 legal, but it was taking too long and I lack the necessary familiarity with systemz to complete it. I've pushed it here for someone to pick up:
https://github.com/arsenm/llvm-project/pull/new/systemz-legal-v2i64
Depends #90861