Skip to content

Commit ed79692

Browse files
committed
SystemZ: Stop casting fp typed atomic loads in the IR
shouldCastAtomicLoadInIR is a hack that should be removed. Simple bitcasting of operations should be in the domain of ordinary type legalization and does not need to be done in the IR. This introduces a code quality regression due to the hack currently used to avoid using 128-bit values in the case where the floating point value is ultimately used as an integer. This would be avoidable if there were always a legal 128-bit type (like v2i64). This is a pretty niche situation so I assume it's not important. I implemented about 85% of the work necessary to make v2i64 legal, but it was taking too long and I lack the necessary familiarity with systemz to complete it. I've pushed it here for someone to pick up: https://github.com/arsenm/llvm-project/pull/new/systemz-legal-v2i64
1 parent 6d44a1e commit ed79692

File tree

2 files changed

+76
-13
lines changed

2 files changed

+76
-13
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
294294
// the atomic operations in order to exploit SystemZ instructions.
295295
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
296296
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
297+
setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
297298

298299
// Mark sign/zero extending atomic loads as legal, which will make
299300
// DAGCombiner fold extensions into atomic loads if possible.
@@ -935,9 +936,6 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
935936

936937
TargetLowering::AtomicExpansionKind
937938
SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
938-
// Lower fp128 the same way as i128.
939-
if (LI->getType()->isFP128Ty())
940-
return AtomicExpansionKind::CastToInteger;
941939
return AtomicExpansionKind::None;
942940
}
943941

@@ -4550,7 +4548,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
45504548
SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
45514549
SelectionDAG &DAG) const {
45524550
auto *Node = cast<AtomicSDNode>(Op.getNode());
4553-
assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
4551+
assert(
4552+
(Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4553+
"Only custom lowering i128 or f128.");
45544554
// Use same code to handle both legal and non-legal i128 types.
45554555
SmallVector<SDValue, 2> Results;
45564556
LowerOperationWrapper(Node, Results, DAG);
@@ -6249,6 +6249,41 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
62496249
}
62506250
}
62516251

6252+
// Manually lower a bitcast to avoid introducing illegal types after type
6253+
// legalization.
6254+
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6255+
SDValue Chain, const SDLoc &SL) {
6256+
MachineFunction &MF = DAG.getMachineFunction();
6257+
const DataLayout &DL = DAG.getDataLayout();
6258+
6259+
assert(DL.isBigEndian());
6260+
6261+
Align F128Align = DL.getPrefTypeAlign(Type::getFP128Ty(*DAG.getContext()));
6262+
SDValue StackTemp = DAG.CreateStackTemporary(MVT::f128, F128Align.value());
6263+
int FI = cast<FrameIndexSDNode>(StackTemp)->getIndex();
6264+
Align A = MF.getFrameInfo().getObjectAlign(FI);
6265+
6266+
SDValue Hi =
6267+
DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
6268+
SDValue Lo =
6269+
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
6270+
6271+
TypeSize Offset = MVT(MVT::i64).getStoreSize();
6272+
SDValue StackTempOffset = DAG.getObjectPtrOffset(SL, StackTemp, Offset);
6273+
6274+
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
6275+
6276+
SDValue StoreHi = DAG.getStore(Chain, SL, Lo, StackTempOffset, PtrInfo, A);
6277+
SDValue StoreLo =
6278+
DAG.getStore(Chain, SL, Hi, StackTemp, PtrInfo.getWithOffset(Offset),
6279+
commonAlignment(A, Offset));
6280+
6281+
SDValue StoreChain =
6282+
DAG.getNode(ISD::TokenFactor, SL, MVT::Other, {StoreLo, StoreHi});
6283+
6284+
return DAG.getLoad(MVT::f128, SL, StoreChain, StackTemp, PtrInfo, A);
6285+
}
6286+
62526287
// Lower operations with invalid operand or result types (currently used
62536288
// only for 128-bit integer types).
62546289
void
@@ -6263,8 +6298,25 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
62636298
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
62646299
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
62656300
DL, Tys, Ops, MVT::i128, MMO);
6266-
Results.push_back(lowerGR128ToI128(DAG, Res));
6267-
Results.push_back(Res.getValue(1));
6301+
6302+
EVT VT = N->getValueType(0);
6303+
6304+
if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6305+
SDValue Lowered = lowerGR128ToI128(DAG, Res);
6306+
Results.push_back(DAG.getBitcast(VT, Lowered));
6307+
Results.push_back(Res.getValue(1));
6308+
} else {
6309+
// For the f128 case, after type legalization, we cannot produce a bitcast
6310+
// with an illegal type (i.e. i128), so introduce a stack store and reload
6311+
//
6312+
// FIXME: Really v2i64 should be legal, and should be used in place of
6313+
// unttyped. Then we could emit the bitcast which will potentially avoid
6314+
// the stack usage after combining.
6315+
SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
6316+
Results.push_back(Cast);
6317+
Results.push_back(Cast.getValue(1));
6318+
}
6319+
62686320
break;
62696321
}
62706322
case ISD::ATOMIC_STORE: {

llvm/test/CodeGen/SystemZ/atomic-load-08.ll

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
1-
; Test long double atomic loads. These are emitted by the Clang FE as i128
2-
; loads with a bitcast, and this test case gets converted into that form as
3-
; well by the AtomicExpand pass.
1+
; Test long double atomic loads.
42
;
53
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
64
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
75

6+
; FIXME: Without vector support, v2i64 should be legal and we should
7+
; introduce a simple bitcast instead of the stack temporary store and
8+
; reload
89
define void @f1(ptr %ret, ptr %src) {
910
; CHECK-LABEL: f1:
1011
; CHECK: # %bb.0:
11-
; CHECK-NEXT: lpq %r0, 0(%r3)
12-
; CHECK-NEXT: stg %r1, 8(%r2)
13-
; CHECK-NEXT: stg %r0, 0(%r2)
14-
; CHECK-NEXT: br %r14
12+
; Z13-NEXT: lpq %r0, 0(%r3)
13+
; Z13-NEXT: stg %r1, 8(%r2)
14+
; Z13-NEXT: stg %r0, 0(%r2)
15+
; Z13-NEXT: br %r14
16+
17+
; BASE: aghi %r15, -176
18+
; BASE: lpq %r0, 0(%r3)
19+
; BASE-NEXT: stg %r1, 168(%r15)
20+
; BASE-NEXT: stg %r0, 160(%r15)
21+
; BASE-NEXT: ld %f0, 160(%r15)
22+
; BASE-NEXT: ld %f2, 168(%r15)
23+
; BASE-NEXT: std %f0, 0(%r2)
24+
; BASE-NEXT: std %f2, 8(%r2)
25+
; BASE-NEXT: aghi %r15, 176
1526
%val = load atomic fp128, ptr %src seq_cst, align 16
1627
store fp128 %val, ptr %ret, align 8
1728
ret void

0 commit comments

Comments
 (0)