Skip to content

Commit 06c3311

Browse files
authored
[PowerPC] Implement llvm.set.rounding intrinsic (llvm#67302)
1 parent ed0da00 commit 06c3311

File tree

3 files changed

+385
-5
lines changed

3 files changed

+385
-5
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,13 +435,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
435435
} else {
436436
setOperationAction(ISD::FMA , MVT::f64, Legal);
437437
setOperationAction(ISD::FMA , MVT::f32, Legal);
438+
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
439+
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
438440
}
439441

440442
if (Subtarget.hasSPE())
441443
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
442444

443-
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
444-
445445
// If we're enabling GP optimizations, use hardware square root
446446
if (!Subtarget.hasFSQRT() &&
447447
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
@@ -9060,6 +9060,103 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
90609060
return FP;
90619061
}
90629062

9063+
// Custom lowering for ISD::SET_ROUNDING.
//
// Operand 0 of Op is the incoming chain and operand 1 is the requested
// rounding mode in LLVM numbering. The function builds the nodes that write
// the equivalent two-bit value into the rounding-mode field of the FPSCR and
// returns the resulting output chain (the MVT::Other result of the last node).
//
// Strategy, as selected below:
//   * constant mode, ISA 3.0      -> a single MFFSCRNI with an immediate
//   * constant mode, older ISAs   -> two chained MTFSB0/MTFSB1 (bits 30, 31)
//   * variable mode, ISA 3.0      -> MFFSCRN fed with the converted mode
//   * variable mode, older ISAs   -> MFFS, patch the low two bits, MTFSF
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9064+
SelectionDAG &DAG) const {
9065+
SDLoc Dl(Op);
9066+
MachineFunction &MF = DAG.getMachineFunction();
9067+
EVT PtrVT = getPointerTy(MF.getDataLayout());
9068+
SDValue Chain = Op.getOperand(0);
9069+
9070+
// If requested mode is constant, just use simpler mtfsb/mffscrni
9071+
if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9072+
uint64_t Mode = CVal->getZExtValue();
9073+
// Only modes 0..3 are handled; this is checked by assert only.
assert(Mode < 4 && "Unsupported rounding mode!");
9074+
// Mode ^ (~(Mode >> 1) & 1) flips bit 0 exactly when bit 1 is clear, i.e. it
// swaps 0 <-> 1 and leaves 2 and 3 unchanged.
unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9075+
if (Subtarget.isISA3_0())
9076+
// MFFSCRNI produces {f64, chain}; only the chain (result 1) is returned,
// the f64 result is unused here.
return SDValue(
9077+
DAG.getMachineNode(
9078+
PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9079+
{DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9080+
1);
9081+
// Without ISA 3.0: set or clear bit 30 from the high bit of InternalRnd ...
SDNode *SetHi = DAG.getMachineNode(
9082+
(InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9083+
{DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9084+
// ... then bit 31 from the low bit, chained after SetHi so the two writes
// stay ordered.
SDNode *SetLo = DAG.getMachineNode(
9085+
(InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9086+
{DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9087+
return SDValue(SetLo, 0);
9088+
}
9089+
9090+
// Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9091+
SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9092+
// Keep only the low two bits of the (non-constant) requested mode.
SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9093+
DAG.getConstant(3, Dl, MVT::i32));
9094+
// Same transform as the constant path above, expressed as DAG nodes.
SDValue DstFlag = DAG.getNode(
9095+
ISD::XOR, Dl, MVT::i32, SrcFlag,
9096+
DAG.getNode(ISD::AND, Dl, MVT::i32,
9097+
DAG.getNOT(Dl,
9098+
DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9099+
MVT::i32),
9100+
One));
9101+
// For Power9, there's faster mffscrn, and we don't need to read FPSCR
9102+
// MFFS stays empty on ISA 3.0; every use below is guarded by !isISA3_0().
SDValue MFFS;
9103+
if (!Subtarget.isISA3_0()) {
9104+
MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9105+
// Thread the chain through the FPSCR read.
Chain = MFFS.getValue(1);
9106+
}
9107+
// f64 value that is finally handed to MFFSCRN / MTFSF.
SDValue NewFPSCR;
9108+
if (Subtarget.isPPC64()) {
9109+
if (Subtarget.isISA3_0()) {
9110+
// Only the converted mode is needed; widen it to i64 for the bitcast below.
NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9111+
} else {
9112+
// Set the last two bits (rounding mode) of bitcasted FPSCR.
9113+
// RLDIMI operands: old FPSCR image, value to insert, shift 0, mask begin 62.
SDNode *InsertRN = DAG.getMachineNode(
9114+
PPC::RLDIMI, Dl, MVT::i64,
9115+
{DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9116+
DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9117+
DAG.getTargetConstant(0, Dl, MVT::i32),
9118+
DAG.getTargetConstant(62, Dl, MVT::i32)});
9119+
NewFPSCR = SDValue(InsertRN, 0);
9120+
}
9121+
NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9122+
} else {
9123+
// In 32-bit mode, store f64, load and update the lower half.
9124+
// 8-byte, 8-aligned temporary used to move bits between f64 and i32.
int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9125+
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9126+
// Address of the word holding the lower half: offset 0 on little-endian,
// offset 4 on big-endian.
SDValue Addr = Subtarget.isLittleEndian()
9127+
? StackSlot
9128+
: DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9129+
DAG.getConstant(4, Dl, PtrVT));
9130+
if (Subtarget.isISA3_0()) {
9131+
// Only the low word is written here; the other word of the slot is left
// unwritten by this function.
// NOTE(review): presumably fine because mffscrn only consumes the low two
// bits of its operand -- confirm against the ISA.
Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9132+
} else {
9133+
// Spill the full FPSCR image, then read-modify-write its low word.
Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9134+
SDValue Tmp =
9135+
DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9136+
Chain = Tmp.getValue(1);
9137+
// RLWIMI operands: old word, value to insert, shift 0, mask bits 30..31.
Tmp = SDValue(DAG.getMachineNode(
9138+
PPC::RLWIMI, Dl, MVT::i32,
9139+
{Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9140+
DAG.getTargetConstant(30, Dl, MVT::i32),
9141+
DAG.getTargetConstant(31, Dl, MVT::i32)}),
9142+
0);
9143+
Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9144+
}
9145+
// Reload the whole slot as the f64 operand for the final FPSCR write.
NewFPSCR =
9146+
DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9147+
Chain = NewFPSCR.getValue(1);
9148+
}
9149+
if (Subtarget.isISA3_0())
9150+
// As with MFFSCRNI above, return the chain result (index 1) of MFFSCRN.
return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9151+
{NewFPSCR, Chain}),
9152+
1);
9153+
SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9154+
// Write the patched image back with MTFSF; its only result is the chain.
// NOTE(review): 255 is presumably the field mask selecting all eight fields
// and the two Zero operands the L/W bits -- confirm against the MTFSF
// instruction definition.
SDNode *MTFSF = DAG.getMachineNode(
9155+
PPC::MTFSF, Dl, MVT::Other,
9156+
{DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9157+
return SDValue(MTFSF, 0);
9158+
}
9159+
90639160
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
90649161
SelectionDAG &DAG) const {
90659162
SDLoc dl(Op);
@@ -11921,6 +12018,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1192112018
case ISD::UINT_TO_FP:
1192212019
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
1192312020
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12021+
case ISD::SET_ROUNDING:
12022+
return LowerSET_ROUNDING(Op, DAG);
1192412023

1192512024
// Lower 64-bit shifts.
1192612025
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,7 @@ namespace llvm {
12961296
const SDLoc &dl) const;
12971297
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
12981298
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1299+
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
12991300
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
13001301
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
13011302
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)