@@ -435,13 +435,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
435
435
} else {
436
436
setOperationAction(ISD::FMA , MVT::f64, Legal);
437
437
setOperationAction(ISD::FMA , MVT::f32, Legal);
438
+ setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
439
+ setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
438
440
}
439
441
440
442
if (Subtarget.hasSPE())
441
443
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
442
444
443
- setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
444
-
445
445
// If we're enabling GP optimizations, use hardware square root
446
446
if (!Subtarget.hasFSQRT() &&
447
447
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
@@ -9060,6 +9060,103 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
9060
9060
return FP;
9061
9061
}
9062
9062
9063
+ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9064
+ SelectionDAG &DAG) const {
9065
+ SDLoc Dl(Op);
9066
+ MachineFunction &MF = DAG.getMachineFunction();
9067
+ EVT PtrVT = getPointerTy(MF.getDataLayout());
9068
+ SDValue Chain = Op.getOperand(0);
9069
+
9070
+ // If requested mode is constant, just use simpler mtfsb/mffscrni
9071
+ if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9072
+ uint64_t Mode = CVal->getZExtValue();
9073
+ assert(Mode < 4 && "Unsupported rounding mode!");
9074
+ unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9075
+ if (Subtarget.isISA3_0())
9076
+ return SDValue(
9077
+ DAG.getMachineNode(
9078
+ PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9079
+ {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9080
+ 1);
9081
+ SDNode *SetHi = DAG.getMachineNode(
9082
+ (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9083
+ {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9084
+ SDNode *SetLo = DAG.getMachineNode(
9085
+ (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9086
+ {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9087
+ return SDValue(SetLo, 0);
9088
+ }
9089
+
9090
+ // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9091
+ SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9092
+ SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9093
+ DAG.getConstant(3, Dl, MVT::i32));
9094
+ SDValue DstFlag = DAG.getNode(
9095
+ ISD::XOR, Dl, MVT::i32, SrcFlag,
9096
+ DAG.getNode(ISD::AND, Dl, MVT::i32,
9097
+ DAG.getNOT(Dl,
9098
+ DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9099
+ MVT::i32),
9100
+ One));
9101
+ // For Power9, there's faster mffscrn, and we don't need to read FPSCR
9102
+ SDValue MFFS;
9103
+ if (!Subtarget.isISA3_0()) {
9104
+ MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9105
+ Chain = MFFS.getValue(1);
9106
+ }
9107
+ SDValue NewFPSCR;
9108
+ if (Subtarget.isPPC64()) {
9109
+ if (Subtarget.isISA3_0()) {
9110
+ NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9111
+ } else {
9112
+ // Set the last two bits (rounding mode) of bitcasted FPSCR.
9113
+ SDNode *InsertRN = DAG.getMachineNode(
9114
+ PPC::RLDIMI, Dl, MVT::i64,
9115
+ {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9116
+ DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9117
+ DAG.getTargetConstant(0, Dl, MVT::i32),
9118
+ DAG.getTargetConstant(62, Dl, MVT::i32)});
9119
+ NewFPSCR = SDValue(InsertRN, 0);
9120
+ }
9121
+ NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9122
+ } else {
9123
+ // In 32-bit mode, store f64, load and update the lower half.
9124
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9125
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9126
+ SDValue Addr = Subtarget.isLittleEndian()
9127
+ ? StackSlot
9128
+ : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9129
+ DAG.getConstant(4, Dl, PtrVT));
9130
+ if (Subtarget.isISA3_0()) {
9131
+ Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9132
+ } else {
9133
+ Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9134
+ SDValue Tmp =
9135
+ DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9136
+ Chain = Tmp.getValue(1);
9137
+ Tmp = SDValue(DAG.getMachineNode(
9138
+ PPC::RLWIMI, Dl, MVT::i32,
9139
+ {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9140
+ DAG.getTargetConstant(30, Dl, MVT::i32),
9141
+ DAG.getTargetConstant(31, Dl, MVT::i32)}),
9142
+ 0);
9143
+ Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9144
+ }
9145
+ NewFPSCR =
9146
+ DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9147
+ Chain = NewFPSCR.getValue(1);
9148
+ }
9149
+ if (Subtarget.isISA3_0())
9150
+ return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9151
+ {NewFPSCR, Chain}),
9152
+ 1);
9153
+ SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9154
+ SDNode *MTFSF = DAG.getMachineNode(
9155
+ PPC::MTFSF, Dl, MVT::Other,
9156
+ {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9157
+ return SDValue(MTFSF, 0);
9158
+ }
9159
+
9063
9160
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9064
9161
SelectionDAG &DAG) const {
9065
9162
SDLoc dl(Op);
@@ -11921,6 +12018,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11921
12018
case ISD::UINT_TO_FP:
11922
12019
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11923
12020
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12021
+ case ISD::SET_ROUNDING:
12022
+ return LowerSET_ROUNDING(Op, DAG);
11924
12023
11925
12024
// Lower 64-bit shifts.
11926
12025
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
0 commit comments