Skip to content

[PowerPC] Implement llvm.set.rounding intrinsic #67302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 10, 2024
80 changes: 80 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);

// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
Expand Down Expand Up @@ -8898,6 +8899,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
return FP;
}

SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
SDValue Chain = Op.getOperand(0);

// If requested mode is constant, just use simpler mtfsb.
if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use DAG.computeKnownBits() to handle more cases instead of just the constant inputs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we want to make sure the higher bits are all zeroes. KnownBits and a constant don't make a difference?

uint64_t Mode = CVal->getZExtValue();
if (Mode >= 4)
llvm_unreachable("Unsupported rounding mode!");
unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
SDNode *SetHi = DAG.getMachineNode(
(InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
{DAG.getConstant(30, Dl, MVT::i32, true), Chain});
SDNode *SetLo = DAG.getMachineNode(
(InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
{DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
return SDValue(SetLo, 0);
}

// Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment does not match the logic below. Should x be (x & 3)?

And LLVM mode 4 (4 - to nearest, ties away from zero) is mapped to Power mode 1 (1 - toward zero)? I think LLVM mode 4 should map to Power mode 0 (0 - round to nearest)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are using a best-effort approach. The meaning looks implementation-defined:

The llvm.set.rounding intrinsic sets the current rounding mode. It is similar to C library function ‘fesetround’, however this intrinsic does not return any value and uses platform-independent representation of IEEE rounding modes.

SDValue One = DAG.getConstant(1, Dl, MVT::i32);
SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
DAG.getConstant(3, Dl, MVT::i32));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add an assert here too, if the compiler can infer that the high 29 bits of operand 1 are non-zero?

SDValue DstFlag = DAG.getNode(
ISD::XOR, Dl, MVT::i32, SrcFlag,
DAG.getNode(ISD::AND, Dl, MVT::i32,
DAG.getNOT(Dl,
DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
MVT::i32),
One));
SDValue MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
Chain = MFFS.getValue(1);
SDValue NewFPSCR;
if (isTypeLegal(MVT::i64)) {
// Set the last two bits (rounding mode) of bitcasted FPSCR.
NewFPSCR = DAG.getNode(
ISD::OR, Dl, MVT::i64,
DAG.getNode(ISD::AND, Dl, MVT::i64,
DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), MVT::i64)),
DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag));
NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
} else {
// In 32-bit mode, store f64, load and update the lower half.
int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
SDValue Addr;
if (Subtarget.isLittleEndian())
Addr = StackSlot;
else
Addr = DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
DAG.getConstant(4, Dl, PtrVT));
SDValue Tmp = DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
Chain = Tmp.getValue(1);

Tmp = DAG.getNode(
ISD::OR, Dl, MVT::i32,
DAG.getNode(ISD::AND, Dl, MVT::i32, Tmp,
DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), MVT::i32)),
DstFlag);

Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
NewFPSCR =
DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
Chain = NewFPSCR.getValue(1);
}
SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
SDNode *MTFSF = DAG.getMachineNode(
PPC::MTFSF, Dl, MVT::Other,
{DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
return SDValue(MTFSF, 0);
}

SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
Expand Down Expand Up @@ -11647,6 +11725,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);

// Lower 64-bit shifts.
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,7 @@ namespace llvm {
const SDLoc &dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
Expand Down
194 changes: 193 additions & 1 deletion llvm/test/CodeGen/PowerPC/frounds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,196 @@ return: ; preds = %entry
ret i32 %retval3
}

declare i32 @llvm.get.rounding() nounwind
; Constant rounding mode 0 (round toward zero, per the test name).
; Every checked target is expected to use the two-instruction mtfsb form:
; clear bit 30 and set bit 31, i.e. the two rounding-mode bits become 0b01.
define void @setrnd_tozero() {
; PPC32-LABEL: setrnd_tozero:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: mtfsb0 30
; PPC32-NEXT: mtfsb1 31
; PPC32-NEXT: blr
;
; PPC64-LABEL: setrnd_tozero:
; PPC64: # %bb.0: # %entry
; PPC64-NEXT: mtfsb0 30
; PPC64-NEXT: mtfsb1 31
; PPC64-NEXT: blr
;
; PPC64LE-LABEL: setrnd_tozero:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mtfsb0 30
; PPC64LE-NEXT: mtfsb1 31
; PPC64LE-NEXT: blr
;
; DM-LABEL: setrnd_tozero:
; DM: # %bb.0: # %entry
; DM-NEXT: mtfsb0 30
; DM-NEXT: mtfsb1 31
; DM-NEXT: blr
entry:
call void @llvm.set.rounding(i32 0)
ret void
}

; Constant rounding mode 1 (round to nearest, ties to even, per the test name).
; Every checked target is expected to clear both bit 30 and bit 31 with
; mtfsb0, i.e. the two rounding-mode bits become 0b00.
define void @setrnd_tonearest_tieeven() {
; PPC32-LABEL: setrnd_tonearest_tieeven:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: mtfsb0 30
; PPC32-NEXT: mtfsb0 31
; PPC32-NEXT: blr
;
; PPC64-LABEL: setrnd_tonearest_tieeven:
; PPC64: # %bb.0: # %entry
; PPC64-NEXT: mtfsb0 30
; PPC64-NEXT: mtfsb0 31
; PPC64-NEXT: blr
;
; PPC64LE-LABEL: setrnd_tonearest_tieeven:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mtfsb0 30
; PPC64LE-NEXT: mtfsb0 31
; PPC64LE-NEXT: blr
;
; DM-LABEL: setrnd_tonearest_tieeven:
; DM: # %bb.0: # %entry
; DM-NEXT: mtfsb0 30
; DM-NEXT: mtfsb0 31
; DM-NEXT: blr
entry:
call void @llvm.set.rounding(i32 1)
ret void
}

; Constant rounding mode 2 (round toward +infinity, per the test name).
; Every checked target is expected to set bit 30 and clear bit 31,
; i.e. the two rounding-mode bits become 0b10.
define void @setrnd_toposinf() {
; PPC32-LABEL: setrnd_toposinf:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: mtfsb1 30
; PPC32-NEXT: mtfsb0 31
; PPC32-NEXT: blr
;
; PPC64-LABEL: setrnd_toposinf:
; PPC64: # %bb.0: # %entry
; PPC64-NEXT: mtfsb1 30
; PPC64-NEXT: mtfsb0 31
; PPC64-NEXT: blr
;
; PPC64LE-LABEL: setrnd_toposinf:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mtfsb1 30
; PPC64LE-NEXT: mtfsb0 31
; PPC64LE-NEXT: blr
;
; DM-LABEL: setrnd_toposinf:
; DM: # %bb.0: # %entry
; DM-NEXT: mtfsb1 30
; DM-NEXT: mtfsb0 31
; DM-NEXT: blr
entry:
call void @llvm.set.rounding(i32 2)
ret void
}

; Constant rounding mode 3 (round toward -infinity, per the test name).
; Every checked target is expected to set both bit 30 and bit 31 with
; mtfsb1, i.e. the two rounding-mode bits become 0b11.
define void @setrnd_toneginf() {
; PPC32-LABEL: setrnd_toneginf:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: mtfsb1 30
; PPC32-NEXT: mtfsb1 31
; PPC32-NEXT: blr
;
; PPC64-LABEL: setrnd_toneginf:
; PPC64: # %bb.0: # %entry
; PPC64-NEXT: mtfsb1 30
; PPC64-NEXT: mtfsb1 31
; PPC64-NEXT: blr
;
; PPC64LE-LABEL: setrnd_toneginf:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mtfsb1 30
; PPC64LE-NEXT: mtfsb1 31
; PPC64LE-NEXT: blr
;
; DM-LABEL: setrnd_toneginf:
; DM: # %bb.0: # %entry
; DM-NEXT: mtfsb1 30
; DM-NEXT: mtfsb1 31
; DM-NEXT: blr
entry:
call void @llvm.set.rounding(i32 3)
ret void
}

; Non-constant rounding mode: the mtfsb shortcut is not used here.
; The checks expect the argument to be masked to its low two bits (clrlwi),
; converted with x ^ (~(x >> 1) & 1) (rlwinm / xor / xori 1), merged into the
; low two bits of the value read by mffs, and written back with mtfsf 255.
; PPC32 patches the low word through a stack slot (stfd / lwz / stw / lfd),
; PPC64 and PPC64LE move the whole doubleword through the stack (stfd / ld /
; std / lfd), and DM moves it between register files with mffprd / mtfprd.
define void @setrnd_var(i32 %x) {
; PPC32-LABEL: setrnd_var:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: stwu 1, -16(1)
; PPC32-NEXT: .cfi_def_cfa_offset 16
; PPC32-NEXT: mffs 0
; PPC32-NEXT: stfd 0, 8(1)
; PPC32-NEXT: clrlwi 4, 3, 30
; PPC32-NEXT: lwz 5, 12(1)
; PPC32-NEXT: rlwinm 3, 3, 31, 31, 31
; PPC32-NEXT: xor 3, 3, 4
; PPC32-NEXT: xori 3, 3, 1
; PPC32-NEXT: rlwinm 4, 5, 0, 0, 29
; PPC32-NEXT: rlwimi 4, 3, 0, 30, 31
; PPC32-NEXT: stw 4, 12(1)
; PPC32-NEXT: lfd 0, 8(1)
; PPC32-NEXT: mtfsf 255, 0
; PPC32-NEXT: addi 1, 1, 16
; PPC32-NEXT: blr
;
; PPC64-LABEL: setrnd_var:
; PPC64: # %bb.0: # %entry
; PPC64-NEXT: mffs 0
; PPC64-NEXT: stfd 0, -16(1)
; PPC64-NEXT: ld 5, -16(1)
; PPC64-NEXT: clrlwi 4, 3, 30
; PPC64-NEXT: rlwinm 3, 3, 31, 31, 31
; PPC64-NEXT: xor 3, 3, 4
; PPC64-NEXT: xori 3, 3, 1
; PPC64-NEXT: clrldi 3, 3, 32
; PPC64-NEXT: rldicr 4, 5, 0, 61
; PPC64-NEXT: or 3, 4, 3
; PPC64-NEXT: std 3, -8(1)
; PPC64-NEXT: lfd 0, -8(1)
; PPC64-NEXT: mtfsf 255, 0
; PPC64-NEXT: blr
;
; PPC64LE-LABEL: setrnd_var:
; PPC64LE: # %bb.0: # %entry
; PPC64LE-NEXT: mffs 0
; PPC64LE-NEXT: stfd 0, -16(1)
; PPC64LE-NEXT: clrlwi 4, 3, 30
; PPC64LE-NEXT: rlwinm 3, 3, 31, 31, 31
; PPC64LE-NEXT: xor 3, 3, 4
; PPC64LE-NEXT: ld 4, -16(1)
; PPC64LE-NEXT: xori 3, 3, 1
; PPC64LE-NEXT: clrldi 3, 3, 32
; PPC64LE-NEXT: rldicr 4, 4, 0, 61
; PPC64LE-NEXT: or 3, 4, 3
; PPC64LE-NEXT: std 3, -8(1)
; PPC64LE-NEXT: lfd 0, -8(1)
; PPC64LE-NEXT: mtfsf 255, 0
; PPC64LE-NEXT: blr
;
; DM-LABEL: setrnd_var:
; DM: # %bb.0: # %entry
; DM-NEXT: clrlwi 4, 3, 30
; DM-NEXT: rlwinm 3, 3, 31, 31, 31
; DM-NEXT: xor 3, 3, 4
; DM-NEXT: xori 3, 3, 1
; DM-NEXT: clrldi 3, 3, 32
; DM-NEXT: mffs 0
; DM-NEXT: mffprd 4, 0
; DM-NEXT: rldicr 4, 4, 0, 61
; DM-NEXT: or 3, 4, 3
; DM-NEXT: mtfprd 0, 3
; DM-NEXT: mtfsf 255, 0
; DM-NEXT: blr
entry:
call void @llvm.set.rounding(i32 %x)
ret void
}

declare i32 @llvm.get.rounding() #0
declare void @llvm.set.rounding(i32) #0

attributes #0 = { nounwind }