Skip to content

Commit 2f81788

Browse files
authored
[ARM][FPEnv] Lowering of fpmode intrinsics (#74054)
LLVM intrinsics `get_fpmode`, `set_fpmode` and `reset_fpmode` operate on the control modes, i.e. the bits of the FP environment that affect FP operations. On ARM these bits reside in FPSCR together with the status bits. The implementation of these intrinsics produces code close to that of the functions `fegetmode` and `fesetmode` from GLIBC. Pull request: #74054
1 parent df3ddd7 commit 2f81788

File tree

4 files changed

+167
-0
lines changed

4 files changed

+167
-0
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
14151415
setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);
14161416
setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);
14171417
setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);
1418+
setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
1419+
setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
1420+
setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
14181421
}
14191422

14201423
// We want to custom lower some of our intrinsics.
@@ -6447,6 +6450,57 @@ SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
64476450
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
64486451
}
64496452

6453+
/// Custom lowering of ISD::SET_FPMODE.
///
/// Operand 0 of \p Op is the input chain and operand 1 is the requested mode
/// value. The result is the chain of an arm_set_fpscr intrinsic node that
/// writes
///   FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
/// i.e. the status bits currently held in FPSCR are kept and every other bit
/// is taken from the requested mode.
SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue InChain = Op->getOperand(0);
  SDValue NewMode = Op->getOperand(1);

  // Read the current contents of FPSCR.
  SDValue GetFPSCROps[] = {
      InChain, DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
  SDValue ReadNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl,
                                 {MVT::i32, MVT::Other}, GetFPSCROps);
  SDValue ReadChain = ReadNode.getValue(1);
  SDValue OldFPSCR = ReadNode.getValue(0);

  // Keep only the status bits of the value that was just read.
  SDValue StatusMask = DAG.getConstant(ARM::FPStatusBits, dl, MVT::i32);
  SDValue KeptStatus =
      DAG.getNode(ISD::AND, dl, MVT::i32, OldFPSCR, StatusMask);

  // Strip the status bits from the requested mode.
  SDValue ModeMask = DAG.getConstant(~ARM::FPStatusBits, dl, MVT::i32);
  SDValue ModeBits = DAG.getNode(ISD::AND, dl, MVT::i32, NewMode, ModeMask);

  // Merge both parts into the new register value.
  SDValue NewFPSCR = DAG.getNode(ISD::OR, dl, MVT::i32, KeptStatus, ModeBits);

  // Write the merged value back; the write is chained after the read.
  SDValue SetFPSCROps[] = {
      ReadChain, DAG.getConstant(Intrinsic::arm_set_fpscr, dl, MVT::i32),
      NewFPSCR};
  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, SetFPSCROps);
}
}
6480+
6481+
/// Custom lowering of ISD::RESET_FPMODE.
///
/// Operand 0 of \p Op is the input chain. The default FP mode is obtained by
/// clearing all control bits, so the emitted sequence computes
///   FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
/// and returns the chain of the arm_set_fpscr intrinsic node that performs
/// the write.
SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue InChain = Op->getOperand(0);

  // Read the current contents of FPSCR.
  SDValue GetFPSCROps[] = {
      InChain, DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
  SDValue ReadNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl,
                                 {MVT::i32, MVT::Other}, GetFPSCROps);
  SDValue ReadChain = ReadNode.getValue(1);
  SDValue OldFPSCR = ReadNode.getValue(0);

  // Only the status and reserved bits survive; everything else is zeroed.
  SDValue KeepMask =
      DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, dl, MVT::i32);
  SDValue NewFPSCR = DAG.getNode(ISD::AND, dl, MVT::i32, OldFPSCR, KeepMask);

  // Write the masked value back; the write is chained after the read.
  SDValue SetFPSCROps[] = {
      ReadChain, DAG.getConstant(Intrinsic::arm_set_fpscr, dl, MVT::i32),
      NewFPSCR};
  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, SetFPSCROps);
}
6503+
64506504
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
64516505
const ARMSubtarget *ST) {
64526506
SDLoc dl(N);
@@ -10557,6 +10611,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1055710611
case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
1055810612
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
1055910613
case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10614+
case ISD::SET_FPMODE:
10615+
return LowerSET_FPMODE(Op, DAG);
10616+
case ISD::RESET_FPMODE:
10617+
return LowerRESET_FPMODE(Op, DAG);
1056010618
case ISD::MUL: return LowerMUL(Op, DAG);
1056110619
case ISD::SDIV:
1056210620
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,14 @@ class VectorType;
375375

376376
// Bit position of rounding mode bits in FPSCR.
377377
const unsigned RoundingBitsPos = 22;
378+
379+
// Bits of floating-point status. These are NZCV flags, QC bit and cumulative
380+
// FP exception bits.
381+
const unsigned FPStatusBits = 0xf800009f;
382+
383+
// Some bits in the FPSCR are not yet defined. They must be preserved when
384+
// modifying the contents.
385+
const unsigned FPReservedBits = 0x00006060;
378386
} // namespace ARM
379387

380388
/// Define some predicates that are used for node matching.
@@ -835,6 +843,8 @@ class VectorType;
835843
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
836844
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
837845
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
846+
SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
847+
SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
838848
SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
839849
const ARMSubtarget *ST) const;
840850
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2675,6 +2675,7 @@ def : Pat<(get_fpenv), (VMRS)>;
26752675
def : Pat<(set_fpenv GPRnopc:$Rt), (VMSR GPRnopc:$Rt)>;
26762676
def : Pat<(reset_fpenv), (VMSR (MOVi 0))>, Requires<[IsARM]>;
26772677
def : Pat<(reset_fpenv), (VMSR (tMOVi8 0))>, Requires<[IsThumb]>;
2678+
def : Pat<(get_fpmode), (VMRS)>;
26782679

26792680
//===----------------------------------------------------------------------===//
26802681
// Assembler aliases.

llvm/test/CodeGen/ARM/fpenv.ll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,107 @@ entry:
142142
ret void
143143
}
144144

145+
define i32 @get_fpmode_01() #0 {
146+
; CHECK-LABEL: get_fpmode_01:
147+
; CHECK: @ %bb.0: @ %entry
148+
; CHECK-NEXT: .save {r11, lr}
149+
; CHECK-NEXT: push {r11, lr}
150+
; CHECK-NEXT: .pad #8
151+
; CHECK-NEXT: sub sp, sp, #8
152+
; CHECK-NEXT: add r0, sp, #4
153+
; CHECK-NEXT: bl fegetmode
154+
; CHECK-NEXT: ldr r0, [sp, #4]
155+
; CHECK-NEXT: add sp, sp, #8
156+
; CHECK-NEXT: pop {r11, lr}
157+
; CHECK-NEXT: mov pc, lr
158+
entry:
159+
%fpenv = call i32 @llvm.get.fpmode.i32()
160+
ret i32 %fpenv
161+
}
162+
163+
define i32 @get_fpmode_02() nounwind {
164+
; CHECK-LABEL: get_fpmode_02:
165+
; CHECK: @ %bb.0: @ %entry
166+
; CHECK-NEXT: vmrs r0, fpscr
167+
; CHECK-NEXT: mov pc, lr
168+
entry:
169+
%fpenv = call i32 @llvm.get.fpmode.i32()
170+
ret i32 %fpenv
171+
}
172+
173+
define void @set_fpmode_01(i32 %fpmode) #0 {
174+
; CHECK-LABEL: set_fpmode_01:
175+
; CHECK: @ %bb.0: @ %entry
176+
; CHECK-NEXT: .save {r11, lr}
177+
; CHECK-NEXT: push {r11, lr}
178+
; CHECK-NEXT: .pad #8
179+
; CHECK-NEXT: sub sp, sp, #8
180+
; CHECK-NEXT: str r0, [sp, #4]
181+
; CHECK-NEXT: add r0, sp, #4
182+
; CHECK-NEXT: bl fesetmode
183+
; CHECK-NEXT: add sp, sp, #8
184+
; CHECK-NEXT: pop {r11, lr}
185+
; CHECK-NEXT: mov pc, lr
186+
entry:
187+
call void @llvm.set.fpmode.i32(i32 %fpmode)
188+
ret void
189+
}
190+
191+
define void @set_fpmode_02(i32 %fpmode) nounwind {
192+
; CHECK-LABEL: set_fpmode_02:
193+
; CHECK: @ %bb.0: @ %entry
194+
; CHECK-NEXT: vmrs r1, fpscr
195+
; CHECK-NEXT: mvn r2, #159
196+
; CHECK-NEXT: sub r2, r2, #-134217728
197+
; CHECK-NEXT: and r0, r0, r2
198+
; CHECK-NEXT: mov r2, #159
199+
; CHECK-NEXT: orr r2, r2, #-134217728
200+
; CHECK-NEXT: and r1, r1, r2
201+
; CHECK-NEXT: orr r0, r1, r0
202+
; CHECK-NEXT: vmsr fpscr, r0
203+
; CHECK-NEXT: mov pc, lr
204+
entry:
205+
call void @llvm.set.fpmode.i32(i32 %fpmode)
206+
ret void
207+
}
208+
209+
define void @reset_fpmode_01() #0 {
210+
; CHECK-LABEL: reset_fpmode_01:
211+
; CHECK: @ %bb.0: @ %entry
212+
; CHECK-NEXT: .save {r11, lr}
213+
; CHECK-NEXT: push {r11, lr}
214+
; CHECK-NEXT: mvn r0, #0
215+
; CHECK-NEXT: bl fesetmode
216+
; CHECK-NEXT: pop {r11, lr}
217+
; CHECK-NEXT: mov pc, lr
218+
entry:
219+
call void @llvm.reset.fpmode()
220+
ret void
221+
}
222+
223+
define void @reset_fpmode_02() nounwind {
224+
; CHECK-LABEL: reset_fpmode_02:
225+
; CHECK: @ %bb.0: @ %entry
226+
; CHECK-NEXT: vmrs r0, fpscr
227+
; CHECK-NEXT: ldr r1, .LCPI16_0
228+
; CHECK-NEXT: and r0, r0, r1
229+
; CHECK-NEXT: vmsr fpscr, r0
230+
; CHECK-NEXT: mov pc, lr
231+
; CHECK-NEXT: .p2align 2
232+
; CHECK-NEXT: @ %bb.1:
233+
; CHECK-NEXT: .LCPI16_0:
234+
; CHECK-NEXT: .long 4160774399 @ 0xf80060ff
235+
entry:
236+
call void @llvm.reset.fpmode()
237+
ret void
238+
}
239+
145240
attributes #0 = { nounwind "use-soft-float"="true" }
146241

147242
declare void @llvm.set.rounding(i32)
148243
declare i32 @llvm.get.fpenv.i32()
149244
declare void @llvm.set.fpenv.i32(i32 %fpenv)
150245
declare void @llvm.reset.fpenv()
246+
declare i32 @llvm.get.fpmode.i32()
247+
declare void @llvm.set.fpmode.i32(i32 %fpmode)
248+
declare void @llvm.reset.fpmode()

0 commit comments

Comments
 (0)