Skip to content

Commit 9e30c96

Browse files
authored
[AArch64] Lowering of fpmode intrinsics in DAG (#80611)
LLVM intrinsics `get_fpmode`, `set_fpmode` and `reset_fpmode` operate on control modes, the bits of the FP environment that affect FP operations. On AArch64 these bits are in FPCR. The lowering is implemented to produce code close to that of GLIBC.
1 parent b2c9f7d commit 9e30c96

File tree

3 files changed

+93
-24
lines changed

3 files changed

+93
-24
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
842842

843843
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
844844
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
845+
setOperationAction(ISD::GET_FPMODE, MVT::i32, Custom);
846+
setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
847+
setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
845848

846849
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
847850
if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
@@ -4870,6 +4873,65 @@ SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
48704873
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
48714874
}
48724875

4876+
// Custom lowering for ISD::GET_FPMODE (dispatched from LowerOperation).
// Operand 0 of Op is the incoming chain. The lowering reads FPCR through the
// aarch64_get_fpcr intrinsic, narrows the 64-bit result to i32, and returns
// that value merged with the intrinsic's output chain.
SDValue AArch64TargetLowering::LowerGET_FPMODE(SDValue Op,
4877+
SelectionDAG &DAG) const {
4878+
SDLoc DL(Op);
4879+
SDValue Chain = Op->getOperand(0);
4880+
4881+
// Get current value of FPCR. The node yields two results: value 0 is the
// i64 register content, value 1 is the updated chain.
4882+
SDValue Ops[] = {
4883+
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
4884+
SDValue FPCR =
4885+
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
4886+
Chain = FPCR.getValue(1);
4887+
FPCR = FPCR.getValue(0);
4888+
4889+
// Truncate FPCR to 32 bits, the result type registered for GET_FPMODE.
4890+
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPCR);
4891+
4892+
// Return both the mode value and the chain so later nodes stay ordered
// after the register read.
return DAG.getMergeValues({Result, Chain}, DL);
4893+
}
4894+
4895+
// Custom lowering for ISD::SET_FPMODE (dispatched from LowerOperation).
// Operand 0 of Op is the incoming chain and operand 1 is the new mode value.
// The mode is widened to i64 and written to FPCR through the aarch64_set_fpcr
// intrinsic; the returned INTRINSIC_VOID node produces only a chain.
// NOTE(review): unlike LowerRESET_FPMODE, the value is written as-is with no
// masking against AArch64::ReservedFPControlBits - presumably callers pass a
// value previously obtained from GET_FPMODE; confirm.
SDValue AArch64TargetLowering::LowerSET_FPMODE(SDValue Op,
4896+
SelectionDAG &DAG) const {
4897+
SDLoc DL(Op);
4898+
SDValue Chain = Op->getOperand(0);
4899+
SDValue Mode = Op->getOperand(1);
4900+
4901+
// Extend the specified value to 64 bits (zero-extension, so the upper half
// of the value written is zero when Mode is narrower than i64).
4902+
SDValue FPCR = DAG.getZExtOrTrunc(Mode, DL, MVT::i64);
4903+
4904+
// Set new value of FPCR.
4905+
SDValue Ops2[] = {
4906+
Chain, DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64), FPCR};
4907+
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
4908+
}
4909+
4910+
// Custom lowering for ISD::RESET_FPMODE (dispatched from LowerOperation).
// Operand 0 of Op is the incoming chain. The lowering is a read-modify-write
// of FPCR: read it via aarch64_get_fpcr, AND it with
// AArch64::ReservedFPControlBits, and write the result back via
// aarch64_set_fpcr. The returned INTRINSIC_VOID node produces only a chain.
SDValue AArch64TargetLowering::LowerRESET_FPMODE(SDValue Op,
4911+
SelectionDAG &DAG) const {
4912+
SDLoc DL(Op);
4913+
SDValue Chain = Op->getOperand(0);
4914+
4915+
// Get current value of FPCR. Value 0 is the i64 register content, value 1
// is the updated chain.
4916+
SDValue Ops[] = {
4917+
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
4918+
SDValue FPCR =
4919+
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
4920+
Chain = FPCR.getValue(1);
4921+
FPCR = FPCR.getValue(0);
4922+
4923+
// Clear bits that are not reserved: the AND keeps every bit set in the mask
// and zeroes all others.
// NOTE(review): the local is named FPSCRMasked although the value comes from
// FPCR - looks like a naming slip; confirm before renaming.
4924+
SDValue FPSCRMasked = DAG.getNode(
4925+
ISD::AND, DL, MVT::i64, FPCR,
4926+
DAG.getConstant(AArch64::ReservedFPControlBits, DL, MVT::i64));
4927+
4928+
// Set new value of FPCR, chained after the read above.
4929+
SDValue Ops2[] = {Chain,
4930+
DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
4931+
FPSCRMasked};
4932+
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
4933+
}
4934+
48734935
static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
48744936
SDLoc DL, bool &IsMLA) {
48754937
bool IsN0SExt = isSignExtended(N0, DAG);
@@ -6484,6 +6546,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
64846546
return LowerGET_ROUNDING(Op, DAG);
64856547
case ISD::SET_ROUNDING:
64866548
return LowerSET_ROUNDING(Op, DAG);
6549+
case ISD::GET_FPMODE:
6550+
return LowerGET_FPMODE(Op, DAG);
6551+
case ISD::SET_FPMODE:
6552+
return LowerSET_FPMODE(Op, DAG);
6553+
case ISD::RESET_FPMODE:
6554+
return LowerRESET_FPMODE(Op, DAG);
64876555
case ISD::MUL:
64886556
return LowerMUL(Op, DAG);
64896557
case ISD::MULHS:

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,9 @@ enum Rounding {
523523
// Bit position of rounding mode bits in FPCR.
524524
const unsigned RoundingBitsPos = 22;
525525

526+
// Mask of FPCR bits treated as reserved; these should be preserved when
// modifying FPCR. Set bits are 3-7, 14 and 27-63; clear bits are 0-2, 8-13
// and 15-26. LowerRESET_FPMODE ANDs the current FPCR value with this mask, so
// only the set bits keep their value.
527+
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;
528+
526529
// Registers used to pass function arguments.
527530
ArrayRef<MCPhysReg> getGPRArgRegs();
528531
ArrayRef<MCPhysReg> getFPRArgRegs();
@@ -1128,6 +1131,9 @@ class AArch64TargetLowering : public TargetLowering {
11281131
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
11291132
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
11301133
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1134+
SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1135+
SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1136+
SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
11311137
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
11321138
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
11331139
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/AArch64/fpmode.ll

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,14 @@ declare i32 @llvm.get.fpmode.i32()
66
declare void @llvm.set.fpmode.i32(i32 %fpmode)
77
declare void @llvm.reset.fpmode()
88

9-
define i32 @func_get_fpmode_soft() #0 {
10-
; DAG-LABEL: func_get_fpmode_soft:
9+
define i32 @func_get_fpmode() #0 {
10+
; DAG-LABEL: func_get_fpmode:
1111
; DAG: // %bb.0: // %entry
12-
; DAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
13-
; DAG-NEXT: add x0, sp, #12
14-
; DAG-NEXT: bl fegetmode
15-
; DAG-NEXT: ldr w0, [sp, #12]
16-
; DAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
12+
; DAG-NEXT: mrs x0, FPCR
13+
; DAG-NEXT: // kill: def $w0 killed $w0 killed $x0
1714
; DAG-NEXT: ret
1815
;
19-
; GIS-LABEL: func_get_fpmode_soft:
16+
; GIS-LABEL: func_get_fpmode:
2017
; GIS: // %bb.0: // %entry
2118
; GIS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
2219
; GIS-NEXT: add x0, sp, #12
@@ -29,17 +26,14 @@ entry:
2926
ret i32 %fpmode
3027
}
3128

32-
define void @func_set_fpmode_soft(i32 %fpmode) #0 {
33-
; DAG-LABEL: func_set_fpmode_soft:
29+
define void @func_set_fpmode(i32 %fpmode) #0 {
30+
; DAG-LABEL: func_set_fpmode:
3431
; DAG: // %bb.0: // %entry
35-
; DAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
36-
; DAG-NEXT: str w0, [sp, #12]
37-
; DAG-NEXT: add x0, sp, #12
38-
; DAG-NEXT: bl fesetmode
39-
; DAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
32+
; DAG-NEXT: mov w8, w0
33+
; DAG-NEXT: msr FPCR, x8
4034
; DAG-NEXT: ret
4135
;
42-
; GIS-LABEL: func_set_fpmode_soft:
36+
; GIS-LABEL: func_set_fpmode:
4337
; GIS: // %bb.0: // %entry
4438
; GIS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
4539
; GIS-NEXT: str w0, [sp, #12]
@@ -52,16 +46,17 @@ entry:
5246
ret void
5347
}
5448

55-
define void @func_reset_fpmode_soft() #0 {
56-
; DAG-LABEL: func_reset_fpmode_soft:
49+
define void @func_reset_fpmode() #0 {
50+
; DAG-LABEL: func_reset_fpmode:
5751
; DAG: // %bb.0: // %entry
58-
; DAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
59-
; DAG-NEXT: mov x0, #-1 // =0xffffffffffffffff
60-
; DAG-NEXT: bl fesetmode
61-
; DAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
52+
; DAG-NEXT: mov x9, #-48904 // =0xffffffffffff40f8
53+
; DAG-NEXT: mrs x8, FPCR
54+
; DAG-NEXT: movk x9, #63488, lsl #16
55+
; DAG-NEXT: and x8, x8, x9
56+
; DAG-NEXT: msr FPCR, x8
6257
; DAG-NEXT: ret
6358
;
64-
; GIS-LABEL: func_reset_fpmode_soft:
59+
; GIS-LABEL: func_reset_fpmode:
6560
; GIS: // %bb.0: // %entry
6661
; GIS-NEXT: mov x0, #-1 // =0xffffffffffffffff
6762
; GIS-NEXT: b fesetmode
@@ -70,4 +65,4 @@ entry:
7065
ret void
7166
}
7267

73-
attributes #0 = { nounwind "use-soft-float"="true" }
68+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)