Skip to content

Commit 7c9efd5

Browse files
committed
[ARM][Thumb] Save FPSCR + FPEXC for save-vfp attribute
FPSCR and FPEXC will be stored in FPStatusRegs, a new area that is saved after GPRCS2. The resulting save order is: GPRCS1, GPRCS2, FPStatusRegs (new), DPRCS1, GPRCS3, DPRCS2. FPSCR is present on all targets with a VFP, but the FPEXC register is not present on Cortex-M devices, so a different number of bytes is pushed onto the stack depending on the target, which would affect the alignment of subsequent saves. DPRCS1 sums up all the bytes that were saved before it and emits extra instructions to ensure that its own alignment is correct. My assumption is that if DPRCS1 is able to correct its alignment, then all subsequent saves will also be correctly aligned.
1 parent 6186aad commit 7c9efd5

13 files changed

+794
-170
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2668,11 +2668,10 @@ def ARMInterruptSaveFPDocs : Documentation {
26682668
let Content = [{
26692669
Clang supports the GNU style ``__attribute__((interrupt_save_fp("TYPE")))``
26702670
on ARM targets. This attribute behaves the same way as the ARM interrupt
2671-
attribute, except the general purpose floating point registers are also saved.
2672-
If the FPEXC or FPSCR are needed, that state must be saved manually. Note, even
2673-
on M-class CPUs, where the floating point context can be automatically saved
2674-
depending on the FPCCR, the general purpose floating point registers will be
2675-
saved.
2671+
attribute, except the general purpose floating point registers are also saved,
2672+
along with FPSCR and, on targets that have it, FPEXC. Note, even on M-class CPUs, where the floating
2673+
point context can be automatically saved depending on the FPCCR, the general
2674+
purpose floating point registers will be saved.
26762675
}];
26772676
}
26782677

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// REQUIRES: arm-registered-target
2+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -marm -S -o - %s \
3+
// RUN: | FileCheck %s --check-prefix=CHECK-R
4+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
5+
// RUN: | FileCheck %s --check-prefix=CHECK-R
6+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -marm -S -o - %s \
7+
// RUN: | FileCheck %s --check-prefix=CHECK-R
8+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
9+
// RUN: | FileCheck %s --check-prefix=CHECK-R
10+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -S -o - %s \
11+
// RUN: | FileCheck %s --check-prefix=CHECK-M
12+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m33 -mfpu=fpv5-sp-d16 -S -o - %s \
13+
// RUN: | FileCheck %s --check-prefix=CHECK-M
14+
15+
void bar();
16+
17+
__attribute__((interrupt_save_fp)) void test_generic_interrupt() {
18+
// CHECK-R: vmrs r4, fpscr
19+
// CHECK-R-NEXT: vmrs r5, fpexc
20+
// CHECK-R-NEXT: .save {fpscr, fpexc}
21+
// CHECK-R-NEXT: push {r4, r5}
22+
// .....
23+
// CHECK-R: pop {r4, r5}
24+
// CHECK-R-NEXT: vmsr fpscr, r4
25+
// CHECK-R-NEXT: vmsr fpexc, r5
26+
27+
// CHECK-M: vmrs r4, fpscr
28+
// CHECK-M-NEXT: .save {fpscr}
29+
// CHECK-M-NEXT: push {r4}
30+
// .....
31+
// CHECK-M: pop {r4}
32+
// CHECK-M-NEXT: vmsr fpscr, r4
33+
bar();
34+
}

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
311311
// VFP
312312

313313
def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
314-
DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>;
314+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
315315
def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
316316
DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
317317
def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty],

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
12071207
SrcReg = ~0U;
12081208
DstReg = MI->getOperand(0).getReg();
12091209
break;
1210+
case ARM::VMRS:
1211+
SrcReg = ARM::FPSCR;
1212+
DstReg = MI->getOperand(0).getReg();
1213+
break;
1214+
case ARM::VMRS_FPEXC:
1215+
SrcReg = ARM::FPEXC;
1216+
DstReg = MI->getOperand(0).getReg();
1217+
break;
12101218
default:
12111219
SrcReg = MI->getOperand(1).getReg();
12121220
DstReg = MI->getOperand(0).getReg();
@@ -1373,6 +1381,13 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
13731381
// correct ".save" later.
13741382
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
13751383
break;
1384+
case ARM::VMRS:
1385+
case ARM::VMRS_FPEXC:
1386+
// If a function spills FPSCR or FPEXC, we copy the values to low
1387+
// registers before pushing them. Record the copy so we can emit the
1388+
// correct ".save" later.
1389+
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
1390+
break;
13761391
case ARM::tLDRpci: {
13771392
// Grab the constpool index and check, whether it corresponds to
13781393
// original or cloned constpool entry.

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,25 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
8080
? CSR_ATPCS_SplitPush_SwiftTail_SaveList
8181
: CSR_AAPCS_SwiftTail_SaveList);
8282
} else if (F.hasFnAttribute("interrupt")) {
83-
bool SaveFP = F.hasFnAttribute("save-fp");
84-
bool HasNEON = MF->getSubtarget<ARMSubtarget>().hasNEON();
83+
84+
// Don't bother saving the floating point registers if target is not hard
85+
// float. This will prevent the Thumb1FrameLowering (cortex-m0) from
86+
// crashing due to an llvm_unreachable being triggered when a D-class
87+
// register is in the calling convention.
88+
if (STI.isTargetHardFloat() && F.hasFnAttribute("save-fp")) {
89+
bool HasNEON = STI.hasNEON();
90+
91+
if (STI.isMClass()) {
92+
assert(!HasNEON && "NEON is only for Cortex-R/A");
93+
return UseSplitPush ? CSR_ATPCS_SplitPush_FP_SaveList
94+
: CSR_AAPCS_FP_SaveList;
95+
}
96+
if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
97+
return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
98+
}
99+
return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
100+
: CSR_GenericInt_FP_SaveList;
101+
}
85102

86103
if (STI.isMClass()) {
87104
// M-class CPUs have hardware which saves the registers needed to allow a
@@ -101,23 +118,16 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
101118
? CSR_ATPCS_SplitPush_SaveList
102119
: CSR_AAPCS_SaveList;
103120
}
104-
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
121+
}
122+
123+
if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
105124
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
106125
// need to be saved to restore user-mode state.
107-
if (SaveFP) {
108-
return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
109-
} else {
110-
return CSR_FIQ_SaveList;
111-
}
126+
return CSR_FIQ_SaveList;
112127
} else {
113128
// Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
114129
// exception handling.
115-
if (SaveFP) {
116-
return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
117-
: CSR_GenericInt_FP_SaveList;
118-
} else {
119-
return CSR_GenericInt_SaveList;
120-
}
130+
return CSR_GenericInt_SaveList;
121131
}
122132
}
123133

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def CC_ARM_Win32_CFGuard_Check : CallingConv<[
268268
def CSR_NoRegs : CalleeSavedRegs<(add)>;
269269
def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
270270

271-
def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add (sequence "D%u", 7, 0))>;
271+
def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add FPSCR, FPEXC, (sequence "D%u", 15, 0))>;
272272
def CSR_FP_NEON_Interrupt_Regs : CalleeSavedRegs<(add CSR_FP_Interrupt_Regs,
273273
(sequence "D%u", 31, 16))>;
274274

@@ -277,20 +277,6 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
277277

278278
def CSR_AAPCS_FP : CalleeSavedRegs<(add CSR_AAPCS, CSR_FP_Interrupt_Regs)>;
279279

280-
def CSR_AAPCS_FP_NEON : CalleeSavedRegs<(add CSR_AAPCS_FP,
281-
CSR_FP_NEON_Interrupt_Regs)>;
282-
283-
// The Windows Control Flow Guard Check function preserves the same registers as
284-
// AAPCS, and also preserves all floating point registers.
285-
def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
286-
R6, R5, R4, (sequence "D%u", 15, 0))>;
287-
288-
// R8 is used to pass swifterror, remove it from CSR.
289-
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
290-
291-
// R10 is used to pass swiftself, remove it from CSR.
292-
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
293-
294280
// The order of callee-saved registers needs to match the order we actually push
295281
// them in FrameLowering, because this order is what's used by
296282
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
@@ -303,11 +289,19 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
303289
(sequence "D%u", 15, 8),
304290
LR, R11)>;
305291

306-
def CSR_AAPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_AAPCS_SplitPush,
292+
def CSR_ATPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_ATPCS_SplitPush,
307293
CSR_FP_Interrupt_Regs)>;
308294

309-
def CSR_AAPCS_SplitPush_FP_NEON : CalleeSavedRegs<(add CSR_AAPCS_SplitPush_FP,
310-
CSR_FP_NEON_Interrupt_Regs)>;
295+
// The Windows Control Flow Guard Check function preserves the same registers as
296+
// AAPCS, and also preserves all floating point registers.
297+
def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
298+
R6, R5, R4, (sequence "D%u", 15, 0))>;
299+
300+
// R8 is used to pass swifterror, remove it from CSR.
301+
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
302+
303+
// R10 is used to pass swiftself, remove it from CSR.
304+
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
311305

312306

313307
// R8 is used to pass swifterror, remove it from CSR.

0 commit comments

Comments
 (0)