Skip to content

Commit aa84337

Browse files
pestctrl (Benson Chu)
authored and committed
[ARM][Thumb] Save FPSCR + FPEXC for save-vfp attribute
Previously, the ARM frame code had 5 "phases" of saving registers:

- GPRCS1
- GPRCS2 (splitFramePushPop)
- DPRCS
- GPRCS2 (splitFramePointerPush)
- DPRCS2

There are two GPRCS2's because the frame code assumes those two are mutually exclusive, and therefore reuses the name for the zone on the stack. After some renaming and creating an extra space for saving the FP status registers, we now have:

- GPRCS1
- GPRCS2 (splitFramePushPop)
- FPStatusRegs (new)
- DPRCS1
- GPRCS3 (splitFramePointerPush)
- DPRCS2

FPSCR and FPEXC will be stored in FPStatusRegs, after GPRCS1 has been saved (and GPRCS2, if applicable). FPSCR is present on all targets with a VFP, but the FPEXC register is not present on Cortex-M devices, so a different number of bytes is pushed onto the stack depending on the target, which would affect alignment for subsequent saves. Thankfully, DPRCS1 sums up all previously saved bytes and emits extra instructions to ensure that its alignment is correct. My assumption is that if DPRCS1 is able to correct its own alignment, then all subsequent saves will also be correctly aligned.
1 parent a935c45 commit aa84337

13 files changed

+801
-185
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2490,11 +2490,10 @@ def ARMInterruptSaveFPDocs : Documentation {
24902490
let Content = [{
24912491
Clang supports the GNU style ``__attribute__((interrupt_save_fp("TYPE")))``
24922492
on ARM targets. This attribute behaves the same way as the ARM interrupt
2493-
attribute, except the general purpose floating point registers are also saved.
2494-
If the FPEXC or FPSCR are needed, that state must be saved manually. Note, even
2495-
on M-class CPUs, where the floating point context can be automatically saved
2496-
depending on the FPCCR, the general purpose floating point registers will be
2497-
saved.
2493+
attribute, except the general purpose floating point registers are also saved,
2494+
along with FPEXC and FPSCR. Note, even on M-class CPUs, where the floating
2495+
point context can be automatically saved depending on the FPCCR, the general
2496+
purpose floating point registers will be saved.
24982497
}];
24992498
}
25002499

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// REQUIRES: arm-registered-target
2+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -marm -S -o - %s \
3+
// RUN: | FileCheck %s --check-prefix=CHECK-R
4+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
5+
// RUN: | FileCheck %s --check-prefix=CHECK-R
6+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -marm -S -o - %s \
7+
// RUN: | FileCheck %s --check-prefix=CHECK-R
8+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
9+
// RUN: | FileCheck %s --check-prefix=CHECK-R
10+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -S -o - %s \
11+
// RUN: | FileCheck %s --check-prefix=CHECK-M
12+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m33 -mfpu=fpv5-sp-d16 -S -o - %s \
13+
// RUN: | FileCheck %s --check-prefix=CHECK-M
14+
15+
void bar();
16+
17+
__attribute__((interrupt_save_fp)) void test_generic_interrupt() {
18+
// CHECK-R: vmrs r4, fpscr
19+
// CHECK-R-NEXT: vmrs r5, fpexc
20+
// CHECK-R-NEXT: .save {fpscr, fpexc}
21+
// CHECK-R-NEXT: push {r4, r5}
22+
// .....
23+
// CHECK-R: pop {r4, r5}
24+
// CHECK-R-NEXT: vmsr fpscr, r4
25+
// CHECK-R-NEXT: vmsr fpexc, r5
26+
27+
// CHECK-M: vmrs r4, fpscr
28+
// CHECK-M-NEXT: .save {fpscr}
29+
// CHECK-M-NEXT: push {r4}
30+
// .....
31+
// CHECK-M: pop {r4}
32+
// CHECK-M-NEXT: vmsr fpscr, r4
33+
bar();
34+
}

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
311311
// VFP
312312

313313
def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
314-
DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>;
314+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
315315
def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
316316
DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
317317
def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty],

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
12051205
SrcReg = ~0U;
12061206
DstReg = MI->getOperand(0).getReg();
12071207
break;
1208+
case ARM::VMRS:
1209+
SrcReg = ARM::FPSCR;
1210+
DstReg = MI->getOperand(0).getReg();
1211+
break;
1212+
case ARM::VMRS_FPEXC:
1213+
SrcReg = ARM::FPEXC;
1214+
DstReg = MI->getOperand(0).getReg();
1215+
break;
12081216
default:
12091217
SrcReg = MI->getOperand(1).getReg();
12101218
DstReg = MI->getOperand(0).getReg();
@@ -1371,6 +1379,13 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
13711379
// correct ".save" later.
13721380
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
13731381
break;
1382+
case ARM::VMRS:
1383+
case ARM::VMRS_FPEXC:
1384+
// If a function spills FPSCR or FPEXC, we copy the values to low
1385+
// registers before pushing them. Record the copy so we can emit the
1386+
// correct ".save" later.
1387+
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
1388+
break;
13741389
case ARM::tLDRpci: {
13751390
// Grab the constpool index and check, whether it corresponds to
13761391
// original or cloned constpool entry.

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -80,44 +80,41 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
8080
? CSR_ATPCS_SplitPush_SwiftTail_SaveList
8181
: CSR_AAPCS_SwiftTail_SaveList);
8282
} else if (F.hasFnAttribute("interrupt")) {
83-
bool SaveFP = F.hasFnAttribute("save-fp");
84-
bool HasNEON = MF->getSubtarget<ARMSubtarget>().hasNEON();
83+
84+
// Don't bother saving the floating point registers if target is not hard
85+
// float. This will prevent the Thumb1FrameLowering (cortex-m0) from
86+
// crashing due to an llvm_unreachable being triggered when a D-class
87+
// register is in the calling convention.
88+
if (STI.isTargetHardFloat() && F.hasFnAttribute("save-fp")) {
89+
bool HasNEON = STI.hasNEON();
90+
91+
if (STI.isMClass()) {
92+
assert(!HasNEON && "NEON is only for Cortex-R/A");
93+
return PushPopSplit == ARMSubtarget::SplitR7
94+
? CSR_ATPCS_SplitPush_FP_SaveList
95+
: CSR_AAPCS_FP_SaveList;
96+
}
97+
if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
98+
return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
99+
}
100+
return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
101+
: CSR_GenericInt_FP_SaveList;
102+
}
85103

86104
if (STI.isMClass()) {
87105
// M-class CPUs have hardware which saves the registers needed to allow a
88106
// function conforming to the AAPCS to function as a handler.
89-
// Additionally, M Class has hardware support for saving VFP registers,
90-
// but the option can be disabled
91-
if (SaveFP) {
92-
if (HasNEON) {
93-
return UseSplitPush ? CSR_AAPCS_SplitPush_FP_NEON_SaveList
94-
: CSR_AAPCS_FP_NEON_SaveList;
95-
} else {
96-
return UseSplitPush ? CSR_AAPCS_SplitPush_FP_SaveList
97-
: CSR_AAPCS_FP_SaveList;
98-
}
99-
} else {
100-
return PushPopSplit == ARMSubtarget::SplitR7
101-
? CSR_ATPCS_SplitPush_SaveList
102-
: CSR_AAPCS_SaveList;
103-
}
107+
return PushPopSplit == ARMSubtarget::SplitR7
108+
? CSR_ATPCS_SplitPush_SaveList
109+
: CSR_AAPCS_SaveList;
104110
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
105111
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
106112
// need to be saved to restore user-mode state.
107-
if (SaveFP) {
108-
return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
109-
} else {
110-
return CSR_FIQ_SaveList;
111-
}
113+
return CSR_FIQ_SaveList;
112114
} else {
113115
// Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
114116
// exception handling.
115-
if (SaveFP) {
116-
return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
117-
: CSR_GenericInt_FP_SaveList;
118-
} else {
119-
return CSR_GenericInt_SaveList;
120-
}
117+
return CSR_GenericInt_SaveList;
121118
}
122119
}
123120

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def CC_ARM_Win32_CFGuard_Check : CallingConv<[
267267
def CSR_NoRegs : CalleeSavedRegs<(add)>;
268268
def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
269269

270-
def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add (sequence "D%u", 7, 0))>;
270+
def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add FPSCR, FPEXC, (sequence "D%u", 15, 0))>;
271271
def CSR_FP_NEON_Interrupt_Regs : CalleeSavedRegs<(add CSR_FP_Interrupt_Regs,
272272
(sequence "D%u", 31, 16))>;
273273

@@ -276,20 +276,6 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
276276

277277
def CSR_AAPCS_FP : CalleeSavedRegs<(add CSR_AAPCS, CSR_FP_Interrupt_Regs)>;
278278

279-
def CSR_AAPCS_FP_NEON : CalleeSavedRegs<(add CSR_AAPCS_FP,
280-
CSR_FP_NEON_Interrupt_Regs)>;
281-
282-
// The Windows Control Flow Guard Check function preserves the same registers as
283-
// AAPCS, and also preserves all floating point registers.
284-
def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
285-
R6, R5, R4, (sequence "D%u", 15, 0))>;
286-
287-
// R8 is used to pass swifterror, remove it from CSR.
288-
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
289-
290-
// R10 is used to pass swiftself, remove it from CSR.
291-
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
292-
293279
// The order of callee-saved registers needs to match the order we actually push
294280
// them in FrameLowering, because this order is what's used by
295281
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
@@ -302,11 +288,19 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
302288
(sequence "D%u", 15, 8),
303289
LR, R11)>;
304290

305-
def CSR_AAPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_AAPCS_SplitPush,
291+
def CSR_ATPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_ATPCS_SplitPush,
306292
CSR_FP_Interrupt_Regs)>;
307293

308-
def CSR_AAPCS_SplitPush_FP_NEON : CalleeSavedRegs<(add CSR_AAPCS_SplitPush_FP,
309-
CSR_FP_NEON_Interrupt_Regs)>;
294+
// The Windows Control Flow Guard Check function preserves the same registers as
295+
// AAPCS, and also preserves all floating point registers.
296+
def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
297+
R6, R5, R4, (sequence "D%u", 15, 0))>;
298+
299+
// R8 is used to pass swifterror, remove it from CSR.
300+
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
301+
302+
// R10 is used to pass swiftself, remove it from CSR.
303+
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
310304

311305

312306
// R8 is used to pass swifterror, remove it from CSR.

0 commit comments

Comments
 (0)