Skip to content

Commit c3aad46

Browse files
pestctrl authored and Benson Chu committed
[ARM][Thumb] Save FPSCR + FPEXC for save-vfp attribute
Previously, the ARM frame code had 5 "phases" of saving registers:
- GPRCS1
- GPRCS2 (splitFramePushPop)
- DPRCS
- GPRCS2 (splitFramePointerPush)
- DPRCS2

There are two GPRCS2's because the frame code assumes those two are mutually exclusive, and therefore reuses the name for the zone on the stack. After some renaming and creating an extra space for saving the FP status registers, we now have:
- GPRCS1
- GPRCS2 (splitFramePushPop)
- FPStatusRegs (new)
- DPRCS1
- GPRCS3 (splitFramePointerPush)
- DPRCS2

FPSCR and FPEXC will be stored in FPStatusRegs, after GPRCS1 has been saved (and GPRCS2, if applicable). FPSCR is present on all targets with a VFP, but the FPEXC register is not present on Cortex-M devices, so a different number of bytes is pushed onto the stack depending on the target, which would affect alignment for subsequent saves. Thankfully, DPRCS1 sums up all previously saved bytes and emits extra instructions to ensure that its alignment is correct. My assumption is that if DPRCS1 is able to correct its own alignment, then all subsequent saves will also be correctly aligned.
1 parent b9efa10 commit c3aad46

13 files changed

+883
-283
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// REQUIRES: arm-registered-target
2+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -marm -S -o - %s \
3+
// RUN: | FileCheck %s --check-prefix=CHECK-R
4+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
5+
// RUN: | FileCheck %s --check-prefix=CHECK-R
6+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -marm -S -o - %s \
7+
// RUN: | FileCheck %s --check-prefix=CHECK-R
8+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
9+
// RUN: | FileCheck %s --check-prefix=CHECK-R
10+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -S -o - %s \
11+
// RUN: | FileCheck %s --check-prefix=CHECK-M
12+
// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m33 -mfpu=fpv5-sp-d16 -S -o - %s \
13+
// RUN: | FileCheck %s --check-prefix=CHECK-M
14+
15+
void bar();
16+
17+
__attribute__((interrupt_save_fp)) void test_generic_interrupt() {
18+
// CHECK-R: vmrs r4, fpscr
19+
// CHECK-R-NEXT: vmrs r5, fpexc
20+
// CHECK-R-NEXT: .save {fpscr, fpexc}
21+
// CHECK-R-NEXT: push {r4, r5}
22+
// .....
23+
// CHECK-R: pop {r4, r5}
24+
// CHECK-R-NEXT: vmsr fpscr, r4
25+
// CHECK-R-NEXT: vmsr fpexc, r5
26+
27+
// CHECK-M: vmrs r4, fpscr
28+
// CHECK-M-NEXT: .save {fpscr}
29+
// CHECK-M-NEXT: push {r4}
30+
// .....
31+
// CHECK-M: pop {r4}
32+
// CHECK-M-NEXT: vmsr fpscr, r4
33+
bar();
34+
}

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
311311
// VFP
312312

313313
def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
314-
DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>;
314+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
315315
def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
316316
DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
317317
def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty],

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
12071207
SrcReg = ~0U;
12081208
DstReg = MI->getOperand(0).getReg();
12091209
break;
1210+
case ARM::VMRS:
1211+
SrcReg = ARM::FPSCR;
1212+
DstReg = MI->getOperand(0).getReg();
1213+
break;
1214+
case ARM::VMRS_FPEXC:
1215+
SrcReg = ARM::FPEXC;
1216+
DstReg = MI->getOperand(0).getReg();
1217+
break;
12101218
default:
12111219
SrcReg = MI->getOperand(1).getReg();
12121220
DstReg = MI->getOperand(0).getReg();
@@ -1369,6 +1377,13 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
13691377
// correct ".save" later.
13701378
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
13711379
break;
1380+
case ARM::VMRS:
1381+
case ARM::VMRS_FPEXC:
1382+
// If a function spills FPSCR or FPEXC, we copy the values to low
1383+
// registers before pushing them. Record the copy so we can emit the
1384+
// correct ".save" later.
1385+
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
1386+
break;
13721387
case ARM::tLDRpci: {
13731388
// Grab the constpool index and check, whether it corresponds to
13741389
// original or cloned constpool entry.

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -79,42 +79,42 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
7979
: (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
8080
: CSR_AAPCS_SwiftTail_SaveList);
8181
} else if (F.hasFnAttribute("interrupt")) {
82-
bool SaveFP = F.hasFnAttribute("save-fp");
83-
bool HasNEON = MF->getSubtarget<ARMSubtarget>().hasNEON();
82+
83+
// Don't bother saving the floating point registers if target is not hard
84+
// float. This will prevent the Thumb1FrameLowering (cortex-m0) from
85+
// crashing due to an llvm_unreachable being triggered when a D-class
86+
// register is in the calling convention.
87+
if (STI.isTargetHardFloat() && F.hasFnAttribute("save-fp")) {
88+
bool HasNEON = STI.hasNEON();
89+
90+
if (STI.isMClass()) {
91+
assert(!HasNEON && "NEON is only for Cortex-R/A");
92+
return UseSplitPush ? CSR_ATPCS_SplitPush_FP_SaveList
93+
: CSR_AAPCS_FP_SaveList;
94+
}
95+
if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
96+
return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
97+
}
98+
return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
99+
: CSR_GenericInt_FP_SaveList;
100+
}
84101

85102
if (STI.isMClass()) {
86103
// M-class CPUs have hardware which saves the registers needed to allow a
87104
// function conforming to the AAPCS to function as a handler.
88105
// Additionally, M Class has hardware support for saving VFP registers,
89106
// but the option can be disabled
90-
if (SaveFP) {
91-
if (HasNEON) {
92-
return UseSplitPush ? CSR_AAPCS_SplitPush_FP_NEON_SaveList
93-
: CSR_AAPCS_FP_NEON_SaveList;
94-
} else {
95-
return UseSplitPush ? CSR_AAPCS_SplitPush_FP_SaveList
96-
: CSR_AAPCS_FP_SaveList;
97-
}
98-
} else {
99-
return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
100-
}
101-
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
107+
return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
108+
}
109+
110+
if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
102111
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
103112
// need to be saved to restore user-mode state.
104-
if (SaveFP) {
105-
return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
106-
} else {
107-
return CSR_FIQ_SaveList;
108-
}
113+
return CSR_FIQ_SaveList;
109114
} else {
110115
// Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
111116
// exception handling.
112-
if (SaveFP) {
113-
return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
114-
: CSR_GenericInt_FP_SaveList;
115-
} else {
116-
return CSR_GenericInt_SaveList;
117-
}
117+
return CSR_GenericInt_SaveList;
118118
}
119119
}
120120

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def CC_ARM_Win32_CFGuard_Check : CallingConv<[
267267
def CSR_NoRegs : CalleeSavedRegs<(add)>;
268268
def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
269269

270-
def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add (sequence "D%u", 7, 0))>;
270+
def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add FPSCR, FPEXC, (sequence "D%u", 15, 0))>;
271271
def CSR_FP_NEON_Interrupt_Regs : CalleeSavedRegs<(add CSR_FP_Interrupt_Regs,
272272
(sequence "D%u", 31, 16))>;
273273

@@ -276,20 +276,6 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
276276

277277
def CSR_AAPCS_FP : CalleeSavedRegs<(add CSR_AAPCS, CSR_FP_Interrupt_Regs)>;
278278

279-
def CSR_AAPCS_FP_NEON : CalleeSavedRegs<(add CSR_AAPCS_FP,
280-
CSR_FP_NEON_Interrupt_Regs)>;
281-
282-
// The Windows Control Flow Guard Check function preserves the same registers as
283-
// AAPCS, and also preserves all floating point registers.
284-
def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
285-
R6, R5, R4, (sequence "D%u", 15, 0))>;
286-
287-
// R8 is used to pass swifterror, remove it from CSR.
288-
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
289-
290-
// R10 is used to pass swiftself, remove it from CSR.
291-
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
292-
293279
// The order of callee-saved registers needs to match the order we actually push
294280
// them in FrameLowering, because this order is what's used by
295281
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
@@ -302,11 +288,19 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
302288
(sequence "D%u", 15, 8),
303289
LR, R11)>;
304290

305-
def CSR_AAPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_AAPCS_SplitPush,
291+
def CSR_ATPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_ATPCS_SplitPush,
306292
CSR_FP_Interrupt_Regs)>;
307293

308-
def CSR_AAPCS_SplitPush_FP_NEON : CalleeSavedRegs<(add CSR_AAPCS_SplitPush_FP,
309-
CSR_FP_NEON_Interrupt_Regs)>;
294+
// The Windows Control Flow Guard Check function preserves the same registers as
295+
// AAPCS, and also preserves all floating point registers.
296+
def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
297+
R6, R5, R4, (sequence "D%u", 15, 0))>;
298+
299+
// R8 is used to pass swifterror, remove it from CSR.
300+
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
301+
302+
// R10 is used to pass swiftself, remove it from CSR.
303+
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
310304

311305

312306
// R8 is used to pass swifterror, remove it from CSR.

0 commit comments

Comments
 (0)