Skip to content

Commit 4880bc9

Browse files
committed
[AArch64][PAC] Reduce the size of synchronous CFI
For synchronous unwind tables, the call frame information can be slightly reduced by bundling the `.cfi_negate_ra_state` instruction with the other CFI instructions in the prolog. This saves the 1 byte per function that would otherwise be spent on an extra `DW_CFA_advance_loc`. This was suggested in [D156428](https://reviews.llvm.org/D156428#4554317).
1 parent dc5f2ee commit 4880bc9

12 files changed

+84
-48
lines changed

llvm/lib/Target/AArch64/AArch64PointerAuth.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
100100
auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
101101
bool UseBKey = MFnI.shouldSignWithBKey();
102102
bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF);
103+
bool EmitAsyncCFI = MFnI.needsAsyncDwarfUnwindInfo(MF);
103104
bool NeedsWinCFI = MF.hasWinCFI();
104105

105106
MachineBasicBlock &MBB = *MBBI->getParent();
@@ -137,6 +138,18 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
137138
}
138139

139140
if (EmitCFI) {
141+
if (!EmitAsyncCFI) {
142+
// Reduce the size of the generated call frame information for synchronous
143+
// CFI by bundling the new CFI instruction with others in the prolog, so
144+
// that no additional DW_CFA_advance_loc is needed.
145+
for (auto I = MBBI; I != MBB.end(); ++I) {
146+
if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
147+
I->getFlag(MachineInstr::FrameSetup)) {
148+
MBBI = I;
149+
break;
150+
}
151+
}
152+
}
140153
unsigned CFIIndex =
141154
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
142155
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" {
1111
; CHECK-NEXT: .cfi_b_key_frame
1212
; V8A-NEXT: hint #27
1313
; V83A-NEXT: pacibsp
14-
; CHECK-NEXT: .cfi_negate_ra_state
14+
; CHECK: .cfi_negate_ra_state
15+
; CHECK-NEXT: .cfi_def_cfa_offset
1516
%1 = alloca i32, align 4
1617
%2 = alloca i32, align 4
1718
%3 = alloca i32, align 4

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ define void @a() "sign-return-address"="all" {
77
; CHECK-LABEL: a: // @a
88
; V8A: hint #25
99
; V83A: paciasp
10-
; CHECK-NEXT: .cfi_negate_ra_state
10+
; CHECK: .cfi_negate_ra_state
11+
; CHECK-NEXT: .cfi_def_cfa_offset
1112
%1 = alloca i32, align 4
1213
%2 = alloca i32, align 4
1314
%3 = alloca i32, align 4
@@ -54,7 +55,8 @@ define void @c() "sign-return-address"="all" {
5455
; CHECK-LABEL: c: // @c
5556
; V8A: hint #25
5657
; V83A: paciasp
57-
; CHECK-NEXT: .cfi_negate_ra_state
58+
; CHECK: .cfi_negate_ra_state
59+
; CHECK-NEXT: .cfi_def_cfa_offset
5860
%1 = alloca i32, align 4
5961
%2 = alloca i32, align 4
6062
%3 = alloca i32, align 4

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
88
; CHECK: .cfi_b_key_frame
99
; V8A-NEXT: hint #27
1010
; V83A-NEXT: pacibsp
11-
; CHECK-NEXT: .cfi_negate_ra_state
11+
; CHECK: .cfi_negate_ra_state
12+
; CHECK-NEXT: .cfi_def_cfa_offset
1213
%1 = alloca i32, align 4
1314
%2 = alloca i32, align 4
1415
%3 = alloca i32, align 4
@@ -30,7 +31,8 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
3031
; CHECK: .cfi_b_key_frame
3132
; V8A-NEXT: hint #27
3233
; V83A-NEXT: pacibsp
33-
; CHECK-NEXT: .cfi_negate_ra_state
34+
; CHECK: .cfi_negate_ra_state
35+
; CHECK-NEXT: .cfi_def_cfa_offset
3436
%1 = alloca i32, align 4
3537
%2 = alloca i32, align 4
3638
%3 = alloca i32, align 4
@@ -52,7 +54,8 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
5254
; CHECK: .cfi_b_key_frame
5355
; V8A-NEXT: hint #27
5456
; V83A-NEXT: pacibsp
55-
; CHECK-NEXT: .cfi_negate_ra_state
57+
; CHECK: .cfi_negate_ra_state
58+
; CHECK-NEXT: .cfi_def_cfa_offset
5659
%1 = alloca i32, align 4
5760
%2 = alloca i32, align 4
5861
%3 = alloca i32, align 4

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ body: |
8282
# CHECK: bb.0:
8383
# CHECK: frame-setup EMITBKEY
8484
# CHECK-NEXT: frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp
85-
# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state
85+
# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state
86+
# CHECK-NEXT: frame-setup CFI_INSTRUCTION
8687
# CHECK-NOT: OUTLINED_FUNCTION_
8788
# CHECK: bb.1:
8889
# CHECK-NOT: OUTLINED_FUNCTION_

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ define void @a() "sign-return-address"="all" {
77
; CHECK-LABEL: a: // @a
88
; V8A: hint #25
99
; V83A: paciasp
10-
; CHECK-NEXT: .cfi_negate_ra_state
10+
; CHECK: .cfi_negate_ra_state
11+
; CHECK-NEXT: .cfi_def_cfa_offset
1112
%1 = alloca i32, align 4
1213
%2 = alloca i32, align 4
1314
%3 = alloca i32, align 4
@@ -31,7 +32,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" {
3132
; CHECK: .cfi_b_key_frame
3233
; V8A-NEXT: hint #27
3334
; V83A-NEXT: pacibsp
34-
; CHECK-NEXT: .cfi_negate_ra_state
35+
; CHECK: .cfi_negate_ra_state
36+
; CHECK-NEXT: .cfi_def_cfa_offset
3537
%1 = alloca i32, align 4
3638
%2 = alloca i32, align 4
3739
%3 = alloca i32, align 4
@@ -55,7 +57,8 @@ define void @c() "sign-return-address"="all" {
5557
; CHECK-LABEL: c: // @c
5658
; V8A: hint #25
5759
; V83A: paciasp
58-
; CHECK-NEXT: .cfi_negate_ra_state
60+
; CHECK: .cfi_negate_ra_state
61+
; CHECK-NEXT: .cfi_def_cfa_offset
5962
%1 = alloca i32, align 4
6063
%2 = alloca i32, align 4
6164
%3 = alloca i32, align 4

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ define void @a() #0 {
1010
; CHECK: // %bb.0:
1111
; CHECK-NEXT: .cfi_b_key_frame
1212
; CHECK-NEXT: pacibsp
13-
; CHECK-NEXT: .cfi_negate_ra_state
13+
; CHECK: .cfi_negate_ra_state
14+
; CHECK-NEXT: .cfi_def_cfa_offset
1415
; CHECK-NOT: OUTLINED_FUNCTION_
1516
%1 = alloca i32, align 4
1617
%2 = alloca i32, align 4
@@ -34,7 +35,8 @@ define void @b() #0 {
3435
; CHECK: // %bb.0:
3536
; CHECK-NEXT: .cfi_b_key_frame
3637
; CHECK-NEXT: pacibsp
37-
; CHECK-NEXT: .cfi_negate_ra_state
38+
; CHECK: .cfi_negate_ra_state
39+
; CHECK-NEXT: .cfi_def_cfa_offset
3840
; CHECK-NOT: OUTLINED_FUNCTION_
3941
%1 = alloca i32, align 4
4042
%2 = alloca i32, align 4
@@ -58,7 +60,8 @@ define void @c() #1 {
5860
; CHECK: // %bb.0:
5961
; CHECK-NEXT: .cfi_b_key_frame
6062
; CHECK-NEXT: hint #27
61-
; CHECK-NEXT: .cfi_negate_ra_state
63+
; CHECK: .cfi_negate_ra_state
64+
; CHECK-NEXT: .cfi_def_cfa_offset
6265
; CHECK-NOT: OUTLINED_FUNCTION_
6366
%1 = alloca i32, align 4
6467
%2 = alloca i32, align 4

llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ define i32 @a() #0 {
1111
; CHECK: // %bb.0: // %entry
1212
; V8A-NEXT: hint #25
1313
; V83A-NEXT: paciasp
14-
; CHECK-NEXT: .cfi_negate_ra_state
14+
; CHECK: .cfi_negate_ra_state
15+
; CHECK-NEXT: .cfi_def_cfa_offset
1516
; V8A: hint #29
1617
; V8A-NEXT: ret
1718
; V83A: retaa
@@ -26,7 +27,8 @@ define i32 @b() #0 {
2627
; CHECK: // %bb.0: // %entry
2728
; V8A-NEXT: hint #25
2829
; V83A-NEXT: paciasp
29-
; CHECK-NEXT: .cfi_negate_ra_state
30+
; CHECK: .cfi_negate_ra_state
31+
; CHECK-NEXT: .cfi_def_cfa_offset
3032
; V8A: hint #29
3133
; V8A-NEXT: ret
3234
; V83A: retaa
@@ -41,7 +43,8 @@ define hidden i32 @c(ptr %fptr) #0 {
4143
; CHECK: // %bb.0: // %entry
4244
; V8A-NEXT: hint #25
4345
; V83A-NEXT: paciasp
44-
; CHECK-NEXT: .cfi_negate_ra_state
46+
; CHECK: .cfi_negate_ra_state
47+
; CHECK-NEXT: .cfi_def_cfa_offset
4548
; V8A: hint #29
4649
; V8A-NEXT: ret
4750
; V83A: retaa
@@ -56,7 +59,8 @@ define hidden i32 @d(ptr %fptr) #0 {
5659
; CHECK: // %bb.0: // %entry
5760
; V8A-NEXT: hint #25
5861
; V83A-NEXT: paciasp
59-
; CHECK-NEXT: .cfi_negate_ra_state
62+
; CHECK: .cfi_negate_ra_state
63+
; CHECK-NEXT: .cfi_def_cfa_offset
6064
; V8A: hint #29
6165
; V8A-NEXT: ret
6266
; V83A: retaa

llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
;; RUN: llc --mattr=+v8.3a %s -o - | FileCheck %s
1+
;; RUN: llc --mattr=+v8.3a %s -o - | tee %t.log | FileCheck %s
22
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
33
target triple = "aarch64-unknown-linux"
44

@@ -35,7 +35,8 @@ entry:
3535
;; CHECK-LABEL: __llvm_gcov_writeout:
3636
;; CHECK: .cfi_b_key_frame
3737
;; CHECK-NEXT: pacibsp
38-
;; CHECK-NEXT: .cfi_negate_ra_state
38+
;; CHECK: .cfi_negate_ra_state
39+
;; CHECK-NEXT: .cfi_def_cfa_offset
3940

4041
define internal void @__llvm_gcov_reset() unnamed_addr #2 {
4142
entry:
@@ -55,7 +56,9 @@ entry:
5556
;; CHECK-LABEL: __llvm_gcov_init:
5657
;; CHECK: .cfi_b_key_frame
5758
;; CHECK-NEXT: pacibsp
58-
;; CHECK-NEXT: .cfi_negate_ra_state
59+
;; CHECK-NEXT: .cfi_negate_ra_state
60+
;; CHECK-NOT: .cfi_
61+
;; CHECK: .cfi_endproc
5962

6063
attributes #0 = { norecurse nounwind readnone "sign-return-address"="all" "sign-return-address-key"="b_key" }
6164
attributes #1 = { noinline }

llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 {
1010
; CHECK-V8A-LABEL: _Z3fooi:
1111
; CHECK-V8A: // %bb.0: // %entry
1212
; CHECK-V8A-NEXT: hint #25
13-
; CHECK-V8A-NEXT: .cfi_negate_ra_state
1413
; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
14+
; CHECK-V8A-NEXT: .cfi_negate_ra_state
1515
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
1616
; CHECK-V8A-NEXT: .cfi_offset w30, -16
1717
; CHECK-V8A-NEXT: str w0, [sp, #8]
@@ -28,8 +28,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 {
2828
; CHECK-V83A-LABEL: _Z3fooi:
2929
; CHECK-V83A: // %bb.0: // %entry
3030
; CHECK-V83A-NEXT: paciasp
31-
; CHECK-V83A-NEXT: .cfi_negate_ra_state
3231
; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
32+
; CHECK-V83A-NEXT: .cfi_negate_ra_state
3333
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
3434
; CHECK-V83A-NEXT: .cfi_offset w30, -16
3535
; CHECK-V83A-NEXT: str w0, [sp, #8]
@@ -57,7 +57,8 @@ return: ; No predecessors!
5757
}
5858

5959
; For asynchronous unwind tables, we need to flip the value of RA_SIGN_STATE
60-
; before and after the tail call.
60+
; before and after the tail call. In the prolog, RA_SIGN_STATE is updated right
61+
; after the corresponding 'PACIASP' instruction.
6162
define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) {
6263
; CHECK-V8A-LABEL: baz_async:
6364
; CHECK-V8A: // %bb.0: // %entry
@@ -137,12 +138,14 @@ return: ; preds = %if.else, %if.then
137138
; around the tail call. The tail-called function might throw an exception, but
138139
; at this point we are set up to return into baz's caller, so the unwinder will
139140
; never see baz's unwind table for that exception.
141+
; The '.cfi_negate_ra_state' instruction in the prolog can be bundled with other
142+
; CFI instructions to avoid emitting superfluous DW_CFA_advance_loc.
140143
define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
141144
; CHECK-V8A-LABEL: baz_sync:
142145
; CHECK-V8A: // %bb.0: // %entry
143146
; CHECK-V8A-NEXT: hint #25
144-
; CHECK-V8A-NEXT: .cfi_negate_ra_state
145147
; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
148+
; CHECK-V8A-NEXT: .cfi_negate_ra_state
146149
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
147150
; CHECK-V8A-NEXT: .cfi_offset w30, -16
148151
; CHECK-V8A-NEXT: cbz w0, .LBB2_2
@@ -162,8 +165,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
162165
; CHECK-V83A-LABEL: baz_sync:
163166
; CHECK-V83A: // %bb.0: // %entry
164167
; CHECK-V83A-NEXT: paciasp
165-
; CHECK-V83A-NEXT: .cfi_negate_ra_state
166168
; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
169+
; CHECK-V83A-NEXT: .cfi_negate_ra_state
167170
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
168171
; CHECK-V83A-NEXT: .cfi_offset w30, -16
169172
; CHECK-V83A-NEXT: cbz w0, .LBB2_2

0 commit comments

Comments
 (0)