Skip to content

Commit 98b694b

Browse files
authored
[AArch64] Fix range check for STGPostIndex (#117146)
When generating function epilogues using AArch64 stack tagging, we can fold an SP update into the tag-setting loop. The loop tags 32 bytes at a time using ST2G, so the final SP update might be done either by a post-indexed STG, which tags the final 16 bytes of the tagged region, or by an ADD/SUB instruction after the loop. However, we were only considering the range of the ADD/SUB instructions when deciding whether to do this. The valid immediate range for the post-indexed STG is slightly smaller when the offset is positive, because its immediate is signed (a multiple of 16 from -4096 to 4080) and must also include the extra 16 bytes being tagged, so the largest positive SP update that can be folded is 4080 - 16 = 4064.
1 parent 6d759f8 commit 98b694b

File tree

2 files changed

+99
-6
lines changed

2 files changed

+99
-6
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4368,26 +4368,37 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
43684368

43694369
int64_t ExtraBaseRegUpdate =
43704370
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
4371+
LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
4372+
<< ", Size=" << Size
4373+
<< ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
4374+
<< ", FrameRegUpdate=" << FrameRegUpdate
4375+
<< ", FrameRegOffset.getFixed()="
4376+
<< FrameRegOffset.getFixed() << "\n");
43714377
if (LoopSize < Size) {
43724378
assert(FrameRegUpdate);
43734379
assert(Size - LoopSize == 16);
43744380
// Tag 16 more bytes at BaseReg and update BaseReg.
4381+
int64_t STGOffset = ExtraBaseRegUpdate + 16;
4382+
assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
4383+
"STG immediate out of range");
43754384
BuildMI(*MBB, InsertI, DL,
43764385
TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
43774386
.addDef(BaseReg)
43784387
.addReg(BaseReg)
43794388
.addReg(BaseReg)
4380-
.addImm(1 + ExtraBaseRegUpdate / 16)
4389+
.addImm(STGOffset / 16)
43814390
.setMemRefs(CombinedMemRefs)
43824391
.setMIFlags(FrameRegUpdateFlags);
43834392
} else if (ExtraBaseRegUpdate) {
43844393
// Update BaseReg.
4394+
int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
4395+
assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
43854396
BuildMI(
43864397
*MBB, InsertI, DL,
43874398
TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
43884399
.addDef(BaseReg)
43894400
.addReg(BaseReg)
4390-
.addImm(std::abs(ExtraBaseRegUpdate))
4401+
.addImm(AddSubOffset)
43914402
.addImm(0)
43924403
.setMIFlags(FrameRegUpdateFlags);
43934404
}
@@ -4406,10 +4417,20 @@ bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
44064417
int64_t Offset = MI.getOperand(2).getImm() << Shift;
44074418
if (MI.getOpcode() == AArch64::SUBXri)
44084419
Offset = -Offset;
4409-
int64_t AbsPostOffset = std::abs(Offset - Size);
4410-
const int64_t kMaxOffset =
4411-
0xFFF; // Max encoding for unshifted ADDXri / SUBXri
4412-
if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
4420+
int64_t PostOffset = Offset - Size;
4421+
// TagStoreEdit::emitLoop might emit either an ADD/SUB after the loop, or
4422+
// an STGPostIndex which does the last 16 bytes of tag write. Which one is
4423+
// chosen depends on the alignment of the loop size, but the difference
4424+
// between the valid ranges for the two instructions is small, so we
4425+
// conservatively assume that it could be either case here.
4426+
//
4427+
// Max offset of STGPostIndex, minus the 16 byte tag write folded into that
4428+
// instruction.
4429+
const int64_t kMaxOffset = 4080 - 16;
4430+
// Most negative offset, limited by the unshifted SUBXri immediate (4095).
4431+
const int64_t kMinOffset = -4095;
4432+
if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
4433+
PostOffset % 16 == 0) {
44134434
*TotalOffset = Offset;
44144435
return true;
44154436
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s
3+
4+
--- |
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
6+
target triple = "aarch64-arm-none-eabi"
7+
8+
define void @F76(i32 %0, ...) #0 {
9+
ret void
10+
}
11+
12+
...
13+
---
14+
name: F76
15+
fixedStack:
16+
- { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
17+
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
18+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
19+
stack:
20+
- { id: 0, name: '', type: default, offset: 0, size: 4080, alignment: 16,
21+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
22+
local-offset: -4080, debug-info-variable: '', debug-info-expression: '',
23+
debug-info-location: '' }
24+
- { id: 1, name: '', type: default, offset: 0, size: 32, alignment: 16,
25+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
26+
local-offset: -4112, debug-info-variable: '', debug-info-expression: '',
27+
debug-info-location: '' }
28+
- { id: 2, name: '', type: default, offset: 0, size: 3888, alignment: 4,
29+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
30+
local-offset: -8000, debug-info-variable: '', debug-info-expression: '',
31+
debug-info-location: '' }
32+
- { id: 3, name: '', type: default, offset: 0, size: 56, alignment: 8,
33+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
34+
local-offset: -8056, debug-info-variable: '', debug-info-expression: '',
35+
debug-info-location: '' }
36+
- { id: 4, name: '', type: default, offset: 0, size: 128, alignment: 16,
37+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
38+
local-offset: -8192, debug-info-variable: '', debug-info-expression: '',
39+
debug-info-location: '' }
40+
body: |
41+
bb.0 (%ir-block.1):
42+
liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7
43+
44+
; CHECK-LABEL: name: F76
45+
; CHECK: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp
46+
; CHECK-NEXT: {{ $}}
47+
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5)
48+
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2, 12
49+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8208
50+
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
51+
; CHECK-NEXT: renamable $x8 = IRGstack $sp, $xzr
52+
; CHECK-NEXT: renamable $x9 = TAGPstack $x8, 2, renamable $x8, 1
53+
; CHECK-NEXT: renamable $x10 = COPY renamable $x9
54+
; CHECK-NEXT: dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
55+
; CHECK-NEXT: ST2Gi renamable $x8, renamable $x8, 0
56+
; CHECK-NEXT: $x9 = ADDXri $sp, 0, 0
57+
; CHECK-NEXT: dead early-clobber $x8, dead early-clobber $x9 = STGloop_wback 4112, $x9, implicit-def $nzcv
58+
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2, 12
59+
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)
60+
; CHECK-NEXT: RET_ReallyLR
61+
/* Prologue */
62+
renamable $x8 = IRGstack $sp, $xzr
63+
renamable $x9 = TAGPstack %stack.0, 0, renamable $x8, 1
64+
renamable $x10 = COPY renamable $x9
65+
dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv
66+
ST2Gi renamable $x8, renamable $x8, 0
67+
68+
/* Epilogue */
69+
dead early-clobber renamable $x8, dead early-clobber renamable $x9 = STGloop 4080, %stack.0, implicit-def dead $nzcv
70+
ST2Gi $sp, %stack.1, 0
71+
RET_ReallyLR
72+
...

0 commit comments

Comments
 (0)