Skip to content

Commit 7678e6e

Browse files
authored
[RISCV] Lower the alignment requirement for a GPR pair spill for Zdinx on RV32. (#85871)
I believe we can use XLen alignment as long as eliminateFrameIndex limits the maximum folded offset to 2043. This way, when we split the load/store into two instructions, we'll be able to add 4 without overflowing simm12.
1 parent e4fa2e3 commit 7678e6e

File tree

3 files changed

+82
-1
lines changed

3 files changed

+82
-1
lines changed

llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,13 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
446446
(Lo12 & 0b11111) != 0) {
447447
// Prefetch instructions require the offset to be 32 byte aligned.
448448
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
449+
} else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
450+
Opc == RISCV::PseudoRV32ZdinxSD) &&
451+
Lo12 >= 2044) {
452+
// This instruction will be split into 2 instructions. The second
453+
// instruction will add 4 to the immediate. If that would overflow 12
454+
// bits, we can't fold the offset.
455+
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
449456
} else {
450457
// We can encode an add with 12 bit signed immediate in the immediate
451458
// operand of our user instruction. As a result, the remaining

llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
573573
}
574574

575575
let RegInfos = RegInfoByHwMode<[RV32, RV64],
576-
[RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>,
576+
[RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>,
577577
DecoderMethod = "DecodeGPRPairRegisterClass" in
578578
def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
579579
X10_X11, X12_X13, X14_X15, X16_X17,
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc %s -mtriple=riscv32 -mattr=+zdinx -start-before=prologepilog -o - | FileCheck %s
3+
4+
# We want to make sure eliminateFrameIndex doesn't fold sp+2044 as an offset in
5+
# a GPR pair spill/reload instruction. When we split the pair spill, we would be
6+
# unable to add 4 to the immediate without overflowing simm12.
7+
8+
--- |
9+
define void @foo() {
10+
; CHECK-LABEL: foo:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: addi sp, sp, -2048
13+
; CHECK-NEXT: addi sp, sp, -16
14+
; CHECK-NEXT: .cfi_def_cfa_offset 2064
15+
; CHECK-NEXT: lui t0, 1
16+
; CHECK-NEXT: add t0, sp, t0
17+
; CHECK-NEXT: sw a0, -2040(t0)
18+
; CHECK-NEXT: sw a1, -2036(t0)
19+
; CHECK-NEXT: lui a0, 1
20+
; CHECK-NEXT: add a0, sp, a0
21+
; CHECK-NEXT: sw a2, -2048(a0)
22+
; CHECK-NEXT: sw a3, -2044(a0)
23+
; CHECK-NEXT: sw a4, 2040(sp)
24+
; CHECK-NEXT: sw a5, 2044(sp)
25+
; CHECK-NEXT: sw a6, 2032(sp)
26+
; CHECK-NEXT: sw a7, 2036(sp)
27+
; CHECK-NEXT: lui a0, 1
28+
; CHECK-NEXT: add a0, sp, a0
29+
; CHECK-NEXT: lw a1, -2036(a0)
30+
; CHECK-NEXT: lw a0, -2040(a0)
31+
; CHECK-NEXT: lui a0, 1
32+
; CHECK-NEXT: add a0, sp, a0
33+
; CHECK-NEXT: lw a2, -2048(a0)
34+
; CHECK-NEXT: lw a3, -2044(a0)
35+
; CHECK-NEXT: lw a4, 2040(sp)
36+
; CHECK-NEXT: lw a5, 2044(sp)
37+
; CHECK-NEXT: lw a6, 2032(sp)
38+
; CHECK-NEXT: lw a7, 2036(sp)
39+
; CHECK-NEXT: addi sp, sp, 2032
40+
; CHECK-NEXT: addi sp, sp, 32
41+
; CHECK-NEXT: ret
42+
ret void
43+
}
44+
...
45+
---
46+
name: foo
47+
tracksRegLiveness: true
48+
tracksDebugUserValues: true
49+
frameInfo:
50+
maxAlignment: 4
51+
stack:
52+
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
53+
- { id: 1, type: spill-slot, size: 8, alignment: 4 }
54+
- { id: 2, type: spill-slot, size: 8, alignment: 4 }
55+
- { id: 3, type: spill-slot, size: 8, alignment: 4 }
56+
- { id: 4, type: spill-slot, size: 2024, alignment: 4 }
57+
machineFunctionInfo:
58+
varArgsFrameIndex: 0
59+
varArgsSaveSize: 0
60+
body: |
61+
bb.0:
62+
liveins: $x10_x11, $x12_x13, $x14_x15, $x16_x17
63+
64+
PseudoRV32ZdinxSD killed renamable $x10_x11, %stack.0, 0 :: (store (s64) into %stack.0, align 4)
65+
PseudoRV32ZdinxSD killed renamable $x12_x13, %stack.1, 0 :: (store (s64) into %stack.1, align 4)
66+
PseudoRV32ZdinxSD killed renamable $x14_x15, %stack.2, 0 :: (store (s64) into %stack.2, align 4)
67+
PseudoRV32ZdinxSD killed renamable $x16_x17, %stack.3, 0 :: (store (s64) into %stack.3, align 4)
68+
renamable $x10_x11 = PseudoRV32ZdinxLD %stack.0, 0 :: (load (s64) from %stack.0, align 4)
69+
renamable $x12_x13 = PseudoRV32ZdinxLD %stack.1, 0 :: (load (s64) from %stack.1, align 4)
70+
renamable $x14_x15 = PseudoRV32ZdinxLD %stack.2, 0 :: (load (s64) from %stack.2, align 4)
71+
renamable $x16_x17 = PseudoRV32ZdinxLD %stack.3, 0 :: (load (s64) from %stack.3, align 4)
72+
PseudoRET
73+
74+
...

0 commit comments

Comments
 (0)