Skip to content

Commit f051f4a

Browse files
committed
[RISCV] Make X5 allocatable for JALR on CPUs without RAS
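Some microarchitectures do not implement a return address stack (RAS); on those CPUs there is no need to keep X5 away from JALR. The default register allocation order now includes X5, and when RAS is supported we select an alternative allocation order without X5. (Alternative orders must be subsets of the default order, which is why X5 is added to the default order and removed in the alternative, rather than the other way around.)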
1 parent b53169d commit f051f4a

File tree

4 files changed

+125
-48
lines changed

4 files changed

+125
-48
lines changed

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,9 @@ def FeatureFastUnalignedAccess
11251125
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
11261126
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
11271127

1128+
def FeatureNoRAS : SubtargetFeature<"no-ret-addr-stack", "HasRetAddrStack", "false",
1129+
"Hasn't RAS (Return Address Stack)">;
1130+
11281131
def TuneNoOptimizedZeroStrideLoad
11291132
: SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
11301133
"false", "Hasn't optimized (perform fewer memory operations)"

llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,13 @@ def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)>;
153153
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
154154
// X3, and X4 as it reduces the number of register classes that get synthesized
155155
// by tablegen.
156-
def GPRJALR : GPRRegisterClass<(sub GPR, (sequence "X%u", 0, 5))>;
156+
// If RAS is supported, we select the alternative register order without X5.
157+
def GPRJALR : GPRRegisterClass<(sub GPR, (sequence "X%u", 0, 4))> {
158+
list<dag> AltOrders = [(sub GPR, (sequence "X%u", 0, 5))];
159+
code AltOrderSelect = [{
160+
return MF.getSubtarget<RISCVSubtarget>().hasRetAddrStack();
161+
}];
162+
}
157163

158164
def GPRC : GPRRegisterClass<(add (sequence "X%u", 10, 15),
159165
(sequence "X%u", 8, 9))>;
@@ -162,9 +168,17 @@ def GPRC : GPRRegisterClass<(add (sequence "X%u", 10, 15),
162168
// restored to the saved value before the tail call, which would clobber a call
163169
// address. We shouldn't use x5 since that is a hint to pop the return
164170
// address stack on some microarchitectures.
165-
def GPRTC : GPRRegisterClass<(add (sequence "X%u", 6, 7),
171+
// If RAS is supported, we select the alternative register order without X5.
172+
def GPRTC : GPRRegisterClass<(add (sequence "X%u", 5, 7),
166173
(sequence "X%u", 10, 17),
167-
(sequence "X%u", 28, 31))>;
174+
(sequence "X%u", 28, 31))> {
175+
list<dag> AltOrders = [(add (sequence "X%u", 6, 7),
176+
(sequence "X%u", 10, 17),
177+
(sequence "X%u", 28, 31))];
178+
code AltOrderSelect = [{
179+
return MF.getSubtarget<RISCVSubtarget>().hasRetAddrStack();
180+
}];
181+
}
168182

169183
def SP : GPRRegisterClass<(add X2)>;
170184

llvm/test/CodeGen/RISCV/calls.ll

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
44
; RUN: llc -relocation-model=pic -mtriple=riscv32 -verify-machineinstrs < %s \
55
; RUN: | FileCheck -check-prefixes=CHECK,RV32I-PIC %s
6+
; RUN: llc -mtriple=riscv32 -mattr=+no-ret-addr-stack -verify-machineinstrs < %s \
7+
; RUN: | FileCheck -check-prefixes=CHECK,RV32I-NO-RAS %s
68

79
declare i32 @external_function(i32)
810

@@ -74,22 +76,56 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
7476
; Make sure we don't use t0 as the source for jalr as that is a hint to pop the
7577
; return address stack on some microarchitectures. With +no-ret-addr-stack
; (the RV32I-NO-RAS prefix) t0 is allowed.
7678
define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
77-
; CHECK-LABEL: test_call_indirect_no_t0:
78-
; CHECK: # %bb.0:
79-
; CHECK-NEXT: addi sp, sp, -16
80-
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
81-
; CHECK-NEXT: mv t1, a0
82-
; CHECK-NEXT: mv a0, a1
83-
; CHECK-NEXT: mv a1, a2
84-
; CHECK-NEXT: mv a2, a3
85-
; CHECK-NEXT: mv a3, a4
86-
; CHECK-NEXT: mv a4, a5
87-
; CHECK-NEXT: mv a5, a6
88-
; CHECK-NEXT: mv a6, a7
89-
; CHECK-NEXT: jalr t1
90-
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
91-
; CHECK-NEXT: addi sp, sp, 16
92-
; CHECK-NEXT: ret
79+
; RV32I-LABEL: test_call_indirect_no_t0:
80+
; RV32I: # %bb.0:
81+
; RV32I-NEXT: addi sp, sp, -16
82+
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
83+
; RV32I-NEXT: mv t1, a0
84+
; RV32I-NEXT: mv a0, a1
85+
; RV32I-NEXT: mv a1, a2
86+
; RV32I-NEXT: mv a2, a3
87+
; RV32I-NEXT: mv a3, a4
88+
; RV32I-NEXT: mv a4, a5
89+
; RV32I-NEXT: mv a5, a6
90+
; RV32I-NEXT: mv a6, a7
91+
; RV32I-NEXT: jalr t1
92+
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
93+
; RV32I-NEXT: addi sp, sp, 16
94+
; RV32I-NEXT: ret
95+
;
96+
; RV32I-PIC-LABEL: test_call_indirect_no_t0:
97+
; RV32I-PIC: # %bb.0:
98+
; RV32I-PIC-NEXT: addi sp, sp, -16
99+
; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
100+
; RV32I-PIC-NEXT: mv t1, a0
101+
; RV32I-PIC-NEXT: mv a0, a1
102+
; RV32I-PIC-NEXT: mv a1, a2
103+
; RV32I-PIC-NEXT: mv a2, a3
104+
; RV32I-PIC-NEXT: mv a3, a4
105+
; RV32I-PIC-NEXT: mv a4, a5
106+
; RV32I-PIC-NEXT: mv a5, a6
107+
; RV32I-PIC-NEXT: mv a6, a7
108+
; RV32I-PIC-NEXT: jalr t1
109+
; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
110+
; RV32I-PIC-NEXT: addi sp, sp, 16
111+
; RV32I-PIC-NEXT: ret
112+
;
113+
; RV32I-NO-RAS-LABEL: test_call_indirect_no_t0:
114+
; RV32I-NO-RAS: # %bb.0:
115+
; RV32I-NO-RAS-NEXT: addi sp, sp, -16
116+
; RV32I-NO-RAS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
117+
; RV32I-NO-RAS-NEXT: mv t0, a0
118+
; RV32I-NO-RAS-NEXT: mv a0, a1
119+
; RV32I-NO-RAS-NEXT: mv a1, a2
120+
; RV32I-NO-RAS-NEXT: mv a2, a3
121+
; RV32I-NO-RAS-NEXT: mv a3, a4
122+
; RV32I-NO-RAS-NEXT: mv a4, a5
123+
; RV32I-NO-RAS-NEXT: mv a5, a6
124+
; RV32I-NO-RAS-NEXT: mv a6, a7
125+
; RV32I-NO-RAS-NEXT: jalr t0
126+
; RV32I-NO-RAS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
127+
; RV32I-NO-RAS-NEXT: addi sp, sp, 16
128+
; RV32I-NO-RAS-NEXT: ret
93129
%1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
94130
ret i32 %1
95131
}
@@ -184,6 +220,3 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
184220
i32 %a, i32 %a, i32 %a, i32 %a, i32 %a)
185221
ret i32 %1
186222
}
187-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
188-
; RV32I: {{.*}}
189-
; RV32I-PIC: {{.*}}

llvm/test/CodeGen/RISCV/tail-calls.ll

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
3-
; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s
2+
; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s \
3+
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RAS
4+
; RUN: llc -mtriple riscv32-unknown-elf -o - %s \
5+
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RAS
6+
; RUN: llc -mtriple riscv32 -mattr=+no-ret-addr-stack -o - %s \
7+
; RUN: | FileCheck --check-prefixes=CHECK,CHECK-NO-RAS %s
48

59
; Perform tail call optimization for global address.
610
declare i32 @callee_tail(i32 %i)
@@ -52,19 +56,29 @@ entry:
5256
declare void @callee_indirect1()
5357
declare void @callee_indirect2()
5458
define void @caller_indirect_tail(i32 %a) nounwind {
55-
; CHECK-LABEL: caller_indirect_tail:
56-
; CHECK: # %bb.0: # %entry
57-
; CHECK-NEXT: beqz a0, .LBB3_2
58-
; CHECK-NEXT: # %bb.1: # %entry
59-
; CHECK-NEXT: lui a0, %hi(callee_indirect2)
60-
; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2)
61-
; CHECK-NEXT: jr t1
62-
; CHECK-NEXT: .LBB3_2:
63-
; CHECK-NEXT: lui a0, %hi(callee_indirect1)
64-
; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1)
65-
; CHECK-NEXT: jr t1
66-
67-
59+
; CHECK-RAS-LABEL: caller_indirect_tail:
60+
; CHECK-RAS: # %bb.0: # %entry
61+
; CHECK-RAS-NEXT: beqz a0, .LBB3_2
62+
; CHECK-RAS-NEXT: # %bb.1: # %entry
63+
; CHECK-RAS-NEXT: lui a0, %hi(callee_indirect2)
64+
; CHECK-RAS-NEXT: addi t1, a0, %lo(callee_indirect2)
65+
; CHECK-RAS-NEXT: jr t1
66+
; CHECK-RAS-NEXT: .LBB3_2:
67+
; CHECK-RAS-NEXT: lui a0, %hi(callee_indirect1)
68+
; CHECK-RAS-NEXT: addi t1, a0, %lo(callee_indirect1)
69+
; CHECK-RAS-NEXT: jr t1
70+
;
71+
; CHECK-NO-RAS-LABEL: caller_indirect_tail:
72+
; CHECK-NO-RAS: # %bb.0: # %entry
73+
; CHECK-NO-RAS-NEXT: beqz a0, .LBB3_2
74+
; CHECK-NO-RAS-NEXT: # %bb.1: # %entry
75+
; CHECK-NO-RAS-NEXT: lui a0, %hi(callee_indirect2)
76+
; CHECK-NO-RAS-NEXT: addi t0, a0, %lo(callee_indirect2)
77+
; CHECK-NO-RAS-NEXT: jr t0
78+
; CHECK-NO-RAS-NEXT: .LBB3_2:
79+
; CHECK-NO-RAS-NEXT: lui a0, %hi(callee_indirect1)
80+
; CHECK-NO-RAS-NEXT: addi t0, a0, %lo(callee_indirect1)
81+
; CHECK-NO-RAS-NEXT: jr t0
6882
entry:
6983
%tobool = icmp eq i32 %a, 0
7084
%callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
@@ -75,17 +89,30 @@ entry:
7589
; Make sure we don't use t0 as the source for jr as that is a hint to pop the
7690
; return address stack on some microarchitectures. With +no-ret-addr-stack
; (the CHECK-NO-RAS prefix) t0 is allowed.
7791
define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
78-
; CHECK-LABEL: caller_indirect_no_t0:
79-
; CHECK: # %bb.0:
80-
; CHECK-NEXT: mv t1, a0
81-
; CHECK-NEXT: mv a0, a1
82-
; CHECK-NEXT: mv a1, a2
83-
; CHECK-NEXT: mv a2, a3
84-
; CHECK-NEXT: mv a3, a4
85-
; CHECK-NEXT: mv a4, a5
86-
; CHECK-NEXT: mv a5, a6
87-
; CHECK-NEXT: mv a6, a7
88-
; CHECK-NEXT: jr t1
92+
; CHECK-RAS-LABEL: caller_indirect_no_t0:
93+
; CHECK-RAS: # %bb.0:
94+
; CHECK-RAS-NEXT: mv t1, a0
95+
; CHECK-RAS-NEXT: mv a0, a1
96+
; CHECK-RAS-NEXT: mv a1, a2
97+
; CHECK-RAS-NEXT: mv a2, a3
98+
; CHECK-RAS-NEXT: mv a3, a4
99+
; CHECK-RAS-NEXT: mv a4, a5
100+
; CHECK-RAS-NEXT: mv a5, a6
101+
; CHECK-RAS-NEXT: mv a6, a7
102+
; CHECK-RAS-NEXT: jr t1
103+
;
104+
; CHECK-NO-RAS-LABEL: caller_indirect_no_t0:
105+
; CHECK-NO-RAS: # %bb.0:
106+
; CHECK-NO-RAS-NEXT: mv t0, a0
107+
; CHECK-NO-RAS-NEXT: mv a0, a1
108+
; CHECK-NO-RAS-NEXT: mv a1, a2
109+
; CHECK-NO-RAS-NEXT: mv a2, a3
110+
; CHECK-NO-RAS-NEXT: mv a3, a4
111+
; CHECK-NO-RAS-NEXT: mv a4, a5
112+
; CHECK-NO-RAS-NEXT: mv a5, a6
113+
; CHECK-NO-RAS-NEXT: mv a6, a7
114+
; CHECK-NO-RAS-NEXT: jr t0
115+
89116
%9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
90117
ret i32 %9
91118
}

0 commit comments

Comments
 (0)