Skip to content

Commit 9122c52

Browse files
authored
[RISCV] Enable bidirectional scheduling and tracking register pressure (llvm#115445)
This is based on other targets such as PPC and AArch64, and on some experiments. This PR only enables bidirectional scheduling and register-pressure tracking. Disclaimer: I haven't tested it on many cores; maybe we should turn some of these options into features. I believe downstream projects must have tried this before, so feedback is welcome.
1 parent 5bbe63e commit 9122c52

File tree

488 files changed

+56101
-53973
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

488 files changed

+56101
-53973
lines changed

llvm/lib/Target/RISCV/RISCVSubtarget.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "RISCV.h"
1717
#include "RISCVFrameLowering.h"
1818
#include "RISCVTargetMachine.h"
19+
#include "llvm/CodeGen/MachineScheduler.h"
1920
#include "llvm/CodeGen/MacroFusion.h"
2021
#include "llvm/CodeGen/ScheduleDAGMutation.h"
2122
#include "llvm/MC/TargetRegistry.h"
@@ -199,3 +200,15 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
199200
? RISCVMinimumJumpTableEntries
200201
: TuneInfo->MinimumJumpTableEntries;
201202
}
203+
204+
/// Overrides the scheduling policy for RISC-V: clears both single-direction
/// flags and enables register-pressure tracking. NumRegionInstrs is not
/// consulted by this implementation.
void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
205+
unsigned NumRegionInstrs) const {
206+
// Do bidirectional scheduling since it provides a more balanced schedule,
207+
// leading to better performance. This will increase compile time.
208+
Policy.OnlyTopDown = false;
209+
Policy.OnlyBottomUp = false;
210+
211+
// Spilling is generally expensive on all RISC-V cores, so always enable
212+
// register-pressure tracking. This will increase compile time.
213+
Policy.ShouldTrackPressure = true;
214+
}

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
327327
unsigned getTailDupAggressiveThreshold() const {
328328
return TuneInfo->TailDupAggressiveThreshold;
329329
}
330+
331+
/// Adjusts the scheduler policy passed in \p Policy; the definition lives in
/// RISCVSubtarget.cpp.
void overrideSchedPolicy(MachineSchedPolicy &Policy,
332+
unsigned NumRegionInstrs) const override;
330333
};
331334
} // End llvm namespace
332335

llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,17 @@ define i32 @add_i8_signext_i32(i8 %a, i8 %b) {
2525
; RV32IM-LABEL: add_i8_signext_i32:
2626
; RV32IM: # %bb.0: # %entry
2727
; RV32IM-NEXT: slli a0, a0, 24
28-
; RV32IM-NEXT: srai a0, a0, 24
2928
; RV32IM-NEXT: slli a1, a1, 24
29+
; RV32IM-NEXT: srai a0, a0, 24
3030
; RV32IM-NEXT: srai a1, a1, 24
3131
; RV32IM-NEXT: add a0, a0, a1
3232
; RV32IM-NEXT: ret
3333
;
3434
; RV64IM-LABEL: add_i8_signext_i32:
3535
; RV64IM: # %bb.0: # %entry
3636
; RV64IM-NEXT: slli a0, a0, 56
37-
; RV64IM-NEXT: srai a0, a0, 56
3837
; RV64IM-NEXT: slli a1, a1, 56
38+
; RV64IM-NEXT: srai a0, a0, 56
3939
; RV64IM-NEXT: srai a1, a1, 56
4040
; RV64IM-NEXT: add a0, a0, a1
4141
; RV64IM-NEXT: ret

llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll

Lines changed: 68 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@ define i2 @bitreverse_i2(i2 %x) {
66
; RV32-LABEL: bitreverse_i2:
77
; RV32: # %bb.0:
88
; RV32-NEXT: slli a1, a0, 1
9-
; RV32-NEXT: andi a1, a1, 2
109
; RV32-NEXT: andi a0, a0, 3
10+
; RV32-NEXT: andi a1, a1, 2
1111
; RV32-NEXT: srli a0, a0, 1
1212
; RV32-NEXT: or a0, a1, a0
1313
; RV32-NEXT: ret
1414
;
1515
; RV64-LABEL: bitreverse_i2:
1616
; RV64: # %bb.0:
1717
; RV64-NEXT: slli a1, a0, 1
18-
; RV64-NEXT: andi a1, a1, 2
1918
; RV64-NEXT: andi a0, a0, 3
19+
; RV64-NEXT: andi a1, a1, 2
2020
; RV64-NEXT: srli a0, a0, 1
2121
; RV64-NEXT: or a0, a1, a0
2222
; RV64-NEXT: ret
@@ -28,8 +28,8 @@ define i3 @bitreverse_i3(i3 %x) {
2828
; RV32-LABEL: bitreverse_i3:
2929
; RV32: # %bb.0:
3030
; RV32-NEXT: slli a1, a0, 2
31-
; RV32-NEXT: andi a1, a1, 4
3231
; RV32-NEXT: andi a0, a0, 7
32+
; RV32-NEXT: andi a1, a1, 4
3333
; RV32-NEXT: andi a2, a0, 2
3434
; RV32-NEXT: or a1, a1, a2
3535
; RV32-NEXT: srli a0, a0, 2
@@ -39,8 +39,8 @@ define i3 @bitreverse_i3(i3 %x) {
3939
; RV64-LABEL: bitreverse_i3:
4040
; RV64: # %bb.0:
4141
; RV64-NEXT: slli a1, a0, 2
42-
; RV64-NEXT: andi a1, a1, 4
4342
; RV64-NEXT: andi a0, a0, 7
43+
; RV64-NEXT: andi a1, a1, 4
4444
; RV64-NEXT: andi a2, a0, 2
4545
; RV64-NEXT: or a1, a1, a2
4646
; RV64-NEXT: srli a0, a0, 2
@@ -54,11 +54,11 @@ define i4 @bitreverse_i4(i4 %x) {
5454
; RV32-LABEL: bitreverse_i4:
5555
; RV32: # %bb.0:
5656
; RV32-NEXT: slli a1, a0, 3
57-
; RV32-NEXT: andi a1, a1, 8
5857
; RV32-NEXT: slli a2, a0, 1
58+
; RV32-NEXT: andi a0, a0, 15
59+
; RV32-NEXT: andi a1, a1, 8
5960
; RV32-NEXT: andi a2, a2, 4
6061
; RV32-NEXT: or a1, a1, a2
61-
; RV32-NEXT: andi a0, a0, 15
6262
; RV32-NEXT: srli a2, a0, 1
6363
; RV32-NEXT: andi a2, a2, 2
6464
; RV32-NEXT: or a1, a1, a2
@@ -69,11 +69,11 @@ define i4 @bitreverse_i4(i4 %x) {
6969
; RV64-LABEL: bitreverse_i4:
7070
; RV64: # %bb.0:
7171
; RV64-NEXT: slli a1, a0, 3
72-
; RV64-NEXT: andi a1, a1, 8
7372
; RV64-NEXT: slli a2, a0, 1
73+
; RV64-NEXT: andi a0, a0, 15
74+
; RV64-NEXT: andi a1, a1, 8
7475
; RV64-NEXT: andi a2, a2, 4
7576
; RV64-NEXT: or a1, a1, a2
76-
; RV64-NEXT: andi a0, a0, 15
7777
; RV64-NEXT: srli a2, a0, 1
7878
; RV64-NEXT: andi a2, a2, 2
7979
; RV64-NEXT: or a1, a1, a2
@@ -88,21 +88,21 @@ define i7 @bitreverse_i7(i7 %x) {
8888
; RV32-LABEL: bitreverse_i7:
8989
; RV32: # %bb.0:
9090
; RV32-NEXT: slli a1, a0, 6
91-
; RV32-NEXT: andi a1, a1, 64
9291
; RV32-NEXT: slli a2, a0, 4
92+
; RV32-NEXT: slli a3, a0, 2
93+
; RV32-NEXT: andi a0, a0, 127
94+
; RV32-NEXT: andi a1, a1, 64
9395
; RV32-NEXT: andi a2, a2, 32
96+
; RV32-NEXT: andi a3, a3, 16
9497
; RV32-NEXT: or a1, a1, a2
95-
; RV32-NEXT: slli a2, a0, 2
96-
; RV32-NEXT: andi a2, a2, 16
97-
; RV32-NEXT: andi a0, a0, 127
98-
; RV32-NEXT: andi a3, a0, 8
99-
; RV32-NEXT: or a2, a2, a3
98+
; RV32-NEXT: andi a2, a0, 8
99+
; RV32-NEXT: or a2, a3, a2
100+
; RV32-NEXT: srli a3, a0, 2
100101
; RV32-NEXT: or a1, a1, a2
101-
; RV32-NEXT: srli a2, a0, 2
102-
; RV32-NEXT: andi a2, a2, 4
103-
; RV32-NEXT: srli a3, a0, 4
104-
; RV32-NEXT: andi a3, a3, 2
105-
; RV32-NEXT: or a2, a2, a3
102+
; RV32-NEXT: srli a2, a0, 4
103+
; RV32-NEXT: andi a3, a3, 4
104+
; RV32-NEXT: andi a2, a2, 2
105+
; RV32-NEXT: or a2, a3, a2
106106
; RV32-NEXT: or a1, a1, a2
107107
; RV32-NEXT: srli a0, a0, 6
108108
; RV32-NEXT: or a0, a1, a0
@@ -111,21 +111,21 @@ define i7 @bitreverse_i7(i7 %x) {
111111
; RV64-LABEL: bitreverse_i7:
112112
; RV64: # %bb.0:
113113
; RV64-NEXT: slli a1, a0, 6
114-
; RV64-NEXT: andi a1, a1, 64
115114
; RV64-NEXT: slli a2, a0, 4
115+
; RV64-NEXT: slli a3, a0, 2
116+
; RV64-NEXT: andi a0, a0, 127
117+
; RV64-NEXT: andi a1, a1, 64
116118
; RV64-NEXT: andi a2, a2, 32
119+
; RV64-NEXT: andi a3, a3, 16
117120
; RV64-NEXT: or a1, a1, a2
118-
; RV64-NEXT: slli a2, a0, 2
119-
; RV64-NEXT: andi a2, a2, 16
120-
; RV64-NEXT: andi a0, a0, 127
121-
; RV64-NEXT: andi a3, a0, 8
122-
; RV64-NEXT: or a2, a2, a3
121+
; RV64-NEXT: andi a2, a0, 8
122+
; RV64-NEXT: or a2, a3, a2
123+
; RV64-NEXT: srli a3, a0, 2
123124
; RV64-NEXT: or a1, a1, a2
124-
; RV64-NEXT: srli a2, a0, 2
125-
; RV64-NEXT: andi a2, a2, 4
126-
; RV64-NEXT: srli a3, a0, 4
127-
; RV64-NEXT: andi a3, a3, 2
128-
; RV64-NEXT: or a2, a2, a3
125+
; RV64-NEXT: srli a2, a0, 4
126+
; RV64-NEXT: andi a3, a3, 4
127+
; RV64-NEXT: andi a2, a2, 2
128+
; RV64-NEXT: or a2, a3, a2
129129
; RV64-NEXT: or a1, a1, a2
130130
; RV64-NEXT: srli a0, a0, 6
131131
; RV64-NEXT: or a0, a1, a0
@@ -139,67 +139,67 @@ define i24 @bitreverse_i24(i24 %x) {
139139
; RV32: # %bb.0:
140140
; RV32-NEXT: slli a1, a0, 16
141141
; RV32-NEXT: lui a2, 4096
142+
; RV32-NEXT: lui a3, 1048335
142143
; RV32-NEXT: addi a2, a2, -1
144+
; RV32-NEXT: addi a3, a3, 240
143145
; RV32-NEXT: and a0, a0, a2
144146
; RV32-NEXT: srli a0, a0, 16
145147
; RV32-NEXT: or a0, a0, a1
146-
; RV32-NEXT: lui a1, 1048335
147-
; RV32-NEXT: addi a1, a1, 240
148-
; RV32-NEXT: and a3, a1, a2
149-
; RV32-NEXT: and a3, a0, a3
150-
; RV32-NEXT: srli a3, a3, 4
148+
; RV32-NEXT: and a1, a3, a2
149+
; RV32-NEXT: and a1, a0, a1
151150
; RV32-NEXT: slli a0, a0, 4
152-
; RV32-NEXT: and a0, a0, a1
153-
; RV32-NEXT: or a0, a3, a0
154-
; RV32-NEXT: lui a1, 1047757
155-
; RV32-NEXT: addi a1, a1, -820
156-
; RV32-NEXT: and a3, a1, a2
157-
; RV32-NEXT: and a3, a0, a3
158-
; RV32-NEXT: srli a3, a3, 2
151+
; RV32-NEXT: and a0, a0, a3
152+
; RV32-NEXT: lui a3, 1047757
153+
; RV32-NEXT: addi a3, a3, -820
154+
; RV32-NEXT: srli a1, a1, 4
155+
; RV32-NEXT: or a0, a1, a0
156+
; RV32-NEXT: and a1, a3, a2
157+
; RV32-NEXT: and a1, a0, a1
159158
; RV32-NEXT: slli a0, a0, 2
160-
; RV32-NEXT: and a0, a0, a1
161-
; RV32-NEXT: or a0, a3, a0
162-
; RV32-NEXT: lui a1, 1047211
163-
; RV32-NEXT: addi a1, a1, -1366
164-
; RV32-NEXT: and a2, a1, a2
159+
; RV32-NEXT: and a0, a0, a3
160+
; RV32-NEXT: lui a3, 1047211
161+
; RV32-NEXT: addi a3, a3, -1366
162+
; RV32-NEXT: and a2, a3, a2
163+
; RV32-NEXT: srli a1, a1, 2
164+
; RV32-NEXT: or a0, a1, a0
165165
; RV32-NEXT: and a2, a0, a2
166-
; RV32-NEXT: srli a2, a2, 1
167166
; RV32-NEXT: slli a0, a0, 1
168-
; RV32-NEXT: and a0, a0, a1
167+
; RV32-NEXT: srli a2, a2, 1
168+
; RV32-NEXT: and a0, a0, a3
169169
; RV32-NEXT: or a0, a2, a0
170170
; RV32-NEXT: ret
171171
;
172172
; RV64-LABEL: bitreverse_i24:
173173
; RV64: # %bb.0:
174174
; RV64-NEXT: slli a1, a0, 16
175175
; RV64-NEXT: lui a2, 4096
176+
; RV64-NEXT: lui a3, 1048335
176177
; RV64-NEXT: addiw a2, a2, -1
178+
; RV64-NEXT: addiw a3, a3, 240
177179
; RV64-NEXT: and a0, a0, a2
178180
; RV64-NEXT: srli a0, a0, 16
179181
; RV64-NEXT: or a0, a0, a1
180-
; RV64-NEXT: lui a1, 1048335
181-
; RV64-NEXT: addiw a1, a1, 240
182-
; RV64-NEXT: and a3, a1, a2
183-
; RV64-NEXT: and a3, a0, a3
184-
; RV64-NEXT: srli a3, a3, 4
182+
; RV64-NEXT: and a1, a3, a2
183+
; RV64-NEXT: and a1, a0, a1
185184
; RV64-NEXT: slli a0, a0, 4
186-
; RV64-NEXT: and a0, a0, a1
187-
; RV64-NEXT: or a0, a3, a0
188-
; RV64-NEXT: lui a1, 1047757
189-
; RV64-NEXT: addiw a1, a1, -820
190-
; RV64-NEXT: and a3, a1, a2
191-
; RV64-NEXT: and a3, a0, a3
192-
; RV64-NEXT: srli a3, a3, 2
185+
; RV64-NEXT: and a0, a0, a3
186+
; RV64-NEXT: lui a3, 1047757
187+
; RV64-NEXT: addiw a3, a3, -820
188+
; RV64-NEXT: srli a1, a1, 4
189+
; RV64-NEXT: or a0, a1, a0
190+
; RV64-NEXT: and a1, a3, a2
191+
; RV64-NEXT: and a1, a0, a1
193192
; RV64-NEXT: slli a0, a0, 2
194-
; RV64-NEXT: and a0, a0, a1
195-
; RV64-NEXT: or a0, a3, a0
196-
; RV64-NEXT: lui a1, 1047211
197-
; RV64-NEXT: addiw a1, a1, -1366
198-
; RV64-NEXT: and a2, a1, a2
193+
; RV64-NEXT: and a0, a0, a3
194+
; RV64-NEXT: lui a3, 1047211
195+
; RV64-NEXT: addiw a3, a3, -1366
196+
; RV64-NEXT: and a2, a3, a2
197+
; RV64-NEXT: srli a1, a1, 2
198+
; RV64-NEXT: or a0, a1, a0
199199
; RV64-NEXT: and a2, a0, a2
200-
; RV64-NEXT: srli a2, a2, 1
201200
; RV64-NEXT: slli a0, a0, 1
202-
; RV64-NEXT: and a0, a0, a1
201+
; RV64-NEXT: srli a2, a2, 1
202+
; RV64-NEXT: and a0, a0, a3
203203
; RV64-NEXT: or a0, a2, a0
204204
; RV64-NEXT: ret
205205
%rev = call i24 @llvm.bitreverse.i24(i24 %x)

llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,34 +21,34 @@ define void @constant_fold_barrier_i128(ptr %p) {
2121
; RV32-LABEL: constant_fold_barrier_i128:
2222
; RV32: # %bb.0: # %entry
2323
; RV32-NEXT: li a1, 1
24-
; RV32-NEXT: slli a1, a1, 11
2524
; RV32-NEXT: lw a2, 0(a0)
2625
; RV32-NEXT: lw a3, 4(a0)
2726
; RV32-NEXT: lw a4, 8(a0)
2827
; RV32-NEXT: lw a5, 12(a0)
28+
; RV32-NEXT: slli a1, a1, 11
2929
; RV32-NEXT: and a2, a2, a1
3030
; RV32-NEXT: and a3, a3, zero
3131
; RV32-NEXT: and a4, a4, zero
3232
; RV32-NEXT: and a5, a5, zero
3333
; RV32-NEXT: add a2, a2, a1
34-
; RV32-NEXT: sltu a1, a2, a1
3534
; RV32-NEXT: add a6, a3, zero
35+
; RV32-NEXT: sltu a1, a2, a1
3636
; RV32-NEXT: sltu a3, a6, a3
3737
; RV32-NEXT: add a6, a6, a1
3838
; RV32-NEXT: seqz a7, a6
3939
; RV32-NEXT: and a1, a7, a1
40+
; RV32-NEXT: add a7, a4, zero
41+
; RV32-NEXT: add a5, a5, zero
42+
; RV32-NEXT: sltu a4, a7, a4
4043
; RV32-NEXT: or a1, a3, a1
41-
; RV32-NEXT: add a3, a4, zero
42-
; RV32-NEXT: sltu a4, a3, a4
43-
; RV32-NEXT: add a3, a3, a1
44-
; RV32-NEXT: seqz a7, a3
45-
; RV32-NEXT: and a1, a7, a1
44+
; RV32-NEXT: add a7, a7, a1
45+
; RV32-NEXT: seqz a3, a7
46+
; RV32-NEXT: and a1, a3, a1
4647
; RV32-NEXT: or a1, a4, a1
47-
; RV32-NEXT: add a5, a5, zero
4848
; RV32-NEXT: add a1, a5, a1
4949
; RV32-NEXT: sw a2, 0(a0)
5050
; RV32-NEXT: sw a6, 4(a0)
51-
; RV32-NEXT: sw a3, 8(a0)
51+
; RV32-NEXT: sw a7, 8(a0)
5252
; RV32-NEXT: sw a1, 12(a0)
5353
; RV32-NEXT: ret
5454
entry:

llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv64.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ define i128 @constant_fold_barrier_i128(i128 %x) {
2121
; RV64-LABEL: constant_fold_barrier_i128:
2222
; RV64: # %bb.0: # %entry
2323
; RV64-NEXT: li a2, 1
24+
; RV64-NEXT: and a1, a1, zero
2425
; RV64-NEXT: slli a2, a2, 11
2526
; RV64-NEXT: and a0, a0, a2
26-
; RV64-NEXT: and a1, a1, zero
2727
; RV64-NEXT: add a0, a0, a2
2828
; RV64-NEXT: sltu a2, a0, a2
2929
; RV64-NEXT: add a1, a1, zero

llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ define i64 @abs64(i64 %x) {
117117
; RV32I: # %bb.0:
118118
; RV32I-NEXT: srai a2, a1, 31
119119
; RV32I-NEXT: add a0, a0, a2
120-
; RV32I-NEXT: sltu a3, a0, a2
121120
; RV32I-NEXT: add a1, a1, a2
121+
; RV32I-NEXT: sltu a3, a0, a2
122122
; RV32I-NEXT: add a1, a1, a3
123123
; RV32I-NEXT: xor a0, a0, a2
124124
; RV32I-NEXT: xor a1, a1, a2
@@ -128,8 +128,8 @@ define i64 @abs64(i64 %x) {
128128
; RV32ZBB: # %bb.0:
129129
; RV32ZBB-NEXT: srai a2, a1, 31
130130
; RV32ZBB-NEXT: add a0, a0, a2
131-
; RV32ZBB-NEXT: sltu a3, a0, a2
132131
; RV32ZBB-NEXT: add a1, a1, a2
132+
; RV32ZBB-NEXT: sltu a3, a0, a2
133133
; RV32ZBB-NEXT: add a1, a1, a3
134134
; RV32ZBB-NEXT: xor a0, a0, a2
135135
; RV32ZBB-NEXT: xor a1, a1, a2

llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,8 @@ define i64 @rori_i64(i64 %a) nounwind {
302302
; CHECK-NEXT: slli a2, a0, 31
303303
; CHECK-NEXT: srli a0, a0, 1
304304
; CHECK-NEXT: slli a3, a1, 31
305-
; CHECK-NEXT: or a0, a0, a3
306305
; CHECK-NEXT: srli a1, a1, 1
306+
; CHECK-NEXT: or a0, a0, a3
307307
; CHECK-NEXT: or a1, a2, a1
308308
; CHECK-NEXT: ret
309309
%1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)

0 commit comments

Comments
 (0)