Skip to content

Commit 0530022

Browse files
authored
[RISCV][GISel] Add really basic support for FP regbank selection for G_LOAD/G_STORE. (#70896)
Coerce the register bank based on the users of the G_LOAD or the defining instruction for the G_STORE. s64 on rv32 is handled by forcing the FPRB register bank.
1 parent 2696855 commit 0530022

File tree

2 files changed

+264
-2
lines changed

2 files changed

+264
-2
lines changed

llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,56 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
109109
return &RISCV::ValueMappings[Idx];
110110
}
111111

112+
// TODO: Make this more like AArch64?
113+
static bool onlyUsesFP(const MachineInstr &MI) {
114+
switch (MI.getOpcode()) {
115+
case TargetOpcode::G_FADD:
116+
case TargetOpcode::G_FSUB:
117+
case TargetOpcode::G_FMUL:
118+
case TargetOpcode::G_FDIV:
119+
case TargetOpcode::G_FNEG:
120+
case TargetOpcode::G_FABS:
121+
case TargetOpcode::G_FSQRT:
122+
case TargetOpcode::G_FMAXNUM:
123+
case TargetOpcode::G_FMINNUM:
124+
case TargetOpcode::G_FPEXT:
125+
case TargetOpcode::G_FPTRUNC:
126+
case TargetOpcode::G_FCMP:
127+
case TargetOpcode::G_FPTOSI:
128+
case TargetOpcode::G_FPTOUI:
129+
return true;
130+
default:
131+
break;
132+
}
133+
134+
return false;
135+
}
136+
137+
// TODO: Make this more like AArch64?
138+
static bool onlyDefinesFP(const MachineInstr &MI) {
139+
switch (MI.getOpcode()) {
140+
case TargetOpcode::G_FADD:
141+
case TargetOpcode::G_FSUB:
142+
case TargetOpcode::G_FMUL:
143+
case TargetOpcode::G_FDIV:
144+
case TargetOpcode::G_FNEG:
145+
case TargetOpcode::G_FABS:
146+
case TargetOpcode::G_FSQRT:
147+
case TargetOpcode::G_FMAXNUM:
148+
case TargetOpcode::G_FMINNUM:
149+
case TargetOpcode::G_FPEXT:
150+
case TargetOpcode::G_FPTRUNC:
151+
case TargetOpcode::G_SITOFP:
152+
case TargetOpcode::G_UITOFP:
153+
case TargetOpcode::G_FCONSTANT:
154+
return true;
155+
default:
156+
break;
157+
}
158+
159+
return false;
160+
}
161+
112162
const RegisterBankInfo::InstructionMapping &
113163
RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
114164
const unsigned Opc = MI.getOpcode();
@@ -159,11 +209,53 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
159209
case TargetOpcode::G_ANYEXT:
160210
case TargetOpcode::G_SEXT:
161211
case TargetOpcode::G_ZEXT:
162-
case TargetOpcode::G_LOAD:
163212
case TargetOpcode::G_SEXTLOAD:
164213
case TargetOpcode::G_ZEXTLOAD:
165-
case TargetOpcode::G_STORE:
166214
break;
215+
case TargetOpcode::G_LOAD: {
216+
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
217+
// Use FPR64 for s64 loads on rv32.
218+
if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
219+
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
220+
OperandsMapping =
221+
getOperandsMapping({getFPValueMapping(64), GPRValueMapping});
222+
break;
223+
}
224+
225+
// Check if that load feeds fp instructions.
226+
// In that case, we want the default mapping to be on FPR
227+
// instead of blindly mapping every scalar to GPR.
228+
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
229+
[&](const MachineInstr &UseMI) {
230+
// If we have at least one direct use in a FP instruction,
231+
// assume this was a floating point load in the IR. If it was
232+
// not, we would have had a bitcast before reaching that
233+
// instruction.
234+
return onlyUsesFP(UseMI);
235+
})) {
236+
OperandsMapping = getOperandsMapping(
237+
{getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping});
238+
}
239+
240+
break;
241+
}
242+
case TargetOpcode::G_STORE: {
243+
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
244+
// Use FPR64 for s64 stores on rv32.
245+
if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
246+
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
247+
OperandsMapping =
248+
getOperandsMapping({getFPValueMapping(64), GPRValueMapping});
249+
break;
250+
}
251+
252+
MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
253+
if (onlyDefinesFP(*DefMI)) {
254+
OperandsMapping = getOperandsMapping(
255+
{getFPValueMapping(Ty.getSizeInBits()), GPRValueMapping});
256+
}
257+
break;
258+
}
167259
case TargetOpcode::G_CONSTANT:
168260
case TargetOpcode::G_FRAME_INDEX:
169261
case TargetOpcode::G_GLOBAL_VALUE:
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=riscv32 -mattr=+d -run-pass=regbankselect \
3+
# RUN: -simplify-mir -verify-machineinstrs %s \
4+
# RUN: -o - | FileCheck %s --check-prefixes=CHECK,RV32
5+
# RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=regbankselect \
6+
# RUN: -simplify-mir -verify-machineinstrs %s \
7+
# RUN: -o - | FileCheck %s --check-prefixes=CHECK,RV64
8+
9+
---
10+
name: fp_store_fp_def_f32
11+
legalized: true
12+
tracksRegLiveness: true
13+
body: |
14+
bb.1:
15+
liveins: $x10, $f10_f, $f11_f
16+
17+
; CHECK-LABEL: name: fp_store_fp_def_f32
18+
; CHECK: liveins: $x10, $f10_f, $f11_f
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
21+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f10_f
22+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f11_f
23+
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[COPY1]], [[COPY2]]
24+
; CHECK-NEXT: G_STORE [[FADD]](s32), [[COPY]](p0) :: (store (s32))
25+
; CHECK-NEXT: PseudoRET
26+
%0:_(p0) = COPY $x10
27+
%1:_(s32) = COPY $f10_f
28+
%2:_(s32) = COPY $f11_f
29+
%3:_(s32) = G_FADD %1, %2
30+
G_STORE %3(s32), %0(p0) :: (store (s32))
31+
PseudoRET
32+
33+
...
34+
---
35+
name: fp_store_fp_def_f64
36+
legalized: true
37+
tracksRegLiveness: true
38+
body: |
39+
bb.1:
40+
liveins: $x10, $f10_d, $f11_d
41+
42+
; CHECK-LABEL: name: fp_store_fp_def_f64
43+
; CHECK: liveins: $x10, $f10_d, $f11_d
44+
; CHECK-NEXT: {{ $}}
45+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
46+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
47+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $f11_d
48+
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s64) = G_FADD [[COPY1]], [[COPY2]]
49+
; CHECK-NEXT: G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64))
50+
; CHECK-NEXT: PseudoRET
51+
%0:_(p0) = COPY $x10
52+
%1:_(s64) = COPY $f10_d
53+
%2:_(s64) = COPY $f11_d
54+
%3:_(s64) = G_FADD %1, %2
55+
G_STORE %3(s64), %0(p0) :: (store (s64))
56+
PseudoRET
57+
58+
...
59+
---
60+
name: fp_store_no_def_f64
61+
legalized: true
62+
tracksRegLiveness: true
63+
body: |
64+
bb.1:
65+
liveins: $x10, $f10_d
66+
67+
; RV32-LABEL: name: fp_store_no_def_f64
68+
; RV32: liveins: $x10, $f10_d
69+
; RV32-NEXT: {{ $}}
70+
; RV32-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
71+
; RV32-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
72+
; RV32-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store (s64))
73+
; RV32-NEXT: PseudoRET
74+
;
75+
; RV64-LABEL: name: fp_store_no_def_f64
76+
; RV64: liveins: $x10, $f10_d
77+
; RV64-NEXT: {{ $}}
78+
; RV64-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
79+
; RV64-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
80+
; RV64-NEXT: [[COPY2:%[0-9]+]]:gprb(s64) = COPY [[COPY1]](s64)
81+
; RV64-NEXT: G_STORE [[COPY2]](s64), [[COPY]](p0) :: (store (s64))
82+
; RV64-NEXT: PseudoRET
83+
%0:_(p0) = COPY $x10
84+
%1:_(s64) = COPY $f10_d
85+
G_STORE %1(s64), %0(p0) :: (store (s64))
86+
PseudoRET
87+
88+
...
89+
---
90+
name: fp_load_fp_use_f32
91+
legalized: true
92+
tracksRegLiveness: true
93+
body: |
94+
bb.1:
95+
liveins: $x10, $f10_f
96+
97+
; CHECK-LABEL: name: fp_load_fp_use_f32
98+
; CHECK: liveins: $x10, $f10_f
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
101+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f10_f
102+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
103+
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[LOAD]], [[COPY1]]
104+
; CHECK-NEXT: $f10_f = COPY [[FADD]](s32)
105+
; CHECK-NEXT: PseudoRET implicit $f10_f
106+
%0:_(p0) = COPY $x10
107+
%1:_(s32) = COPY $f10_f
108+
%2:_(s32) = G_LOAD %0(p0) :: (load (s32))
109+
%3:_(s32) = G_FADD %2, %1
110+
$f10_f = COPY %3(s32)
111+
PseudoRET implicit $f10_f
112+
113+
...
114+
---
115+
name: fp_load_fp_use_f64
116+
legalized: true
117+
tracksRegLiveness: true
118+
body: |
119+
bb.1:
120+
liveins: $x10, $f10_d
121+
122+
; CHECK-LABEL: name: fp_load_fp_use_f64
123+
; CHECK: liveins: $x10, $f10_d
124+
; CHECK-NEXT: {{ $}}
125+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
126+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
127+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
128+
; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s64) = G_FADD [[LOAD]], [[COPY1]]
129+
; CHECK-NEXT: $f10_d = COPY [[FADD]](s64)
130+
; CHECK-NEXT: PseudoRET implicit $f10_d
131+
%0:_(p0) = COPY $x10
132+
%1:_(s64) = COPY $f10_d
133+
%2:_(s64) = G_LOAD %0(p0) :: (load (s64))
134+
%3:_(s64) = G_FADD %2, %1
135+
$f10_d = COPY %3(s64)
136+
PseudoRET implicit $f10_d
137+
138+
...
139+
---
140+
name: fp_load_no_use_f64
141+
legalized: true
142+
tracksRegLiveness: true
143+
body: |
144+
bb.1:
145+
liveins: $x10, $f10_d
146+
147+
; RV32-LABEL: name: fp_load_no_use_f64
148+
; RV32: liveins: $x10, $f10_d
149+
; RV32-NEXT: {{ $}}
150+
; RV32-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
151+
; RV32-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
152+
; RV32-NEXT: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
153+
; RV32-NEXT: $f10_d = COPY [[LOAD]](s64)
154+
; RV32-NEXT: PseudoRET implicit $f10_d
155+
;
156+
; RV64-LABEL: name: fp_load_no_use_f64
157+
; RV64: liveins: $x10, $f10_d
158+
; RV64-NEXT: {{ $}}
159+
; RV64-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
160+
; RV64-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
161+
; RV64-NEXT: [[LOAD:%[0-9]+]]:gprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
162+
; RV64-NEXT: $f10_d = COPY [[LOAD]](s64)
163+
; RV64-NEXT: PseudoRET implicit $f10_d
164+
%0:_(p0) = COPY $x10
165+
%1:_(s64) = COPY $f10_d
166+
%2:_(s64) = G_LOAD %0(p0) :: (load (s64))
167+
$f10_d = COPY %2(s64)
168+
PseudoRET implicit $f10_d
169+
170+
...

0 commit comments

Comments
 (0)