Skip to content

Commit df04efa

Browse files
topperc authored and AlexisPerry committed
[RISCV] Add FPR16 regbank and start legalizing f16 operations for Zfh. (llvm#96582)
1 parent e54ae4e commit df04efa

File tree

5 files changed

+802
-19
lines changed

5 files changed

+802
-19
lines changed

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -371,17 +371,25 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
371371

372372
// FP Operations
373373

374-
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
375-
G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
376-
.legalIf(typeIsScalarFPArith(0, ST));
374+
auto &FPArithActions = getActionDefinitionsBuilder(
375+
{G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
376+
G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
377+
.legalIf(typeIsScalarFPArith(0, ST));
378+
// TODO: Fold this into typeIsScalarFPArith.
379+
if (ST.hasStdExtZfh())
380+
FPArithActions.legalFor({s16});
377381

378382
getActionDefinitionsBuilder(G_FREM)
379383
.libcallFor({s32, s64})
380384
.minScalar(0, s32)
381385
.scalarize(0);
382386

383-
getActionDefinitionsBuilder(G_FCOPYSIGN)
384-
.legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));
387+
auto &CopySignActions =
388+
getActionDefinitionsBuilder(G_FCOPYSIGN)
389+
.legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));
390+
// TODO: Fold this into typeIsScalarFPArith.
391+
if (ST.hasStdExtZfh())
392+
CopySignActions.legalFor({s16, s16});
385393

386394
getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
387395
[=, &ST](const LegalityQuery &Query) -> bool {

llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp

Lines changed: 33 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] = {
2929
// clang-format off
3030
{0, 32, GPRBRegBank},
3131
{0, 64, GPRBRegBank},
32+
{0, 16, FPRBRegBank},
3233
{0, 32, FPRBRegBank},
3334
{0, 64, FPRBRegBank},
3435
{0, 64, VRBRegBank},
@@ -41,12 +42,13 @@ const RegisterBankInfo::PartialMapping PartMappings[] = {
4142
enum PartialMappingIdx {
4243
PMI_GPRB32 = 0,
4344
PMI_GPRB64 = 1,
44-
PMI_FPRB32 = 2,
45-
PMI_FPRB64 = 3,
46-
PMI_VRB64 = 4,
47-
PMI_VRB128 = 5,
48-
PMI_VRB256 = 6,
49-
PMI_VRB512 = 7,
45+
PMI_FPRB16 = 2,
46+
PMI_FPRB32 = 3,
47+
PMI_FPRB64 = 4,
48+
PMI_VRB64 = 5,
49+
PMI_VRB128 = 6,
50+
PMI_VRB256 = 7,
51+
PMI_VRB512 = 8,
5052
};
5153

5254
const RegisterBankInfo::ValueMapping ValueMappings[] = {
@@ -60,6 +62,10 @@ const RegisterBankInfo::ValueMapping ValueMappings[] = {
6062
{&PartMappings[PMI_GPRB64], 1},
6163
{&PartMappings[PMI_GPRB64], 1},
6264
{&PartMappings[PMI_GPRB64], 1},
65+
// Maximum 3 FPR operands; 16 bit.
66+
{&PartMappings[PMI_FPRB16], 1},
67+
{&PartMappings[PMI_FPRB16], 1},
68+
{&PartMappings[PMI_FPRB16], 1},
6369
// Maximum 3 FPR operands; 32 bit.
6470
{&PartMappings[PMI_FPRB32], 1},
6571
{&PartMappings[PMI_FPRB32], 1},
@@ -90,12 +96,13 @@ enum ValueMappingIdx {
9096
InvalidIdx = 0,
9197
GPRB32Idx = 1,
9298
GPRB64Idx = 4,
93-
FPRB32Idx = 7,
94-
FPRB64Idx = 10,
95-
VRB64Idx = 13,
96-
VRB128Idx = 16,
97-
VRB256Idx = 19,
98-
VRB512Idx = 22,
99+
FPRB16Idx = 7,
100+
FPRB32Idx = 10,
101+
FPRB64Idx = 13,
102+
VRB64Idx = 16,
103+
VRB128Idx = 19,
104+
VRB256Idx = 22,
105+
VRB512Idx = 25,
99106
};
100107
} // namespace RISCV
101108
} // namespace llvm
@@ -151,8 +158,20 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
151158
}
152159

153160
static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
154-
assert(Size == 32 || Size == 64);
155-
unsigned Idx = Size == 64 ? RISCV::FPRB64Idx : RISCV::FPRB32Idx;
161+
unsigned Idx;
162+
switch (Size) {
163+
default:
164+
llvm_unreachable("Unexpected size");
165+
case 16:
166+
Idx = RISCV::FPRB16Idx;
167+
break;
168+
case 32:
169+
Idx = RISCV::FPRB32Idx;
170+
break;
171+
case 64:
172+
Idx = RISCV::FPRB64Idx;
173+
break;
174+
}
156175
return &RISCV::ValueMappings[Idx];
157176
}
158177

Lines changed: 266 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,266 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=riscv32 -mattr=+zfh -run-pass=instruction-select \
3+
# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
4+
# RUN: llc -mtriple=riscv64 -mattr=+zfh -run-pass=instruction-select \
5+
# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
6+
7+
---
8+
name: fadd_f16
9+
legalized: true
10+
regBankSelected: true
11+
tracksRegLiveness: true
12+
body: |
13+
bb.0:
14+
liveins: $f10_h, $f11_h
15+
16+
; CHECK-LABEL: name: fadd_f16
17+
; CHECK: liveins: $f10_h, $f11_h
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
20+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
21+
; CHECK-NEXT: [[FADD_H:%[0-9]+]]:fpr16 = nofpexcept FADD_H [[COPY]], [[COPY1]], 7
22+
; CHECK-NEXT: $f10_h = COPY [[FADD_H]]
23+
; CHECK-NEXT: PseudoRET implicit $f10_h
24+
%0:fprb(s16) = COPY $f10_h
25+
%1:fprb(s16) = COPY $f11_h
26+
%2:fprb(s16) = G_FADD %0, %1
27+
$f10_h = COPY %2(s16)
28+
PseudoRET implicit $f10_h
29+
30+
...
31+
---
32+
name: fsub_f16
33+
legalized: true
34+
regBankSelected: true
35+
tracksRegLiveness: true
36+
body: |
37+
bb.0:
38+
liveins: $f10_h, $f11_h
39+
40+
; CHECK-LABEL: name: fsub_f16
41+
; CHECK: liveins: $f10_h, $f11_h
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
44+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
45+
; CHECK-NEXT: [[FSUB_H:%[0-9]+]]:fpr16 = nofpexcept FSUB_H [[COPY]], [[COPY1]], 7
46+
; CHECK-NEXT: $f10_h = COPY [[FSUB_H]]
47+
; CHECK-NEXT: PseudoRET implicit $f10_h
48+
%0:fprb(s16) = COPY $f10_h
49+
%1:fprb(s16) = COPY $f11_h
50+
%2:fprb(s16) = G_FSUB %0, %1
51+
$f10_h = COPY %2(s16)
52+
PseudoRET implicit $f10_h
53+
54+
...
55+
---
56+
name: fmul_f16
57+
legalized: true
58+
regBankSelected: true
59+
tracksRegLiveness: true
60+
body: |
61+
bb.0:
62+
liveins: $f10_h, $f11_h
63+
64+
; CHECK-LABEL: name: fmul_f16
65+
; CHECK: liveins: $f10_h, $f11_h
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
68+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
69+
; CHECK-NEXT: [[FMUL_H:%[0-9]+]]:fpr16 = nofpexcept FMUL_H [[COPY]], [[COPY1]], 7
70+
; CHECK-NEXT: $f10_h = COPY [[FMUL_H]]
71+
; CHECK-NEXT: PseudoRET implicit $f10_h
72+
%0:fprb(s16) = COPY $f10_h
73+
%1:fprb(s16) = COPY $f11_h
74+
%2:fprb(s16) = G_FMUL %0, %1
75+
$f10_h = COPY %2(s16)
76+
PseudoRET implicit $f10_h
77+
78+
...
79+
---
80+
name: fdiv_f16
81+
legalized: true
82+
regBankSelected: true
83+
tracksRegLiveness: true
84+
body: |
85+
bb.0:
86+
liveins: $f10_h, $f11_h
87+
88+
; CHECK-LABEL: name: fdiv_f16
89+
; CHECK: liveins: $f10_h, $f11_h
90+
; CHECK-NEXT: {{ $}}
91+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
92+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
93+
; CHECK-NEXT: [[FDIV_H:%[0-9]+]]:fpr16 = nofpexcept FDIV_H [[COPY]], [[COPY1]], 7
94+
; CHECK-NEXT: $f10_h = COPY [[FDIV_H]]
95+
; CHECK-NEXT: PseudoRET implicit $f10_h
96+
%0:fprb(s16) = COPY $f10_h
97+
%1:fprb(s16) = COPY $f11_h
98+
%2:fprb(s16) = G_FDIV %0, %1
99+
$f10_h = COPY %2(s16)
100+
PseudoRET implicit $f10_h
101+
102+
...
103+
---
104+
name: fma_f16
105+
legalized: true
106+
regBankSelected: true
107+
tracksRegLiveness: true
108+
body: |
109+
bb.0:
110+
liveins: $f10_h, $f11_h, $f12_h
111+
112+
; CHECK-LABEL: name: fma_f16
113+
; CHECK: liveins: $f10_h, $f11_h, $f12_h
114+
; CHECK-NEXT: {{ $}}
115+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
116+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
117+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $f12_h
118+
; CHECK-NEXT: [[FMADD_H:%[0-9]+]]:fpr16 = nofpexcept FMADD_H [[COPY]], [[COPY1]], [[COPY2]], 7
119+
; CHECK-NEXT: $f10_h = COPY [[FMADD_H]]
120+
; CHECK-NEXT: PseudoRET implicit $f10_h
121+
%0:fprb(s16) = COPY $f10_h
122+
%1:fprb(s16) = COPY $f11_h
123+
%2:fprb(s16) = COPY $f12_h
124+
%3:fprb(s16) = G_FMA %0, %1, %2
125+
$f10_h = COPY %3(s16)
126+
PseudoRET implicit $f10_h
127+
128+
...
129+
---
130+
name: fneg_f16
131+
legalized: true
132+
regBankSelected: true
133+
tracksRegLiveness: true
134+
body: |
135+
bb.0:
136+
liveins: $f10_h, $f11_h, $f12_h
137+
138+
; CHECK-LABEL: name: fneg_f16
139+
; CHECK: liveins: $f10_h, $f11_h, $f12_h
140+
; CHECK-NEXT: {{ $}}
141+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
142+
; CHECK-NEXT: [[FSGNJN_H:%[0-9]+]]:fpr16 = FSGNJN_H [[COPY]], [[COPY]]
143+
; CHECK-NEXT: $f10_h = COPY [[FSGNJN_H]]
144+
; CHECK-NEXT: PseudoRET implicit $f10_h
145+
%0:fprb(s16) = COPY $f10_h
146+
%1:fprb(s16) = G_FNEG %0
147+
$f10_h = COPY %1(s16)
148+
PseudoRET implicit $f10_h
149+
150+
...
151+
---
152+
name: fabs_f16
153+
legalized: true
154+
regBankSelected: true
155+
tracksRegLiveness: true
156+
body: |
157+
bb.0:
158+
liveins: $f10_h, $f11_h, $f12_h
159+
160+
; CHECK-LABEL: name: fabs_f16
161+
; CHECK: liveins: $f10_h, $f11_h, $f12_h
162+
; CHECK-NEXT: {{ $}}
163+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
164+
; CHECK-NEXT: [[FSGNJX_H:%[0-9]+]]:fpr16 = FSGNJX_H [[COPY]], [[COPY]]
165+
; CHECK-NEXT: $f10_h = COPY [[FSGNJX_H]]
166+
; CHECK-NEXT: PseudoRET implicit $f10_h
167+
%0:fprb(s16) = COPY $f10_h
168+
%1:fprb(s16) = G_FABS %0
169+
$f10_h = COPY %1(s16)
170+
PseudoRET implicit $f10_h
171+
172+
...
173+
---
174+
name: fsqrt_f16
175+
legalized: true
176+
regBankSelected: true
177+
tracksRegLiveness: true
178+
body: |
179+
bb.0:
180+
liveins: $f10_h, $f11_h, $f12_h
181+
182+
; CHECK-LABEL: name: fsqrt_f16
183+
; CHECK: liveins: $f10_h, $f11_h, $f12_h
184+
; CHECK-NEXT: {{ $}}
185+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
186+
; CHECK-NEXT: [[FSQRT_H:%[0-9]+]]:fpr16 = nofpexcept FSQRT_H [[COPY]], 7
187+
; CHECK-NEXT: $f10_h = COPY [[FSQRT_H]]
188+
; CHECK-NEXT: PseudoRET implicit $f10_h
189+
%0:fprb(s16) = COPY $f10_h
190+
%1:fprb(s16) = G_FSQRT %0
191+
$f10_h = COPY %1(s16)
192+
PseudoRET implicit $f10_h
193+
194+
...
195+
---
196+
name: fmaxnum_f16
197+
legalized: true
198+
regBankSelected: true
199+
tracksRegLiveness: true
200+
body: |
201+
bb.0:
202+
liveins: $f10_h, $f11_h
203+
204+
; CHECK-LABEL: name: fmaxnum_f16
205+
; CHECK: liveins: $f10_h, $f11_h
206+
; CHECK-NEXT: {{ $}}
207+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
208+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
209+
; CHECK-NEXT: [[FMAX_H:%[0-9]+]]:fpr16 = nofpexcept FMAX_H [[COPY]], [[COPY1]]
210+
; CHECK-NEXT: $f10_h = COPY [[FMAX_H]]
211+
; CHECK-NEXT: PseudoRET implicit $f10_h
212+
%0:fprb(s16) = COPY $f10_h
213+
%1:fprb(s16) = COPY $f11_h
214+
%2:fprb(s16) = G_FMAXNUM %0, %1
215+
$f10_h = COPY %2(s16)
216+
PseudoRET implicit $f10_h
217+
218+
...
219+
---
220+
name: fminnum_f16
221+
legalized: true
222+
regBankSelected: true
223+
tracksRegLiveness: true
224+
body: |
225+
bb.0:
226+
liveins: $f10_h, $f11_h
227+
228+
; CHECK-LABEL: name: fminnum_f16
229+
; CHECK: liveins: $f10_h, $f11_h
230+
; CHECK-NEXT: {{ $}}
231+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
232+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
233+
; CHECK-NEXT: [[FMIN_H:%[0-9]+]]:fpr16 = nofpexcept FMIN_H [[COPY]], [[COPY1]]
234+
; CHECK-NEXT: $f10_h = COPY [[FMIN_H]]
235+
; CHECK-NEXT: PseudoRET implicit $f10_h
236+
%0:fprb(s16) = COPY $f10_h
237+
%1:fprb(s16) = COPY $f11_h
238+
%2:fprb(s16) = G_FMINNUM %0, %1
239+
$f10_h = COPY %2(s16)
240+
PseudoRET implicit $f10_h
241+
242+
...
243+
---
244+
name: fcopysign_f16
245+
legalized: true
246+
regBankSelected: true
247+
tracksRegLiveness: true
248+
body: |
249+
bb.0:
250+
liveins: $f10_h, $f11_h
251+
252+
; CHECK-LABEL: name: fcopysign_f16
253+
; CHECK: liveins: $f10_h, $f11_h
254+
; CHECK-NEXT: {{ $}}
255+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
256+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
257+
; CHECK-NEXT: [[FSGNJ_H:%[0-9]+]]:fpr16 = FSGNJ_H [[COPY]], [[COPY1]]
258+
; CHECK-NEXT: $f10_h = COPY [[FSGNJ_H]]
259+
; CHECK-NEXT: PseudoRET implicit $f10_h
260+
%0:fprb(s16) = COPY $f10_h
261+
%1:fprb(s16) = COPY $f11_h
262+
%2:fprb(s16) = G_FCOPYSIGN %0, %1
263+
$f10_h = COPY %2(s16)
264+
PseudoRET implicit $f10_h
265+
266+
...

0 commit comments

Comments
 (0)