Skip to content

Commit 1c874bb

Browse files
authored
[RISCV] Don't promote f16/bf16 SELECT with Zfhmin/Zfbfmin. (#107138)
Select only needs branches and moves, so we don't need to promote it. Promoting would canonicalize NaNs, which select shouldn't do.
1 parent d3c10b5 commit 1c874bb

File tree

7 files changed

+208
-350
lines changed

7 files changed

+208
-350
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -434,26 +434,23 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
434434
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
435435

436436
static const unsigned ZfhminZfbfminPromoteOps[] = {
437-
ISD::FMINNUM, ISD::FMAXNUM,
438-
ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM,
439-
ISD::FADD, ISD::FSUB,
440-
ISD::FMUL, ISD::FMA,
441-
ISD::FDIV, ISD::FSQRT,
442-
ISD::STRICT_FMA, ISD::STRICT_FADD,
443-
ISD::STRICT_FSUB, ISD::STRICT_FMUL,
444-
ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
445-
ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
446-
ISD::SETCC, ISD::FCEIL,
447-
ISD::FFLOOR, ISD::FTRUNC,
448-
ISD::FRINT, ISD::FROUND,
449-
ISD::FROUNDEVEN, ISD::SELECT};
437+
ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
438+
ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
439+
ISD::FMUL, ISD::FMA, ISD::FDIV,
440+
ISD::FSQRT, ISD::STRICT_FMA, ISD::STRICT_FADD,
441+
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
442+
ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
443+
ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
444+
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
445+
ISD::FROUNDEVEN};
450446

451447
if (Subtarget.hasStdExtZfbfmin()) {
452448
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
453449
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
454450
setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
455451
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
456452
setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
453+
setOperationAction(ISD::SELECT, MVT::bf16, Custom);
457454
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
458455
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
459456
setOperationAction(ISD::FREM, MVT::bf16, Promote);
@@ -469,7 +466,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
469466
setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
470467
setOperationAction(FPRndMode, MVT::f16,
471468
Subtarget.hasStdExtZfa() ? Legal : Custom);
472-
setOperationAction(ISD::SELECT, MVT::f16, Custom);
473469
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
474470
} else {
475471
setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
@@ -488,6 +484,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
488484
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
489485
setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
490486
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
487+
setOperationAction(ISD::SELECT, MVT::f16, Custom);
491488
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
492489

493490
setOperationAction(ISD::FNEARBYINT, MVT::f16,

llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ def FCVT_S_BF16 : FPUnaryOp_r_frmlegacy<0b0100000, 0b00110, FPR32, FPR16, "fcvt.
3939
//===----------------------------------------------------------------------===//
4040

4141
let Predicates = [HasStdExtZfbfmin] in {
42+
def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond,
43+
(bf16 FPR16:$truev), FPR16:$falsev),
44+
(Select_FPR16_Using_CC_GPR GPR:$lhs, GPR:$rhs,
45+
(IntCCtoRISCVCC $cc), FPR16:$truev, FPR16:$falsev)>;
46+
47+
// Explicitly select 0 in the condition to X0. The register coalescer doesn't
48+
// always do it.
49+
def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond,
50+
(bf16 FPR16:$truev),
51+
FPR16:$falsev),
52+
(Select_FPR16_Using_CC_GPR GPR:$lhs, (XLenVT X0),
53+
(IntCCtoRISCVCC $cc), FPR16:$truev, FPR16:$falsev)>;
54+
4255
/// Loads
4356
def : LdPat<load, FLH, bf16>;
4457

llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,18 +405,16 @@ foreach Ext = ZfhExts in {
405405
}
406406

407407
let Predicates = [HasStdExtZfh] in {
408-
defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16, f16>;
409-
410408
def PseudoFROUND_H : PseudoFROUND<FPR16, f16>;
411409
} // Predicates = [HasStdExtZfh]
412410

413411
let Predicates = [HasStdExtZhinx] in {
414-
defm Select_FPR16INX : SelectCC_GPR_rrirr<FPR16INX, f16>;
415-
416412
def PseudoFROUND_H_INX : PseudoFROUND<FPR16INX, f16>;
417413
} // Predicates = [HasStdExtZhinx]
418414

419415
let Predicates = [HasStdExtZfhmin] in {
416+
defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16, f16>;
417+
420418
/// Loads
421419
def : LdPat<load, FLH, f16>;
422420

@@ -425,6 +423,8 @@ def : StPat<store, FSH, FPR16, f16>;
425423
} // Predicates = [HasStdExtZfhmin]
426424

427425
let Predicates = [HasStdExtZhinxmin] in {
426+
defm Select_FPR16INX : SelectCC_GPR_rrirr<FPR16INX, f16>;
427+
428428
/// Loads
429429
def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
430430
(COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>;

llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll

Lines changed: 44 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,13 @@ define bfloat @select_fcmp_false(bfloat %a, bfloat %b) nounwind {
1717
define bfloat @select_fcmp_oeq(bfloat %a, bfloat %b) nounwind {
1818
; CHECK-LABEL: select_fcmp_oeq:
1919
; CHECK: # %bb.0:
20-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
21-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
22-
; CHECK-NEXT: feq.s a0, fa5, fa4
20+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
21+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
22+
; CHECK-NEXT: feq.s a0, fa4, fa5
2323
; CHECK-NEXT: bnez a0, .LBB1_2
2424
; CHECK-NEXT: # %bb.1:
25-
; CHECK-NEXT: fmv.s fa5, fa4
25+
; CHECK-NEXT: fmv.s fa0, fa1
2626
; CHECK-NEXT: .LBB1_2:
27-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
2827
; CHECK-NEXT: ret
2928
%1 = fcmp oeq bfloat %a, %b
3029
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -39,9 +38,8 @@ define bfloat @select_fcmp_ogt(bfloat %a, bfloat %b) nounwind {
3938
; CHECK-NEXT: flt.s a0, fa4, fa5
4039
; CHECK-NEXT: bnez a0, .LBB2_2
4140
; CHECK-NEXT: # %bb.1:
42-
; CHECK-NEXT: fmv.s fa5, fa4
41+
; CHECK-NEXT: fmv.s fa0, fa1
4342
; CHECK-NEXT: .LBB2_2:
44-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
4543
; CHECK-NEXT: ret
4644
%1 = fcmp ogt bfloat %a, %b
4745
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -56,9 +54,8 @@ define bfloat @select_fcmp_oge(bfloat %a, bfloat %b) nounwind {
5654
; CHECK-NEXT: fle.s a0, fa4, fa5
5755
; CHECK-NEXT: bnez a0, .LBB3_2
5856
; CHECK-NEXT: # %bb.1:
59-
; CHECK-NEXT: fmv.s fa5, fa4
57+
; CHECK-NEXT: fmv.s fa0, fa1
6058
; CHECK-NEXT: .LBB3_2:
61-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
6259
; CHECK-NEXT: ret
6360
%1 = fcmp oge bfloat %a, %b
6461
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -68,14 +65,13 @@ define bfloat @select_fcmp_oge(bfloat %a, bfloat %b) nounwind {
6865
define bfloat @select_fcmp_olt(bfloat %a, bfloat %b) nounwind {
6966
; CHECK-LABEL: select_fcmp_olt:
7067
; CHECK: # %bb.0:
71-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
72-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
73-
; CHECK-NEXT: flt.s a0, fa5, fa4
68+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
69+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
70+
; CHECK-NEXT: flt.s a0, fa4, fa5
7471
; CHECK-NEXT: bnez a0, .LBB4_2
7572
; CHECK-NEXT: # %bb.1:
76-
; CHECK-NEXT: fmv.s fa5, fa4
73+
; CHECK-NEXT: fmv.s fa0, fa1
7774
; CHECK-NEXT: .LBB4_2:
78-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
7975
; CHECK-NEXT: ret
8076
%1 = fcmp olt bfloat %a, %b
8177
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -85,14 +81,13 @@ define bfloat @select_fcmp_olt(bfloat %a, bfloat %b) nounwind {
8581
define bfloat @select_fcmp_ole(bfloat %a, bfloat %b) nounwind {
8682
; CHECK-LABEL: select_fcmp_ole:
8783
; CHECK: # %bb.0:
88-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
89-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
90-
; CHECK-NEXT: fle.s a0, fa5, fa4
84+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
85+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
86+
; CHECK-NEXT: fle.s a0, fa4, fa5
9187
; CHECK-NEXT: bnez a0, .LBB5_2
9288
; CHECK-NEXT: # %bb.1:
93-
; CHECK-NEXT: fmv.s fa5, fa4
89+
; CHECK-NEXT: fmv.s fa0, fa1
9490
; CHECK-NEXT: .LBB5_2:
95-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
9691
; CHECK-NEXT: ret
9792
%1 = fcmp ole bfloat %a, %b
9893
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -102,16 +97,15 @@ define bfloat @select_fcmp_ole(bfloat %a, bfloat %b) nounwind {
10297
define bfloat @select_fcmp_one(bfloat %a, bfloat %b) nounwind {
10398
; CHECK-LABEL: select_fcmp_one:
10499
; CHECK: # %bb.0:
105-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
106-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
107-
; CHECK-NEXT: flt.s a0, fa5, fa4
108-
; CHECK-NEXT: flt.s a1, fa4, fa5
100+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
101+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
102+
; CHECK-NEXT: flt.s a0, fa4, fa5
103+
; CHECK-NEXT: flt.s a1, fa5, fa4
109104
; CHECK-NEXT: or a0, a1, a0
110105
; CHECK-NEXT: bnez a0, .LBB6_2
111106
; CHECK-NEXT: # %bb.1:
112-
; CHECK-NEXT: fmv.s fa5, fa4
107+
; CHECK-NEXT: fmv.s fa0, fa1
113108
; CHECK-NEXT: .LBB6_2:
114-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
115109
; CHECK-NEXT: ret
116110
%1 = fcmp one bfloat %a, %b
117111
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -123,14 +117,13 @@ define bfloat @select_fcmp_ord(bfloat %a, bfloat %b) nounwind {
123117
; CHECK: # %bb.0:
124118
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
125119
; CHECK-NEXT: feq.s a0, fa5, fa5
126-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
127-
; CHECK-NEXT: feq.s a1, fa4, fa4
120+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
121+
; CHECK-NEXT: feq.s a1, fa5, fa5
128122
; CHECK-NEXT: and a0, a1, a0
129123
; CHECK-NEXT: bnez a0, .LBB7_2
130124
; CHECK-NEXT: # %bb.1:
131-
; CHECK-NEXT: fmv.s fa4, fa5
125+
; CHECK-NEXT: fmv.s fa0, fa1
132126
; CHECK-NEXT: .LBB7_2:
133-
; CHECK-NEXT: fcvt.bf16.s fa0, fa4
134127
; CHECK-NEXT: ret
135128
%1 = fcmp ord bfloat %a, %b
136129
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -140,16 +133,15 @@ define bfloat @select_fcmp_ord(bfloat %a, bfloat %b) nounwind {
140133
define bfloat @select_fcmp_ueq(bfloat %a, bfloat %b) nounwind {
141134
; CHECK-LABEL: select_fcmp_ueq:
142135
; CHECK: # %bb.0:
143-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
144-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
145-
; CHECK-NEXT: flt.s a0, fa5, fa4
146-
; CHECK-NEXT: flt.s a1, fa4, fa5
136+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
137+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
138+
; CHECK-NEXT: flt.s a0, fa4, fa5
139+
; CHECK-NEXT: flt.s a1, fa5, fa4
147140
; CHECK-NEXT: or a0, a1, a0
148141
; CHECK-NEXT: beqz a0, .LBB8_2
149142
; CHECK-NEXT: # %bb.1:
150-
; CHECK-NEXT: fmv.s fa5, fa4
143+
; CHECK-NEXT: fmv.s fa0, fa1
151144
; CHECK-NEXT: .LBB8_2:
152-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
153145
; CHECK-NEXT: ret
154146
%1 = fcmp ueq bfloat %a, %b
155147
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -159,14 +151,13 @@ define bfloat @select_fcmp_ueq(bfloat %a, bfloat %b) nounwind {
159151
define bfloat @select_fcmp_ugt(bfloat %a, bfloat %b) nounwind {
160152
; CHECK-LABEL: select_fcmp_ugt:
161153
; CHECK: # %bb.0:
162-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
163-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
164-
; CHECK-NEXT: fle.s a0, fa5, fa4
154+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
155+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
156+
; CHECK-NEXT: fle.s a0, fa4, fa5
165157
; CHECK-NEXT: beqz a0, .LBB9_2
166158
; CHECK-NEXT: # %bb.1:
167-
; CHECK-NEXT: fmv.s fa5, fa4
159+
; CHECK-NEXT: fmv.s fa0, fa1
168160
; CHECK-NEXT: .LBB9_2:
169-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
170161
; CHECK-NEXT: ret
171162
%1 = fcmp ugt bfloat %a, %b
172163
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -176,14 +167,13 @@ define bfloat @select_fcmp_ugt(bfloat %a, bfloat %b) nounwind {
176167
define bfloat @select_fcmp_uge(bfloat %a, bfloat %b) nounwind {
177168
; CHECK-LABEL: select_fcmp_uge:
178169
; CHECK: # %bb.0:
179-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
180-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
181-
; CHECK-NEXT: flt.s a0, fa5, fa4
170+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
171+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
172+
; CHECK-NEXT: flt.s a0, fa4, fa5
182173
; CHECK-NEXT: beqz a0, .LBB10_2
183174
; CHECK-NEXT: # %bb.1:
184-
; CHECK-NEXT: fmv.s fa5, fa4
175+
; CHECK-NEXT: fmv.s fa0, fa1
185176
; CHECK-NEXT: .LBB10_2:
186-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
187177
; CHECK-NEXT: ret
188178
%1 = fcmp uge bfloat %a, %b
189179
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -198,9 +188,8 @@ define bfloat @select_fcmp_ult(bfloat %a, bfloat %b) nounwind {
198188
; CHECK-NEXT: fle.s a0, fa4, fa5
199189
; CHECK-NEXT: beqz a0, .LBB11_2
200190
; CHECK-NEXT: # %bb.1:
201-
; CHECK-NEXT: fmv.s fa5, fa4
191+
; CHECK-NEXT: fmv.s fa0, fa1
202192
; CHECK-NEXT: .LBB11_2:
203-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
204193
; CHECK-NEXT: ret
205194
%1 = fcmp ult bfloat %a, %b
206195
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -215,9 +204,8 @@ define bfloat @select_fcmp_ule(bfloat %a, bfloat %b) nounwind {
215204
; CHECK-NEXT: flt.s a0, fa4, fa5
216205
; CHECK-NEXT: beqz a0, .LBB12_2
217206
; CHECK-NEXT: # %bb.1:
218-
; CHECK-NEXT: fmv.s fa5, fa4
207+
; CHECK-NEXT: fmv.s fa0, fa1
219208
; CHECK-NEXT: .LBB12_2:
220-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
221209
; CHECK-NEXT: ret
222210
%1 = fcmp ule bfloat %a, %b
223211
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -227,14 +215,13 @@ define bfloat @select_fcmp_ule(bfloat %a, bfloat %b) nounwind {
227215
define bfloat @select_fcmp_une(bfloat %a, bfloat %b) nounwind {
228216
; CHECK-LABEL: select_fcmp_une:
229217
; CHECK: # %bb.0:
230-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
231-
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
232-
; CHECK-NEXT: feq.s a0, fa5, fa4
218+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
219+
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
220+
; CHECK-NEXT: feq.s a0, fa4, fa5
233221
; CHECK-NEXT: beqz a0, .LBB13_2
234222
; CHECK-NEXT: # %bb.1:
235-
; CHECK-NEXT: fmv.s fa5, fa4
223+
; CHECK-NEXT: fmv.s fa0, fa1
236224
; CHECK-NEXT: .LBB13_2:
237-
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
238225
; CHECK-NEXT: ret
239226
%1 = fcmp une bfloat %a, %b
240227
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -246,14 +233,13 @@ define bfloat @select_fcmp_uno(bfloat %a, bfloat %b) nounwind {
246233
; CHECK: # %bb.0:
247234
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
248235
; CHECK-NEXT: feq.s a0, fa5, fa5
249-
; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
250-
; CHECK-NEXT: feq.s a1, fa4, fa4
236+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
237+
; CHECK-NEXT: feq.s a1, fa5, fa5
251238
; CHECK-NEXT: and a0, a1, a0
252239
; CHECK-NEXT: beqz a0, .LBB14_2
253240
; CHECK-NEXT: # %bb.1:
254-
; CHECK-NEXT: fmv.s fa4, fa5
241+
; CHECK-NEXT: fmv.s fa0, fa1
255242
; CHECK-NEXT: .LBB14_2:
256-
; CHECK-NEXT: fcvt.bf16.s fa0, fa4
257243
; CHECK-NEXT: ret
258244
%1 = fcmp uno bfloat %a, %b
259245
%2 = select i1 %1, bfloat %a, bfloat %b

0 commit comments

Comments
 (0)