-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Don't promote f16/bf16 SELECT with Zfhmin/Zfbfmin. #107138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Select only needs branches and moves. We don't need to promote it. Promoting would canonicalize NaNs which select shouldn't do.
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Changes: Select only needs branches and moves. We don't need to promote it. Promoting would canonicalize NaNs, which select shouldn't do.
Patch is 54.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107138.diff
7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f50d378ed97aa6..aba8d3edeb0764 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -434,19 +434,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
static const unsigned ZfhminZfbfminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM,
- ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM,
- ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA,
- ISD::FDIV, ISD::FSQRT,
- ISD::STRICT_FMA, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL,
- ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
- ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
- ISD::SETCC, ISD::FCEIL,
- ISD::FFLOOR, ISD::FTRUNC,
- ISD::FRINT, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::SELECT};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
+ ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV,
+ ISD::FSQRT, ISD::STRICT_FMA, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
+ ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
+ ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
+ ISD::FROUNDEVEN};
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
@@ -454,6 +450,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
+ setOperationAction(ISD::SELECT, MVT::bf16, Custom);
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
setOperationAction(ISD::FREM, MVT::bf16, Promote);
@@ -467,7 +464,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
setOperationAction(FPRndMode, MVT::f16,
Subtarget.hasStdExtZfa() ? Legal : Custom);
- setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
} else {
setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
@@ -484,6 +480,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f16,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
index 9028d31d08655b..cb26d55629f870 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
@@ -39,6 +39,19 @@ def FCVT_S_BF16 : FPUnaryOp_r_frmlegacy<0b0100000, 0b00110, FPR32, FPR16, "fcvt.
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZfbfmin] in {
+def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond,
+ (bf16 FPR16:$truev), FPR16:$falsev),
+ (Select_FPR16_Using_CC_GPR GPR:$lhs, GPR:$rhs,
+ (IntCCtoRISCVCC $cc), FPR16:$truev, FPR16:$falsev)>;
+
+// Explicitly select 0 in the condition to X0. The register coalescer doesn't
+// always do it.
+def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond,
+ (bf16 FPR16:$truev),
+ FPR16:$falsev),
+ (Select_FPR16_Using_CC_GPR GPR:$lhs, (XLenVT X0),
+ (IntCCtoRISCVCC $cc), FPR16:$truev, FPR16:$falsev)>;
+
/// Loads
def : LdPat<load, FLH, bf16>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index d60cf33567d6d0..760045be38edf4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -405,18 +405,16 @@ foreach Ext = ZfhExts in {
}
let Predicates = [HasStdExtZfh] in {
-defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16, f16>;
-
def PseudoFROUND_H : PseudoFROUND<FPR16, f16>;
} // Predicates = [HasStdExtZfh]
let Predicates = [HasStdExtZhinx] in {
-defm Select_FPR16INX : SelectCC_GPR_rrirr<FPR16INX, f16>;
-
def PseudoFROUND_H_INX : PseudoFROUND<FPR16INX, f16>;
} // Predicates = [HasStdExtZhinx]
let Predicates = [HasStdExtZfhmin] in {
+defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16, f16>;
+
/// Loads
def : LdPat<load, FLH, f16>;
@@ -425,6 +423,8 @@ def : StPat<store, FSH, FPR16, f16>;
} // Predicates = [HasStdExtZfhmin]
let Predicates = [HasStdExtZhinxmin] in {
+defm Select_FPR16INX : SelectCC_GPR_rrirr<FPR16INX, f16>;
+
/// Loads
def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
(COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>;
diff --git a/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll b/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll
index 67acc9a772e357..2f7830c9c9d8ae 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll
@@ -17,14 +17,13 @@ define bfloat @select_fcmp_false(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_oeq(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_oeq:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: feq.s a0, fa5, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
; CHECK-NEXT: bnez a0, .LBB1_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB1_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp oeq bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -39,9 +38,8 @@ define bfloat @select_fcmp_ogt(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: flt.s a0, fa4, fa5
; CHECK-NEXT: bnez a0, .LBB2_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp ogt bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -56,9 +54,8 @@ define bfloat @select_fcmp_oge(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: fle.s a0, fa4, fa5
; CHECK-NEXT: bnez a0, .LBB3_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB3_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp oge bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -68,14 +65,13 @@ define bfloat @select_fcmp_oge(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_olt(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_olt:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: flt.s a0, fa5, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: flt.s a0, fa4, fa5
; CHECK-NEXT: bnez a0, .LBB4_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp olt bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -85,14 +81,13 @@ define bfloat @select_fcmp_olt(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_ole(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_ole:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fle.s a0, fa5, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: fle.s a0, fa4, fa5
; CHECK-NEXT: bnez a0, .LBB5_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB5_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp ole bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -102,16 +97,15 @@ define bfloat @select_fcmp_ole(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_one(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_one:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: flt.s a0, fa5, fa4
-; CHECK-NEXT: flt.s a1, fa4, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: flt.s a0, fa4, fa5
+; CHECK-NEXT: flt.s a1, fa5, fa4
; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: bnez a0, .LBB6_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp one bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -123,14 +117,13 @@ define bfloat @select_fcmp_ord(bfloat %a, bfloat %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
; CHECK-NEXT: feq.s a0, fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a1, fa4, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: feq.s a1, fa5, fa5
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: bnez a0, .LBB7_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa4, fa5
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa4
; CHECK-NEXT: ret
%1 = fcmp ord bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -140,16 +133,15 @@ define bfloat @select_fcmp_ord(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_ueq(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_ueq:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: flt.s a0, fa5, fa4
-; CHECK-NEXT: flt.s a1, fa4, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: flt.s a0, fa4, fa5
+; CHECK-NEXT: flt.s a1, fa5, fa4
; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: beqz a0, .LBB8_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp ueq bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -159,14 +151,13 @@ define bfloat @select_fcmp_ueq(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_ugt(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_ugt:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fle.s a0, fa5, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: fle.s a0, fa4, fa5
; CHECK-NEXT: beqz a0, .LBB9_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB9_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp ugt bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -176,14 +167,13 @@ define bfloat @select_fcmp_ugt(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_uge(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_uge:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: flt.s a0, fa5, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: flt.s a0, fa4, fa5
; CHECK-NEXT: beqz a0, .LBB10_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB10_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp uge bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -198,9 +188,8 @@ define bfloat @select_fcmp_ult(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: fle.s a0, fa4, fa5
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB11_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp ult bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -215,9 +204,8 @@ define bfloat @select_fcmp_ule(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: flt.s a0, fa4, fa5
; CHECK-NEXT: beqz a0, .LBB12_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB12_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp ule bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -227,14 +215,13 @@ define bfloat @select_fcmp_ule(bfloat %a, bfloat %b) nounwind {
define bfloat @select_fcmp_une(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: select_fcmp_une:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: feq.s a0, fa5, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
+; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
+; CHECK-NEXT: feq.s a0, fa4, fa5
; CHECK-NEXT: beqz a0, .LBB13_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa5, fa4
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB13_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = fcmp une bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
@@ -246,14 +233,13 @@ define bfloat @select_fcmp_uno(bfloat %a, bfloat %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
; CHECK-NEXT: feq.s a0, fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: feq.s a1, fa4, fa4
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: feq.s a1, fa5, fa5
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: beqz a0, .LBB14_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fmv.s fa4, fa5
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB14_2:
-; CHECK-NEXT: fcvt.bf16.s fa0, fa4
; CHECK-NEXT: ret
%1 = fcmp uno bfloat %a, %b
%2 = select i1 %1, bfloat %a, bfloat %b
diff --git a/llvm/test/CodeGen/RISCV/bfloat-select-icmp.ll b/llvm/test/CodeGen/RISCV/bfloat-select-icmp.ll
index e802d53edfdedf..670218cacf0fe9 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-select-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-select-icmp.ll
@@ -1,20 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
-; RUN: -target-abi ilp32f < %s | FileCheck %s
+; RUN: -target-abi ilp32f < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
-; RUN: -target-abi lp64f < %s | FileCheck %s
+; RUN: -target-abi lp64f < %s | FileCheck %s --check-prefixes=CHECK,RV64
define bfloat @select_icmp_eq(i32 signext %a, i32 signext %b, bfloat %c, bfloat %d) {
; CHECK-LABEL: select_icmp_eq:
; CHECK: # %bb.0:
; CHECK-NEXT: beq a0, a1, .LBB0_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp eq i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -26,12 +22,8 @@ define bfloat @select_icmp_ne(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bne a0, a1, .LBB1_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB1_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp ne i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -43,12 +35,8 @@ define bfloat @select_icmp_ugt(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bltu a1, a0, .LBB2_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp ugt i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -60,12 +48,8 @@ define bfloat @select_icmp_uge(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bgeu a0, a1, .LBB3_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB3_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp uge i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -77,12 +61,8 @@ define bfloat @select_icmp_ult(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bltu a0, a1, .LBB4_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp ult i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -94,12 +74,8 @@ define bfloat @select_icmp_ule(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bgeu a1, a0, .LBB5_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB5_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp ule i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -111,12 +87,8 @@ define bfloat @select_icmp_sgt(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: blt a1, a0, .LBB6_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp sgt i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -128,12 +100,8 @@ define bfloat @select_icmp_sge(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bge a0, a1, .LBB7_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp sge i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -145,12 +113,8 @@ define bfloat @select_icmp_slt(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: blt a0, a1, .LBB8_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp slt i32 %a, %b
%2 = select i1 %1, bfloat %c, bfloat %d
@@ -162,12 +126,8 @@ define bfloat @select_icmp_sle(i32 signext %a, i32 signext %b, bfloat %c, bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: bge a1, a0, .LBB9_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmv.s fa0, fa1
; CHECK-NEXT: .LBB9_2:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: ret
%1 = icmp...
[truncated]
|
@@ -39,6 +39,19 @@ def FCVT_S_BF16 : FPUnaryOp_r_frmlegacy<0b0100000, 0b00110, FPR32, FPR16, "fcvt. | |||
//===----------------------------------------------------------------------===// | |||
|
|||
let Predicates = [HasStdExtZfbfmin] in { | |||
def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond, | |||
(bf16 FPR16:$truev), FPR16:$falsev), | |||
(Select_FPR16_Using_CC_GPR GPR:$lhs, GPR:$rhs, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This uses the same pseudo instruction as Zfhmin/Zfh, since the register class is the same. That requires duplicating the isel patterns normally created by `SelectCC_GPR_rrirr`.
Select only needs branches and moves. We don't need to promote it. Promoting would canonicalize NaNs which select shouldn't do.