-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM} #128800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) ChangesThis patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while. Patch is 24.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128800.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f56097fdbb51a..5f70588ab84e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
}
SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
- MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode());
+ MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType()
+ : Node->getOperand(0).getSimpleValueType();
MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
MVT ScalarVT = Node->getSimpleValueType(0);
MVT NewScalarVT = NewVecVT.getVectorElementType();
@@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
SDLoc DL(Node);
SmallVector<SDValue, 4> Operands(Node->getNumOperands());
- // promote the initial value.
// FIXME: Support integer.
assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
"Only FP promotion is supported");
- Operands[0] =
- DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
- for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j)
if (Node->getOperand(j).getValueType().isVector() &&
- !(ISD::isVPOpcode(Node->getOpcode()) &&
+ !(IsVPOpcode &&
ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
// promote the vector operand.
// FIXME: Support integer.
@@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
"Only FP promotion is supported");
Operands[j] =
DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ } else if (Node->getOperand(j).getValueType().isFloatingPoint()) {
+ // prmote the initial value.
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
} else {
Operands[j] = Node->getOperand(j); // Skip VL operand.
}
@@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::SINT_TO_FP ||
Node->getOpcode() == ISD::SETCC ||
Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::VECREDUCE_FMAX ||
+ Node->getOpcode() == ISD::VECREDUCE_FMIN ||
+ Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM ||
+ Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::ATOMIC_STORE ||
@@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
}
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAXIMUM:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index de4447fb0cf1a..13345b76e7e92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
+ case ISD::VECTOR_FIND_LAST_ACTIVE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
- case ISD::VECTOR_FIND_LAST_ACTIVE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
+ // Defer non-vector results to LegalizeDAG.
+ if (Action == TargetLowering::Promote)
+ Action = TargetLowering::Legal;
break;
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_SEQ_FMUL:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..759cf531b74b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminZvfbfminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
- ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
- ISD::STRICT_FMA};
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FMUL,
+ ISD::FMA,
+ ISD::FDIV,
+ ISD::FSQRT,
+ ISD::FCEIL,
+ ISD::FTRUNC,
+ ISD::FFLOOR,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FRINT,
+ ISD::FNEARBYINT,
+ ISD::IS_FPCLASS,
+ ISD::SETCC,
+ ISD::FMAXIMUM,
+ ISD::FMINIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA,
+ ISD::VECREDUCE_FMIN,
+ ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_FMAXIMUM};
// TODO: support more vp ops.
static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
new file mode 100644
index 0000000000000..e269b13137d44
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmin_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmax_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fminimum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8
+; ZVFH-NEXT: vcpop.m a0, v9
+; ZVFH-NEXT: beqz a0, .LBB4_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB4_2:
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT: vcpop.m a0, v8
+; ZVFHMIN-NEXT: beqz a0, .LBB4_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB4_2:
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmaximum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8
+; ZVFH-NEXT: vcpop.m a0, v9
+; ZVFH-NEXT: beqz a0, .LBB5_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB5_2:
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT: vcpop.m a0, v8
+; ZVFHMIN-NEXT: beqz a0, .LBB5_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB5_2:
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fminimum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmaximum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
new file mode 100644
index 0000000000000..8993bf8a767d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vpreduce_fmin_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmax_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmin_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmax_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fminimum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT: fcvt.s.h fa5, fa0
+; ZVFH-NEXT: vcpop.m a1, v9, v0.t
+; ZVFH-NEXT: feq.s a2, fa5, fa5
+; ZVFH-NEXT: xori a2, a2, 1
+; ZVFH-NEXT: or a1, a1, a2
+; ZVFH-NEXT: beqz a1, .LBB4_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB4_2:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zer...
[truncated]
|
You can test this locally with the following command:git-clang-format --diff a565f9eb2997ab1614cad326b93ab21810e39f32 d1cff55fa333b8ddf947ed7668e2a99727e7b6a4 --extensions cpp -- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp llvm/lib/Target/RISCV/RISCVISelLowering.cpp View the diff from clang-format here.diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2b8818482a..86f5337219 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5049,8 +5049,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
- Node->getOpcode() == ISD::SINT_TO_FP ||
- Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC ||
Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Node->getOpcode() == ISD::INSERT_VECTOR_ELT ||
Node->getOpcode() == ISD::VECREDUCE_FMAX ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, this makes sense to me
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ | ||
; RUN: -verify-machineinstrs < %s | FileCheck %s | ||
|
||
define bfloat @vreduce_fmin_nxv4f16(<vscale x 4 x bfloat> %val) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor nit,
define bfloat @vreduce_fmin_nxv4f16(<vscale x 4 x bfloat> %val) { | |
define bfloat @vreduce_fmin_nxv4bf16(<vscale x 4 x bfloat> %val) { |
etc.
…llvm#128800) This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while.
…ith f16/bf16 type for zvfhmin/zvfbfmin This PR enable scalable loop vectorization for fmax and fmin reductions with f16/bf16 type when only zvfhmin/zvfbfmin are enabled. After llvm#128800, we can promote the fmax/fmin reductions with f16/bf16 type to f32 reductions for zvfhmin/zvfbfmin.
…eductions with f16/bf16 type for zvfhmin/zvfbfmin (#129629) This PR enable scalable loop vectorization for fmax and fmin reductions with f16/bf16 type when only zvfhmin/zvfbfmin are enabled. After llvm/llvm-project#128800, we can promote the fmax/fmin reductions with f16/bf16 type to f32 reductions for zvfhmin/zvfbfmin.
…ith f16/bf16 type for zvfhmin/zvfbfmin (llvm#129629) This PR enable scalable loop vectorization for fmax and fmin reductions with f16/bf16 type when only zvfhmin/zvfbfmin are enabled. After llvm#128800, we can promote the fmax/fmin reductions with f16/bf16 type to f32 reductions for zvfhmin/zvfbfmin.
This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while.