-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Support STRICT_UINT_TO_FP and STRICT_SINT_TO_FP #102503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
ac36c4f
to
1776f6c
Compare
7807c1d
to
a9386dc
Compare
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-selectiondag Author: Mikhail R. Gadelha (mikhailramalho) Changes: This patch adds support for the missing STRICT_UINT_TO_FP and STRICT_SINT_TO_FP for riscv and adds a test case for rv32 which was previously crashing. The code is mostly in line with how other strict_* nodes are handled (e.g., getting op(1) instead of op(0) when it's a strict node, as op(0) in a strict node is the entry token). The only difference is the call to DAG.ReplaceAllUsesOfValueWith, to […] To understand why this is needed, let us consider the following program: […] Result type 0 illegal: t4: i16 = truncate t3 […] The chain of the original strict conversion was still being used elsewhere […] i16 = truncate t3, which is an illegal return type in rv32 without zfhmin or zfh. This was not a problem for the non-strict version of the nodes (i.e., […]) […] The DAG.ReplaceAllUsesOfValueWith replaces the usage of the STRICT_UINT_TO_FP […] Patch is 32.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102503.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b5c80005a0ecc1..09c2c20fb8a2e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2385,32 +2385,26 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
//
static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f16) {
+ if (OpVT == MVT::f16)
return ISD::FP16_TO_FP;
- } else if (RetVT == MVT::f16) {
+ if (RetVT == MVT::f16)
return ISD::FP_TO_FP16;
- } else if (OpVT == MVT::bf16) {
+ if (OpVT == MVT::bf16)
return ISD::BF16_TO_FP;
- } else if (RetVT == MVT::bf16) {
+ if (RetVT == MVT::bf16)
return ISD::FP_TO_BF16;
- }
-
report_fatal_error("Attempt at an invalid promotion-related conversion");
}
static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f16)
return ISD::STRICT_FP16_TO_FP;
-
if (RetVT == MVT::f16)
return ISD::STRICT_FP_TO_FP16;
-
if (OpVT == MVT::bf16)
return ISD::STRICT_BF16_TO_FP;
-
if (RetVT == MVT::bf16)
return ISD::STRICT_FP_TO_BF16;
-
report_fatal_error("Attempt at an invalid promotion-related conversion");
}
@@ -3138,6 +3132,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
@@ -3288,19 +3284,13 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) {
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
- if (N->isStrictFPOpcode()) {
- // FIXME: assume we only have two f16 variants for now.
- unsigned Opcode;
- if (RVT == MVT::f16)
- Opcode = ISD::STRICT_FP_TO_FP16;
- else if (RVT == MVT::bf16)
- Opcode = ISD::STRICT_FP_TO_BF16;
- else
- llvm_unreachable("unknown half type");
- SDValue Res = DAG.getNode(Opcode, SDLoc(N), {MVT::i16, MVT::Other},
- {N->getOperand(0), N->getOperand(1)});
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(GetPromotionOpcodeStrict(SVT, RVT), SDLoc(N),
+ {MVT::i16, MVT::Other}, {N->getOperand(0), Op});
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
@@ -3359,6 +3349,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDLoc dl(N);
+ if (N->isStrictFPOpcode()) {
+ SDValue Op = DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ Op = DAG.getNode(GetPromotionOpcodeStrict(NVT, OVT), dl,
+ {MVT::i16, MVT::Other}, {N->getOperand(0), Op});
+ ReplaceValueWith(SDValue(N, 1), Op.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Op.getValue(1));
+ return Op;
+ }
+
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
// Round the value to the softened type.
@@ -3447,6 +3447,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
break;
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
case ISD::FP_TO_SINT_SAT:
@@ -3473,7 +3475,7 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
assert(Res.getNode() != N && "Expected a new node!");
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ assert(Res.getValueType() == N->getValueType(0) &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
@@ -3517,16 +3519,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
if (IsStrict) {
- unsigned Opcode;
- if (SVT == MVT::f16)
- Opcode = ISD::STRICT_FP16_TO_FP;
- else if (SVT == MVT::bf16)
- Opcode = ISD::STRICT_BF16_TO_FP;
- else
- llvm_unreachable("unknown half type");
- SDValue Res =
- DAG.getNode(Opcode, SDLoc(N), {N->getValueType(0), MVT::Other},
- {N->getOperand(0), Op});
+ SDValue Res = DAG.getNode(GetPromotionOpcodeStrict(SVT, RVT), SDLoc(N),
+ {RVT, MVT::Other}, {N->getOperand(0), Op});
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
@@ -3537,17 +3531,26 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
EVT RVT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
SDLoc dl(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
-
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), SVT);
Op = GetSoftPromotedHalf(Op);
- SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
+ if (IsStrict) {
+ Op = DAG.getNode(GetPromotionOpcodeStrict(SVT, RVT), dl, {NVT, MVT::Other},
+ {N->getOperand(0), Op});
+ Op = DAG.getNode(N->getOpcode(), dl, {RVT, MVT::Other},
+ {N->getOperand(0), Op});
+ ReplaceValueWith(SDValue(N, 1), Op.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Op.getValue(1));
+ return Op;
+ }
- return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
+ return DAG.getNode(N->getOpcode(), dl, RVT, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a1cb74f43e6050..044ac49224b5c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3952,12 +3952,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT(SDNode *N, SDValue &Lo,
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
- EVT OFPVT = Op.getValueType();
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), OFPVT);
- Op = GetSoftPromotedHalf(Op);
- Op = DAG.getNode(OFPVT == MVT::f16 ? ISD::FP16_TO_FP : ISD::BF16_TO_FP, dl,
- NFPVT, Op);
- Op = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, VT, Op);
+ Op = SoftPromoteHalfOp_FP_TO_XINT(N);
SplitInteger(Op, Lo, Hi);
return;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d400b2ea1ca2ca..fae2fa102d2992 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -529,6 +529,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
if (Subtarget.hasStdExtZfa()) {
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
@@ -577,6 +579,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
}
if (Subtarget.is64Bit()) {
@@ -6851,30 +6855,35 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
return Res;
}
+ case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: {
// Custom lower to ensure the libcall return is passed in an FPR on hard
// float ABIs.
assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
SDLoc DL(Op);
MakeLibCallOptions CallOptions;
- RTLIB::Libcall LC =
- RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
- SDValue Res =
- makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
+ SDValue Res = makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL).first;
if (Subtarget.is64Bit())
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
- return DAG.getBitcast(MVT::i32, Res);
+ SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
+ if (IsStrict)
+ return DAG.getMergeValues({Result, Op.getOperand(0)}, DL);
+ return Result;
}
+ case ISD::STRICT_FP16_TO_FP:
case ISD::FP16_TO_FP: {
// Custom lower to ensure the libcall argument is passed in an FPR on hard
// float ABIs.
assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
SDLoc DL(Op);
MakeLibCallOptions CallOptions;
+ SDValue Op0 = Op->isStrictFPOpcode() ? Op.getOperand(1) : Op.getOperand(0);
SDValue Arg = Subtarget.is64Bit()
- ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
- Op.getOperand(0))
- : DAG.getBitcast(MVT::f32, Op.getOperand(0));
+ ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
+ : DAG.getBitcast(MVT::f32, Op0);
SDValue Res =
makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
.first;
diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll
index 6bd3ef775609ec..17164e8da562a5 100644
--- a/llvm/test/CodeGen/RISCV/half-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll
@@ -47,6 +47,9 @@
; RUN: llc -mtriple=riscv64 -mattr=+zdinx,+zhinxmin -verify-machineinstrs \
; RUN: -target-abi lp64 -disable-strictnode-mutation < %s \
; RUN: | FileCheck -check-prefixes=CHECK64-IZDINXZHINXMIN %s
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs \
+; RUN: -target-abi ilp32d -disable-strictnode-mutation < %s \
+; RUN: | FileCheck -check-prefixes=CHECK32-D %s
; NOTE: The rounding mode metadata does not effect which instruction is
; selected. Dynamic rounding mode is always used for operations that
@@ -128,6 +131,20 @@ define i16 @fcvt_si_h(half %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_si_h:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: fcvt.w.s a0, fa0, rtz
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %a, metadata !"fpexcept.strict")
ret i16 %1
}
@@ -209,6 +226,20 @@ define i16 @fcvt_ui_h(half %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_ui_h:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: fcvt.w.s a0, fa0, rtz
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %a, metadata !"fpexcept.strict")
ret i16 %1
}
@@ -280,6 +311,20 @@ define i32 @fcvt_w_h(half %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.w.s a0, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_w_h:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: fcvt.w.s a0, fa0, rtz
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict")
ret i32 %1
}
@@ -351,6 +396,20 @@ define i32 @fcvt_wu_h(half %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.wu.s a0, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_wu_h:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: fcvt.wu.s a0, fa0, rtz
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict")
ret i32 %1
}
@@ -449,6 +508,24 @@ define i32 @fcvt_wu_h_multiple_use(half %x, ptr %y) strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: seqz a1, a0
; CHECK64-IZDINXZHINXMIN-NEXT: add a0, a0, a1
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_wu_h_multiple_use:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: .cfi_offset ra, -4
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: fcvt.wu.s a0, fa0, rtz
+; CHECK32-D-NEXT: seqz a1, a0
+; CHECK32-D-NEXT: add a0, a0, a1
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%a = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict")
%b = icmp eq i32 %a, 0
%c = select i1 %b, i32 1, i32 %a
@@ -556,6 +633,20 @@ define i64 @fcvt_l_h(half %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_l_h:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: call __fixsfdi
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict")
ret i64 %1
}
@@ -662,6 +753,20 @@ define i64 @fcvt_lu_h(half %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.lu.s a0, a0, rtz
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_lu_h:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: call __extendhfsf2
+; CHECK32-D-NEXT: call __fixunssfdi
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict")
ret i64 %1
}
@@ -771,6 +876,22 @@ define half @fcvt_h_si(i16 %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.w a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_h_si:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srai a0, a0, 16
+; CHECK32-D-NEXT: fcvt.s.w fa0, a0
+; CHECK32-D-NEXT: call __truncsfhf2
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: lui a1, 1048560
+; CHECK32-D-NEXT: or a0, a0, a1
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret half %1
}
@@ -842,6 +963,20 @@ define half @fcvt_h_si_signext(i16 signext %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.w a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_h_si_signext:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: fcvt.s.w fa0, a0
+; CHECK32-D-NEXT: call __truncsfhf2
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: lui a1, 1048560
+; CHECK32-D-NEXT: or a0, a0, a1
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret half %1
}
@@ -950,6 +1085,22 @@ define half @fcvt_h_ui(i16 %a) nounwind strictfp {
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.wu a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: ret
+;
+; CHECK32-D-LABEL: fcvt_h_ui:
+; CHECK32-D: # %bb.0:
+; CHECK32-D-NEXT: addi sp, sp, -16
+; CHECK32-D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-D-NEXT: slli a0, a0, 16
+; CHECK32-D-NEXT: srli a0, a0, 16
+; CHECK32-D-NEXT: fcvt.s.wu fa0, a0
+; CHECK32-D-NEXT: call __truncsfhf2
+; CHECK32-D-NEXT: fmv.x.w a0, fa0
+; CHECK32-D-NEXT: lui a1, 1048560
+; CHECK32-D-NEXT: or a0, a0, a1
+; CHECK32-D-NEXT: fmv.w.x fa0, a0
+; CHECK32-D-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-D-NEXT: addi sp, sp, 16
+; CHECK32-D-NEXT: ret
%1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict...
[truncated]
|
5174ad7
to
982f101
Compare
…needs to be SoftPromoted. Create an FP_EXTEND instead of handling the soft promote directly. This FP_EXTEND will be visited and soft promoted itself. This removes a zero extend from the generated code when the f32 type is itself softened. Previously we softened it as an fp16_to_fp, which sees the operand as an integer type, so we extended it. When we soften the result as an fp_extend, we see the source as f16 and don't extend. It only becomes an integer inside call lowering, not by type legalization. If this extend is really necessary, then we have an issue when an f16->f32 fp_extend exists in the source and f32 needs to be softened. This simplifies part of llvm#102503.
…needs to be SoftPromoted. (#107634) Create an FP_EXTEND instead of handling the soft promote directly. This FP_EXTEND will be visited and soft promoted itself. This removes a zero extend from the generated code when the f32 type is itself softened. Previously we softened it as an fp16_to_fp, which sees the operand as an integer type, so we extended it. When we soften the result as an fp_extend, we see the source as f16 and don't extend. It only becomes an integer inside call lowering, not by type legalization. If this extend is really necessary, then we have an issue when an f16->f32 fp_extend exists in the source and f32 needs to be softened. This simplifies part of #102503.
This patch adds support for the missing STRICT_UINT_TO_FP and STRICT_SINT_TO_FP for riscv and adds a test case for rv32 which was previously crashing. The code is in line with how other strict_* nodes are handled (e.g., getting op(1) instead of op(0) when it's a strict node, as op(0) in a strict node is the entry token).
982f101
to
c28307d
Compare
This patch adds support for the missing STRICT_UINT_TO_FP and STRICT_SINT_TO_FP for riscv and adds a test case for rv32 which was previously crashing. The code is in line with how other strict_* nodes are handled (e.g., getting op(1) instead of op(0) when it's a strict node, as op(0) in a strict node is the entry token).
This patch adds support for the missing STRICT_UINT_TO_FP and
STRICT_SINT_TO_FP for riscv and adds a test case for rv32 which was
previously crashing.
The code is in line with how other strict_* nodes are handled
(e.g., getting op(1) instead of op(0) when it's a strict node, as op(0)
in a strict node is the entry token).