[RISCV][GISel] Support fp128 arithmetic and conversion for RV64. #118707
Conversation
We can support these via libcalls in libgcc/compiler-rt, or with integer operations for fneg/fabs/fcopysign. fp128 values are passed in two 64-bit GPRs according to the psABI. Supporting RV32 requires sret, which the libcall handling in LegalizerHelper.cpp does not support yet; it doesn't call canLowerReturn.
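As a rough sketch of the integer lowering (not code from the patch; it assumes the fp128 sits in a low and a high 64-bit GPR, with the sign bit at bit 63 of the high half):

#include <cstdint>

// fneg: flip the sign bit in the high half; the low half passes through.
uint64_t fneg_hi(uint64_t hi) { return hi ^ (1ULL << 63); }

// fabs: clear the sign bit in the high half.
uint64_t fabs_hi(uint64_t hi) { return hi & ~(1ULL << 63); }

// fcopysign: magnitude of x, sign of y.
uint64_t copysign_hi(uint64_t x_hi, uint64_t y_hi) {
  return (x_hi & ~(1ULL << 63)) | (y_hi & (1ULL << 63));
}

This is the same bit manipulation the fneg/fabs/fcopysign tests below check for (li/slli/srli followed by xor/and/or).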
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Patch is 20.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118707.diff
3 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index b681a0708db4b9..5d7e03bbaeb7dc 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -778,6 +778,11 @@ class LegalizeRuleSet {
LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) {
return actionFor(LegalizeAction::Libcall, Types);
}
+ LegalizeRuleSet &libcallFor(bool Pred, std::initializer_list<LLT> Types) {
+ if (!Pred)
+ return *this;
+ return actionFor(LegalizeAction::Libcall, Types);
+ }
LegalizeRuleSet &
libcallFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
return actionFor(LegalizeAction::Libcall, Types);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 456ca9894e6a7d..0f694a80cb40ca 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -491,21 +491,23 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
// FP Operations
+ // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
getActionDefinitionsBuilder(
{G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
.legalFor(ST.hasStdExtF(), {s32})
.legalFor(ST.hasStdExtD(), {s64})
.legalFor(ST.hasStdExtZfh(), {s16})
- .libcallFor({s32, s64});
+ .libcallFor({s32, s64})
+ .libcallFor(ST.is64Bit(), {s128});
getActionDefinitionsBuilder({G_FNEG, G_FABS})
.legalFor(ST.hasStdExtF(), {s32})
.legalFor(ST.hasStdExtD(), {s64})
.legalFor(ST.hasStdExtZfh(), {s16})
- .lowerFor({s32, s64});
+ .lowerFor({s32, s64, s128});
getActionDefinitionsBuilder(G_FREM)
- .libcallFor({s32, s64})
+ .libcallFor({s32, s64, s128})
.minScalar(0, s32)
.scalarize(0);
@@ -521,19 +523,22 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.legalFor(ST.hasStdExtD(), {{s32, s64}})
.legalFor(ST.hasStdExtZfh(), {{s16, s32}})
.legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
- .libcallFor({{s32, s64}});
+ .libcallFor({{s32, s64}})
+ .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
getActionDefinitionsBuilder(G_FPEXT)
.legalFor(ST.hasStdExtD(), {{s64, s32}})
.legalFor(ST.hasStdExtZfh(), {{s32, s16}})
.legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
- .libcallFor({{s64, s32}});
+ .libcallFor({{s64, s32}})
+ .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
getActionDefinitionsBuilder(G_FCMP)
.legalFor(ST.hasStdExtF(), {{sXLen, s32}})
.legalFor(ST.hasStdExtD(), {{sXLen, s64}})
.legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
.clampScalar(0, sXLen, sXLen)
- .libcallFor({{sXLen, s32}, {sXLen, s64}});
+ .libcallFor({{sXLen, s32}, {sXLen, s64}})
+ .libcallFor(ST.is64Bit(), {{sXLen, s128}});
// TODO: Support vector version of G_IS_FPCLASS.
getActionDefinitionsBuilder(G_IS_FPCLASS)
@@ -546,7 +551,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.legalFor(ST.hasStdExtF(), {s32})
.legalFor(ST.hasStdExtD(), {s64})
.legalFor(ST.hasStdExtZfh(), {s16})
- .lowerFor({s32, s64});
+ .lowerFor({s32, s64, s128});
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
.legalFor(ST.hasStdExtF(), {{sXLen, s32}})
@@ -558,7 +563,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.widenScalarToNextPow2(0)
.minScalar(0, s32)
.libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
- .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
+ .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
+ .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalFor(ST.hasStdExtF(), {{s32, sXLen}})
@@ -579,7 +585,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
// Otherwise only promote to s32 since we have si libcalls.
.minScalar(1, s32)
.libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
- .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
+ .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
+ .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
// FIXME: We can do custom inline expansion like SelectionDAG.
// FIXME: Legal with Zfa.
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll
new file mode 100644
index 00000000000000..9f5c013c9ccd4a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll
@@ -0,0 +1,538 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -global-isel < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,RV64D %s
+
+; FIXME: Support RV32.
+
+define fp128 @fadd(fp128 %x, fp128 %y) nounwind {
+; CHECK-LABEL: fadd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __addtf3
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fadd fp128 %x, %y
+ ret fp128 %a
+}
+
+define fp128 @fsub(fp128 %x, fp128 %y) nounwind {
+; CHECK-LABEL: fsub:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __subtf3
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fsub fp128 %x, %y
+ ret fp128 %a
+}
+
+define fp128 @fmul(fp128 %x, fp128 %y) nounwind {
+; CHECK-LABEL: fmul:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __multf3
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fmul fp128 %x, %y
+ ret fp128 %a
+}
+
+define fp128 @fdiv(fp128 %x, fp128 %y) nounwind {
+; CHECK-LABEL: fdiv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __divtf3
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fdiv fp128 %x, %y
+ ret fp128 %a
+}
+
+define fp128 @frem(fp128 %x, fp128 %y) nounwind {
+; CHECK-LABEL: frem:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call fmodl
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = frem fp128 %x, %y
+ ret fp128 %a
+}
+
+define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind {
+; CHECK-LABEL: fma:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call fmal
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = call fp128 @llvm.fma.f128(fp128 %x, fp128 %y, fp128 %z)
+ ret fp128 %a
+}
+
+define fp128 @fneg(fp128 %x) {
+; CHECK-LABEL: fneg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: slli a2, a2, 63
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: ret
+ %a = fneg fp128 %x
+ ret fp128 %a
+}
+
+define fp128 @fabs(fp128 %x) {
+; CHECK-LABEL: fabs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: srli a2, a2, 1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: ret
+ %a = call fp128 @llvm.fabs.f128(fp128 %x)
+ ret fp128 %a
+}
+
+define fp128 @fcopysign(fp128 %x, fp128 %y) {
+; CHECK-LABEL: fcopysign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: slli a4, a2, 63
+; CHECK-NEXT: srli a2, a2, 1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: ret
+ %a = call fp128 @llvm.copysign.f128(fp128 %x, fp128 %y)
+ ret fp128 %a
+}
+
+define i1 @fcmp(fp128 %x, fp128 %y) nounwind {
+; CHECK-LABEL: fcmp:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __eqtf2
+; CHECK-NEXT: slli a0, a0, 32
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fcmp oeq fp128 %x, %y
+ ret i1 %a
+}
+
+define fp128 @constant(fp128 %x) nounwind {
+; CHECK-LABEL: constant:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: lui a2, %hi(.LCPI10_0)
+; CHECK-NEXT: addi a3, a2, %lo(.LCPI10_0)
+; CHECK-NEXT: ld a2, 0(a3)
+; CHECK-NEXT: ld a3, 8(a3)
+; CHECK-NEXT: call __addtf3
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fadd fp128 %x, 0xL00000000000000007FFF000000000000
+ ret fp128 %a
+}
+
+define fp128 @fpext_f32(float %x, float %y) nounwind {
+; RV64I-LABEL: fpext_f32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: call __extendsftf2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: fpext_f32:
+; RV64D: # %bb.0:
+; RV64D-NEXT: addi sp, sp, -16
+; RV64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: fadd.s fa0, fa0, fa1
+; RV64D-NEXT: call __extendsftf2
+; RV64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: addi sp, sp, 16
+; RV64D-NEXT: ret
+ %a = fadd float %x, %y
+ %b = fpext float %a to fp128
+ ret fp128 %b
+}
+
+define fp128 @fpext_f64(double %x, double %y) nounwind {
+; RV64I-LABEL: fpext_f64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: call __extenddftf2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: fpext_f64:
+; RV64D: # %bb.0:
+; RV64D-NEXT: addi sp, sp, -16
+; RV64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: fadd.d fa0, fa0, fa1
+; RV64D-NEXT: call __extenddftf2
+; RV64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: addi sp, sp, 16
+; RV64D-NEXT: ret
+ %a = fadd double %x, %y
+ %b = fpext double %a to fp128
+ ret fp128 %b
+}
+
+define float @fptrunc_f32(fp128 %x, float %y) nounwind {
+; RV64I-LABEL: fptrunc_f32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: call __trunctfsf2
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call __addsf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: fptrunc_f32:
+; RV64D: # %bb.0:
+; RV64D-NEXT: addi sp, sp, -16
+; RV64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV64D-NEXT: fmv.s fs0, fa0
+; RV64D-NEXT: call __trunctfsf2
+; RV64D-NEXT: fadd.s fa0, fa0, fs0
+; RV64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
+; RV64D-NEXT: addi sp, sp, 16
+; RV64D-NEXT: ret
+ %a = fptrunc fp128 %x to float
+ %b = fadd float %a, %y
+ ret float %b
+}
+
+define double @fptrunc_f64(fp128 %x, double %y) nounwind {
+; RV64I-LABEL: fptrunc_f64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: call __trunctfdf2
+; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: call __adddf3
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: fptrunc_f64:
+; RV64D: # %bb.0:
+; RV64D-NEXT: addi sp, sp, -16
+; RV64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV64D-NEXT: fmv.d fs0, fa0
+; RV64D-NEXT: call __trunctfdf2
+; RV64D-NEXT: fadd.d fa0, fa0, fs0
+; RV64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
+; RV64D-NEXT: addi sp, sp, 16
+; RV64D-NEXT: ret
+ %a = fptrunc fp128 %x to double
+ %b = fadd double %a, %y
+ ret double %b
+}
+
+define i8 @fptosi_i8(fp128 %x) nounwind {
+; CHECK-LABEL: fptosi_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixtfsi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptosi fp128 %x to i8
+ ret i8 %a
+}
+
+define i16 @fptosi_i16(fp128 %x) nounwind {
+; CHECK-LABEL: fptosi_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixtfsi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptosi fp128 %x to i16
+ ret i16 %a
+}
+
+define i32 @fptosi_i32(fp128 %x) nounwind {
+; CHECK-LABEL: fptosi_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixtfsi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptosi fp128 %x to i32
+ ret i32 %a
+}
+
+define i64 @fptosi_i64(fp128 %x) nounwind {
+; CHECK-LABEL: fptosi_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixtfdi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptosi fp128 %x to i64
+ ret i64 %a
+}
+
+define i128 @fptosi_i128(fp128 %x) nounwind {
+; CHECK-LABEL: fptosi_i128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixtfti
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptosi fp128 %x to i128
+ ret i128 %a
+}
+
+define i8 @fptoui_i8(fp128 %x) nounwind {
+; CHECK-LABEL: fptoui_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixunstfsi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptoui fp128 %x to i8
+ ret i8 %a
+}
+
+define i16 @fptoui_i16(fp128 %x) nounwind {
+; CHECK-LABEL: fptoui_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixunstfsi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptoui fp128 %x to i16
+ ret i16 %a
+}
+
+define i32 @fptoui_i32(fp128 %x) nounwind {
+; CHECK-LABEL: fptoui_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixunstfsi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptoui fp128 %x to i32
+ ret i32 %a
+}
+
+define i64 @fptoui_i64(fp128 %x) nounwind {
+; CHECK-LABEL: fptoui_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixunstfdi
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptoui fp128 %x to i64
+ ret i64 %a
+}
+
+define i128 @fptoui_i128(fp128 %x) nounwind {
+; CHECK-LABEL: fptoui_i128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __fixunstfti
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = fptoui fp128 %x to i128
+ ret i128 %a
+}
+
+define fp128 @sitofp_i8(i8 %x) nounwind {
+; CHECK-LABEL: sitofp_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: slli a0, a0, 56
+; CHECK-NEXT: srai a0, a0, 56
+; CHECK-NEXT: call __floatsitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = sitofp i8 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @sitofp_i16(i16 %x) nounwind {
+; CHECK-LABEL: sitofp_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: srai a0, a0, 48
+; CHECK-NEXT: call __floatsitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = sitofp i16 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @sitofp_i32(i32 %x) nounwind {
+; CHECK-LABEL: sitofp_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: call __floatsitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = sitofp i32 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @sitofp_i64(i64 %x) nounwind {
+; CHECK-LABEL: sitofp_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __floatditf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = sitofp i64 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @sitofp_i128(i128 %x) nounwind {
+; CHECK-LABEL: sitofp_i128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: call __floattitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = sitofp i128 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @uitofp_i8(i8 %x) nounwind {
+; CHECK-LABEL: uitofp_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: call __floatunsitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = uitofp i8 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @uitofp_i16(i16 %x) nounwind {
+; CHECK-LABEL: uitofp_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addiw a1, a1, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: call __floatunsitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = uitofp i16 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @uitofp_i32(i32 %x) nounwind {
+; CHECK-LABEL: uitofp_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: call __floatunsitf
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = uitofp i32 %x to fp128
+ ret fp128 %a
+}
+
+define fp128 @uitofp_i64(i64 %x) nounwind {
+; CHECK-LABEL: uitofp_i64:
+; CHECK: # %...
[truncated]
I think all my questions below are about RV32, so you can ignore them if you feel like it.
getActionDefinitionsBuilder(G_FREM)
-      .libcallFor({s32, s64})
+      .libcallFor({s32, s64, s128})
I don't understand why s128 is fine here, but not for G_FADD. Both fmodl and __addtf3 have the same signature. What am I missing?
I messed up. It should check is64Bit.
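Presumably the fix is to gate it the same way as the other fp128 rules (a sketch of the corrected chain, not the final commit):

getActionDefinitionsBuilder(G_FREM)
    .libcallFor({s32, s64})
    .libcallFor(ST.is64Bit(), {s128})
    .minScalar(0, s32)
    .scalarize(0);

This uses the predicated libcallFor overload added in LegalizerInfo.h above: when the predicate is false it returns the rule set unchanged, so s128 stays unhandled on RV32.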
@@ -521,19 +523,22 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
       .legalFor(ST.hasStdExtD(), {{s32, s64}})
       .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
       .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
-      .libcallFor({{s32, s64}});
+      .libcallFor({{s32, s64}})
+      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
Why aren't these ok on 32-bit? They return 32/64-bit values, rather than 128-bit, right?
Maybe you're waiting so the FPTRUNC/FPEXT definitions can be symmetric? If so, that's reasonable.
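(For reference, the truncation helpers return the narrow type by value; these prototypes follow the usual libgcc/compiler-rt convention and are stated as an assumption, not quoted from the patch:

float  __trunctfsf2(long double a);  // fp128 -> float
double __trunctfdf2(long double a);  // fp128 -> double

so only the fp128 argument, not the return value, would be affected by the missing sret handling.)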
I didn't think too hard about it, and I didn't make an rv32 test. I can split off the rv32 subset into a new test and fix this.
getActionDefinitionsBuilder(G_FCMP)
       .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
       .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
       .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
       .clampScalar(0, sXLen, sXLen)
-      .libcallFor({{sXLen, s32}, {sXLen, s64}});
+      .libcallFor({{sXLen, s32}, {sXLen, s64}})
+      .libcallFor(ST.is64Bit(), {{sXLen, s128}});
This should be fine on rv32 too, right? The libcall just returns an int, not something 128 bits wide.
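A sketch of the comparison helper's shape (the exact prototype is an assumption, following the usual soft-float runtime convention):

// Returns zero iff neither argument is NaN and a == b; the fcmp test in
// the patch pairs the call with seqz on the result.
int __eqtf2(long double a, long double b);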
Thanks. I'll split off an rv32 test.
To be clear: I think you can do a follow-up commit with the rv32 testcases once you fix the libcall/sret issue, and fix everything then. This is an improvement for rv64 across the board, so I'm fine with this as currently written, knowing that we have a bunch of weird issues for this on rv32, which you are working on fixing.