Skip to content

[LoongArch] [CodeGen] Add options for Clang to generate LoongArch-specific frecipe & frsqrte instructions #109917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5387,6 +5387,10 @@ def mno_lasx : Flag<["-"], "mno-lasx">, Group<m_loongarch_Features_Group>,
let Flags = [TargetSpecific] in {
def msimd_EQ : Joined<["-"], "msimd=">, Group<m_loongarch_Features_Group>,
HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">;
def mfrecipe : Flag<["-"], "mfrecipe">, Group<m_loongarch_Features_Group>,
HelpText<"Enable frecipe.{s/d} and frsqrte.{s/d}">;
def mno_frecipe : Flag<["-"], "mno-frecipe">, Group<m_loongarch_Features_Group>,
HelpText<"Disable frecipe.{s/d} and frsqrte.{s/d}">;
def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>,
HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">;
def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>,
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
} else /*-mno-lasx*/
Features.push_back("-lasx");
}

// Select frecipe feature determined by -m[no-]frecipe.
if (const Arg *A =
Args.getLastArg(options::OPT_mfrecipe, options::OPT_mno_frecipe)) {
if (A->getOption().matches(options::OPT_mfrecipe))
Features.push_back("+frecipe");
else
Features.push_back("-frecipe");
}
}

std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
Expand Down
30 changes: 30 additions & 0 deletions clang/test/Driver/loongarch-mfrecipe.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/// Test -m[no-]frecipe options.

// RUN: %clang --target=loongarch64 -mfrecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -mfrecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-FRECIPE
// RUN: %clang --target=loongarch64 -mfrecipe -mno-frecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-FRECIPE

// RUN: %clang --target=loongarch64 -mfrecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -mfrecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-FRECIPE
// RUN: %clang --target=loongarch64 -mfrecipe -mno-frecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-FRECIPE


// CC1-FRECIPE: "-target-feature" "+frecipe"
// CC1-NO-FRECIPE: "-target-feature" "-frecipe"

// IR-FRECIPE: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+frecipe{{(,.*)?}}"
// IR-NO-FRECIPE: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-frecipe{{(,.*)?}}"

// Minimal function definition so that the emitted IR contains a function
// attribute group whose "target-features" string the IR checks above can match.
int foo(void) {
return 42;
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ def SDT_LoongArchMOVGR2FR_W_LA64
def SDT_LoongArchMOVFR2GR_S_LA64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;

def loongarch_movgr2fr_w_la64
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
def loongarch_movfr2gr_s_la64
: SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;

//===----------------------------------------------------------------------===//
// Instructions
Expand Down Expand Up @@ -286,6 +290,8 @@ let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
def : Pat<(loongarch_frecipe FPR32:$src), (FRECIPE_S FPR32:$src)>;
def : Pat<(loongarch_frsqrte FPR32:$src), (FRSQRTE_S FPR32:$src)>;
}

// fmadd.s: fj * fk + fa
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>;
def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>;
def : Pat<(loongarch_frecipe FPR64:$src), (FRECIPE_D FPR64:$src)>;
def : Pat<(loongarch_frsqrte FPR64:$src), (FRSQRTE_D FPR64:$src)>;
}

// fmadd.d: fj * fk + fa
Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4697,6 +4697,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_ZERO)
NODE_NAME_CASE(VALL_NONZERO)
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
}
#undef NODE_NAME_CASE
return nullptr;
Expand Down Expand Up @@ -5902,6 +5904,71 @@ Register LoongArchTargetLowering::getExceptionSelectorRegister(
return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

/// Choose the default number of refinement steps applied to an FRECIPE-based
/// estimate of type \p VT.
///
/// The FRECIPE estimate instructions have a relative accuracy of 2^-14, while
/// an IEEE float has 23 fraction digits and a double has 52. Types whose
/// scalar element is f64 therefore get two steps and every other type gets
/// one. \p Subtarget is accepted for symmetry with the callers but is not
/// consulted here.
static int getEstimateRefinementSteps(EVT VT,
                                      const LoongArchSubtarget &Subtarget) {
  if (VT.getScalarType() == MVT::f64)
    return 2;
  return 1;
}

/// Produce an FRSQRTE-based initial estimate for sqrt(\p Operand) or
/// 1/sqrt(\p Operand).
///
/// \param Operand         Value whose (reciprocal) square root is estimated.
/// \param DAG             DAG in which the estimate nodes are created.
/// \param Enabled         Not consulted by this implementation.
/// \param RefinementSteps In/out: if it is ReciprocalEstimate::Unspecified it
///                        is replaced by the default for the operand type.
/// \param UseOneConstNR   Left untouched by this implementation.
/// \param Reciprocal      True when the caller wants 1/sqrt(Operand).
/// \returns The estimate node, or an empty SDValue when the subtarget lacks
///          FRECIPE or the operand type is not supported.
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
                                                 SelectionDAG &DAG, int Enabled,
                                                 int &RefinementSteps,
                                                 bool &UseOneConstNR,
                                                 bool Reciprocal) const {
  if (Subtarget.hasFrecipe()) {
    SDLoc DL(Operand);
    EVT VT = Operand.getValueType();

    // Scalar types need the matching FP feature; vector types need the SIMD
    // extension that provides registers of that width.
    if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
        (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
        (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
        (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {

      if (RefinementSteps == ReciprocalEstimate::Unspecified)
        RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

      // FRSQRTE already approximates 1/sqrt(x), which is the seed the generic
      // Newton-Raphson refinement expects; that refinement also multiplies by
      // the operand itself when the plain square root is requested. Only when
      // no refinement follows and sqrt(x) is wanted must x * rsqrt(x) be
      // formed here. Multiplying in the reciprocal case would hand the
      // refinement an estimate of sqrt(x) instead of 1/sqrt(x).
      SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
      if (RefinementSteps == 0 && !Reciprocal)
        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);

      return Estimate;
    }
  }

  return SDValue();
}

/// Produce an FRECIPE-based initial estimate for 1/\p Operand.
///
/// If \p RefinementSteps is ReciprocalEstimate::Unspecified it is replaced by
/// the default step count for the operand type. \p Enabled is not consulted.
/// An empty SDValue is returned when the subtarget lacks FRECIPE or the
/// operand type is not one of the supported scalar/vector FP types.
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
                                                  SelectionDAG &DAG,
                                                  int Enabled,
                                                  int &RefinementSteps) const {
  if (!Subtarget.hasFrecipe())
    return SDValue();

  SDLoc Loc(Operand);
  EVT OpVT = Operand.getValueType();

  // Scalars need the matching FP feature; 128-bit vectors need LSX and
  // 256-bit vectors need LASX.
  bool IsScalarOK =
      OpVT == MVT::f32 || (OpVT == MVT::f64 && Subtarget.hasBasicD());
  bool IsLSXOK =
      (OpVT == MVT::v4f32 || OpVT == MVT::v2f64) && Subtarget.hasExtLSX();
  bool IsLASXOK =
      (OpVT == MVT::v8f32 || OpVT == MVT::v4f64) && Subtarget.hasExtLASX();
  if (!IsScalarOK && !IsLSXOK && !IsLASXOK)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(OpVT, Subtarget);

  return DAG.getNode(LoongArchISD::FRECIPE, Loc, OpVT, Operand);
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ enum NodeType : unsigned {
VALL_NONZERO,
VANY_NONZERO,

// Floating point approximate reciprocal operation
FRECIPE,
FRSQRTE

// Intrinsic operations end =============================================
};
} // end namespace LoongArchISD
Expand Down Expand Up @@ -216,6 +220,17 @@ class LoongArchTargetLowering : public TargetLowering {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

// Unconditionally reports FSQRT as cheap, regardless of the operand or DAG.
// NOTE(review): presumably this keeps the generic combiner from replacing a
// plain sqrt(x) with an estimate sequence — confirm against the TargetLowering
// hook documentation.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}

// Builds an FRSQRTE-based square-root estimate for Operand when the subtarget
// has the FRECIPE feature and the operand type is supported; otherwise returns
// an empty SDValue. RefinementSteps is filled in with a per-type default when
// the caller passes ReciprocalEstimate::Unspecified.
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;

// Builds an FRECIPE-based reciprocal estimate for Operand when the subtarget
// has the FRECIPE feature and the operand type is supported; otherwise returns
// an empty SDValue. RefinementSteps is filled in with a per-type default when
// the caller passes ReciprocalEstimate::Unspecified.
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;

ISD::NodeType getExtendForAtomicOps() const override {
return ISD::SIGN_EXTEND;
}
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

// Target nodes.
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;

def lasxsplati8
Expand Down Expand Up @@ -2094,6 +2095,15 @@ foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in
foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
(!cast<LAInst>(Inst) LASX256:$xj)>;

def : Pat<(loongarch_vfrecipe v8f32:$src),
(XVFRECIPE_S v8f32:$src)>;
def : Pat<(loongarch_vfrecipe v4f64:$src),
(XVFRECIPE_D v4f64:$src)>;
def : Pat<(loongarch_vfrsqrte v8f32:$src),
(XVFRSQRTE_S v8f32:$src)>;
def : Pat<(loongarch_vfrsqrte v4f64:$src),
(XVFRSQRTE_D v4f64:$src)>;
}

def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;

// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
Expand Down Expand Up @@ -50,6 +52,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;

def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;

def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
Expand Down Expand Up @@ -2238,6 +2242,15 @@ foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in
foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)),
(!cast<LAInst>(Inst) LSX128:$vj)>;

def : Pat<(loongarch_vfrecipe v4f32:$src),
(VFRECIPE_S v4f32:$src)>;
def : Pat<(loongarch_vfrecipe v2f64:$src),
(VFRECIPE_D v2f64:$src)>;
def : Pat<(loongarch_vfrsqrte v4f32:$src),
(VFRSQRTE_S v4f32:$src)>;
def : Pat<(loongarch_vfrsqrte v2f64:$src),
(VFRSQRTE_D v2f64:$src)>;
}

// load
Expand Down
80 changes: 80 additions & 0 deletions llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,-frecipe < %s | FileCheck %s --check-prefix=LA32F
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,+frecipe < %s | FileCheck %s --check-prefix=LA32F-FRECIPE
; RUN: llc --mtriple=loongarch64 --mattr=+d,-frecipe < %s | FileCheck %s --check-prefix=LA64D
; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s --check-prefix=LA64D-FRECIPE

;; Exercise the 'fdiv' LLVM IR: https://llvm.org/docs/LangRef.html#fdiv-instruction

define float @fdiv_s(float %x, float %y) {
; LA32F-LABEL: fdiv_s:
; LA32F: # %bb.0:
; LA32F-NEXT: fdiv.s $fa0, $fa0, $fa1
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: fdiv_s:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frecipe.s $fa2, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
; LA32F-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: fdiv_s:
; LA64D: # %bb.0:
; LA64D-NEXT: fdiv.s $fa0, $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: fdiv_s:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frecipe.s $fa2, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
; LA64D-FRECIPE-NEXT: ret
%div = fdiv fast float %x, %y
ret float %div
}

define double @fdiv_d(double %x, double %y) {
; LA32F-LABEL: fdiv_d:
; LA32F: # %bb.0:
; LA32F-NEXT: addi.w $sp, $sp, -16
; LA32F-NEXT: .cfi_def_cfa_offset 16
; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-NEXT: .cfi_offset 1, -4
; LA32F-NEXT: bl %plt(__divdf3)
; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32F-NEXT: addi.w $sp, $sp, 16
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: fdiv_d:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
; LA32F-FRECIPE-NEXT: .cfi_def_cfa_offset 16
; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-FRECIPE-NEXT: .cfi_offset 1, -4
; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: fdiv_d:
; LA64D: # %bb.0:
; LA64D-NEXT: fdiv.d $fa0, $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: fdiv_d:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0)
; LA64D-FRECIPE-NEXT: frecipe.d $fa3, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
; LA64D-FRECIPE-NEXT: ret
%div = fdiv fast double %x, %y
ret double %div
}
Loading
Loading