Skip to content

Commit b720995

Browse files
committed
[LoongArch] Add options for Clang to generate LoongArch-specific frecipe & frsqrte instructions
1 parent 3da5e82 commit b720995

14 files changed

+820
-0
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5373,6 +5373,10 @@ def mno_lasx : Flag<["-"], "mno-lasx">, Group<m_loongarch_Features_Group>,
53735373
def msimd_EQ : Joined<["-"], "msimd=">, Group<m_loongarch_Features_Group>,
53745374
Flags<[TargetSpecific]>,
53755375
HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">;
5376+
def mfrecipe : Flag<["-"], "mfrecipe">, Group<m_loongarch_Features_Group>,
5377+
HelpText<"Enable frecipe.{s/d} and frsqrte.{s/d}">;
5378+
def mno_frecipe : Flag<["-"], "mno-frecipe">, Group<m_loongarch_Features_Group>,
5379+
HelpText<"Disable frecipe.{s/d} and frsqrte.{s/d}">;
53765380
def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">,
53775381
Visibility<[ClangOption, CC1Option]>, Group<m_Group>,
53785382
MarshallingInfoFlag<CodeGenOpts<"MNopMCount">>;

clang/lib/Driver/ToolChains/Arch/LoongArch.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,20 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
251251
} else /*-mno-lasx*/
252252
Features.push_back("-lasx");
253253
}
254+
255+
// Select frecipe feature determined by -m[no-]frecipe.
256+
if (const Arg *A =
257+
Args.getLastArg(options::OPT_mfrecipe, options::OPT_mno_frecipe)) {
258+
// FRECIPE depends on 64-bit FPU.
259+
// -mno-frecipe conflicts with -mfrecipe.
260+
if (A->getOption().matches(options::OPT_mfrecipe)) {
261+
if (llvm::find(Features, "-d") != Features.end())
262+
D.Diag(diag::err_drv_loongarch_wrong_fpu_width) << /*FRECIPE*/ 2;
263+
else /*-mfrecipe*/
264+
Features.push_back("+frecipe");
265+
} else /*-mno-frecipe*/
266+
Features.push_back("-frecipe");
267+
}
254268
}
255269

256270
std::string loongarch::postProcessTargetCPUString(const std::string &CPU,

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,15 @@ def SDT_LoongArchMOVGR2FR_W_LA64
1919
def SDT_LoongArchMOVFR2GR_S_LA64
2020
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
2121
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
22+
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
2223

2324
def loongarch_movgr2fr_w_la64
2425
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
2526
def loongarch_movfr2gr_s_la64
2627
: SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
2728
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
29+
def loongarch_frecipe_s : SDNode<"LoongArchISD::FRECIPE_S", SDT_LoongArchFRECIPE>;
30+
def loongarch_frsqrte_s : SDNode<"LoongArchISD::FRSQRTE_S", SDT_LoongArchFRECIPE>;
2831

2932
//===----------------------------------------------------------------------===//
3033
// Instructions
@@ -286,6 +289,8 @@ let Predicates = [HasFrecipe] in {
286289
// FP approximate reciprocal operation
287290
def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
288291
def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
292+
def : Pat<(loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
293+
def : Pat<(loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
289294
}
290295

291296
// fmadd.s: fj * fk + fa

llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
//===----------------------------------------------------------------------===//
14+
// LoongArch specific DAG Nodes.
15+
//===----------------------------------------------------------------------===//
16+
17+
def loongarch_frecipe_d : SDNode<"LoongArchISD::FRECIPE_D", SDT_LoongArchFRECIPE>;
18+
def loongarch_frsqrte_d : SDNode<"LoongArchISD::FRSQRTE_D", SDT_LoongArchFRECIPE>;
19+
1320
//===----------------------------------------------------------------------===//
1421
// Instructions
1522
//===----------------------------------------------------------------------===//
@@ -253,6 +260,8 @@ let Predicates = [HasFrecipe] in {
253260
// FP approximate reciprocal operation
254261
def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>;
255262
def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>;
263+
def : Pat<(loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>;
264+
def : Pat<(loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>;
256265
}
257266

258267
// fmadd.d: fj * fk + fa

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4697,6 +4697,18 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
46974697
NODE_NAME_CASE(VANY_ZERO)
46984698
NODE_NAME_CASE(VALL_NONZERO)
46994699
NODE_NAME_CASE(VANY_NONZERO)
4700+
NODE_NAME_CASE(FRECIPE_S)
4701+
NODE_NAME_CASE(FRECIPE_D)
4702+
NODE_NAME_CASE(FRSQRTE_S)
4703+
NODE_NAME_CASE(FRSQRTE_D)
4704+
NODE_NAME_CASE(VFRECIPE_S)
4705+
NODE_NAME_CASE(VFRECIPE_D)
4706+
NODE_NAME_CASE(VFRSQRTE_S)
4707+
NODE_NAME_CASE(VFRSQRTE_D)
4708+
NODE_NAME_CASE(XVFRECIPE_S)
4709+
NODE_NAME_CASE(XVFRECIPE_D)
4710+
NODE_NAME_CASE(XVFRSQRTE_S)
4711+
NODE_NAME_CASE(XVFRSQRTE_D)
47004712
}
47014713
#undef NODE_NAME_CASE
47024714
return nullptr;
@@ -5902,6 +5914,92 @@ Register LoongArchTargetLowering::getExceptionSelectorRegister(
59025914
return LoongArch::R5;
59035915
}
59045916

5917+
//===----------------------------------------------------------------------===//
5918+
// Target Optimization Hooks
5919+
//===----------------------------------------------------------------------===//
5920+
5921+
// Returns the default number of refinement steps to apply to an estimate of
// type VT: 2 when the scalar type of VT is f64, and 1 otherwise.
// Subtarget is accepted but not consulted by this function.
static int getEstimateRefinementSteps(EVT VT,
5922+
const LoongArchSubtarget &Subtarget) {
5923+
// The FRECIPE feature instructions have a relative accuracy of 2^-14.
5924+
// IEEE float has 23 fraction bits and double has 52 fraction bits.
// NOTE(review): presumably each refinement step roughly doubles the number
// of accurate bits, which is why f64 gets one more step - confirm.
5925+
int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
5926+
return RefinementSteps;
5927+
}
5928+
5929+
// Builds a target-specific FRSQRTE-family estimate node for Operand.
//
// Returns an empty SDValue when the subtarget lacks the FRECIPE feature or
// when Operand's value type is not one of the type/feature combinations
// handled below. Otherwise:
//   - UseOneConstNR is set to false;
//   - RefinementSteps is replaced by the per-type default only if the caller
//     passed ReciprocalEstimate::Unspecified;
//   - the estimate node is returned, multiplied by Operand when Reciprocal
//     is true.
// Enabled is not consulted by this function.
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
5930+
SelectionDAG &DAG, int Enabled,
5931+
int &RefinementSteps,
5932+
bool &UseOneConstNR,
5933+
bool Reciprocal) const {
5934+
if (Subtarget.hasFrecipe()) {
5935+
SDLoc DL(Operand);
5936+
EVT VT = Operand.getValueType();
5937+
unsigned Opcode;
5938+
5939+
// Pick the scalar, LSX (128-bit) or LASX (256-bit) opcode matching VT.
// f32 only needs FRECIPE; every other type also requires the listed
// subtarget feature.
if (VT == MVT::f32) {
5940+
Opcode = LoongArchISD::FRSQRTE_S;
5941+
} else if (VT == MVT::f64 && Subtarget.hasBasicD()) {
5942+
Opcode = LoongArchISD::FRSQRTE_D;
5943+
} else if (VT == MVT::v4f32 && Subtarget.hasExtLSX()) {
5944+
Opcode = LoongArchISD::VFRSQRTE_S;
5945+
} else if (VT == MVT::v2f64 && Subtarget.hasExtLSX()) {
5946+
Opcode = LoongArchISD::VFRSQRTE_D;
5947+
} else if (VT == MVT::v8f32 && Subtarget.hasExtLASX()) {
5948+
Opcode = LoongArchISD::XVFRSQRTE_S;
5949+
} else if (VT == MVT::v4f64 && Subtarget.hasExtLASX()) {
5950+
Opcode = LoongArchISD::XVFRSQRTE_D;
5951+
} else {
5952+
// Unsupported type or missing feature: report that no estimate exists.
return SDValue();
5953+
}
5954+
5955+
UseOneConstNR = false;
5956+
// Only fill in a default step count when the caller did not specify one.
if (RefinementSteps == ReciprocalEstimate::Unspecified)
5957+
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
5958+
5959+
SDValue Estimate = DAG.getNode(Opcode, DL, VT, Operand);
5960+
// NOTE(review): going by its name, FRSQRTE presumably estimates
// 1/sqrt(Operand); if so, multiplying by Operand yields a sqrt(Operand)
// estimate, which looks like what the non-reciprocal case needs. Confirm
// the polarity of this condition against the
// TargetLowering::getSqrtEstimate contract.
if (Reciprocal) {
5961+
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
5962+
}
5963+
return Estimate;
5964+
}
5965+
5966+
// FRECIPE is not available on this subtarget.
return SDValue();
5967+
}
5968+
5969+
// Builds a target-specific FRECIPE-family estimate node for Operand.
//
// Returns an empty SDValue when the subtarget lacks the FRECIPE feature or
// when Operand's value type is not one of the type/feature combinations
// handled below. Otherwise RefinementSteps is replaced by the per-type
// default only if the caller passed ReciprocalEstimate::Unspecified, and the
// single-operand estimate node is returned.
// Enabled is not consulted by this function.
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
5970+
SelectionDAG &DAG,
5971+
int Enabled,
5972+
int &RefinementSteps) const {
5973+
if (Subtarget.hasFrecipe()) {
5974+
SDLoc DL(Operand);
5975+
EVT VT = Operand.getValueType();
5976+
unsigned Opcode;
5977+
5978+
// Pick the scalar, LSX (128-bit) or LASX (256-bit) opcode matching VT.
// f32 only needs FRECIPE; every other type also requires the listed
// subtarget feature.
if (VT == MVT::f32) {
5979+
Opcode = LoongArchISD::FRECIPE_S;
5980+
} else if (VT == MVT::f64 && Subtarget.hasBasicD()) {
5981+
Opcode = LoongArchISD::FRECIPE_D;
5982+
} else if (VT == MVT::v4f32 && Subtarget.hasExtLSX()) {
5983+
Opcode = LoongArchISD::VFRECIPE_S;
5984+
} else if (VT == MVT::v2f64 && Subtarget.hasExtLSX()) {
5985+
Opcode = LoongArchISD::VFRECIPE_D;
5986+
} else if (VT == MVT::v8f32 && Subtarget.hasExtLASX()) {
5987+
Opcode = LoongArchISD::XVFRECIPE_S;
5988+
} else if (VT == MVT::v4f64 && Subtarget.hasExtLASX()) {
5989+
Opcode = LoongArchISD::XVFRECIPE_D;
5990+
} else {
5991+
// Unsupported type or missing feature: report that no estimate exists.
return SDValue();
5992+
}
5993+
5994+
// Only fill in a default step count when the caller did not specify one.
if (RefinementSteps == ReciprocalEstimate::Unspecified)
5995+
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
5996+
5997+
return DAG.getNode(Opcode, DL, VT, Operand);
5998+
}
5999+
6000+
// FRECIPE is not available on this subtarget.
return SDValue();
6001+
}
6002+
59056003
//===----------------------------------------------------------------------===//
59066004
// LoongArch Inline Assembly Support
59076005
//===----------------------------------------------------------------------===//

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,22 @@ enum NodeType : unsigned {
141141
VALL_NONZERO,
142142
VANY_NONZERO,
143143

144+
// Floating point approximate reciprocal operation
145+
FRECIPE_S,
146+
FRECIPE_D,
147+
FRSQRTE_S,
148+
FRSQRTE_D,
149+
150+
VFRECIPE_S,
151+
VFRECIPE_D,
152+
VFRSQRTE_S,
153+
VFRSQRTE_D,
154+
155+
XVFRECIPE_S,
156+
XVFRECIPE_D,
157+
XVFRSQRTE_S,
158+
XVFRSQRTE_D,
159+
144160
// Intrinsic operations end =============================================
145161
};
146162
} // end namespace LoongArchISD
@@ -216,6 +232,17 @@ class LoongArchTargetLowering : public TargetLowering {
216232
Register
217233
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
218234

235+
// Unconditionally reports FSQRT as cheap; neither Operand nor DAG is
// inspected.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
236+
return true;
237+
}
238+
239+
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
240+
int &RefinementSteps, bool &UseOneConstNR,
241+
bool Reciprocal) const override;
242+
243+
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
244+
int &RefinementSteps) const override;
245+
219246
ISD::NodeType getExtendForAtomicOps() const override {
220247
return ISD::SIGN_EXTEND;
221248
}

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,18 @@
99
// This file describes the Advanced SIMD extension instructions.
1010
//
1111
//===----------------------------------------------------------------------===//
12+
def SDT_LoongArchXVFRECIPE_S : SDTypeProfile<1, 1, [SDTCisVT<0, v8f32>, SDTCisVT<1, v8f32>]>;
13+
def SDT_LoongArchXVFRECIPE_D : SDTypeProfile<1, 1, [SDTCisVT<0, v4f64>, SDTCisVT<1, v4f64>]>;
1214

15+
// Target nodes.
1316
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
1417

18+
def loongarch_xvfrecipe_s: SDNode<"LoongArchISD::XVFRECIPE_S", SDT_LoongArchXVFRECIPE_S>;
19+
def loongarch_xvfrecipe_d: SDNode<"LoongArchISD::XVFRECIPE_D", SDT_LoongArchXVFRECIPE_D>;
20+
def loongarch_xvfrsqrte_s: SDNode<"LoongArchISD::XVFRSQRTE_S", SDT_LoongArchXVFRECIPE_S>;
21+
def loongarch_xvfrsqrte_d: SDNode<"LoongArchISD::XVFRSQRTE_D", SDT_LoongArchXVFRECIPE_D>;
22+
23+
1524
def lasxsplati8
1625
: PatFrag<(ops node:$e0),
1726
(v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
@@ -2094,6 +2103,15 @@ foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in
20942103
foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in
20952104
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
20962105
(!cast<LAInst>(Inst) LASX256:$xj)>;
2106+
2107+
def : Pat<(loongarch_xvfrecipe_s v8f32:$src),
2108+
(XVFRECIPE_S v8f32:$src)>;
2109+
def : Pat<(loongarch_xvfrecipe_d v4f64:$src),
2110+
(XVFRECIPE_D v4f64:$src)>;
2111+
def : Pat<(loongarch_xvfrsqrte_s v8f32:$src),
2112+
(XVFRSQRTE_S v8f32:$src)>;
2113+
def : Pat<(loongarch_xvfrsqrte_d v4f64:$src),
2114+
(XVFRSQRTE_D v4f64:$src)>;
20972115
}
20982116

20992117
def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
2323
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
2424
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
2525
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26+
def SDT_LoongArchVFRECIPE_S : SDTypeProfile<1, 1, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4f32>]>;
27+
def SDT_LoongArchVFRECIPE_D : SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>]>;
2628

2729
// Target nodes.
2830
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
@@ -50,6 +52,10 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
5052

5153
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
5254
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
55+
def loongarch_vfrecipe_s: SDNode<"LoongArchISD::VFRECIPE_S", SDT_LoongArchVFRECIPE_S>;
56+
def loongarch_vfrecipe_d: SDNode<"LoongArchISD::VFRECIPE_D", SDT_LoongArchVFRECIPE_D>;
57+
def loongarch_vfrsqrte_s: SDNode<"LoongArchISD::VFRSQRTE_S", SDT_LoongArchVFRECIPE_S>;
58+
def loongarch_vfrsqrte_d: SDNode<"LoongArchISD::VFRSQRTE_D", SDT_LoongArchVFRECIPE_D>;
5359

5460
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
5561
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
@@ -2238,6 +2244,15 @@ foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in
22382244
foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in
22392245
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)),
22402246
(!cast<LAInst>(Inst) LSX128:$vj)>;
2247+
2248+
def : Pat<(loongarch_vfrecipe_s v4f32:$src),
2249+
(VFRECIPE_S v4f32:$src)>;
2250+
def : Pat<(loongarch_vfrecipe_d v2f64:$src),
2251+
(VFRECIPE_D v2f64:$src)>;
2252+
def : Pat<(loongarch_vfrsqrte_s v4f32:$src),
2253+
(VFRSQRTE_S v4f32:$src)>;
2254+
def : Pat<(loongarch_vfrsqrte_d v2f64:$src),
2255+
(VFRSQRTE_D v2f64:$src)>;
22412256
}
22422257

22432258
// load
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+d,-frecipe < %s | FileCheck %s --check-prefix=FAULT
3+
; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s
4+
5+
;; Exercise the 'fdiv' LLVM IR: https://llvm.org/docs/LangRef.html#fdiv-instruction
6+
7+
define float @fdiv_s(float %x, float %y) {
8+
; FAULT-LABEL: fdiv_s:
9+
; FAULT: # %bb.0:
10+
; FAULT-NEXT: fdiv.s $fa0, $fa0, $fa1
11+
; FAULT-NEXT: ret
12+
;
13+
; CHECK-LABEL: fdiv_s:
14+
; CHECK: # %bb.0:
15+
; CHECK-NEXT: frecipe.s $fa2, $fa1
16+
; CHECK-NEXT: fmul.s $fa3, $fa0, $fa2
17+
; CHECK-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
18+
; CHECK-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
19+
; CHECK-NEXT: ret
20+
%div = fdiv fast float %x, %y
21+
ret float %div
22+
}
23+
24+
define double @fdiv_d(double %x, double %y) {
25+
; FAULT-LABEL: fdiv_d:
26+
; FAULT: # %bb.0:
27+
; FAULT-NEXT: fdiv.d $fa0, $fa0, $fa1
28+
; FAULT-NEXT: ret
29+
;
30+
; CHECK-LABEL: fdiv_d:
31+
; CHECK: # %bb.0:
32+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
33+
; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0)
34+
; CHECK-NEXT: frecipe.d $fa3, $fa1
35+
; CHECK-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2
36+
; CHECK-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3
37+
; CHECK-NEXT: fmul.d $fa3, $fa0, $fa2
38+
; CHECK-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
39+
; CHECK-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
40+
; CHECK-NEXT: ret
41+
%div = fdiv fast double %x, %y
42+
ret double %div
43+
}

0 commit comments

Comments
 (0)