Skip to content

Commit 92be2cb

Browse files
authored
[LoongArch] Use LSX for scalar FP rounding with explicit rounding mode (llvm#114766)
The LoongArch base FP ISA only has the frint.{s/d} instructions, which read the global rounding mode. Use LSX instructions to get an explicit rounding mode for scalar ceil/floor/trunc/roundeven calls when -mlsx is enabled. This is faster than calling the libm library functions, and it matches what GCC does: https://gcc.gnu.org/pipermail/gcc-cvs/2023-November/394218.html
1 parent 435e584 commit 92be2cb

File tree

3 files changed

+70
-8
lines changed

3 files changed

+70
-8
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
286286
VT, Expand);
287287
}
288288
setOperationAction(ISD::CTPOP, GRLenVT, Legal);
289+
setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
290+
setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
291+
setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
292+
setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
289293
}
290294

291295
// Set operations for 'LASX' feature.

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2259,6 +2259,32 @@ def : Pat<(loongarch_vfrsqrte v2f64:$src),
22592259
(VFRSQRTE_D v2f64:$src)>;
22602260
}
22612261

2262+
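// Scalar floating-point rounding with an explicit rounding mode
// (ceil/floor/trunc/roundeven), selected to LSX vector round instructions.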
2263+
def : Pat<(f32 (fceil FPR32:$fj)),
2264+
(f32 (EXTRACT_SUBREG (VFRINTRP_S (VREPLVEI_W
2265+
(SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
2266+
def : Pat<(f64 (fceil FPR64:$fj)),
2267+
(f64 (EXTRACT_SUBREG (VFRINTRP_D (VREPLVEI_D
2268+
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
2269+
def : Pat<(f32 (ffloor FPR32:$fj)),
2270+
(f32 (EXTRACT_SUBREG (VFRINTRM_S (VREPLVEI_W
2271+
(SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
2272+
def : Pat<(f64 (ffloor FPR64:$fj)),
2273+
(f64 (EXTRACT_SUBREG (VFRINTRM_D (VREPLVEI_D
2274+
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
2275+
def : Pat<(f32 (ftrunc FPR32:$fj)),
2276+
(f32 (EXTRACT_SUBREG (VFRINTRZ_S (VREPLVEI_W
2277+
(SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
2278+
def : Pat<(f64 (ftrunc FPR64:$fj)),
2279+
(f64 (EXTRACT_SUBREG (VFRINTRZ_D (VREPLVEI_D
2280+
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
2281+
def : Pat<(f32 (froundeven FPR32:$fj)),
2282+
(f32 (EXTRACT_SUBREG (VFRINTRNE_S (VREPLVEI_W
2283+
(SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)), sub_32))>;
2284+
def : Pat<(f64 (froundeven FPR64:$fj)),
2285+
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
2286+
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
2287+
22622288
// load
22632289
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
22642290
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;

llvm/test/CodeGen/LoongArch/fp-rounding.ll

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@ define float @ceil_f32(float %i) nounwind {
1010
;
1111
; LSX-LABEL: ceil_f32:
1212
; LSX: # %bb.0: # %entry
13-
; LSX-NEXT: b %plt(ceilf)
13+
; LSX-NEXT: # kill: def $f0 killed $f0 def $vr0
14+
; LSX-NEXT: vreplvei.w $vr0, $vr0, 0
15+
; LSX-NEXT: vfrintrp.s $vr0, $vr0
16+
; LSX-NEXT: # kill: def $f0 killed $f0 killed $vr0
17+
; LSX-NEXT: ret
1418
entry:
1519
%0 = call float @llvm.ceil.f32(float %i)
1620
ret float %0
@@ -24,7 +28,11 @@ define double @ceil_f64(double %i) nounwind {
2428
;
2529
; LSX-LABEL: ceil_f64:
2630
; LSX: # %bb.0: # %entry
27-
; LSX-NEXT: b %plt(ceil)
31+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
32+
; LSX-NEXT: vreplvei.d $vr0, $vr0, 0
33+
; LSX-NEXT: vfrintrp.d $vr0, $vr0
34+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
35+
; LSX-NEXT: ret
2836
entry:
2937
%0 = call double @llvm.ceil.f64(double %i)
3038
ret double %0
@@ -38,7 +46,11 @@ define float @floor_f32(float %i) nounwind {
3846
;
3947
; LSX-LABEL: floor_f32:
4048
; LSX: # %bb.0: # %entry
41-
; LSX-NEXT: b %plt(floorf)
49+
; LSX-NEXT: # kill: def $f0 killed $f0 def $vr0
50+
; LSX-NEXT: vreplvei.w $vr0, $vr0, 0
51+
; LSX-NEXT: vfrintrm.s $vr0, $vr0
52+
; LSX-NEXT: # kill: def $f0 killed $f0 killed $vr0
53+
; LSX-NEXT: ret
4254
entry:
4355
%0 = call float @llvm.floor.f32(float %i)
4456
ret float %0
@@ -52,7 +64,11 @@ define double @floor_f64(double %i) nounwind {
5264
;
5365
; LSX-LABEL: floor_f64:
5466
; LSX: # %bb.0: # %entry
55-
; LSX-NEXT: b %plt(floor)
67+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
68+
; LSX-NEXT: vreplvei.d $vr0, $vr0, 0
69+
; LSX-NEXT: vfrintrm.d $vr0, $vr0
70+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
71+
; LSX-NEXT: ret
5672
entry:
5773
%0 = call double @llvm.floor.f64(double %i)
5874
ret double %0
@@ -66,7 +82,11 @@ define float @trunc_f32(float %i) nounwind {
6682
;
6783
; LSX-LABEL: trunc_f32:
6884
; LSX: # %bb.0: # %entry
69-
; LSX-NEXT: b %plt(truncf)
85+
; LSX-NEXT: # kill: def $f0 killed $f0 def $vr0
86+
; LSX-NEXT: vreplvei.w $vr0, $vr0, 0
87+
; LSX-NEXT: vfrintrz.s $vr0, $vr0
88+
; LSX-NEXT: # kill: def $f0 killed $f0 killed $vr0
89+
; LSX-NEXT: ret
7090
entry:
7191
%0 = call float @llvm.trunc.f32(float %i)
7292
ret float %0
@@ -80,7 +100,11 @@ define double @trunc_f64(double %i) nounwind {
80100
;
81101
; LSX-LABEL: trunc_f64:
82102
; LSX: # %bb.0: # %entry
83-
; LSX-NEXT: b %plt(trunc)
103+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
104+
; LSX-NEXT: vreplvei.d $vr0, $vr0, 0
105+
; LSX-NEXT: vfrintrz.d $vr0, $vr0
106+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
107+
; LSX-NEXT: ret
84108
entry:
85109
%0 = call double @llvm.trunc.f64(double %i)
86110
ret double %0
@@ -94,7 +118,11 @@ define float @roundeven_f32(float %i) nounwind {
94118
;
95119
; LSX-LABEL: roundeven_f32:
96120
; LSX: # %bb.0: # %entry
97-
; LSX-NEXT: b %plt(roundevenf)
121+
; LSX-NEXT: # kill: def $f0 killed $f0 def $vr0
122+
; LSX-NEXT: vreplvei.w $vr0, $vr0, 0
123+
; LSX-NEXT: vfrintrne.s $vr0, $vr0
124+
; LSX-NEXT: # kill: def $f0 killed $f0 killed $vr0
125+
; LSX-NEXT: ret
98126
entry:
99127
%0 = call float @llvm.roundeven.f32(float %i)
100128
ret float %0
@@ -108,7 +136,11 @@ define double @roundeven_f64(double %i) nounwind {
108136
;
109137
; LSX-LABEL: roundeven_f64:
110138
; LSX: # %bb.0: # %entry
111-
; LSX-NEXT: b %plt(roundeven)
139+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
140+
; LSX-NEXT: vreplvei.d $vr0, $vr0, 0
141+
; LSX-NEXT: vfrintrne.d $vr0, $vr0
142+
; LSX-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
143+
; LSX-NEXT: ret
112144
entry:
113145
%0 = call double @llvm.roundeven.f64(double %i)
114146
ret double %0

0 commit comments

Comments
 (0)