Skip to content

Commit 2f071f5

Browse files
committed
[NVPTX] Lower -1/x to neg.f64(rcp.rn.f64) instead of fdiv
The NVPTX backend lowers 1/x to the rcp.rn.f64 instruction instead of the slower fdiv instruction. However, for -1/x it still uses the slower fdiv instruction. After this change, -1/x is lowered to neg.f64(rcp.rn.f64).
1 parent 0fa20c5 commit 2f071f5

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,18 @@ def DoubleConst1 : PatLeaf<(fpimm), [{
11501150
return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
11511151
N->getValueAPF().convertToDouble() == 1.0;
11521152
}]>;
1153+
// Constant -1.0 (double): matches a floating-point immediate that has
// IEEE double semantics and whose value is exactly -1.0.
1154+
def DoubleConstNeg1 : PatLeaf<(fpimm), [{
1155+
return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
1156+
N->getValueAPF().convertToDouble() == -1.0;
1157+
}]>;
1158+
1159+
1160+
// Negates a double constant: -X -> X. Builds an f64 target FP constant
// holding the negated value of the matched floating-point immediate.
1161+
def NegDoubleConst : SDNodeXForm<fpimm, [{
1162+
return CurDAG->getTargetConstantFP(-(N->getValueAPF()),
1163+
SDLoc(N), MVT::f64);
1164+
}]>;
11531165

11541166
// Loads FP16 constant into a register.
11551167
//
@@ -1225,6 +1237,11 @@ def FDIV64ri :
12251237
"div.rn.f64 \t$dst, $a, $b;",
12261238
[(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>;
12271239

1240+
// fdiv by a register with a constant -1.0 numerator is converted to rcp + neg:
1241+
// fdiv -1.0, X => fneg (rcp.rn X)
// The -1.0 immediate is negated to 1.0 via NegDoubleConst and fed to FDIV641r
// (presumably the 1.0 / X form that emits rcp.rn.f64 -- its definition is
// outside this hunk); the result is then negated with FNEGf64.
1242+
def : Pat<(fdiv DoubleConstNeg1:$a, Float64Regs:$b),
1243+
(FNEGf64 (FDIV641r (NegDoubleConst node:$a), Float64Regs:$b))>;
1244+
12281245
//
12291246
// F32 Approximate reciprocal
12301247
//

llvm/test/CodeGen/NVPTX/rcp-opt.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
3+
4+
;; Check that fneg (fdiv 1.0, X) lowers to rcp.rn.f64 followed by neg.f64.
;; The fneg is expressed in IR as fsub -0.0, %div.
5+
6+
; CHECK-LABEL: .func{{.*}}test1
7+
define double @test1(double %in) {
8+
; CHECK: rcp.rn.f64 [[RCP:%.*]], [[X:%.*]];
9+
; CHECK-NEXT: neg.f64 [[FNEG:%.*]], [[RCP]];
10+
%div = fdiv double 1.000000e+00, %in
11+
%neg = fsub double -0.000000e+00, %div
12+
ret double %neg
13+
}
14+
15+
;; Check that fdiv -1.0, X lowers to rcp.rn.f64 followed by neg.f64
;; (i.e. fneg (rcp.rn X)).
16+
17+
; CHECK-LABEL: .func{{.*}}test2
18+
define double @test2(double %in) {
19+
; CHECK: rcp.rn.f64 [[RCP:%.*]], [[X:%.*]];
20+
; CHECK-NEXT: neg.f64 [[FNEG:%.*]], [[RCP]];
21+
%div = fdiv double -1.000000e+00, %in
22+
ret double %div
23+
}
24+
25+
;; Check that fdiv 1.0, (fneg X) lowers to rcp.rn.f64 followed by neg.f64
;; (i.e. fneg (rcp.rn X)). The fneg of the divisor is expressed in IR as
;; fsub -0.0, %in.
26+
27+
; CHECK-LABEL: .func{{.*}}test3
28+
define double @test3(double %in) {
29+
; CHECK: rcp.rn.f64 [[RCP:%.*]], [[X:%.*]];
30+
; CHECK-NEXT: neg.f64 [[FNEG:%.*]], [[RCP]];
31+
%neg = fsub double -0.000000e+00, %in
32+
%div = fdiv double 1.000000e+00, %neg
33+
ret double %div
34+
}

0 commit comments

Comments
 (0)