Skip to content

Commit 8e3b135

Browse files
[LLVM][NVPTX] Add support for div.full instruction
This commit adds NVPTX support for the div.full PTX instruction, with a test in div.ll.
1 parent 1e31a45 commit 8e3b135

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,13 @@ let TargetPrefix = "nvvm" in {
820820
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
821821
[IntrNoMem]>;
822822

823+
// NVVM intrinsic for a 32-bit float division: takes two float operands,
// returns a float, and is marked IntrNoMem. It is exposed to Clang as the
// __nvvm_div_full builtin.
// NOTE(review): per the commit description this corresponds to the PTX
// div.full instruction; its accuracy/rounding contract is not visible here.
def int_nvvm_div_full : ClangBuiltin<"__nvvm_div_full">,
824+
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
825+
[IntrNoMem]>;
826+
// "ftz"-suffixed counterpart of the div.full intrinsic, with the same
// signature: two float operands, one float result, IntrNoMem. It is exposed
// to Clang as the __nvvm_div_full_ftz builtin.
// NOTE(review): "ftz" presumably means flush-to-zero for subnormals, as in
// other NVVM intrinsics — confirm against the PTX ISA.
def int_nvvm_div_full_ftz : ClangBuiltin<"__nvvm_div_full_ftz">,
827+
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
828+
[IntrNoMem]>;
829+
823830
//
824831
// Sad
825832
//

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,18 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
10961096
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
10971097
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
10981098

1099+
// Instruction selection for int_nvvm_div_full.
// Register / register form: both operands live in Float32Regs and the call
// is selected to FDIV32rr.
def : Pat<(int_nvvm_div_full Float32Regs:$a, Float32Regs:$b),
1100+
(FDIV32rr Float32Regs:$a, Float32Regs:$b)>;
1101+
1102+
// Register / immediate form: a floating-point immediate divisor is selected
// to FDIV32ri.
// NOTE(review): the div.ll test in this commit expects a constant divisor to
// be moved into a register first (mov.f32 followed by a three-register
// div.full.f32), so this immediate pattern may not be exercised — confirm.
def : Pat<(int_nvvm_div_full Float32Regs:$a, fpimm:$b),
1103+
(FDIV32ri Float32Regs:$a, f32imm:$b)>;
1104+
1105+
// Instruction selection for int_nvvm_div_full_ftz.
// Register / register form: both operands live in Float32Regs and the call
// is selected to FDIV32rr_ftz.
def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, Float32Regs:$b),
1106+
(FDIV32rr_ftz Float32Regs:$a, Float32Regs:$b)>;
1107+
1108+
// Register / immediate form: a floating-point immediate divisor is selected
// to FDIV32ri_ftz.
// NOTE(review): the div.ll test in this commit expects a constant divisor to
// be moved into a register first (mov.f32 followed by a three-register
// div.full.ftz.f32), so this immediate pattern may not be exercised — confirm.
def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, fpimm:$b),
1109+
(FDIV32ri_ftz Float32Regs:$a, f32imm:$b)>;
1110+
10991111
//
11001112
// Sad
11011113
//

llvm/test/CodeGen/NVPTX/div.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
3+
4+
; Chains four divisions through llvm.nvvm.div.full and llvm.nvvm.div.full.ftz,
; each used once with a register divisor (%b) and once with a constant
; divisor, and verifies the PTX instruction emitted for every call.
define float @div_full(float %a, float %b) {
5+
; CHECK: div.full.f32 {{%f[0-9]+}}, {{%f[0-9]+}}, {{%f[0-9]+}}
6+
%1 = call float @llvm.nvvm.div.full(float %a, float %b)
7+
; Constant divisor 3.0 (0f40400000 is its 32-bit hex encoding): the expected
; output moves it into a register before the division.
; CHECK: mov.f32 {{%f[0-9]+}}, 0f40400000
8+
; CHECK: div.full.f32 {{%f[0-9]+}}, {{%f[0-9]+}}, {{%f[0-9]+}}
9+
%2 = call float @llvm.nvvm.div.full(float %1, float 3.0)
10+
; Same two cases for the .ftz variant of the intrinsic.
; CHECK: div.full.ftz.f32 {{%f[0-9]+}}, {{%f[0-9]+}}, {{%f[0-9]+}}
11+
%3 = call float @llvm.nvvm.div.full.ftz(float %2, float %b)
12+
; Constant divisor 4.0 (0f40800000 is its 32-bit hex encoding).
; CHECK: mov.f32 {{%f[0-9]+}}, 0f40800000
13+
; CHECK: div.full.ftz.f32 {{%f[0-9]+}}, {{%f[0-9]+}}, {{%f[0-9]+}}
14+
%4 = call float @llvm.nvvm.div.full.ftz(float %3, float 4.0)
15+
ret float %4
16+
}

0 commit comments

Comments
 (0)