Skip to content

Commit e846148

Browse files
[LLVM][NVPTX] Add support for div.full instruction (#116482)
This commit adds NVPTX support for the `div.full` PTX instruction, with a test in div.ll. For more information, see the [PTX ISA documentation](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div).
1 parent 3359806 commit e846148

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,13 @@ let TargetPrefix = "nvvm" in {
820820
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
821821
[IntrNoMem]>;
822822

823+
// Intrinsic backing the PTX div.full instruction (per the commit description).
// Takes two f32 operands and returns an f32; it is declared IntrNoMem and is
// exposed to Clang as the __nvvm_div_full builtin.
def int_nvvm_div_full : ClangBuiltin<"__nvvm_div_full">,
824+
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
825+
[IntrNoMem]>;
826+
// The _ftz counterpart of the div.full intrinsic, with the same signature
// (two f32 operands, f32 result, IntrNoMem), exposed to Clang as the
// __nvvm_div_full_ftz builtin.
// NOTE(review): "ftz" presumably denotes flush-to-zero handling of subnormals
// as described in the PTX ISA -- confirm against the PTX documentation.
def int_nvvm_div_full_ftz : ClangBuiltin<"__nvvm_div_full_ftz">,
827+
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
828+
[IntrNoMem]>;
829+
823830
//
824831
// Sad
825832
//

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,18 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
10961096
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
10971097
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
10981098

1099+
// Select int_nvvm_div_full with both operands in Float32Regs to the FDIV32rr
// instruction (FDIV32rr itself is defined outside this hunk).
def : Pat<(int_nvvm_div_full Float32Regs:$a, Float32Regs:$b),
1100+
(FDIV32rr Float32Regs:$a, Float32Regs:$b)>;
1101+
1102+
// Select int_nvvm_div_full whose second operand is a floating-point immediate
// (fpimm) to the FDIV32ri instruction (defined outside this hunk).
// NOTE(review): the expected output in div.ll moves the constant into a
// register with mov.f32 before the divide, so this immediate form may not be
// exercised by that test -- confirm.
def : Pat<(int_nvvm_div_full Float32Regs:$a, fpimm:$b),
1103+
(FDIV32ri Float32Regs:$a, f32imm:$b)>;
1104+
1105+
// Select int_nvvm_div_full_ftz with both operands in Float32Regs to the
// FDIV32rr_ftz instruction (defined outside this hunk).
def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, Float32Regs:$b),
1106+
(FDIV32rr_ftz Float32Regs:$a, Float32Regs:$b)>;
1107+
1108+
// Select int_nvvm_div_full_ftz whose second operand is a floating-point
// immediate (fpimm) to the FDIV32ri_ftz instruction (defined outside this hunk).
// NOTE(review): the expected output in div.ll moves the constant into a
// register with mov.f32 before the divide, so this immediate form may not be
// exercised by that test -- confirm.
def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, fpimm:$b),
1109+
(FDIV32ri_ftz Float32Regs:$a, f32imm:$b)>;
1110+
10991111
//
11001112
// Sad
11011113
//

llvm/test/CodeGen/NVPTX/div.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
4+
5+
; Chains four divisions through the new intrinsics: llvm.nvvm.div.full with a
; register divisor, llvm.nvvm.div.full with the constant 3.0, then
; llvm.nvvm.div.full.ftz with a register divisor and with the constant 4.0.
; The autogenerated assertions below expect one div.full.f32 or
; div.full.ftz.f32 per call, with each constant first moved into a register.
define float @div_full(float %a, float %b) {
6+
; CHECK-LABEL: div_full(
7+
; CHECK: {
8+
; CHECK-NEXT: .reg .f32 %f<9>;
9+
; CHECK-EMPTY:
10+
; CHECK-NEXT: // %bb.0:
11+
; CHECK-NEXT: ld.param.f32 %f1, [div_full_param_0];
12+
; CHECK-NEXT: ld.param.f32 %f2, [div_full_param_1];
13+
; CHECK-NEXT: div.full.f32 %f3, %f1, %f2;
14+
; CHECK-NEXT: mov.f32 %f4, 0f40400000;
15+
; CHECK-NEXT: div.full.f32 %f5, %f3, %f4;
16+
; CHECK-NEXT: div.full.ftz.f32 %f6, %f5, %f2;
17+
; CHECK-NEXT: mov.f32 %f7, 0f40800000;
18+
; CHECK-NEXT: div.full.ftz.f32 %f8, %f6, %f7;
19+
; CHECK-NEXT: st.param.f32 [func_retval0], %f8;
20+
; CHECK-NEXT: ret;
21+
%1 = call float @llvm.nvvm.div.full(float %a, float %b)
22+
%2 = call float @llvm.nvvm.div.full(float %1, float 3.0)
23+
%3 = call float @llvm.nvvm.div.full.ftz(float %2, float %b)
24+
%4 = call float @llvm.nvvm.div.full.ftz(float %3, float 4.0)
25+
ret float %4
26+
}

0 commit comments

Comments
 (0)