Skip to content

Commit 3c70731

Browse files
authored
[NVPTX] Add clang builtin for __nvvm_reflect intrinsic (#81277)
Summary: Some recent support made usage of `__nvvm_reflect` more consistent. We should expose it as a builtin rather than forcing users to externally define the function.
1 parent 5f26b90 commit 3c70731

File tree

6 files changed

+43
-5
lines changed

6 files changed

+43
-5
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
159159

160160
BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
161161
BUILTIN(__nvvm_exit, "v", "r")
162+
BUILTIN(__nvvm_reflect, "UicC*", "r")
162163
TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63))
163164

164165
// Min Max

clang/test/CodeGen/builtins-nvptx.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ __device__ int read_tid() {
4444

4545
}
4646

47+
__device__ bool reflect() {
48+
49+
// CHECK: call i32 @llvm.nvvm.reflect(ptr {{.*}})
50+
51+
unsigned x = __nvvm_reflect("__CUDA_ARCH");
52+
return x >= 700;
53+
}
54+
4755
__device__ int read_ntid() {
4856

4957
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()

clang/test/CodeGenOpenCL/reflect.cl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2+
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
3+
4+
// CHECK-LABEL: define dso_local zeroext i1 @device_function(
5+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
6+
// CHECK-NEXT: entry:
7+
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr))
8+
// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700
9+
// CHECK-NEXT: ret i1 [[CMP]]
10+
//
11+
bool device_function() {
12+
return __nvvm_reflect("__CUDA_ARCH") >= 700;
13+
}
14+
15+
// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
16+
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
17+
// CHECK-NEXT: entry:
18+
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
19+
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
20+
// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]]
21+
// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32
22+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4
23+
// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4
24+
// CHECK-NEXT: ret void
25+
//
26+
__kernel void kernel_function(__global int *i) {
27+
*i = device_function();
28+
}

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1625,7 +1625,8 @@ def int_nvvm_compiler_warn :
16251625
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
16261626

16271627
def int_nvvm_reflect :
1628-
Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">;
1628+
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">,
1629+
ClangBuiltin<"__nvvm_reflect">;
16291630

16301631
// isspacep.{const, global, local, shared}
16311632
def int_nvvm_isspacep_const

llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ exit:
4141
ret float %ret
4242
}
4343

44-
declare i32 @llvm.nvvm.reflect.p0(ptr)
44+
declare i32 @llvm.nvvm.reflect(ptr)
4545

4646
; CHECK-LABEL: define noundef i32 @intrinsic
4747
define i32 @intrinsic() {
4848
; CHECK-NOT: call i32 @llvm.nvvm.reflect
4949
; USE_FTZ_0: ret i32 0
5050
; USE_FTZ_1: ret i32 1
5151
%ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
52-
%reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
52+
%reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
5353
ret i32 %reflect
5454
}
5555

llvm/test/CodeGen/NVPTX/nvvm-reflect.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ exit:
4141
ret float %ret
4242
}
4343

44-
declare i32 @llvm.nvvm.reflect.p0(ptr)
44+
declare i32 @llvm.nvvm.reflect(ptr)
4545

4646
; CHECK-LABEL: define noundef i32 @intrinsic
4747
define i32 @intrinsic() {
4848
; CHECK-NOT: call i32 @llvm.nvvm.reflect
4949
; USE_FTZ_0: ret i32 0
5050
; USE_FTZ_1: ret i32 1
5151
%ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
52-
%reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
52+
%reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
5353
ret i32 %reflect
5454
}
5555

0 commit comments

Comments
 (0)