Skip to content

Commit 8d5c50f

Browse files
committed
[NVPTX] Switch front-ends and tests to ptx_kernel cc
1 parent 94c0db0 commit 8d5c50f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+305
-477
lines changed

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
1111
#include "llvm/ADT/STLExtras.h"
12+
#include "llvm/IR/CallingConv.h"
1213
#include "llvm/IR/IntrinsicsNVPTX.h"
1314

1415
using namespace clang;
@@ -79,13 +80,11 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7980
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
8081
// resulting MDNode to the nvvm.annotations MDNode.
8182
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
82-
int Operand,
83-
const SmallVectorImpl<int> &GridConstantArgs);
83+
int Operand);
8484

85-
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
86-
int Operand) {
87-
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
88-
}
85+
static void
86+
addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
87+
const SmallVectorImpl<int> &GridConstantArgs);
8988

9089
private:
9190
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
259258
if (FD->hasAttr<OpenCLKernelAttr>()) {
260259
// OpenCL __kernel functions get kernel metadata
261260
// Create !{<func-ref>, metadata !"kernel", i32 1} node
262-
addNVVMMetadata(F, "kernel", 1);
261+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
263262
// And kernel functions are not subject to inlining
264263
F->addFnAttr(llvm::Attribute::NoInline);
265264
}
@@ -277,21 +276,21 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
277276
// For some reason arg indices are 1-based in NVVM
278277
GCI.push_back(IV.index() + 1);
279278
// Create !{<func-ref>, metadata !"kernel", i32 1} node
280-
addNVVMMetadata(F, "kernel", 1, GCI);
279+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
280+
addGridConstantNVVMMetadata(F, GCI);
281281
}
282282
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
283283
M.handleCUDALaunchBoundsAttr(F, Attr);
284284
}
285285

286286
// Attach kernel metadata directly if compiling for NVPTX.
287287
if (FD->hasAttr<NVPTXKernelAttr>()) {
288-
addNVVMMetadata(F, "kernel", 1);
288+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
289289
}
290290
}
291291

292-
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
293-
llvm::GlobalValue *GV, StringRef Name, int Operand,
294-
const SmallVectorImpl<int> &GridConstantArgs) {
292+
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
293+
StringRef Name, int Operand) {
295294
llvm::Module *M = GV->getParent();
296295
llvm::LLVMContext &Ctx = M->getContext();
297296

@@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
302301
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
303302
llvm::ConstantAsMetadata::get(
304303
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
304+
305+
// Append metadata to nvvm.annotations
306+
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307+
}
308+
309+
void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
310+
llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
311+
312+
llvm::Module *M = GV->getParent();
313+
llvm::LLVMContext &Ctx = M->getContext();
314+
315+
// Get "nvvm.annotations" metadata node
316+
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
317+
318+
SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
305319
if (!GridConstantArgs.empty()) {
306320
SmallVector<llvm::Metadata *, 10> GCM;
307321
for (int I : GridConstantArgs)
@@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
310324
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
311325
llvm::MDNode::get(Ctx, GCM)});
312326
}
327+
313328
// Append metadata to nvvm.annotations
314329
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
315330
}

clang/test/CodeGen/nvptx_attributes.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,14 @@
1010
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
1111
// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4
1212
// CHECK-NEXT: ret void
13+
//
1314
__attribute__((nvptx_kernel)) void foo(int *ret) {
1415
*ret = 1;
1516
}
1617

17-
// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
18+
//.
19+
// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
20+
//.
21+
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
22+
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
23+
//.

clang/test/CodeGenCUDA/device-fun-linkage.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ template __device__ void func<int>();
1717
// RDC: define weak_odr void @_Z4funcIiEvv()
1818

1919
template __global__ void kernel<int>();
20-
// NORDC: define void @_Z6kernelIiEvv()
21-
// RDC: define weak_odr void @_Z6kernelIiEvv()
20+
// NORDC: define ptx_kernel void @_Z6kernelIiEvv()
21+
// RDC: define weak_odr ptx_kernel void @_Z6kernelIiEvv()
2222

2323
// Ensure that unused static device function is eliminated
2424
static __device__ void static_func() {}
@@ -28,5 +28,5 @@ static __device__ void static_func() {}
2828
// Ensure that kernel function has external or weak_odr
2929
// linkage regardless static specifier
3030
static __global__ void static_kernel() {}
31-
// NORDC: define void @_ZL13static_kernelv()
32-
// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]()
31+
// NORDC: define ptx_kernel void @_ZL13static_kernelv()
32+
// RDC: define weak_odr ptx_kernel void @_ZL13static_kernelv[[FILEID:.*]]()

clang/test/CodeGenCUDA/grid-constant.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ void foo() {
2121
}
2222
//.
2323
//.
24-
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
24+
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
2525
// CHECK: [[META1]] = !{i32 1, i32 3}
26-
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
26+
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
2727
// CHECK: [[META3]] = !{i32 1}
28-
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
29-
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
28+
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
29+
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
3030
// CHECK: [[META6]] = !{i32 2}
3131
//.

clang/test/CodeGenCUDA/offload_via_llvm.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#define __OFFLOAD_VIA_LLVM__ 1
88
#include "Inputs/cuda.h"
99

10-
// HST-LABEL: define dso_local void @_Z18__device_stub__fooisPvS_(
10+
// HST-LABEL: define dso_local ptx_kernel void @_Z18__device_stub__fooisPvS_(
1111
// HST-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
1212
// HST-NEXT: [[ENTRY:.*:]]
1313
// HST-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
@@ -50,7 +50,7 @@
5050
// HST: [[SETUP_END]]:
5151
// HST-NEXT: ret void
5252
//
53-
// DEV-LABEL: define dso_local void @_Z3fooisPvS_(
53+
// DEV-LABEL: define dso_local ptx_kernel void @_Z3fooisPvS_(
5454
// DEV-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
5555
// DEV-NEXT: [[ENTRY:.*:]]
5656
// DEV-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4

clang/test/CodeGenCUDA/ptx-kernels.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
extern "C"
1111
__device__ void device_function() {}
1212

13-
// CHECK-LABEL: define{{.*}} void @global_function
13+
// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function
1414
extern "C"
1515
__global__ void global_function() {
1616
// CHECK: call void @device_function
@@ -19,7 +19,7 @@ __global__ void global_function() {
1919

2020
// Make sure host-instantiated kernels are preserved on device side.
2121
template <typename T> __global__ void templated_kernel(T param) {}
22-
// CHECK-DAG: define{{.*}} void @_Z16templated_kernelIiEvT_(
22+
// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
2323

2424
namespace {
2525
__global__ void anonymous_ns_kernel() {}
@@ -30,6 +30,3 @@ void host_function() {
3030
templated_kernel<<<0, 0>>>(0);
3131
anonymous_ns_kernel<<<0,0>>>();
3232
}
33-
34-
// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
35-
// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}

clang/test/CodeGenCUDA/usual-deallocators.cu

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) {
109109
}
110110

111111
// Make sure that we've generated the kernel used by A::~A.
112-
// DEVICE-LABEL: define void @_Z1fIiEvT_
112+
// DEVICE-LABEL: define ptx_kernel void @_Z1fIiEvT_
113113

114114
// Make sure we've picked deallocator for the correct side of compilation.
115115

@@ -147,5 +147,3 @@ __host__ __device__ void tests_hd(void *t) {
147147
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(ptr noundef %0)
148148
// DEVICE: call void @dev_fn()
149149
// HOST: call void @host_fn()
150-
151-
// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1}

clang/test/CodeGenOpenCL/ptx-calls.cl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,5 @@ void device_function() {
77
__kernel void kernel_function() {
88
device_function();
99
}
10-
// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
10+
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
1111
// CHECK: call void @device_function()
12-
// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
13-

clang/test/CodeGenOpenCL/ptx-kernels.cl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,4 @@ void device_function() {
66

77
__kernel void kernel_function() {
88
}
9-
// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
10-
11-
// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
9+
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()

clang/test/CodeGenOpenCL/reflect.cl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ bool device_function() {
1212
return __nvvm_reflect("__CUDA_ARCH") >= 700;
1313
}
1414

15-
// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
16-
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
15+
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
16+
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
1717
// CHECK-NEXT: entry:
1818
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
1919
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
@@ -26,3 +26,9 @@ bool device_function() {
2626
__kernel void kernel_function(__global int *i) {
2727
*i = device_function();
2828
}
29+
//.
30+
// CHECK: [[META3]] = !{i32 1}
31+
// CHECK: [[META4]] = !{!"none"}
32+
// CHECK: [[META5]] = !{!"int*"}
33+
// CHECK: [[META6]] = !{!""}
34+
//.

clang/test/Headers/gpuintrin.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
// AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]]
4545
// AMDGPU-NEXT: unreachable
4646
//
47-
// NVPTX-LABEL: define protected void @foo(
47+
// NVPTX-LABEL: define protected ptx_kernel void @foo(
4848
// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
4949
// NVPTX-NEXT: [[ENTRY:.*:]]
5050
// NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]]

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "MCTargetDesc/NVPTXBaseInfo.h"
1515
#include "NVPTX.h"
1616
#include "llvm/ADT/StringExtras.h"
17+
#include "llvm/IR/CallingConv.h"
1718
#include "llvm/IR/Constants.h"
1819
#include "llvm/IR/Function.h"
1920
#include "llvm/IR/GlobalVariable.h"
@@ -49,39 +50,34 @@ static std::string getHash(StringRef Str) {
4950
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
5051
}
5152

52-
static void addKernelMetadata(Module &M, GlobalValue *GV) {
53+
static void addKernelMetadata(Module &M, Function *F) {
5354
llvm::LLVMContext &Ctx = M.getContext();
5455

5556
// Get "nvvm.annotations" metadata node.
5657
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
5758

58-
llvm::Metadata *KernelMDVals[] = {
59-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),
60-
llvm::ConstantAsMetadata::get(
61-
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
62-
6359
// This kernel is only to be called single-threaded.
6460
llvm::Metadata *ThreadXMDVals[] = {
65-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),
61+
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidx"),
6662
llvm::ConstantAsMetadata::get(
6763
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
6864
llvm::Metadata *ThreadYMDVals[] = {
69-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),
65+
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidy"),
7066
llvm::ConstantAsMetadata::get(
7167
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
7268
llvm::Metadata *ThreadZMDVals[] = {
73-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),
69+
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidz"),
7470
llvm::ConstantAsMetadata::get(
7571
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
7672

7773
llvm::Metadata *BlockMDVals[] = {
78-
llvm::ConstantAsMetadata::get(GV),
74+
llvm::ConstantAsMetadata::get(F),
7975
llvm::MDString::get(Ctx, "maxclusterrank"),
8076
llvm::ConstantAsMetadata::get(
8177
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
8278

8379
// Append metadata to nvvm.annotations.
84-
MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));
80+
F->setCallingConv(CallingConv::PTX_Kernel);
8581
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
8682
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
8783
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));

llvm/lib/Target/NVPTX/NVPTXUtilities.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,11 +311,13 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
311311
}
312312

313313
bool isKernelFunction(const Function &F) {
314+
if (F.getCallingConv() == CallingConv::PTX_Kernel)
315+
return true;
316+
314317
if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
315318
return (*X == 1);
316319

317-
// There is no NVVM metadata, check the calling convention
318-
return F.getCallingConv() == CallingConv::PTX_Kernel;
320+
return false;
319321
}
320322

321323
MaybeAlign getAlign(const Function &F, unsigned Index) {

llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
44
target triple = "nvptx64-nvidia-cuda"
55

6-
define i32 @daorder(i32 %n) {
6+
define ptx_kernel i32 @daorder(i32 %n) {
77
; CHECK-LABEL: for function 'daorder'
88
entry:
99
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -43,6 +43,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
4343
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
4444
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
4545
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
46-
47-
!nvvm.annotations = !{!0}
48-
!0 = !{ptr @daorder, !"kernel", i32 1}

llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
44
target triple = "nvptx64-nvidia-cuda"
55

66
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
7-
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
7+
define ptx_kernel i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
88
; CHECK-LABEL: for function 'no_diverge'
99
entry:
1010
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -27,7 +27,7 @@ merge:
2727
; if (threadIdx.x < 5) // divergent: data dependent
2828
; c = b;
2929
; return c; // c is divergent: sync dependent
30-
define i32 @sync(i32 %a, i32 %b) {
30+
define ptx_kernel i32 @sync(i32 %a, i32 %b) {
3131
; CHECK-LABEL: for function 'sync'
3232
bb1:
3333
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
@@ -49,7 +49,7 @@ bb3:
4949
; }
5050
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
5151
; return c;
52-
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
52+
define ptx_kernel i32 @mixed(i32 %n, i32 %a, i32 %b) {
5353
; CHECK-LABEL: for function 'mixed'
5454
bb1:
5555
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
@@ -101,7 +101,7 @@ merge:
101101
; return i == 10 ? 0 : 1; // i here is divergent
102102
;
103103
; The i defined in the loop is used outside.
104-
define i32 @loop() {
104+
define ptx_kernel i32 @loop() {
105105
; CHECK-LABEL: for function 'loop'
106106
entry:
107107
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
@@ -149,7 +149,7 @@ else:
149149
}
150150

151151
; Verifies sync-dependence is computed correctly in the absense of loops.
152-
define i32 @sync_no_loop(i32 %arg) {
152+
define ptx_kernel i32 @sync_no_loop(i32 %arg) {
153153
; CHECK-LABEL: for function 'sync_no_loop'
154154
entry:
155155
%0 = add i32 %arg, 1
@@ -174,9 +174,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
174174
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
175175
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
176176

177-
!nvvm.annotations = !{!0, !1, !2, !3, !4}
178-
!0 = !{ptr @no_diverge, !"kernel", i32 1}
179-
!1 = !{ptr @sync, !"kernel", i32 1}
180-
!2 = !{ptr @mixed, !"kernel", i32 1}
181-
!3 = !{ptr @loop, !"kernel", i32 1}
182-
!4 = !{ptr @sync_no_loop, !"kernel", i32 1}

0 commit comments

Comments
 (0)