Skip to content

Commit 3d9b2ea

Browse files
committed
[NVPTX] Switch front-ends and tests to ptx_kernel cc
1 parent 1418018 commit 3d9b2ea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+283
-463
lines changed

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
1111
#include "llvm/ADT/STLExtras.h"
12+
#include "llvm/IR/CallingConv.h"
1213
#include "llvm/IR/IntrinsicsNVPTX.h"
1314

1415
using namespace clang;
@@ -79,13 +80,11 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
7980
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
8081
// resulting MDNode to the nvvm.annotations MDNode.
8182
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
82-
int Operand,
83-
const SmallVectorImpl<int> &GridConstantArgs);
83+
int Operand);
8484

85-
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
86-
int Operand) {
87-
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
88-
}
85+
static void
86+
addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
87+
const SmallVectorImpl<int> &GridConstantArgs);
8988

9089
private:
9190
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
259258
if (FD->hasAttr<OpenCLKernelAttr>()) {
260259
// OpenCL __kernel functions get kernel metadata
261260
// Create !{<func-ref>, metadata !"kernel", i32 1} node
262-
addNVVMMetadata(F, "kernel", 1);
261+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
263262
// And kernel functions are not subject to inlining
264263
F->addFnAttr(llvm::Attribute::NoInline);
265264
}
@@ -277,21 +276,21 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
277276
// For some reason arg indices are 1-based in NVVM
278277
GCI.push_back(IV.index() + 1);
279278
// Create !{<func-ref>, metadata !"kernel", i32 1} node
280-
addNVVMMetadata(F, "kernel", 1, GCI);
279+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
280+
addGridConstantNVVMMetadata(F, GCI);
281281
}
282282
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
283283
M.handleCUDALaunchBoundsAttr(F, Attr);
284284
}
285285

286286
// Attach kernel metadata directly if compiling for NVPTX.
287287
if (FD->hasAttr<NVPTXKernelAttr>()) {
288-
addNVVMMetadata(F, "kernel", 1);
288+
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
289289
}
290290
}
291291

292-
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
293-
llvm::GlobalValue *GV, StringRef Name, int Operand,
294-
const SmallVectorImpl<int> &GridConstantArgs) {
292+
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
293+
StringRef Name, int Operand) {
295294
llvm::Module *M = GV->getParent();
296295
llvm::LLVMContext &Ctx = M->getContext();
297296

@@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
302301
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
303302
llvm::ConstantAsMetadata::get(
304303
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
304+
305+
// Append metadata to nvvm.annotations
306+
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
307+
}
308+
309+
void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
310+
llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {
311+
312+
llvm::Module *M = GV->getParent();
313+
llvm::LLVMContext &Ctx = M->getContext();
314+
315+
// Get "nvvm.annotations" metadata node
316+
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
317+
318+
SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
305319
if (!GridConstantArgs.empty()) {
306320
SmallVector<llvm::Metadata *, 10> GCM;
307321
for (int I : GridConstantArgs)
@@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
310324
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
311325
llvm::MDNode::get(Ctx, GCM)});
312326
}
327+
313328
// Append metadata to nvvm.annotations
314329
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
315330
}

clang/test/CodeGenCUDA/grid-constant.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ void foo() {
2121
}
2222
//.
2323
//.
24-
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
24+
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
2525
// CHECK: [[META1]] = !{i32 1, i32 3}
26-
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
26+
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
2727
// CHECK: [[META3]] = !{i32 1}
28-
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
29-
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
28+
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
29+
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
3030
// CHECK: [[META6]] = !{i32 2}
3131
//.

clang/test/CodeGenCUDA/offload_via_llvm.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#define __OFFLOAD_VIA_LLVM__ 1
88
#include "Inputs/cuda.h"
99

10-
// HST-LABEL: define dso_local void @_Z18__device_stub__fooisPvS_(
10+
// HST-LABEL: define dso_local ptx_kernel void @_Z18__device_stub__fooisPvS_(
1111
// HST-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
1212
// HST-NEXT: [[ENTRY:.*:]]
1313
// HST-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
@@ -50,7 +50,7 @@
5050
// HST: [[SETUP_END]]:
5151
// HST-NEXT: ret void
5252
//
53-
// DEV-LABEL: define dso_local void @_Z3fooisPvS_(
53+
// DEV-LABEL: define dso_local ptx_kernel void @_Z3fooisPvS_(
5454
// DEV-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
5555
// DEV-NEXT: [[ENTRY:.*:]]
5656
// DEV-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4

clang/test/CodeGenCUDA/ptx-kernels.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
extern "C"
1111
__device__ void device_function() {}
1212

13-
// CHECK-LABEL: define{{.*}} void @global_function
13+
// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function
1414
extern "C"
1515
__global__ void global_function() {
1616
// CHECK: call void @device_function
@@ -19,7 +19,7 @@ __global__ void global_function() {
1919

2020
// Make sure host-instantiated kernels are preserved on device side.
2121
template <typename T> __global__ void templated_kernel(T param) {}
22-
// CHECK-DAG: define{{.*}} void @_Z16templated_kernelIiEvT_(
22+
// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
2323

2424
namespace {
2525
__global__ void anonymous_ns_kernel() {}
@@ -30,6 +30,3 @@ void host_function() {
3030
templated_kernel<<<0, 0>>>(0);
3131
anonymous_ns_kernel<<<0,0>>>();
3232
}
33-
34-
// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
35-
// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}

clang/test/CodeGenCUDA/usual-deallocators.cu

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) {
109109
}
110110

111111
// Make sure that we've generated the kernel used by A::~A.
112-
// DEVICE-LABEL: define void @_Z1fIiEvT_
112+
// DEVICE-LABEL: define ptx_kernel void @_Z1fIiEvT_
113113

114114
// Make sure we've picked deallocator for the correct side of compilation.
115115

@@ -147,5 +147,3 @@ __host__ __device__ void tests_hd(void *t) {
147147
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(ptr noundef %0)
148148
// DEVICE: call void @dev_fn()
149149
// HOST: call void @host_fn()
150-
151-
// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1}

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "MCTargetDesc/NVPTXBaseInfo.h"
1515
#include "NVPTX.h"
1616
#include "llvm/ADT/StringExtras.h"
17+
#include "llvm/IR/CallingConv.h"
1718
#include "llvm/IR/Constants.h"
1819
#include "llvm/IR/Function.h"
1920
#include "llvm/IR/GlobalVariable.h"
@@ -49,39 +50,34 @@ static std::string getHash(StringRef Str) {
4950
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
5051
}
5152

52-
static void addKernelMetadata(Module &M, GlobalValue *GV) {
53+
static void addKernelMetadata(Module &M, Function *F) {
5354
llvm::LLVMContext &Ctx = M.getContext();
5455

5556
// Get "nvvm.annotations" metadata node.
5657
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
5758

58-
llvm::Metadata *KernelMDVals[] = {
59-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),
60-
llvm::ConstantAsMetadata::get(
61-
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
62-
6359
// This kernel is only to be called single-threaded.
6460
llvm::Metadata *ThreadXMDVals[] = {
65-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),
61+
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidx"),
6662
llvm::ConstantAsMetadata::get(
6763
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
6864
llvm::Metadata *ThreadYMDVals[] = {
69-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),
65+
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidy"),
7066
llvm::ConstantAsMetadata::get(
7167
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
7268
llvm::Metadata *ThreadZMDVals[] = {
73-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),
69+
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidz"),
7470
llvm::ConstantAsMetadata::get(
7571
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
7672

7773
llvm::Metadata *BlockMDVals[] = {
78-
llvm::ConstantAsMetadata::get(GV),
74+
llvm::ConstantAsMetadata::get(F),
7975
llvm::MDString::get(Ctx, "maxclusterrank"),
8076
llvm::ConstantAsMetadata::get(
8177
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
8278

8379
// Append metadata to nvvm.annotations.
84-
MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));
80+
F->setCallingConv(CallingConv::PTX_Kernel);
8581
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
8682
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
8783
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));

llvm/lib/Target/NVPTX/NVPTXUtilities.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,11 +311,13 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
311311
}
312312

313313
bool isKernelFunction(const Function &F) {
314+
if (F.getCallingConv() == CallingConv::PTX_Kernel)
315+
return true;
316+
314317
if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
315318
return (*X == 1);
316319

317-
// There is no NVVM metadata, check the calling convention
318-
return F.getCallingConv() == CallingConv::PTX_Kernel;
320+
return false;
319321
}
320322

321323
MaybeAlign getAlign(const Function &F, unsigned Index) {

llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
44
target triple = "nvptx64-nvidia-cuda"
55

6-
define i32 @daorder(i32 %n) {
6+
define ptx_kernel i32 @daorder(i32 %n) {
77
; CHECK-LABEL: for function 'daorder'
88
entry:
99
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -43,6 +43,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
4343
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
4444
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
4545
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
46-
47-
!nvvm.annotations = !{!0}
48-
!0 = !{ptr @daorder, !"kernel", i32 1}

llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
44
target triple = "nvptx64-nvidia-cuda"
55

66
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
7-
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
7+
define ptx_kernel i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
88
; CHECK-LABEL: for function 'no_diverge'
99
entry:
1010
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -27,7 +27,7 @@ merge:
2727
; if (threadIdx.x < 5) // divergent: data dependent
2828
; c = b;
2929
; return c; // c is divergent: sync dependent
30-
define i32 @sync(i32 %a, i32 %b) {
30+
define ptx_kernel i32 @sync(i32 %a, i32 %b) {
3131
; CHECK-LABEL: for function 'sync'
3232
bb1:
3333
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
@@ -49,7 +49,7 @@ bb3:
4949
; }
5050
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
5151
; return c;
52-
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
52+
define ptx_kernel i32 @mixed(i32 %n, i32 %a, i32 %b) {
5353
; CHECK-LABEL: for function 'mixed'
5454
bb1:
5555
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
@@ -101,7 +101,7 @@ merge:
101101
; return i == 10 ? 0 : 1; // i here is divergent
102102
;
103103
; The i defined in the loop is used outside.
104-
define i32 @loop() {
104+
define ptx_kernel i32 @loop() {
105105
; CHECK-LABEL: for function 'loop'
106106
entry:
107107
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
@@ -149,7 +149,7 @@ else:
149149
}
150150

151151
; Verifies sync-dependence is computed correctly in the absense of loops.
152-
define i32 @sync_no_loop(i32 %arg) {
152+
define ptx_kernel i32 @sync_no_loop(i32 %arg) {
153153
; CHECK-LABEL: for function 'sync_no_loop'
154154
entry:
155155
%0 = add i32 %arg, 1
@@ -174,9 +174,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
174174
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
175175
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
176176

177-
!nvvm.annotations = !{!0, !1, !2, !3, !4}
178-
!0 = !{ptr @no_diverge, !"kernel", i32 1}
179-
!1 = !{ptr @sync, !"kernel", i32 1}
180-
!2 = !{ptr @mixed, !"kernel", i32 1}
181-
!3 = !{ptr @loop, !"kernel", i32 1}
182-
!4 = !{ptr @sync_no_loop, !"kernel", i32 1}

llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
44
target triple = "nvptx64-nvidia-cuda"
55

6-
define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
6+
define ptx_kernel i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
77
; CHECK-LABEL: for function 'hidden_diverge'
88
entry:
99
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -27,6 +27,3 @@ merge:
2727
}
2828

2929
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
30-
31-
!nvvm.annotations = !{!0}
32-
!0 = !{ptr @hidden_diverge, !"kernel", i32 1}

llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ target triple = "nvptx64-nvidia-cuda"
2323
; V
2424
; if (i3 == 5) // divergent
2525
; because sync dependent on (tid / i3).
26-
define i32 @unstructured_loop(i1 %entry_cond) {
26+
define ptx_kernel i32 @unstructured_loop(i1 %entry_cond) {
2727
; CHECK-LABEL: for function 'unstructured_loop'
2828
entry:
2929
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
@@ -59,5 +59,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
5959
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
6060
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
6161

62-
!nvvm.annotations = !{!0}
63-
!0 = !{ptr @unstructured_loop, !"kernel", i32 1}

llvm/test/CodeGen/NVPTX/b52037.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ declare %int3 @hoge(i32, i32, i32) local_unnamed_addr
3939

4040
declare i64 @foo() local_unnamed_addr
4141

42-
define void @barney(ptr nocapture readonly %arg) local_unnamed_addr {
42+
define ptx_kernel void @barney(ptr nocapture readonly %arg) local_unnamed_addr {
4343
bb:
4444
tail call void asm sideeffect "// KEEP", ""() #1
4545
%tmp = alloca %struct.zot, align 16
@@ -210,9 +210,6 @@ bb14: ; preds = %bb49.i.lr.ph, %bb49
210210
attributes #0 = { argmemonly mustprogress nofree nounwind willreturn }
211211
attributes #1 = { nounwind }
212212

213-
!nvvm.annotations = !{!0}
214-
215-
!0 = !{ptr @barney, !"kernel", i32 1}
216213
!1 = !{!2, !11, i64 64}
217214
!2 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !3, i64 0, !3, i64 8, !6, i64 16, !8, i64 32, !9, i64 44, !10, i64 48, !11, i64 64, !9, i64 72, !4, i64 76, !9, i64 80}
218215
!3 = !{!"any pointer", !4, i64 0}

llvm/test/CodeGen/NVPTX/bug21465.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "nvptx64-unknown-unknown"
88
%struct.S = type { i32, i32 }
99

1010
; Function Attrs: nounwind
11-
define void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 {
11+
define ptx_kernel void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 {
1212
entry:
1313
; CHECK-LABEL: @_Z11TakesStruct1SPi
1414
; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi(
@@ -23,7 +23,3 @@ entry:
2323
}
2424

2525
attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
26-
27-
!nvvm.annotations = !{!0}
28-
29-
!0 = !{ptr @_Z11TakesStruct1SPi, !"kernel", i32 1}

llvm/test/CodeGen/NVPTX/bug22322.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "nvptx64-nvidia-cuda"
88

99
; Function Attrs: nounwind
1010
; CHECK-LABEL: some_kernel
11-
define void @some_kernel(ptr nocapture %dst) #0 {
11+
define ptx_kernel void @some_kernel(ptr nocapture %dst) #0 {
1212
_ZL11compute_vecRK6float3jb.exit:
1313
%ret_vec.sroa.8.i = alloca float, align 4
1414
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
@@ -55,8 +55,5 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "n
5555
attributes #1 = { nounwind readnone }
5656
attributes #2 = { nounwind }
5757

58-
!nvvm.annotations = !{!0}
5958
!llvm.ident = !{!1}
60-
61-
!0 = !{ptr @some_kernel, !"kernel", i32 1}
6259
!1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"}

0 commit comments

Comments
 (0)