Skip to content

[NVPTX] Switch front-ends and tests to ptx_kernel cc #120806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
Expand Down Expand Up @@ -79,13 +80,11 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand,
const SmallVectorImpl<int> &GridConstantArgs);
int Operand);

static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand) {
addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
}
static void
addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
const SmallVectorImpl<int> &GridConstantArgs);

private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
Expand Down Expand Up @@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
Expand All @@ -277,21 +276,21 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1, GCI);
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
addGridConstantNVVMMetadata(F, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
}

// Attach kernel metadata directly if compiling for NVPTX.
if (FD->hasAttr<NVPTXKernelAttr>()) {
addNVVMMetadata(F, "kernel", 1);
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
}
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::GlobalValue *GV, StringRef Name, int Operand,
const SmallVectorImpl<int> &GridConstantArgs) {
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();

Expand All @@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {

llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();

// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
Expand All @@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
llvm::MDNode::get(Ctx, GCM)});
}

// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
Expand Down
8 changes: 7 additions & 1 deletion clang/test/CodeGen/nvptx_attributes.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4
// CHECK-NEXT: ret void
//
__attribute__((nvptx_kernel)) void foo(int *ret) {
*ret = 1;
}

// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}
//.
// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" }
//.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the dot added by the check generator script?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep

// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
8 changes: 4 additions & 4 deletions clang/test/CodeGenCUDA/device-fun-linkage.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ template __device__ void func<int>();
// RDC: define weak_odr void @_Z4funcIiEvv()

template __global__ void kernel<int>();
// NORDC: define void @_Z6kernelIiEvv()
// RDC: define weak_odr void @_Z6kernelIiEvv()
// NORDC: define ptx_kernel void @_Z6kernelIiEvv()
// RDC: define weak_odr ptx_kernel void @_Z6kernelIiEvv()

// Ensure that unused static device function is eliminated
static __device__ void static_func() {}
Expand All @@ -28,5 +28,5 @@ static __device__ void static_func() {}
// Ensure that kernel function has external or weak_odr
// linkage regardless static specifier
static __global__ void static_kernel() {}
// NORDC: define void @_ZL13static_kernelv()
// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]()
// NORDC: define ptx_kernel void @_ZL13static_kernelv()
// RDC: define weak_odr ptx_kernel void @_ZL13static_kernelv[[FILEID:.*]]()
8 changes: 4 additions & 4 deletions clang/test/CodeGenCUDA/grid-constant.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ void foo() {
}
//.
//.
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META1]] = !{i32 1, i32 3}
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META3]] = !{i32 1}
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]}
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META6]] = !{i32 2}
//.
4 changes: 2 additions & 2 deletions clang/test/CodeGenCUDA/offload_via_llvm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#define __OFFLOAD_VIA_LLVM__ 1
#include "Inputs/cuda.h"

// HST-LABEL: define dso_local void @_Z18__device_stub__fooisPvS_(
// HST-LABEL: define dso_local ptx_kernel void @_Z18__device_stub__fooisPvS_(
// HST-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
// HST-NEXT: [[ENTRY:.*:]]
// HST-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
Expand Down Expand Up @@ -50,7 +50,7 @@
// HST: [[SETUP_END]]:
// HST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z3fooisPvS_(
// DEV-LABEL: define dso_local ptx_kernel void @_Z3fooisPvS_(
// DEV-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
Expand Down
7 changes: 2 additions & 5 deletions clang/test/CodeGenCUDA/ptx-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
extern "C"
__device__ void device_function() {}

// CHECK-LABEL: define{{.*}} void @global_function
// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function
extern "C"
__global__ void global_function() {
// CHECK: call void @device_function
Expand All @@ -19,7 +19,7 @@ __global__ void global_function() {

// Make sure host-instantiated kernels are preserved on device side.
template <typename T> __global__ void templated_kernel(T param) {}
// CHECK-DAG: define{{.*}} void @_Z16templated_kernelIiEvT_(
// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(

namespace {
__global__ void anonymous_ns_kernel() {}
Expand All @@ -30,6 +30,3 @@ void host_function() {
templated_kernel<<<0, 0>>>(0);
anonymous_ns_kernel<<<0,0>>>();
}

// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
4 changes: 1 addition & 3 deletions clang/test/CodeGenCUDA/usual-deallocators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) {
}

// Make sure that we've generated the kernel used by A::~A.
// DEVICE-LABEL: define void @_Z1fIiEvT_
// DEVICE-LABEL: define ptx_kernel void @_Z1fIiEvT_

// Make sure we've picked deallocator for the correct side of compilation.

Expand Down Expand Up @@ -147,5 +147,3 @@ __host__ __device__ void tests_hd(void *t) {
// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(ptr noundef %0)
// DEVICE: call void @dev_fn()
// HOST: call void @host_fn()

// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1}
4 changes: 1 addition & 3 deletions clang/test/CodeGenOpenCL/ptx-calls.cl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,5 @@ void device_function() {
__kernel void kernel_function() {
device_function();
}
// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
// CHECK: call void @device_function()
// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}

4 changes: 1 addition & 3 deletions clang/test/CodeGenOpenCL/ptx-kernels.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@ void device_function() {

__kernel void kernel_function() {
}
// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function()

// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1}
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
10 changes: 8 additions & 2 deletions clang/test/CodeGenOpenCL/reflect.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ bool device_function() {
return __nvvm_reflect("__CUDA_ARCH") >= 700;
}

// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4
Expand All @@ -26,3 +26,9 @@ bool device_function() {
__kernel void kernel_function(__global int *i) {
*i = device_function();
}
//.
// CHECK: [[META3]] = !{i32 1}
// CHECK: [[META4]] = !{!"none"}
// CHECK: [[META5]] = !{!"int*"}
// CHECK: [[META6]] = !{!""}
//.
2 changes: 1 addition & 1 deletion clang/test/Headers/gpuintrin.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
// AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]]
// AMDGPU-NEXT: unreachable
//
// NVPTX-LABEL: define protected void @foo(
// NVPTX-LABEL: define protected ptx_kernel void @foo(
// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]]
Expand Down
18 changes: 7 additions & 11 deletions llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
Expand Down Expand Up @@ -49,39 +50,34 @@ static std::string getHash(StringRef Str) {
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}

static void addKernelMetadata(Module &M, GlobalValue *GV) {
static void addKernelMetadata(Module &M, Function *F) {
llvm::LLVMContext &Ctx = M.getContext();

// Get "nvvm.annotations" metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");

llvm::Metadata *KernelMDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};

// This kernel is only to be called single-threaded.
llvm::Metadata *ThreadXMDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidx"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *ThreadYMDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidy"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
llvm::Metadata *ThreadZMDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidz"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};

llvm::Metadata *BlockMDVals[] = {
llvm::ConstantAsMetadata::get(GV),
llvm::ConstantAsMetadata::get(F),
llvm::MDString::get(Ctx, "maxclusterrank"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};

// Append metadata to nvvm.annotations.
MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));
F->setCallingConv(CallingConv::PTX_Kernel);
MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));
MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,13 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
}

bool isKernelFunction(const Function &F) {
if (F.getCallingConv() == CallingConv::PTX_Kernel)
return true;

if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
return (*X == 1);

// There is no NVVM metadata, check the calling convention
return F.getCallingConv() == CallingConv::PTX_Kernel;
return false;
}

MaybeAlign getAlign(const Function &F, unsigned Index) {
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

define i32 @daorder(i32 %n) {
define ptx_kernel i32 @daorder(i32 %n) {
; CHECK-LABEL: for function 'daorder'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
Expand Down Expand Up @@ -43,6 +43,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()

!nvvm.annotations = !{!0}
!0 = !{ptr @daorder, !"kernel", i32 1}
16 changes: 5 additions & 11 deletions llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
define ptx_kernel i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: for function 'no_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
Expand All @@ -27,7 +27,7 @@ merge:
; if (threadIdx.x < 5) // divergent: data dependent
; c = b;
; return c; // c is divergent: sync dependent
define i32 @sync(i32 %a, i32 %b) {
define ptx_kernel i32 @sync(i32 %a, i32 %b) {
; CHECK-LABEL: for function 'sync'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
Expand All @@ -49,7 +49,7 @@ bb3:
; }
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
; return c;
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
define ptx_kernel i32 @mixed(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: for function 'mixed'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
Expand Down Expand Up @@ -101,7 +101,7 @@ merge:
; return i == 10 ? 0 : 1; // i here is divergent
;
; The i defined in the loop is used outside.
define i32 @loop() {
define ptx_kernel i32 @loop() {
; CHECK-LABEL: for function 'loop'
entry:
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
Expand Down Expand Up @@ -149,7 +149,7 @@ else:
}

; Verifies sync-dependence is computed correctly in the absense of loops.
define i32 @sync_no_loop(i32 %arg) {
define ptx_kernel i32 @sync_no_loop(i32 %arg) {
; CHECK-LABEL: for function 'sync_no_loop'
entry:
%0 = add i32 %arg, 1
Expand All @@ -174,9 +174,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()

!nvvm.annotations = !{!0, !1, !2, !3, !4}
!0 = !{ptr @no_diverge, !"kernel", i32 1}
!1 = !{ptr @sync, !"kernel", i32 1}
!2 = !{ptr @mixed, !"kernel", i32 1}
!3 = !{ptr @loop, !"kernel", i32 1}
!4 = !{ptr @sync_no_loop, !"kernel", i32 1}
Loading
Loading