Skip to content

[SYCL] Add new kernel-arg-runtime-aligned metadata. #5111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1280,15 +1280,17 @@ def SYCLRegisterNum : InheritableAttr {
let Documentation = [SYCLRegisterNumDocs];
}

// Used by FE to mark ESIMD kernel pointer parameters which correspond to the
// Used by FE to mark SYCL kernel pointer parameters which correspond to the
// original lambda's captured accessors. FE turns the attribute to some metadata
// required by the ESIMD Back-End.
// Not supposed to be used directly in the source - SYCL device compiler FE
// automatically adds it for ESIMD kernels, hence undocumented.
def SYCLSimdAccessorPtr : InheritableAttr {
// No spelling, as this attribute can't be created in the source code.
// required by the device back-end.
// This attribute does not require custom semantic handling
// hence we set the SemaHandler field to 0.
// The attribute is not for public consumption, and is an implicitly-created attribute
// that has no visible spelling, hence undocumented.
def SYCLAccessorPtr : Attr {
// This attribute has no spellings as it is only ever created implicitly.
let Spellings = [];
let Subjects = SubjectList<[ParmVar]>;
let SemaHandler = 0;
let Documentation = [Undocumented];
}

Expand Down
35 changes: 31 additions & 4 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1728,10 +1728,16 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
// MDNode for the intel_buffer_location attribute.
SmallVector<llvm::Metadata *, 8> argSYCLBufferLocationAttr;

// MDNode for listing SYCL kernel pointer arguments originating from
// accessors.
SmallVector<llvm::Metadata *, 8> argSYCLKernelRuntimeAligned;

// MDNode for listing ESIMD kernel pointer arguments originating from
// accessors
// accessors.
SmallVector<llvm::Metadata *, 8> argESIMDAccPtrs;

bool isKernelArgAnAccessor = false;

if (FD && CGF)
for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
const ParmVarDecl *parm = FD->getParamDecl(i);
Expand Down Expand Up @@ -1835,17 +1841,38 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
SYCLBufferLocationAttr->getLocationID()))
: llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(-1)));

// If a kernel pointer argument comes from an accessor, we generate
// a new metadata(kernel_arg_runtime_aligned) to the kernel to indicate
// that this pointer has runtime allocated alignment. The value of any
// "kernel_arg_runtime_aligned" metadata element is 'true' for any kernel
// arguments that corresponds to the base pointer of an accessor and
// 'false' otherwise.
if (parm->hasAttr<SYCLAccessorPtrAttr>()) {
isKernelArgAnAccessor = true;
argSYCLKernelRuntimeAligned.push_back(
llvm::ConstantAsMetadata::get(CGF->Builder.getTrue()));
} else {
argSYCLKernelRuntimeAligned.push_back(
llvm::ConstantAsMetadata::get(CGF->Builder.getFalse()));
}

if (FD->hasAttr<SYCLSimdAttr>())
argESIMDAccPtrs.push_back(llvm::ConstantAsMetadata::get(
CGF->Builder.getInt1(parm->hasAttr<SYCLSimdAccessorPtrAttr>())));
CGF->Builder.getInt1(parm->hasAttr<SYCLAccessorPtrAttr>())));
}

bool IsEsimdFunction = FD && FD->hasAttr<SYCLSimdAttr>();

if (LangOpts.SYCLIsDevice && !IsEsimdFunction)
if (LangOpts.SYCLIsDevice && !IsEsimdFunction) {
Fn->setMetadata("kernel_arg_buffer_location",
llvm::MDNode::get(VMContext, argSYCLBufferLocationAttr));
else {
// Generate this metadata only if atleast one kernel argument is an
// accessor.
if (isKernelArgAnAccessor)
Fn->setMetadata(
"kernel_arg_runtime_aligned",
llvm::MDNode::get(VMContext, argSYCLKernelRuntimeAligned));
} else {
Fn->setMetadata("kernel_arg_addr_space",
llvm::MDNode::get(VMContext, addressQuals));
Fn->setMetadata("kernel_arg_access_qual",
Expand Down
6 changes: 2 additions & 4 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1962,10 +1962,6 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
// Additional processing is required for accessor type.
void handleAccessorType(const CXXRecordDecl *RecordDecl, SourceLocation Loc) {
handleAccessorPropertyList(Params.back(), RecordDecl, Loc);
if (KernelDecl->hasAttr<SYCLSimdAttr>())
// In ESIMD, the kernels accessor's pointer argument needs to be marked.
Params.back()->addAttr(
SYCLSimdAccessorPtrAttr::CreateImplicit(SemaRef.getASTContext()));
// Get access mode of accessor.
const auto *AccessorSpecializationDecl =
cast<ClassTemplateSpecializationDecl>(RecordDecl);
Expand All @@ -1977,6 +1973,8 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
if (isReadOnlyAccessor(AccessModeArg))
Params.back()->addAttr(
SYCLAccessorReadonlyAttr::CreateImplicit(SemaRef.getASTContext()));
Params.back()->addAttr(
SYCLAccessorPtrAttr::CreateImplicit(SemaRef.getASTContext()));
}

// All special SYCL objects must have __init method. We extract types for
Expand Down
153 changes: 153 additions & 0 deletions clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s

// This test checks if the metadata "kernel-arg-runtime-aligned"
// is generated if the kernel captures an accessor.

#include "sycl.hpp"

using namespace cl::sycl;

queue q;

int main() {

using Accessor =
accessor<int, 1, access::mode::read_write, access::target::global_buffer>;
Accessor acc[2];

accessor<int, 1, access::mode::read, access::target::global_buffer> readOnlyAccessor;

accessor<float, 2, access::mode::write,
access::target::local,
access::placeholder::true_t>
acc3;

// kernel_A parameters : int*, sycl::range<1>, sycl::range<1>, sycl::id<1>,
// int*, sycl::range<1>, sycl::range<1>,sycl::id<1>.
q.submit([&](handler &h) {
h.single_task<class kernel_A>([=]() {
acc[1].use();
});
});

// kernel_readOnlyAcc parameters : int*, sycl::range<1>, sycl::range<1>, sycl::id<1>.
q.submit([&](handler &h) {
h.single_task<class kernel_readOnlyAcc>([=]() {
readOnlyAccessor.use();
});
});

// kernel_B parameters : none.
q.submit([&](handler &h) {
h.single_task<class kernel_B>([=]() {
int result = 5;
});
});

int a = 10;

// kernel_C parameters : int.
q.submit([&](handler &h) {
h.single_task<class kernel_C>([=]() {
int x = a;
});
});

// Using raw pointers to represent USM pointers.
// kernel_arg_runtime_aligned is not generated for raw pointers.
int *x;
float *y;
q.submit([&](handler &h) {
h.single_task<class usm_ptr>([=]() {
*x = 42;
*y = 3.14;
});
});

// Using local accessor as a kernel parameter.
// kernel_arg_runtime_aligned is generated for pointers from local accessors.
q.submit([&](handler &h) {
h.single_task<class localAccessor>([=]() {
acc3.use();
});
});

// kernel_acc_raw_ptr parameters : int*, sycl::range<1>, sycl::range<1>, sycl::id<1>, int*.
int *rawPtr;
q.submit([&](handler &h) {
h.single_task<class kernel_acc_raw_ptr>([=]() {
readOnlyAccessor.use();
*rawPtr = 10;
});
});

// Check if kernel_arg_accessor_ptr metadata is generated for ESIMD kernels that capture
// an accessor.
q.submit([&](handler &h) {
h.single_task<class esimd_kernel_with_acc>([=]() __attribute__((sycl_explicit_simd)) {
readOnlyAccessor.use();
});
});
}

// Check kernel_A parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_A
// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]],
// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]],
// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_4]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_6]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_7]],
// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_8]])
// CHECK-SAME: !kernel_arg_runtime_aligned !5

// Check kernel_readOnlyAcc parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_readOnlyAcc
// CHECK-SAME: i32 addrspace(1)* readonly [[MEM_ARG1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]],
// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]]
// CHECK-SAME: !kernel_arg_runtime_aligned !14

// Check kernel_B parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_B
// CHECK-NOT: kernel_arg_runtime_aligned

// Check kernel_C parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_C
// CHECK-SAME: i32 [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-NOT: kernel_arg_runtime_aligned

// Check usm_ptr parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}usm_ptr
// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: float addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-NOT: kernel_arg_runtime_aligned

// CHECK: define {{.*}}spir_kernel void @{{.*}}localAccessor
// CHECK-SAME: float addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %"struct.cl::sycl::range.5"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]],
// CHECK-SAME: %"struct.cl::sycl::range.5"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]],
// CHECK-SAME: %"struct.cl::sycl::id.6"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]]
// CHECK-SAME: !kernel_arg_runtime_aligned !14

// Check kernel_acc_raw_ptr parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_acc_raw_ptr
// CHECK-SAME: i32 addrspace(1)* readonly [[MEM_ARG1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]],
// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]],
// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]]
// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-SAME: !kernel_arg_runtime_aligned !26

// Check esimd_kernel_with_acc parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}esimd_kernel_with_acc
// CHECK-SAME: !kernel_arg_accessor_ptr

// Check kernel-arg-runtime-aligned metadata.
// The value of any metadata element is 1 for any kernel arguments
// that corresponds to the base pointer of an accessor and 0 otherwise.
// CHECK: !5 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false}
// CHECK: !14 = !{i1 true, i1 false, i1 false, i1 false}
// CHECK: !26 = !{i1 true, i1 false, i1 false, i1 false, i1 false}
4 changes: 4 additions & 0 deletions clang/test/SemaSYCL/array-kernel-param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,12 @@ int main() {
// Check Kernel_Accessor parameters
// CHECK: FunctionDecl {{.*}}Kernel_Accessor{{.*}} 'void (__global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>, __global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>)'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *'
// CHECK-NEXT: SYCLAccessorPtrAttr
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'sycl::id<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *'
// CHECK-NEXT: SYCLAccessorPtrAttr
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'sycl::id<1>'
Expand Down Expand Up @@ -165,10 +167,12 @@ int main() {
// Check Kernel_StructAccArray parameters
// CHECK: FunctionDecl {{.*}}Kernel_StructAccArray{{.*}} 'void (__global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>, __global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>)'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *'
// CHECK-NEXT: SYCLAccessorPtrAttr
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::id<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *'
// CHECK-NEXT: SYCLAccessorPtrAttr
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::range<1>'
// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'sycl::id<1>'
Expand Down