Skip to content

[SYCL] Add new kernel-arg-runtime-aligned metadata. #5111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,20 @@ def SYCLSimdAccessorPtr : InheritableAttr {
let Documentation = [Undocumented];
}

// Used by FE to mark SYCL kernel pointer parameters which correspond to the
// original lambda's captured accessors. FE turns the attribute to some metadata
// required by the FPGA back-end.
// This attribute does not require custom semantic handling
// hence we set the SemaHandler field to 0.
// The attribute is not for public consumption, and is an implicitly-created attribute
// that has no visible spelling, hence undocumented.
def SYCLAccessorPtr : InheritableAttr {
// This attribute has no spellings as it is only ever created implicitly.
let Spellings = [];
let SemaHandler = 0;
let Documentation = [Undocumented];
}

// Used to mark readonly accessors. It is not to be used directly in the source.
def SYCLAccessorReadonly : Attr {
// This attribute has no spellings as it is only ever created implicitly.
Expand Down
34 changes: 31 additions & 3 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1699,10 +1699,16 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
// MDNode for the intel_buffer_location attribute.
SmallVector<llvm::Metadata *, 8> argSYCLBufferLocationAttr;

// MDNode for listing SYCL kernel pointer arguments originating from
// accessors.
SmallVector<llvm::Metadata *, 8> argSYCLKernelRuntimeAligned;

// MDNode for listing ESIMD kernel pointer arguments originating from
// accessors
// accessors.
SmallVector<llvm::Metadata *, 8> argESIMDAccPtrs;

bool isKernelArgAnAccessor = false;

if (FD && CGF)
for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
const ParmVarDecl *parm = FD->getParamDecl(i);
Expand Down Expand Up @@ -1806,17 +1812,39 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
SYCLBufferLocationAttr->getLocationID()))
: llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(-1)));

// If a kernel pointer argument comes from a global accessor, we generate
// a new metadata(kernel_arg_runtime_aligned) to the kernel to indicate
// that this pointer has runtime allocated alignment. The value of any
// "kernel_arg_runtime_aligned" metadata element is 'true' for any kernel
// arguments that corresponds to the base pointer of an accessor and
// 'false' otherwise.
if (parm->hasAttr<SYCLAccessorReadonlyAttr>() ||
parm->hasAttr<SYCLAccessorPtrAttr>()) {
isKernelArgAnAccessor = true;
argSYCLKernelRuntimeAligned.push_back(
llvm::ConstantAsMetadata::get(CGF->Builder.getTrue()));
} else {
argSYCLKernelRuntimeAligned.push_back(
llvm::ConstantAsMetadata::get(CGF->Builder.getFalse()));
}

if (FD->hasAttr<SYCLSimdAttr>())
argESIMDAccPtrs.push_back(llvm::ConstantAsMetadata::get(
CGF->Builder.getInt1(parm->hasAttr<SYCLSimdAccessorPtrAttr>())));
}

bool IsEsimdFunction = FD && FD->hasAttr<SYCLSimdAttr>();

if (LangOpts.SYCLIsDevice && !IsEsimdFunction)
if (LangOpts.SYCLIsDevice && !IsEsimdFunction) {
Fn->setMetadata("kernel_arg_buffer_location",
llvm::MDNode::get(VMContext, argSYCLBufferLocationAttr));
else {
// Generate this metadata only if a kernel argument is an accessor.
if (isKernelArgAnAccessor)
Fn->setMetadata(
"kernel_arg_runtime_aligned",
llvm::MDNode::get(VMContext, argSYCLKernelRuntimeAligned));

} else {
Fn->setMetadata("kernel_arg_addr_space",
llvm::MDNode::get(VMContext, addressQuals));
Fn->setMetadata("kernel_arg_access_qual",
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1980,9 +1980,13 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {

// Add implicit attribute to parameter decl when it is a read only
// SYCL accessor.
if (isReadOnlyAccessor(AccessModeArg))
if (isReadOnlyAccessor(AccessModeArg)) {
Params.back()->addAttr(
SYCLAccessorReadonlyAttr::CreateImplicit(SemaRef.getASTContext()));
} else {
Params.back()->addAttr(
SYCLAccessorPtrAttr::CreateImplicit(SemaRef.getASTContext()));
}
}

// All special SYCL objects must have __init method. We extract types for
Expand Down
62 changes: 31 additions & 31 deletions clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,15 @@ class Functor10 {

int main() {
q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() #0 !kernel_arg_buffer_location ![[NUM:[0-9]+]] !scheduler_target_fmax_mhz ![[NUM1:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() #0 !kernel_arg_buffer_location ![[NUM:[0-9]+]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !scheduler_target_fmax_mhz ![[NUM1:[0-9]+]]
Foo boo;
h.single_task<class kernel_name1>(boo);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name2() #0 !kernel_arg_buffer_location ![[NUM]] !scheduler_target_fmax_mhz ![[NUM42:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name2() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !scheduler_target_fmax_mhz ![[NUM42:[0-9]+]]
h.single_task<class kernel_name2>(
[]() [[intel::scheduler_target_fmax_mhz(42)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name3() #0 !kernel_arg_buffer_location ![[NUM]] !scheduler_target_fmax_mhz ![[NUM2:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name3() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !scheduler_target_fmax_mhz ![[NUM2:[0-9]+]]
Functor<2> f;
h.single_task<class kernel_name3>(f);

Expand All @@ -166,128 +166,128 @@ int main() {
h.single_task<class kernel_name4>(
[]() { foo(); });

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name5() #0 !kernel_arg_buffer_location ![[NUM]] !num_simd_work_items ![[NUM1]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name5() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !num_simd_work_items ![[NUM1]]
Foo1 boo1;
h.single_task<class kernel_name5>(boo1);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name6() #0 !kernel_arg_buffer_location ![[NUM]] !num_simd_work_items ![[NUM42]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name6() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !num_simd_work_items ![[NUM42]]
h.single_task<class kernel_name6>(
[]() [[intel::num_simd_work_items(42)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name7() #0 !kernel_arg_buffer_location ![[NUM]] !num_simd_work_items ![[NUM2]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name7() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !num_simd_work_items ![[NUM2]]
Functor1<2> f1;
h.single_task<class kernel_name7>(f1);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name8() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name8() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !num_simd_work_items
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo1v()
h.single_task<class kernel_name8>(
[]() { foo1(); });
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name9() #0 !kernel_arg_buffer_location ![[NUM]] !no_global_work_offset ![[NUM:[0-9]+]]

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name9() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !no_global_work_offset ![[NUM:[0-9]+]]
Foo2 boo2;
h.single_task<class kernel_name9>(boo2);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name10() #0 {{.*}} ![[NUM0:[0-9]+]]
h.single_task<class kernel_name10>(
[]() [[intel::no_global_work_offset(0)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name11() #0 !kernel_arg_buffer_location ![[NUM]] !no_global_work_offset ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name11() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !no_global_work_offset ![[NUM]]
Functor2<1> f2;
h.single_task<class kernel_name11>(f2);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name12() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name12() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !no_global_work_offset
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo2v()
h.single_task<class kernel_name12>(
[]() { foo2(); });

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name13() #0 !kernel_arg_buffer_location ![[NUM]] !max_global_work_dim ![[NUM1]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name13() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !max_global_work_dim ![[NUM1]]
Foo3 boo3;
h.single_task<class kernel_name13>(boo3);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name14() #0 !kernel_arg_buffer_location ![[NUM]] !max_global_work_dim ![[NUM1]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name14() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !max_global_work_dim ![[NUM1]]
h.single_task<class kernel_name14>(
[]() [[intel::max_global_work_dim(1)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name15() #0 !kernel_arg_buffer_location ![[NUM]] !max_global_work_dim ![[NUM2]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name15() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !max_global_work_dim ![[NUM2]]
Functor3<2> f3;
h.single_task<class kernel_name15>(f3);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name16() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name16() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !max_global_work_dim
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo3v()
h.single_task<class kernel_name16>(
[]() { foo3(); });

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name17() #0 !kernel_arg_buffer_location ![[NUM]] !intel_reqd_sub_group_size ![[NUM16:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name17() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !intel_reqd_sub_group_size ![[NUM16:[0-9]+]]
Foo4 boo4;
h.single_task<class kernel_name17>(boo4);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name18() #0 !kernel_arg_buffer_location ![[NUM]] !intel_reqd_sub_group_size ![[NUM1]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name18() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !intel_reqd_sub_group_size ![[NUM1]]
h.single_task<class kernel_name18>(
[]() [[sycl::reqd_sub_group_size(1)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name19() #0 !kernel_arg_buffer_location ![[NUM]] !intel_reqd_sub_group_size ![[NUM2]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name19() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !intel_reqd_sub_group_size ![[NUM2]]
Functor5<2> f5;
h.single_task<class kernel_name19>(f5);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name20() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name20() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !reqd_sub_group_size
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo4v()
Functor4 f4;
h.single_task<class kernel_name20>(f4);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name21() #0 !kernel_arg_buffer_location ![[NUM]] !reqd_work_group_size ![[NUM32:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name21() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !reqd_work_group_size ![[NUM32:[0-9]+]]
Foo5 boo5;
h.single_task<class kernel_name21>(boo5);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name22() #0 !kernel_arg_buffer_location ![[NUM]] !reqd_work_group_size ![[NUM88:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name22() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !reqd_work_group_size ![[NUM88:[0-9]+]]
h.single_task<class kernel_name22>(
[]() [[sycl::reqd_work_group_size(8, 8, 8)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name23() #0 !kernel_arg_buffer_location ![[NUM]] !reqd_work_group_size ![[NUM22:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name23() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !reqd_work_group_size ![[NUM22:[0-9]+]]
Functor7<2, 2, 2> f7;
h.single_task<class kernel_name23>(f7);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name24() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name24() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !reqd_work_group_size
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo5v()
Functor6 f6;
h.single_task<class kernel_name24>(f6);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name25() #0 !kernel_arg_buffer_location ![[NUM]] !max_work_group_size ![[NUM32]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name25() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !max_work_group_size ![[NUM32]]
Foo6 boo6;
h.single_task<class kernel_name25>(boo6);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name26() #0 !kernel_arg_buffer_location ![[NUM]] !max_work_group_size ![[NUM88]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name26() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !max_work_group_size ![[NUM88]]
h.single_task<class kernel_name26>(
[]() [[intel::max_work_group_size(8, 8, 8)]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name27() #0 !kernel_arg_buffer_location ![[NUM]] !max_work_group_size ![[NUM22]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name27() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]] !max_work_group_size ![[NUM22]]
Functor9<2, 2, 2> f9;
h.single_task<class kernel_name27>(f9);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name28() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name28() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !max_work_group_size
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo6v()
Functor8 f8;
h.single_task<class kernel_name28>(f8);

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name29() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name29() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK-NOT: !sycl_explicit_simd
// CHECK-SAME: {
// CHECK: define {{.*}}spir_func void @{{.*}}foo7{{.*}} !sycl_explicit_simd ![[NUM]]
Expand All @@ -303,20 +303,20 @@ int main() {
[]() [[intel::sycl_explicit_simd]]{});

// Test attribute is not propagated.
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(%class.Functor10 addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
// CHECK-NOT: noalias
// CHECK-SAME: {
// CHECK: define dso_local spir_func void @_Z4foo8v()
Functor10 f10;
h.single_task<class kernel_name32>(f10);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(%class.Foo8 addrspace(4)* noalias align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
Foo8 boo8;
h.single_task<class kernel_name33>(boo8);

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]] !kernel_arg_runtime_aligned ![[NUM:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}(%class.anon{{.*}} addrspace(4)* noalias align 1 dereferenceable_or_null(1) %this) #4 align 2
h.single_task<class kernel_name34>(
[]() [[intel::kernel_args_restrict]]{});
Expand Down
Loading