Skip to content

[SYCL][FPGA] Allow use_stall_enable_clusters attribute to kernel #4031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2591,6 +2591,9 @@ is ignored on the host.
The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an
effect when applied to a function, and no effect otherwise.

If ``intel::use_stall_enable_clusters`` is applied to a function called from a device
kernel, the attribute is ignored and it is not propagated to the kernel.

.. code-block:: c++

class Foo {
Expand All @@ -2604,6 +2607,14 @@ effect when applied to a function, and no effect otherwise.
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

class Functor
{
[[intel::use_stall_enable_clusters]] void operator()(item<1> item)
{
/* kernel code */
}
};

The ``intel::use_stall_enable_clusters`` attribute supports a nonconforming
behavior when applied to a lambda in the type position.

Expand All @@ -2614,6 +2625,11 @@ behavior when applied to a lambda in the type position.
lambda();
}

kernel<class kernel_name>(
[]() [[intel::use_stall_enable_clusters]] {
/* kernel code */
});

}];
}

Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,8 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
return isa<SYCLIntelLoopFuseAttr, SYCLIntelFPGAMaxConcurrencyAttr,
SYCLIntelFPGADisableLoopPipeliningAttr,
SYCLIntelFPGAInitiationIntervalAttr>(A);
SYCLIntelFPGAInitiationIntervalAttr,
SYCLIntelUseStallEnableClustersAttr>(A);
});
}
}
Expand Down Expand Up @@ -4077,6 +4078,7 @@ static void PropagateAndDiagnoseDeviceAttr(
case attr::Kind::SYCLIntelFPGAMaxConcurrency:
case attr::Kind::SYCLIntelFPGADisableLoopPipelining:
case attr::Kind::SYCLIntelFPGAInitiationInterval:
case attr::Kind::SYCLIntelUseStallEnableClusters:
SYCLKernel->addAttr(A);
break;
case attr::Kind::IntelNamedSubGroupSize:
Expand Down
50 changes: 35 additions & 15 deletions clang/test/CodeGenSYCL/stall_enable_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,21 @@

// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
// The metadata to be attached to the functionDecl that the attribute is applied to.
// The attributes do not get propagated to kernel metadata i.e. spir_kernel.
// The attributes get propagated to the kernel metadata i.e. spir_kernel if directly applied
// through functors/lambda function.

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

[[intel::use_stall_enable_clusters]] void test() {}
[[intel::use_stall_enable_clusters]] void func() {}

struct FuncObj {
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

void test1() {
void func1() {
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
lambda();
}
Expand All @@ -25,30 +26,49 @@ class Foo {
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

class Functor {
public:
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

int main() {
q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 !kernel_arg_buffer_location ![[NUM4:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
h.single_task<class test_kernel1>(
FuncObj());

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 !kernel_arg_buffer_location ![[NUM4]]
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
Foo f;
h.single_task<class test_kernel2>(f);

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() #0 !kernel_arg_buffer_location ![[NUM4]]
// CHECK: define {{.*}}spir_func void @_Z4testv() #3 !stall_enable ![[NUM5]]
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3()
// CHECK-NOT: !stall_enable
// CHECK-SAME: {
// CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]]
h.single_task<class test_kernel3>(
[]() { test(); });
[]() { func(); });

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() #0 !kernel_arg_buffer_location ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}test1vENKUlvE_clEv(%class.{{.*}}test1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM5]]
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4()
// CHECK-NOT: !stall_enable
// CHECK-SAME: {
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.{{.*}}func1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]]
h.single_task<class test_kernel4>(
[]() { test1(); });
[]() { func1(); });

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel5() {{.*}} !stall_enable ![[NUM4]]
h.single_task<class test_kernel5>(
[]() [[intel::use_stall_enable_clusters]]{});

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel6() {{.*}} !stall_enable ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}Functor{{.*}}(%class._ZTS7Functor.Functor addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
Functor f1;
h.single_task<class test_kernel6>(f1);
});
return 0;
}

// CHECK: ![[NUM4]] = !{}
// CHECK: ![[NUM5]] = !{i32 1}
// CHECK: ![[NUM4]] = !{i32 1}
39 changes: 29 additions & 10 deletions clang/test/SemaSYCL/stall_enable_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ using namespace cl::sycl;
queue q;

// Test attribute is presented on function definition.
[[intel::use_stall_enable_clusters]] void test() {}
// CHECK: FunctionDecl{{.*}}test
// CHECK: SYCLIntelUseStallEnableClustersAttr
[[intel::use_stall_enable_clusters]] void func() {}
// CHECK: FunctionDecl{{.*}}used func 'void ()'
// CHECK-NEXT: CompoundStmt{{.*}}
// CHECK-NEXT: SYCLIntelUseStallEnableClustersAttr{{.*}}

// Tests for incorrect argument values for Intel FPGA use_stall_enable_clusters function attribute.
#ifdef TRIGGER_ERROR
Expand All @@ -24,7 +25,7 @@ struct FuncObj {
[[intel::use_stall_enable_clusters]] void operator()() const {}
// CHECK: CXXRecordDecl{{.*}}implicit struct FuncObj
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
// CHECK-NEXT-NEXT:SYCLIntelUseStallEnableClustersAttr
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
};

// Test attribute is presented on lambda function(applied to a function type for the lambda's call operator).
Expand All @@ -33,28 +34,46 @@ void test3() {
lambda();
// CHECK: FunctionDecl{{.*}}test3
// CHECK: LambdaExpr
// CHECK: SYCLIntelUseStallEnableClustersAttr
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
}

// Test attribute is presented on functor.
// CHECK: CXXRecordDecl{{.*}}referenced class Functor definition
// CHECK: CXXRecordDecl{{.*}} implicit class Functor
// CHECK: AccessSpecDecl{{.*}} public
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
class Functor {
public:
[[intel::use_stall_enable_clusters]] void operator()() const {
}
};

int main() {
q.submit([&](handler &h) {
// Test attribute is not propagated to the kernel.
// Test attribute is propagated to the kernel.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
h.single_task<class test_kernel1>(
FuncObj());

// Test attribute does not present on LambdaExpr called by kernel.
// Test attribute is presented on LambdaExpr called by kernel.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
h.single_task<class test_kernel2>(
[]() [[intel::use_stall_enable_clusters]]{});

// Test attribute is not propagated to the kernel.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
h.single_task<class test_kernel3>(
[]() { test(); });
[]() { func(); });

// Test attribute is applied to kernel if directly applied through functor.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel4
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
Functor f2;
h.single_task<class test_kernel4>(f2);
});
return 0;
}