Skip to content

Commit 06e4ebc

Browse files
authored
[SYCL][FPGA] Allow use_stall_enable_clusters attribute to kernel (#4031)
This patch collects the FPGA attribute intel::use_stall_enable_clusters and applies it to the caller/SYCL kernel when the attribute is applied directly through a functor or lambda function. The attribute is applicable to all functions, including SYCL kernels, and must not be propagated up to the caller/SYCL kernel when it is applied to a function that the kernel calls. This patch fixes an FPGA emulator bug that was introduced in #3900.
1 parent 1dea757 commit 06e4ebc

File tree

4 files changed

+83
-26
lines changed

4 files changed

+83
-26
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,6 +2591,9 @@ is ignored on the host.
25912591
The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an
25922592
effect when applied to a function, and no effect otherwise.
25932593

2594+
If ``intel::use_stall_enable_clusters`` is applied to a function called from a device
2595+
kernel, the attribute applies to that function only and is not propagated to the kernel.
2596+
25942597
.. code-block:: c++
25952598

25962599
class Foo {
@@ -2604,6 +2607,14 @@ effect when applied to a function, and no effect otherwise.
26042607
[[intel::use_stall_enable_clusters]] void operator()() const {}
26052608
};
26062609

2610+
class Functor
2611+
{
2612+
[[intel::use_stall_enable_clusters]] void operator()(item<1> item)
2613+
{
2614+
/* kernel code */
2615+
}
2616+
};
2617+
26072618
The ``intel::use_stall_enable_clusters`` attribute supports a nonconforming
26082619
behavior when applied to a lambda in the type position.
26092620

@@ -2614,6 +2625,11 @@ behavior when applied to a lambda in the type position.
26142625
lambda();
26152626
}
26162627

2628+
kernel<class kernel_name>(
2629+
[]() [[intel::use_stall_enable_clusters]] {
2630+
/* kernel code */
2631+
});
2632+
26172633
}];
26182634
}
26192635

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,8 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
573573
llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
574574
return isa<SYCLIntelLoopFuseAttr, SYCLIntelFPGAMaxConcurrencyAttr,
575575
SYCLIntelFPGADisableLoopPipeliningAttr,
576-
SYCLIntelFPGAInitiationIntervalAttr>(A);
576+
SYCLIntelFPGAInitiationIntervalAttr,
577+
SYCLIntelUseStallEnableClustersAttr>(A);
577578
});
578579
}
579580
}
@@ -4077,6 +4078,7 @@ static void PropagateAndDiagnoseDeviceAttr(
40774078
case attr::Kind::SYCLIntelFPGAMaxConcurrency:
40784079
case attr::Kind::SYCLIntelFPGADisableLoopPipelining:
40794080
case attr::Kind::SYCLIntelFPGAInitiationInterval:
4081+
case attr::Kind::SYCLIntelUseStallEnableClusters:
40804082
SYCLKernel->addAttr(A);
40814083
break;
40824084
case attr::Kind::IntelNamedSubGroupSize:

clang/test/CodeGenSYCL/stall_enable_device.cpp

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,21 @@
22

33
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
44
// The metadata is attached to the FunctionDecl that the attribute is applied to.
5-
// The attributes do not get propagated to kernel metadata i.e. spir_kernel.
5+
// The attributes get propagated to the kernel metadata i.e. spir_kernel if directly applied
6+
// through functors/lambda function.
67

78
#include "sycl.hpp"
89

910
using namespace cl::sycl;
1011
queue q;
1112

12-
[[intel::use_stall_enable_clusters]] void test() {}
13+
[[intel::use_stall_enable_clusters]] void func() {}
1314

1415
struct FuncObj {
1516
[[intel::use_stall_enable_clusters]] void operator()() const {}
1617
};
1718

18-
void test1() {
19+
void func1() {
1920
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
2021
lambda();
2122
}
@@ -25,30 +26,49 @@ class Foo {
2526
[[intel::use_stall_enable_clusters]] void operator()() const {}
2627
};
2728

29+
class Functor {
30+
public:
31+
[[intel::use_stall_enable_clusters]] void operator()() const {}
32+
};
33+
2834
int main() {
2935
q.submit([&](handler &h) {
30-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 !kernel_arg_buffer_location ![[NUM4:[0-9]+]]
31-
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5:[0-9]+]]
36+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]]
37+
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
3238
h.single_task<class test_kernel1>(
3339
FuncObj());
3440

35-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 !kernel_arg_buffer_location ![[NUM4]]
36-
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5]]
41+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]]
42+
// CHECK: define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
3743
Foo f;
3844
h.single_task<class test_kernel2>(f);
3945

40-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() #0 !kernel_arg_buffer_location ![[NUM4]]
41-
// CHECK: define {{.*}}spir_func void @_Z4testv() #3 !stall_enable ![[NUM5]]
46+
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
47+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3()
48+
// CHECK-NOT: !stall_enable
49+
// CHECK-SAME: {
50+
// CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]]
4251
h.single_task<class test_kernel3>(
43-
[]() { test(); });
52+
[]() { func(); });
4453

45-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() #0 !kernel_arg_buffer_location ![[NUM4]]
46-
// CHECK: define {{.*}}spir_func void @{{.*}}test1vENKUlvE_clEv(%class.{{.*}}test1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM5]]
54+
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
55+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4()
56+
// CHECK-NOT: !stall_enable
57+
// CHECK-SAME: {
58+
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.{{.*}}func1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]]
4759
h.single_task<class test_kernel4>(
48-
[]() { test1(); });
60+
[]() { func1(); });
61+
62+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel5() {{.*}} !stall_enable ![[NUM4]]
63+
h.single_task<class test_kernel5>(
64+
[]() [[intel::use_stall_enable_clusters]]{});
65+
66+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel6() {{.*}} !stall_enable ![[NUM4]]
67+
// CHECK: define {{.*}}spir_func void @{{.*}}Functor{{.*}}(%class._ZTS7Functor.Functor addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
68+
Functor f1;
69+
h.single_task<class test_kernel6>(f1);
4970
});
5071
return 0;
5172
}
5273

53-
// CHECK: ![[NUM4]] = !{}
54-
// CHECK: ![[NUM5]] = !{i32 1}
74+
// CHECK: ![[NUM4]] = !{i32 1}

clang/test/SemaSYCL/stall_enable_device.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ using namespace cl::sycl;
99
queue q;
1010

1111
// Test attribute is present on function definition.
12-
[[intel::use_stall_enable_clusters]] void test() {}
13-
// CHECK: FunctionDecl{{.*}}test
14-
// CHECK: SYCLIntelUseStallEnableClustersAttr
12+
[[intel::use_stall_enable_clusters]] void func() {}
13+
// CHECK: FunctionDecl{{.*}}used func 'void ()'
14+
// CHECK-NEXT: CompoundStmt{{.*}}
15+
// CHECK-NEXT: SYCLIntelUseStallEnableClustersAttr{{.*}}
1516

1617
// Tests for incorrect argument values for Intel FPGA use_stall_enable_clusters function attribute.
1718
#ifdef TRIGGER_ERROR
@@ -24,7 +25,7 @@ struct FuncObj {
2425
[[intel::use_stall_enable_clusters]] void operator()() const {}
2526
// CHECK: CXXRecordDecl{{.*}}implicit struct FuncObj
2627
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
27-
// CHECK-NEXT-NEXT:SYCLIntelUseStallEnableClustersAttr
28+
// CHECK-NEXT-NEXT:SYCLIntelUseStallEnableClustersAttr{{.*}}
2829
};
2930

3031
// Test attribute is present on lambda function (applied to a function type for the lambda's call operator).
@@ -33,28 +34,46 @@ void test3() {
3334
lambda();
3435
// CHECK: FunctionDecl{{.*}}test3
3536
// CHECK: LambdaExpr
36-
// CHECK: SYCLIntelUseStallEnableClustersAttr
37+
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
3738
}
3839

40+
// Test attribute is present on functor.
41+
// CHECK: CXXRecordDecl{{.*}}referenced class Functor definition
42+
// CHECK: CXXRecordDecl{{.*}} implicit class Functor
43+
// CHECK: AccessSpecDecl{{.*}} public
44+
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
45+
// CHECK-NEXT-NEXT: SYCLIntelUseStallEnableClustersAttr{{.*}}
46+
class Functor {
47+
public:
48+
[[intel::use_stall_enable_clusters]] void operator()() const {
49+
}
50+
};
51+
3952
int main() {
4053
q.submit([&](handler &h) {
41-
// Test attribute is not propagated to the kernel.
54+
// Test attribute is propagated to the kernel.
4255
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
43-
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
56+
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
4457
h.single_task<class test_kernel1>(
4558
FuncObj());
4659

47-
// Test attribute does not present on LambdaExpr called by kernel.
60+
// Test attribute is present on the kernel when applied directly to the lambda passed as the kernel.
4861
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
49-
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
62+
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
5063
h.single_task<class test_kernel2>(
5164
[]() [[intel::use_stall_enable_clusters]]{});
5265

5366
// Test attribute is not propagated to the kernel.
5467
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
5568
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
5669
h.single_task<class test_kernel3>(
57-
[]() { test(); });
70+
[]() { func(); });
71+
72+
// Test attribute is applied to kernel if directly applied through functor.
73+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel4
74+
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
75+
Functor f2;
76+
h.single_task<class test_kernel4>(f2);
5877
});
5978
return 0;
6079
}

0 commit comments

Comments
 (0)