Skip to content

Commit 15da879

Browse files
authored
[SYCL][FPGA] Apply [[intel::use_stall_enable_clusters]] attribute to any function (#3900)
* [SYCL][FPGA] Apply [[intel::use_stall_enable_clusters]] attribute to ANY function. [[intel::use_stall_enable_clusters]] is a function attribute, not a kernel one; it can be applied to any function or lambda. This patch removes the restriction that was added in PR #2734 to allow propagating the attribute to kernel if it is applied to any function the kernel calls. Signed-off-by: Soumi Manna <[email protected]>
1 parent e920682 commit 15da879

File tree

8 files changed

+177
-104
lines changed

8 files changed

+177
-104
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2581,32 +2581,39 @@ def SYCLIntelUseStallEnableClustersAttrDocs : Documentation {
25812581
let Category = DocCatFunction;
25822582
let Heading = "intel::use_stall_enable_clusters";
25832583
let Content = [{
2584-
When applied to a lambda or function call operator (of a function object)
2585-
on device, this requests, to the extent possible, that statically-scheduled
2586-
clusters handle stalls using a stall-enable signal to freeze computation
2587-
within the cluster. This attribute is ignored on the host.
2584+
The ``intel::use_stall_enable_clusters`` attribute requires SYCL.
2585+
When applied to a lambda function, function definition, or function call
2586+
operator (of a function object) on device, this requests, to the
2587+
extent possible, that statically-scheduled clusters handle stalls using a
2588+
stall-enable signal to freeze computation within the cluster. This attribute
2589+
is ignored on the host.
25882590

2589-
If ``intel::use_stall_enable_clusters`` is applied to a function called from a device
2590-
kernel, the attribute is ignored and it is not propagated to the kernel.
2591+
The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an
2592+
effect when applied to a function, and no effect otherwise.
25912593

2592-
The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an effect
2593-
when applied to a function, and no effect otherwise.
2594+
.. code-block:: c++
2595+
2596+
class Foo {
2597+
public:
2598+
[[intel::use_stall_enable_clusters]] void operator()() const {}
2599+
};
2600+
2601+
[[intel::use_stall_enable_clusters]] void test() {}
2602+
2603+
struct FuncObj {
2604+
[[intel::use_stall_enable_clusters]] void operator()() const {}
2605+
};
2606+
2607+
The ``intel::use_stall_enable_clusters`` attribute supports a nonconforming
2608+
behavior when applied to a lambda in the type position.
25942609

25952610
.. code-block:: c++
25962611

2597-
class Functor
2598-
{
2599-
[[intel::use_stall_enable_clusters]] void operator()(item<1> item)
2600-
{
2601-
/* kernel code */
2602-
}
2612+
void test1() {
2613+
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
2614+
lambda();
26032615
}
26042616

2605-
kernel<class kernel_name>(
2606-
[]() [[intel::use_stall_enable_clusters]] {
2607-
/* kernel code */
2608-
});
2609-
26102617
}];
26112618
}
26122619

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,6 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
761761
Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {}));
762762
}
763763

764-
if (FD->hasAttr<SYCLIntelUseStallEnableClustersAttr>()) {
765-
llvm::Metadata *AttrMDArgs[] = {
766-
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
767-
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
768-
}
769-
770764
if (const auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
771765
const auto *CE = cast<ConstantExpr>(A->getNThreadsExpr());
772766
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
@@ -1064,6 +1058,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
10641058
}
10651059
}
10661060

1061+
if (getLangOpts().SYCLIsDevice && D &&
1062+
D->hasAttr<SYCLIntelUseStallEnableClustersAttr>()) {
1063+
llvm::Metadata *AttrMDArgs[] = {
1064+
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
1065+
Fn->setMetadata("stall_enable",
1066+
llvm::MDNode::get(getLLVMContext(), AttrMDArgs));
1067+
}
1068+
10671069
if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
10681070
// Add metadata for a kernel function.
10691071
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -568,20 +568,6 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
568568
SYCLIntelNoGlobalWorkOffsetAttr, SYCLSimdAttr>(A);
569569
});
570570

571-
// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
572-
// functions and function objects called directly from a kernel.
573-
// For all other cases, emit a warning and ignore.
574-
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
575-
if (DirectlyCalled) {
576-
Attrs.push_back(A);
577-
} else {
578-
S.Diag(A->getLocation(),
579-
diag::warn_attribute_on_direct_kernel_callee_only)
580-
<< A;
581-
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
582-
}
583-
}
584-
585571
// Attributes that should not be propagated from device functions to a kernel.
586572
if (DirectlyCalled) {
587573
llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
@@ -4128,7 +4114,6 @@ static void PropagateAndDiagnoseDeviceAttr(
41284114
case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz:
41294115
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
41304116
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
4131-
case attr::Kind::SYCLIntelUseStallEnableClusters:
41324117
case attr::Kind::SYCLIntelLoopFuse:
41334118
case attr::Kind::SYCLIntelFPGAMaxConcurrency:
41344119
case attr::Kind::SYCLIntelFPGADisableLoopPipelining:

clang/test/CodeGenSYCL/stall_enable.cpp

Lines changed: 0 additions & 26 deletions
This file was deleted.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
2+
3+
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
4+
// The metadata is expected to be attached to the function that the attribute is applied to.
5+
// The attribute is not propagated to the kernel metadata, i.e. to the spir_kernel function.
6+
7+
#include "sycl.hpp"
8+
9+
using namespace cl::sycl;
10+
queue q;
11+
12+
[[intel::use_stall_enable_clusters]] void test() {}
13+
14+
struct FuncObj {
15+
[[intel::use_stall_enable_clusters]] void operator()() const {}
16+
};
17+
18+
void test1() {
19+
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
20+
lambda();
21+
}
22+
23+
class Foo {
24+
public:
25+
[[intel::use_stall_enable_clusters]] void operator()() const {}
26+
};
27+
28+
int main() {
29+
q.submit([&](handler &h) {
30+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 !kernel_arg_buffer_location ![[NUM4:[0-9]+]]
31+
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5:[0-9]+]]
32+
h.single_task<class test_kernel1>(
33+
FuncObj());
34+
35+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 !kernel_arg_buffer_location ![[NUM4]]
36+
// CHECK: define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5]]
37+
Foo f;
38+
h.single_task<class test_kernel2>(f);
39+
40+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() #0 !kernel_arg_buffer_location ![[NUM4]]
41+
// CHECK: define {{.*}}spir_func void @_Z4testv() #3 !stall_enable ![[NUM5]]
42+
h.single_task<class test_kernel3>(
43+
[]() { test(); });
44+
45+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() #0 !kernel_arg_buffer_location ![[NUM4]]
46+
// CHECK: define {{.*}}spir_func void @{{.*}}test1vENKUlvE_clEv(%class.{{.*}}test1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM5]]
47+
h.single_task<class test_kernel4>(
48+
[]() { test1(); });
49+
});
50+
return 0;
51+
}
52+
53+
// CHECK: ![[NUM4]] = !{}
54+
// CHECK: ![[NUM5]] = !{i32 1}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -fsycl-is-host -triple x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
2+
3+
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Host (no-op in IR-CodeGen for host-mode).
4+
5+
[[intel::use_stall_enable_clusters]] void test() {}
6+
7+
void test1() {
8+
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
9+
lambda();
10+
}
11+
12+
template <typename name, typename Func>
13+
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
14+
kernelFunc();
15+
}
16+
17+
class KernelFunctor {
18+
public:
19+
[[intel::use_stall_enable_clusters]] void operator()() const {}
20+
21+
};
22+
23+
void foo() {
24+
25+
KernelFunctor f;
26+
kernel<class kernel_name_1>(f);
27+
}
28+
29+
// CHECK-NOT: !stall_enable

clang/test/SemaSYCL/stall_enable.cpp

Lines changed: 0 additions & 38 deletions
This file was deleted.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// RUN: %clang_cc1 %s -fsyntax-only -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify
2+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s
3+
4+
// Test that checks [[intel::use_stall_enable_clusters]] attribute support on function.
5+
6+
#include "sycl.hpp"
7+
8+
using namespace cl::sycl;
9+
queue q;
10+
11+
// Test that the attribute is present on a function definition.
12+
[[intel::use_stall_enable_clusters]] void test() {}
13+
// CHECK: FunctionDecl{{.*}}test
14+
// CHECK: SYCLIntelUseStallEnableClustersAttr
15+
16+
// Tests for incorrect argument values for Intel FPGA use_stall_enable_clusters function attribute.
17+
#ifdef TRIGGER_ERROR
18+
[[intel::use_stall_enable_clusters(1)]] void test1() {} // expected-error{{'use_stall_enable_clusters' attribute takes no arguments}}
19+
[[intel::use_stall_enable_clusters]] int test2; // expected-error{{'use_stall_enable_clusters' attribute only applies to functions}}
20+
#endif
21+
22+
// Test that the attribute is present on a function call operator (of a function object).
23+
struct FuncObj {
24+
[[intel::use_stall_enable_clusters]] void operator()() const {}
25+
// CHECK: CXXRecordDecl{{.*}}implicit struct FuncObj
26+
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
27+
// CHECK: SYCLIntelUseStallEnableClustersAttr
28+
};
29+
30+
// Test that the attribute is present on a lambda function (applied to the function type of the lambda's call operator).
31+
void test3() {
32+
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
33+
lambda();
34+
// CHECK: FunctionDecl{{.*}}test3
35+
// CHECK: LambdaExpr
36+
// CHECK: SYCLIntelUseStallEnableClustersAttr
37+
}
38+
39+
int main() {
40+
q.submit([&](handler &h) {
41+
// Test attribute is not propagated to the kernel.
42+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
43+
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
44+
h.single_task<class test_kernel1>(
45+
FuncObj());
46+
47+
// Test that the attribute is not present on the kernel when it is applied to a LambdaExpr called by the kernel.
48+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
49+
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
50+
h.single_task<class test_kernel2>(
51+
[]() [[intel::use_stall_enable_clusters]]{});
52+
53+
// Test attribute is not propagated to the kernel.
54+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
55+
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
56+
h.single_task<class test_kernel3>(
57+
[]() { test(); });
58+
});
59+
return 0;
60+
}

0 commit comments

Comments
 (0)