Skip to content

[SYCL][FPGA] Apply [[intel::use_stall_enable_clusters]] attribute to any function #3900

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2581,32 +2581,39 @@ def SYCLIntelUseStallEnableClustersAttrDocs : Documentation {
let Category = DocCatFunction;
let Heading = "intel::use_stall_enable_clusters";
let Content = [{
When applied to a lambda or function call operator (of a function object)
on device, this requests, to the extent possible, that statically-scheduled
clusters handle stalls using a stall-enable signal to freeze computation
within the cluster. This attribute is ignored on the host.
The ``intel::use_stall_enable_clusters`` attribute requires SYCL.
When applied to a lambda function, function definition, or function call
operator (of a function object) on device, this requests, to the
extent possible, that statically-scheduled clusters handle stalls using a
stall-enable signal to freeze computation within the cluster. This attribute
is ignored on the host.

If ``intel::use_stall_enable_clusters`` is applied to a function called from a device
kernel, the attribute is ignored and it is not propagated to the kernel.
The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an
effect when applied to a function, and no effect otherwise.

The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an effect
when applied to a function, and no effect otherwise.
.. code-block:: c++

class Foo {
public:
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

[[intel::use_stall_enable_clusters]] void test() {}

struct FuncObj {
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

The ``intel::use_stall_enable_clusters`` attribute supports a nonconforming
behavior when applied to a lambda in the type position.

.. code-block:: c++

class Functor
{
[[intel::use_stall_enable_clusters]] void operator()(item<1> item)
{
/* kernel code */
}
void test1() {
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
lambda();
}

kernel<class kernel_name>(
[]() [[intel::use_stall_enable_clusters]] {
/* kernel code */
});

}];
}

Expand Down
14 changes: 8 additions & 6 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -761,12 +761,6 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {}));
}

if (FD->hasAttr<SYCLIntelUseStallEnableClustersAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
}

if (const auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
const auto *CE = cast<ConstantExpr>(A->getNThreadsExpr());
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
Expand Down Expand Up @@ -1064,6 +1058,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
}
}

if (getLangOpts().SYCLIsDevice && D &&
D->hasAttr<SYCLIntelUseStallEnableClustersAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
Fn->setMetadata("stall_enable",
llvm::MDNode::get(getLLVMContext(), AttrMDArgs));
}

if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
Expand Down
15 changes: 0 additions & 15 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,20 +568,6 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
SYCLIntelNoGlobalWorkOffsetAttr, SYCLSimdAttr>(A);
});

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects called directly from a kernel.
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
if (DirectlyCalled) {
Attrs.push_back(A);
} else {
S.Diag(A->getLocation(),
diag::warn_attribute_on_direct_kernel_callee_only)
<< A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}

// Attributes that should not be propagated from device functions to a kernel.
if (DirectlyCalled) {
llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
Expand Down Expand Up @@ -4128,7 +4114,6 @@ static void PropagateAndDiagnoseDeviceAttr(
case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz:
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelUseStallEnableClusters:
case attr::Kind::SYCLIntelLoopFuse:
case attr::Kind::SYCLIntelFPGAMaxConcurrency:
case attr::Kind::SYCLIntelFPGADisableLoopPipelining:
Expand Down
26 changes: 0 additions & 26 deletions clang/test/CodeGenSYCL/stall_enable.cpp

This file was deleted.

54 changes: 54 additions & 0 deletions clang/test/CodeGenSYCL/stall_enable_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
// The metadata is attached to the function that the attribute is applied to.
// The attribute is not propagated to kernel metadata, i.e. to spir_kernel functions.

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Free function carrying the attribute. test_kernel3 calls it, and the CHECK
// line for @_Z4testv expects !stall_enable on its definition.
[[intel::use_stall_enable_clusters]] void test() {}

// Function object whose call operator carries the attribute. A temporary of
// this type is passed to single_task as test_kernel1.
struct FuncObj {
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

// Defines and immediately calls a lambda that has the attribute written after
// its parameter list. test_kernel4 reaches the lambda through test1().
void test1() {
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
lambda();
}

// Function object declared with the `class` key whose call operator carries
// the attribute. An instance is passed to single_task as test_kernel2.
class Foo {
public:
[[intel::use_stall_enable_clusters]] void operator()() const {}
};

// Submits four kernels. For each one, the CHECK pair matches the spir_kernel
// definition and then the attributed spir_func, which must carry !stall_enable.
int main() {
q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 !kernel_arg_buffer_location ![[NUM4:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5:[0-9]+]]
h.single_task<class test_kernel1>(
FuncObj());

// The directive below needs the trailing colon after CHECK; without it
// FileCheck treats the line as an ordinary comment and skips the check.
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 !kernel_arg_buffer_location ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}FooclEv(%class.{{.*}}Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5]]
Foo f;
h.single_task<class test_kernel2>(f);

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() #0 !kernel_arg_buffer_location ![[NUM4]]
// CHECK: define {{.*}}spir_func void @_Z4testv() #3 !stall_enable ![[NUM5]]
h.single_task<class test_kernel3>(
[]() { test(); });

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() #0 !kernel_arg_buffer_location ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}test1vENKUlvE_clEv(%class.{{.*}}test1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM5]]
h.single_task<class test_kernel4>(
[]() { test1(); });
});
return 0;
}

// CHECK: ![[NUM4]] = !{}
// CHECK: ![[NUM5]] = !{i32 1}
29 changes: 29 additions & 0 deletions clang/test/CodeGenSYCL/stall_enable_host.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: %clang_cc1 -fsycl-is-host -triple x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Host (no-op in IR-CodeGen for host-mode).

// Free function carrying the attribute. The CHECK-NOT at the end of this file
// expects no !stall_enable metadata anywhere in the host-mode IR.
[[intel::use_stall_enable_clusters]] void test() {}

// Defines and immediately calls a lambda that has the attribute written after
// its parameter list.
void test1() {
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
lambda();
}

// Minimal sycl_kernel entry point used in place of the SYCL headers: it simply
// invokes the callable it is given. `name` is only used to name the kernel.
template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
kernelFunc();
}

// Function object whose call operator carries the attribute; foo() passes an
// instance of it to kernel<>().
class KernelFunctor {
public:
[[intel::use_stall_enable_clusters]] void operator()() const {}

};

// Instantiates kernel<> with a KernelFunctor so that the attributed call
// operator is invoked through the sycl_kernel entry point.
void foo() {

KernelFunctor f;
kernel<class kernel_name_1>(f);
}

// CHECK-NOT: !stall_enable
38 changes: 0 additions & 38 deletions clang/test/SemaSYCL/stall_enable.cpp

This file was deleted.

60 changes: 60 additions & 0 deletions clang/test/SemaSYCL/stall_enable_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// RUN: %clang_cc1 %s -fsyntax-only -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s

// Test that checks [[intel::use_stall_enable_clusters]] attribute support on function.

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Test that the attribute is present on a function definition.
[[intel::use_stall_enable_clusters]] void test() {}
// CHECK: FunctionDecl{{.*}}test
// CHECK: SYCLIntelUseStallEnableClustersAttr

// Tests for invalid uses of the Intel FPGA use_stall_enable_clusters function
// attribute: passing an argument, and applying it to a non-function declaration.
// These declarations are only compiled in the -verify run, which defines
// TRIGGER_ERROR.
#ifdef TRIGGER_ERROR
[[intel::use_stall_enable_clusters(1)]] void test1() {} // expected-error{{'use_stall_enable_clusters' attribute takes no arguments}}
[[intel::use_stall_enable_clusters]] int test2; // expected-error{{'use_stall_enable_clusters' attribute only applies to functions}}
#endif

// Test that the attribute is present on the function call operator of a
// function object. The CHECK-NEXT chain walks the AST dump of the call
// operator, ending at the attribute line.
struct FuncObj {
[[intel::use_stall_enable_clusters]] void operator()() const {}
// CHECK: CXXRecordDecl{{.*}}implicit struct FuncObj
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
// CHECK-NEXT: CompoundStmt
// CHECK-NEXT: SYCLIntelUseStallEnableClustersAttr
};

// Test that the attribute is present on a lambda (applied to the function type
// of the lambda's call operator).
void test3() {
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
lambda();
// CHECK: FunctionDecl{{.*}}test3
// CHECK: LambdaExpr
// CHECK: SYCLIntelUseStallEnableClustersAttr
}

// Submits three kernels. For each one, a CHECK-LABEL on the kernel's
// FunctionDecl followed by a CHECK-NOT verifies that the attribute does not
// show up in that kernel's section of the AST dump.
int main() {
q.submit([&](handler &h) {
// Test that the attribute is not propagated to the kernel.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
h.single_task<class test_kernel1>(
FuncObj());

// Test that the attribute is not present for a LambdaExpr called by the kernel.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
h.single_task<class test_kernel2>(
[]() [[intel::use_stall_enable_clusters]]{});

// Test that the attribute is not propagated to the kernel.
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
h.single_task<class test_kernel3>(
[]() { test(); });
});
return 0;
}