Skip to content

Commit 8fbf4bb

Browse files
authored
[SYCL] Add support for new FPGA function attribute stall_enable (#2734)
This patch adds support for a new FPGA function attribute, stall_enable, to be sent through to the backend (and ignored by the emulator). Syntax: [[intel::stall_enable]]. This function attribute applies to a lambda function or a function definition. It requests, to the extent possible, that statically-scheduled clusters handle stalls using a stall-enable signal to freeze computation within the cluster. The stall_enable attribute can be applied to a device function/kernel and takes no arguments. The LLVM IR function metadata should simply be i32 1. Signed-off-by: Soumi Manna <[email protected]>
1 parent 06ce8db commit 8fbf4bb

File tree

8 files changed

+150
-1
lines changed

8 files changed

+150
-1
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,19 @@ def SYCLIntelNumSimdWorkItems : InheritableAttr {
12261226
let PragmaAttributeSupport = 0;
12271227
}
12281228

1229+
// Declares the SYCL FPGA function attribute spelled [[intel::stall_enable]].
// The record lists no arguments; its user-facing description lives in
// SYCLIntelStallEnableAttrDocs.
def SYCLIntelStallEnable : InheritableAttr {
1230+
// Only the C++11 spelling in the "intel" namespace is declared.
let Spellings = [CXX11<"intel","stall_enable">];
1231+
// Listed for both the SYCL host and SYCL device language options.
let LangOpts = [SYCLIsHost, SYCLIsDevice];
1232+
// Restricted to functions; ErrorDiag selects an error (rather than a warning)
// when the attribute is written on any other kind of declaration.
let Subjects = SubjectList<[Function], ErrorDiag>;
1233+
let AdditionalMembers = [{
1234+
static const char *getName() {
1235+
return "stall_enable";
1236+
}
1237+
}];
1238+
let Documentation = [SYCLIntelStallEnableAttrDocs];
1239+
let PragmaAttributeSupport = 0;
1240+
}
1241+
12291242
def SYCLIntelSchedulerTargetFmaxMhz : InheritableAttr {
12301243
let Spellings = [CXX11<"intelfpga","scheduler_target_fmax_mhz">,
12311244
CXX11<"intel","scheduler_target_fmax_mhz">];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2212,6 +2212,39 @@ device kernel, the attribute is ignored and it is not propagated to a kernel.
22122212
}];
22132213
}
22142214

2215+
// User-facing documentation for the [[intel::stall_enable]] attribute.
// The Content block is reStructuredText; the embedded C++ example is written
// so that the functor's call operator is accessible and the class definition
// is properly terminated.
def SYCLIntelStallEnableAttrDocs : Documentation {
2216+
let Category = DocCatFunction;
2217+
let Heading = "intel::stall_enable";
2218+
let Content = [{
2219+
When applied to a lambda or function call operator (of a function object)
2220+
on device, this requests, to the extent possible, that statically-scheduled
2221+
clusters handle stalls using a stall-enable signal to freeze computation
2222+
within the cluster. This attribute is ignored on the host.
2223+
2224+
If ``intel::stall_enable`` is applied to a function called from a device
2225+
kernel, the attribute is ignored and it is not propagated to a kernel.
2226+
2227+
The ``intel::stall_enable`` attribute takes no arguments and has an effect
2228+
only when applied to a function.
2229+
2230+
.. code-block:: c++
2231+
2232+
struct Functor
2233+
{
2234+
[[intel::stall_enable]] void operator()(item<1> item)
2235+
{
2236+
/* kernel code */
2237+
}
2238+
};
2239+
2240+
kernel<class kernel_name>(
2241+
[]() [[intel::stall_enable]] {
2242+
/* kernel code */
2243+
});
2244+
2245+
}];
2246+
}
2247+
22152248
def ReqdWorkGroupSizeAttrDocs : Documentation {
22162249
let Category = DocCatFunction;
22172250
let Heading = "reqd_work_group_size";

clang/include/clang/Basic/AttributeCommonInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ class AttributeCommonInfo {
165165
ParsedAttr == AT_SYCLIntelSchedulerTargetFmaxMhz ||
166166
ParsedAttr == AT_SYCLIntelMaxWorkGroupSize ||
167167
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
168-
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset)
168+
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
169+
ParsedAttr == AT_SYCLIntelStallEnable)
169170
return true;
170171

171172
return false;

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,12 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
676676
if (A->getEnabled())
677677
Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {}));
678678
}
679+
680+
if (FD->hasAttr<SYCLIntelStallEnableAttr>()) {
681+
llvm::Metadata *AttrMDArgs[] = {
682+
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
683+
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
684+
}
679685
}
680686

681687
/// Determine whether the function F ends with a return stmt.

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3029,6 +3029,20 @@ static void handleNumSimdWorkItemsAttr(Sema &S, Decl *D,
30293029
E);
30303030
}
30313031

3032+
// Handles stall_enable
3033+
static void handleStallEnableAttr(Sema &S, Decl *D, const ParsedAttr &Attr) {
3034+
if (D->isInvalidDecl())
3035+
return;
3036+
3037+
unsigned NumArgs = Attr.getNumArgs();
3038+
if (NumArgs > 0) {
3039+
S.Diag(Attr.getLoc(), diag::warn_attribute_too_many_arguments) << Attr << 0;
3040+
return;
3041+
}
3042+
3043+
handleSimpleAttribute<SYCLIntelStallEnableAttr>(S, D, Attr);
3044+
}
3045+
30323046
// Add scheduler_target_fmax_mhz
30333047
void Sema::addSYCLIntelSchedulerTargetFmaxMhzAttr(
30343048
Decl *D, const AttributeCommonInfo &Attr, Expr *E) {
@@ -8388,6 +8402,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
83888402
case ParsedAttr::AT_SYCLIntelNoGlobalWorkOffset:
83898403
handleNoGlobalWorkOffsetAttr(S, D, AL);
83908404
break;
8405+
case ParsedAttr::AT_SYCLIntelStallEnable:
8406+
handleStallEnableAttr(S, D, AL);
8407+
break;
83918408
case ParsedAttr::AT_VecTypeHint:
83928409
handleVecTypeHint(S, D, AL);
83938410
break;

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,20 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
545545

546546
if (auto *A = FD->getAttr<SYCLSimdAttr>())
547547
Attrs.insert(A);
548+
549+
// Allow the kernel attribute "stall_enable" only on lambda functions
550+
// and function objects that are called directly from a kernel
551+
// (i.e. the one passed to the single_task or parallel_for functions).
552+
// For all other cases, emit a warning and ignore.
553+
if (auto *A = FD->getAttr<SYCLIntelStallEnableAttr>()) {
554+
if (ParentFD == SYCLKernel) {
555+
Attrs.insert(A);
556+
} else {
557+
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
558+
FD->dropAttr<SYCLIntelStallEnableAttr>();
559+
}
560+
}
561+
548562
// Propagate the explicit SIMD attribute through call graph - it is used
549563
// to distinguish ESIMD code in ESIMD LLVM passes.
550564
if (KernelBody && KernelBody->hasAttr<SYCLSimdAttr>() &&
@@ -3222,6 +3236,7 @@ void Sema::MarkDevice(void) {
32223236
case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz:
32233237
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
32243238
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
3239+
case attr::Kind::SYCLIntelStallEnable:
32253240
case attr::Kind::SYCLSimd: {
32263241
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
32273242
!KernelBody->getAttr<SYCLSimdAttr>()) {
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
2+
3+
#include "sycl.hpp"
4+
5+
using namespace cl::sycl;
6+
queue q;
7+
8+
// Function object whose call operator is annotated with
// [[intel::stall_enable]]; main passes an instance of it to single_task as
// the body of test_kernel1.
class Foo {
9+
public:
10+
[[intel::stall_enable]] void operator()() const {}
11+
};
12+
13+
// Submits two single_task kernels that both carry stall_enable: test_kernel1
// takes a Foo function object, test_kernel2 takes an annotated lambda. The
// FileCheck directives that follow this function look for !stall_enable
// metadata on both kernel definitions, resolving to the same !{i32 1} node.
int main() {
14+
q.submit([&](handler &h) {
15+
Foo f;
16+
h.single_task<class test_kernel1>(f);
17+
18+
h.single_task<class test_kernel2>(
19+
[]() [[intel::stall_enable]]{});
20+
});
21+
return 0;
22+
}
23+
24+
// CHECK: define spir_kernel void @"{{.*}}test_kernel1"() #0 {{.*}} !stall_enable ![[NUM5:[0-9]+]]
25+
// CHECK: define spir_kernel void @"{{.*}}test_kernel2"() #0 {{.*}} !stall_enable ![[NUM5]]
26+
// CHECK: ![[NUM5]] = !{i32 1}

clang/test/SemaSYCL/stall_enable.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// RUN: %clang_cc1 %s -fsyntax-only -fsycl -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify
2+
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s
3+
4+
#include "sycl.hpp"
5+
6+
using namespace cl::sycl;
7+
queue q;
8+
9+
// Ordinary function annotated with stall_enable. It is only reached through
// the test_kernel3 lambda in main (not used directly as a kernel body), and
// the verify directive on its line anticipates an "attribute ignored" warning.
[[intel::stall_enable]] void test() {} //expected-warning{{'stall_enable' attribute ignored}}
10+
11+
#ifdef TRIGGER_ERROR
12+
[[intel::stall_enable(1)]] void bar1() {} // expected-error{{'stall_enable' attribute takes no arguments}}
13+
[[intel::stall_enable]] int N; // expected-error{{'stall_enable' attribute only applies to functions}}
14+
#endif
15+
16+
// Function object with a stall_enable-annotated call operator; main passes a
// temporary FuncObj to single_task as the body of test_kernel1.
struct FuncObj {
17+
[[intel::stall_enable]] void operator()() const {}
18+
};
19+
20+
// Submits three single_task kernels and pairs each with FileCheck directives
// for the AST dump:
//   test_kernel1 - FuncObj with an annotated call operator; attribute expected.
//   test_kernel2 - annotated lambda; attribute expected.
//   test_kernel3 - plain lambda that merely calls the annotated function
//                  test(); the attribute is expected to be absent.
int main() {
21+
q.submit([&](handler &h) {
22+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
23+
// CHECK: SYCLIntelStallEnableAttr {{.*}}
24+
h.single_task<class test_kernel1>(
25+
FuncObj());
26+
27+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
28+
// CHECK: SYCLIntelStallEnableAttr {{.*}}
29+
h.single_task<class test_kernel2>(
30+
[]() [[intel::stall_enable]]{});
31+
32+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
33+
// CHECK-NOT: SYCLIntelStallEnableAttr {{.*}}
34+
h.single_task<class test_kernel3>(
35+
[]() { test(); });
36+
36+
});
37+
return 0;
38+
}

0 commit comments

Comments
 (0)