Skip to content

[SYCL][FPGA] Allowing max-concurrency attribute on functions. #3388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Apr 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0d2c90b
Adding max-concurrency attr - Scratch PR.
zahiraam Mar 21, 2021
8140d90
Implementing max-attribute.
zahiraam Mar 22, 2021
8ee951b
Fix indentation
zahiraam Mar 22, 2021
1a4293d
Fix indentation
zahiraam Mar 22, 2021
301f83d
Fix indentation
zahiraam Mar 22, 2021
92f2be7
Fix indentation
zahiraam Mar 22, 2021
32f6908
Fix indentation
zahiraam Mar 23, 2021
284a29a
Change Diag's name
zahiraam Mar 23, 2021
39a2f45
Addressed comments from review
zahiraam Mar 24, 2021
cb1eb37
Addressed comments from review
zahiraam Mar 26, 2021
2428add
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Mar 26, 2021
01123e0
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Mar 29, 2021
a923b5c
Fixing case statement.
zahiraam Mar 29, 2021
a0c8ae4
Fixes for comments.
zahiraam Mar 29, 2021
a598d02
Fixes for comments.
zahiraam Mar 29, 2021
3619832
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Mar 30, 2021
04be2d3
Merge branch 'max-attr-scratch' of https://github.com/zahiraam/llvm-1…
zahiraam Mar 30, 2021
ffecab2
Fixing lit test by adding new attr max-concurrency
zahiraam Mar 30, 2021
495424f
Fix LIT test intel-fpga-loops.cpp
zahiraam Mar 30, 2021
dbb4348
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Mar 30, 2021
b681e86
Fix LIT test max-concurrency.cpp
zahiraam Mar 30, 2021
fbd9a03
Remove optional argument for max-concurrency
zahiraam Mar 31, 2021
350744a
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Mar 31, 2021
cdfa7a3
fix conflict
zahiraam Mar 31, 2021
447ca30
Indentation
zahiraam Mar 31, 2021
af4f734
Fixes after Prem's review
zahiraam Mar 31, 2021
47f0c4a
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Mar 31, 2021
c8df3cf
Fix comment
zahiraam Mar 31, 2021
b4e0e93
Fix LIT test
zahiraam Mar 31, 2021
5357212
Merge remote-tracking branch 'remote/sycl' into max-attr-scratch
zahiraam Apr 1, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1888,12 +1888,13 @@ def SYCLIntelFPGAInitiationInterval : DeclOrStmtAttr {
let SupportsNonconformingLambdaSyntax = 1;
}

def SYCLIntelFPGAMaxConcurrency : StmtAttr {
def SYCLIntelFPGAMaxConcurrency : DeclOrStmtAttr {
let Spellings = [CXX11<"intelfpga","max_concurrency">,
CXX11<"intel","max_concurrency">];
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
ErrorDiag, "'for', 'while', and 'do' statements">;
let Args = [ExprArgument<"NThreadsExpr", /*opt*/1>];
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function],
ErrorDiag,
"'for', 'while', 'do' statements, and functions">;
let Args = [ExprArgument<"NThreadsExpr">];
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let HasCustomTypeTransform = 1;
let Documentation = [SYCLIntelFPGAMaxConcurrencyAttrDocs];
Expand Down
12 changes: 8 additions & 4 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2865,10 +2865,11 @@ def SYCLIntelFPGAMaxConcurrencyAttrDocs : Documentation {
let Category = DocCatVariable;
let Heading = "intel::max_concurrency";
let Content = [{
This attribute applies to a loop. Indicates that the loop should allow no more
than N threads or iterations to execute it simultaneously. N must be a non
negative integer. '0' indicates the max_concurrency case to be unbounded. Cannot
be applied multiple times to the same loop.
This attribute applies to a loop or a function. It indicates that the
loop/function should allow no more than N threads or iterations to execute it
simultaneously. N must be a non-negative integer. '0' indicates the
max_concurrency case to be unbounded. Cannot be applied multiple times to the
same loop or function.

.. code-block:: c++

Expand All @@ -2877,10 +2878,13 @@ be applied multiple times to the same loop.
[[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0;
}

[[intel::max_concurrency(2)]] void foo1() { }
template<int N>
void bar() {
[[intel::max_concurrency(N)]] for(;;) { }
}
template<int N>
[[intel::max_concurrency(N)]] void bar1() { }

}];
}
Expand Down
9 changes: 9 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -10304,6 +10304,9 @@ class Sema final {
SYCLIntelFPGAInitiationIntervalAttr *MergeSYCLIntelFPGAInitiationIntervalAttr(
Decl *D, const SYCLIntelFPGAInitiationIntervalAttr &A);

/// MergeSYCLIntelFPGAMaxConcurrencyAttr - Merges max_concurrency attribute
/// \p A into declaration \p D. Returns a newly allocated copy of \p A when
/// \p D has no max_concurrency attribute yet. Returns nullptr when \p D
/// already has one; in that case a duplicate-attribute warning is emitted if
/// both arguments are evaluated constants with different values.
SYCLIntelFPGAMaxConcurrencyAttr *MergeSYCLIntelFPGAMaxConcurrencyAttr(
Decl *D, const SYCLIntelFPGAMaxConcurrencyAttr &A);

/// AddAlignedAttr - Adds an aligned attribute to a particular declaration.
void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
bool IsPackExpansion);
Expand Down Expand Up @@ -10358,6 +10361,12 @@ class Sema final {
/// declaration.
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID);

/// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_concurrency attribute to a
/// particular declaration.
///
/// Unless \p E is value-dependent, it must be an integer constant expression
/// with a non-negative value; otherwise an error is diagnosed and nothing is
/// added. If \p D already carries a max_concurrency attribute, no second
/// attribute is added, and a warning is emitted when the values differ.
void AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D,
const AttributeCommonInfo &CI,
Expr *E);

bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
bool checkAllowedSYCLInitializer(VarDecl *VD,
bool CheckValueDependent = false);
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,14 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
}

// A function-level max_concurrency attribute is lowered to a
// "max_concurrency" metadata node on Fn whose single operand is the attribute
// argument as an i32 constant. The argument is expected to already be an
// evaluated ConstantExpr here (cast<> asserts otherwise).
if (const auto *MaxConcurrencyAttr =
        FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
  const auto *ThreadsCE =
      cast<ConstantExpr>(MaxConcurrencyAttr->getNThreadsExpr());
  llvm::APSInt NThreads = ThreadsCE->getResultAsAPSInt();
  llvm::Metadata *MDOperands[] = {llvm::ConstantAsMetadata::get(
      Builder.getInt32(NThreads.getSExtValue()))};
  Fn->setMetadata("max_concurrency", llvm::MDNode::get(Context, MDOperands));
}

if (FD->hasAttr<SYCLIntelFPGADisableLoopPipeliningAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2628,6 +2628,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.MergeSYCLIntelNoGlobalWorkOffsetAttr(D, *A);
else if (const auto *A = dyn_cast<IntelFPGAMaxReplicatesAttr>(Attr))
NewAttr = S.MergeIntelFPGAMaxReplicatesAttr(D, *A);
else if (const auto *A = dyn_cast<SYCLIntelFPGAMaxConcurrencyAttr>(Attr))
NewAttr = S.MergeSYCLIntelFPGAMaxConcurrencyAttr(D, *A);
else if (const auto *A = dyn_cast<IntelFPGAForcePow2DepthAttr>(Attr))
NewAttr = S.MergeIntelFPGAForcePow2DepthAttr(D, *A);
else if (const auto *A = dyn_cast<SYCLIntelFPGAInitiationIntervalAttr>(Attr))
Expand Down
73 changes: 73 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6435,6 +6435,76 @@ static void handleSYCLIntelPipeIOAttr(Sema &S, Decl *D,
S.addSYCLIntelPipeIOAttr(D, Attr, E);
}

/// Merges max_concurrency attribute \p A into declaration \p D.
///
/// \returns a copy of \p A allocated in the ASTContext when \p D does not yet
/// carry a max_concurrency attribute, or nullptr when it already does. In the
/// latter case a duplicate-attribute warning (plus a note pointing at \p A) is
/// emitted if both arguments are evaluated constants with different values.
SYCLIntelFPGAMaxConcurrencyAttr *Sema::MergeSYCLIntelFPGAMaxConcurrencyAttr(
    Decl *D, const SYCLIntelFPGAMaxConcurrencyAttr &A) {
  const auto *Existing = D->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>();

  if (!Existing) {
    // FIXME
    // max_concurrency and disable_component_pipelining attributes can't be
    // applied to the same function. Upcoming patch needs to have this code
    // added to it:
    // if (checkAttrMutualExclusion<IntelDisableComponentPipeline>(S, D, AL))
    //   return;
    return ::new (Context)
        SYCLIntelFPGAMaxConcurrencyAttr(Context, A, A.getNThreadsExpr());
  }

  // D already has the attribute. Values can only be compared once both
  // arguments have been evaluated to ConstantExprs; dependent arguments are
  // left alone here.
  const auto *ExistingCE = dyn_cast<ConstantExpr>(Existing->getNThreadsExpr());
  const auto *IncomingCE = dyn_cast<ConstantExpr>(A.getNThreadsExpr());
  if (ExistingCE && IncomingCE) {
    if (ExistingCE->getResultAsAPSInt() != IncomingCE->getResultAsAPSInt()) {
      Diag(Existing->getLoc(), diag::warn_duplicate_attribute) << &A;
      Diag(A.getLoc(), diag::note_previous_attribute);
    }
  }

  // Never attach a second copy, whether or not the values matched.
  return nullptr;
}

/// Validates the max_concurrency argument \p E and, if acceptable, attaches a
/// SYCLIntelFPGAMaxConcurrencyAttr to \p D.
///
/// \param D  declaration receiving the attribute.
/// \param CI spelling/location information used for the new attribute and for
///           diagnostics.
/// \param E  the N-threads argument expression.
void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D,
                                              const AttributeCommonInfo &CI,
                                              Expr *E) {
  // A value-dependent argument cannot be evaluated yet, so it is attached
  // unchecked.
  if (E->isValueDependent()) {
    D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E));
    return;
  }

  llvm::APSInt NThreads;
  ExprResult Converted = VerifyIntegerConstantExpression(E, &NThreads);
  if (Converted.isInvalid())
    return;
  // Keep the expression returned by the verifier rather than the raw argument.
  E = Converted.get();

  // This attribute requires a non-negative value.
  if (NThreads < 0) {
    Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
        << CI << /*non-negative*/ 1;
    return;
  }

  // Only one max_concurrency attribute may live on a declaration. A repeat
  // with the same value is dropped silently; a different value gets a warning.
  if (const auto *Existing = D->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
    const auto *ExistingCE =
        dyn_cast<ConstantExpr>(Existing->getNThreadsExpr());
    if (ExistingCE && NThreads != ExistingCE->getResultAsAPSInt()) {
      Diag(CI.getLoc(), diag::warn_duplicate_attribute) << CI;
      Diag(Existing->getLoc(), diag::note_previous_attribute);
    }
    return;
  }

  D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E));
}

/// Declaration-attribute handler for max_concurrency: runs the deprecated
/// spelling check on \p A, then forwards its first argument to
/// Sema::AddSYCLIntelFPGAMaxConcurrencyAttr, which validates it and attaches
/// the attribute to \p D.
static void handleSYCLIntelFPGAMaxConcurrencyAttr(Sema &S, Decl *D,
                                                  const ParsedAttr &A) {
  S.CheckDeprecatedSYCLAttributeSpelling(A);

  // FIXME
  // max_concurrency and disable_component_pipelining attributes can't be
  // applied to the same function. Upcoming patch needs to have this code
  // added to it:
  // if (checkAttrMutualExclusion<IntelDisableComponentPipeline>(S, D, A))
  //   return;

  S.AddSYCLIntelFPGAMaxConcurrencyAttr(D, A, A.getArgAsExpr(0));
}

namespace {
struct IntrinToName {
uint32_t Id;
Expand Down Expand Up @@ -9689,6 +9759,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_SYCLIntelPipeIO:
handleSYCLIntelPipeIOAttr(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency:
handleSYCLIntelFPGAMaxConcurrencyAttr(S, D, AL);
break;

// Swift attributes.
case ParsedAttr::AT_SwiftAsyncName:
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,13 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
}
}

// Attribute "max_concurrency" is applied to device functions only. The
// attribute is not propagated to the caller: it is collected into Attrs only
// when ParentFD is the kernel itself (ParentFD == SYCLKernel).
// NOTE(review): presumably this means the attributed function is called
// directly from the kernel body, and attributes on functions reached through
// deeper call chains are dropped -- confirm against how ParentFD is set.
if (auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>())
if (ParentFD == SYCLKernel) {
Attrs.push_back(A);
}

// Attribute "disable_loop_pipelining" can be applied explicitly on
// kernel function. Attribute should not be propagated from device
// functions to kernel.
Expand Down Expand Up @@ -3535,6 +3542,7 @@ void Sema::MarkDevice(void) {
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelUseStallEnableClusters:
case attr::Kind::SYCLIntelLoopFuse:
case attr::Kind::SYCLIntelFPGAMaxConcurrency:
case attr::Kind::SYCLIntelFPGADisableLoopPipelining:
case attr::Kind::SYCLIntelFPGAInitiationInterval:
case attr::Kind::SYCLSimd: {
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,16 @@ static void instantiateIntelSYCLFunctionAttr(
S.addIntelSingleArgAttr<AttrName>(New, *Attr, Result.getAs<Expr>());
}

/// Instantiates max_concurrency attribute \p A onto \p New: substitutes
/// \p TemplateArgs into the attribute argument inside a constant-evaluated
/// context and, if substitution succeeds, hands the result to
/// Sema::AddSYCLIntelFPGAMaxConcurrencyAttr so it goes through the same
/// checks as a directly written attribute.
static void instantiateSYCLIntelFPGAMaxConcurrencyAttr(
    Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
    const SYCLIntelFPGAMaxConcurrencyAttr *A, Decl *New) {
  EnterExpressionEvaluationContext ConstantEvalScope(
      S, Sema::ExpressionEvaluationContext::ConstantEvaluated);

  ExprResult Substituted = S.SubstExpr(A->getNThreadsExpr(), TemplateArgs);
  // Failed substitution: attach nothing.
  if (Substituted.isInvalid())
    return;

  S.AddSYCLIntelFPGAMaxConcurrencyAttr(New, *A, Substituted.getAs<Expr>());
}

static void instantiateIntelFPGAPrivateCopiesAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const IntelFPGAPrivateCopiesAttr *A, Decl *New) {
Expand Down Expand Up @@ -949,6 +959,11 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
*this, TemplateArgs, SYCLIntelMaxWorkGroupSize, New);
continue;
}
// max_concurrency has a dedicated instantiation helper that substitutes the
// template arguments into the attribute argument and re-validates it through
// Sema. Move on to the next attribute afterwards, exactly like the sibling
// branch directly above; without the `continue` this same TmplAttr would fall
// through to the remaining handling in the loop body after it has already
// been instantiated.
if (const auto *SYCLIntelMaxConcurrency =
        dyn_cast<SYCLIntelFPGAMaxConcurrencyAttr>(TmplAttr)) {
  instantiateSYCLIntelFPGAMaxConcurrencyAttr(*this, TemplateArgs,
                                             SYCLIntelMaxConcurrency, New);
  continue;
}
if (const auto *SYCLIntelFPGAInitiationInterval =
dyn_cast<SYCLIntelFPGAInitiationIntervalAttr>(TmplAttr)) {
instantiateSYCLIntelFPGAInitiationIntervalAttr(
Expand Down
141 changes: 141 additions & 0 deletions clang/test/CodeGenSYCL/max-concurrency.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

// CHECK-LABEL: define{{.*}}@_Z15max_concurrencyILi5EEvv()
// CHECK: entry:
// CHECK: [[A:%.*]] = alloca [10 x i32], align 4
// CHECK: [[A_CAST:%.*]] = addrspacecast [10 x i32]* [[A]] to [10 x i32] addrspace(4)*
// CHECK: %4 = load i32, i32 addrspace(4)* %i.ascast, align 4
// CHECK: [[IDXPROM:%.*]] = sext i32 %4 to i64
// CHECK: %arrayidx = getelementptr inbounds [10 x i32], [10 x i32] addrspace(4)* [[A_CAST]], i64 0, i64 [[IDXPROM]]
// CHECK: store i32 0, i32 addrspace(4)* %arrayidx, align 4
// CHECK: [[TMP2:%.*]] = load i32, i32 addrspace(4)* %i.ascast, align 4
// CHECK: %inc = add nsw i32 [[TMP2]], 1
// CHECK: store i32 %inc, i32 addrspace(4)* %i.ascast, align 4
// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
// CHECK: store i32 %inc10, i32 addrspace(4)* %i1.ascast, align 4
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]]
// CHECK: ret void

// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
// CHECK: entry:
// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1
// CHECK: [[F1_ASCAST:%.*]] = addrspacecast [[CLASS_F1]]* [[F1]] to [[CLASS_F1]] addrspace(4)*
// CHECK: [[TMP0:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8*
// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]])
// CHECK: call spir_func void @_ZNK8Functor1clEv([[CLASS_F1]] addrspace(4)* dereferenceable_or_null(1) [[F1_ASCAST]])
// CHECK: [[TMP1:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8*
// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP1]])
// CHECK: ret void

// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name4"() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
// CHECK: entry
// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1
// CHECK: [[F3_ASCAST:%.*]] = addrspacecast [[CLASS_F3]]* [[F3]] to [[CLASS_F3]] addrspace(4)*
// CHECK: [[TMP2:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8*
// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP2]])
// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv([[CLASS_F3]] addrspace(4)* dereferenceable_or_null(1) [[F3_ASCAST]])
// CHECK: [[TMP3:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8*
// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP3]])
// CHECK: ret void

// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv
// CHECK: entry:
// CHECK: [[ADDR_1:%.*]] = alloca [[CLASS_F3:%.*]] addrspace(4)*, align 8
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[CLASS_F3]] addrspace(4)** [[ADDR_1]] to [[CLASS_F3]] addrspace(4)* addrspace(4)*
// CHECK: store [[CLASS_F3]] addrspace(4)* %this, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
// CHECK: %this1 = load [[CLASS_F3]] addrspace(4)*, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
// CHECK: ret void

// CHECK: define dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE12kernel_name5"()
// CHECK: entry:
// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1
// CHECK: [[H2:%.*]] = addrspacecast [[H]]* [[H1]] to [[H]] addrspace(4)*
// CHECK: [[H3:%.*]] = bitcast [[H]]* [[H1]] to i8*
// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[H3]])
// CHECK: call spir_func void @"_ZZZ4mainENK3$_1clERN2cl4sycl7handlerEENKUlvE_clEv"([[H]] addrspace(4)* dereferenceable_or_null(1) [[H2]])
// CHECK: [[TMP4:%.*]] = bitcast [[H]]* [[H1]] to i8*
// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP4]])
// CHECK: ret void

// CHECK: define {{.*}}spir_func void @"_ZZZ4mainENK3$_1clERN2cl4sycl7handlerEENKUlvE_clEv"
// CHECK: entry:
// CHECK: [[ADDR_1:%.*]] = alloca [[HH:%.*]] addrspace(4)*, align 8
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[HH]] addrspace(4)** [[ADDR_1]] to [[HH]] addrspace(4)* addrspace(4)*
// CHECK: store [[HH]] addrspace(4)* %this, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
// CHECK: %this1 = load [[HH]] addrspace(4)*, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
// CHECK: call spir_func void @_Z4funcILi2EEvv()
// CHECK: ret void

// Loop-attribute coverage. The first loop takes its max_concurrency argument
// from template parameter A (instantiated with 5 from main), the second uses
// the literal 4. The FileCheck lines inside the body pin the resulting
// llvm.loop.max_concurrency.count loop metadata (5 and 4 respectively).
template <int A>
void max_concurrency() {
int a[10];
// CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP:[0-9]+]], ![[MD_max_concurrency:[0-9]+]]}
// CHECK-NEXT: ![[MP]] = !{!"llvm.loop.mustprogress"}
// CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 5}
[[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_MC_1]] = distinct !{![[MD_MC_1]], ![[MP]], ![[MD_max_concurrency_1:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_1]] = !{!"llvm.loop.max_concurrency.count", i32 4}
[[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// CHECK: ![[NUM1]] = !{i32 4}

// Minimal kernel entry point marked with the sycl_kernel attribute; it simply
// invokes the callable it is given. main uses it to run max_concurrency<5>()
// in device code.
template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) {
kernelFunc();
}

using namespace cl::sycl;

// Kernel functor whose call operator carries max_concurrency(4) with a
// literal argument. Submitted as kernel_name1; the FileCheck expectations at
// the top of the file require !max_concurrency metadata on that kernel.
class Functor1 {
public:
[[intel::max_concurrency(4)]] void operator()() const {}
};

// Free function carrying max_concurrency(2); called from Functor2's call
// operator rather than being a kernel entry itself.
[[intel::max_concurrency(2)]] void foo() {}

// Kernel functor without the attribute on its call operator; it only calls
// foo(), which is attributed. Submitted as kernel_name2. This file contains
// no FileCheck expectations for kernel_name2.
class Functor2 {
public:
void operator()() const {
foo();
}
};
// Kernel functor template whose call operator takes the max_concurrency
// argument from template parameter NT (a dependent expression until
// instantiation). main instantiates Functor3<4> as kernel_name4, for which
// the FileCheck expectations require !max_concurrency metadata on the kernel.
template <int NT>
class Functor3 {
public:
[[intel::max_concurrency(NT)]] void operator()() const {}
};

// Function template with a dependent max_concurrency argument. main calls
// func<2>() from inside the kernel_name5 lambda; the expectations for
// kernel_name5 only require the call to the _Z4funcILi2EEvv instantiation.
template <int NT>
[[intel::max_concurrency(NT)]] void func() {}

// Test driver: creates every kernel the FileCheck expectations refer to.
//  - kernel_function: runs max_concurrency<5>() (loop attribute cases).
//  - kernel_name1:    Functor1, literal attribute on the call operator.
//  - kernel_name2:    Functor2, attribute only on the callee foo().
//  - kernel_name4:    Functor3<4>, template-dependent attribute argument.
//  - kernel_name5:    lambda calling the attributed template func<2>().
int main() {
queue q;

kernel_single_task_1<class kernel_function>([]() {
max_concurrency<5>();
});

q.submit([&](handler &h) {
Functor1 f1;
h.single_task<class kernel_name1>(f1);

Functor2 f2;
h.single_task<class kernel_name2>(f2);

Functor3<4> f3;
h.single_task<class kernel_name4>(f3);

h.single_task<class kernel_name5>([]() {
func<2>();
});

});


return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@
// CHECK-NEXT: SYCLDeviceIndirectlyCallable (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelFPGADisableLoopPipelining (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelFPGAInitiationInterval (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelFPGAMaxConcurrency (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelKernelArgsRestrict (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelLoopFuse (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelMaxGlobalWorkDim (SubjectMatchRule_function)
Expand Down
Loading