Skip to content

[SYCL] Enable template parameter support for ii and max_concurrency attributes #811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1550,8 +1550,9 @@ def SYCLIntelFPGAIVDep : Attr {

def SYCLIntelFPGAII : Attr {
let Spellings = [CXX11<"intelfpga","ii">];
let Args = [IntArgument<"Interval">];
let Args = [ExprArgument<"IntervalExpr">];
let LangOpts = [SYCLIsDevice, SYCLIsHost];
let HasCustomTypeTransform = 1;
let AdditionalMembers = [{
static const char *getName() {
return "ii";
Expand All @@ -1562,8 +1563,9 @@ def SYCLIntelFPGAII : Attr {

def SYCLIntelFPGAMaxConcurrency : Attr {
let Spellings = [CXX11<"intelfpga","max_concurrency">];
let Args = [IntArgument<"NThreads">];
let Args = [ExprArgument<"NThreadsExpr">];
let LangOpts = [SYCLIsDevice, SYCLIsHost];
let HasCustomTypeTransform = 1;
let AdditionalMembers = [{
static const char *getName() {
return "max_concurrency";
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,9 @@ class Sema {
SYCLIntelFPGAIVDepAttr *
BuildSYCLIntelFPGAIVDepAttr(const AttributeCommonInfo &CI, Expr *Expr1,
Expr *Expr2);
template <typename FPGALoopAttrT>
FPGALoopAttrT *BuildSYCLIntelFPGALoopAttr(const AttributeCommonInfo &A,
Expr *E);

bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc);

Expand Down
17 changes: 13 additions & 4 deletions clang/lib/CodeGen/CGLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,14 +895,23 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
}

if (IntelFPGAII) {
unsigned ValueInt = IntelFPGAII->getInterval();
if (ValueInt > 0)
setSYCLIInterval(ValueInt);
llvm::APSInt ArgVal(32);
bool IsValid =
IntelFPGAII->getIntervalExpr()->isIntegerConstantExpr(ArgVal, Ctx);
assert(IsValid && "Not an integer constant expression");
(void)IsValid;
setSYCLIInterval(ArgVal.getSExtValue());
}

if (IntelFPGAMaxConcurrency) {
llvm::APSInt ArgVal(32);
bool IsValid =
IntelFPGAMaxConcurrency->getNThreadsExpr()->isIntegerConstantExpr(
ArgVal, Ctx);
assert(IsValid && "Not an integer constant expression");
(void)IsValid;
setSYCLMaxConcurrencyEnable();
setSYCLMaxConcurrencyNThreads(IntelFPGAMaxConcurrency->getNThreads());
setSYCLMaxConcurrencyNThreads(ArgVal.getSExtValue());
}
}

Expand Down
79 changes: 42 additions & 37 deletions clang/lib/Sema/SemaStmtAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ static Attr *handleSuppressAttr(Sema &S, Stmt *St, const ParsedAttr &A,
}

template <typename FPGALoopAttrT>
static Attr *handleIntelFPGALoopAttr(Sema &S, Stmt *St, const ParsedAttr &A) {

static Attr *handleIntelFPGALoopAttr(Sema &S, const ParsedAttr &A) {
if(S.LangOpts.SYCLIsHost)
return nullptr;

Expand All @@ -93,39 +92,7 @@ static Attr *handleIntelFPGALoopAttr(Sema &S, Stmt *St, const ParsedAttr &A) {
}
}

unsigned SafeInterval = 0;

if (NumArgs == 1) {
Expr *E = A.getArgAsExpr(0);
llvm::APSInt ArgVal(32);

if (!E->isIntegerConstantExpr(ArgVal, S.Context)) {
S.Diag(A.getLoc(), diag::err_attribute_argument_type)
<< A << AANT_ArgumentIntegerConstant << E->getSourceRange();
return nullptr;
}

int Val = ArgVal.getSExtValue();

if (A.getKind() != ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency) {
if (Val <= 0) {
S.Diag(A.getRange().getBegin(),
diag::warn_attribute_requires_positive_integer)
<< A << /* positive */ 0;
return nullptr;
}
} else {
if (Val < 0) {
S.Diag(A.getRange().getBegin(),
diag::warn_attribute_requires_positive_integer)
<< A << /* non-negative */ 1;
return nullptr;
}
}
SafeInterval = Val;
}

return FPGALoopAttrT::CreateImplicit(S.Context, SafeInterval);
return S.BuildSYCLIntelFPGALoopAttr<FPGALoopAttrT>(A, A.getArgAsExpr(0));
}

static bool checkSYCLIntelFPGAIVDepSafeLen(Sema &S, llvm::APSInt &Value,
Expand Down Expand Up @@ -207,6 +174,44 @@ Sema::BuildSYCLIntelFPGAIVDepAttr(const AttributeCommonInfo &CI, Expr *Expr1,
SYCLIntelFPGAIVDepAttr(Context, CI, SafeLenExpr, ArrayExpr, SafelenValue);
}

template <typename FPGALoopAttrT>
FPGALoopAttrT *Sema::BuildSYCLIntelFPGALoopAttr(const AttributeCommonInfo &A,
Expr *E) {
if (!E)
return nullptr;

if (!E->isInstantiationDependent()) {
llvm::APSInt ArgVal(32);

if (!E->isIntegerConstantExpr(ArgVal, getASTContext())) {
Diag(E->getExprLoc(), diag::err_attribute_argument_type)
<< A.getAttrName() << AANT_ArgumentIntegerConstant
<< E->getSourceRange();
return nullptr;
}

int Val = ArgVal.getSExtValue();

if (A.getParsedKind() == ParsedAttr::AT_SYCLIntelFPGAII) {
if (Val <= 0) {
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< "'ii'" << /* positive */ 0;
return nullptr;
}
} else if (A.getParsedKind() ==
ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency) {
if (Val < 0) {
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< "'max_concurrency'" << /* non-negative */ 1;
return nullptr;
}
} else {
llvm_unreachable("unknown sycl fpga loop attr");
}
}

return new (Context) FPGALoopAttrT(Context, A, E);
}
// Filters out any attributes from the list that are either not the specified
// type, or whose function isDependent returns true.
template <typename T>
Expand Down Expand Up @@ -611,9 +616,9 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
case ParsedAttr::AT_SYCLIntelFPGAIVDep:
return handleIntelFPGAIVDepAttr(S, A);
case ParsedAttr::AT_SYCLIntelFPGAII:
return handleIntelFPGALoopAttr<SYCLIntelFPGAIIAttr>(S, St, A);
return handleIntelFPGALoopAttr<SYCLIntelFPGAIIAttr>(S, A);
case ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency:
return handleIntelFPGALoopAttr<SYCLIntelFPGAMaxConcurrencyAttr>(S, St, A);
return handleIntelFPGALoopAttr<SYCLIntelFPGAMaxConcurrencyAttr>(S, A);
case ParsedAttr::AT_OpenCLUnrollHint:
return handleLoopUnrollHint<OpenCLUnrollHintAttr>(S, St, A, Range);
case ParsedAttr::AT_LoopUnrollHint:
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,11 @@ namespace {
const LoopHintAttr *TransformLoopHintAttr(const LoopHintAttr *LH);
const SYCLIntelFPGAIVDepAttr *
TransformSYCLIntelFPGAIVDepAttr(const SYCLIntelFPGAIVDepAttr *IV);
const SYCLIntelFPGAIIAttr *
TransformSYCLIntelFPGAIIAttr(const SYCLIntelFPGAIIAttr *II);
const SYCLIntelFPGAMaxConcurrencyAttr *
TransformSYCLIntelFPGAMaxConcurrencyAttr(
const SYCLIntelFPGAMaxConcurrencyAttr *MC);

ExprResult TransformPredefinedExpr(PredefinedExpr *E);
ExprResult TransformDeclRefExpr(DeclRefExpr *E);
Expand Down Expand Up @@ -1356,6 +1361,23 @@ TemplateInstantiator::TransformSYCLIntelFPGAIVDepAttr(
return getSema().BuildSYCLIntelFPGAIVDepAttr(*IVDep, Expr1, Expr2);
}

const SYCLIntelFPGAIIAttr *TemplateInstantiator::TransformSYCLIntelFPGAIIAttr(
const SYCLIntelFPGAIIAttr *II) {
Expr *TransformedExpr =
getDerived().TransformExpr(II->getIntervalExpr()).get();
return getSema().BuildSYCLIntelFPGALoopAttr<SYCLIntelFPGAIIAttr>(
*II, TransformedExpr);
}

const SYCLIntelFPGAMaxConcurrencyAttr *
TemplateInstantiator::TransformSYCLIntelFPGAMaxConcurrencyAttr(
const SYCLIntelFPGAMaxConcurrencyAttr *MC) {
Expr *TransformedExpr =
getDerived().TransformExpr(MC->getNThreadsExpr()).get();
return getSema().BuildSYCLIntelFPGALoopAttr<SYCLIntelFPGAMaxConcurrencyAttr>(
*MC, TransformedExpr);
}

ExprResult TemplateInstantiator::transformNonTypeTemplateParmRef(
NonTypeTemplateParmDecl *parm,
SourceLocation loc,
Expand Down
36 changes: 36 additions & 0 deletions clang/test/CodeGenSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

// CHECK: br label %for.cond, !llvm.loop ![[MD_A:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_B:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_C:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_D:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_E:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_F:[0-9]+]]

// CHECK: ![[MD_A]] = distinct !{![[MD_A]], ![[MD_ii:[0-9]+]]}
// CHECK-NEXT: ![[MD_ii]] = !{!"llvm.loop.ii.count", i32 2}
Expand All @@ -21,6 +25,36 @@ void zoo() {
a[i] = 0;
}

// CHECK: ![[MD_C]] = distinct !{![[MD_C]], ![[MD_ii_2:[0-9]+]]}
// CHECK-NEXT: ![[MD_ii_2]] = !{!"llvm.loop.ii.count", i32 4}
template <int A>
void boo() {
int a[10];
[[intelfpga::ii(A)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_D]] = distinct !{![[MD_D]], ![[MD_ii_3:[0-9]+]]}
// CHECK-NEXT: ![[MD_ii_3]] = !{!"llvm.loop.ii.count", i32 8}
[[intelfpga::ii(8)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// CHECK: ![[MD_E]] = distinct !{![[MD_E]], ![[MD_max_concurrency_2:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_2]] = !{!"llvm.loop.max_concurrency.count", i32 0}
template <int B>
void foo() {
int a[10];
[[intelfpga::max_concurrency(B)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_F]] = distinct !{![[MD_F]], ![[MD_max_concurrency_3:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_3]] = !{!"llvm.loop.max_concurrency.count", i32 4}
[[intelfpga::max_concurrency(4)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
kernelFunc();
Expand All @@ -30,6 +64,8 @@ int main() {
kernel_single_task<class kernel_function>([]() {
goo();
zoo();
boo<4>();
foo<0>();
});
return 0;
}
38 changes: 36 additions & 2 deletions clang/test/SemaSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ void goo() {
[[intelfpga::ivdep(0)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
// expected-warning@+1 {{'ii' attribute requires a positive integral compile time constant expression - attribute ignored}}
// expected-error@+1 {{'ii' attribute requires a positive integral compile time constant expression}}
[[intelfpga::ii(0)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
// expected-warning@+1 {{'max_concurrency' attribute requires a non-negative integral compile time constant expression - attribute ignored}}
// expected-error@+1 {{'max_concurrency' attribute requires a non-negative integral compile time constant expression}}
[[intelfpga::max_concurrency(-1)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
Expand Down Expand Up @@ -229,6 +229,36 @@ void ivdep_dependent() {
};
}

template <int A, int B, int C>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you test a non-dependent attribute in a dependent context? I have a feeling that the transforms running on a non-dependent version will cause the int value to be lost.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, int value is lost in this case. Do you have any suggestions why it happens or how to handle this? As I see, it requires more than one-line fix, but unfortunately I have no idea where the error can be hidden.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the best way about this is to just remove the IntervalValue field from the attribute. I don't think it is necessary anyway. It'll simplify a bunch of code, and result in only the Expr needing to be handled everywhere. It means you need to do a touch more work in CGLoopInfo (call isIntegerConstantExpr() on the expr to get the integer value out), but it simplifies the rest of the patch a bunch.

void ii_dependent() {
int a[10];
// expected-error@+1 {{'ii' attribute requires a positive integral compile time constant expression}}
[[intelfpga::ii(C)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;

// expected-error@+1 {{duplicate Intel FPGA loop attribute 'ii'}}
[[intelfpga::ii(A)]]
[[intelfpga::ii(B)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template <int A, int B, int C>
void max_concurrency_dependent() {
int a[10];
// expected-error@+1 {{'max_concurrency' attribute requires a non-negative integral compile time constant expression}}
[[intelfpga::max_concurrency(C)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;

// expected-error@+1 {{duplicate Intel FPGA loop attribute 'max_concurrency'}}
[[intelfpga::max_concurrency(A)]]
[[intelfpga::max_concurrency(B)]]
for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
kernelFunc();
Expand All @@ -244,6 +274,10 @@ int main() {
//expected-note@-1 +{{in instantiation of function template specialization}}
ivdep_dependent<2, 4, -1>();
//expected-note@-1 +{{in instantiation of function template specialization}}
ii_dependent<2, 4, -1>();
//expected-note@-1 +{{in instantiation of function template specialization}}
max_concurrency_dependent<1, 4, -2>();
//expected-note@-1 +{{in instantiation of function template specialization}}
});
return 0;
}