Skip to content

[SYCL][FPGA] Implement kernel attribute max_work_group_size #883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,17 @@ def SYCLIntelNumSimdWorkItems : InheritableAttr {
let PragmaAttributeSupport = 0;
}

// Intel FPGA SYCL attribute written as
// [[intelfpga::max_work_group_size(X, Y, Z)]].
def SYCLIntelMaxWorkGroupSize : InheritableAttr {
// Only the C++11 attribute syntax in the `intelfpga` namespace is accepted.
let Spellings = [CXX11<"intelfpga","max_work_group_size">];
// Three unsigned arguments, one per dimension, in X, Y, Z order.
let Args = [UnsignedArgument<"XDim">,
UnsignedArgument<"YDim">,
UnsignedArgument<"ZDim">];
// Tied to the SYCL device and SYCL host language options.
let LangOpts = [SYCLIsDevice, SYCLIsHost];
// Restricted to functions; any other subject is diagnosed as an error.
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [SYCLIntelMaxWorkGroupSizeAttrDocs];
// Not usable through `#pragma clang attribute`.
let PragmaAttributeSupport = 0;
}

def C11NoReturn : InheritableAttr {
let Spellings = [Keyword<"_Noreturn">];
let Subjects = SubjectList<[Function], ErrorDiag>;
Expand Down
13 changes: 13 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -1980,6 +1980,19 @@ device kernel, the attribute is ignored and it is not propagated to a kernel.
}];
}

// User-facing documentation record for intelfpga::max_work_group_size; it is
// the record named in the `Documentation` field of SYCLIntelMaxWorkGroupSize.
def SYCLIntelMaxWorkGroupSizeAttrDocs : Documentation {
let Category = DocCatFunction;
let Heading = "max_work_group_size (IntelFPGA)";
let Content = [{
Applies to a device function/lambda function. Indicates the maximum dimensions
of a work group. Values must be positive integers. This is similar to
reqd_work_group_size, but allows work groups that are smaller or equal to the
specified sizes.
If ``intelfpga::max_work_group_size`` is applied to a function called from a
device kernel, the attribute is ignored and it is not propagated to a kernel.
}];
}

def SYCLFPGAPipeDocs : Documentation {
let Category = DocCatStmt;
let Heading = "pipe (read_only, write_only)";
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/AttributeCommonInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ class AttributeCommonInfo {
auto ParsedAttr = getParsedKind();
if (ParsedAttr == AT_SYCLIntelKernelArgsRestrict ||
(ParsedAttr == AT_ReqdWorkGroupSize && isCXX11Attribute()) ||
ParsedAttr == AT_SYCLIntelNumSimdWorkItems)
ParsedAttr == AT_SYCLIntelNumSimdWorkItems ||
ParsedAttr == AT_SYCLIntelMaxWorkGroupSize)
return true;

return false;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -10155,6 +10155,9 @@ def err_sycl_non_std_layout_type : Error<
"kernel parameter has non-standard layout class/struct type">;
def err_conflicting_sycl_kernel_attributes : Error<
"conflicting attributes applied to a SYCL kernel">;
// Reported when the values of one work-group-size attribute contradict another
// attribute on the same declaration. %0 is the attribute being processed and
// %1 is the spelling of the attribute it conflicts with.
def err_conflicting_sycl_function_attributes : Error<
"%0 attribute conflicts with '%1' attribute">;

def err_sycl_attibute_cannot_be_applied_here
: Error<"%0 attribute cannot be applied to a "
"%select{static function or function in an anonymous namespace"
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,16 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
Fn->setMetadata("num_simd_work_items",
llvm::MDNode::get(Context, AttrMDArgs));
}

if (const SYCLIntelMaxWorkGroupSizeAttr *A =
FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
Fn->setMetadata("max_work_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
}
}

/// Determine whether the function F ends with a return stmt.
Expand Down
43 changes: 42 additions & 1 deletion clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2856,9 +2856,41 @@ static void handleWeakImportAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) WeakImportAttr(S.Context, AL));
}

// Handles reqd_work_group_size and work_group_size_hint.
// Checks correctness of mutual usage of different work_group_size attributes:
// reqd_work_group_size, max_work_group_size. Values of reqd_work_group_size
// arguments shall be equal or less than values coming from max_work_group_size.
//
// \param S      Sema instance used to emit diagnostics.
// \param D      Declaration the attribute is being attached to; it is marked
//               invalid when a conflict is found.
// \param Attr   The attribute currently being processed.
// \param WGSize The three dimension values parsed from \p Attr.
// \returns true when \p WGSize is compatible with the attributes already on
//          \p D, false after a conflict has been diagnosed.
//
// An attribute is never compared against an existing attribute of its own
// kind: repeating the same attribute with different values is not a min/max
// relationship, and the caller deals with that case separately. Without this
// guard the outcome of a duplicated attribute depended on whether the second
// set of values happened to be larger or smaller than the first.
static bool checkWorkGroupSizeValues(Sema &S, Decl *D, const ParsedAttr &Attr,
                                     uint32_t WGSize[3]) {
  const auto NewKind = Attr.getKind();
  const char *ConflictingSpelling = nullptr;

  // An existing max_work_group_size is an upper bound for the new values.
  if (NewKind != ParsedAttr::AT_SYCLIntelMaxWorkGroupSize) {
    if (const auto *MaxAttr = D->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
      if (WGSize[0] > MaxAttr->getXDim() || WGSize[1] > MaxAttr->getYDim() ||
          WGSize[2] > MaxAttr->getZDim())
        ConflictingSpelling = MaxAttr->getSpelling();
    }
  }

  // An existing reqd_work_group_size is a lower bound for the new values.
  // NOTE(review): work_group_size_hint is still bounded by both checks, as it
  // was before this change - confirm that is intended.
  if (!ConflictingSpelling && NewKind != ParsedAttr::AT_ReqdWorkGroupSize) {
    if (const auto *ReqdAttr = D->getAttr<ReqdWorkGroupSizeAttr>()) {
      if (WGSize[0] < ReqdAttr->getXDim() || WGSize[1] < ReqdAttr->getYDim() ||
          WGSize[2] < ReqdAttr->getZDim())
        ConflictingSpelling = ReqdAttr->getSpelling();
    }
  }

  if (!ConflictingSpelling)
    return true;

  S.Diag(Attr.getLoc(), diag::err_conflicting_sycl_function_attributes)
      << Attr << ConflictingSpelling;
  D->setInvalidDecl();
  return false;
}

// Handles reqd_work_group_size, work_group_size_hint and max_work_group_size
template <typename WorkGroupAttr>
static void handleWorkGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
if (D->isInvalidDecl())
return;

uint32_t WGSize[3];
for (unsigned i = 0; i < 3; ++i) {
const Expr *E = AL.getArgAsExpr(i);
Expand All @@ -2872,6 +2904,9 @@ static void handleWorkGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}

if (!checkWorkGroupSizeValues(S, D, AL, WGSize))
return;

WorkGroupAttr *Existing = D->getAttr<WorkGroupAttr>();
if (Existing && !(Existing->getXDim() == WGSize[0] &&
Existing->getYDim() == WGSize[1] &&
Expand Down Expand Up @@ -7442,6 +7477,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_ReqdWorkGroupSize:
handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelMaxWorkGroupSize:
handleWorkGroupSize<SYCLIntelMaxWorkGroupSizeAttr>(S, D, AL);
break;
case ParsedAttr::AT_IntelReqdSubGroupSize:
handleSubGroupSize(S, D, AL);
break;
Expand Down Expand Up @@ -7916,6 +7954,9 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
} else if (const auto *A = D->getAttr<WorkGroupSizeHintAttr>()) {
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
D->setInvalidDecl();
} else if (const auto *A = D->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
D->setInvalidDecl();
} else if (const auto *A = D->getAttr<VecTypeHintAttr>()) {
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
D->setInvalidDecl();
Expand Down
12 changes: 10 additions & 2 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,6 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
Attrs.insert(A);
if (auto *A = FD->getAttr<ReqdWorkGroupSizeAttr>())
Attrs.insert(A);

// Allow the following kernel attributes only on lambda functions and
// function objects that are called directly from a kernel (i.e. the one
// passed to the parallel_for function). For all other cases,
Expand All @@ -449,6 +448,14 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
FD->dropAttr<SYCLIntelNumSimdWorkItemsAttr>();
}
}
if (auto *A = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
} else {
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
FD->dropAttr<SYCLIntelMaxWorkGroupSizeAttr>();
}
}

// TODO: vec_len_hint should be handled here

Expand Down Expand Up @@ -1348,7 +1355,8 @@ void Sema::MarkDevice(void) {
break;
}
case attr::Kind::SYCLIntelKernelArgsRestrict:
case attr::Kind::SYCLIntelNumSimdWorkItems: {
case attr::Kind::SYCLIntelNumSimdWorkItems:
case attr::Kind::SYCLIntelMaxWorkGroupSize: {
SYCLKernel->addAttr(A);
break;
}
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGenSYCL/intel-max-work-group-size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -std=c++11 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -fsycl-is-device -emit-llvm -o - %s | FileCheck %s

// Function object whose call operator carries max_work_group_size(1, 1, 1);
// bar() passes an instance of it to kernel<>.
class Foo {
public:
[[intelfpga::max_work_group_size(1, 1, 1)]] void operator()() {}
};

// Minimal kernel entry point for the test: marked with the sycl_kernel
// attribute and does nothing but invoke the callable it receives. The `name`
// template parameter is not used in the body.
template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
kernelFunc();
}

// Instantiates two kernels: kernel_name1 from the Foo function object and
// kernel_name2 from a lambda annotated with max_work_group_size(8, 8, 8).
void bar() {
Foo boo;
kernel<class kernel_name1>(boo);

kernel<class kernel_name2>(
[]() [[intelfpga::max_work_group_size(8, 8, 8)]] {});
}

// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !max_work_group_size ![[NUM1:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !max_work_group_size ![[NUM8:[0-9]+]]
// CHECK: ![[NUM1]] = !{i32 1, i32 1, i32 1}
// CHECK: ![[NUM8]] = !{i32 8, i32 8, i32 8}
78 changes: 78 additions & 0 deletions clang/test/SemaSYCL/intel-max-work-group-size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// RUN: %clang %s -fsyntax-only -fsycl-device-only -DTRIGGER_ERROR -Xclang -verify
// RUN: %clang %s -fsyntax-only -Xclang -ast-dump -fsycl-device-only | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-host -fsyntax-only -verify %s

#ifndef __SYCL_DEVICE_ONLY__
// Host-side variant (compiled when __SYCL_DEVICE_ONLY__ is not defined):
// a function object whose call operator carries the attribute.
struct FuncObj {
[[intelfpga::max_work_group_size(1, 1, 1)]] // expected-no-diagnostics
void operator()() {}
};

// Host-side helper: a plain function template (no sycl_kernel attribute) that
// just invokes the callable it is given.
template <typename name, typename Func>
void kernel(Func kernelFunc) {
kernelFunc();
}

// Host-side driver: instantiates kernel<> with a temporary FuncObj.
void foo() {
kernel<class test_kernel1>(
FuncObj());
}

#else // __SYCL_DEVICE_ONLY__

// Free function carrying the attribute. main() calls it from inside a kernel
// lambda rather than passing it to kernel<> directly, which is the case the
// "attribute ignored" warning below covers.
[[intelfpga::max_work_group_size(2, 2, 2)]] // expected-warning{{'max_work_group_size' attribute ignored}}
void func_ignore() {}

// Device-side function object whose call operator carries
// max_work_group_size(4, 4, 4); main() passes it to kernel<> as test_kernel1.
struct FuncObj {
[[intelfpga::max_work_group_size(4, 4, 4)]]
void operator()() {}
};

#ifdef TRIGGER_ERROR
// Negative case: the required size (8, 8, 4) exceeds the maximum (4, 4, 4) in
// the X and Y dimensions, so the two attributes contradict each other.
struct DAFuncObj {
[[intelfpga::max_work_group_size(4, 4, 4)]]
[[cl::reqd_work_group_size(8, 8, 4)]] // expected-error{{'reqd_work_group_size' attribute conflicts with 'max_work_group_size' attribute}}
void operator()() {}
};
#endif // TRIGGER_ERROR

// Device-side kernel entry point: marked with the sycl_kernel attribute and
// does nothing but invoke the callable it receives.
template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
kernelFunc();
}

// Device-side driver. The first three kernels are matched against the AST dump;
// the kernels under TRIGGER_ERROR exercise the diagnostics for invalid uses.
int main() {
// Attribute taken from a function object's call operator.
// CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel1
// CHECK: SYCLIntelMaxWorkGroupSizeAttr {{.*}} 4 4 4
kernel<class test_kernel1>(
FuncObj());

// Attribute written directly on the kernel lambda.
// CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel2
// CHECK: SYCLIntelMaxWorkGroupSizeAttr {{.*}} 8 8 8
kernel<class test_kernel2>(
[]() [[intelfpga::max_work_group_size(8, 8, 8)]] {});

// Attribute on a function merely called from the kernel lambda: it must not
// show up on the kernel.
// CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel3
// CHECK-NOT: SYCLIntelMaxWorkGroupSizeAttr {{.*}}
kernel<class test_kernel3>(
[]() {func_ignore();});

#ifdef TRIGGER_ERROR
// Wrong subject: the attribute is placed on a variable.
[[intelfpga::max_work_group_size(1, 1, 1)]] int Var = 0; // expected-error{{'max_work_group_size' attribute only applies to functions}}

// Zero in one dimension.
kernel<class test_kernel4>(
[]() [[intelfpga::max_work_group_size(0, 1, 3)]] {}); // expected-error{{'max_work_group_size' attribute must be greater than 0}}

// Negative value in one dimension.
kernel<class test_kernel5>(
[]() [[intelfpga::max_work_group_size(-8, 8, 1)]] {}); // expected-error{{'max_work_group_size' attribute requires a non-negative integral compile time constant expression}}

// Same attribute given twice with different values.
kernel<class test_kernel6>(
[]() [[intelfpga::max_work_group_size(16, 16, 16),
intelfpga::max_work_group_size(2, 2, 2)]] {}); // expected-warning{{attribute 'max_work_group_size' is already applied with different parameters}}

// Uses the function object with contradicting max/reqd attributes.
kernel<class test_kernel7>(
DAFuncObj());

#endif // TRIGGER_ERROR
}
#endif // __SYCL_DEVICE_ONLY__