Skip to content

[SYCL] Add template parameter support for max_global_work_dim attribute #2816

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits on Nov 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1293,7 +1293,7 @@ def SYCLIntelMaxWorkGroupSize : InheritableAttr {
def SYCLIntelMaxGlobalWorkDim : InheritableAttr {
let Spellings = [CXX11<"intelfpga","max_global_work_dim">,
CXX11<"intel","max_global_work_dim">];
let Args = [UnsignedArgument<"Number">];
let Args = [ExprArgument<"Value">];
let LangOpts = [SYCLIsDevice, SYCLIsHost];
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [SYCLIntelMaxGlobalWorkDimAttrDocs];
Expand Down
2 changes: 0 additions & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -11134,8 +11134,6 @@ def err_sycl_function_attribute_mismatch : Error<
"SYCL kernel without %0 attribute can't call a function with this attribute">;
def err_sycl_x_y_z_arguments_must_be_one : Error<
"%0 X-, Y- and Z- sizes must be 1 when %1 attribute is used with value 0">;
def err_intel_attribute_argument_is_not_in_range: Error<
"The value of %0 attribute must be in range from 0 to 3">;
def warn_boolean_attribute_argument_is_not_valid: Warning<
"The value of %0 attribute should be 0 or 1. Adjusted to 1">,
InGroup<AdjustedAttributes>;
Expand Down
23 changes: 19 additions & 4 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -12963,10 +12963,25 @@ void Sema::addIntelSYCLSingleArgFunctionAttr(Decl *D,
return;
}
int32_t ArgInt = ArgVal->getSExtValue();
if (ArgInt <= 0) {
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< CI.getAttrName() << /*positive*/ 0;
return;
if (CI.getParsedKind() == ParsedAttr::AT_SYCLIntelNumSimdWorkItems ||
CI.getParsedKind() == ParsedAttr::AT_IntelReqdSubGroupSize) {
if (ArgInt <= 0) {
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< CI.getAttrName() << /*positive*/ 0;
return;
}
}
if (CI.getParsedKind() == ParsedAttr::AT_SYCLIntelMaxGlobalWorkDim) {
if (ArgInt < 0) {
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< CI.getAttrName() << /*non-negative*/ 1;
return;
}
if (ArgInt > 3) {
Diag(E->getBeginLoc(), diag::err_attribute_argument_out_of_range)
<< CI.getAttrName() << 0 << 3 << E->getSourceRange();
return;
}
}
}

Expand Down
8 changes: 6 additions & 2 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -682,8 +682,12 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,

if (const SYCLIntelMaxGlobalWorkDimAttr *A =
FD->getAttr<SYCLIntelMaxGlobalWorkDimAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getNumber()))};
llvm::LLVMContext &Context = getLLVMContext();
Optional<llvm::APSInt> ArgVal =
A->getValue()->getIntegerConstantExpr(FD->getASTContext());
assert(ArgVal.hasValue() && "Not an integer constant expression");
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
Builder.getInt32(ArgVal->getSExtValue()))};
Fn->setMetadata("max_global_work_dim",
llvm::MDNode::get(Context, AttrMDArgs));
}
Expand Down
30 changes: 11 additions & 19 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2925,10 +2925,13 @@ static bool checkWorkGroupSizeValues(Sema &S, Decl *D, const ParsedAttr &Attr,
return Result;
}

if (const auto *A = D->getAttr<SYCLIntelMaxGlobalWorkDimAttr>())
if (A->getNumber() == 0)
if (const auto *A = D->getAttr<SYCLIntelMaxGlobalWorkDimAttr>()) {
int64_t AttrValue =
A->getValue()->getIntegerConstantExpr(S.Context)->getSExtValue();
if (AttrValue == 0)
Result &= checkZeroDim(A, WGSize[0], WGSize[1], WGSize[2],
/*ReverseAttrs=*/true);
}

if (const auto *A = D->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
if (!(WGSize[0] <= A->getXDim() && WGSize[1] <= A->getYDim() &&
Expand Down Expand Up @@ -3087,34 +3090,23 @@ static void handleMaxGlobalWorkDimAttr(Sema &S, Decl *D,
if (D->isInvalidDecl())
return;

uint32_t MaxGlobalWorkDim;
const Expr *E = Attr.getArgAsExpr(0);
if (!checkUInt32Argument(S, Attr, E, MaxGlobalWorkDim, 0,
/*StrictlyUnsigned=*/true))
return;
Expr *E = Attr.getArgAsExpr(0);

if (MaxGlobalWorkDim > 3) {
S.Diag(Attr.getLoc(), diag::err_intel_attribute_argument_is_not_in_range)
<< Attr;
uint32_t WGSize[3] = {1, 1, 1};
if (!checkWorkGroupSizeValues(S, D, Attr, WGSize)) {
D->setInvalidDecl();
return;
}

if (MaxGlobalWorkDim == 0) {
uint32_t WGSize[3] = {1, 1, 1};
if (!checkWorkGroupSizeValues(S, D, Attr, WGSize)) {
D->setInvalidDecl();
return;
}
}
if (D->getAttr<SYCLIntelMaxGlobalWorkDimAttr>())
S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr;

if (checkDeprecatedSYCLAttributeSpelling(S, Attr))
S.Diag(Attr.getLoc(), diag::note_spelling_suggestion)
<< "'intel::max_global_work_dim'";

D->addAttr(::new (S.Context) SYCLIntelMaxGlobalWorkDimAttr(
S.Context, Attr, MaxGlobalWorkDim));
S.addIntelSYCLSingleArgFunctionAttr<SYCLIntelMaxGlobalWorkDimAttr>(D, Attr,
E);
}

static void handleVecTypeHint(Sema &S, Decl *D, const ParsedAttr &AL) {
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
*this, TemplateArgs, SYCLIntelSchedulerTargetFmaxMhz, New);
continue;
}
if (const auto *SYCLIntelMaxGlobalWorkDim =
dyn_cast<SYCLIntelMaxGlobalWorkDimAttr>(TmplAttr)) {
instantiateIntelSYCLFunctionAttr<SYCLIntelMaxGlobalWorkDimAttr>(
*this, TemplateArgs, SYCLIntelMaxGlobalWorkDim, New);
continue;
}
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
Expand Down
39 changes: 26 additions & 13 deletions clang/test/CodeGenSYCL/intel-max-global-work-dim.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

class Foo {
public:
[[intel::max_global_work_dim(1)]] void operator()() const {}
};

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
kernelFunc();
}
template <int SIZE>
class Functor {
public:
[[intel::max_global_work_dim(SIZE)]] void operator()() const {}
};

int main() {
q.submit([&](handler &h) {
Foo boo;
h.single_task<class kernel_name1>(boo);

void bar() {
Foo boo;
kernel<class kernel_name1>(boo);
h.single_task<class kernel_name2>(
[]() [[intel::max_global_work_dim(2)]]{});

kernel<class kernel_name2>(
[]() [[intel::max_global_work_dim(2)]]{});
Functor<2> f;
h.single_task<class kernel_name3>(f);
});
return 0;
}

// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !max_global_work_dim ![[NUM1:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !max_global_work_dim ![[NUM8:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name1"() #0 {{.*}} !max_global_work_dim ![[NUM1:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2"() #0 {{.*}} !max_global_work_dim ![[NUM2:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name3"() #0 {{.*}} !max_global_work_dim ![[NUM2]]
// CHECK: ![[NUM1]] = !{i32 1}
// CHECK: ![[NUM8]] = !{i32 2}
// CHECK: ![[NUM2]] = !{i32 2}
35 changes: 19 additions & 16 deletions clang/test/CodeGenSYCL/num-simd-work-items.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

class Foo {
public:
Expand All @@ -11,25 +16,23 @@ class Functor {
[[intel::num_simd_work_items(SIZE)]] void operator()() const {}
};

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
kernelFunc();
}

void bar() {
Foo boo;
kernel<class kernel_name1>(boo);
int main() {
q.submit([&](handler &h) {
Foo boo;
h.single_task<class kernel_name1>(boo);

kernel<class kernel_name2>(
[]() [[intel::num_simd_work_items(42)]]{});
h.single_task<class kernel_name2>(
[]() [[intel::num_simd_work_items(42)]]{});

Functor<2> f;
kernel<class kernel_name3>(f);
Functor<2> f;
h.single_task<class kernel_name3>(f);
});
return 0;
}

// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !num_simd_work_items ![[NUM1:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !num_simd_work_items ![[NUM42:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name3() {{.*}} !num_simd_work_items ![[NUM2:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name1"() #0 {{.*}} !num_simd_work_items ![[NUM1:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2"() #0 {{.*}} !num_simd_work_items ![[NUM42:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name3"() #0 {{.*}} !num_simd_work_items ![[NUM2:[0-9]+]]
// CHECK: ![[NUM1]] = !{i32 1}
// CHECK: ![[NUM42]] = !{i32 42}
// CHECK: ![[NUM2]] = !{i32 2}
45 changes: 24 additions & 21 deletions clang/test/CodeGenSYCL/reqd-sub-group-size.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

class Functor16 {
public:
Expand All @@ -7,42 +12,40 @@ class Functor16 {

[[intel::reqd_sub_group_size(8)]] void foo() {}

class Functor {
class Functor8 {
public:
void operator()() const {
foo();
}
};

template <int SIZE>
class Functor5 {
class Functor2 {
public:
[[intel::reqd_sub_group_size(SIZE)]] void operator()() const {}
};

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
kernelFunc();
}

void bar() {
Functor16 f16;
kernel<class kernel_name1>(f16);
int main() {
q.submit([&](handler &h) {
Functor16 f16;
h.single_task<class kernel_name1>(f16);

Functor f;
kernel<class kernel_name2>(f);
Functor8 f8;
h.single_task<class kernel_name2>(f8);

kernel<class kernel_name3>(
[]() [[intel::reqd_sub_group_size(4)]]{});
h.single_task<class kernel_name3>(
[]() [[intel::reqd_sub_group_size(4)]]{});

Functor5<2> f5;
kernel<class kernel_name4>(f5);
Functor2<2> f2;
h.single_task<class kernel_name4>(f2);
});
return 0;
}

// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !intel_reqd_sub_group_size ![[SGSIZE16:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !intel_reqd_sub_group_size ![[SGSIZE8:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name3() {{.*}} !intel_reqd_sub_group_size ![[SGSIZE4:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name4() {{.*}} !intel_reqd_sub_group_size ![[SGSIZE2:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name1"() #0 {{.*}} !intel_reqd_sub_group_size ![[SGSIZE16:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name2"() #0 {{.*}} !intel_reqd_sub_group_size ![[SGSIZE8:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name3"() #0 {{.*}} !intel_reqd_sub_group_size ![[SGSIZE4:[0-9]+]]
// CHECK: define spir_kernel void @{{.*}}kernel_name4"() #0 {{.*}} !intel_reqd_sub_group_size ![[SGSIZE2:[0-9]+]]
// CHECK: ![[SGSIZE16]] = !{i32 16}
// CHECK: ![[SGSIZE8]] = !{i32 8}
// CHECK: ![[SGSIZE4]] = !{i32 4}
Expand Down
Loading