intel · bader · Dec 6, 2019 · Nov 8, 2019
@@ -1123,6 +1123,17 @@ def SYCLIntelNumSimdWorkItems : InheritableAttr {
   let PragmaAttributeSupport = 0;
 }
 
+// Intel FPGA ``[[intelfpga::max_work_group_size(X, Y, Z)]]`` attribute.
+// Takes three unsigned dimension arguments, is enabled for both SYCL device and
+// SYCL host compilations, and may only be written on functions (any other
+// subject is diagnosed as an error).
+def SYCLIntelMaxWorkGroupSize : InheritableAttr {
+  let Spellings = [CXX11<"intelfpga","max_work_group_size">];
+  let Args = [UnsignedArgument<"XDim">,
+              UnsignedArgument<"YDim">,
+              UnsignedArgument<"ZDim">];
+  let LangOpts = [SYCLIsDevice, SYCLIsHost];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let Documentation = [SYCLIntelMaxWorkGroupSizeAttrDocs];
+  let PragmaAttributeSupport = 0;
+}
+
 def C11NoReturn : InheritableAttr {
   let Spellings = [Keyword<"_Noreturn">];
   let Subjects = SubjectList<[Function], ErrorDiag>;

@@ -1980,6 +1980,19 @@ device kernel, the attribute is ignored and it is not propagated to a kernel.
   }];
 }
 
+def SYCLIntelMaxWorkGroupSizeAttrDocs : Documentation {
+  let Category = DocCatFunction;
+  let Heading = "max_work_group_size (IntelFPGA)";
+  let Content = [{
+Applies to a device function/lambda function. Indicates the maximum dimensions
+of a work group. Values must be positive integers. This is similar to
+``reqd_work_group_size``, but allows work groups that are smaller than or equal
+to the specified sizes.
+If ``intelfpga::max_work_group_size`` is applied to a function called from a
+device kernel, the attribute is ignored and it is not propagated to a kernel.
+  }];
+}
+
 def SYCLFPGAPipeDocs : Documentation {
   let Category = DocCatStmt;
   let Heading = "pipe (read_only, write_only)";

@@ -155,7 +155,8 @@ class AttributeCommonInfo {
     auto ParsedAttr = getParsedKind();
     if (ParsedAttr == AT_SYCLIntelKernelArgsRestrict ||
         (ParsedAttr == AT_ReqdWorkGroupSize && isCXX11Attribute()) ||
-        ParsedAttr == AT_SYCLIntelNumSimdWorkItems)
+        ParsedAttr == AT_SYCLIntelNumSimdWorkItems ||
+        ParsedAttr == AT_SYCLIntelMaxWorkGroupSize)
       return true;
 
     return false;

@@ -10155,6 +10155,9 @@ def err_sycl_non_std_layout_type : Error<
   "kernel parameter has non-standard layout class/struct type">;
 def err_conflicting_sycl_kernel_attributes : Error<
   "conflicting attributes applied to a SYCL kernel">;
+// %0 is the attribute currently being processed; %1 is the spelling of the
+// attribute already present on the declaration that it conflicts with.
+def err_conflicting_sycl_function_attributes : Error<
+   "%0 attribute conflicts with '%1' attribute">;
+
 def err_sycl_attibute_cannot_be_applied_here
     : Error<"%0 attribute cannot be applied to a "
             "%select{static function or function in an anonymous namespace"

@@ -595,6 +595,16 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
     Fn->setMetadata("num_simd_work_items",
                     llvm::MDNode::get(Context, AttrMDArgs));
   }
+
+  // Attach the max_work_group_size attribute to the function as
+  // "max_work_group_size" metadata holding the X, Y and Z limits as i32 values.
+  if (const auto *MaxWGSize = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
+    auto DimToMD = [&](unsigned Dim) -> llvm::Metadata * {
+      return llvm::ConstantAsMetadata::get(Builder.getInt32(Dim));
+    };
+    llvm::Metadata *Dims[] = {DimToMD(MaxWGSize->getXDim()),
+                              DimToMD(MaxWGSize->getYDim()),
+                              DimToMD(MaxWGSize->getZDim())};
+    Fn->setMetadata("max_work_group_size", llvm::MDNode::get(Context, Dims));
+  }
 }
 
 /// Determine whether the function F ends with a return stmt.

@@ -2856,9 +2856,41 @@ static void handleWeakImportAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) WeakImportAttr(S.Context, AL));
 }
 
-// Handles reqd_work_group_size and work_group_size_hint.
+// Checks correctness of mutual usage of different work_group_size attributes:
+// reqd_work_group_size, max_work_group_size. Values of reqd_work_group_size
+// arguments shall be equal or less than values coming from max_work_group_size.
+//
+// \param S      Sema instance used to emit diagnostics.
+// \param D      declaration the attribute is being attached to.
+// \param Attr   the attribute currently being processed.
+// \param WGSize the already-evaluated X/Y/Z arguments of \p Attr.
+// \returns true if \p Attr is compatible with the attributes already on \p D;
+//          otherwise emits err_conflicting_sycl_function_attributes, marks
+//          \p D invalid and returns false.
+//
+// Each comparison is gated on the kind of \p Attr so that only a reqd/max pair
+// is ever compared. work_group_size_hint is not constrained by either
+// attribute, and a repeated attribute of the same kind is left to the caller's
+// "already applied with different parameters" handling instead of being
+// reported as conflicting with itself.
+static bool checkWorkGroupSizeValues(Sema &S, Decl *D, const ParsedAttr &Attr,
+                                     uint32_t WGSize[3]) {
+  const auto Kind = Attr.getParsedKind();
+
+  // A new reqd_work_group_size must fit within an existing
+  // max_work_group_size in every dimension.
+  if (Kind == ParsedAttr::AT_ReqdWorkGroupSize) {
+    if (const auto *A = D->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
+      if (!(WGSize[0] <= A->getXDim() && WGSize[1] <= A->getYDim() &&
+            WGSize[2] <= A->getZDim())) {
+        S.Diag(Attr.getLoc(), diag::err_conflicting_sycl_function_attributes)
+            << Attr << A->getSpelling();
+        D->setInvalidDecl();
+        return false;
+      }
+    }
+  }
+
+  // A new max_work_group_size must cover an existing reqd_work_group_size in
+  // every dimension.
+  if (Kind == ParsedAttr::AT_SYCLIntelMaxWorkGroupSize) {
+    if (const auto *A = D->getAttr<ReqdWorkGroupSizeAttr>()) {
+      if (!(WGSize[0] >= A->getXDim() && WGSize[1] >= A->getYDim() &&
+            WGSize[2] >= A->getZDim())) {
+        S.Diag(Attr.getLoc(), diag::err_conflicting_sycl_function_attributes)
+            << Attr << A->getSpelling();
+        D->setInvalidDecl();
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Handles reqd_work_group_size, work_group_size_hint and max_work_group_size
 template <typename WorkGroupAttr>
 static void handleWorkGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (D->isInvalidDecl())
+    return;
+
   uint32_t WGSize[3];
   for (unsigned i = 0; i < 3; ++i) {
     const Expr *E = AL.getArgAsExpr(i);
@@ -2872,6 +2904,9 @@ static void handleWorkGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
     }
   }
 
+  if (!checkWorkGroupSizeValues(S, D, AL, WGSize))
+    return;
+
   WorkGroupAttr *Existing = D->getAttr<WorkGroupAttr>();
   if (Existing && !(Existing->getXDim() == WGSize[0] &&
                     Existing->getYDim() == WGSize[1] &&
@@ -7442,6 +7477,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
   case ParsedAttr::AT_ReqdWorkGroupSize:
     handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, AL);
     break;
+  case ParsedAttr::AT_SYCLIntelMaxWorkGroupSize:
+    handleWorkGroupSize<SYCLIntelMaxWorkGroupSizeAttr>(S, D, AL);
+    break;
   case ParsedAttr::AT_IntelReqdSubGroupSize:
     handleSubGroupSize(S, D, AL);
     break;
@@ -7916,6 +7954,9 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
     } else if (const auto *A = D->getAttr<WorkGroupSizeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
+    } else if (const auto *A = D->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
+      Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
+      D->setInvalidDecl();
     } else if (const auto *A = D->getAttr<VecTypeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();

@@ -428,7 +428,6 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
         Attrs.insert(A);
       if (auto *A = FD->getAttr<ReqdWorkGroupSizeAttr>())
         Attrs.insert(A);
-
       // Allow the following kernel attributes only on lambda functions and
       // function objects that are called directly from a kernel (i.e. the one
       // passed to the parallel_for function). For all other cases,
@@ -449,6 +448,14 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
           FD->dropAttr<SYCLIntelNumSimdWorkItemsAttr>();
         }
       }
+      if (auto *A = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
+        if (ParentFD == SYCLKernel) {
+          Attrs.insert(A);
+        } else {
+          SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
+          FD->dropAttr<SYCLIntelMaxWorkGroupSizeAttr>();
+        }
+      }
 
       // TODO: vec_len_hint should be handled here
 
@@ -1348,7 +1355,8 @@ void Sema::MarkDevice(void) {
           break;
         }
         case attr::Kind::SYCLIntelKernelArgsRestrict:
-        case attr::Kind::SYCLIntelNumSimdWorkItems: {
+        case attr::Kind::SYCLIntelNumSimdWorkItems:
+        case attr::Kind::SYCLIntelMaxWorkGroupSize: {
           SYCLKernel->addAttr(A);
           break;
         }

@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c++11 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -fsycl-is-device -emit-llvm -o - %s | FileCheck %s
+
+// Verifies that [[intelfpga::max_work_group_size(X, Y, Z)]] written on a kernel
+// function object's call operator, or on a kernel lambda, is emitted as
+// !max_work_group_size metadata (three i32 values) on the generated kernel.
+
+// Function object whose call operator carries the attribute.
+class Foo {
+public:
+  [[intelfpga::max_work_group_size(1, 1, 1)]] void operator()() {}
+};
+
+// Minimal kernel entry point: invokes the callable passed to it.
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
+  kernelFunc();
+}
+
+void bar() {
+  // Attribute taken from Foo::operator().
+  Foo boo;
+  kernel<class kernel_name1>(boo);
+
+  // Attribute written directly on the lambda.
+  kernel<class kernel_name2>(
+  []() [[intelfpga::max_work_group_size(8, 8, 8)]] {});
+}
+
+// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !max_work_group_size ![[NUM1:[0-9]+]]
+// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !max_work_group_size ![[NUM8:[0-9]+]]
+// CHECK: ![[NUM1]] = !{i32 1, i32 1, i32 1}
+// CHECK: ![[NUM8]] = !{i32 8, i32 8, i32 8}
@@ -0,0 +1,78 @@
+// RUN: %clang %s -fsyntax-only -fsycl-device-only -DTRIGGER_ERROR -Xclang -verify
+// RUN: %clang %s -fsyntax-only -Xclang -ast-dump -fsycl-device-only | FileCheck %s
+// RUN: %clang_cc1 -fsycl-is-host -fsyntax-only -verify %s
+
+#ifndef __SYCL_DEVICE_ONLY__
+// Host compilation: the attribute on the call operator must be accepted
+// without producing any diagnostic.
+struct FuncObj {
+  [[intelfpga::max_work_group_size(1, 1, 1)]] // expected-no-diagnostics
+  void operator()() {}
+};
+
+// Host-side stand-in for a kernel launch: simply invokes the callable.
+template <typename name, typename Func>
+void kernel(Func kernelFunc) {
+  kernelFunc();
+}
+
+// Instantiates the host-side kernel stand-in with the attributed functor.
+void foo() {
+  kernel<class test_kernel1>(
+      FuncObj());
+}
+
+#else // __SYCL_DEVICE_ONLY__
+
+// Free function that is only called from inside a kernel body (test_kernel3 in
+// main): the attribute is ignored with a warning and is not propagated to the
+// kernel.
+[[intelfpga::max_work_group_size(2, 2, 2)]] // expected-warning{{'max_work_group_size' attribute ignored}}
+void func_ignore() {}
+
+// Device compilation: function object passed directly to the kernel; its
+// attribute is expected to appear on the generated kernel (test_kernel1).
+struct FuncObj {
+  [[intelfpga::max_work_group_size(4, 4, 4)]]
+  void operator()() {}
+};
+
+#ifdef TRIGGER_ERROR
+// Function object with two incompatible attributes: the required work-group
+// size (8, 8, 4) exceeds the declared maximum (4, 4, 4) in the X and Y
+// dimensions, so the second attribute is rejected as conflicting.
+struct DAFuncObj {
+  [[intelfpga::max_work_group_size(4, 4, 4)]]
+  [[cl::reqd_work_group_size(8, 8, 4)]] // expected-error{{'reqd_work_group_size' attribute conflicts with 'max_work_group_size' attribute}}
+  void operator()() {}
+};
+#endif // TRIGGER_ERROR
+
+// Minimal device kernel entry point: invokes the callable passed to it. The
+// `name` template argument appears in the generated kernel's name (see the
+// AST-dump patterns in main).
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
+  kernelFunc();
+}
+
+int main() {
+  // Attribute on a function object's call operator reaches the kernel.
+  // CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel1
+  // CHECK:       SYCLIntelMaxWorkGroupSizeAttr {{.*}} 4 4 4
+  kernel<class test_kernel1>(
+      FuncObj());
+
+  // Attribute written directly on the kernel lambda reaches the kernel.
+  // CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel2
+  // CHECK:       SYCLIntelMaxWorkGroupSizeAttr {{.*}} 8 8 8
+  kernel<class test_kernel2>(
+      []() [[intelfpga::max_work_group_size(8, 8, 8)]] {});
+
+  // Attribute on a function merely called from the kernel body must not show
+  // up on the kernel.
+  // CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel3
+  // CHECK-NOT:   SYCLIntelMaxWorkGroupSizeAttr {{.*}}
+  kernel<class test_kernel3>(
+      []() {func_ignore();});
+
+#ifdef TRIGGER_ERROR
+  // Wrong subject: the attribute is only valid on functions.
+  [[intelfpga::max_work_group_size(1, 1, 1)]] int Var = 0; // expected-error{{'max_work_group_size' attribute only applies to functions}}
+
+  // A zero dimension is rejected.
+  kernel<class test_kernel4>(
+      []() [[intelfpga::max_work_group_size(0, 1, 3)]] {}); // expected-error{{'max_work_group_size' attribute must be greater than 0}}
+
+  // A negative dimension is rejected.
+  kernel<class test_kernel5>(
+      []() [[intelfpga::max_work_group_size(-8, 8, 1)]] {}); // expected-error{{'max_work_group_size' attribute requires a non-negative integral compile time constant expression}}
+
+  // The same attribute repeated with different values produces a warning.
+  kernel<class test_kernel6>(
+      []() [[intelfpga::max_work_group_size(16, 16, 16),
+             intelfpga::max_work_group_size(2, 2, 2)]] {}); // expected-warning{{attribute 'max_work_group_size' is already applied with different parameters}}
+
+  // Uses the functor whose required size exceeds its maximum size; the
+  // conflict is diagnosed at the functor's declaration.
+  kernel<class test_kernel7>(
+      DAFuncObj());
+
+#endif // TRIGGER_ERROR
+}
+#endif // __SYCL_DEVICE_ONLY__