Skip to content

Commit 3c3ca19

Browse files
committed
Merge remote-tracking branch 'intel_llvm/sycl' into llvmspirv_pulldown_ww46-47
2 parents 61e2db1 + 0e28541 commit 3c3ca19

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1686
-200
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "llvm/Passes/PassBuilder.h"
4343
#include "llvm/Passes/PassPlugin.h"
4444
#include "llvm/Passes/StandardInstrumentations.h"
45+
#include "llvm/SYCLLowerIR/ESIMDVerifier.h"
4546
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
4647
#include "llvm/Support/BuryPointer.h"
4748
#include "llvm/Support/CommandLine.h"
@@ -856,6 +857,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
856857
FPM.add(createVerifierPass());
857858

858859
// Set up the per-module pass manager.
860+
if (LangOpts.SYCLIsDevice)
861+
MPM.add(createESIMDVerifierPass());
862+
859863
if (!CodeGenOpts.RewriteMapFiles.empty())
860864
addSymbolRewriterPass(CodeGenOpts, &MPM);
861865

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3509,9 +3509,26 @@ class SYCLKernelNameTypeVisitor
35093509
IsInvalid = true;
35103510
return;
35113511
}
3512-
// Check if the declaration is completely defined within a
3513-
// function or class/struct.
3514-
if (Tag->isCompleteDefinition()) {
3512+
3513+
// Diagnose used types without complete definition i.e.
3514+
// int main() {
3515+
// class KernelName1;
3516+
// parallel_for<class KernelName1>(..);
3517+
// }
3518+
// This case can only be diagnosed during host compilation because the
3519+
// integration header is required to distinguish between the invalid
3520+
// code (above) and the following valid code:
3521+
// int main() {
3522+
// parallel_for<class KernelName2>(..);
3523+
// }
3524+
// The device compiler forward declares both KernelName1 and
3525+
// KernelName2 in the integration header as ::KernelName1 and
3526+
// ::KernelName2. The problem with the former case is the additional
3527+
// declaration 'class KernelName1' in non-global scope. Lookup in this
3528+
// case will resolve to ::main::KernelName1 (instead of
3529+
// ::KernelName1). Since this is not visible to runtime code that
3530+
// submits kernels, this is invalid.
3531+
if (Tag->isCompleteDefinition() || S.getLangOpts().SYCLIsHost) {
35153532
S.Diag(KernelInvocationFuncLoc,
35163533
diag::err_sycl_kernel_incorrectly_named)
35173534
<< /* kernel name should be forward declarable at namespace
@@ -3561,14 +3578,20 @@ class SYCLKernelNameTypeVisitor
35613578

35623579
void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
35633580
ArrayRef<const Expr *> Args) {
3581+
QualType KernelNameType =
3582+
calculateKernelNameType(getASTContext(), KernelFunc);
3583+
SYCLKernelNameTypeVisitor KernelNameTypeVisitor(
3584+
*this, Args[0]->getExprLoc(), KernelNameType,
3585+
IsSYCLUnnamedKernel(*this, KernelFunc));
3586+
KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType());
3587+
35643588
// FIXME: In place until the library works around its 'host' invocation
35653589
// issues.
35663590
if (!LangOpts.SYCLIsDevice)
35673591
return;
3592+
35683593
const CXXRecordDecl *KernelObj =
35693594
GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl();
3570-
QualType KernelNameType =
3571-
calculateKernelNameType(getASTContext(), KernelFunc);
35723595

35733596
if (!KernelObj) {
35743597
Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object);
@@ -3609,15 +3632,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
36093632
IsSIMDKernel);
36103633

36113634
KernelObjVisitor Visitor{*this};
3612-
SYCLKernelNameTypeVisitor KernelNameTypeVisitor(
3613-
*this, Args[0]->getExprLoc(), KernelNameType,
3614-
IsSYCLUnnamedKernel(*this, KernelFunc));
36153635

36163636
DiagnosingSYCLKernel = true;
36173637

36183638
// Emit diagnostics for SYCL device kernels only
3619-
if (LangOpts.SYCLIsDevice)
3620-
KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType());
36213639
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker);
36223640
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker,
36233641
DecompMarker);

clang/test/CodeGenSYCL/loop_fusion_host.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
55
kernelFunc();
66
}
77

8+
// This test uses SYCL host-only mode without an integration header, so
9+
// forward declare the kernel name class it uses; otherwise it is rejected
10+
// by the diagnostic implemented in https://github.com/intel/llvm/pull/4945.
11+
// The error occurs because host mode assumes that every kernel name is
12+
// forward declared at global or namespace scope by the integration
13+
// header.
14+
class kernel_name_1;
15+
816
template <int SIZE>
917
class KernelFunctor5 {
1018
public:

clang/test/CodeGenSYCL/stall_enable_host.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22

33
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Host (no-op in IR-CodeGen for host-mode).
44

5+
// This test uses SYCL host-only mode without an integration header, so
6+
// forward declare the kernel name class it uses; otherwise it is rejected
7+
// by the diagnostic implemented in https://github.com/intel/llvm/pull/4945.
8+
// The error occurs because host mode assumes that every kernel name is
9+
// forward declared at global or namespace scope by the integration
10+
// header.
11+
class kernel_name_1;
12+
513
[[intel::use_stall_enable_clusters]] void test() {}
614

715
void test1() {

clang/test/Driver/sycl-offload-with-split.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@
310310
// RUN: | FileCheck %s -check-prefixes=CHK-NO-SPLIT
311311
// RUN: %clang_cl -### -fsycl -fsycl-device-code-split -fsycl-device-code-split=off %s 2>&1 \
312312
// RUN: | FileCheck %s -check-prefixes=CHK-NO-SPLIT
313-
// CHK-NO-SPLIT-NOT: sycl-post-link{{.*}} -split{{.*}}
313+
// CHK-NO-SPLIT-NOT: sycl-post-link{{.*}} "-split={{.*}}
314314

315315
// Check no device code split mode is passed to sycl-post-link when -fsycl-device-code-split is not set and the target is FPGA
316316
// RUN: %clang -### -fsycl -fsycl-targets=spir64_fpga-unknown-unknown %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-SPLIT
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#pragma once
2+
3+
#ifndef __SYCL_DISABLE_NAMESPACE_INLINE__
4+
#define __SYCL_INLINE_NAMESPACE(X) inline namespace X
5+
#else
6+
#define __SYCL_INLINE_NAMESPACE(X) namespace X
7+
#endif // __SYCL_DISABLE_NAMESPACE_INLINE__
8+
#define __SYCL_DLL_LOCAL
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#pragma once
2+
3+
#include <CL/sycl/detail/defines_elementary.hpp>
4+
5+
__SYCL_INLINE_NAMESPACE(cl) {
6+
namespace sycl {
7+
namespace detail {
8+
9+
#ifndef __SYCL_DEVICE_ONLY__
10+
#define _Bool bool
11+
#endif
12+
13+
// kernel parameter kinds
14+
enum class kernel_param_kind_t {
15+
kind_accessor = 0,
16+
kind_std_layout = 1, // standard layout object parameters
17+
kind_sampler = 2,
18+
kind_pointer = 3,
19+
kind_specialization_constants_buffer = 4,
20+
kind_stream = 5,
21+
kind_invalid = 0xf, // not a valid kernel kind
22+
};
23+
24+
// describes a kernel parameter
25+
struct kernel_param_desc_t {
26+
// parameter kind
27+
kernel_param_kind_t kind;
28+
// kind == kind_std_layout
29+
// parameter size in bytes (includes padding for structs)
30+
// kind == kind_accessor
31+
// access target; possible access targets are defined in access/access.hpp
32+
int info;
33+
// offset of the captured value of the parameter in the lambda or function
34+
// object
35+
int offset;
36+
};
37+
38+
template <class KernelNameType> struct KernelInfo {
39+
static constexpr unsigned getNumParams() { return 0; }
40+
static const kernel_param_desc_t &getParamDesc(int) {
41+
static kernel_param_desc_t Dummy;
42+
return Dummy;
43+
}
44+
static constexpr const char *getName() { return ""; }
45+
static constexpr bool isESIMD() { return 0; }
46+
static constexpr bool callsThisItem() { return false; }
47+
static constexpr bool callsAnyThisFreeFunction() { return false; }
48+
};
49+
} // namespace detail
50+
} // namespace sycl
51+
} // __SYCL_INLINE_NAMESPACE(cl)

clang/test/SemaSYCL/Inputs/sycl.hpp

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ struct opencl_image_type;
130130
using type = __ocl_image##dim##d_##ifarray_##amsuffix##_t; \
131131
};
132132

133+
#ifdef __SYCL_DEVICE_ONLY__
134+
133135
#define IMAGETY_READ_3_DIM_IMAGE \
134136
IMAGETY_DEFINE(1, read, ro, image, ) \
135137
IMAGETY_DEFINE(2, read, ro, image, ) \
@@ -154,6 +156,8 @@ IMAGETY_WRITE_3_DIM_IMAGE
154156
IMAGETY_READ_2_DIM_IARRAY
155157
IMAGETY_WRITE_2_DIM_IARRAY
156158

159+
#endif // __SYCL_DEVICE_ONLY__
160+
157161
template <int dim, access::mode accessmode, access::target accesstarget>
158162
struct _ImageImplT {
159163
#ifdef __SYCL_DEVICE_ONLY__
@@ -232,60 +236,59 @@ template <typename Type> struct get_kernel_wrapper_name_t {
232236
#define ATTR_SYCL_KERNEL __attribute__((sycl_kernel))
233237
template <typename KernelName, typename KernelType>
234238
ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc) { // #KernelSingleTaskFunc
239+
#ifdef __SYCL_DEVICE_ONLY__
235240
kernelFunc(); // #KernelSingleTaskKernelFuncCall
241+
#else
242+
(void)kernelFunc;
243+
#endif
236244
}
237245
template <typename KernelName, typename KernelType>
238246
ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc, kernel_handler kh) {
247+
#ifdef __SYCL_DEVICE_ONLY__
239248
kernelFunc(kh);
249+
#else
250+
(void)kernelFunc;
251+
#endif
240252
}
241253
template <typename KernelName, typename KernelType>
242254
ATTR_SYCL_KERNEL void kernel_parallel_for(const KernelType &kernelFunc) {
255+
#ifdef __SYCL_DEVICE_ONLY__
243256
kernelFunc();
257+
#else
258+
(void)kernelFunc;
259+
#endif
244260
}
245261
template <typename KernelName, typename KernelType>
246262
ATTR_SYCL_KERNEL void kernel_parallel_for_work_group(const KernelType &KernelFunc, kernel_handler kh) {
263+
#ifdef __SYCL_DEVICE_ONLY__
247264
KernelFunc(group<1>(), kh);
265+
#else
266+
(void)KernelFunc;
267+
#endif
248268
}
249269

250270
class handler {
251271
public:
252272
template <typename KernelName = auto_name, typename KernelType>
253273
void single_task(const KernelType &kernelFunc) {
254274
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
255-
#ifdef __SYCL_DEVICE_ONLY__
256275
kernel_single_task<NameT>(kernelFunc); // #KernelSingleTask
257-
#else
258-
kernelFunc();
259-
#endif
260276
}
261277
template <typename KernelName = auto_name, typename KernelType>
262278
void single_task(const KernelType &kernelFunc, kernel_handler kh) {
263279
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
264-
#ifdef __SYCL_DEVICE_ONLY__
265280
kernel_single_task<NameT>(kernelFunc, kh);
266-
#else
267-
kernelFunc(kh);
268-
#endif
269281
}
270282
template <typename KernelName = auto_name, typename KernelType>
271283
void parallel_for(const KernelType &kernelObj) {
272284
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
273285
using NameWT = typename get_kernel_wrapper_name_t<NameT>::name;
274-
#ifdef __SYCL_DEVICE_ONLY__
275286
kernel_parallel_for<NameT>(kernelObj);
276-
#else
277-
kernelObj();
278-
#endif
279287
}
280288
template <typename KernelName = auto_name, typename KernelType>
281289
void parallel_for_work_group(const KernelType &kernelFunc, kernel_handler kh) {
282290
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
283-
#ifdef __SYCL_DEVICE_ONLY__
284291
kernel_parallel_for_work_group<NameT>(kernelFunc, kh);
285-
#else
286-
group<1> G;
287-
kernelFunc(G, kh);
288-
#endif
289292
}
290293
};
291294

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-unnamed-lambda -fsyntax-only -sycl-std=2020 -fsycl-int-header=%t.h %s
2+
// RUN: %clang_cc1 -fsycl-is-host -internal-isystem %S/Inputs -fno-sycl-unnamed-lambda -fsyntax-only -verify -include %t.h %s
3+
4+
// This test verifies that incorrect kernel names are diagnosed correctly.
5+
6+
#include "sycl.hpp"
7+
8+
using namespace cl::sycl;
9+
10+
// user-defined function
11+
void function() {
12+
}
13+
14+
// user-defined struct
15+
struct myWrapper {
16+
class insideStruct;
17+
};
18+
19+
template <typename KernelName> class RandomTemplate;
20+
21+
int main() {
22+
queue q;
23+
24+
q.submit([&](handler &h) {
25+
h.single_task<class Ok>([]() { function(); });
26+
});
27+
q.submit([&](handler &h) {
28+
h.single_task<RandomTemplate<class Ok>>([]() { function(); });
29+
});
30+
31+
class NotOk;
32+
// expected-error@#KernelSingleTask {{'NotOk' is invalid; kernel name should be forward declarable at namespace scope}}
33+
// expected-note@+2 {{in instantiation of function template specialization}}
34+
q.submit([&](handler &h) {
35+
h.single_task<class NotOk>([]() { function(); });
36+
});
37+
// expected-error@#KernelSingleTask {{'myWrapper::insideStruct' is invalid; kernel name should be forward declarable at namespace scope}}
38+
// expected-note@+2 {{in instantiation of function template specialization}}
39+
q.submit([&](handler &h) {
40+
h.single_task<class myWrapper::insideStruct>([]() { function(); });
41+
});
42+
// expected-error@#KernelSingleTask {{'RandomTemplate<NotOk>' is invalid; kernel name should be forward declarable at namespace scope}}
43+
// expected-note@+2 {{in instantiation of function template specialization}}
44+
q.submit([&](handler &h) {
45+
h.single_task<RandomTemplate<NotOk>>([]() { function(); });
46+
});
47+
return 0;
48+
}

libclc/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
322322
endforeach()
323323
endforeach()
324324

325+
# Please NOTE that variables in the foreach are not local and thus need
326+
# to be reset every iteration.
325327
foreach( d ${${t}_devices} )
326328
# Some targets don't have a specific GPU to target
327329
if( ${d} STREQUAL "none" OR ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" )
@@ -330,6 +332,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
330332
# Disables NVVM reflection to defer to after linking
331333
set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72"
332334
"SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false")
335+
else()
336+
set ( flags )
333337
endif()
334338
set( arch_suffix "${t}" )
335339
else()

0 commit comments

Comments
 (0)