Skip to content

Commit 1644539

Browse files
Merge branch 'sycl' into pulldown-ww29
2 parents 81e0105 + d01371b commit 1644539

31 files changed

+483
-166
lines changed

clang/test/SemaSYCL/Inputs/CL/sycl/detail/kernel_desc.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,17 @@ __SYCL_INLINE_NAMESPACE(cl) {
3535
int offset;
3636
};
3737

38+
template <bool Cond, typename TrueT, typename FalseT>
39+
struct conditional {
40+
using type = TrueT;
41+
};
42+
template <typename TrueT, typename FalseT>
43+
struct conditional<false, TrueT, FalseT> {
44+
using type = FalseT;
45+
};
46+
47+
using int64_t = conditional<sizeof(long) == 8, long, long long>::type;
48+
3849
template <class KernelNameType> struct KernelInfo {
3950
static constexpr unsigned getNumParams() { return 0; }
4051
static const kernel_param_desc_t &getParamDesc(int) {
@@ -43,6 +54,7 @@ __SYCL_INLINE_NAMESPACE(cl) {
4354
}
4455
static constexpr const char *getName() { return ""; }
4556
static constexpr bool isESIMD() { return 0; }
57+
static constexpr int64_t getKernelSize() { return 0; }
4658
};
4759
} // namespace detail
4860
} // namespace sycl

libclc/ptx-nvidiacl/libspirv/group/collectives.cl

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,13 +249,34 @@ __CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, half, -HALF_MAX)
249249
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
250250
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)
251251

252+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, uchar, ~0)
253+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, uchar, 0)
254+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, uchar, 0)
255+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, char, ~0)
256+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, char, 0)
257+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, char, 0)
258+
259+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, ushort, ~0)
260+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, ushort, 0)
261+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, ushort, 0)
262+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, short, ~0)
263+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, short, 0)
264+
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, short, 0)
265+
252266
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, uint, ~0)
253267
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, uint, 0)
254268
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, uint, 0)
255269
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, int, ~0)
256270
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, int, 0)
257271
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, int, 0)
258272

273+
__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ulong, ~0l)
274+
__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ulong, 0l)
275+
__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ulong, 0l)
276+
__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, long, ~0l)
277+
__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, 0l)
278+
__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l)
279+
259280
#undef __CLC_SUBGROUP_COLLECTIVE_BODY
260281
#undef __CLC_SUBGROUP_COLLECTIVE
261282
#undef __CLC_SUBGROUP_COLLECTIVE_REDUX
@@ -376,13 +397,34 @@ __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, half, -HALF_MAX)
376397
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
377398
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)
378399

400+
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uchar, ~0)
401+
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uchar, 0)
402+
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uchar, 0)
403+
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, char, ~0)
404+
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, char, 0)
405+
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, char, 0)
406+
407+
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ushort, ~0)
408+
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ushort, 0)
409+
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ushort, 0)
410+
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, short, ~0)
411+
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, short, 0)
412+
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, short, 0)
413+
379414
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uint, ~0)
380415
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uint, 0)
381416
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uint, 0)
382417
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, int, ~0)
383418
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, int, 0)
384419
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, int, 0)
385420

421+
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ulong, ~0l)
422+
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ulong, 0l)
423+
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ulong, 0l)
424+
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, long, ~0l)
425+
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, 0l)
426+
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l)
427+
386428
// half requires additional mangled entry points
387429
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
388430
half x) {

sycl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ include(SYCLUtils)
2828
set(SYCL_MAJOR_VERSION 5)
2929
set(SYCL_MINOR_VERSION 7)
3030
set(SYCL_PATCH_VERSION 0)
31-
set(SYCL_DEV_ABI_VERSION 0)
31+
set(SYCL_DEV_ABI_VERSION 1)
3232
if (SYCL_ADD_DEV_VERSION_POSTFIX)
3333
set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
3434
endif()

sycl/doc/EnvironmentVariables.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ variables in production code.</span>
157157
| `SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY` (experimental) | Integer | Allows the use of copy engine, if available in the device, in Level Zero plugin for device to device copy operations. The default is 0. This option is experimental and will be removed once heuristics are added to make a decision about use of copy engine for device to device copy operations. |
158158
| `SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS` | Any(\*) | Enable support of device-scope events whose state is not visible to the host. If enabled mode is SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 the Level Zero plugin would create all events having device-scope only and create proxy host-visible events for them when their status is needed (wait/query) on the host. If enabled mode is SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 the Level Zero plugin would create all events having device-scope and add proxy host-visible event at the end of each command-list submission. The default is 2, meaning only the last event in a batch is host-visible. |
159159
| `SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS` | Integer | When set to a positive value enables use of Level Zero immediate commandlists, which means there is no batching and all commands are immediately submitted for execution. Default is 0. Note: When immediate commandlist usage is enabled it is necessary to also set SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS to either 0 or 1. |
160+
| `SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS` | Integer | When set to a positive value enables use of multiple Level Zero commandlists when submitting barriers. Default is 0. |
160161

161162
## Debugging variables for CUDA Plugin
162163

sycl/include/CL/sycl/detail/kernel_desc.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include <CL/sycl/detail/defines_elementary.hpp>
1414
#include <CL/sycl/detail/export.hpp>
1515

16+
#include <cstdint>
17+
1618
__SYCL_INLINE_NAMESPACE(cl) {
1719
namespace sycl {
1820
namespace detail {
@@ -79,6 +81,7 @@ template <class KernelNameType> struct KernelInfo {
7981
static constexpr const char *getFunctionName() { return ""; }
8082
static constexpr unsigned getLineNumber() { return 0; }
8183
static constexpr unsigned getColumnNumber() { return 0; }
84+
static constexpr int64_t getKernelSize() { return 0; }
8285
};
8386
#else
8487
template <char...> struct KernelInfoData {
@@ -93,6 +96,7 @@ template <char...> struct KernelInfoData {
9396
static constexpr const char *getFunctionName() { return ""; }
9497
static constexpr unsigned getLineNumber() { return 0; }
9598
static constexpr unsigned getColumnNumber() { return 0; }
99+
static constexpr int64_t getKernelSize() { return 0; }
96100
};
97101

98102
// C++14 like index_sequence and make_index_sequence
@@ -135,6 +139,9 @@ template <class KernelNameType> struct KernelInfo {
135139
static constexpr const char *getFunctionName() { return ""; }
136140
static constexpr unsigned getLineNumber() { return 0; }
137141
static constexpr unsigned getColumnNumber() { return 0; }
142+
static constexpr int64_t getKernelSize() {
143+
return SubKernelInfo::getKernelSize();
144+
}
138145
};
139146
#endif //__SYCL_UNNAMED_LAMBDA__
140147

sycl/include/CL/sycl/handler.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,13 +731,25 @@ class __SYCL_EXPORT handler {
731731
"kernel_handler is not yet supported by host device.",
732732
PI_ERROR_INVALID_OPERATION);
733733
}
734+
734735
KernelType *KernelPtr =
735736
ResetHostKernel<KernelType, LambdaArgType, Dims>(KernelFunc);
736737

737738
using KI = sycl::detail::KernelInfo<KernelName>;
739+
constexpr bool KernelHasName =
740+
KI::getName() != nullptr && KI::getName()[0] != '\0';
741+
742+
// Some host compilers may have different captures from Clang. Currently
743+
// there is no stable way of handling this when extracting the captures, so
744+
// a static assert is made to fail for incompatible kernel lambdas.
745+
static_assert(!KernelHasName || sizeof(KernelFunc) == KI::getKernelSize(),
746+
"Unexpected kernel lambda size. This can be caused by an "
747+
"external host compiler producing a lambda with an "
748+
"unexpected layout. This is a limitation of the compiler.");
749+
738750
// Empty name indicates that the compilation happens without integration
739751
// header, so don't perform things that require it.
740-
if (KI::getName() != nullptr && KI::getName()[0] != '\0') {
752+
if (KernelHasName) {
741753
// TODO support ESIMD in no-integration-header case too.
742754
MArgs.clear();
743755
extractArgsAndReqsFromLambda(reinterpret_cast<char *>(KernelPtr),

sycl/include/sycl/ext/intel/experimental/online_compiler.hpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,5 @@ online_compiler<source_language::cm>::compile(const std::string &src) {
219219
} // namespace experimental
220220
} // namespace intel
221221
} // namespace ext
222-
223-
namespace ext {
224-
namespace __SYCL2020_DEPRECATED(
225-
"use 'ext::intel::experimental' instead") intel {
226-
using namespace ext::intel::experimental;
227-
} // namespace intel
228-
} // namespace ext
229-
230222
} // namespace sycl
231223
} // __SYCL_INLINE_NAMESPACE(cl)

0 commit comments

Comments
 (0)