Skip to content

Commit a056133

Browse files
committed
Merge remote-tracking branch sycl into win_fpga_lit
2 parents 8fa9127 + 4d5b34d commit a056133

File tree

20 files changed

+61
-232
lines changed

20 files changed

+61
-232
lines changed

buildbot/dependency.conf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ ocl_cpu_rt_ver=2020.10.7.0.15
44
# https://github.com/intel/llvm/releases/download/2020-WW31/win-oclcpuexp-2020.10.7.0.15_rel.zip
55
ocl_cpu_rt_ver_win=2020.10.7.0.15
66
# Same GPU driver supports Level Zero and OpenCL:
7-
# https://github.com/intel/compute-runtime/releases/tag/20.25.17111
7+
# https://github.com/intel/compute-runtime/releases/tag/20.29.17408
88
ocl_gpu_rt_ver=20.29.17408
99
# Same GPU driver supports Level Zero and OpenCL:
1010
# https://downloadmirror.intel.com/29674/a08/igfx_win10_100.8336.zip
1111
ocl_gpu_rt_ver_win=27.20.100.8336
1212
intel_sycl_ver=build
13-
# https://github.com/oneapi-src/oneTBB/releases/download/v2020.2/tbb-2020.2-lin.tgz
13+
# TODO provide URL for Linux TBB driver
1414
tbb_ver=2021.1.8.515
15-
# https://github.com/oneapi-src/oneTBB/releases/download/v2020.2/tbb-2020.2-win.zip
15+
# TODO provide URL for Windows TBB driver
1616
tbb_ver_win=2021.1.8.514
1717
# https://github.com/intel/llvm/releases/download/2020-WW31/fpgaemu-2020.10.7.0.15_rel.tar.gz
1818
ocl_fpga_emu_ver=2020.10.7.0.15

llvm-spirv/lib/SPIRV/SPIRVWriter.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,10 +563,11 @@ SPIRVFunction *LLVMToSPIRV::transFunctionDecl(Function *F) {
563563
// Order of integer numbers in MD node follows the order of function
564564
// parameters on which we shall attach the appropriate decoration. Add
565565
// decoration only if MD value is not negative.
566-
BM->addCapability(CapabilityFPGABufferLocationINTEL);
567566
int LocID = getMDOperandAsInt(BufferLocation, ArgNo);
568-
if (LocID >= 0)
567+
if (LocID >= 0) {
568+
BM->addCapability(CapabilityFPGABufferLocationINTEL);
569569
BA->addDecorate(DecorationBufferLocationINTEL, LocID);
570+
}
570571
}
571572
}
572573
if (Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))

sycl/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ if( NOT OpenCL_INCLUDE_DIRS )
107107
INSTALL_COMMAND ""
108108
STEP_TARGETS build
109109
COMMENT "Downloading OpenCL headers."
110+
LOG_DOWNLOAD 1
111+
LOG_UPDATE 1
112+
LOG_BUILD 1
110113
)
111114
add_definitions(-DCL_TARGET_OPENCL_VERSION=220)
112115
else()
@@ -154,6 +157,11 @@ if( NOT OpenCL_LIBRARIES )
154157
STEP_TARGETS configure,build,install
155158
DEPENDS ocl-headers
156159
BUILD_BYPRODUCTS ${OpenCL_LIBRARIES}
160+
LOG_DOWNLOAD 1
161+
LOG_UPDATE 1
162+
LOG_CONFIGURE 1
163+
LOG_BUILD 1
164+
LOG_INSTALL 1
157165
)
158166
ExternalProject_Add_Step(ocl-icd llvminstall
159167
COMMAND ${CMAKE_COMMAND} -E copy_directory <INSTALL_DIR>/ ${LLVM_BINARY_DIR}

sycl/doc/extensions/GroupAlgorithms/SYCL_INTEL_group_algorithms.asciidoc

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,7 @@ John Pennycook, Intel (john 'dot' pennycook 'at' intel 'dot' com)
5151

5252
== Dependencies
5353

54-
This extension is written against the SYCL 1.2.1 specification, Revision v1.2.1-6 and the following extensions:
55-
56-
- +SYCL_INTEL_extended_atomics+
54+
This extension is written against the SYCL 1.2.1 specification, Revision v1.2.1-6.
5755

5856
== Overview
5957

@@ -69,10 +67,6 @@ The extension introduces the following functions:
6967
- +reduce+
7068
- +exclusive_scan+
7169
- +inclusive_scan+
72-
- +barrier+
73-
74-
The definitions and behavior of the following functions are based on equivalents in the SYCL 2020 provisional specification:
75-
- +barrier+
7670

7771
=== Alignment with OpenCL vs C++
7872

@@ -258,21 +252,6 @@ The return types of the collective functions in {cpp}17 are not deduced from the
258252
|Perform an inclusive scan over the values in the range [_first_, _last_) using the operator _binary_op_, which must be one of the group algorithms library function objects. The value written to +result + i+ is the inclusive scan of the first +i+ values in the range and an initial value specified by _init_. Returns a pointer to the end of the output range. _first_, _last_, _result_, _binary_op_ and _init_ must be the same for all work-items in the group. _binary_op(init, *first)_ must return a value of type _T_.
259253
|===
260254

261-
==== Synchronization
262-
263-
The behavior of memory fences in this section is aligned with the single happens-before relationship defined by the +SYCL_INTEL_extended_atomics+ extension.
264-
265-
|===
266-
|Function|Description
267-
268-
|+template <typename Group> void barrier(Group g);+
269-
|Synchronize all work-items in the group, and ensure that all memory accesses to any address space prior to the barrier are visible to all work-items in the group after the barrier. The scope of the group memory fences implied by this barrier is the narrowest scope including all work-items in the group.
270-
271-
|+template <typename Group> void barrier(Group g, memory_scope scope);+
272-
|Synchronize all work-items in the group, and ensure that all memory accesses to any address space prior to the barrier are visible to all work-items specified by _scope_ after the barrier. The scope of the group memory fences implied by this barrier is controlled by _scope_ and must be broader than the narrowest scope including all work-items in the group. If the specified _scope_ is narrower than the narrowest scope including all work-items in the group, the _scope_ argument is ignored.
273-
274-
|===
275-
276255
== Issues
277256

278257
None.
@@ -291,7 +270,6 @@ None.
291270
|========================================
292271
|Rev|Date|Author|Changes
293272
|1|2020-01-30|John Pennycook|*Initial public working draft*
294-
|2|2020-07-28|John Pennycook|*Add group barrier*
295273
|========================================
296274
297275
//************************************************************************

sycl/doc/extensions/SubGroupAlgorithms/SYCL_INTEL_sub_group_algorithms.asciidoc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ This extension adds sub-group support to all of the functions from +SYCL_INTEL_g
7070
- +reduce+
7171
- +exclusive_scan+
7272
- +inclusive_scan+
73-
- +barrier+
7473

7574
It additionally introduces a number of functions that are currently specific to sub-groups:
7675

@@ -166,7 +165,6 @@ None.
166165
|========================================
167166
|Rev|Date|Author|Changes
168167
|1|2020-03-16|John Pennycook|*Initial public working draft*
169-
|2|2020-07-28|John Pennycook|*Add group barrier*
170168
|========================================
171169
172170
//************************************************************************

sycl/include/CL/sycl/intel/group_algorithm.hpp

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include <CL/sycl/detail/spirv.hpp>
1414
#include <CL/sycl/detail/type_traits.hpp>
1515
#include <CL/sycl/group.hpp>
16-
#include <CL/sycl/intel/atomic.hpp>
1716
#include <CL/sycl/intel/functional.hpp>
1817
#include <CL/sycl/intel/sub_group.hpp>
1918

@@ -78,15 +77,6 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) {
7877
return result;
7978
}
8079

81-
// TODO: Replace with Group::fence_scope from SYCL 2020 provisional
82-
template <typename Group> struct FenceScope {
83-
static constexpr intel::memory_scope value = intel::memory_scope::work_group;
84-
};
85-
86-
template <> struct FenceScope<intel::sub_group> {
87-
static constexpr intel::memory_scope value = intel::memory_scope::sub_group;
88-
};
89-
9080
template <typename T, class BinaryOperation> struct identity {};
9181

9282
template <typename T, typename V> struct identity<T, intel::plus<V>> {
@@ -906,42 +896,6 @@ template <typename Group> bool leader(Group g) {
906896
#endif
907897
}
908898

909-
template <typename Group> void barrier(Group, memory_scope scope) {
910-
static_assert(sycl::detail::is_generic_group<Group>::value,
911-
"Group algorithms only support the sycl::group and "
912-
"intel::sub_group class.");
913-
#ifdef __SYCL_DEVICE_ONLY__
914-
// MemoryScope must be broader than Group scope for correctness
915-
auto GroupScope = detail::FenceScope<Group>::value;
916-
auto BroadestScope = (scope > GroupScope) ? scope : GroupScope;
917-
auto MemoryScope = sycl::detail::spirv::getScope(BroadestScope);
918-
auto ExecutionScope = sycl::detail::spirv::group_scope<Group>::value;
919-
__spirv_ControlBarrier(ExecutionScope, MemoryScope,
920-
__spv::MemorySemanticsMask::AcquireRelease |
921-
__spv::MemorySemanticsMask::SubgroupMemory |
922-
__spv::MemorySemanticsMask::WorkgroupMemory |
923-
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
924-
#else
925-
(void)scope;
926-
throw runtime_error("Group algorithms are not supported on host device.",
927-
PI_INVALID_DEVICE);
928-
#endif
929-
}
930-
931-
template <typename Group> void barrier(Group g) {
932-
static_assert(sycl::detail::is_generic_group<Group>::value,
933-
"Group algorithms only support the sycl::group and "
934-
"intel::sub_group class.");
935-
#ifdef __SYCL_DEVICE_ONLY__
936-
auto MemoryScope = detail::FenceScope<Group>::value;
937-
barrier(g, MemoryScope);
938-
#else
939-
(void)g;
940-
throw runtime_error("Group algorithms are not supported on host device.",
941-
PI_INVALID_DEVICE);
942-
#endif
943-
}
944-
945899
} // namespace intel
946900
} // namespace sycl
947901
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/plugins/level_zero/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
3333
-DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS}
3434
-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
3535
-DCMAKE_INSTALL_LIBDIR:PATH=lib${LLVM_LIBDIR_SUFFIX}
36+
LOG_DOWNLOAD 1
37+
LOG_UPDATE 1
38+
LOG_CONFIGURE 1
39+
LOG_BUILD 1
40+
LOG_INSTALL 1
3641
${AUX_CMAKE_FLAGS}
3742
STEP_TARGETS configure,build,install
3843
DEPENDS ocl-headers

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3123,7 +3123,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr,
31233123
// piEnqueueMemBufferMap, but can only do so after the above copy
31243124
// is completed. Instead of waiting for It here (blocking), we shall
31253125
// do so in piEventRelease called for the pi_event tracking the unmap.
3126-
(*Event)->CommandData = MemObj->MapHostPtr ? nullptr : MappedPtr;
3126+
if (Event)
3127+
(*Event)->CommandData = MemObj->MapHostPtr ? nullptr : MappedPtr;
31273128

31283129
// Execute command list asynchronously, as the event will be used
31293130
// to track down its completion.

sycl/test/group-algorithm/barrier.cpp

Lines changed: 0 additions & 58 deletions
This file was deleted.

sycl/test/lit.cfg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
config.test_exec_root = os.path.join(config.sycl_obj_root, 'test')
3737

3838
# Propagate some variables from the host environment.
39-
llvm_config.with_system_environment(['PATH', 'OCL_ICD_FILENAME', 'SYCL_DEVICE_ALLOWLIST', 'SYCL_CONFIG_FILE_NAME'])
39+
llvm_config.with_system_environment(['PATH', 'OCL_ICD_FILENAMES', 'SYCL_DEVICE_ALLOWLIST', 'SYCL_CONFIG_FILE_NAME'])
4040

4141
# Configure LD_LIBRARY_PATH or corresponding os-specific alternatives
4242
if platform.system() == "Linux":

sycl/test/sub_group/broadcast.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,10 @@
88

99
#include "helper.hpp"
1010
#include <CL/sycl.hpp>
11-
template <typename T>
12-
class sycl_subgr;
11+
template <typename T> class sycl_subgr;
1312
using namespace cl::sycl;
14-
template <typename T>
15-
void check(queue &Queue) {
16-
const int G = 240, L = 60;
13+
template <typename T> void check(queue &Queue) {
14+
const int G = 256, L = 64;
1715
try {
1816
nd_range<1> NdRange(G, L);
1917
buffer<T> syclbuf(G);
@@ -23,9 +21,10 @@ void check(queue &Queue) {
2321
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
2422
cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
2523
intel::sub_group SG = NdItem.get_sub_group();
26-
/*Broadcast GID of element with SGLID == SGID */
24+
/*Broadcast GID of element with SGLID == SGID % SGMLR*/
2725
syclacc[NdItem.get_global_id()] =
28-
broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id());
26+
broadcast(SG, T(NdItem.get_global_id(0)),
27+
SG.get_group_id() % SG.get_max_local_range()[0]);
2928
if (NdItem.get_global_id(0) == 0)
3029
sgsizeacc[0] = SG.get_max_local_range()[0];
3130
});
@@ -44,7 +43,8 @@ void check(queue &Queue) {
4443
WGid++;
4544
SGid = 0;
4645
}
47-
exit_if_not_equal<T>(syclacc[j], L * WGid + SGid + SGid * sg_size,
46+
exit_if_not_equal<T>(syclacc[j],
47+
L * WGid + SGid % sg_size + SGid * sg_size,
4848
"broadcasted value");
4949
}
5050
} catch (exception e) {

sycl/test/sub_group/generic-shuffle.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
// UNSUPPORTED: cuda || cpu
1+
// UNSUPPORTED: cuda
22
// CUDA compilation and runtime do not yet support sub-groups.
3-
// #2245 failed on OpenCL CPU (2020.10.7.0.15) with avx2 instruction set
43
//
54
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
65
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
@@ -24,7 +23,7 @@ template <typename T> class pointer_kernel;
2423
using namespace cl::sycl;
2524

2625
template <typename T>
27-
void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
26+
void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) {
2827
try {
2928
nd_range<1> NdRange(G, L);
3029
buffer<T *> buf(G);
@@ -118,7 +117,7 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) {
118117
}
119118

120119
template <typename T, typename Generator>
121-
void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) {
120+
void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) {
122121

123122
// Fill a vector with values that will be shuffled
124123
std::vector<T> values(G);

sycl/test/sub_group/load_store.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/reduce.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

sycl/test/sub_group/reduce.hpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@
99
#include "helper.hpp"
1010
#include <CL/sycl.hpp>
1111

12-
template <typename T, class BinaryOperation>
13-
class sycl_subgr;
12+
template <typename T, class BinaryOperation> class sycl_subgr;
1413

1514
using namespace cl::sycl;
1615

1716
template <typename T, class BinaryOperation>
1817
void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
19-
size_t G = 240, size_t L = 60) {
18+
size_t G = 256, size_t L = 64) {
2019
try {
2120
nd_range<1> NdRange(G, L);
2221
buffer<T> buf(G);
@@ -65,8 +64,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
6564
}
6665
}
6766

68-
template <typename T>
69-
void check(queue &Queue, size_t G = 240, size_t L = 60) {
67+
template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
7068
// limit data range for half to avoid rounding issues
7169
if (std::is_same<T, cl::sycl::half>::value) {
7270
G = 64;

sycl/test/sub_group/reduce_fp64.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
// UNSUPPORTED: cuda
1+
// UNSUPPORTED: cuda || cpu
22
// CUDA compilation and runtime do not yet support sub-groups.
3+
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
4+
// runtime for every supported ISA
35
//
46
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
57
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out

0 commit comments

Comments
 (0)