Skip to content

Commit de9ec72

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 3999e42 + 5d7e092 commit de9ec72

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+838
-664
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ llvm-spirv/ @AlexeySotkin @AlexeySachkov
88

99
opencl-aot/ @dm-vodopyanov @AlexeySachkov @romanovvlad
1010

11-
libdevice/ @asavonic @vzakhari
11+
libdevice/ @vzakhari
1212

1313
sycl/ @intel/llvm-reviewers-runtime
1414

CONTRIBUTING.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ is strongly encouraged that you submit the patch to https://llvm.org/ directly.
2020
See [LLVM contribution guidelines](https://llvm.org/docs/Contributing.html)
2121
for more information.
2222

23+
**NB**: A change in the compiler and runtime should be accompanied by
24+
corresponding test changes.
25+
See [Test DPC++ toolchain](sycl/doc/GetStartedGuide.md#test-dpc-toolchain)
26+
section of Get Started guide for more information.
27+
2328
**Note (October, 2020)**: DPC++ runtime and compiler ABI is currently in frozen
2429
state. This means that no ABI-breaking changes will be accepted by default.
2530
Project maintainers may still approve breaking changes in some cases. Please,

buildbot/dependency.conf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ ocl_cpu_rt_ver=2020.11.11.0.04
44
# https://github.com/intel/llvm/releases/download/2020-WW45/win-oclcpuexp-2020.11.11.0.04_rel.zip
55
ocl_cpu_rt_ver_win=2020.11.11.0.04
66
# Same GPU driver supports Level Zero and OpenCL:
7-
# https://github.com/intel/compute-runtime/releases/tag/20.44.18297
8-
ocl_gpu_rt_ver=20.44.18297
7+
# https://github.com/intel/compute-runtime/releases/tag/20.45.18403
8+
ocl_gpu_rt_ver=20.45.18403
99
# Same GPU driver supports Level Zero and OpenCL:
1010
# https://downloadmirror.intel.com/29988/a08/igfx_win10_100.8935.zip
1111
ocl_gpu_rt_ver_win=27.20.100.8935
@@ -24,7 +24,7 @@ fpga_ver_win=20201022_000005
2424
[DRIVER VERSIONS]
2525
cpu_driver_lin=2020.11.11.0.04
2626
cpu_driver_win=2020.11.11.0.04
27-
gpu_driver_lin=20.44.18297
27+
gpu_driver_lin=20.45.18403
2828
gpu_driver_win=27.20.100.8935
2929
fpga_driver_lin=2020.11.11.0.04
3030
fpga_driver_win=2020.11.11.0.04

clang/include/clang/Basic/AttrDocs.td

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2397,6 +2397,22 @@ is unspecified and is therefore considered infinite.
23972397
In case of ivdep being applied both w/o an array variable and for a particular
23982398
array, the array variables that were not designated a separate ivdep will receive
23992399
the no-array ivdep's safelen, with the corresponding treatment by the backend.
2400+
2401+
.. code-block:: c++
2402+
2403+
void foo() {
2404+
int a[10];
2405+
[[intel::ivdep]] for (int i = 0; i != 10; ++i) { }
2406+
[[intel::ivdep(2)]] for (int i = 0; i != 10; ++i) { }
2407+
[[intel::ivdep(a)]] for (int i = 0; i != 10; ++i) { }
2408+
[[intel::ivdep(a, 2)]] for (int i = 0; i != 10; ++i) { }
2409+
}
2410+
2411+
template<int N>
2412+
void bar() {
2413+
[[intel::ivdep(N)]] for(;;) { }
2414+
}
2415+
24002416
}];
24012417
}
24022418

@@ -2407,6 +2423,19 @@ def SYCLIntelFPGAIIAttrDocs : Documentation {
24072423
This attribute applies to a loop. Indicates that the loop should be pipelined
24082424
with an initiation interval of N. N must be a positive integer. Cannot be
24092425
applied multiple times to the same loop.
2426+
2427+
.. code-block:: c++
2428+
2429+
void foo() {
2430+
int var = 0;
2431+
[[intel::ii(4)]] for (int i = 0; i < 10; ++i) var++;
2432+
}
2433+
2434+
template<int N>
2435+
void bar() {
2436+
[[intel::ii(N)]] for(;;) { }
2437+
}
2438+
24102439
}];
24112440
}
24122441

@@ -2418,6 +2447,19 @@ This attribute applies to a loop. Indicates that the loop should allow no more
24182447
than N threads or iterations to execute it simultaneously. N must be a non
24192448
negative integer. '0' indicates the max_concurrency case to be unbounded. Cannot
24202449
be applied multiple times to the same loop.
2450+
2451+
.. code-block:: c++
2452+
2453+
void foo() {
2454+
int a[10];
2455+
[[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0;
2456+
}
2457+
2458+
template<int N>
2459+
void bar() {
2460+
[[intel::max_concurrency(N)]] for(;;) { }
2461+
}
2462+
24212463
}];
24222464
}
24232465

@@ -2429,6 +2471,34 @@ This attribute applies to a loop. Indicates that the loop nest should be
24292471
coalesced into a single loop without affecting functionality. Parameter N is
24302472
optional. If specified, it shall be a positive integer, and indicates how many
24312473
of the nested loop levels should be coalesced.
2474+
2475+
.. code-block:: c++
2476+
2477+
void foo() {
2478+
int a[10];
2479+
[[intel::loop_coalesce]] for (int i = 0; i != 10; ++i) a[i] = 0;
2480+
}
2481+
2482+
template<int N>
2483+
void loop_coalesce() {
2484+
int j = 0, n = 48;
2485+
[[intel::loop_coalesce(N)]]
2486+
while (j < n) {
2487+
if (j % 4) {
2488+
++j;
2489+
continue;
2490+
}
2491+
}
2492+
j = 0;
2493+
[[intel::loop_coalesce]]
2494+
while (j < n) {
2495+
if (j % 6) {
2496+
++j;
2497+
continue;
2498+
}
2499+
}
2500+
}
2501+
24322502
}];
24332503
}
24342504

@@ -2440,6 +2510,14 @@ This attribute applies to a loop. Disables pipelining of the loop data path,
24402510
causing the loop to be executed serially. Cannot be used on the same loop in
24412511
conjunction with max_interleaving, speculated_iterations, max_concurrency, ii
24422512
or ivdep.
2513+
2514+
.. code-block:: c++
2515+
2516+
void foo() {
2517+
int var = 0;
2518+
[[intel::disable_loop_pipelining]] for (int i = 0; i < 10; ++i) var++;
2519+
}
2520+
24432521
}];
24442522
}
24452523

@@ -2453,6 +2531,19 @@ mean that this attribute can only be applied to inner loops in user code - outer
24532531
loops in user code may still be contained in an implicit loop due to NDRange).
24542532
Parameter N is mandatory, and shall be a non-negative integer. Cannot be
24552533
used on the same loop in conjunction with disable_loop_pipelining.
2534+
2535+
.. code-block:: c++
2536+
2537+
void foo() {
2538+
int a[10];
2539+
[[intel::max_interleaving(4)]] for (int i = 0; i != 10; ++i) a[i] = 0;
2540+
}
2541+
2542+
template<int N>
2543+
void bar() {
2544+
[[intel::max_interleaving(N)]] for(;;) { }
2545+
}
2546+
24562547
}];
24572548
}
24582549

@@ -2465,6 +2556,19 @@ iterations that will be in flight for a loop invocation (i.e. the exit
24652556
condition for these iterations will not have been evaluated yet).
24662557
Parameter N is mandatory, and may either be 0, or a positive integer. Cannot be
24672558
used on the same loop in conjunction with disable_loop_pipelining.
2559+
2560+
.. code-block:: c++
2561+
2562+
void foo() {
2563+
int var = 0;
2564+
[[intel::speculated_iterations(4)]] for (int i = 0; i < 10; ++i) var++;
2565+
}
2566+
2567+
template<int N>
2568+
void bar() {
2569+
[[intel::speculated_iterations(N)]] for(;;) { }
2570+
}
2571+
24682572
}];
24692573
}
24702574

@@ -2474,6 +2578,21 @@ def SYCLIntelFPGANofusionAttrDocs : Documentation {
24742578
let Content = [{
24752579
This attribute applies to a loop. Indicates that the annotated
24762580
loop should not be fused with any adjacent loop.
2581+
2582+
.. code-block:: c++
2583+
2584+
void foo() {
2585+
[[intel::nofusion]] for (int i=0; i<10;++i) { }
2586+
}
2587+
2588+
void nofusion() {
2589+
int a1[10];
2590+
for (int i = 0; i < 10; ++i) {
2591+
[[intel::nofusion]] for (int j = 0; j < 10; ++j) {
2592+
a1[i] += a1[j];
2593+
}
2594+
}
}
2595+
24772596
}];
24782597
}
24792598

sycl/doc/GetStartedGuide.md

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ folder:
218218
/opt/intel/oclfpgaemu_<fpga_version>/x64
219219
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so
220220
/opt/intel/oclfpgaemu_<fpga_version>/x64
221-
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so.2
221+
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so.12
222222
/opt/intel/oclfpgaemu_<fpga_version>/x64
223223
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so.2
224224
/opt/intel/oclfpgaemu_<fpga_version>/x64
@@ -227,7 +227,7 @@ folder:
227227
/opt/intel/oclcpuexp_<cpu_version>/x64
228228
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so
229229
/opt/intel/oclcpuexp_<cpu_version>/x64
230-
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so.2
230+
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbb.so.12
231231
/opt/intel/oclcpuexp_<cpu_version>/x64
232232
ln -s /opt/intel/tbb_<tbb_version>/tbb/lib/intel64/gcc4.8/libtbbmalloc.so.2
233233
/opt/intel/oclcpuexp_<cpu_version>/x64
@@ -280,7 +280,40 @@ command:
280280
281281
### Test DPC++ toolchain
282282
283-
#### Run regression tests
283+
Every product change should be accompanied by a corresponding test modification
284+
(adding new test(s), extending, removing or modifying existing test(s)).
285+
286+
There are 3 types of tests which are used for DPC++ toolchain validation:
287+
* DPC++ in-tree LIT tests including [check-llvm](../../llvm/test),
288+
[check-clang](../../clang/test), [check-llvm-spirv](../../llvm-spirv/test) and
289+
[check-sycl](../../sycl/test) targets stored in this repository. These tests
290+
should not have hardware (e.g. GPU, FPGA, etc.) or external software
291+
dependencies (e.g. OpenCL, Level Zero, CUDA runtimes). All tests not following
292+
this approach should be moved to DPC++ end-to-end or SYCL-CTS tests.
293+
However, the tests for a feature under active development requiring atomic
294+
change for tests and product can be put to
295+
[sycl/test/on-device](../../sycl/test/on-device) temporarily. It is the developer's
296+
responsibility to move the tests to DPC++ E2E test suite or SYCL-CTS once
297+
the feature is stabilized.
298+
299+
* DPC++ end-to-end (E2E) tests which are an extension to the
300+
[LLVM\* test suite](https://github.com/intel/llvm-test-suite/tree/intel/SYCL).
301+
A test which requires full stack including backend runtimes (e.g. OpenCL,
302+
Level Zero or CUDA) should be put to DPC++ E2E test suite following
303+
[CONTRIBUTING](https://github.com/intel/llvm-test-suite/blob/intel/CONTRIBUTING.md).
304+
305+
* SYCL-CTS are official
306+
[Khronos\* SYCL\* conformance tests](https://github.com/KhronosGroup/SYCL-CTS).
307+
They verify SYCL specification compatibility. All implementation details or
308+
extensions are out of scope for the tests. If the SYCL specification has changed
309+
(SYCL-CTS tests conflict with a recent version of the SYCL specification) or a change
310+
is required in the way the tests are built with DPC++ compiler (defined in
311+
[FindIntel_SYCL](https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-1.2.1/master/cmake/FindIntel_SYCL.cmake))
312+
a pull request should be created under
313+
[KhronosGroup/SYCL-CTS](https://github.com/KhronosGroup/SYCL-CTS) with the required
314+
patch.
315+
316+
#### Run in-tree LIT tests
284317
285318
To verify that the built DPC++ toolchain is working correctly, run:
286319
@@ -302,6 +335,11 @@ skipped.
302335
If CUDA support has been built, it is tested only if there are CUDA devices
303336
available.
304337
338+
#### Run DPC++ E2E test suite
339+
340+
Follow instructions from the link below to build and run tests:
341+
[README](https://github.com/intel/llvm-test-suite/tree/intel/SYCL#execution)
342+
305343
#### Run Khronos\* SYCL\* conformance test suite (optional)
306344
307345
Khronos\* SYCL\* conformance test suite (CTS) is intended to validate

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_types.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <CL/sycl/detail/stl_type_traits.hpp> // to define C++14,17 extensions
1717
#include <CL/sycl/half_type.hpp>
1818
#include <cstdint>
19-
#include <type_traits>
2019

2120
__SYCL_INLINE_NAMESPACE(cl) {
2221
namespace sycl {
@@ -208,8 +207,8 @@ template <typename T1, typename T2> struct computation_type {
208207
template <typename U> constexpr bool is_type() { return false; }
209208

210209
template <typename U, typename T, typename... Ts> constexpr bool is_type() {
211-
using UU = typename std::remove_const<U>::type;
212-
using TT = typename std::remove_const<T>::type;
210+
using UU = typename detail::remove_const_t<U>;
211+
using TT = typename detail::remove_const_t<T>;
213212
return std::is_same<UU, TT>::value || is_type<UU, Ts...>();
214213
}
215214

@@ -228,10 +227,10 @@ struct bitcast_helper {
228227
// Change the element type of a simd vector.
229228
template <typename ToEltTy, typename FromEltTy, int FromN,
230229
typename = csd::enable_if_t<is_vectorizable<ToEltTy>::value>>
231-
ESIMD_INLINE typename std::conditional<
230+
ESIMD_INLINE typename detail::conditional_t<
232231
std::is_same<FromEltTy, ToEltTy>::value, vector_type_t<FromEltTy, FromN>,
233232
vector_type_t<ToEltTy,
234-
bitcast_helper<ToEltTy, FromEltTy, FromN>::nToElems()>>::type
233+
bitcast_helper<ToEltTy, FromEltTy, FromN>::nToElems()>>
235234
bitcast(vector_type_t<FromEltTy, FromN> Val) {
236235
// Noop.
237236
if constexpr (std::is_same<FromEltTy, ToEltTy>::value)

0 commit comments

Comments
 (0)