Skip to content

Commit 9d2ef43

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents dc65937 + ac3de67 commit 9d2ef43

30 files changed

+1260
-758
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4971,10 +4971,21 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
49714971

49724972
// 4. Finish the call.
49734973

4974+
// SYCL does not support C++ exceptions or termination in device code, so all
4975+
// functions have to return.
4976+
bool SyclSkipNoReturn = false;
4977+
if (getLangOpts().SYCLIsDevice && CI->doesNotReturn()) {
4978+
if (auto *F = CI->getCalledFunction())
4979+
F->removeFnAttr(llvm::Attribute::NoReturn);
4980+
CI->removeAttribute(llvm::AttributeList::FunctionIndex,
4981+
llvm::Attribute::NoReturn);
4982+
SyclSkipNoReturn = true;
4983+
}
4984+
49744985
// If the call doesn't return for non-sycl devices, finish the basic block and
49754986
// clear the insertion point; this allows the rest of IRGen to discard
49764987
// unreachable code.
4977-
if (CI->doesNotReturn() && !getLangOpts().SYCLIsDevice) {
4988+
if (!SyclSkipNoReturn && CI->doesNotReturn()) {
49784989
if (UnusedReturnSizePtr)
49794990
PopCleanupBlock();
49804991

clang/test/CodeGenSYCL/remove-ur-inst.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s
2+
// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-enable-optimizations -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s
23

34
SYCL_EXTERNAL void doesNotReturn() throw() __attribute__((__noreturn__));
45

@@ -11,6 +12,7 @@ int main() {
1112
kernel<class test>([]() {
1213
doesNotReturn();
1314
// CHECK-NOT: unreachable
15+
// CHECK-NOT: noreturn
1416
});
1517
return 0;
16-
}
18+
}

sycl/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ include(AddSYCLExecutable)
1414
set(SYCL_MAJOR_VERSION 2)
1515
set(SYCL_MINOR_VERSION 1)
1616
set(SYCL_PATCH_VERSION 0)
17-
set(SYCL_DEV_ABI_VERSION 2)
17+
set(SYCL_DEV_ABI_VERSION 3)
1818
if (SYCL_ADD_DEV_VERSION_POSTFIX)
1919
set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
2020
endif()
@@ -248,7 +248,7 @@ add_subdirectory( source )
248248
# Auxilliary extras for SYCL headers/library
249249
if (NOT WIN32)
250250
install(FILES
251-
"${CMAKE_CURRENT_SOURCE_DIR}/xmethods/libsycl.so-gdb.py"
251+
"${CMAKE_CURRENT_SOURCE_DIR}/gdb/libsycl.so-gdb.py"
252252
RENAME "libsycl.so.${SYCL_VERSION_STRING}-gdb.py"
253253
DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/"
254254
COMPONENT sycl-headers-extras)

sycl/doc/GetStartedGuide.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ and a wide range of compute accelerators such as GPU and FPGA.
2727
* `ninja` -
2828
[Download](https://github.com/ninja-build/ninja/wiki/Pre-built-Ninja-packages)
2929
* C++ compiler
30-
* Linux: `GCC` version 5.1.0 or later (including libstdc++) -
30+
* Linux: `GCC` version 7.1.0 or later (including libstdc++) -
3131
[Download](https://gcc.gnu.org/install/)
3232
* Windows: `Visual Studio` version 15.7 preview 4 or later -
3333
[Download](https://visualstudio.microsoft.com/downloads/)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
2+
# FPGA lsu
3+
4+
The Intel FPGA `lsu` class is implemented in `CL/sycl/intel/fpga_lsu.hpp` which
5+
is included in `CL/sycl/intel/fpga_extensions.hpp`.
6+
7+
The class `cl::sycl::intel::lsu` allows users to explicitly request that the
8+
implementation of a global memory access is configured in a certain way. The
9+
class has two member functions, `load()` and `store()` which allow loading from
10+
and storing to a `global_ptr`, respectively, and is templated on the following
11+
4 optional paremeters:
12+
13+
1. **`cl::sycl::intel::burst_coalesce<B>`, where `B` is a boolean**: request,
14+
to the extent possible, that a dynamic burst coalescer be implemented when
15+
`load` or `store` are called. The default value of this parameter is `false`.
16+
2. **`cl::sycl::intel::cache<N>`, where `N` is an integer greater or equal to
17+
0**: request, to the extent possible, that a read-only cache of the specified
18+
size in bytes be implemented when when `load` is called. It is not allowed to
19+
use that parameter for `store`. The default value of this parameter is `0`.
20+
3. **`cl::sycl::intel::statically_coalesce<N>`, where `B` is a boolean**:
21+
request, to the extent possible, that `load` or `store` accesses, is allowed to
22+
be statically coalesced with other memory accesses at compile time. The default
23+
value of this parameter is `true`.
24+
4. **`cl::sycl::intel::prefetch<B>`, where `N` is a boolean**: request, to the
25+
extent possible, that a prefetcher be implemented when `load` is called. It is
26+
not allowed to use that parameter for `store`. The default value of this
27+
parameter is `false`.
28+
29+
Currently, not every combination of parameters is allowed due to limitations in
30+
the backend. The following rules apply:
31+
1. For `store`, `cl::sycl::intel::cache` must be `0` and
32+
`cl::sycl::intel::prefetch` must be `false`.
33+
2. For `load`, if `cl::sycl::intel::cache` is set to a value greater than `0`,
34+
then `cl::sycl::intel::burst_coalesce` must be set to `true`.
35+
3. For `load`, exactly one of `cl::sycl::intel::prefetch` and
36+
`cl::sycl::intel::burst_coalesce` is allowed to be `true`.
37+
4. For `load`, exactly one of `cl::sycl::intel::prefetch` and
38+
`cl::sycl::intel::cache` is allowed to be `true`.
39+
40+
## Implementation
41+
42+
The implementation relies on the Clang built-in `__builtin_intel_fpga_mem` when
43+
parsing the SYCL device code. The built-in uses the LLVM `ptr.annotation`
44+
intrinsic under the hood to annotate the pointer that is being accessed.
45+
```c++
46+
template <class... mem_access_params> class lsu final {
47+
public:
48+
lsu() = delete;
49+
50+
template <typename T> static T &load(sycl::global_ptr<T> Ptr) {
51+
check_load();
52+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
53+
return *__builtin_intel_fpga_mem((T *)Ptr,
54+
_burst_coalesce | _cache |
55+
_dont_statically_coalesce | _prefetch,
56+
_cache_val);
57+
#else
58+
return *Ptr;
59+
#endif
60+
}
61+
62+
template <typename T> static void store(sycl::global_ptr<T> Ptr, T Val) {
63+
check_store();
64+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
65+
*__builtin_intel_fpga_mem((T *)Ptr,
66+
_burst_coalesce | _cache |
67+
_dont_statically_coalesce | _prefetch,
68+
_cache_val) = Val;
69+
#else
70+
*Ptr = Val;
71+
#endif
72+
}
73+
...
74+
}
75+
```
76+
77+
## Usage
78+
79+
```c++
80+
#include <CL/sycl/intel/fpga_extensions.hpp>
81+
...
82+
cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
83+
cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
84+
85+
Queue.submit([&](cl::sycl::handler &cgh) {
86+
auto output_accessor = output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
87+
auto input_accessor = input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
88+
89+
cgh.single_task<class kernel>([=] {
90+
auto input_ptr = input_accessor.get_pointer();
91+
auto output_ptr = output_accessor.get_pointer();
92+
93+
using PrefetchingLSU =
94+
cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>,
95+
cl::sycl::intel::statically_coalesce<false>>;
96+
97+
using BurstCoalescedLSU =
98+
cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<false>,
99+
cl::sycl::intel::statically_coalesce<false>>;
100+
101+
using CachingLSU =
102+
cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
103+
cl::sycl::intel::cache<1024>,
104+
cl::sycl::intel::statically_coalesce<true>>;
105+
106+
using PipelinedLSU = cl::sycl::intel::lsu<>;
107+
108+
int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
109+
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
110+
111+
BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
112+
PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
113+
});
114+
});
115+
...
116+
```

sycl/doc/extensions/SubGroup/SYCL_INTEL_sub_group.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ Providing a generic group abstraction encapsulating the shared functionality of
6868

6969
=== Attributes
7070

71-
The +[[intel::reqd_sub_group_size(n)]]+ attribute indicates that the kernel must be compiled and executed with a sub-group of size _n_. The value of _n_ must be set to a sub-group size that is both supported by the device and compatible with all language features used by the kernel, or device compilation will fail. The set of valid sub-group sizes can be queried as described below.
71+
The +[[intel::reqd_sub_group_size(n)]]+ attribute indicates that the kernel must be compiled and executed with a sub-group of size _n_. The value of _n_ must be a compile-time integral constant expression. The value of _n_ must be set to a sub-group size that is both supported by the device and compatible with all language features used by the kernel, or device compilation will fail. The set of valid sub-group sizes can be queried as described below.
7272

7373
In addition to device functions, the required sub-group size attribute may also be specified in the definition of a named functor object, as in the example below:
7474

@@ -314,6 +314,7 @@ Yes, the four shuffles in this extension are a defining feature of sub-groups.
314314
|4|2020-04-21|John Pennycook|*Restore missing barrier function*
315315
|5|2020-04-21|John Pennycook|*Restore sub-group shuffles as member functions*
316316
|6|2020-04-22|John Pennycook|*Align with SYCL_INTEL_device_specific_kernel_queries*
317+
|7|2020-07-13|John Pennycook|*Clarify that reqd_sub_group_size must be a compile-time constant*
317318
|========================================
318319
319320
//************************************************************************

sycl/xmethods/libsycl.so-gdb.py renamed to sycl/gdb/libsycl.so-gdb.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,14 @@
22
# See https://llvm.org/LICENSE.txt for license information.
33
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
44

5+
import gdb
56
import gdb.xmethod
7+
import gdb.printing
8+
import itertools
69
import re
710

11+
### XMethod implementations ###
12+
813
"""
914
Generalized base class for buffer index calculation
1015
"""
@@ -134,3 +139,86 @@ def match(self, class_type, method_name):
134139

135140

136141
gdb.xmethod.register_xmethod_matcher(None, AccessorOpIndexMatcher(), replace=True)
142+
143+
### Pretty-printer implementations ###
144+
145+
"""
146+
Print an object deriving from cl::sycl::detail::array
147+
"""
148+
class SyclArrayPrinter:
149+
class ElementIterator:
150+
def __init__(self, data, size):
151+
self.data = data
152+
self.size = size
153+
self.count = 0
154+
155+
def __iter__(self):
156+
return self
157+
158+
def __next__(self):
159+
if self.count == self.size:
160+
raise StopIteration
161+
count = self.count
162+
self.count = self.count + 1
163+
try:
164+
elt = self.data[count]
165+
except:
166+
elt = "<error reading variable>"
167+
return ('[%d]' % count, elt)
168+
169+
def __init__(self, value):
170+
if value.type.code == gdb.TYPE_CODE_REF:
171+
if hasattr(gdb.Value,"referenced_value"):
172+
value = value.referenced_value()
173+
174+
self.value = value
175+
self.type = value.type.unqualified().strip_typedefs()
176+
self.dimensions = self.type.template_argument(0)
177+
178+
def children(self):
179+
try:
180+
return self.ElementIterator(self.value['common_array'], self.dimensions)
181+
except:
182+
# There is no way to return an error from this method. Return an
183+
# empty iterable to make GDB happy and rely on to_string method
184+
# to take care of formatting.
185+
return [ ]
186+
187+
def to_string(self):
188+
try:
189+
# Check if accessing array value will succeed and resort to
190+
# error message otherwise. Individual array element access failures
191+
# will be caught by iterator itself.
192+
_ = self.value['common_array']
193+
return self.type.tag
194+
except:
195+
return "<error reading variable>"
196+
197+
def display_hint(self):
198+
return 'array'
199+
200+
"""
201+
Print a cl::sycl::buffer
202+
"""
203+
class SyclBufferPrinter:
204+
def __init__(self, value):
205+
self.value = value
206+
self.type = value.type.unqualified().strip_typedefs()
207+
self.elt_type = value.type.template_argument(0)
208+
self.dimensions = value.type.template_argument(1)
209+
self.typeregex = re.compile('^([a-zA-Z0-9_:]+)(<.*>)?$')
210+
211+
def to_string(self):
212+
match = self.typeregex.match(self.type.tag)
213+
if not match:
214+
return "<error parsing type>"
215+
return ('%s<%s, %s> = {impl=%s}'
216+
% (match.group(1), self.elt_type, self.dimensions,
217+
self.value['impl'].address))
218+
219+
sycl_printer = gdb.printing.RegexpCollectionPrettyPrinter("SYCL")
220+
sycl_printer.add_printer("cl::sycl::id", '^cl::sycl::id<.*$', SyclArrayPrinter)
221+
sycl_printer.add_printer("cl::sycl::range", '^cl::sycl::range<.*$', SyclArrayPrinter)
222+
sycl_printer.add_printer("cl::sycl::buffer", '^cl::sycl::buffer<.*$', SyclBufferPrinter)
223+
gdb.printing.register_pretty_printer(None, sycl_printer, True)
224+

0 commit comments

Comments
 (0)