Skip to content

Commit 6610ca9

Browse files
committed
Merge branch 'sycl' into e2e-regression
2 parents 612b042 + c2e5529 commit 6610ca9

36 files changed

+906
-1173
lines changed

devops/cts_exclude_filter

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,3 @@ marray
55
math_builtin_api
66
# https://github.com/intel/llvm/issues/13574
77
hierarchical
8-
accessor

devops/dependencies-igc-dev.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"linux": {
33
"igc_dev": {
4-
"github_tag": "igc-dev-3bd1d5e",
5-
"version": "3bd1d5e",
6-
"updated_at": "2024-06-08T23:45:49Z",
7-
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/1582291042/zip",
4+
"github_tag": "igc-dev-480f8b6",
5+
"version": "480f8b6",
6+
"updated_at": "2024-06-12T22:42:55Z",
7+
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/1595870554/zip",
88
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
99
}
1010
}

libclc/amdgcn-amdhsa/libspirv/synchronization/barrier.cl

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,42 +10,56 @@
1010
#include <spirv/spirv.h>
1111
#include <spirv/spirv_types.h>
1212

13-
#define BUILTIN_FENCE(semantics, scope_memory) \
14-
if (semantics & Acquire) \
15-
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory); \
16-
else if (semantics & Release) \
17-
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory); \
18-
else if (semantics & AcquireRelease) \
19-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory); \
20-
else if (semantics & SequentiallyConsistent) \
21-
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory); \
22-
else \
23-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory);
2413

25-
_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory,
26-
unsigned int semantics) {
14+
#define BUILTIN_FENCE(order, scope_memory) \
15+
/* None implies Monotonic (for llvm/AMDGPU), or relaxed in C++. \
16+
* This does not make sense as ordering argument for a fence instruction \
17+
* and is not part of the supported orderings for a fence in AMDGPU. */ \
18+
if (order != None) { \
19+
switch (order) { \
20+
case Acquire: \
21+
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, scope_memory); \
22+
case Release: \
23+
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, scope_memory); \
24+
case AcquireRelease: \
25+
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, scope_memory); \
26+
case SequentiallyConsistent: \
27+
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, scope_memory); \
28+
default: \
29+
__builtin_trap(); \
30+
__builtin_unreachable(); \
31+
} \
32+
}
33+
34+
_CLC_INLINE void builtin_fence_order(unsigned int scope_memory,
35+
unsigned int order) {
2736
switch ((enum Scope)scope_memory) {
2837
case CrossDevice:
29-
BUILTIN_FENCE(semantics, "")
38+
BUILTIN_FENCE(order, "")
3039
case Device:
31-
BUILTIN_FENCE(semantics, "agent")
40+
BUILTIN_FENCE(order, "agent")
3241
case Workgroup:
33-
BUILTIN_FENCE(semantics, "workgroup")
42+
BUILTIN_FENCE(order, "workgroup")
3443
case Subgroup:
35-
BUILTIN_FENCE(semantics, "wavefront")
44+
BUILTIN_FENCE(order, "wavefront")
3645
case Invocation:
37-
BUILTIN_FENCE(semantics, "singlethread")
46+
BUILTIN_FENCE(order, "singlethread")
3847
}
3948
}
4049
#undef BUILTIN_FENCE
4150

51+
_CLC_DEF _CLC_OVERLOAD void __mem_fence(unsigned int scope_memory,
52+
unsigned int semantics) {
53+
builtin_fence_order(scope_memory, semantics & 0x1F);
54+
}
55+
4256
_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int scope_memory,
4357
unsigned int semantics) {
4458
__mem_fence(scope_memory, semantics);
4559
}
4660

4761
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
48-
__spirv_ControlBarrier(unsigned int scope_execution, unsigned scope_memory,
62+
__spirv_ControlBarrier(unsigned int scope_execution, unsigned int scope_memory,
4963
unsigned int semantics) {
5064
if (semantics) {
5165
__mem_fence(scope_memory, semantics);

libdevice/cmath_wrapper.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ extern "C" SYCL_EXTERNAL float __devicelib_fminf(float, float);
3939
DEVICE_EXTERN_C_INLINE
4040
float fminf(float x, float y) { return __devicelib_fminf(x, y); }
4141

42+
DEVICE_EXTERN_C_INLINE
43+
float truncf(float x) { return __devicelib_truncf(x); }
44+
45+
DEVICE_EXTERN_C_INLINE
46+
float sinpif(float x) { return __devicelib_sinpif(x); }
47+
48+
DEVICE_EXTERN_C_INLINE
49+
float rsqrtf(float x) { return __devicelib_rsqrtf(x); }
50+
51+
DEVICE_EXTERN_C_INLINE
52+
float exp10f(float x) { return __devicelib_exp10f(x); }
53+
4254
DEVICE_EXTERN_C_INLINE
4355
div_t div(int x, int y) { return __devicelib_div(x, y); }
4456

libdevice/cmath_wrapper_fp64.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,18 @@ extern "C" SYCL_EXTERNAL double __devicelib_fmin(double, double);
3636
DEVICE_EXTERN_C_INLINE
3737
double fmin(double x, double y) { return __devicelib_fmin(x, y); }
3838

39+
DEVICE_EXTERN_C_INLINE
40+
double trunc(double x) { return __devicelib_trunc(x); }
41+
42+
DEVICE_EXTERN_C_INLINE
43+
double sinpi(double x) { return __devicelib_sinpi(x); }
44+
45+
DEVICE_EXTERN_C_INLINE
46+
double rsqrt(double x) { return __devicelib_rsqrt(x); }
47+
48+
DEVICE_EXTERN_C_INLINE
49+
double exp10(double x) { return __devicelib_exp10(x); }
50+
3951
DEVICE_EXTERN_C_INLINE
4052
double log(double x) { return __devicelib_log(x); }
4153

libdevice/device_math.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,30 @@ float __devicelib_fminf(float x, float y);
7676
DEVICE_EXTERN_C
7777
double __devicelib_fmin(double x, double y);
7878

79+
DEVICE_EXTERN_C
80+
float __devicelib_truncf(float x);
81+
82+
DEVICE_EXTERN_C
83+
double __devicelib_trunc(double x);
84+
85+
DEVICE_EXTERN_C
86+
double __devicelib_sinpi(double x);
87+
88+
DEVICE_EXTERN_C
89+
float __devicelib_sinpif(float x);
90+
91+
DEVICE_EXTERN_C
92+
double __devicelib_rsqrt(double x);
93+
94+
DEVICE_EXTERN_C
95+
float __devicelib_rsqrtf(float x);
96+
97+
DEVICE_EXTERN_C
98+
double __devicelib_exp10(double x);
99+
100+
DEVICE_EXTERN_C
101+
float __devicelib_exp10f(float x);
102+
79103
DEVICE_EXTERN_C
80104
div_t __devicelib_div(int x, int y);
81105

libdevice/fallback-cmath-fp64.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ double __devicelib_fmax(double x, double y) { return __spirv_ocl_fmax(x, y); }
3535
DEVICE_EXTERN_C_INLINE
3636
double __devicelib_fmin(double x, double y) { return __spirv_ocl_fmin(x, y); }
3737

38+
DEVICE_EXTERN_C_INLINE
39+
double __devicelib_trunc(double x) { return __spirv_ocl_trunc(x); }
40+
41+
DEVICE_EXTERN_C_INLINE
42+
double __devicelib_sinpi(double x) { return __spirv_ocl_sinpi(x); }
43+
44+
DEVICE_EXTERN_C_INLINE
45+
double __devicelib_rsqrt(double x) { return __spirv_ocl_rsqrt(x); }
46+
47+
DEVICE_EXTERN_C_INLINE
48+
double __devicelib_exp10(double x) { return __spirv_ocl_exp10(x); }
49+
3850
DEVICE_EXTERN_C_INLINE
3951
double __devicelib_log(double x) { return __spirv_ocl_log(x); }
4052

libdevice/fallback-cmath.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,18 @@ float __devicelib_fmaxf(float x, float y) { return __spirv_ocl_fmax(x, y); }
4545
DEVICE_EXTERN_C_INLINE
4646
float __devicelib_fminf(float x, float y) { return __spirv_ocl_fmin(x, y); }
4747

48+
DEVICE_EXTERN_C_INLINE
49+
float __devicelib_truncf(float x) { return __spirv_ocl_trunc(x); }
50+
51+
DEVICE_EXTERN_C_INLINE
52+
float __devicelib_sinpif(float x) { return __spirv_ocl_sinpi(x); }
53+
54+
DEVICE_EXTERN_C_INLINE
55+
float __devicelib_rsqrtf(float x) { return __spirv_ocl_rsqrt(x); }
56+
57+
DEVICE_EXTERN_C_INLINE
58+
float __devicelib_exp10f(float x) { return __spirv_ocl_exp10(x); }
59+
4860
DEVICE_EXTERN_C_INLINE
4961
div_t __devicelib_div(int x, int y) { return {x / y, x % y}; }
5062

mlir/utils/vscode/package-lock.json

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sycl/doc/developer/ContributeToDPCPP.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,39 @@ These tests verify SYCL specification conformance. All implementation details
164164
are out of scope for the tests.
165165
See DPC++ compiler invocation definitions at
166166
[FindIntel_SYCL](https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-1.2.1/master/cmake/FindIntel_SYCL.cmake))
167+
168+
## Unified Runtime Updates
169+
170+
To integrate changes from the [Unified Runtime][ur] project into DPC++ there
171+
two main options which depend on the scope of those changes and the current
172+
state of DPC++.
173+
174+
1. Synchronized update:
175+
* When: If the Unified Runtime change touches the API/ABI, more than one
176+
adapter, or common code such as the loader.
177+
* How: Update the `UNIFIED_RUNTIME_TAG` to point at the desired commit or tag
178+
name in the Unified Runtime repository and ensure that any tag for specific
179+
adapters are set to use `${UNIFIED_RUNTIME_TAG}`.
180+
181+
2. Decoupled update:
182+
* When: If only a single Unified Runtime adatper has changed.
183+
* How: Update the tag used in the `fetch_adapter_source()` call for a
184+
specific Unified Runtime adapter, e.g. Level Zero, OpenCL, CUDA, HIP, or
185+
Native CPU.
186+
187+
In general, a synchronized update should be the default. However, when there
188+
are a lot of changes in flight in parallel always synchronizing the tag can be
189+
troublesome. This is when a decoupled update can help sustain the merge
190+
velocity of Unified Runtime changes.
191+
192+
The [intel/unified-runtime-reviewers][ur-reviewers-team] team is responsible
193+
for ensuring that the Unified Runtime tag is updated correctly and will only
194+
provide code owner approval to pull requests once the following criteria are
195+
met:
196+
197+
* Tags are pointing to a valid commit or tag on Unified Runtime main branch.
198+
* Changes to additional code owned files are in a good state.
199+
* GitHub Actions checks are passing.
200+
201+
[ur]: https://github.com/oneapi-src/unified-runtime
202+
[ur-reviewers-team]: https://github.com/orgs/intel/teams/unified-runtime-reviewers

sycl/include/sycl/atomic_ref.hpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -568,9 +568,14 @@ class [[__sycl_detail__::__uses_aspects__(aspect::atomic64)]] atomic_ref_impl<
568568
// Partial specialization for pointer types
569569
// Arithmetic is emulated because target's representation of T* is unknown
570570
// TODO: Find a way to use intptr_t or uintptr_t atomics instead
571-
template <typename T, bool IsAspectAtomic64AttrUsed, memory_order DefaultOrder, memory_scope DefaultScope,
572-
access::address_space AddressSpace>
573-
class atomic_ref_impl<T *, IsAspectAtomic64AttrUsed, DefaultOrder, DefaultScope, AddressSpace>
571+
template <typename T, bool IsAspectAtomic64AttrUsed, memory_order DefaultOrder,
572+
memory_scope DefaultScope, access::address_space AddressSpace>
573+
#ifndef __SYCL_DEVICE_ONLY__
574+
class atomic_ref_impl<
575+
#else
576+
class [[__sycl_detail__::__uses_aspects__(aspect::atomic64)]] atomic_ref_impl<
577+
#endif
578+
T *, IsAspectAtomic64AttrUsed, DefaultOrder, DefaultScope, AddressSpace>
574579
: public atomic_ref_base<uintptr_t, DefaultOrder, DefaultScope,
575580
AddressSpace> {
576581

sycl/include/sycl/detail/generic_type_traits.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ template <typename T> auto convertToOpenCLType(T &&x) {
342342
std::declval<ElemTy>()))>,
343343
no_ref::size()>;
344344
#ifdef __SYCL_DEVICE_ONLY__
345+
346+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
345347
// TODO: for some mysterious reasons on NonUniformGroups E2E tests fail if
346348
// we use the "else" version only. I suspect that's an issues with
347349
// non-uniform groups implementation.
@@ -350,6 +352,10 @@ template <typename T> auto convertToOpenCLType(T &&x) {
350352
else
351353
return static_cast<typename MatchingVec::vector_t>(
352354
x.template as<MatchingVec>());
355+
#else // __INTEL_PREVIEW_BREAKING_CHANGES
356+
return sycl::bit_cast<typename MatchingVec::vector_t>(x);
357+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
358+
353359
#else
354360
return x.template as<MatchingVec>();
355361
#endif

0 commit comments

Comments
 (0)