Skip to content

Commit 8085e49

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 5a88f7b + 156b730 commit 8085e49

File tree

11 files changed

+295
-28
lines changed

11 files changed

+295
-28
lines changed

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_cmpxchg.cl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,17 @@
2323
memory_order_success) \
2424
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \
2525
memory_order_failure) \
26-
TYPE original_val = *p; \
27-
bool success = __hip_atomic_compare_exchange_strong( \
28-
p, &expected, desired, memory_order_success, memory_order_failure, \
29-
atomic_scope); \
30-
\
31-
return success ? original_val : *p; \
26+
__hip_atomic_compare_exchange_strong(p, &expected, desired, \
27+
memory_order_success, \
28+
memory_order_failure, atomic_scope); \
29+
/* If cmpxchg \
30+
* succeeds: \
31+
- `expected` is unchanged, holding the old val that was at `p` \
32+
- `p` is changed to hold `desired` \
33+
* fails: \
34+
- `expected` is changed to hold the current val at `p` \
35+
- `p` is unchanged*/ \
36+
return expected; \
3237
}
3338

3439
#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \
@@ -37,7 +42,7 @@
3742
AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4)
3843

3944
AMDGPU_ATOMIC_CMPXCHG(int, i)
40-
AMDGPU_ATOMIC_CMPXCHG(unsigned int, j)
45+
AMDGPU_ATOMIC_CMPXCHG(unsigned, j)
4146
AMDGPU_ATOMIC_CMPXCHG(long, l)
4247
AMDGPU_ATOMIC_CMPXCHG(unsigned long, m)
4348
AMDGPU_ATOMIC_CMPXCHG(float, f)

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_helpers.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,25 @@
7171
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \
7272
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \
7373
AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN)
74+
75+
#define AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \
76+
SUB1, OP) \
77+
_CLC_DEF TYPE \
78+
FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
79+
volatile AS TYPE *p, enum Scope scope, \
80+
enum MemorySemanticsMask semantics, TYPE val) { \
81+
int atomic_scope = 0, memory_order = 0; \
82+
GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \
83+
TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \
84+
TYPE newval = 0; \
85+
do { \
86+
newval = oldval OP val; \
87+
} while (!__hip_atomic_compare_exchange_strong( \
88+
p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \
89+
return oldval; \
90+
}
91+
92+
#define AMDGPU_CAS_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, OP) \
93+
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, OP) \
94+
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, OP) \
95+
AMDGPU_CAS_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, OP)

libclc/amdgcn-amdhsa/libspirv/atomic/atomic_xor.cl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
#include <spirv/spirv.h>
1111
#include <spirv/spirv_types.h>
1212

13-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, int, i, __hip_atomic_fetch_xor)
14-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __hip_atomic_fetch_xor)
15-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, long, l, __hip_atomic_fetch_xor)
16-
AMDGPU_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __hip_atomic_fetch_xor)
13+
#define __CLC_XOR ^
1714

15+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, int, i, __CLC_XOR)
16+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned int, j, __CLC_XOR)
17+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, long, l, __CLC_XOR)
18+
AMDGPU_CAS_ATOMIC(_Z17__spirv_AtomicXor, unsigned long, m, __CLC_XOR)
19+
20+
#undef __CLC_XOR
1821
#undef AMDGPU_ATOMIC
1922
#undef AMDGPU_ATOMIC_IMPL
2023
#undef AMDGPU_ARCH_GEQ

sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,11 @@ class depends_on {
336336
depends_on(NodeTN... nodes);
337337
};
338338
339+
class depends_on_all_leaves {
340+
public:
341+
depends_on_all_leaves() = default;
342+
};
343+
339344
} // namespace node
340345
} // namespace property
341346
@@ -481,6 +486,21 @@ class depends_on {
481486
}
482487
----
483488

489+
==== Depends-On-All-Leaves Property
490+
The API for explicitly adding nodes to a `command_graph` includes a
491+
`property_list` parameter. This extension defines the `depends_on_all_leaves`
492+
property to be passed here. `depends_on_all_leaves` provides a shortcut for
493+
adding all the current leaves of a graph as dependencies.
494+
[source,c++]
495+
----
496+
namespace sycl::ext::oneapi::experimental::property::node {
497+
class depends_on_all_leaves {
498+
public:
499+
depends_on_all_leaves();
500+
};
501+
}
502+
----
503+
484504
=== Graph
485505

486506
This extension adds a new `command_graph` object which follows the
@@ -737,11 +757,11 @@ print_graph(std::string path, bool verbose = false) const;
737757
----
738758

739759
|Synchronous operation that writes a DOT formatted description of the graph to the
740-
provided path. By default, this includes the graph topology, node types, node id,
741-
and kernel names.
742-
Verbose can be set to true to write more detailed information about each node type
760+
provided path. By default, this includes the graph topology, node types, node id,
761+
and kernel names.
762+
Verbose can be set to true to write more detailed information about each node type
743763
such as kernel arguments, copy source, and destination addresses.
744-
At the moment DOT format is the only supported format. The name of hte output file
764+
At the moment DOT format is the only supported format. The name of the output file
745765
must therefore match this extension, i.e. "<filename>.dot".
746766

747767
Parameters:
@@ -752,7 +772,7 @@ or memory access where applicable.
752772

753773
Exceptions:
754774

755-
* Throws synchronously with error code `invalid` if the path is invalid or
775+
* Throws synchronously with error code `invalid` if the path is invalid or
756776
the file extension is not supported or if the write operation failed.
757777

758778
|===
@@ -1200,11 +1220,12 @@ passed an invalid event.
12001220

12011221
The new handler methods, and queue shortcuts, defined by
12021222
link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier]
1203-
cannot be used in graph nodes. A synchronous exception will be thrown with
1204-
error code `invalid` if a user tries to add them to a graph.
1205-
1206-
Removing this restriction is something we may look at for future revisions of
1207-
`sycl_ext_oneapi_graph`.
1223+
can only be used in graph nodes created using the Record & Replay API, as
1224+
barriers rely on events to enforce dependencies. A synchronous exception will be
1225+
thrown with error code `invalid` if a user tries to add them to a graph using
1226+
the Explicit API. Empty nodes created with the `node::depends_on_all_leaves`
1227+
property can be used instead of barriers when a user is building a graph with
1228+
the explicit API.
12081229

12091230
==== sycl_ext_oneapi_memcpy2d
12101231

@@ -1773,12 +1794,14 @@ if used in application code.
17731794
. Using `handler::memset` in a graph node.
17741795
. Using `handler::prefetch` in a graph node.
17751796
. Using `handler::memadvise` in a graph node.
1776-
. Using specialization constants in a graph node.
17771797
. Using reductions in a graph node.
17781798
. Using sycl streams in a graph node.
1779-
. Using a kernel bundle in a graph node.
17801799
. Profiling an event returned from graph submission with
17811800
`event::get_profiling_info()`.
1801+
. Level Zero immediate command-lists are not supported, and
1802+
`sycl::ext::intel::property::queue::no_immediate_command_list`
1803+
should be set on construction to any queues an executable
1804+
graph is submitted to.
17821805

17831806
== Revision History
17841807

sycl/include/sycl/detail/property_helper.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ enum DataLessPropKind {
4646
QueueSubmissionImmediate = 21,
4747
GraphAssumeDataOutlivesBuffer = 22,
4848
GraphAssumeBufferOutlivesGraph = 23,
49+
GraphDependOnAllLeaves = 24,
4950
// Indicates the last known dataless property.
50-
LastKnownDataLessPropKind = 23,
51+
LastKnownDataLessPropKind = 24,
5152
// Exceeding 32 may cause ABI breaking change on some of OSes.
5253
DataLessPropKindSize = 32
5354
};

sycl/include/sycl/ext/oneapi/experimental/graph.hpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,14 @@ class depends_on : public ::sycl::detail::PropertyWithData<
137137
const std::vector<::sycl::ext::oneapi::experimental::node> MDeps;
138138
};
139139

140+
/// Property used to to add all previous graph leaves as dependencies when
141+
/// creating a new node with command_graph::add().
142+
class depends_on_all_leaves : public ::sycl::detail::DataLessProperty<
143+
::sycl::detail::GraphDependOnAllLeaves> {
144+
public:
145+
depends_on_all_leaves() = default;
146+
};
147+
140148
} // namespace node
141149
} // namespace property
142150

@@ -159,9 +167,17 @@ class __SYCL_EXPORT modifiable_command_graph {
159167
node add(const property_list &PropList = {}) {
160168
if (PropList.has_property<property::node::depends_on>()) {
161169
auto Deps = PropList.get_property<property::node::depends_on>();
162-
return addImpl(Deps.get_dependencies());
170+
node Node = addImpl(Deps.get_dependencies());
171+
if (PropList.has_property<property::node::depends_on_all_leaves>()) {
172+
addGraphLeafDependencies(Node);
173+
}
174+
return Node;
163175
}
164-
return addImpl({});
176+
node Node = addImpl({});
177+
if (PropList.has_property<property::node::depends_on_all_leaves>()) {
178+
addGraphLeafDependencies(Node);
179+
}
180+
return Node;
165181
}
166182

167183
/// Add a command-group node to the graph.
@@ -171,9 +187,17 @@ class __SYCL_EXPORT modifiable_command_graph {
171187
template <typename T> node add(T CGF, const property_list &PropList = {}) {
172188
if (PropList.has_property<property::node::depends_on>()) {
173189
auto Deps = PropList.get_property<property::node::depends_on>();
174-
return addImpl(CGF, Deps.get_dependencies());
190+
node Node = addImpl(CGF, Deps.get_dependencies());
191+
if (PropList.has_property<property::node::depends_on_all_leaves>()) {
192+
addGraphLeafDependencies(Node);
193+
}
194+
return Node;
195+
}
196+
node Node = addImpl(CGF, {});
197+
if (PropList.has_property<property::node::depends_on_all_leaves>()) {
198+
addGraphLeafDependencies(Node);
175199
}
176-
return addImpl(CGF, {});
200+
return Node;
177201
}
178202

179203
/// Add a dependency between two nodes.
@@ -247,6 +271,11 @@ class __SYCL_EXPORT modifiable_command_graph {
247271
/// @return Node added to the graph.
248272
node addImpl(const std::vector<node> &Dep);
249273

274+
/// Adds all graph leaves as dependencies
275+
/// @param Node Destination node to which the leaves of the graph will be
276+
/// added as dependencies.
277+
void addGraphLeafDependencies(node Node);
278+
250279
template <class Obj>
251280
friend decltype(Obj::impl)
252281
sycl::detail::getSyclObjImpl(const Obj &SyclObject);

sycl/plugins/cuda/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,10 @@ if (SYCL_ENABLE_XPTI_TRACING)
6363
)
6464
endif()
6565

66+
if(CUDA_cupti_LIBRARY)
67+
target_compile_definitions(pi_cuda PRIVATE
68+
"-DCUPTI_LIB_PATH=\"${CUDA_cupti_LIBRARY}\"")
69+
endif()
70+
6671
set_target_properties(pi_cuda PROPERTIES LINKER_LANGUAGE CXX)
6772

sycl/source/detail/graph_impl.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,19 @@ node modifiable_command_graph::addImpl(std::function<void(handler &)> CGF,
726726
return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
727727
}
728728

729+
void modifiable_command_graph::addGraphLeafDependencies(node Node) {
730+
// Find all exit nodes in the current graph and add them to the dependency
731+
// vector
732+
std::shared_ptr<detail::node_impl> DstImpl =
733+
sycl::detail::getSyclObjImpl(Node);
734+
graph_impl::WriteLock Lock(impl->MMutex);
735+
for (auto &NodeImpl : impl->MNodeStorage) {
736+
if ((NodeImpl->MSuccessors.size() == 0) && (NodeImpl != DstImpl)) {
737+
impl->makeEdge(NodeImpl, DstImpl);
738+
}
739+
}
740+
}
741+
729742
void modifiable_command_graph::make_edge(node &Src, node &Dest) {
730743
std::shared_ptr<detail::node_impl> SenderImpl =
731744
sycl::detail::getSyclObjImpl(Src);

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3734,6 +3734,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph7addImplES
37343734
_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph9make_edgeERNS3_4nodeES7_
37353735
_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE
37363736
_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE
3737+
_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph24addGraphLeafDependenciesENS3_4nodeE
37373738
_ZNK4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph11print_graphENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEb
37383739
_ZN4sycl3_V13ext6oneapi12experimental9image_memC1ERKNS3_16image_descriptorERKNS0_5queueE
37393740
_ZN4sycl3_V13ext6oneapi12experimental9image_memC1ERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,7 @@
873873
?add@device_global_map@detail@_V1@sycl@@YAXPEBXPEBD@Z
874874
?add@host_pipe_map@detail@_V1@sycl@@YAXPEBXPEBD@Z
875875
?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z
876+
?addGraphLeafDependencies@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXVnode@34567@@Z
876877
?addHostAccessorAndWait@detail@_V1@sycl@@YAXPEAVAccessorImplHost@123@@Z
877878
?addHostSampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVSampledImageAccessorImplHost@123@@Z
878879
?addHostUnsampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVUnsampledImageAccessorImplHost@123@@Z

0 commit comments

Comments
 (0)