Skip to content

Commit 91087b9

Browse files
[SYCL][Graph] Fix memset queue shortcut when queue is recorded (#12508)
Memset queue shortcut `queue::memset()` manages the memset direclty from the host (without going through the normal path, i.e. the handler). We added a specific case when the queue is recorded to use the normal path instead of the optimized path.
1 parent c872cad commit 91087b9

File tree

3 files changed

+55
-38
lines changed

3 files changed

+55
-38
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,6 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
118118
// Emit a begin/end scope for this call
119119
PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
120120
#endif
121-
if (MGraph.lock()) {
122-
throw sycl::exception(make_error_code(errc::invalid),
123-
"The memset feature is not yet available "
124-
"for use with the SYCL Graph extension.");
125-
}
126121

127122
return submitMemOpHelper(
128123
Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
@@ -169,19 +164,14 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
169164
// Emit a begin/end scope for this call
170165
PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
171166
#endif
172-
// If we have a command graph set we need to capture the copy through normal
173-
// queue submission rather than execute the copy directly.
174-
auto HandlerFunc = [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); };
175-
if (MGraph.lock())
176-
return submitWithHandler(Self, DepEvents, HandlerFunc);
177167

178168
if ((!Src || !Dest) && Count != 0) {
179169
report(CodeLoc);
180170
throw runtime_error("NULL pointer argument in memory copy operation.",
181171
PI_ERROR_INVALID_VALUE);
182172
}
183173
return submitMemOpHelper(
184-
Self, DepEvents, HandlerFunc,
174+
Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
185175
[](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
186176
Count, Dest);
187177
}
@@ -190,14 +180,9 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
190180
const void *Ptr, size_t Length,
191181
pi_mem_advice Advice,
192182
const std::vector<event> &DepEvents) {
193-
// If we have a command graph set we need to capture the advise through normal
194-
// queue submission.
195-
auto HandlerFunc = [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); };
196-
if (MGraph.lock())
197-
return submitWithHandler(Self, DepEvents, HandlerFunc);
198-
199183
return submitMemOpHelper(
200-
Self, DepEvents, HandlerFunc,
184+
Self, DepEvents,
185+
[&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
201186
[](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
202187
Self, Length, Advice);
203188
}
@@ -353,7 +338,10 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
353338
const std::vector<event> &ExpandedDepEvents =
354339
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
355340

356-
if (areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
341+
// If we have a command graph set we need to capture the op through the
342+
// handler rather than by-passing the scheduler.
343+
if (!MGraph.lock() &&
344+
areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
357345
if (MSupportsDiscardingPiEvents) {
358346
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
359347
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
//
6+
// Tests adding a USM memset queue shortcut operation as a graph node.
7+
8+
#include "../graph_common.hpp"
9+
10+
int main() {
11+
12+
queue Queue;
13+
14+
if (!are_graphs_supported(Queue)) {
15+
return 0;
16+
}
17+
18+
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
19+
20+
const size_t N = 10;
21+
unsigned char *Arr = malloc_device<unsigned char>(N, Queue);
22+
23+
int Value = 77;
24+
Graph.begin_recording(Queue);
25+
auto Init = Queue.memset(Arr, Value, N);
26+
Queue.submit([&](handler &CGH) {
27+
CGH.depends_on(Init);
28+
CGH.single_task<class double_dest>([=]() {
29+
for (int i = 0; i < Size; i++)
30+
Arr[i] = 2 * Arr[i];
31+
});
32+
});
33+
34+
Graph.end_recording();
35+
36+
auto ExecGraph = Graph.finalize();
37+
38+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait();
39+
40+
std::vector<unsigned char> Output(N);
41+
Queue.memcpy(Output.data(), Arr, N).wait();
42+
for (int i = 0; i < N; i++)
43+
assert(Output[i] == (Value * 2));
44+
45+
sycl::free(Arr, Queue);
46+
47+
return 0;
48+
}

sycl/unittests/Extensions/CommandGraph.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,25 +1848,6 @@ TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) {
18481848
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
18491849
}
18501850

1851-
TEST_F(CommandGraphTest, USMMemsetShortcutExceptionCheck) {
1852-
1853-
const size_t N = 10;
1854-
unsigned char *Arr = malloc_device<unsigned char>(N, Queue);
1855-
int Value = 77;
1856-
1857-
Graph.begin_recording(Queue);
1858-
1859-
std::error_code ExceptionCode = make_error_code(sycl::errc::success);
1860-
try {
1861-
Queue.memset(Arr, Value, N);
1862-
} catch (exception &Exception) {
1863-
ExceptionCode = Exception.code();
1864-
}
1865-
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
1866-
1867-
Graph.end_recording(Queue);
1868-
}
1869-
18701851
TEST_F(CommandGraphTest, Memcpy2DExceptionCheck) {
18711852
constexpr size_t RECT_WIDTH = 30;
18721853
constexpr size_t RECT_HEIGHT = 21;

0 commit comments

Comments
 (0)