Skip to content

Commit 95a858d

Browse files
fabiomestreEwanCBensuoreble
authored
[SYCL][Graph] Add specification for kernel binary updates (#14896)
Adds the kernel binary update feature to the sycl graph specification. This introduces a new dynamic_command_group class which can be used to update the command-group function of a kernel nodes in graphs. Implemented in: * #16154 * #15700 --------- Co-authored-by: Ewan Crawford <[email protected]> Co-authored-by: Ben Tracy <[email protected]> Co-authored-by: Pablo Reble <[email protected]>
1 parent 6347914 commit 95a858d

24 files changed

+493
-80
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc

Lines changed: 265 additions & 37 deletions
Large diffs are not rendered by default.

sycl/doc/syclgraph/SYCLGraphUsageGuide.md

Lines changed: 120 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -394,12 +394,12 @@ sycl_ext::command_graph myGraph(myContext, myDevice);
394394
395395
int myScalar = 42;
396396
// Create graph dynamic parameters
397-
dynamic_parameter dynParamInput(myGraph, ptrX);
398-
dynamic_parameter dynParamScalar(myGraph, myScalar);
397+
sycl_ext::dynamic_parameter dynParamInput(myGraph, ptrX);
398+
sycl_ext::dynamic_parameter dynParamScalar(myGraph, myScalar);
399399
400400
// The node uses ptrX as an input & output parameter, with operand
401401
// mySclar as another argument.
402-
node kernelNode = myGraph.add([&](handler& cgh) {
402+
sycl_ext::node kernelNode = myGraph.add([&](handler& cgh) {
403403
cgh.set_args(dynParamInput, ptrY, dynParamScalar);
404404
cgh.parallel_for(range {n}, builtinKernel);
405405
});
@@ -438,9 +438,9 @@ sycl::buffer bufferB{...};
438438

439439
// Create graph dynamic parameter using a placeholder accessor, since the
440440
// sycl::handler is not available here outside of the command-group scope.
441-
dynamic_parameter dynParamAccessor(myGraph, bufferA.get_access());
441+
sycl_ext::dynamic_parameter dynParamAccessor(myGraph, bufferA.get_access());
442442

443-
node kernelNode = myGraph.add([&](handler& cgh) {
443+
sycl_ext::node kernelNode = myGraph.add([&](handler& cgh) {
444444
// Require the accessor contained in the dynamic paramter
445445
cgh.require(dynParamAccessor);
446446
// Set the arg on the kernel using the dynamic parameter directly
@@ -453,6 +453,121 @@ node kernelNode = myGraph.add([&](handler& cgh) {
453453
dynParamAccessor.update(bufferB.get_access());
454454
```
455455

456+
### Dynamic Command Groups
457+
458+
Example showing how a graph with a dynamic command group node can be updated.
459+
460+
```cpp
461+
...
462+
using namespace sycl;
463+
namespace sycl_ext = sycl::ext::oneapi::experimental;
464+
465+
queue Queue{};
466+
sycl_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
467+
468+
int *PtrA = malloc_device<int>(1024, Queue);
469+
int *PtrB = malloc_device<int>(1024, Queue);
470+
471+
auto CgfA = [&](handler &cgh) {
472+
cgh.parallel_for(1024, [=](item<1> Item) {
473+
PtrA[Item.get_id()] = 1;
474+
});
475+
};
476+
477+
auto CgfB = [&](handler &cgh) {
478+
cgh.parallel_for(512, [=](item<1> Item) {
479+
PtrB[Item.get_id()] = 2;
480+
});
481+
};
482+
483+
// Construct a dynamic command-group with CgfA as the active cgf (index 0).
484+
auto DynamicCG = sycl_ext::dynamic_command_group(Graph, {CgfA, CgfB});
485+
486+
// Create a dynamic command-group graph node.
487+
auto DynamicCGNode = Graph.add(DynamicCG);
488+
489+
auto ExecGraph = Graph.finalize(sycl_ext::property::graph::updatable{});
490+
491+
// The graph will execute CgfA.
492+
Queue.ext_oneapi_graph(ExecGraph).wait();
493+
494+
// Sets CgfB as active in the dynamic command-group (index 1).
495+
DynamicCG.set_active_index(1);
496+
497+
// Calls update to update the executable graph node with the changes to DynamicCG.
498+
ExecGraph.update(DynamicCGNode);
499+
500+
// The graph will execute CgfB.
501+
Queue.ext_oneapi_graph(ExecGraph).wait();
502+
```
503+
504+
### Dynamic Command Groups With Dynamic Parameters
505+
506+
Example showing how a graph with a dynamic command group that uses dynamic
507+
parameters in a node can be updated.
508+
509+
```cpp
510+
...
511+
using namespace sycl;
512+
namespace sycl_ext = sycl::ext::oneapi::experimental;
513+
514+
size_t N = 1024;
515+
queue Queue{};
516+
auto MyContext = Queue.get_context();
517+
auto MyDevice = Queue.get_device();
518+
sycl_ext::command_graph Graph{MyContext, MyDevice};
519+
520+
int *PtrA = malloc_device<int>(N, Queue);
521+
int *PtrB = malloc_device<int>(N, Queue);
522+
523+
// Kernels loaded from kernel bundle
524+
const std::vector<kernel_id> BuiltinKernelIds =
525+
MyDevice.get_info<info::device::built_in_kernel_ids>();
526+
kernel_bundle<bundle_state::executable> MyBundle =
527+
get_kernel_bundle<sycl::bundle_state::executable>(MyContext, { MyDevice }, BuiltinKernelIds);
528+
529+
kernel BuiltinKernelA = MyBundle.get_kernel(BuiltinKernelIds[0]);
530+
kernel BuiltinKernelB = MyBundle.get_kernel(BuiltinKernelIds[1]);
531+
532+
// Create a dynamic parameter with an initial value of PtrA
533+
sycl_ext::dynamic_parameter DynamicPointerArg{Graph, PtrA};
534+
535+
// Create command groups for both kernels which use DynamicPointerArg
536+
auto CgfA = [&](handler &cgh) {
537+
cgh.set_arg(0, DynamicPointerArg);
538+
cgh.parallel_for(range {N}, BuiltinKernelA);
539+
};
540+
541+
auto CgfB = [&](handler &cgh) {
542+
cgh.set_arg(0, DynamicPointerArg);
543+
cgh.parallel_for(range {N / 2}, BuiltinKernelB);
544+
};
545+
546+
// Construct a dynamic command-group with CgfA as the active cgf (index 0).
547+
auto DynamicCG = sycl_ext::dynamic_command_group(Graph, {CgfA, CgfB});
548+
549+
// Create a dynamic command-group graph node.
550+
auto DynamicCGNode = Graph.add(DynamicCG);
551+
552+
auto ExecGraph = Graph.finalize(sycl_ext::property::graph::updatable{});
553+
554+
// The graph will execute CgfA with PtrA.
555+
Queue.ext_oneapi_graph(ExecGraph).wait();
556+
557+
//Update DynamicPointerArg with a new value
558+
DynamicPointerArg.update(PtrB);
559+
560+
// Sets CgfB as active in the dynamic command-group (index 1).
561+
DynamicCG.set_active_index(1);
562+
563+
// Calls update to update the executable graph node with the changes to
564+
// DynamicCG and DynamicPointerArg.
565+
ExecGraph.update(DynamicCGNode);
566+
567+
// The graph will execute CgfB with PtrB.
568+
Queue.ext_oneapi_graph(ExecGraph).wait();
569+
```
570+
456571
### Whole Graph Update
457572

458573
Example that shows recording and updating several nodes with different

sycl/include/sycl/ext/oneapi/experimental/graph.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,8 @@ class __SYCL_EXPORT dynamic_command_group {
226226
const command_graph<graph_state::modifiable> &Graph,
227227
const std::vector<std::function<void(handler &)>> &CGFList);
228228

229-
size_t get_active_cgf() const;
230-
void set_active_cgf(size_t Index);
229+
size_t get_active_index() const;
230+
void set_active_index(size_t Index);
231231

232232
private:
233233
template <class Obj>

sycl/source/detail/graph_impl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,10 +2078,10 @@ dynamic_command_group::dynamic_command_group(
20782078
impl->finalizeCGFList(CGFList);
20792079
}
20802080

2081-
size_t dynamic_command_group::get_active_cgf() const {
2081+
size_t dynamic_command_group::get_active_index() const {
20822082
return impl->getActiveIndex();
20832083
}
2084-
void dynamic_command_group::set_active_cgf(size_t Index) {
2084+
void dynamic_command_group::set_active_index(size_t Index) {
20852085
return impl->setActiveIndex(Index);
20862086
}
20872087
} // namespace experimental

sycl/test-e2e/Graph/Update/dyn_cgf_accessor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ int main() {
4545
assert(HostData[i] == PatternA);
4646
}
4747

48-
DynamicCG.set_active_cgf(1);
48+
DynamicCG.set_active_index(1);
4949
ExecGraph.update(DynamicCGNode);
5050
Queue.ext_oneapi_graph(ExecGraph).wait();
5151

sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ int main() {
6060
assert(HostData[i] == Ref);
6161
}
6262

63-
DynamicCG.set_active_cgf(1);
63+
DynamicCG.set_active_index(1);
6464
ExecGraph.update(DynamicCGNode);
6565

6666
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_accessor_deps2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ int main() {
6969
assert(HostData[i] == (InitA + InitB + PatternA));
7070
}
7171

72-
DynamicCG.set_active_cgf(1);
72+
DynamicCG.set_active_index(1);
7373
ExecGraph.update(DynamicCGNode);
7474

7575
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_accessor_spv.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ int main(int, char **argv) {
7373
assert(check_value(i, 0, HostDataB[i], "HostDataB"));
7474
}
7575

76-
DynamicCG.set_active_cgf(1);
76+
DynamicCG.set_active_index(1);
7777
ExecGraph.update(DynamicCGNode);
7878

7979
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ int main() {
9090
// CHECK-SAME: .argIndex = 0
9191
// CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC
9292
// CHECK-SAME: .argIndex = 1
93-
DynamicCG.set_active_cgf(1);
93+
DynamicCG.set_active_index(1);
9494
ExecGraph.update(DynamicCGNode);
9595
Queue.ext_oneapi_graph(ExecGraph).wait();
9696
Queue.copy(Ptr, HostData.data(), Size).wait();
@@ -107,7 +107,7 @@ int main() {
107107
// CHECK-SAME: .numNewValueArgs = 0
108108
// CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC
109109
// CHECK-SAME: .argIndex = 0
110-
DynamicCG.set_active_cgf(2);
110+
DynamicCG.set_active_index(2);
111111
ExecGraph.update(DynamicCGNode);
112112
Queue.ext_oneapi_graph(ExecGraph).wait();
113113
Queue.copy(Ptr, HostData.data(), Size).wait();
@@ -130,7 +130,7 @@ int main() {
130130
// CHECK-SAME: .argIndex = 2
131131
// CHECK-SAME: .stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC
132132
// CHECK-SAME: .argIndex = 3
133-
DynamicCG.set_active_cgf(3);
133+
DynamicCG.set_active_index(3);
134134
ExecGraph.update(DynamicCGNode);
135135
Queue.ext_oneapi_graph(ExecGraph).wait();
136136
Queue.copy(Ptr, HostData.data(), Size).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_event_deps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ int main() {
5252
assert(HostData[i] == PatternA * PatternB);
5353
}
5454

55-
DynamicCG.set_active_cgf(1);
55+
DynamicCG.set_active_index(1);
5656
ExecGraph.update(DynamicCGNode);
5757

5858
Queue.ext_oneapi_graph(ExecGraph).wait();
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// Tests the `get_active_index()` query
9+
10+
#include "../graph_common.hpp"
11+
12+
int main() {
13+
queue Queue{};
14+
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
15+
16+
int *Ptr = malloc_device<int>(Size, Queue);
17+
std::vector<int> HostData(Size);
18+
19+
int PatternA = 42;
20+
auto CGFA = [&](handler &CGH) {
21+
CGH.parallel_for(Size,
22+
[=](item<1> Item) { Ptr[Item.get_id()] = PatternA; });
23+
};
24+
25+
int PatternB = 0xA;
26+
auto CGFB = [&](handler &CGH) {
27+
CGH.parallel_for(Size,
28+
[=](item<1> Item) { Ptr[Item.get_id()] = PatternB; });
29+
};
30+
31+
auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB});
32+
size_t ActiveIndex = DynamicCG.get_active_index();
33+
assert(0 == ActiveIndex); // Active index is zero by default
34+
35+
// Set active index to 1 before adding node to graph
36+
DynamicCG.set_active_index(1);
37+
ActiveIndex = DynamicCG.get_active_index();
38+
assert(1 == ActiveIndex);
39+
40+
auto DynamicCGNode = Graph.add(DynamicCG);
41+
42+
// Set active index to 0 before finalizing the graph
43+
DynamicCG.set_active_index(0);
44+
ActiveIndex = DynamicCG.get_active_index();
45+
assert(0 == ActiveIndex);
46+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
47+
48+
Queue.ext_oneapi_graph(ExecGraph).wait();
49+
Queue.copy(Ptr, HostData.data(), Size).wait();
50+
for (size_t i = 0; i < Size; i++) {
51+
assert(HostData[i] == PatternA);
52+
}
53+
54+
// Set active index to 1 before updating the graph
55+
DynamicCG.set_active_index(1);
56+
ActiveIndex = DynamicCG.get_active_index();
57+
assert(1 == ActiveIndex);
58+
59+
ExecGraph.update(DynamicCGNode);
60+
61+
Queue.ext_oneapi_graph(ExecGraph).wait();
62+
Queue.copy(Ptr, HostData.data(), Size).wait();
63+
for (size_t i = 0; i < Size; i++) {
64+
assert(HostData[i] == PatternB);
65+
}
66+
67+
sycl::free(Ptr, Queue);
68+
69+
return 0;
70+
}

sycl/test-e2e/Graph/Update/dyn_cgf_ndrange.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ int main() {
5151
}
5252
}
5353

54-
DynamicCG.set_active_cgf(1);
54+
DynamicCG.set_active_index(1);
5555
ExecGraph.update(DynamicCGNode);
5656

5757
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_ndrange_3D.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ int main() {
5454
assert(HostData[i] == PatternA);
5555
}
5656

57-
DynamicCG.set_active_cgf(1);
57+
DynamicCG.set_active_index(1);
5858
ExecGraph.update(DynamicCGNode);
5959

6060
Queue.ext_oneapi_graph(ExecGraph).wait();
@@ -63,7 +63,7 @@ int main() {
6363
assert(HostData[i] == PatternB);
6464
}
6565

66-
DynamicCG.set_active_cgf(2);
66+
DynamicCG.set_active_index(2);
6767
ExecGraph.update(DynamicCGNode);
6868

6969
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_overwrite_range.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ int main() {
3636
sycl::range<1> UpdateRange(NewRange);
3737
DynamicCGNode.update_range(UpdateRange);
3838

39-
DynamicCG.set_active_cgf(1);
39+
DynamicCG.set_active_index(1);
4040

4141
// Check that the UpdateRange from active CGF 0 is preserved
42-
DynamicCG.set_active_cgf(0);
42+
DynamicCG.set_active_index(0);
4343
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
4444

4545
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_parameters.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ int main() {
4949
assert(HostDataB[i] == 0);
5050
}
5151

52-
DynamicCG.set_active_cgf(1);
52+
DynamicCG.set_active_index(1);
5353
ExecGraph.update(DynamicCGNode);
5454

5555
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_shared_nodes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ int main() {
5555
assert(HostData[i] == Ref);
5656
}
5757

58-
DynamicCG.set_active_cgf(1);
58+
DynamicCG.set_active_index(1);
5959
ExecGraph.update(Node1);
6060
ExecGraph.update(Node3);
6161

sycl/test-e2e/Graph/Update/dyn_cgf_update_before_finalize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int main() {
3131

3232
auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB});
3333
auto DynamicCGNode = Graph.add(DynamicCG);
34-
DynamicCG.set_active_cgf(1);
34+
DynamicCG.set_active_index(1);
3535
auto ExecGraph = Graph.finalize();
3636

3737
Queue.ext_oneapi_graph(ExecGraph).wait();

sycl/test-e2e/Graph/Update/dyn_cgf_usm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ int main() {
3838
assert(HostData[i] == PatternA);
3939
}
4040

41-
DynamicCG.set_active_cgf(1);
41+
DynamicCG.set_active_index(1);
4242
ExecGraph.update(DynamicCGNode);
4343

4444
Queue.ext_oneapi_graph(ExecGraph).wait();

0 commit comments

Comments
 (0)