Skip to content

Commit b146027

Browse files
committed
[SYCL][Doc][RTC] Add device global queries
Signed-off-by: Julian Oppermann <[email protected]>
1 parent 0e9d697 commit b146027

File tree

1 file changed

+173
-0
lines changed

1 file changed

+173
-0
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ This extension also depends on the following other SYCL extensions:
4848
sycl_ext_oneapi_properties]
4949
* link:../proposed/sycl_ext_oneapi_free_function_kernels.asciidoc[
5050
sycl_ext_oneapi_free_function_kernels]
51+
* link:../experimental/sycl_ext_oneapi_device_global.asciidoc[
52+
sycl_ext_oneapi_device_global]
5153

5254

5355
== Status
@@ -572,6 +574,8 @@ class kernel_bundle {
572574
bool ext_oneapi_has_kernel(const std::string &name);
573575
kernel ext_oneapi_get_kernel(const std::string &name);
574576
std::string ext_oneapi_get_raw_kernel_name(const std::string &name);
577+
578+
// Continued below in "New kernel bundle member functions for device globals"
575579
};
576580
577581
} // namespace sycl
@@ -800,6 +804,109 @@ sycl::kernel k_float = kb.ext_oneapi_get_kernel("bartmpl<float>");
800804
sycl::kernel k_int = kb.ext_oneapi_get_kernel("bartmpl<int>");
801805
----
802806

807+
=== New kernel bundle member functions for device globals
808+
809+
This extensions adds the following new `kernel_bundle` member functions to let
810+
the host application interact with device globals defined in runtime-compiled
811+
code. Device globals are only supported for the `source_language::sycl`
812+
language.
813+
814+
Device globals declared with the `device_image_scope` property can be used in
815+
the runtime-compiled device code, but cannot be accessed from the host. We plan
816+
to lift this limitation in a future version of the extension.
817+
818+
[source,c++]
819+
----
820+
namespace sycl {
821+
822+
template <bundle_state State>
823+
class kernel_bundle {
824+
// Continued from "New kernel bundle member functions"
825+
826+
bool ext_oneapi_has_device_global(const std::string &name, const device &dev);
827+
void *ext_oneapi_get_device_global_address(const std::string &name,
828+
const device &dev);
829+
size_t ext_oneapi_get_device_global_size(const std::string &name,
830+
const device &dev);
831+
};
832+
833+
} // namespace sycl
834+
----
835+
836+
|====
837+
a|
838+
[frame=all,grid=none]
839+
!====
840+
a!
841+
[source,c++]
842+
----
843+
bool ext_oneapi_has_device_global(const std::string &name, const device &dev)
844+
----
845+
!====
846+
847+
_Constraints:_ This function is not available when `State` is
848+
`bundle_state::ext_oneapi_source`.
849+
850+
_Returns:_ The value `true` only if
851+
852+
* the kernel bundle was created from a bundle of state
853+
`bundle_state::ext_oneapi_source` in the language `source_language::sycl`, and
854+
* it defines a device global whose name is `name` and which was declared without
855+
the `device_image_scope` property, and
856+
* `dev` is contained by the context associated with this bundle.
857+
858+
`name` must be a {cpp} identifier that is valid for referencing the device
859+
global at the bottom of the source code.
860+
861+
a|
862+
[frame=all,grid=none]
863+
!====
864+
a!
865+
[source,c++]
866+
----
867+
void *ext_oneapi_get_device_global_address(const std::string &name,
868+
const device &dev)
869+
----
870+
!====
871+
872+
_Constraints:_ This function is not available when `State` is
873+
`bundle_state::ext_oneapi_source`.
874+
875+
_Effects:_ If device memory for `name` has not been allocated at the time of
876+
this call, it will be allocated and zero-initialized synchronously.
877+
878+
_Returns:_ Returns a USM pointer to the device global `name`'s storage on device
879+
`dev`.
880+
881+
_Throws:_
882+
883+
* An `exception` with the `errc::invalid` error code if
884+
`ext_oneapi_has_device_global(name, dev)` returns `false`.
885+
* An `exception` with the `errc::memory_allocation` error code if the allocation
886+
or initialization of the device global's storage fails.
887+
888+
a|
889+
[frame=all,grid=none]
890+
!====
891+
a!
892+
[source,c++]
893+
----
894+
size_t ext_oneapi_get_device_global_size(const std::string &name,
895+
const device &dev)
896+
----
897+
!====
898+
899+
_Constraints:_ This function is not available when `State` is
900+
`bundle_state::ext_oneapi_source`.
901+
902+
_Returns:_ Returns the size in bytes of device global `name`.
903+
904+
_Throws:_
905+
906+
* An `exception` with the `errc::invalid` error code if
907+
`ext_oneapi_has_device_global(name, dev)` returns `false`.
908+
|====
909+
803910

804911
== Examples
805912

@@ -927,6 +1034,72 @@ int main() {
9271034
}
9281035
----
9291036

1037+
=== Using device globals
1038+
1039+
This examples demonstrates how a device global defined in runtime-compiled code
1040+
can be accessed from the host and the device.
1041+
1042+
[source,c++]
1043+
----
1044+
#include <sycl/sycl.hpp>
1045+
namespace syclexp = sycl::ext::oneapi::experimental;
1046+
1047+
static constexpr size_t NUM = 1024;
1048+
static constexpr size_t WGSIZE = 16;
1049+
1050+
int main() {
1051+
sycl::queue q;
1052+
1053+
// The source code for a kernel, defined as a SYCL "free function kernel".
1054+
std::string source = R"""(
1055+
#include <sycl/sycl.hpp>
1056+
namespace syclext = sycl::ext::oneapi;
1057+
namespace syclexp = sycl::ext::oneapi::experimental;
1058+
1059+
syclexp::device_global<float> scale;
1060+
1061+
extern "C"
1062+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
1063+
void scaled_iota(float start, float *ptr) {
1064+
size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
1065+
ptr[id] = start + scale * static_cast<float>(id);
1066+
}
1067+
)""";
1068+
1069+
// Create a kernel bundle in "source" state.
1070+
sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source> kb_src =
1071+
syclexp::create_kernel_bundle_from_source(
1072+
q.get_context(),
1073+
syclexp::source_language::sycl,
1074+
source);
1075+
1076+
// Compile the kernel.
1077+
sycl::kernel_bundle<sycl::bundle_state::executable> kb_exe = syclexp::build(kb_src);
1078+
1079+
// Initialize the device global.
1080+
float scale = 0.1f;
1081+
void *scale_addr =
1082+
kb_exe.ext_oneapi_get_device_global_address("scale", q.get_device());
1083+
size_t scale_size =
1084+
kb_exe.ext_oneapi_get_device_global_size("scale", q.get_device());
1085+
q.memcpy(scale_addr, &scale, scale_size).wait();
1086+
1087+
// Get the kernel via its compiler-generated name, and launch it as before.
1088+
sycl::kernel scaled_iota = kb_exe.ext_oneapi_get_kernel("scaled_iota");
1089+
1090+
float *ptr = sycl::malloc_shared<float>(NUM, q);
1091+
q.submit([&](sycl::handler &cgh) {
1092+
// Set the values of the kernel arguments.
1093+
cgh.set_args(3.14f, ptr);
1094+
1095+
// Launch the kernel according to its type, in this case an nd-range kernel.
1096+
sycl::nd_range ndr{{NUM}, {WGSIZE}};
1097+
cgh.parallel_for(ndr, scaled_iota);
1098+
}).wait();
1099+
1100+
sycl::free(ptr, q);
1101+
}
1102+
----
9301103

9311104
== Issues
9321105

0 commit comments

Comments
 (0)