@@ -48,6 +48,8 @@ This extension also depends on the following other SYCL extensions:
48
48
sycl_ext_oneapi_properties]
49
49
* link:../proposed/sycl_ext_oneapi_free_function_kernels.asciidoc[
50
50
sycl_ext_oneapi_free_function_kernels]
51
+ * link:../experimental/sycl_ext_oneapi_device_global.asciidoc[
52
+ sycl_ext_oneapi_device_global]
51
53
52
54
53
55
== Status
@@ -572,6 +574,8 @@ class kernel_bundle {
572
574
bool ext_oneapi_has_kernel(const std::string &name);
573
575
kernel ext_oneapi_get_kernel(const std::string &name);
574
576
std::string ext_oneapi_get_raw_kernel_name(const std::string &name);
577
+
578
+ // Continued below in "New kernel bundle member functions for device globals"
575
579
};
576
580
577
581
} // namespace sycl
@@ -800,6 +804,109 @@ sycl::kernel k_float = kb.ext_oneapi_get_kernel("bartmpl<float>");
800
804
sycl::kernel k_int = kb.ext_oneapi_get_kernel("bartmpl<int>");
801
805
----
802
806
807
+ === New kernel bundle member functions for device globals
808
+
809
+ This extensions adds the following new `kernel_bundle` member functions to let
810
+ the host application interact with device globals defined in runtime-compiled
811
+ code. Device globals are only supported for the `source_language::sycl`
812
+ language.
813
+
814
+ Device globals declared with the `device_image_scope` property can be used in
815
+ the runtime-compiled device code, but cannot be accessed from the host. We plan
816
+ to lift this limitation in a future version of the extension.
817
+
818
+ [source,c++]
819
+ ----
820
+ namespace sycl {
821
+
822
+ template <bundle_state State>
823
+ class kernel_bundle {
824
+ // Continued from "New kernel bundle member functions"
825
+
826
+ bool ext_oneapi_has_device_global(const std::string &name, const device &dev);
827
+ void *ext_oneapi_get_device_global_address(const std::string &name,
828
+ const device &dev);
829
+ size_t ext_oneapi_get_device_global_size(const std::string &name,
830
+ const device &dev);
831
+ };
832
+
833
+ } // namespace sycl
834
+ ----
835
+
836
+ |====
837
+ a|
838
+ [frame=all,grid=none]
839
+ !====
840
+ a!
841
+ [source,c++]
842
+ ----
843
+ bool ext_oneapi_has_device_global(const std::string &name, const device &dev)
844
+ ----
845
+ !====
846
+
847
+ _Constraints:_ This function is not available when `State` is
848
+ `bundle_state::ext_oneapi_source`.
849
+
850
+ _Returns:_ The value `true` only if
851
+
852
+ * the kernel bundle was created from a bundle of state
853
+ `bundle_state::ext_oneapi_source` in the language `source_language::sycl`, and
854
+ * it defines a device global whose name is `name` and which was declared without
855
+ the `device_image_scope` property, and
856
+ * `dev` is contained by the context associated with this bundle.
857
+
858
+ `name` must be a {cpp} identifier that is valid for referencing the device
859
+ global at the bottom of the source code.
860
+
861
+ a|
862
+ [frame=all,grid=none]
863
+ !====
864
+ a!
865
+ [source,c++]
866
+ ----
867
+ void *ext_oneapi_get_device_global_address(const std::string &name,
868
+ const device &dev)
869
+ ----
870
+ !====
871
+
872
+ _Constraints:_ This function is not available when `State` is
873
+ `bundle_state::ext_oneapi_source`.
874
+
875
+ _Effects:_ If device memory for `name` has not been allocated at the time of
876
+ this call, it will be allocated and zero-initialized synchronously.
877
+
878
+ _Returns:_ Returns a USM pointer to the device global `name`'s storage on device
879
+ `dev`.
880
+
881
+ _Throws:_
882
+
883
+ * An `exception` with the `errc::invalid` error code if
884
+ `ext_oneapi_has_device_global(name, dev)` returns `false`.
885
+ * An `exception` with the `errc::memory_allocation` error code if the allocation
886
+ or initialization of the device global's storage fails.
887
+
888
+ a|
889
+ [frame=all,grid=none]
890
+ !====
891
+ a!
892
+ [source,c++]
893
+ ----
894
+ size_t ext_oneapi_get_device_global_size(const std::string &name,
895
+ const device &dev)
896
+ ----
897
+ !====
898
+
899
+ _Constraints:_ This function is not available when `State` is
900
+ `bundle_state::ext_oneapi_source`.
901
+
902
+ _Returns:_ Returns the size in bytes of device global `name`.
903
+
904
+ _Throws:_
905
+
906
+ * An `exception` with the `errc::invalid` error code if
907
+ `ext_oneapi_has_device_global(name, dev)` returns `false`.
908
+ |====
909
+
803
910
804
911
== Examples
805
912
@@ -927,6 +1034,72 @@ int main() {
927
1034
}
928
1035
----
929
1036
1037
+ === Using device globals
1038
+
1039
+ This examples demonstrates how a device global defined in runtime-compiled code
1040
+ can be accessed from the host and the device.
1041
+
1042
+ [source,c++]
1043
+ ----
1044
+ #include <sycl/sycl.hpp>
1045
+ namespace syclexp = sycl::ext::oneapi::experimental;
1046
+
1047
+ static constexpr size_t NUM = 1024;
1048
+ static constexpr size_t WGSIZE = 16;
1049
+
1050
+ int main() {
1051
+ sycl::queue q;
1052
+
1053
+ // The source code for a kernel, defined as a SYCL "free function kernel".
1054
+ std::string source = R"""(
1055
+ #include <sycl/sycl.hpp>
1056
+ namespace syclext = sycl::ext::oneapi;
1057
+ namespace syclexp = sycl::ext::oneapi::experimental;
1058
+
1059
+ syclexp::device_global<float> scale;
1060
+
1061
+ extern "C"
1062
+ SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
1063
+ void scaled_iota(float start, float *ptr) {
1064
+ size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
1065
+ ptr[id] = start + scale * static_cast<float>(id);
1066
+ }
1067
+ )""";
1068
+
1069
+ // Create a kernel bundle in "source" state.
1070
+ sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source> kb_src =
1071
+ syclexp::create_kernel_bundle_from_source(
1072
+ q.get_context(),
1073
+ syclexp::source_language::sycl,
1074
+ source);
1075
+
1076
+ // Compile the kernel.
1077
+ sycl::kernel_bundle<sycl::bundle_state::executable> kb_exe = syclexp::build(kb_src);
1078
+
1079
+ // Initialize the device global.
1080
+ float scale = 0.1f;
1081
+ void *scale_addr =
1082
+ kb_exe.ext_oneapi_get_device_global_address("scale", q.get_device());
1083
+ size_t scale_size =
1084
+ kb_exe.ext_oneapi_get_device_global_size("scale", q.get_device());
1085
+ q.memcpy(scale_addr, &scale, scale_size).wait();
1086
+
1087
+ // Get the kernel via its compiler-generated name, and launch it as before.
1088
+ sycl::kernel scaled_iota = kb_exe.ext_oneapi_get_kernel("scaled_iota");
1089
+
1090
+ float *ptr = sycl::malloc_shared<float>(NUM, q);
1091
+ q.submit([&](sycl::handler &cgh) {
1092
+ // Set the values of the kernel arguments.
1093
+ cgh.set_args(3.14f, ptr);
1094
+
1095
+ // Launch the kernel according to its type, in this case an nd-range kernel.
1096
+ sycl::nd_range ndr{{NUM}, {WGSIZE}};
1097
+ cgh.parallel_for(ndr, scaled_iota);
1098
+ }).wait();
1099
+
1100
+ sycl::free(ptr, q);
1101
+ }
1102
+ ----
930
1103
931
1104
== Issues
932
1105
0 commit comments