Skip to content

Commit 862c187

Browse files
committed
[SYCL][RTC] Initial support for device globals
Signed-off-by: Julian Oppermann <[email protected]>
1 parent 49fd770 commit 862c187

File tree

4 files changed

+141
-7
lines changed

4 files changed

+141
-7
lines changed

sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,9 @@ jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
503503
/*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints);
504504
assert(Splitter->hasMoreSplits());
505505

506-
// TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall
507-
// be processed.
506+
if (auto Err = Splitter->verifyNoCrossModuleDeviceGlobalUsage()) {
507+
return std::move(Err);
508+
}
508509

509510
// TODO: This allocation assumes that there are no further splits required,
510511
// i.e. there are no mixed SYCL/ESIMD modules.
@@ -547,7 +548,7 @@ jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
547548
GlobalBinImageProps PropReq{
548549
/*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true,
549550
/*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true,
550-
/*DeviceGlobals=*/false};
551+
/*DeviceGlobals=*/true};
551552
PropertySetRegistry Properties =
552553
computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq);
553554
// TODO: Manually add `compile_target` property as in

sycl/source/detail/jit_compiler.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1150,14 +1150,19 @@ sycl_device_binaries jit_compiler::createDeviceBinaryImage(
11501150
}
11511151

11521152
for (const auto &FPS : DevImgInfo.Properties) {
1153+
bool IsDeviceGlobalsPropSet =
1154+
FPS.Name == __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS;
11531155
PropertySetContainer PropSet{FPS.Name.c_str()};
11541156
for (const auto &FPV : FPS.Values) {
11551157
if (FPV.IsUIntValue) {
11561158
PropSet.addProperty(
11571159
PropertyContainer{FPV.Name.c_str(), FPV.UIntValue});
11581160
} else {
1161+
std::string PrefixedName =
1162+
(IsDeviceGlobalsPropSet ? OffloadEntryPrefix : "") +
1163+
FPV.Name.c_str();
11591164
PropSet.addProperty(PropertyContainer{
1160-
FPV.Name.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(),
1165+
PrefixedName.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(),
11611166
sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY});
11621167
}
11631168
}

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,45 @@ class kernel_bundle_impl {
518518
}
519519
}
520520

521-
return std::make_shared<kernel_bundle_impl>(
521+
// Create the executable bundle.
522+
auto ExecBundle = std::make_shared<kernel_bundle_impl>(
522523
MContext, MDevices, KernelIDs, KernelNames, Prefix, Language);
524+
525+
// Determine IDs of all device globals referenced by this bundle's
526+
// kernels. These IDs are also prefixed.
527+
std::set<std::string> UniqueDeviceGlobalIDs;
528+
std::vector<std::string> DeviceGlobalIDs;
529+
for (const auto &RawImg : PM.getRawDeviceImages(KernelIDs)) {
530+
for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) {
531+
auto [It, Ins] = UniqueDeviceGlobalIDs.insert(DeviceGlobalProp->Name);
532+
if (Ins) {
533+
DeviceGlobalIDs.push_back(*It);
534+
}
535+
}
536+
}
537+
538+
for (auto *DeviceGlobalEntry :
539+
PM.getDeviceGlobalEntries(DeviceGlobalIDs)) {
540+
// Device globals without `device_image_scope` are usually statically
541+
// allocated and registered in the integration footer, which we don't
542+
// have in the RTC context. Instead, we dynamically allocate storage
543+
// tied to the executable kernel bundle.
544+
if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) {
545+
auto Alloc = std::make_unique<std::byte[]>(
546+
DeviceGlobalEntry->MDeviceGlobalTSize);
547+
PM.addOrInitDeviceGlobalEntry(Alloc.get(),
548+
DeviceGlobalEntry->MUniqueId.c_str());
549+
ExecBundle->DeviceGlobals.push_back(std::move(Alloc));
550+
}
551+
552+
// Drop the RTC prefix from the entry's symbol name. Note that the PM
553+
// still manages this device global under its prefixed name.
554+
assert(DeviceGlobalEntry->MUniqueId.find(Prefix) == 0);
555+
DeviceGlobalEntry->MUniqueId =
556+
DeviceGlobalEntry->MUniqueId.substr(Prefix.length());
557+
}
558+
559+
return ExecBundle;
523560
}
524561

525562
ur_program_handle_t UrProgram = nullptr;
@@ -960,6 +997,7 @@ class kernel_bundle_impl {
960997
std::vector<std::string> KernelNames;
961998
std::string Prefix;
962999
include_pairs_t IncludePairs;
1000+
std::vector<std::unique_ptr<std::byte[]>> DeviceGlobals;
9631001
};
9641002

9651003
} // namespace detail

sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,29 @@ void ff_templated(T *ptr, T *unused) {
5858
}
5959
)===";
6060

61+
auto constexpr DGSource = R"===(
62+
#include <sycl/sycl.hpp>
63+
64+
namespace syclex = sycl::ext::oneapi::experimental;
65+
66+
syclex::device_global<int> DG;
67+
68+
extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
69+
(syclex::single_task_kernel)) void ff_dg_setter(int val) {
70+
DG = val;
71+
}
72+
73+
extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
74+
(syclex::single_task_kernel)) void ff_dg_adder(int val) {
75+
DG = DG + val;
76+
}
77+
78+
extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
79+
(syclex::single_task_kernel)) void ff_dg_getter(int *val) {
80+
*val = DG;
81+
}
82+
)===";
83+
6184
auto constexpr ESIMDSource = R"===(
6285
#include <sycl/sycl.hpp>
6386
#include <sycl/ext/intel/esimd.hpp>
@@ -219,6 +242,73 @@ int test_build_and_run() {
219242
return 0;
220243
}
221244

245+
int test_device_global() {
246+
namespace syclex = sycl::ext::oneapi::experimental;
247+
using source_kb = sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source>;
248+
using exe_kb = sycl::kernel_bundle<sycl::bundle_state::executable>;
249+
250+
sycl::queue q;
251+
sycl::context ctx = q.get_context();
252+
253+
bool ok =
254+
q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl_jit);
255+
if (!ok) {
256+
std::cout << "Apparently this device does not support `sycl_jit` source "
257+
"kernel bundle extension: "
258+
<< q.get_device().get_info<sycl::info::device::name>()
259+
<< std::endl;
260+
return -1;
261+
}
262+
263+
auto modifyDG = [&q](sycl::kernel &k, int val) {
264+
q.submit([&](sycl::handler &CGH) {
265+
CGH.set_arg(0, val);
266+
CGH.single_task(k);
267+
});
268+
q.wait();
269+
};
270+
271+
auto getDG = [&q](sycl::kernel &k) -> int {
272+
int *buf = sycl::malloc_shared<int>(1, q);
273+
q.submit([&](sycl::handler &CGH) {
274+
CGH.set_arg(0, buf);
275+
CGH.single_task(k);
276+
});
277+
q.wait();
278+
int val = *buf;
279+
sycl::free(buf, q);
280+
return val;
281+
};
282+
283+
source_kb kbSrc = syclex::create_kernel_bundle_from_source(
284+
ctx, syclex::source_language::sycl_jit, DGSource);
285+
286+
exe_kb kbExe1 = syclex::build(kbSrc);
287+
288+
auto setK = kbExe1.ext_oneapi_get_kernel("ff_dg_setter");
289+
auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder");
290+
auto getK = kbExe1.ext_oneapi_get_kernel("ff_dg_getter");
291+
292+
assert(getDG(getK) == 0);
293+
modifyDG(setK, 42);
294+
assert(getDG(getK) == 42);
295+
modifyDG(addK, 1);
296+
assert(getDG(getK) == 43);
297+
298+
exe_kb kbExe2 = syclex::build(kbSrc);
299+
300+
auto setK2 = kbExe2.ext_oneapi_get_kernel("ff_dg_setter");
301+
auto getK2 = kbExe2.ext_oneapi_get_kernel("ff_dg_getter");
302+
303+
// `DG` is private per RTC bundle
304+
assert(getDG(getK2) == 0);
305+
modifyDG(setK2, -17);
306+
assert(getDG(getK2) == -17);
307+
assert(getDG(getK) == 43);
308+
309+
return 0;
310+
}
311+
222312
int test_esimd() {
223313
namespace syclex = sycl::ext::oneapi::experimental;
224314
using source_kb = sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source>;
@@ -390,8 +480,8 @@ int test_warning() {
390480
int main(int argc, char **) {
391481
#ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER
392482
int optional_tests = (argc > 1) ? test_warning() : 0;
393-
return test_build_and_run() || test_esimd() || test_unsupported_options() ||
394-
test_error() || optional_tests;
483+
return test_build_and_run() || test_device_global() || test_esimd() ||
484+
test_unsupported_options() || test_error() || optional_tests;
395485
#else
396486
static_assert(false, "Kernel Compiler feature test macro undefined");
397487
#endif

0 commit comments

Comments
 (0)