@@ -380,8 +380,9 @@ class kernel_bundle_impl {
380
380
// program manager integration, only for sycl_jit language
381
381
kernel_bundle_impl (context Ctx, std::vector<device> Devs,
382
382
const std::vector<kernel_id> &KernelIDs,
383
- std::vector<std::string> KNames, std::string Pfx,
384
- syclex::source_language Lang)
383
+ const std::vector<std::string> &KNames,
384
+ const std::vector<std::string> &DGNames,
385
+ const std::string &Pfx, syclex::source_language Lang)
385
386
: kernel_bundle_impl(Ctx, Devs, KernelIDs, bundle_state::executable) {
386
387
assert (Lang == syclex::source_language::sycl_jit);
387
388
// Mark this bundle explicitly as "interop" to ensure that its kernels are
@@ -391,6 +392,7 @@ class kernel_bundle_impl {
391
392
// from the (unprefixed) kernel name.
392
393
MIsInterop = true ;
393
394
KernelNames = KNames;
395
+ DeviceGlobalNames = DGNames;
394
396
Prefix = Pfx;
395
397
Language = Lang;
396
398
}
@@ -509,51 +511,60 @@ class kernel_bundle_impl {
509
511
// `jit_compiler::compileSYCL(..)` uses `CompilationID + '$'` as prefix
510
512
// for offload entry names.
511
513
std::string Prefix = CompilationID + ' $' ;
514
+ auto PrefixLen = Prefix.length ();
512
515
for (const auto &KernelID : PM.getAllSYCLKernelIDs ()) {
513
516
std::string_view KernelName{KernelID.get_name ()};
514
517
if (KernelName.find (Prefix) == 0 ) {
515
518
KernelIDs.push_back (KernelID);
516
- KernelName.remove_prefix (Prefix. length () );
519
+ KernelName.remove_prefix (PrefixLen );
517
520
KernelNames.emplace_back (KernelName);
518
521
}
519
522
}
520
523
521
- // Create the executable bundle.
522
- auto ExecBundle = std::make_shared<kernel_bundle_impl>(
523
- MContext, MDevices, KernelIDs, KernelNames, Prefix, Language);
524
-
525
524
// Determine IDs of all device globals referenced by this bundle's
526
525
// kernels. These IDs are also prefixed.
527
- std::set<std::string> UniqueDeviceGlobalIDs;
528
- std::vector<std::string> DeviceGlobalIDs;
526
+ std::set<std::string> DeviceGlobalIDSet;
527
+ std::vector<std::string> DeviceGlobalIDVec;
528
+ std::vector<std::string> DeviceGlobalNames;
529
529
for (const auto &RawImg : PM.getRawDeviceImages (KernelIDs)) {
530
530
for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals ()) {
531
- auto [It, Ins] = UniqueDeviceGlobalIDs.insert (DeviceGlobalProp->Name );
531
+ std::string_view DeviceGlobalName{DeviceGlobalProp->Name };
532
+ assert (DeviceGlobalName.find (Prefix) == 0 );
533
+ auto [It, Ins] = DeviceGlobalIDSet.emplace (DeviceGlobalName);
532
534
if (Ins) {
533
- DeviceGlobalIDs.push_back (*It);
535
+ DeviceGlobalIDVec.emplace_back (DeviceGlobalName);
536
+ DeviceGlobalName.remove_prefix (PrefixLen);
537
+ DeviceGlobalNames.emplace_back (DeviceGlobalName);
534
538
}
535
539
}
536
540
}
537
541
542
+ // Create the executable bundle.
543
+ auto ExecBundle = std::make_shared<kernel_bundle_impl>(
544
+ MContext, MDevices, KernelIDs, KernelNames, DeviceGlobalNames, Prefix,
545
+ Language);
546
+
547
+ // Device globals are usually statically allocated and registered in the
548
+ // integration footer, which we don't have in the RTC context. Instead, we
549
+ // dynamically allocate storage tied to the executable kernel bundle.
538
550
for (auto *DeviceGlobalEntry :
539
- PM.getDeviceGlobalEntries (DeviceGlobalIDs)) {
540
- // Device globals without `device_image_scope` are usually statically
541
- // allocated and registered in the integration footer, which we don't
542
- // have in the RTC context. Instead, we dynamically allocate storage
543
- // tied to the executable kernel bundle.
551
+ PM.getDeviceGlobalEntries (DeviceGlobalIDVec)) {
552
+
553
+ size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize ; // init value
544
554
if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated ) {
545
- auto Alloc = std::make_unique<std::byte[]>(
546
- DeviceGlobalEntry->MDeviceGlobalTSize );
547
- PM.addOrInitDeviceGlobalEntry (Alloc.get (),
548
- DeviceGlobalEntry->MUniqueId .c_str ());
549
- ExecBundle->DeviceGlobals .push_back (std::move (Alloc));
555
+ // USM pointer. TODO: it's actually a decorated multi_ptr.
556
+ AllocSize += sizeof (void *);
550
557
}
558
+ auto Alloc = std::make_unique<std::byte[]>(AllocSize);
559
+ std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId };
560
+ PM.addOrInitDeviceGlobalEntry (Alloc.get (), DeviceGlobalName.data ());
561
+ ExecBundle->DeviceGlobalAllocations .push_back (std::move (Alloc));
551
562
552
563
// Drop the RTC prefix from the entry's symbol name. Note that the PM
553
564
// still manages this device global under its prefixed name.
554
- assert (DeviceGlobalEntry-> MUniqueId .find (Prefix) == 0 );
555
- DeviceGlobalEntry-> MUniqueId =
556
- DeviceGlobalEntry->MUniqueId . substr (Prefix. length ()) ;
565
+ assert (DeviceGlobalName .find (Prefix) == 0 );
566
+ DeviceGlobalName. remove_prefix (PrefixLen);
567
+ DeviceGlobalEntry->MUniqueId = DeviceGlobalName ;
557
568
}
558
569
559
570
return ExecBundle;
@@ -735,6 +746,18 @@ class kernel_bundle_impl {
735
746
return detail::createSyclObjFromImpl<kernel>(KernelImpl);
736
747
}
737
748
749
/// Encodes the source-level name of a device global as `_Z<length><Name>`
/// (the Itanium-style source-name encoding), e.g. `foo` becomes `_Z3foo`.
///
/// \param Name unqualified name of the device global.
/// \return the encoded name: the literal `_Z`, the decimal length of
///         \p Name, then \p Name itself.
std::string mangle_device_global_name(const std::string &Name) {
  // TODO: Support device globals declared in namespaces.
  // The prefix has to be exactly "_Z": any extra character (for instance a
  // leading blank) produces a string that cannot compare equal to a real
  // symbol name.
  return "_Z" + std::to_string(Name.length()) + Name;
}
753
+
754
+ bool ext_oneapi_has_device_global (const std::string &Name,
755
+ [[maybe_unused]] const device &Dev) {
756
+ std::string MangledName = mangle_device_global_name (Name);
757
+ return std::find (DeviceGlobalNames.begin (), DeviceGlobalNames.end (),
758
+ MangledName) != DeviceGlobalNames.end ();
759
+ }
760
+
738
761
bool empty () const noexcept { return MDeviceImages.empty (); }
739
762
740
763
backend get_backend () const noexcept {
@@ -993,11 +1016,13 @@ class kernel_bundle_impl {
993
1016
// Language is for both state::source and state::executable.
994
1017
syclex::source_language Language = syclex::source_language::opencl;
995
1018
const std::variant<std::string, std::vector<std::byte>> Source;
996
- // only kernel_bundles created from source have KernelNames member .
1019
+ // Only kernel_bundles created from source have the following members.
997
1020
std::vector<std::string> KernelNames;
1021
+ std::vector<std::string> DeviceGlobalNames;
998
1022
std::string Prefix;
999
1023
include_pairs_t IncludePairs;
1000
- std::vector<std::unique_ptr<std::byte[]>> DeviceGlobals;
1024
+
1025
+ std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
1001
1026
};
1002
1027
1003
1028
} // namespace detail
0 commit comments