@@ -385,6 +385,7 @@ class kernel_bundle_impl {
385
385
std::vector<std::string> &&KernelNames,
386
386
std::unordered_map<std::string, std::string> &&MangledKernelNames,
387
387
std::vector<std::string> &&DeviceGlobalNames,
388
+ std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
388
389
sycl_device_binaries Binaries, std::string &&Prefix,
389
390
syclex::source_language Lang)
390
391
: kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
@@ -399,6 +400,7 @@ class kernel_bundle_impl {
399
400
MKernelNames = std::move (KernelNames);
400
401
MMangledKernelNames = std::move (MangledKernelNames);
401
402
MDeviceGlobalNames = std::move (DeviceGlobalNames);
403
+ MDeviceGlobalAllocations = std::move (DeviceGlobalAllocations);
402
404
MDeviceBinaries = Binaries;
403
405
MPrefix = std::move (Prefix);
404
406
MLanguage = Lang;
@@ -535,6 +537,12 @@ class kernel_bundle_impl {
535
537
std::vector<kernel_id> KernelIDs;
536
538
std::vector<std::string> KernelNames;
537
539
std::unordered_map<std::string, std::string> MangledKernelNames;
540
+
541
+ std::unordered_set<std::string> DeviceGlobalIDSet;
542
+ std::vector<std::string> DeviceGlobalIDVec;
543
+ std::vector<std::string> DeviceGlobalNames;
544
+ std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
545
+
538
546
for (const auto &KernelID : PM.getAllSYCLKernelIDs ()) {
539
547
std::string_view KernelName{KernelID.get_name ()};
540
548
if (KernelName.find (Prefix) == 0 ) {
@@ -552,8 +560,8 @@ class kernel_bundle_impl {
552
560
}
553
561
}
554
562
555
- // Apply frontend information.
556
563
for (const auto *RawImg : PM.getRawDeviceImages (KernelIDs)) {
564
+ // Mangled names.
557
565
for (const sycl_device_binary_property &RKProp :
558
566
RawImg->getRegisteredKernels ()) {
559
567
@@ -563,14 +571,8 @@ class kernel_bundle_impl {
563
571
reinterpret_cast <const char *>(BA.begin ()), MangledNameLen};
564
572
MangledKernelNames.emplace (RKProp->Name , MangledName);
565
573
}
566
- }
567
574
568
- // Determine IDs of all device globals referenced by this bundle's
569
- // kernels. These IDs are also prefixed.
570
- std::unordered_set<std::string> DeviceGlobalIDSet;
571
- std::vector<std::string> DeviceGlobalIDVec;
572
- std::vector<std::string> DeviceGlobalNames;
573
- for (const auto &RawImg : PM.getRawDeviceImages (KernelIDs)) {
575
+ // Device globals.
574
576
for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals ()) {
575
577
std::string_view DeviceGlobalName{DeviceGlobalProp->Name };
576
578
assert (DeviceGlobalName.find (Prefix) == 0 );
@@ -585,12 +587,6 @@ class kernel_bundle_impl {
585
587
}
586
588
}
587
589
588
- // Create the executable bundle.
589
- auto ExecBundle = std::make_shared<kernel_bundle_impl>(
590
- MContext, MDevices, KernelIDs, std::move (KernelNames),
591
- std::move (MangledKernelNames), std::move (DeviceGlobalNames), Binaries,
592
- std::move (Prefix), MLanguage);
593
-
594
590
// Device globals are usually statically allocated and registered in the
595
591
// integration footer, which we don't have in the RTC context. Instead, we
596
592
// dynamically allocate storage tied to the executable kernel bundle.
@@ -599,13 +595,13 @@ class kernel_bundle_impl {
599
595
600
596
size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize ; // init value
601
597
if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated ) {
602
- // USM pointer. TODO: it's actually a decorated multi_ptr .
598
+ // Consider storage for device USM pointer .
603
599
AllocSize += sizeof (void *);
604
600
}
605
601
auto Alloc = std::make_unique<std::byte[]>(AllocSize);
606
602
std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId };
607
603
PM.addOrInitDeviceGlobalEntry (Alloc.get (), DeviceGlobalName.data ());
608
- ExecBundle-> MDeviceGlobalAllocations .push_back (std::move (Alloc));
604
+ DeviceGlobalAllocations .push_back (std::move (Alloc));
609
605
610
606
// Drop the RTC prefix from the entry's symbol name. Note that the PM
611
607
// still manages this device global under its prefixed name.
@@ -614,7 +610,11 @@ class kernel_bundle_impl {
614
610
DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
615
611
}
616
612
617
- return ExecBundle;
613
+ return std::make_shared<kernel_bundle_impl>(
614
+ MContext, MDevices, KernelIDs, std::move (KernelNames),
615
+ std::move (MangledKernelNames), std::move (DeviceGlobalNames),
616
+ std::move (DeviceGlobalAllocations), Binaries, std::move (Prefix),
617
+ MLanguage);
618
618
}
619
619
620
620
ur_program_handle_t UrProgram = nullptr ;
@@ -781,6 +781,28 @@ class kernel_bundle_impl {
781
781
return Entries.front ();
782
782
}
783
783
784
+ void unregister_device_globals_from_context () {
785
+ if (MDeviceGlobalNames.empty ())
786
+ return ;
787
+
788
+ // Manually trigger the release of resources for all device global map
789
+ // entries associated with this runtime-compiled bundle. Normally, this
790
+ // would happen in `~context_impl()`, however in the RTC setting, the
791
+ // context outlives the DG map entries owned by the program manager.
792
+
793
+ std::vector<std::string> DeviceGlobalIDs;
794
+ std::transform (MDeviceGlobalNames.begin (), MDeviceGlobalNames.end (),
795
+ std::back_inserter (DeviceGlobalIDs),
796
+ [&](const std::string &DGName) { return MPrefix + DGName; });
797
+ auto ContextImpl = getSyclObjImpl (MContext);
798
+ for (DeviceGlobalMapEntry *Entry :
799
+ ProgramManager::getInstance ().getDeviceGlobalEntries (
800
+ DeviceGlobalIDs)) {
801
+ Entry->removeAssociatedResources (ContextImpl.get ());
802
+ ContextImpl->removeAssociatedDeviceGlobal (Entry->MDeviceGlobalPtr );
803
+ }
804
+ }
805
+
784
806
public:
785
807
bool ext_oneapi_has_kernel (const std::string &Name) {
786
808
return is_kernel_name (adjust_kernel_name (Name));
@@ -1121,6 +1143,7 @@ class kernel_bundle_impl {
1121
1143
~kernel_bundle_impl () {
1122
1144
try {
1123
1145
if (MDeviceBinaries) {
1146
+ unregister_device_globals_from_context ();
1124
1147
ProgramManager::getInstance ().removeImages (MDeviceBinaries);
1125
1148
syclex::detail::SYCL_JIT_destroy (MDeviceBinaries);
1126
1149
}
@@ -1162,11 +1185,10 @@ class kernel_bundle_impl {
1162
1185
std::vector<std::string> MKernelNames;
1163
1186
std::unordered_map<std::string, std::string> MMangledKernelNames;
1164
1187
std::vector<std::string> MDeviceGlobalNames;
1188
+ std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
1165
1189
sycl_device_binaries MDeviceBinaries = nullptr ;
1166
1190
std::string MPrefix;
1167
1191
include_pairs_t MIncludePairs;
1168
-
1169
- std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
1170
1192
};
1171
1193
1172
1194
} // namespace detail
0 commit comments