Skip to content

Commit 6409766

Browse files
committed
Untangle destruction of device global map entries
Signed-off-by: Julian Oppermann <[email protected]>
1 parent 2ea6266 commit 6409766

File tree

3 files changed

+52
-19
lines changed

3 files changed

+52
-19
lines changed

sycl/source/detail/context_impl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,11 @@ void context_impl::addAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
329329
MAssociatedDeviceGlobals.insert(DeviceGlobalPtr);
330330
}
331331

332+
void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
333+
std::lock_guard<std::mutex> Lock{MAssociatedDeviceGlobalsMutex};
334+
MAssociatedDeviceGlobals.erase(DeviceGlobalPtr);
335+
}
336+
332337
void context_impl::addDeviceGlobalInitializer(
333338
ur_program_handle_t Program, const std::vector<device> &Devs,
334339
const RTDeviceBinaryImage *BinImage) {
@@ -407,6 +412,9 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
407412
// Device global map entry pointers will not die before the end of the
408413
// program and the pointers will stay the same, so we do not need
409414
// m_DeviceGlobalsMutex here.
415+
// The lifetimes of device global map entries representing globals in
416+
// runtime-compiled code will be tied to the kernel bundle, so the
417+
// assumption holds in that setting as well.
410418
for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) {
411419
// Get or allocate the USM memory associated with the device global.
412420
DeviceGlobalUSMMem &DeviceGlobalUSM =

sycl/source/detail/context_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ class context_impl {
203203
/// Adds an associated device global to the tracked associates.
204204
void addAssociatedDeviceGlobal(const void *DeviceGlobalPtr);
205205

206+
/// Removes an associated device global from the tracked associates.
207+
void removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr);
208+
206209
/// Adds a device global initializer.
207210
void addDeviceGlobalInitializer(ur_program_handle_t Program,
208211
const std::vector<device> &Devs,

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ class kernel_bundle_impl {
385385
std::vector<std::string> &&KernelNames,
386386
std::unordered_map<std::string, std::string> &&MangledKernelNames,
387387
std::vector<std::string> &&DeviceGlobalNames,
388+
std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
388389
sycl_device_binaries Binaries, std::string &&Prefix,
389390
syclex::source_language Lang)
390391
: kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
@@ -399,6 +400,7 @@ class kernel_bundle_impl {
399400
MKernelNames = std::move(KernelNames);
400401
MMangledKernelNames = std::move(MangledKernelNames);
401402
MDeviceGlobalNames = std::move(DeviceGlobalNames);
403+
MDeviceGlobalAllocations = std::move(DeviceGlobalAllocations);
402404
MDeviceBinaries = Binaries;
403405
MPrefix = std::move(Prefix);
404406
MLanguage = Lang;
@@ -535,6 +537,12 @@ class kernel_bundle_impl {
535537
std::vector<kernel_id> KernelIDs;
536538
std::vector<std::string> KernelNames;
537539
std::unordered_map<std::string, std::string> MangledKernelNames;
540+
541+
std::unordered_set<std::string> DeviceGlobalIDSet;
542+
std::vector<std::string> DeviceGlobalIDVec;
543+
std::vector<std::string> DeviceGlobalNames;
544+
std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
545+
538546
for (const auto &KernelID : PM.getAllSYCLKernelIDs()) {
539547
std::string_view KernelName{KernelID.get_name()};
540548
if (KernelName.find(Prefix) == 0) {
@@ -552,8 +560,8 @@ class kernel_bundle_impl {
552560
}
553561
}
554562

555-
// Apply frontend information.
556563
for (const auto *RawImg : PM.getRawDeviceImages(KernelIDs)) {
564+
// Mangled names.
557565
for (const sycl_device_binary_property &RKProp :
558566
RawImg->getRegisteredKernels()) {
559567

@@ -563,14 +571,8 @@ class kernel_bundle_impl {
563571
reinterpret_cast<const char *>(BA.begin()), MangledNameLen};
564572
MangledKernelNames.emplace(RKProp->Name, MangledName);
565573
}
566-
}
567574

568-
// Determine IDs of all device globals referenced by this bundle's
569-
// kernels. These IDs are also prefixed.
570-
std::unordered_set<std::string> DeviceGlobalIDSet;
571-
std::vector<std::string> DeviceGlobalIDVec;
572-
std::vector<std::string> DeviceGlobalNames;
573-
for (const auto &RawImg : PM.getRawDeviceImages(KernelIDs)) {
575+
// Device globals.
574576
for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) {
575577
std::string_view DeviceGlobalName{DeviceGlobalProp->Name};
576578
assert(DeviceGlobalName.find(Prefix) == 0);
@@ -585,12 +587,6 @@ class kernel_bundle_impl {
585587
}
586588
}
587589

588-
// Create the executable bundle.
589-
auto ExecBundle = std::make_shared<kernel_bundle_impl>(
590-
MContext, MDevices, KernelIDs, std::move(KernelNames),
591-
std::move(MangledKernelNames), std::move(DeviceGlobalNames), Binaries,
592-
std::move(Prefix), MLanguage);
593-
594590
// Device globals are usually statically allocated and registered in the
595591
// integration footer, which we don't have in the RTC context. Instead, we
596592
// dynamically allocate storage tied to the executable kernel bundle.
@@ -599,13 +595,13 @@ class kernel_bundle_impl {
599595

600596
size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value
601597
if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) {
602-
// USM pointer. TODO: it's actually a decorated multi_ptr.
598+
// Consider storage for device USM pointer.
603599
AllocSize += sizeof(void *);
604600
}
605601
auto Alloc = std::make_unique<std::byte[]>(AllocSize);
606602
std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId};
607603
PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data());
608-
ExecBundle->MDeviceGlobalAllocations.push_back(std::move(Alloc));
604+
DeviceGlobalAllocations.push_back(std::move(Alloc));
609605

610606
// Drop the RTC prefix from the entry's symbol name. Note that the PM
611607
// still manages this device global under its prefixed name.
@@ -614,7 +610,11 @@ class kernel_bundle_impl {
614610
DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
615611
}
616612

617-
return ExecBundle;
613+
return std::make_shared<kernel_bundle_impl>(
614+
MContext, MDevices, KernelIDs, std::move(KernelNames),
615+
std::move(MangledKernelNames), std::move(DeviceGlobalNames),
616+
std::move(DeviceGlobalAllocations), Binaries, std::move(Prefix),
617+
MLanguage);
618618
}
619619

620620
ur_program_handle_t UrProgram = nullptr;
@@ -781,6 +781,28 @@ class kernel_bundle_impl {
781781
return Entries.front();
782782
}
783783

784+
void unregister_device_globals_from_context() {
785+
if (MDeviceGlobalNames.empty())
786+
return;
787+
788+
// Manually trigger the release of resources for all device global map
789+
// entries associated with this runtime-compiled bundle. Normally, this
790+
// would happen in `~context_impl()`, however in the RTC setting, the
791+
// context outlives the DG map entries owned by the program manager.
792+
793+
std::vector<std::string> DeviceGlobalIDs;
794+
std::transform(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(),
795+
std::back_inserter(DeviceGlobalIDs),
796+
[&](const std::string &DGName) { return MPrefix + DGName; });
797+
auto ContextImpl = getSyclObjImpl(MContext);
798+
for (DeviceGlobalMapEntry *Entry :
799+
ProgramManager::getInstance().getDeviceGlobalEntries(
800+
DeviceGlobalIDs)) {
801+
Entry->removeAssociatedResources(ContextImpl.get());
802+
ContextImpl->removeAssociatedDeviceGlobal(Entry->MDeviceGlobalPtr);
803+
}
804+
}
805+
784806
public:
785807
bool ext_oneapi_has_kernel(const std::string &Name) {
786808
return is_kernel_name(adjust_kernel_name(Name));
@@ -1121,6 +1143,7 @@ class kernel_bundle_impl {
11211143
~kernel_bundle_impl() {
11221144
try {
11231145
if (MDeviceBinaries) {
1146+
unregister_device_globals_from_context();
11241147
ProgramManager::getInstance().removeImages(MDeviceBinaries);
11251148
syclex::detail::SYCL_JIT_destroy(MDeviceBinaries);
11261149
}
@@ -1162,11 +1185,10 @@ class kernel_bundle_impl {
11621185
std::vector<std::string> MKernelNames;
11631186
std::unordered_map<std::string, std::string> MMangledKernelNames;
11641187
std::vector<std::string> MDeviceGlobalNames;
1188+
std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
11651189
sycl_device_binaries MDeviceBinaries = nullptr;
11661190
std::string MPrefix;
11671191
include_pairs_t MIncludePairs;
1168-
1169-
std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
11701192
};
11711193

11721194
} // namespace detail

0 commit comments

Comments
 (0)