Skip to content

Commit fda9171

Browse files
[SYCL][ABI-break] Remove the workaround for release of auxiliary buffers (#9914)
Remove the thread local variable workaround for deferred release of auxiliary resources by passing the required information from reduction headers.
1 parent b5c04e4 commit fda9171

File tree

14 files changed

+70
-43
lines changed

14 files changed

+70
-43
lines changed

sycl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ set(SYCL_MAJOR_VERSION 7)
3636
set(SYCL_MINOR_VERSION 0)
3737
set(SYCL_PATCH_VERSION 0)
3838

39-
set(SYCL_DEV_ABI_VERSION 5)
39+
set(SYCL_DEV_ABI_VERSION 6)
4040
if (SYCL_ADD_DEV_VERSION_POSTFIX)
4141
set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
4242
endif()

sycl/include/sycl/detail/cg.hpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,12 @@ class CG {
140140
}
141141
std::vector<detail::EventImplPtr> &getEvents() { return MData.MEvents; }
142142

143+
virtual std::vector<std::shared_ptr<const void>>
144+
getAuxiliaryResources() const {
145+
return {};
146+
}
147+
virtual void clearAuxiliaryResources(){};
148+
143149
virtual ~CG() = default;
144150

145151
private:
@@ -198,32 +204,40 @@ class CGExecKernel : public CG {
198204
return MStreams;
199205
}
200206

201-
std::vector<std::shared_ptr<const void>> getAuxiliaryResources() const {
207+
std::vector<std::shared_ptr<const void>>
208+
getAuxiliaryResources() const override {
202209
return MAuxiliaryResources;
203210
}
211+
void clearAuxiliaryResources() override { MAuxiliaryResources.clear(); }
204212

205213
std::shared_ptr<detail::kernel_bundle_impl> getKernelBundle() {
206214
return MKernelBundle;
207215
}
208216

209217
void clearStreams() { MStreams.clear(); }
210218
bool hasStreams() { return !MStreams.empty(); }
211-
212-
void clearAuxiliaryResources() { MAuxiliaryResources.clear(); }
213-
bool hasAuxiliaryResources() { return !MAuxiliaryResources.empty(); }
214219
};
215220

216221
/// "Copy memory" command group class.
217222
class CGCopy : public CG {
218223
void *MSrc;
219224
void *MDst;
225+
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
220226

221227
public:
222228
CGCopy(CGTYPE CopyType, void *Src, void *Dst, CG::StorageInitHelper CGData,
229+
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
223230
detail::code_location loc = {})
224-
: CG(CopyType, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst) {}
231+
: CG(CopyType, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst),
232+
MAuxiliaryResources{AuxiliaryResources} {}
225233
void *getSrc() { return MSrc; }
226234
void *getDst() { return MDst; }
235+
236+
std::vector<std::shared_ptr<const void>>
237+
getAuxiliaryResources() const override {
238+
return MAuxiliaryResources;
239+
}
240+
void clearAuxiliaryResources() override { MAuxiliaryResources.clear(); }
227241
};
228242

229243
/// "Fill memory" command group class.

sycl/include/sycl/detail/helpers.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ enum class memory_order;
3737

3838
namespace detail {
3939

40+
class buffer_impl;
4041
class context_impl;
4142
// The function returns list of events that can be passed to OpenCL API as
4243
// dependency list and waits for others.
@@ -46,6 +47,9 @@ getOrWaitEvents(std::vector<sycl::event> DepEvents,
4647

4748
__SYCL_EXPORT void waitEvents(std::vector<sycl::event> DepEvents);
4849

50+
__SYCL_EXPORT void
51+
markBufferAsInternal(const std::shared_ptr<buffer_impl> &BufImpl);
52+
4953
template <typename T> T *declptr() { return static_cast<T *>(nullptr); }
5054

5155
// Function to get of store id, item, nd_item, group for the host implementation
@@ -246,7 +250,6 @@ void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
246250
template <size_t count, class F> void loop(F &&f) {
247251
loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
248252
}
249-
250253
} // namespace detail
251254

252255
} // __SYCL_INLINE_VER_NAMESPACE(_V1)

sycl/include/sycl/handler.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,13 +457,26 @@ class __SYCL_EXPORT handler {
457457
MStreamStorage.push_back(Stream);
458458
}
459459

460-
/// Saves buffers created by handling reduction feature in handler.
460+
/// Saves resources created by handling reduction feature in handler.
461461
/// They are then forwarded to command group and destroyed only after
462462
/// the command group finishes the work on device/host.
463463
///
464464
/// @param ReduObj is a pointer to object that must be stored.
465465
void addReduction(const std::shared_ptr<const void> &ReduObj);
466466

467+
/// Saves buffers created by handling reduction feature in handler and marks
468+
/// them as internal. They are then forwarded to command group and destroyed
469+
/// only after the command group finishes the work on device/host.
470+
///
471+
/// @param ReduBuf is a pointer to buffer that must be stored.
472+
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
473+
void
474+
addReduction(const std::shared_ptr<buffer<T, Dimensions, AllocatorT, Enable>>
475+
&ReduBuf) {
476+
detail::markBufferAsInternal(getSyclObjImpl(*ReduBuf));
477+
addReduction(std::shared_ptr<const void>(ReduBuf));
478+
}
479+
467480
~handler() = default;
468481

469482
// TODO: Private and unusued. Remove when ABI break is allowed.

sycl/include/sycl/reduction.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -960,11 +960,9 @@ class reduction_impl_algo {
960960
for (int i = 0; i < num_elements; ++i) {
961961
(*RWReduVal)[i] = decltype(MIdentityContainer)::getIdentity();
962962
}
963-
CGH.addReduction(RWReduVal);
964963
auto Buf = std::make_shared<buffer<T, 1>>(RWReduVal.get()->data(),
965964
range<1>(num_elements));
966965
Buf->set_final_data();
967-
CGH.addReduction(Buf);
968966
accessor Mem{*Buf, CGH};
969967
Func(Mem);
970968

@@ -975,6 +973,10 @@ class reduction_impl_algo {
975973
// so use the old-style API.
976974
auto Mem =
977975
Buf->template get_access<access::mode::read_write>(CopyHandler);
976+
// Since this CG is dependent on the one associated with CGH,
977+
// registering the auxiliary resources here is enough.
978+
CopyHandler.addReduction(RWReduVal);
979+
CopyHandler.addReduction(Buf);
978980
if constexpr (is_usm) {
979981
// Can't capture whole reduction, copy into distinct variables.
980982
bool IsUpdateOfUserVar = !initializeToIdentity();

sycl/source/detail/helpers.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <detail/scheduler/commands.hpp>
1010
#include <sycl/detail/helpers.hpp>
1111

12+
#include <detail/buffer_impl.hpp>
1213
#include <detail/context_impl.hpp>
1314
#include <detail/event_impl.hpp>
1415
#include <detail/queue_impl.hpp>
@@ -63,6 +64,10 @@ void waitEvents(std::vector<sycl::event> DepEvents) {
6364
}
6465
}
6566

67+
void markBufferAsInternal(const std::shared_ptr<buffer_impl> &BufImpl) {
68+
BufImpl->markAsInternal();
69+
}
70+
6671
} // namespace detail
6772
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
6873
} // namespace sycl

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,11 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
9393
const CG::CGTYPE Type = CommandGroup->getType();
9494
std::vector<Command *> AuxiliaryCmds;
9595
std::vector<StreamImplPtr> Streams;
96-
std::vector<std::shared_ptr<const void>> AuxiliaryResources;
9796

9897
if (Type == CG::Kernel) {
9998
auto *CGExecKernelPtr = static_cast<CGExecKernel *>(CommandGroup.get());
10099
Streams = CGExecKernelPtr->getStreams();
101100
CGExecKernelPtr->clearStreams();
102-
AuxiliaryResources = CGExecKernelPtr->getAuxiliaryResources();
103-
CGExecKernelPtr->clearAuxiliaryResources();
104101
// Stream's flush buffer memory is mainly initialized in stream's __init
105102
// method. However, this method is not available on host device.
106103
// Initializing stream's flush buffer on the host side in a separate task.
@@ -110,6 +107,9 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
110107
}
111108
}
112109
}
110+
std::vector<std::shared_ptr<const void>> AuxiliaryResources;
111+
AuxiliaryResources = CommandGroup->getAuxiliaryResources();
112+
CommandGroup->clearAuxiliaryResources();
113113

114114
bool ShouldEnqueue = true;
115115
{
@@ -546,7 +546,6 @@ void Scheduler::registerAuxiliaryResources(
546546

547547
void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) {
548548
std::unique_lock<std::mutex> Lock{MAuxiliaryResourcesMutex};
549-
ForceDeferredReleaseWrapper ForceDeferredRelease;
550549
for (auto It = MAuxiliaryResources.begin();
551550
It != MAuxiliaryResources.end();) {
552551
const EventImplPtr &Event = It->first;
@@ -560,8 +559,6 @@ void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) {
560559
}
561560
}
562561

563-
thread_local bool Scheduler::ForceDeferredMemObjRelease = false;
564-
565562
void Scheduler::startFusion(QueueImplPtr Queue) {
566563
WriteLockT Lock = acquireWriteLock();
567564
MGraphBuilder.startFusion(Queue);

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -870,21 +870,6 @@ class Scheduler {
870870

871871
QueueImplPtr DefaultHostQueue;
872872

873-
// This thread local flag is a workaround for a problem with managing
874-
// auxiliary resources. We would like to release internal buffers used for
875-
// reductions in a deferred manner, but marking them individually isn't an
876-
// option since all auxiliary resources (buffers, host memory, USM) are passed
877-
// to the library as type erased shared pointers. This flag makes it so that
878-
// release of every memory object is deferred while it's set, and it should
879-
// only be set during release of auxiliary resources.
880-
// TODO Remove once ABI breaking changes are allowed.
881-
friend class SYCLMemObjT;
882-
static thread_local bool ForceDeferredMemObjRelease;
883-
struct ForceDeferredReleaseWrapper {
884-
ForceDeferredReleaseWrapper() { ForceDeferredMemObjRelease = true; };
885-
~ForceDeferredReleaseWrapper() { ForceDeferredMemObjRelease = false; };
886-
};
887-
888873
friend class Command;
889874
friend class DispatchHostTask;
890875
friend class queue_impl;

sycl/source/detail/sycl_mem_obj_t.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,10 +219,7 @@ void SYCLMemObjT::detachMemoryObject(
219219
// buffer creation and set to meaningfull
220220
// value only if any operation on buffer submitted inside addCG call. addCG is
221221
// called from queue::submit and buffer destruction could not overlap with it.
222-
// ForceDeferredMemObjRelease is a workaround for managing auxiliary resources
223-
// while preserving backward compatibility, see the comment for
224-
// ForceDeferredMemObjRelease in scheduler.
225-
if (MRecord && (!MHostPtrProvided || Scheduler::ForceDeferredMemObjRelease))
222+
if (MRecord && (!MHostPtrProvided || MIsInternal))
226223
Scheduler::getInstance().deferMemObjRelease(Self);
227224
}
228225

sycl/source/detail/sycl_mem_obj_t.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
272272

273273
void detachMemoryObject(const std::shared_ptr<SYCLMemObjT> &Self) const;
274274

275+
void markAsInternal() { MIsInternal = true; }
276+
275277
protected:
276278
// An allocateMem helper that determines which host ptr to use
277279
void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData,
@@ -312,6 +314,10 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
312314
// we have read only HostPtr - MUploadDataFunctor is empty but delayed release
313315
// must be not allowed.
314316
bool MHostPtrProvided;
317+
// Indicates that the memory object was allocated internally. Such memory
318+
// objects can be released in a deferred manner regardless of whether a host
319+
// pointer was provided or not.
320+
bool MIsInternal = false;
315321
};
316322
} // namespace detail
317323
} // __SYCL_INLINE_VER_NAMESPACE(_V1)

sycl/source/handler.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,9 @@ event handler::finalize() {
293293
case detail::CG::CopyAccToPtr:
294294
case detail::CG::CopyPtrToAcc:
295295
case detail::CG::CopyAccToAcc:
296-
CommandGroup.reset(new detail::CGCopy(MCGType, MSrcPtr, MDstPtr,
297-
std::move(CGData), MCodeLoc));
296+
CommandGroup.reset(
297+
new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData),
298+
std::move(MImpl->MAuxiliaryResources), MCodeLoc));
298299
break;
299300
case detail::CG::Fill:
300301
CommandGroup.reset(new detail::CGFill(std::move(MPattern), MDstPtr,

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3896,6 +3896,7 @@ _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_16AccessorBaseHostE
38963896
_ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_28SampledImageAccessorBaseHostENS0_12image_targetE
38973897
_ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_30UnsampledImageAccessorBaseHostENS0_12image_targetE
38983898
_ZN4sycl3_V16detail20getDeviceFromHandlerERNS0_7handlerE
3899+
_ZN4sycl3_V16detail20markBufferAsInternalERKSt10shared_ptrINS1_11buffer_implEE
38993900
_ZN4sycl3_V16detail21LocalAccessorBaseHost12getNumOfDimsEv
39003901
_ZN4sycl3_V16detail21LocalAccessorBaseHost14getElementSizeEv
39013902
_ZN4sycl3_V16detail21LocalAccessorBaseHost6getPtrEv

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,8 @@
12421242
?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBUcode_location@detail@12@@Z
12431243
?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z
12441244
?map@MemoryManager@detail@_V1@sycl@@SAPEAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@W4mode@access@34@IV?$range@$02@34@4V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z
1245+
?markAsInternal@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ
1246+
?markBufferAsInternal@detail@_V1@sycl@@YAXAEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z
12451247
?mem_advise@handler@_V1@sycl@@QEAAXPEBX_KH@Z
12461248
?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBUcode_location@detail@23@@Z
12471249
?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z

sycl/test/abi/vtable.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@ void foo(sycl::detail::HostKernelBase &HKB) {
2424
// CHECK-NEXT: 5 | sycl::detail::HostKernelBase::~HostKernelBase() [deleting]
2525

2626
void foo(sycl::detail::CG *CG) { delete CG; }
27-
// CHECK: Vtable for 'sycl::detail::CG' (4 entries).
28-
// CHECK-NEXT: 0 | offset_to_top (0)
29-
// CHECK-NEXT: 1 | sycl::detail::CG RTTI
30-
// CHECK-NEXT: -- (sycl::detail::CG, 0) vtable address --
31-
// CHECK-NEXT: 2 | sycl::detail::CG::~CG() [complete]
32-
// CHECK-NEXT: 3 | sycl::detail::CG::~CG() [deleting]
27+
// CHECK: Vtable for 'sycl::detail::CG' (6 entries).
28+
// CHECK-NEXT: 0 | offset_to_top (0)
29+
// CHECK-NEXT: 1 | sycl::detail::CG RTTI
30+
// CHECK-NEXT: -- (sycl::detail::CG, 0) vtable address --
31+
// CHECK-NEXT: 2 | std::vector<std::shared_ptr<const void>> sycl::detail::CG::getAuxiliaryResources() const
32+
// CHECK-NEXT: 3 | void sycl::detail::CG::clearAuxiliaryResources()
33+
// CHECK-NEXT: 4 | sycl::detail::CG::~CG() [complete]
3334

3435
void foo(sycl::detail::PropertyWithDataBase *Prop) { delete Prop; }
3536
// CHECK: Vtable for 'sycl::detail::PropertyWithDataBase' (4 entries).

0 commit comments

Comments
 (0)