Skip to content

[GpuOclRuntime] Retain input and release created cl_events #367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 13 additions & 28 deletions include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,33 +149,27 @@ static constexpr auto ZERO_PTR = const_cast<int64_t *>(&ZERO);
struct OclContext {
const OclRuntime &runtime;
const cl_command_queue queue;
// Preserve the execution order. This is required in case of out-of-order
// execution (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE). When the execution
// is completed, the 'lastEvent' field contains the event of the last enqueued
// command. If this field is false, 'waitList' is ignored.
const bool preserveOrder;
// Create 'cl_event' object, for each enqueued command, that can be used to
// query or wait for the command to complete. This is required in case of
// out-of-order execution (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), but can
// also be used to get the last event. When the execution is completed, the
// 'lastEvent' field contains the event of the last enqueued command. If this
// field is false, 'waitList' is ignored.
const bool createEvents;
cl_uint waitListLen;
cl_event *waitList;
cl_event lastEvent;

explicit OclContext(const OclRuntime &runtime, cl_command_queue queue,
cl_uint waitListLen = 0, cl_event *waitList = nullptr)
: OclContext(runtime, queue, OclRuntime::isOutOfOrder(queue), waitListLen,
waitList) {}
explicit OclContext(const OclRuntime &runtime, cl_command_queue queue)
: OclContext(runtime, queue, OclRuntime::isOutOfOrder(queue)) {}

explicit OclContext(const OclRuntime &runtime, cl_command_queue queue,
bool preserveOrder, cl_uint waitListLen,
cl_event *waitList)
: runtime(runtime), queue(queue), preserveOrder(preserveOrder),
waitListLen(preserveOrder ? waitListLen : 0),
waitList(preserveOrder ? waitList : nullptr), lastEvent(nullptr),
clPtrs(nullptr) {
assert(!OclRuntime::isOutOfOrder(queue) || preserveOrder);
assert(preserveOrder || (waitListLen == 0 && waitList == nullptr));
}
bool createEvents, cl_uint waitListLen = 0,
cl_event *waitList = nullptr);

OclContext(const OclContext &) = delete;
OclContext &operator=(const OclContext &) = delete;
~OclContext();

[[nodiscard]] llvm::Expected<bool> finish();

Expand All @@ -186,16 +180,7 @@ struct OclContext {
template <unsigned N> friend struct StaticExecutor;
std::unordered_set<void *> *clPtrs;

void setLastEvent(cl_event event) {
lastEvent = event;
if (event) {
waitListLen = 1;
waitList = &lastEvent;
} else {
waitListLen = 0;
waitList = nullptr;
}
}
void setLastEvent(cl_event event);
};

struct OclModule {
Expand Down
69 changes: 61 additions & 8 deletions lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ struct OclRuntime::Exports {
}
va_end(args);

if (ctx->preserveOrder) {
if (ctx->createEvents) {
cl_event event = nullptr;
err = clEnqueueNDRangeKernel(ctx->queue, cloned.kernel, 3, nullptr,
kernel->globalSize, kernel->localSize,
Expand Down Expand Up @@ -541,7 +541,7 @@ llvm::Expected<bool> OclRuntime::usmFree(const void *ptr) const {
llvm::Expected<bool> OclRuntime::usmCpy(OclContext &ctx, const void *src,
void *dst, size_t size) const {
cl_int err;
if (ctx.preserveOrder) {
if (ctx.createEvents) {
cl_event event;
err = ext.clEnqueueMemcpyINTEL(ctx.queue, false, dst, src, size,
ctx.waitListLen, ctx.waitList, &event);
Expand Down Expand Up @@ -572,16 +572,69 @@ void OclRuntime::debug(const char *file, int line, const char *msg) {
}
#endif

OclContext::OclContext(const OclRuntime &runtime, cl_command_queue queue,
bool createEvents, cl_uint waitListLen,
cl_event *waitList)
: runtime(runtime), queue(queue), createEvents(createEvents),
waitListLen(createEvents ? waitListLen : 0),
waitList(createEvents ? waitList : nullptr), lastEvent(nullptr),
clPtrs(nullptr) {
assert(!OclRuntime::isOutOfOrder(queue) || createEvents);
assert(createEvents || (waitListLen == 0 && waitList == nullptr));
for (cl_uint i = 0; i < waitListLen; i++) {
gcLogD("Retaining OpenCL event: ", waitList[i]);
CL_CHECKR(clRetainEvent(waitList[i]),
"Failed to retain OpenCL event: ", waitList[i]);
}
}

OclContext::~OclContext() {
for (cl_uint i = 0; i < waitListLen; i++) {
gcLogD("Releasing OpenCL event: ", waitList[i]);
CL_CHECKR(clReleaseEvent(waitList[i]),
"Failed to release OpenCL event: ", waitList[i]);
}
}

llvm::Expected<bool> OclContext::finish() {
gcLogD("Waiting for the enqueued OpenCL commands to finish: ", queue);
CL_CHECK(clFinish(queue),
"Failed to finish the OpenCL command queue: ", queue);
if (preserveOrder) {
if (createEvents) {
if (waitListLen) {
gcLogD("Waiting for ", waitListLen, " OpenCL events to finish.");
CL_CHECK(clWaitForEvents(waitListLen, waitList),
"Failed to wait for OpenCL events.");

for (cl_uint i = 0; i < waitListLen; i++) {
gcLogD("Releasing OpenCL event: ", waitList[i]);
CL_CHECK(clReleaseEvent(waitList[i]),
"Failed to release OpenCL event: ", waitList[i]);
}
waitListLen = 0;
waitList = nullptr;
}
} else {
gcLogD("Waiting for the enqueued OpenCL commands to finish: ", queue);
CL_CHECK(clFinish(queue),
"Failed to finish the OpenCL command queue: ", queue);
}
return true;
}

void OclContext::setLastEvent(cl_event event) {
for (cl_uint i = 0; i < waitListLen; i++) {
gcLogD("Releasing OpenCL event: ", waitList[i]);
CL_CHECKR(clReleaseEvent(waitList[i]),
"Failed to release OpenCL event: ", waitList[i]);
}

gcLogD("Setting the last OpenCL event: ", event);
lastEvent = event;
if (event) {
waitListLen = 1;
waitList = &lastEvent;
} else {
waitListLen = 0;
waitList = nullptr;
lastEvent = nullptr;
}
return true;
}

OclModule::~OclModule() {
Expand Down