Skip to content

Commit e292aa5

Browse files
authored
[SYCL] Reduce kernel submission overhead (#4524)
This patch slightly reduces the number of instructions for each submission: handler.hpp executes isKernelLambdaCallableWithKernelHandler at compile time. queue_impl.hpp stores the result of has_property<property::queue::in_order>() in a member variable and stores getType() in a local variable. scheduler.cpp stores the result of getType() in a local variable and uses it in all the following if-conditions. Signed-off-by: Alexander Flegontov [email protected]
1 parent 560a214 commit e292aa5

File tree

3 files changed

+20
-18
lines changed

3 files changed

+20
-18
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -560,9 +560,10 @@ class __SYCL_EXPORT handler {
560560
template <typename KernelName, typename KernelType, int Dims,
561561
typename LambdaArgType>
562562
void StoreLambda(KernelType KernelFunc) {
563-
if (detail::isKernelLambdaCallableWithKernelHandler<KernelType,
564-
LambdaArgType>() &&
565-
MIsHost) {
563+
constexpr bool IsCallableWithKernelHandler =
564+
detail::isKernelLambdaCallableWithKernelHandler<KernelType,
565+
LambdaArgType>();
566+
if (IsCallableWithKernelHandler && MIsHost) {
566567
throw cl::sycl::feature_not_supported(
567568
"kernel_handler is not yet supported by host device.",
568569
PI_INVALID_OPERATION);
@@ -590,8 +591,7 @@ class __SYCL_EXPORT handler {
590591

591592
// If the kernel lambda is callable with a kernel_handler argument, manifest
592593
// the associated kernel handler.
593-
if (detail::isKernelLambdaCallableWithKernelHandler<KernelType,
594-
LambdaArgType>()) {
594+
if constexpr (IsCallableWithKernelHandler) {
595595
getOrInsertHandlerKernelBundle(/*Insert=*/true);
596596
}
597597
}

sycl/source/detail/queue_impl.hpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ class queue_impl {
9191
const async_handler &AsyncHandler, const property_list &PropList)
9292
: MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
9393
MPropList(PropList), MHostQueue(MDevice->is_host()),
94-
MAssertHappenedBuffer(range<1>{1}) {
94+
MAssertHappenedBuffer(range<1>{1}),
95+
MIsInorder(has_property<property::queue::in_order>()) {
9596
if (!Context->hasDevice(Device))
9697
throw cl::sycl::invalid_parameter_error(
9798
"Queue cannot be constructed with the given context and device "
@@ -114,8 +115,9 @@ class queue_impl {
114115
/// \param AsyncHandler is a SYCL asynchronous exception handler.
115116
queue_impl(RT::PiQueue PiQueue, const ContextImplPtr &Context,
116117
const async_handler &AsyncHandler)
117-
: MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false),
118-
MAssertHappenedBuffer(range<1>{1}) {
118+
: MContext(Context), MAsyncHandler(AsyncHandler), MPropList(),
119+
MHostQueue(false), MAssertHappenedBuffer(range<1>{1}),
120+
MIsInorder(has_property<property::queue::in_order>()) {
119121

120122
MQueues.push_back(pi::cast<RT::PiQueue>(PiQueue));
121123

@@ -434,15 +436,15 @@ class queue_impl {
434436
// Scheduler will later omit events, that are not required to execute tasks.
435437
// Host and interop tasks, however, are not submitted to low-level runtimes
436438
// and require separate dependency management.
437-
if (has_property<property::queue::in_order>() &&
438-
(Handler.getType() == CG::CGTYPE::CodeplayHostTask ||
439-
Handler.getType() == CG::CGTYPE::CodeplayInteropTask))
439+
const CG::CGTYPE Type = Handler.getType();
440+
if (MIsInorder && (Type == CG::CGTYPE::CodeplayHostTask ||
441+
Type == CG::CGTYPE::CodeplayInteropTask))
440442
Handler.depends_on(MLastEvent);
441443

442444
event Event;
443445

444446
if (PostProcess) {
445-
bool IsKernel = Handler.getType() == CG::Kernel;
447+
bool IsKernel = Type == CG::Kernel;
446448
bool KernelUsesAssert = false;
447449
if (IsKernel)
448450
KernelUsesAssert =
@@ -456,7 +458,7 @@ class queue_impl {
456458
} else
457459
Event = Handler.finalize();
458460

459-
if (has_property<property::queue::in_order>())
461+
if (MIsInorder)
460462
MLastEvent = Event;
461463

462464
addEvent(Event);
@@ -520,6 +522,7 @@ class queue_impl {
520522
buffer<AssertHappened, 1> MAssertHappenedBuffer;
521523

522524
event MLastEvent;
525+
const bool MIsInorder;
523526
};
524527

525528
} // namespace detail

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,11 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
7171
EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
7272
QueueImplPtr Queue) {
7373
EventImplPtr NewEvent = nullptr;
74-
const bool IsKernel = CommandGroup->getType() == CG::Kernel;
74+
const CG::CGTYPE Type = CommandGroup->getType();
7575
std::vector<Command *> AuxiliaryCmds;
76-
const bool IsHostKernel = CommandGroup->getType() == CG::RunOnHostIntel;
7776
std::vector<StreamImplPtr> Streams;
7877

79-
if (IsKernel) {
78+
if (Type == CG::Kernel) {
8079
Streams = ((CGExecKernel *)CommandGroup.get())->getStreams();
8180
// Stream's flush buffer memory is mainly initialized in stream's __init
8281
// method. However, this method is not available on host device.
@@ -146,7 +145,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
146145
acquireWriteLock(Lock);
147146

148147
Command *NewCmd = nullptr;
149-
switch (CommandGroup->getType()) {
148+
switch (Type) {
150149
case CG::UpdateHost:
151150
NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup),
152151
DefaultHostQueue, AuxiliaryCmds);
@@ -172,7 +171,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
172171

173172
auto CleanUp = [&]() {
174173
if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
175-
if (IsHostKernel)
174+
if (Type == CG::RunOnHostIntel)
176175
static_cast<ExecCGCommand *>(NewCmd)->releaseCG();
177176

178177
NewEvent->setCommand(nullptr);

0 commit comments

Comments
 (0)