Skip to content

Commit 253a018

Browse files
authored
Refactor indirect access calls to minimize invocations. (#2185)
We only need to set indirect access flags once after a kernel is created. Previously we were doing it before every invocation, which is redundant and adds overhead.

Signed-off-by: James Brodman <[email protected]>
1 parent 91da13e commit 253a018

File tree

4 files changed

+19
-5
lines changed

4 files changed

+19
-5
lines changed

sycl/source/detail/program_impl.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -416,6 +416,11 @@ RT::PiKernel program_impl::get_pi_kernel(const string_class &KernelName) const {
416416
Err);
417417
}
418418
Plugin.checkPiResult(Err);
419+
420+
// Some PI Plugins (like OpenCL) require this call to enable USM
421+
// For others, PI will turn this into a NOP.
422+
Plugin.call<PiApiKind::piKernelSetExecInfo>(Kernel, PI_USM_INDIRECT_ACCESS,
423+
sizeof(pi_bool), &PI_TRUE);
419424
}
420425

421426
return Kernel;

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -442,6 +442,11 @@ ProgramManager::getOrCreateKernel(OSModuleHandle M, const context &Context,
442442
Plugin.call<PiApiKind::piKernelCreate>(Program, KernelName.c_str(),
443443
&Result);
444444

445+
// Some PI Plugins (like OpenCL) require this call to enable USM
446+
// For others, PI will turn this into a NOP.
447+
Plugin.call<PiApiKind::piKernelSetExecInfo>(Result, PI_USM_INDIRECT_ACCESS,
448+
sizeof(pi_bool), &PI_TRUE);
449+
445450
return Result;
446451
};
447452

sycl/source/detail/scheduler/commands.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1691,11 +1691,6 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
16911691
adjustNDRangePerKernel(NDRDesc, Kernel,
16921692
*(detail::getSyclObjImpl(MQueue->get_device())));
16931693

1694-
// Some PI Plugins (like OpenCL) require this call to enable USM
1695-
// For others, PI will turn this into a NOP.
1696-
Plugin.call<PiApiKind::piKernelSetExecInfo>(Kernel, PI_USM_INDIRECT_ACCESS,
1697-
sizeof(pi_bool), &PI_TRUE);
1698-
16991694
// Remember this information before the range dimensions are reversed
17001695
const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);
17011696

sycl/unittests/program/KernelRelease.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,13 @@ pi_result redefinedKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name,
6868
return PI_SUCCESS;
6969
}
7070

71+
pi_result redefinedKernelSetExecInfo(pi_kernel kernel,
72+
pi_kernel_exec_info param_name,
73+
size_t param_value_size,
74+
const void *param_value) {
75+
return PI_SUCCESS;
76+
}
77+
7178
TEST(KernelReleaseTest, GetKernelRelease) {
7279
platform Plt{default_selector()};
7380
if (Plt.is_host()) {
@@ -85,6 +92,8 @@ TEST(KernelReleaseTest, GetKernelRelease) {
8592
Mock.redefine<detail::PiApiKind::piKernelRetain>(redefinedKernelRetain);
8693
Mock.redefine<detail::PiApiKind::piKernelRelease>(redefinedKernelRelease);
8794
Mock.redefine<detail::PiApiKind::piKernelGetInfo>(redefinedKernelGetInfo);
95+
Mock.redefine<detail::PiApiKind::piKernelSetExecInfo>(
96+
redefinedKernelSetExecInfo);
8897

8998
context Ctx{Plt};
9099
TestContext.reset(new TestCtx(Ctx));

0 commit comments

Comments
 (0)