Skip to content

Commit b9b6503

Browse files
committed
Save the function pointer for clSetKernelArgMemPointerINTEL per kernel
This eliminates the need to query the kernel for its context and then look up the function pointer in the extension function pointer cache each time a pointer is set as a kernel argument. Signed-off-by: Ben Ashbaugh <[email protected]>
1 parent 7175536 commit b9b6503

File tree

2 files changed

+31
-25
lines changed

2 files changed

+31
-25
lines changed

unified-runtime/source/adapters/opencl/kernel.cpp

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,13 @@ urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex,
100100
return UR_RESULT_SUCCESS;
101101
}
102102

103-
static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) {
103+
// Querying the number of registers that a kernel uses is supported unofficially
104+
// on some devices.
105+
#ifndef CL_KERNEL_REGISTER_COUNT_INTEL
106+
#define CL_KERNEL_REGISTER_COUNT_INTEL 0x425B
107+
#endif
104108

109+
static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) {
105110
switch (static_cast<uint32_t>(URPropName)) {
106111
case UR_KERNEL_INFO_FUNCTION_NAME:
107112
return CL_KERNEL_FUNCTION_NAME;
@@ -115,9 +120,10 @@ static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) {
115120
return CL_KERNEL_PROGRAM;
116121
case UR_KERNEL_INFO_ATTRIBUTES:
117122
return CL_KERNEL_ATTRIBUTES;
118-
// NUM_REGS doesn't have a CL equivalent
119-
case UR_KERNEL_INFO_NUM_REGS:
120123
case UR_KERNEL_INFO_SPILL_MEM_SIZE:
124+
return CL_KERNEL_SPILL_MEM_SIZE_INTEL;
125+
case UR_KERNEL_INFO_NUM_REGS:
126+
return CL_KERNEL_REGISTER_COUNT_INTEL;
121127
default:
122128
return -1;
123129
}
@@ -132,10 +138,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel,
132138
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
133139

134140
switch (propName) {
135-
// OpenCL doesn't have a way to support this.
136-
case UR_KERNEL_INFO_NUM_REGS: {
137-
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
138-
}
139141
case UR_KERNEL_INFO_PROGRAM: {
140142
return ReturnValue(hKernel->Program);
141143
}
@@ -145,14 +147,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel,
145147
case UR_KERNEL_INFO_REFERENCE_COUNT: {
146148
return ReturnValue(hKernel->getReferenceCount());
147149
}
148-
case UR_KERNEL_INFO_SPILL_MEM_SIZE: {
149-
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
150-
}
151150
default: {
152151
size_t CheckPropSize = 0;
153152
cl_int ClResult =
154153
clGetKernelInfo(hKernel->CLKernel, mapURKernelInfoToCL(propName),
155154
propSize, pPropValue, &CheckPropSize);
155+
if (ClResult == CL_INVALID_VALUE) {
156+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
157+
}
156158
if (pPropValue && CheckPropSize != propSize) {
157159
return UR_RESULT_ERROR_INVALID_SIZE;
158160
}
@@ -334,12 +336,18 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
334336
}
335337

336338
UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) {
339+
340+
UR_ASSERT(hKernel->getReferenceCount() > 0u, UR_RESULT_ERROR_INVALID_KERNEL);
341+
337342
hKernel->incrementReferenceCount();
338343
return UR_RESULT_SUCCESS;
339344
}
340345

341346
UR_APIEXPORT ur_result_t UR_APICALL
342347
urKernelRelease(ur_kernel_handle_t hKernel) {
348+
349+
UR_ASSERT(hKernel->getReferenceCount() != 0, UR_RESULT_ERROR_INVALID_KERNEL);
350+
343351
if (hKernel->decrementReferenceCount() == 0) {
344352
delete hKernel;
345353
}
@@ -428,25 +436,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
428436
ur_kernel_handle_t hKernel, uint32_t argIndex,
429437
const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) {
430438

431-
cl_context CLContext;
432-
CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->CLKernel, CL_KERNEL_CONTEXT,
433-
sizeof(cl_context), &CLContext,
434-
nullptr));
435-
436-
clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr;
437-
UR_RETURN_ON_FAILURE(
438-
cl_ext::getExtFuncFromContext<clSetKernelArgMemPointerINTEL_fn>(
439-
CLContext,
440-
ur::cl::getAdapter()->fnCache.clSetKernelArgMemPointerINTELCache,
441-
cl_ext::SetKernelArgMemPointerName, &FuncPtr));
442-
443-
if (FuncPtr) {
444-
CL_RETURN_ON_FAILURE(
445-
FuncPtr(hKernel->CLKernel, static_cast<cl_uint>(argIndex), pArgValue));
439+
if (hKernel->clSetKernelArgMemPointerINTEL == nullptr) {
440+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
446441
}
447442

443+
CL_RETURN_ON_FAILURE(hKernel->clSetKernelArgMemPointerINTEL(
444+
hKernel->CLKernel, static_cast<cl_uint>(argIndex), pArgValue));
445+
448446
return UR_RESULT_SUCCESS;
449447
}
448+
450449
UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
451450
ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) {
452451

unified-runtime/source/adapters/opencl/kernel.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12+
#include "adapter.hpp"
1213
#include "common.hpp"
1314
#include "context.hpp"
1415
#include "program.hpp"
@@ -22,13 +23,19 @@ struct ur_kernel_handle_t_ {
2223
ur_context_handle_t Context;
2324
std::atomic<uint32_t> RefCount = 0;
2425
bool IsNativeHandleOwned = true;
26+
clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointerINTEL = nullptr;
2527

2628
ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program,
2729
ur_context_handle_t Context)
2830
: CLKernel(Kernel), Program(Program), Context(Context) {
2931
RefCount = 1;
3032
urProgramRetain(Program);
3133
urContextRetain(Context);
34+
35+
cl_ext::getExtFuncFromContext<clSetKernelArgMemPointerINTEL_fn>(
36+
Context->CLContext,
37+
ur::cl::getAdapter()->fnCache.clSetKernelArgMemPointerINTELCache,
38+
cl_ext::SetKernelArgMemPointerName, &clSetKernelArgMemPointerINTEL);
3239
}
3340

3441
~ur_kernel_handle_t_() {

0 commit comments

Comments
 (0)