Skip to content

Commit dc9db24

Browse files
AlexeySachkovromanovvlad
authored andcommitted
[SYCL] Add support of function pointers API to SYCL RT (#490)
* [SYCL] Add support of function pointers API to SYCL RT Signed-off-by: Alexey Sachkov <[email protected]>
1 parent 04c37a0 commit dc9db24

File tree

8 files changed

+313
-0
lines changed

8 files changed

+313
-0
lines changed

sycl/include/CL/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <CL/sycl/handler.hpp>
2222
#include <CL/sycl/id.hpp>
2323
#include <CL/sycl/image.hpp>
24+
#include <CL/sycl/intel/function_pointer.hpp>
2425
#include <CL/sycl/intel/sub_group.hpp>
2526
#include <CL/sycl/item.hpp>
2627
#include <CL/sycl/kernel.hpp>

sycl/include/CL/sycl/detail/pi.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ _PI_API(piDevicePartition)
2424
_PI_API(piDeviceRetain)
2525
_PI_API(piDeviceRelease)
2626
_PI_API(piextDeviceSelectBinary)
27+
_PI_API(piextGetDeviceFunctionPointer)
2728
// Context
2829
_PI_API(piContextCreate)
2930
_PI_API(piContextGetInfo)

sycl/include/CL/sycl/detail/pi.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,20 @@ pi_result piextDeviceSelectBinary(
409409
pi_uint32 num_binaries,
410410
pi_device_binary * selected_binary);
411411

412+
/// Retrieves a device function pointer to a user-defined function
413+
/// \arg \c function_name. \arg \c function_pointer_ret is set to 0 if query
414+
/// failed.
415+
///
416+
/// \arg \c program must be built before calling this API. \arg \c device
417+
/// must present in the list of devices returned by \c get_device method for
418+
/// \arg \c program.
419+
///
420+
pi_result piextGetDeviceFunctionPointer(
421+
pi_device device,
422+
pi_program program,
423+
const char * function_name,
424+
pi_uint64 * function_pointer_ret);
425+
412426
//
413427
// Context
414428
//

sycl/include/CL/sycl/detail/program_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,9 @@ class program_impl {
172172
return pi::cast<cl_program>(Program);
173173
}
174174

175+
RT::PiProgram &getHandleRef() { return Program; }
176+
const RT::PiProgram &getHandleRef() const { return Program; }
177+
175178
bool is_host() const { return Context.is_host(); }
176179

177180
template <typename KernelT>
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
//==----------- function_pointer.hpp --- SYCL Function pointers ------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/device.hpp>
12+
#include <CL/sycl/program.hpp>
13+
#include <CL/sycl/stl.hpp>
14+
15+
#include <type_traits>
16+
17+
namespace cl {
18+
namespace sycl {
19+
namespace intel {
20+
21+
// This is a preview extension implementation, intended to provide early access
22+
// to a feature for review and community feedback.
23+
//
24+
// Because the interfaces defined by this header file are not final and are
25+
// subject to change they are not intended to be used by shipping software
26+
// products. If you are interested in using this feature in your software
27+
// product, please let us know!
28+
29+
using device_func_ptr_holder_t = cl::sycl::cl_ulong;
30+
31+
/// \brief this function performs a cast from device_func_ptr_holder_t type
32+
/// to the provided function pointer type.
33+
template <
34+
class FuncType,
35+
typename FuncPtrType = typename std::add_pointer<FuncType>::type,
36+
typename std::enable_if<std::is_function<FuncType>::value, int>::type = 0>
37+
inline FuncPtrType to_device_func_ptr(device_func_ptr_holder_t FptrHolder) {
38+
return reinterpret_cast<FuncPtrType>(FptrHolder);
39+
}
40+
41+
template <class FuncType>
42+
using enable_if_is_function_pointer_t = typename std::enable_if<
43+
std::is_pointer<FuncType>::value &&
44+
std::is_function<typename std::remove_pointer<FuncType>::type>::value,
45+
int>::type;
46+
47+
/// \brief this function can be used only on host side to obtain device function
48+
/// pointer for the specified function.
49+
///
50+
/// \param F - pointer to function to make it work for SYCL Host device
51+
/// \param FuncName - name of the function. Please note that by default names of
52+
/// functions are mangled since SYCL is a C++. To avoid the need ot specifying
53+
/// mangled name here, use `extern "C"`
54+
/// \param P - sycl::program object which will be used to extract device
55+
/// function pointer
56+
/// \param D - sycl::device object which will be used to extract device
57+
/// function pointer
58+
///
59+
/// \returns device_func_ptr_holder_t object which can be used inside a device
60+
/// code. This object must be converted back to a function pointer using
61+
/// `to_device_func_ptr` prior to actual usage.
62+
///
63+
/// Returned value is valid only within device code which was compiled for the
64+
/// specified program and device. Returned value invalidates whenever program
65+
/// is released or re-built
66+
template <class FuncType, enable_if_is_function_pointer_t<FuncType> = 0>
67+
device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName,
68+
program &P, device &D) {
69+
// TODO: drop function name argument and map host function pointer directly to
70+
// a device function pointer
71+
if (D.is_host()) {
72+
return reinterpret_cast<device_func_ptr_holder_t>(F);
73+
}
74+
75+
if (program_state::linked != P.get_state()) {
76+
throw invalid_parameter_error(
77+
"Program must be built before passing to get_device_func_ptr");
78+
}
79+
80+
device_func_ptr_holder_t FPtr = 0;
81+
// FIXME: return value must be checked here, but since we cannot yet check
82+
// if corresponding extension is supported, let's silently ignore it here.
83+
PI_CALL_RESULT(RT::piextGetDeviceFunctionPointer(
84+
detail::pi::cast<pi_device>(detail::getSyclObjImpl(D)->getHandleRef()),
85+
detail::pi::cast<pi_program>(detail::getSyclObjImpl(P)->getHandleRef()),
86+
FuncName, &FPtr));
87+
88+
return FPtr;
89+
}
90+
91+
} // namespace intel
92+
} // namespace sycl
93+
} // namespace cl

sycl/source/detail/pi_opencl.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,39 @@ pi_result OCL(piSamplerCreate)(pi_context context,
241241
return error_code;
242242
}
243243

244+
pi_result OCL(piextGetDeviceFunctionPointer)(pi_device device,
245+
pi_program program,
246+
const char *func_name,
247+
pi_uint64 *function_pointer_ret) {
248+
pi_platform platform;
249+
PI_CALL(piDeviceGetInfo(device, PI_DEVICE_INFO_PLATFORM, sizeof(platform),
250+
&platform, nullptr));
251+
using FuncT =
252+
cl_int(CL_API_CALL *)(cl_device_id, cl_program, const char *, cl_ulong *);
253+
254+
// TODO: add check that device supports corresponding extension
255+
FuncT func_ptr =
256+
reinterpret_cast<FuncT>(clGetExtensionFunctionAddressForPlatform(
257+
cast<cl_platform_id>(platform),
258+
"clGetDeviceFunctionPointerINTEL"));
259+
// TODO: once we have check that device supports corresponding extension,
260+
// we can insert an assertion that func_ptr is not nullptr. For now, let's
261+
// just return an error if failed to query such function
262+
// PI_ASSERT(
263+
// func_ptr != nullptr,
264+
// "Failed to get address of clGetDeviceFunctionPointerINTEL function");
265+
266+
if (!func_ptr) {
267+
if (function_pointer_ret)
268+
*function_pointer_ret = 0;
269+
return PI_INVALID_DEVICE;
270+
}
271+
272+
return PI_CALL_RESULT(func_ptr(cast<cl_device_id>(device),
273+
cast<cl_program>(program), func_name,
274+
function_pointer_ret));
275+
}
276+
244277
// Forward calls to OpenCL RT.
245278
#define _PI_CL(pi_api, ocl_api) \
246279
decltype(::pi_api) * pi_api##OclPtr = \
@@ -256,6 +289,7 @@ _PI_CL(piDevicePartition, clCreateSubDevices)
256289
_PI_CL(piDeviceRetain, clRetainDevice)
257290
_PI_CL(piDeviceRelease, clReleaseDevice)
258291
_PI_CL(piextDeviceSelectBinary, OCL(piextDeviceSelectBinary))
292+
_PI_CL(piextGetDeviceFunctionPointer, OCL(piextGetDeviceFunctionPointer))
259293
// Context
260294
_PI_CL(piContextCreate, clCreateContext)
261295
_PI_CL(piContextGetInfo, clGetContextInfo)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out -lOpenCL
2+
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// FIXME: This test should use runtime early exit once correct check for
6+
// corresponding extension is implemented
7+
// UNSUPPORTED: windows
8+
9+
#include <CL/sycl.hpp>
10+
11+
#include <algorithm>
12+
#include <iostream>
13+
#include <vector>
14+
15+
[[intel::device_indirectly_callable]]
16+
extern "C" int add(int A, int B) { return A + B; }
17+
18+
int main() {
19+
const int Size = 10;
20+
std::vector<long> A(Size, 1);
21+
std::vector<long> B(Size, 2);
22+
23+
cl::sycl::queue Q;
24+
cl::sycl::device D = Q.get_device();
25+
cl::sycl::context C = Q.get_context();
26+
cl::sycl::program P(C);
27+
28+
P.build_with_kernel_type<class K>();
29+
cl::sycl::kernel KE = P.get_kernel<class K>();
30+
31+
auto FptrStorage = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D);
32+
if (!D.is_host()) {
33+
// FIXME: update this check with query to supported extension
34+
// For now, we don't have runtimes that report required OpenCL extension and
35+
// it is hard to understand should this functionality be supported or not.
36+
// So, let's skip this test if FptrStorage is 0, which means that by some
37+
// reason we failed to obtain device function pointer. Just to avoid false
38+
// alarms
39+
if (0 == FptrStorage) {
40+
std::cout << "Test PASSED. (it was actually skipped)" << std::endl;
41+
return 0;
42+
}
43+
}
44+
45+
cl::sycl::buffer<long> BufA(A.data(), cl::sycl::range<1>(Size));
46+
cl::sycl::buffer<long> BufB(B.data(), cl::sycl::range<1>(Size));
47+
48+
Q.submit([&](cl::sycl::handler &CGH) {
49+
auto AccA =
50+
BufA.template get_access<cl::sycl::access::mode::read_write>(CGH);
51+
auto AccB = BufB.template get_access<cl::sycl::access::mode::read>(CGH);
52+
CGH.parallel_for<class K>(
53+
KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
54+
auto Fptr =
55+
cl::sycl::intel::to_device_func_ptr<decltype(add)>(FptrStorage);
56+
AccA[Index] = Fptr(AccA[Index], AccB[Index]);
57+
});
58+
});
59+
60+
auto HostAcc = BufA.get_access<cl::sycl::access::mode::read>();
61+
auto *Data = HostAcc.get_pointer();
62+
63+
if (std::all_of(Data, Data + Size, [](long V) { return V == 3; })) {
64+
std::cout << "Test PASSED." << std::endl;
65+
} else {
66+
std::cout << "Test FAILED." << std::endl;
67+
for (int I = 0; I < Size; ++I) {
68+
std::cout << HostAcc[I] << " ";
69+
}
70+
std::cout << std::endl;
71+
}
72+
73+
return 0;
74+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out -lOpenCL
2+
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// FIXME: This test should use runtime early exit once correct check for
6+
// corresponding extension is implemented
7+
// UNSUPPORTED: windows
8+
9+
#include <CL/sycl.hpp>
10+
11+
#include <iostream>
12+
#include <vector>
13+
14+
[[intel::device_indirectly_callable]] extern "C" int add(int A, int B) {
15+
return A + B;
16+
}
17+
18+
[[intel::device_indirectly_callable]] extern "C" int sub(int A, int B) {
19+
return A - B;
20+
}
21+
22+
int main() {
23+
const int Size = 10;
24+
25+
cl::sycl::queue Q;
26+
cl::sycl::device D = Q.get_device();
27+
cl::sycl::context C = Q.get_context();
28+
cl::sycl::program P(C);
29+
30+
P.build_with_kernel_type<class K>();
31+
cl::sycl::kernel KE = P.get_kernel<class K>();
32+
33+
cl::sycl::buffer<cl::sycl::intel::device_func_ptr_holder_t> DispatchTable(2);
34+
{
35+
auto DTAcc =
36+
DispatchTable.get_access<cl::sycl::access::mode::discard_write>();
37+
DTAcc[0] = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D);
38+
DTAcc[1] = cl::sycl::intel::get_device_func_ptr(&sub, "sub", P, D);
39+
if (!D.is_host()) {
40+
// FIXME: update this check with query to supported extension
41+
// For now, we don't have runtimes that report required OpenCL extension
42+
// and it is hard to understand should this functionality be supported or
43+
// not. So, let's skip this test if DTAcc[i] is 0, which means that by
44+
// some reason we failed to obtain device function pointer. Just to avoid
45+
// false alarms
46+
if (0 == DTAcc[0] || 0 == DTAcc[1]) {
47+
std::cout << "Test PASSED. (it was actually skipped)" << std::endl;
48+
return 0;
49+
}
50+
}
51+
}
52+
53+
for (int Mode = 0; Mode < 2; ++Mode) {
54+
std::vector<int> A(Size, 1);
55+
std::vector<int> B(Size, 2);
56+
57+
cl::sycl::buffer<int> bufA(A.data(), cl::sycl::range<1>(Size));
58+
cl::sycl::buffer<int> bufB(B.data(), cl::sycl::range<1>(Size));
59+
60+
Q.submit([&](cl::sycl::handler &CGH) {
61+
auto AccA =
62+
bufA.template get_access<cl::sycl::access::mode::read_write>(CGH);
63+
auto AccB = bufB.template get_access<cl::sycl::access::mode::read>(CGH);
64+
auto AccDT =
65+
DispatchTable.template get_access<cl::sycl::access ::mode::read>(CGH);
66+
CGH.parallel_for<class K>(
67+
KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
68+
auto FP =
69+
cl::sycl::intel::to_device_func_ptr<int(int, int)>(AccDT[Mode]);
70+
71+
AccA[Index] = FP(AccA[Index], AccB[Index]);
72+
});
73+
});
74+
75+
auto HostAcc = bufA.get_access<cl::sycl::access::mode::read>();
76+
77+
int Reference = Mode == 0 ? 3 : -1;
78+
auto *Data = HostAcc.get_pointer();
79+
80+
if (std::all_of(Data, Data + Size,
81+
[=](long V) { return V == Reference; })) {
82+
std::cout << "Test " << Mode << " PASSED." << std::endl;
83+
} else {
84+
std::cout << "Test " << Mode << " FAILED." << std::endl;
85+
for (int I = 0; I < Size; ++I) {
86+
std::cout << HostAcc[I] << " ";
87+
}
88+
std::cout << std::endl;
89+
}
90+
}
91+
92+
return 0;
93+
}

0 commit comments

Comments
 (0)