Skip to content

Commit d15b841

Browse files
[SYCL] Add DPC++ RT support for non-native SYCL 2020 spec constants (#3589)
This patch adds support of non-native SYCL 2020 specialization constants to DPC++ runtime. Non-native specialization constants emulate the usage of native specialization constants for AOT compilation and CUDA
1 parent 72e1611 commit d15b841

File tree

13 files changed

+183
-24
lines changed

13 files changed

+183
-24
lines changed

sycl/source/detail/device_image_impl.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,19 @@ class device_image_impl {
169169
return MSpecConstsBlob;
170170
}
171171

172+
RT::PiMem &get_spec_const_buffer_ref() noexcept {
173+
std::lock_guard<std::mutex> Lock{MSpecConstAccessMtx};
174+
if (nullptr == MSpecConstsBuffer) {
175+
const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin();
176+
Plugin.call<PiApiKind::piMemBufferCreate>(
177+
detail::getSyclObjImpl(MContext)->getHandleRef(),
178+
PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_USE,
179+
MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer,
180+
nullptr);
181+
}
182+
return MSpecConstsBuffer;
183+
}
184+
172185
const std::map<std::string, std::vector<SpecConstDescT>> &
173186
get_spec_const_data_ref() const noexcept {
174187
return MSpecConstSymMap;
@@ -262,6 +275,10 @@ class device_image_impl {
262275
// Binary blob which can have values of all specialization constants in the
263276
// image
264277
std::vector<unsigned char> MSpecConstsBlob;
278+
// Buffer containing binary blob which can have values of all specialization
279+
// constants in the image, it is using for storing non-native specialization
280+
// constants
281+
RT::PiMem MSpecConstsBuffer = nullptr;
265282
// Contains map of spec const names to their descriptions + offsets in
266283
// the MSpecConstsBlob
267284
std::map<std::string, std::vector<SpecConstDescT>> MSpecConstSymMap;

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,11 @@ class kernel_bundle_impl {
442442
return SetInDevImg || MSpecConstValues.count(std::string{SpecName}) != 0;
443443
}
444444

445-
const device_image_plain *begin() const { return &MDeviceImages.front(); }
445+
const device_image_plain *begin() const {
446+
assert(!MDeviceImages.empty() && "MDeviceImages can't be empty");
447+
// UB in case MDeviceImages is empty
448+
return &MDeviceImages.front();
449+
}
446450

447451
const device_image_plain *end() const { return &MDeviceImages.back() + 1; }
448452

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,8 @@ void ProgramManager::bringSYCLDeviceImagesToState(
13091309
break;
13101310
}
13111311
case bundle_state::executable:
1312-
// Device image is already in the desired state.
1312+
DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(),
1313+
/*PropList=*/{});
13131314
break;
13141315
}
13151316
break;

sycl/source/detail/scheduler/commands.cpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,8 +1636,9 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {
16361636
}
16371637

16381638
pi_result ExecCGCommand::SetKernelParamsAndLaunch(
1639-
CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
1640-
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
1639+
CGExecKernel *ExecKernel,
1640+
std::shared_ptr<device_image_impl> DeviceImageImpl, RT::PiKernel Kernel,
1641+
NDRDescT &NDRDesc, std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
16411642
ProgramManager::KernelArgMask EliminatedArgMask) {
16421643
vector_class<ArgDesc> &Args = ExecKernel->MArgs;
16431644
// TODO this is not necessary as long as we can guarantee that the arguments
@@ -1692,9 +1693,21 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
16921693
break;
16931694
}
16941695
case kernel_param_kind_t::kind_specialization_constants_buffer: {
1695-
throw cl::sycl::feature_not_supported(
1696-
"SYCL2020 specialization constants are not yet fully supported",
1697-
PI_INVALID_OPERATION);
1696+
if (MQueue->is_host()) {
1697+
throw cl::sycl::feature_not_supported(
1698+
"SYCL2020 specialization constants are not yet supported on host "
1699+
"device",
1700+
PI_INVALID_OPERATION);
1701+
}
1702+
if (DeviceImageImpl != nullptr) {
1703+
RT::PiMem SpecConstsBuffer =
1704+
DeviceImageImpl->get_spec_const_buffer_ref();
1705+
Plugin.call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
1706+
&SpecConstsBuffer);
1707+
} else {
1708+
Plugin.call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
1709+
nullptr);
1710+
}
16981711
break;
16991712
}
17001713
}
@@ -1916,6 +1929,8 @@ cl_int ExecCGCommand::enqueueImp() {
19161929
bool KnownProgram = true;
19171930

19181931
std::shared_ptr<kernel_impl> SyclKernelImpl;
1932+
std::shared_ptr<device_image_impl> DeviceImageImpl;
1933+
19191934
// Use kernel_bundle is available
19201935
if (KernelBundleImplPtr) {
19211936

@@ -1929,9 +1944,7 @@ cl_int ExecCGCommand::enqueueImp() {
19291944
SyclKernelImpl = detail::getSyclObjImpl(SyclKernel);
19301945

19311946
Kernel = SyclKernelImpl->getHandleRef();
1932-
1933-
std::shared_ptr<device_image_impl> DeviceImageImpl =
1934-
SyclKernelImpl->getDeviceImage();
1947+
DeviceImageImpl = SyclKernelImpl->getDeviceImage();
19351948

19361949
Program = DeviceImageImpl->get_program_ref();
19371950

@@ -1979,11 +1992,13 @@ cl_int ExecCGCommand::enqueueImp() {
19791992
if (KernelMutex != nullptr) {
19801993
// For cacheable kernels, we use per-kernel mutex
19811994
std::lock_guard<std::mutex> Lock(*KernelMutex);
1982-
Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
1983-
Event, EliminatedArgMask);
1995+
Error =
1996+
SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc,
1997+
RawEvents, Event, EliminatedArgMask);
19841998
} else {
1985-
Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents,
1986-
Event, EliminatedArgMask);
1999+
Error =
2000+
SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc,
2001+
RawEvents, Event, EliminatedArgMask);
19872002
}
19882003

19892004
if (PI_SUCCESS != Error) {

sycl/source/detail/scheduler/commands.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -519,9 +519,10 @@ class ExecCGCommand : public Command {
519519
AllocaCommandBase *getAllocaForReq(Requirement *Req);
520520

521521
pi_result SetKernelParamsAndLaunch(
522-
CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc,
523-
std::vector<RT::PiEvent> &RawEvents, RT::PiEvent &Event,
524-
ProgramManager::KernelArgMask EliminatedArgMask);
522+
CGExecKernel *ExecKernel,
523+
std::shared_ptr<device_image_impl> DeviceImageImpl, RT::PiKernel Kernel,
524+
NDRDescT &NDRDesc, std::vector<RT::PiEvent> &RawEvents,
525+
RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask);
525526

526527
std::unique_ptr<detail::CG> MCommandGroup;
527528

sycl/source/handler.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ handler::getOrInsertHandlerKernelBundle(bool Insert) const {
5757
if (!KernelBundleImpPtr && Insert) {
5858
KernelBundleImpPtr = detail::getSyclObjImpl(
5959
get_kernel_bundle<bundle_state::input>(MQueue->get_context()));
60+
if (KernelBundleImpPtr->empty()) {
61+
KernelBundleImpPtr = detail::getSyclObjImpl(
62+
get_kernel_bundle<bundle_state::executable>(MQueue->get_context()));
63+
}
6064

6165
detail::ExtendedMemberT EMember = {
6266
detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE, KernelBundleImpPtr};
@@ -340,9 +344,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
340344
break;
341345
}
342346
case kernel_param_kind_t::kind_specialization_constants_buffer: {
343-
throw cl::sycl::feature_not_supported(
344-
"SYCL2020 specialization constants are not yet fully supported",
345-
PI_INVALID_OPERATION);
347+
MArgs.emplace_back(
348+
kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
349+
Index + IndexShift);
346350
break;
347351
}
348352
}

sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@
55
// and parallel_for_work_group to verify that this code compiles and runs
66
// correctly with user's lambda with and without sycl::kernel_handler argument
77

8-
// TODO: enable cuda support when non-native spec constants started to be
9-
// supported
10-
// UNSUPPORTED: cuda
11-
128
#include <CL/sycl.hpp>
139

1410
int main() {
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#include <sycl/sycl.hpp>
2+
3+
#include <cmath>
4+
5+
class Kernel1Name;
6+
class Kernel2Name;
7+
8+
struct TestStruct {
9+
int a;
10+
int b;
11+
};
12+
13+
const static sycl::specialization_id<int> SpecConst1{42};
14+
const static sycl::specialization_id<int> SpecConst2{42};
15+
const static sycl::specialization_id<TestStruct> SpecConst3{TestStruct{42, 42}};
16+
const static sycl::specialization_id<short> SpecConst4{42};
17+
18+
int main() {
19+
sycl::queue Q;
20+
21+
// No support for host device so far
22+
if (Q.is_host())
23+
return 0;
24+
25+
{
26+
sycl::buffer<int, 1> Buf{sycl::range{1}};
27+
Q.submit([&](sycl::handler &CGH) {
28+
CGH.set_specialization_constant<SpecConst2>(1);
29+
auto Acc = Buf.get_access<sycl::access::mode::read_write>(CGH);
30+
CGH.single_task<class Kernel1Name>([=](sycl::kernel_handler KH) {
31+
Acc[0] = KH.get_specialization_constant<SpecConst2>();
32+
});
33+
});
34+
auto Acc = Buf.get_access<sycl::access::mode::read>();
35+
assert(Acc[0] == 1);
36+
}
37+
38+
{
39+
sycl::buffer<TestStruct, 1> Buf{sycl::range{1}};
40+
Q.submit([&](sycl::handler &CGH) {
41+
auto Acc = Buf.get_access<sycl::access::mode::read_write>(CGH);
42+
CGH.set_specialization_constant<SpecConst3>(TestStruct{1, 2});
43+
const auto SC = CGH.get_specialization_constant<SpecConst4>();
44+
assert(SC == 42);
45+
CGH.single_task<class Kernel2Name>([=](sycl::kernel_handler KH) {
46+
Acc[0] = KH.get_specialization_constant<SpecConst3>();
47+
});
48+
});
49+
auto Acc = Buf.get_access<sycl::access::mode::read>();
50+
assert(Acc[0].a == 1 && Acc[0].b == 2);
51+
}
52+
53+
return 0;
54+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: aoc, accelerator
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
6+
// This test checks correctness of SYCL2020 non-native specialization constants
7+
// on accelerator device
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
4+
// This test checks correctness of compiling and running of application with
5+
// kernel lambdas containing kernel_handler arguments and w/o usage of
6+
// specialization constants in AOT mode
7+
8+
#include <CL/sycl.hpp>
9+
10+
int main() {
11+
sycl::queue q;
12+
13+
q.submit([&](sycl::handler &cgh) {
14+
cgh.single_task<class KernelSingleTaskWithKernelHandler>(
15+
[=](sycl::kernel_handler kh) {});
16+
});
17+
18+
q.submit([&](sycl::handler &cgh) {
19+
cgh.parallel_for<class KernelParallelForNDItemWithKernelHandler>(
20+
sycl::nd_range<3>(sycl::range<3>(4, 4, 4), sycl::range<3>(2, 2, 2)),
21+
[=](sycl::nd_item<3> item, sycl::kernel_handler kh) {});
22+
});
23+
24+
// parallel_for_work_group with kernel_handler arg
25+
q.submit([&](sycl::handler &cgh) {
26+
cgh.parallel_for_work_group<
27+
class KernelParallelForWorkGroupWithoutKernelHandler>(
28+
sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2),
29+
[=](sycl::group<3> myGroup, sycl::kernel_handler kh) {
30+
myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {});
31+
myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {});
32+
});
33+
});
34+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// REQUIRES: opencl-aot, cpu
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
5+
6+
// This test checks correctness of SYCL2020 non-native specialization constants
7+
// on CPU device
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// REQUIRES: cuda
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out
4+
// RUN: env SYCL_DEVICE_FILTER=cuda %t.out
5+
6+
// TODO: enable this test then compile-time error in sycl-post-link is fixed
7+
// UNSUPPORTED: cuda
8+
9+
// This test checks correctness of SYCL2020 non-native specialization constants
10+
// on CUDA device
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// REQUIRES: ocloc, gpu
2+
// UNSUPPORTED: cuda
3+
// CUDA is not compatible with SPIR.
4+
5+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device *" %S/Inputs/common.cpp -o %t.out
6+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
7+
8+
// This test checks correctness of SYCL2020 non-native specialization constants
9+
// on GPU device

0 commit comments

Comments
 (0)