Skip to content

Commit 8a5462d

Browse files
[SYCL] Clean up excessive kernel name string copying (#17340)
On the application side, kernel names can be retrieved as a const char* from the integration header or built-ins. On the library side, they are retrieved from the offload entries. With the recent introduction of the __sycl_unregister_lib implementation, there shouldn't be any need to store copies of those strings anymore.
1 parent f90ca8e commit 8a5462d

27 files changed

+171
-116
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//==---------- kernel_name_str_t.hpp ----- Kernel name type aliases --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <sycl/detail/string.hpp>
10+
#include <sycl/detail/string_view.hpp>
11+
12+
namespace sycl {
13+
inline namespace _V1 {
14+
namespace detail {
15+
16+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
17+
using KernelNameStrT = std::string_view;
18+
using KernelNameStrRefT = std::string_view;
19+
using ABINeutralKernelNameStrT = detail::string_view;
20+
#else
21+
using KernelNameStrT = std::string;
22+
using KernelNameStrRefT = const std::string &;
23+
using ABINeutralKernelNameStrT = detail::string;
24+
#endif
25+
26+
} // namespace detail
27+
} // namespace _V1
28+
} // namespace sycl

sycl/include/sycl/detail/string.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
#pragma once
89

910
#include <cstring>
1011
#include <string>
1112

12-
#pragma once
13-
1413
namespace sycl {
1514
inline namespace _V1 {
1615
namespace detail {
@@ -58,6 +57,8 @@ class string {
5857
}
5958

6059
const char *c_str() const noexcept { return str ? str : ""; }
60+
const char *data() const noexcept { return c_str(); }
61+
bool empty() { return str ? str[0] : false; }
6162

6263
friend bool operator==(const string &lhs, std::string_view rhs) noexcept {
6364
return rhs == lhs.c_str();

sycl/include/sycl/detail/string_view.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
#pragma once
89

9-
#include <string>
10+
#include <sycl/detail/string.hpp>
1011

11-
#pragma once
12+
#include <string>
1213

1314
namespace sycl {
1415
inline namespace _V1 {
@@ -26,6 +27,7 @@ class string_view {
2627
string_view(const string_view &strn) noexcept = default;
2728
string_view(string_view &&strn) noexcept = default;
2829
string_view(std::string_view strn) noexcept : str(strn.data()) {}
30+
string_view(const sycl::detail::string &strn) noexcept : str(strn.c_str()) {}
2931

3032
string_view &operator=(string_view &&strn) noexcept = default;
3133
string_view &operator=(const string_view &strn) noexcept = default;
@@ -35,7 +37,12 @@ class string_view {
3537
return *this;
3638
}
3739

38-
const char *data() const noexcept { return str; }
40+
string_view &operator=(const sycl::detail::string &strn) noexcept {
41+
str = strn.c_str();
42+
return *this;
43+
}
44+
45+
const char *data() const noexcept { return str ? str : ""; }
3946

4047
friend bool operator==(string_view lhs, std::string_view rhs) noexcept {
4148
return rhs == lhs.data();

sycl/include/sycl/handler.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <sycl/detail/id_queries_fit_in_int.hpp>
1919
#include <sycl/detail/impl_utils.hpp>
2020
#include <sycl/detail/kernel_desc.hpp>
21+
#include <sycl/detail/kernel_name_str_t.hpp>
2122
#include <sycl/detail/reduction_forward.hpp>
2223
#include <sycl/detail/string.hpp>
2324
#include <sycl/detail/string_view.hpp>
@@ -504,7 +505,7 @@ class __SYCL_EXPORT handler {
504505
bool IsKernelCreatedFromSource, bool IsESIMD);
505506

506507
/// \return a string containing name of SYCL kernel.
507-
detail::string getKernelName();
508+
detail::ABINeutralKernelNameStrT getKernelName();
508509

509510
template <typename LambdaNameT> bool lambdaAndKernelHaveEqualName() {
510511
// TODO It is unclear a kernel and a lambda/functor must to be equal or not
@@ -514,7 +515,7 @@ class __SYCL_EXPORT handler {
514515
// values of arguments for the kernel.
515516
assert(MKernel && "MKernel is not initialized");
516517
const std::string LambdaName = detail::getKernelName<LambdaNameT>();
517-
detail::string KernelName = getKernelName();
518+
detail::ABINeutralKernelNameStrT KernelName = getKernelName();
518519
return KernelName == LambdaName;
519520
}
520521

@@ -3429,7 +3430,7 @@ class __SYCL_EXPORT handler {
34293430
std::shared_ptr<detail::queue_impl> MQueue;
34303431
std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
34313432
std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
3432-
detail::string MKernelName;
3433+
detail::ABINeutralKernelNameStrT MKernelName;
34333434
/// Storage for a sycl::kernel object.
34343435
std::shared_ptr<detail::kernel_impl> MKernel;
34353436
/// Pointer to the source host memory or accessor(depending on command type).

sycl/source/detail/cg.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ class CGExecKernel : public CG {
256256
std::shared_ptr<detail::kernel_impl> MSyclKernel;
257257
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
258258
std::vector<ArgDesc> MArgs;
259-
std::string MKernelName;
259+
KernelNameStrT MKernelName;
260260
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
261261
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
262262
/// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list
@@ -271,7 +271,7 @@ class CGExecKernel : public CG {
271271
std::shared_ptr<detail::kernel_impl> SyclKernel,
272272
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
273273
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
274-
std::string KernelName,
274+
KernelNameStrT KernelName,
275275
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
276276
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
277277
CGType Type, ur_kernel_cache_config_t KernelCacheConfig,
@@ -293,7 +293,7 @@ class CGExecKernel : public CG {
293293
CGExecKernel(const CGExecKernel &CGExec) = default;
294294

295295
const std::vector<ArgDesc> &getArguments() const { return MArgs; }
296-
const std::string &getKernelName() const { return MKernelName; }
296+
KernelNameStrRefT getKernelName() const { return MKernelName; }
297297
const std::vector<std::shared_ptr<detail::stream_impl>> &getStreams() const {
298298
return MStreams;
299299
}

sycl/source/detail/device_image_impl.hpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ class ManagedDeviceBinaries {
142142
sycl_device_binaries MBinaries;
143143
};
144144

145+
using MangledKernelNameMapT = std::map<std::string, std::string, std::less<>>;
146+
using KernelNameSetT = std::set<std::string, std::less<>>;
147+
145148
// Information unique to images compiled at runtime through the
146149
// ext_oneapi_kernel_compiler extension.
147150
struct KernelCompilerBinaryInfo {
@@ -152,13 +155,12 @@ struct KernelCompilerBinaryInfo {
152155
: MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {}
153156

154157
KernelCompilerBinaryInfo(syclex::source_language Lang,
155-
std::set<std::string> &&KernelNames)
158+
KernelNameSetT &&KernelNames)
156159
: MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {}
157160

158161
KernelCompilerBinaryInfo(
159-
syclex::source_language Lang, std::set<std::string> &&KernelNames,
160-
std::unordered_map<std::string, std::string> &&MangledKernelNames,
161-
std::string &&Prefix,
162+
syclex::source_language Lang, KernelNameSetT &&KernelNames,
163+
MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix,
162164
std::shared_ptr<ManagedDeviceGlobalsRegistry> &&DeviceGlobalRegistry)
163165
: MLanguage{Lang}, MKernelNames{std::move(KernelNames)},
164166
MMangledKernelNames{std::move(MangledKernelNames)},
@@ -221,8 +223,8 @@ struct KernelCompilerBinaryInfo {
221223
}
222224

223225
syclex::source_language MLanguage;
224-
std::set<std::string> MKernelNames;
225-
std::unordered_map<std::string, std::string> MMangledKernelNames;
226+
KernelNameSetT MKernelNames;
227+
MangledKernelNameMapT MMangledKernelNames;
226228
std::string MPrefix;
227229
include_pairs_t MIncludePairs;
228230
std::vector<std::shared_ptr<ManagedDeviceGlobalsRegistry>>
@@ -278,7 +280,7 @@ class device_image_impl {
278280
device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context,
279281
const std::vector<device> &Devices, bundle_state State,
280282
ur_program_handle_t Program, syclex::source_language Lang,
281-
std::set<std::string> &&KernelNames)
283+
KernelNameSetT &&KernelNames)
282284
: MBinImage(BinImage), MContext(std::move(Context)),
283285
MDevices(std::move(Devices)), MState(State), MProgram(Program),
284286
MKernelIDs(std::make_shared<std::vector<kernel_id>>()),
@@ -292,9 +294,8 @@ class device_image_impl {
292294
const RTDeviceBinaryImage *BinImage, const context &Context,
293295
const std::vector<device> &Devices, bundle_state State,
294296
std::shared_ptr<std::vector<kernel_id>> &&KernelIDs,
295-
syclex::source_language Lang, std::set<std::string> &&KernelNames,
296-
std::unordered_map<std::string, std::string> &&MangledKernelNames,
297-
std::string &&Prefix,
297+
syclex::source_language Lang, KernelNameSetT &&KernelNames,
298+
MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix,
298299
std::shared_ptr<ManagedDeviceGlobalsRegistry> &&DeviceGlobalRegistry)
299300
: MBinImage(BinImage), MContext(std::move(Context)),
300301
MDevices(std::move(Devices)), MState(State), MProgram(nullptr),
@@ -337,8 +338,7 @@ class device_image_impl {
337338

338339
device_image_impl(const context &Context, const std::vector<device> &Devices,
339340
bundle_state State, ur_program_handle_t Program,
340-
syclex::source_language Lang,
341-
std::set<std::string> &&KernelNames)
341+
syclex::source_language Lang, KernelNameSetT &&KernelNames)
342342
: MBinImage(static_cast<const RTDeviceBinaryImage *>(nullptr)),
343343
MContext(std::move(Context)), MDevices(std::move(Devices)),
344344
MState(State), MProgram(Program),
@@ -594,17 +594,17 @@ class device_image_impl {
594594
}
595595
}
596596

597-
std::string adjustKernelName(const std::string &Name) const {
597+
std::string adjustKernelName(std::string_view Name) const {
598598
if (!MRTCBinInfo.has_value())
599-
return Name;
599+
return Name.data();
600600

601601
if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) {
602602
auto It = MRTCBinInfo->MMangledKernelNames.find(Name);
603603
if (It != MRTCBinInfo->MMangledKernelNames.end())
604604
return It->second;
605605
}
606606

607-
return Name;
607+
return Name.data();
608608
}
609609

610610
bool hasKernelName(const std::string &Name) const {
@@ -614,7 +614,7 @@ class device_image_impl {
614614
}
615615

616616
std::shared_ptr<kernel_impl> tryGetSourceBasedKernel(
617-
const std::string &Name, const context &Context,
617+
std::string_view Name, const context &Context,
618618
const std::shared_ptr<kernel_bundle_impl> &OwnerBundle,
619619
const std::shared_ptr<device_image_impl> &Self) const {
620620
if (!(getOriginMask() & ImageOriginKernelCompiler))
@@ -768,8 +768,8 @@ class device_image_impl {
768768
std::vector<std::shared_ptr<device_image_impl>> Result;
769769
Result.reserve(NewImages.size());
770770
for (auto &[NewImage, KernelIDs] : NewImages) {
771-
std::set<std::string> KernelNames;
772-
std::unordered_map<std::string, std::string> MangledKernelNames;
771+
KernelNameSetT KernelNames;
772+
MangledKernelNameMapT MangledKernelNames;
773773
std::unordered_set<std::string> DeviceGlobalIDSet;
774774
std::vector<std::string> DeviceGlobalIDVec;
775775
std::vector<std::string> DeviceGlobalNames;
@@ -970,7 +970,7 @@ class device_image_impl {
970970
&KernelNamesStr[0], nullptr);
971971
std::vector<std::string> KernelNames =
972972
detail::split_string(KernelNamesStr, ';');
973-
std::set<std::string> KernelNameSet{KernelNames.begin(), KernelNames.end()};
973+
KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()};
974974

975975
// If caching enabled and kernel not fetched from cache, cache.
976976
if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache &&

sycl/source/detail/graph_impl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,7 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
795795
CGExec->MLine, CGExec->MColumn);
796796
auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
797797
StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
798-
CGExec->MKernelName.c_str(), nullptr, CGExec->MNDRDesc,
798+
CGExec->MKernelName.data(), nullptr, CGExec->MNDRDesc,
799799
CGExec->MKernelBundle, CGExec->MArgs);
800800
if (CmdTraceEvent)
801801
sycl::detail::emitInstrumentationGeneral(
@@ -1352,12 +1352,12 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
13521352
sycl::detail::CGExecKernel *TargetCGExec =
13531353
static_cast<sycl::detail::CGExecKernel *>(
13541354
MNodeStorage[i]->MCommandGroup.get());
1355-
const std::string &TargetKernelName = TargetCGExec->getKernelName();
1355+
KernelNameStrRefT TargetKernelName = TargetCGExec->getKernelName();
13561356

13571357
sycl::detail::CGExecKernel *SourceCGExec =
13581358
static_cast<sycl::detail::CGExecKernel *>(
13591359
GraphImpl->MNodeStorage[i]->MCommandGroup.get());
1360-
const std::string &SourceKernelName = SourceCGExec->getKernelName();
1360+
KernelNameStrRefT SourceKernelName = SourceCGExec->getKernelName();
13611361

13621362
if (TargetKernelName.compare(SourceKernelName) != 0) {
13631363
std::stringstream ErrorStream(

sycl/source/detail/jit_compiler.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ updatePromotedArgs(const ::jit_compiler::SYCLKernelInfo &FusedKernelInfo,
662662

663663
ur_kernel_handle_t jit_compiler::materializeSpecConstants(
664664
const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage,
665-
const std::string &KernelName,
665+
KernelNameStrRefT KernelName,
666666
const std::vector<unsigned char> &SpecConstBlob) {
667667
#ifndef _WIN32
668668
if (!BinImage) {
@@ -712,7 +712,7 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants(
712712
::jit_compiler::option::JITTargetFeatures::set(TargetFeaturesOpt));
713713

714714
auto MaterializerResult =
715-
MaterializeSpecConstHandle(KernelName.c_str(), BinInfo, SpecConstBlob);
715+
MaterializeSpecConstHandle(KernelName.data(), BinInfo, SpecConstBlob);
716716
if (MaterializerResult.failed()) {
717717
std::string Message{"Compilation for kernel failed with message:\n"};
718718
Message.append(MaterializerResult.getErrorMessage());
@@ -802,15 +802,15 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
802802
assert(KernelCmd->isFusable());
803803
auto *KernelCG = static_cast<CGExecKernel *>(&CG);
804804

805-
auto &KernelName = KernelCG->MKernelName;
805+
KernelNameStrRefT KernelName = KernelCG->MKernelName;
806806
if (KernelName.empty()) {
807807
printPerformanceWarning(
808808
"Cannot fuse kernel with invalid kernel function name");
809809
return nullptr;
810810
}
811811

812812
auto [DeviceImage, Program] =
813-
retrieveKernelBinary(Queue, KernelName.c_str(), KernelCG);
813+
retrieveKernelBinary(Queue, KernelName.data(), KernelCG);
814814
if (!DeviceImage || !Program) {
815815
printPerformanceWarning("No suitable IR available for fusion");
816816
return nullptr;
@@ -914,7 +914,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
914914
SYCLTypeToIndices(CurrentNDR.GlobalOffset)};
915915

916916
Ranges.push_back(JITCompilerNDR);
917-
InputKernelInfo.emplace_back(KernelName.c_str(), ArgDescriptor,
917+
InputKernelInfo.emplace_back(KernelName.data(), ArgDescriptor,
918918
JITCompilerNDR, BinInfo);
919919

920920
// Collect information for the fused kernel

sycl/source/detail/jit_compiler.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <detail/jit_device_binaries.hpp>
1212
#include <detail/scheduler/commands.hpp>
1313
#include <detail/scheduler/scheduler.hpp>
14+
#include <sycl/detail/kernel_name_str_t.hpp>
1415
#include <sycl/feature_test.hpp>
1516
#if SYCL_EXT_JIT_ENABLE
1617
#include <KernelFusion.h>
@@ -46,7 +47,7 @@ class jit_compiler {
4647
ur_kernel_handle_t
4748
materializeSpecConstants(const QueueImplPtr &Queue,
4849
const RTDeviceBinaryImage *BinImage,
49-
const std::string &KernelName,
50+
KernelNameStrRefT KernelName,
5051
const std::vector<unsigned char> &SpecConstBlob);
5152

5253
std::pair<sycl_device_binaries, std::string> compileSYCL(

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <sycl/backend_types.hpp>
1515
#include <sycl/context.hpp>
1616
#include <sycl/detail/common.hpp>
17+
#include <sycl/detail/kernel_name_str_t.hpp>
1718
#include <sycl/device.hpp>
1819
#include <sycl/kernel_bundle.hpp>
1920

@@ -758,7 +759,7 @@ class kernel_bundle_impl {
758759
}
759760

760761
std::shared_ptr<kernel_impl>
761-
tryGetKernel(const std::string &Name,
762+
tryGetKernel(detail::KernelNameStrRefT Name,
762763
const std::shared_ptr<kernel_bundle_impl> &Self) const {
763764
// TODO: For source-based kernels, it may be faster to keep a map between
764765
// {kernel_name, device} and their corresponding image.

sycl/source/detail/kernel_id_impl.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#pragma once
1010

11+
#include <sycl/detail/kernel_name_str_t.hpp>
12+
1113
namespace sycl {
1214
inline namespace _V1 {
1315
namespace detail {
@@ -31,14 +33,12 @@ struct EqualByNameComp {
3133
// identificator
3234
class kernel_id_impl {
3335
public:
34-
kernel_id_impl(std::string Name) : MName(std::move(Name)) {}
36+
kernel_id_impl(KernelNameStrT Name) : MName(std::move(Name)) {}
3537
kernel_id_impl(){};
3638
const char *get_name() { return MName.data(); }
3739

38-
const std::string &get_name_string() { return MName; }
39-
4040
private:
41-
std::string MName;
41+
KernelNameStrT MName;
4242
};
4343

4444
} // namespace detail

0 commit comments

Comments
 (0)