Skip to content

[SYCL] Clean up excessive kernel name string copying #17340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions sycl/include/sycl/detail/kernel_name_str_t.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//==---------- kernel_name_str_t.hpp ----- Kernel name type aliases --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <sycl/detail/string.hpp>
#include <sycl/detail/string_view.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
using KernelNameStrT = std::string_view;
using KernelNameStrRefT = std::string_view;
using ABINeutralKernelNameStrT = detail::string_view;
#else
using KernelNameStrT = std::string;
using KernelNameStrRefT = const std::string &;
using ABINeutralKernelNameStrT = detail::string;
#endif

} // namespace detail
} // namespace _V1
} // namespace sycl
5 changes: 3 additions & 2 deletions sycl/include/sycl/detail/string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <cstring>
#include <string>

#pragma once

namespace sycl {
inline namespace _V1 {
namespace detail {
Expand Down Expand Up @@ -58,6 +57,8 @@ class string {
}

const char *c_str() const noexcept { return str ? str : ""; }
const char *data() const noexcept { return c_str(); }
bool empty() { return str ? str[0] : false; }

friend bool operator==(const string &lhs, std::string_view rhs) noexcept {
return rhs == lhs.c_str();
Expand Down
13 changes: 10 additions & 3 deletions sycl/include/sycl/detail/string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <string>
#include <sycl/detail/string.hpp>

#pragma once
#include <string>

namespace sycl {
inline namespace _V1 {
Expand All @@ -26,6 +27,7 @@ class string_view {
string_view(const string_view &strn) noexcept = default;
string_view(string_view &&strn) noexcept = default;
string_view(std::string_view strn) noexcept : str(strn.data()) {}
string_view(const sycl::detail::string &strn) noexcept : str(strn.c_str()) {}

string_view &operator=(string_view &&strn) noexcept = default;
string_view &operator=(const string_view &strn) noexcept = default;
Expand All @@ -35,7 +37,12 @@ class string_view {
return *this;
}

const char *data() const noexcept { return str; }
string_view &operator=(const sycl::detail::string &strn) noexcept {
str = strn.c_str();
return *this;
}

const char *data() const noexcept { return str ? str : ""; }

friend bool operator==(string_view lhs, std::string_view rhs) noexcept {
return rhs == lhs.data();
Expand Down
7 changes: 4 additions & 3 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sycl/detail/id_queries_fit_in_int.hpp>
#include <sycl/detail/impl_utils.hpp>
#include <sycl/detail/kernel_desc.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>
#include <sycl/detail/reduction_forward.hpp>
#include <sycl/detail/string.hpp>
#include <sycl/detail/string_view.hpp>
Expand Down Expand Up @@ -504,7 +505,7 @@ class __SYCL_EXPORT handler {
bool IsKernelCreatedFromSource, bool IsESIMD);

/// \return a string containing name of SYCL kernel.
detail::string getKernelName();
detail::ABINeutralKernelNameStrT getKernelName();

template <typename LambdaNameT> bool lambdaAndKernelHaveEqualName() {
// TODO It is unclear a kernel and a lambda/functor must to be equal or not
Expand All @@ -514,7 +515,7 @@ class __SYCL_EXPORT handler {
// values of arguments for the kernel.
assert(MKernel && "MKernel is not initialized");
const std::string LambdaName = detail::getKernelName<LambdaNameT>();
detail::string KernelName = getKernelName();
detail::ABINeutralKernelNameStrT KernelName = getKernelName();
return KernelName == LambdaName;
}

Expand Down Expand Up @@ -3418,7 +3419,7 @@ class __SYCL_EXPORT handler {
std::shared_ptr<detail::queue_impl> MQueue;
std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
detail::string MKernelName;
detail::ABINeutralKernelNameStrT MKernelName;
/// Storage for a sycl::kernel object.
std::shared_ptr<detail::kernel_impl> MKernel;
/// Pointer to the source host memory or accessor(depending on command type).
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ class CGExecKernel : public CG {
std::shared_ptr<detail::kernel_impl> MSyclKernel;
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
std::vector<ArgDesc> MArgs;
std::string MKernelName;
KernelNameStrT MKernelName;
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
/// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list
Expand All @@ -271,7 +271,7 @@ class CGExecKernel : public CG {
std::shared_ptr<detail::kernel_impl> SyclKernel,
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
std::string KernelName,
KernelNameStrT KernelName,
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
CGType Type, ur_kernel_cache_config_t KernelCacheConfig,
Expand All @@ -293,7 +293,7 @@ class CGExecKernel : public CG {
CGExecKernel(const CGExecKernel &CGExec) = default;

const std::vector<ArgDesc> &getArguments() const { return MArgs; }
const std::string &getKernelName() const { return MKernelName; }
KernelNameStrRefT getKernelName() const { return MKernelName; }
const std::vector<std::shared_ptr<detail::stream_impl>> &getStreams() const {
return MStreams;
}
Expand Down
38 changes: 19 additions & 19 deletions sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ class ManagedDeviceBinaries {
sycl_device_binaries MBinaries;
};

using MangledKernelNameMapT = std::map<std::string, std::string, std::less<>>;
Copy link
Contributor Author

@sergey-semenov sergey-semenov Apr 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to highlight this part to the reviewers: this container used to be a std::unordered_map, but those don't support heterogenous lookup in C++17.

An alternative to this can be using a separate container for storing strings while using this one to work with string views. This would require special handling when merging multiple images, so I'm leaning towards leaving that as a potential future optimization, but let me know if you feel otherwise.

using KernelNameSetT = std::set<std::string, std::less<>>;

// Information unique to images compiled at runtime through the
// ext_oneapi_kernel_compiler extension.
struct KernelCompilerBinaryInfo {
Expand All @@ -152,13 +155,12 @@ struct KernelCompilerBinaryInfo {
: MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {}

KernelCompilerBinaryInfo(syclex::source_language Lang,
std::set<std::string> &&KernelNames)
KernelNameSetT &&KernelNames)
: MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {}

KernelCompilerBinaryInfo(
syclex::source_language Lang, std::set<std::string> &&KernelNames,
std::unordered_map<std::string, std::string> &&MangledKernelNames,
std::string &&Prefix,
syclex::source_language Lang, KernelNameSetT &&KernelNames,
MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix,
std::shared_ptr<ManagedDeviceGlobalsRegistry> &&DeviceGlobalRegistry)
: MLanguage{Lang}, MKernelNames{std::move(KernelNames)},
MMangledKernelNames{std::move(MangledKernelNames)},
Expand Down Expand Up @@ -221,8 +223,8 @@ struct KernelCompilerBinaryInfo {
}

syclex::source_language MLanguage;
std::set<std::string> MKernelNames;
std::unordered_map<std::string, std::string> MMangledKernelNames;
KernelNameSetT MKernelNames;
MangledKernelNameMapT MMangledKernelNames;
std::string MPrefix;
include_pairs_t MIncludePairs;
std::vector<std::shared_ptr<ManagedDeviceGlobalsRegistry>>
Expand Down Expand Up @@ -278,7 +280,7 @@ class device_image_impl {
device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context,
const std::vector<device> &Devices, bundle_state State,
ur_program_handle_t Program, syclex::source_language Lang,
std::set<std::string> &&KernelNames)
KernelNameSetT &&KernelNames)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(Program),
MKernelIDs(std::make_shared<std::vector<kernel_id>>()),
Expand All @@ -292,9 +294,8 @@ class device_image_impl {
const RTDeviceBinaryImage *BinImage, const context &Context,
const std::vector<device> &Devices, bundle_state State,
std::shared_ptr<std::vector<kernel_id>> &&KernelIDs,
syclex::source_language Lang, std::set<std::string> &&KernelNames,
std::unordered_map<std::string, std::string> &&MangledKernelNames,
std::string &&Prefix,
syclex::source_language Lang, KernelNameSetT &&KernelNames,
MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix,
std::shared_ptr<ManagedDeviceGlobalsRegistry> &&DeviceGlobalRegistry)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(nullptr),
Expand Down Expand Up @@ -337,8 +338,7 @@ class device_image_impl {

device_image_impl(const context &Context, const std::vector<device> &Devices,
bundle_state State, ur_program_handle_t Program,
syclex::source_language Lang,
std::set<std::string> &&KernelNames)
syclex::source_language Lang, KernelNameSetT &&KernelNames)
: MBinImage(static_cast<const RTDeviceBinaryImage *>(nullptr)),
MContext(std::move(Context)), MDevices(std::move(Devices)),
MState(State), MProgram(Program),
Expand Down Expand Up @@ -594,17 +594,17 @@ class device_image_impl {
}
}

std::string adjustKernelName(const std::string &Name) const {
std::string adjustKernelName(std::string_view Name) const {
if (!MRTCBinInfo.has_value())
return Name;
return Name.data();

if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) {
auto It = MRTCBinInfo->MMangledKernelNames.find(Name);
if (It != MRTCBinInfo->MMangledKernelNames.end())
return It->second;
}

return Name;
return Name.data();
}

bool hasKernelName(const std::string &Name) const {
Expand All @@ -614,7 +614,7 @@ class device_image_impl {
}

std::shared_ptr<kernel_impl> tryGetSourceBasedKernel(
const std::string &Name, const context &Context,
std::string_view Name, const context &Context,
const std::shared_ptr<kernel_bundle_impl> &OwnerBundle,
const std::shared_ptr<device_image_impl> &Self) const {
if (!(getOriginMask() & ImageOriginKernelCompiler))
Expand Down Expand Up @@ -768,8 +768,8 @@ class device_image_impl {
std::vector<std::shared_ptr<device_image_impl>> Result;
Result.reserve(NewImages.size());
for (auto &[NewImage, KernelIDs] : NewImages) {
std::set<std::string> KernelNames;
std::unordered_map<std::string, std::string> MangledKernelNames;
KernelNameSetT KernelNames;
MangledKernelNameMapT MangledKernelNames;
std::unordered_set<std::string> DeviceGlobalIDSet;
std::vector<std::string> DeviceGlobalIDVec;
std::vector<std::string> DeviceGlobalNames;
Expand Down Expand Up @@ -970,7 +970,7 @@ class device_image_impl {
&KernelNamesStr[0], nullptr);
std::vector<std::string> KernelNames =
detail::split_string(KernelNamesStr, ';');
std::set<std::string> KernelNameSet{KernelNames.begin(), KernelNames.end()};
KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()};

// If caching enabled and kernel not fetched from cache, cache.
if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache &&
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
CGExec->MLine, CGExec->MColumn);
auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
CGExec->MKernelName.c_str(), nullptr, CGExec->MNDRDesc,
CGExec->MKernelName.data(), nullptr, CGExec->MNDRDesc,
CGExec->MKernelBundle, CGExec->MArgs);
if (CmdTraceEvent)
sycl::detail::emitInstrumentationGeneral(
Expand Down Expand Up @@ -1352,12 +1352,12 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
sycl::detail::CGExecKernel *TargetCGExec =
static_cast<sycl::detail::CGExecKernel *>(
MNodeStorage[i]->MCommandGroup.get());
const std::string &TargetKernelName = TargetCGExec->getKernelName();
KernelNameStrRefT TargetKernelName = TargetCGExec->getKernelName();

sycl::detail::CGExecKernel *SourceCGExec =
static_cast<sycl::detail::CGExecKernel *>(
GraphImpl->MNodeStorage[i]->MCommandGroup.get());
const std::string &SourceKernelName = SourceCGExec->getKernelName();
KernelNameStrRefT SourceKernelName = SourceCGExec->getKernelName();

if (TargetKernelName.compare(SourceKernelName) != 0) {
std::stringstream ErrorStream(
Expand Down
10 changes: 5 additions & 5 deletions sycl/source/detail/jit_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ updatePromotedArgs(const ::jit_compiler::SYCLKernelInfo &FusedKernelInfo,

ur_kernel_handle_t jit_compiler::materializeSpecConstants(
const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage,
const std::string &KernelName,
KernelNameStrRefT KernelName,
const std::vector<unsigned char> &SpecConstBlob) {
#ifndef _WIN32
if (!BinImage) {
Expand Down Expand Up @@ -712,7 +712,7 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants(
::jit_compiler::option::JITTargetFeatures::set(TargetFeaturesOpt));

auto MaterializerResult =
MaterializeSpecConstHandle(KernelName.c_str(), BinInfo, SpecConstBlob);
MaterializeSpecConstHandle(KernelName.data(), BinInfo, SpecConstBlob);
if (MaterializerResult.failed()) {
std::string Message{"Compilation for kernel failed with message:\n"};
Message.append(MaterializerResult.getErrorMessage());
Expand Down Expand Up @@ -802,15 +802,15 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
assert(KernelCmd->isFusable());
auto *KernelCG = static_cast<CGExecKernel *>(&CG);

auto &KernelName = KernelCG->MKernelName;
KernelNameStrRefT KernelName = KernelCG->MKernelName;
if (KernelName.empty()) {
printPerformanceWarning(
"Cannot fuse kernel with invalid kernel function name");
return nullptr;
}

auto [DeviceImage, Program] =
retrieveKernelBinary(Queue, KernelName.c_str(), KernelCG);
retrieveKernelBinary(Queue, KernelName.data(), KernelCG);
if (!DeviceImage || !Program) {
printPerformanceWarning("No suitable IR available for fusion");
return nullptr;
Expand Down Expand Up @@ -914,7 +914,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
SYCLTypeToIndices(CurrentNDR.GlobalOffset)};

Ranges.push_back(JITCompilerNDR);
InputKernelInfo.emplace_back(KernelName.c_str(), ArgDescriptor,
InputKernelInfo.emplace_back(KernelName.data(), ArgDescriptor,
JITCompilerNDR, BinInfo);

// Collect information for the fused kernel
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/jit_compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <detail/jit_device_binaries.hpp>
#include <detail/scheduler/commands.hpp>
#include <detail/scheduler/scheduler.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>
#include <sycl/feature_test.hpp>
#if SYCL_EXT_JIT_ENABLE
#include <KernelFusion.h>
Expand Down Expand Up @@ -46,7 +47,7 @@ class jit_compiler {
ur_kernel_handle_t
materializeSpecConstants(const QueueImplPtr &Queue,
const RTDeviceBinaryImage *BinImage,
const std::string &KernelName,
KernelNameStrRefT KernelName,
const std::vector<unsigned char> &SpecConstBlob);

std::pair<sycl_device_binaries, std::string> compileSYCL(
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <sycl/backend_types.hpp>
#include <sycl/context.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>
#include <sycl/device.hpp>
#include <sycl/kernel_bundle.hpp>

Expand Down Expand Up @@ -758,7 +759,7 @@ class kernel_bundle_impl {
}

std::shared_ptr<kernel_impl>
tryGetKernel(const std::string &Name,
tryGetKernel(detail::KernelNameStrRefT Name,
const std::shared_ptr<kernel_bundle_impl> &Self) const {
// TODO: For source-based kernels, it may be faster to keep a map between
// {kernel_name, device} and their corresponding image.
Expand Down
8 changes: 4 additions & 4 deletions sycl/source/detail/kernel_id_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#pragma once

#include <sycl/detail/kernel_name_str_t.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {
Expand All @@ -31,14 +33,12 @@ struct EqualByNameComp {
// identificator
class kernel_id_impl {
public:
kernel_id_impl(std::string Name) : MName(std::move(Name)) {}
kernel_id_impl(KernelNameStrT Name) : MName(std::move(Name)) {}
kernel_id_impl(){};
const char *get_name() { return MName.data(); }

const std::string &get_name_string() { return MName; }

private:
std::string MName;
KernelNameStrT MName;
};

} // namespace detail
Expand Down
Loading