Skip to content

[SYCL] Make SYCL RT compatible with the new offload entry type #17109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/test/Driver/sycl-linker-wrapper-image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int main() {

// CHECK-DAG: %_pi_device_binary_property_struct = type { ptr, ptr, i32, i64 }
// CHECK-DAG: %_pi_device_binary_property_set_struct = type { ptr, ptr, ptr }
// CHECK-DAG: %struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 }
// CHECK-DAG: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
// CHECK-DAG: %__sycl.tgt_device_image = type { i16, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
// CHECK-DAG: %__sycl.tgt_bin_desc = type { i16, i16, ptr, ptr, ptr }

Expand All @@ -55,7 +55,7 @@ int main() {
// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }]
// CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [912 x i8]
// CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00"
// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }]
// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr null, ptr @__sycl_offload_entry_name, i64 0, i64 0, ptr null }]
// CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 912], section ".tgtimg", align 16
// CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata"
// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }]
Expand Down
34 changes: 16 additions & 18 deletions llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,6 @@ int8_t binaryImageFormatToInt8(SYCLBinaryImageFormat Format) {
}
}

StructType* getLegacyOffloadEntryTy(Module &M) {
LLVMContext &C = M.getContext();
StructType *EntryTy =
StructType::getTypeByName(C, "struct.__tgt_offload_entry");
if (!EntryTy)
EntryTy = StructType::create(
"struct.__tgt_offload_entry", PointerType::getUnqual(C),
PointerType::getUnqual(C), M.getDataLayout().getIntPtrType(C),
Type::getInt32Ty(C), Type::getInt32Ty(C));
return EntryTy;
}

/// Wrapper helper class that creates all LLVM IRs wrapping given images.
/// Note: All created structures, "_pi_device_*", "__sycl_*" and "__tgt*" names
/// in this implementation are aligned with "sycl/include/sycl/detail/pi.h".
Expand All @@ -95,7 +83,7 @@ struct Wrapper {

SyclPropTy = getSyclPropTy();
SyclPropSetTy = getSyclPropSetTy();
EntryTy = getLegacyOffloadEntryTy(M);
EntryTy = offloading::getEntryTy(M);
SyclDeviceImageTy = getSyclDeviceImageTy();
SyclBinDescTy = getSyclBinDescTy();
}
Expand Down Expand Up @@ -399,16 +387,26 @@ struct Wrapper {
return std::pair<Constant *, Constant *>(NullPtr, NullPtr);
}

auto *Zero = ConstantInt::get(getSizeTTy(), 0);
auto *I64Zero = ConstantInt::get(Type::getInt64Ty(C), 0);
auto *I32Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
auto *NullPtr = Constant::getNullValue(PointerType::getUnqual(C));

SmallVector<Constant *> EntriesInits;
std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(Entries);
for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI)
EntriesInits.push_back(ConstantStruct::get(
EntryTy, NullPtr, addStringToModule(*LI, "__sycl_offload_entry_name"),
Zero, I32Zero, I32Zero));
for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
Constant *EntryData[] = {
ConstantExpr::getNullValue(Type::getInt64Ty(C)),
ConstantInt::get(Type::getInt16Ty(C), 1),
ConstantInt::get(Type::getInt16Ty(C), object::OffloadKind::OFK_SYCL),
I32Zero,
NullPtr,
addStringToModule(*LI, "__sycl_offload_entry_name"),
I64Zero,
I64Zero,
NullPtr};

EntriesInits.push_back(ConstantStruct::get(EntryTy, EntryData));
}

auto *Arr = ConstantArray::get(ArrayType::get(EntryTy, EntriesInits.size()),
EntriesInits);
Expand Down
57 changes: 57 additions & 0 deletions sycl/source/detail/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,70 @@

#define __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION "Requires finalization"

// New entry type after
// https://github.com/llvm/llvm-project/pull/124018
// This is a replica of the EntryTy data structure in
// llvm/include/llvm/Frontend/Offloading/Utility.h.
struct _sycl_offload_entry_struct_new {
/// Reserved bytes used to detect an older version of the struct, always zero.
uint64_t Reserved;
/// The current version of the struct for runtime forward compatibility.
uint16_t Version;
/// The expected consumer of this entry, e.g. CUDA or OpenMP.
uint16_t Kind;
/// Flags associated with the global.
uint32_t Flags;
/// The address of the global to be registered by the runtime.
void *Address;
/// The name of the symbol in the device image.
char *SymbolName;
/// The number of bytes the symbol takes.
uint64_t Size;
/// Extra generic data used to register this entry.
uint64_t Data;
/// An extra pointer, usually null.
void *AuxAddr;
};
using sycl_offload_entry_new = _sycl_offload_entry_struct_new *;

// Entry type, matches OpenMP for compatibility
struct _sycl_offload_entry_struct {
void *addr;
char *name;
size_t size;
int32_t flags;
int32_t reserved;

inline bool IsNewOffloadEntryType() {
// Assume this is the new version of the struct.
auto newStruct = reinterpret_cast<sycl_offload_entry_new>(this);

// Check if first 64 bits is equal to 0, next 16 bits is equal to 1, next 16
// bits is equal to 4 (OK_SYCL), and check if Flags are zero. If all these
// conditions are met, then this is a newer version of the struct.
// We can not just rely on checking the first 64 bits, because even for the
// older version of the struct, the first 64 bits (void* addr) are zero.
return newStruct->Reserved == 0 && newStruct->Version == 1 &&
newStruct->Kind == 4 && newStruct->Flags == 0;
}

// Name is the only field that's used in SYCL.
inline char *GetName() {
if (IsNewOffloadEntryType())
return reinterpret_cast<sycl_offload_entry_new>(this)->SymbolName;

return name;
}

// Increment the pointer to the next entry. A mix of old and new offload entry
// types is not supported.
inline _sycl_offload_entry_struct *Increment() {
if (IsNewOffloadEntryType())
return reinterpret_cast<_sycl_offload_entry_struct *>(
reinterpret_cast<sycl_offload_entry_new>(this) + 1);

return this + 1;
}
};
using sycl_offload_entry = _sycl_offload_entry_struct *;

Expand Down
5 changes: 3 additions & 2 deletions sycl/source/detail/device_binary_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,10 @@ void RTDeviceBinaryImage::print() const {
std::cerr << " Link options : "
<< (Bin->LinkOptions ? Bin->LinkOptions : "NULL") << "\n";
std::cerr << " Entries : ";

for (sycl_offload_entry EntriesIt = Bin->EntriesBegin;
EntriesIt != Bin->EntriesEnd; ++EntriesIt)
std::cerr << EntriesIt->name << " ";
EntriesIt != Bin->EntriesEnd; EntriesIt = EntriesIt->Increment())
std::cerr << EntriesIt->GetName() << " ";
std::cerr << "\n";
std::cerr << " Properties [" << Bin->PropertySetsBegin << "-"
<< Bin->PropertySetsEnd << "]:\n";
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/persistent_device_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ getSortedImages(const std::vector<const RTDeviceBinaryImage *> &Imgs) {
[](const RTDeviceBinaryImage *A, const RTDeviceBinaryImage *B) {
// All entry names are unique among these images, so comparing the
// first ones is enough.
return std::strcmp(A->getRawData().EntriesBegin->name,
B->getRawData().EntriesBegin->name) < 0;
return std::strcmp(A->getRawData().EntriesBegin->GetName(),
B->getRawData().EntriesBegin->GetName()) < 0;
});
return SortedImgs;
}
Expand Down
31 changes: 16 additions & 15 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1867,33 +1867,34 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) {
m_BinImg2KernelIDs[Img.get()].reset(new std::vector<kernel_id>);

for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will the iterator work when the input binary is using legacy entry format? sizeof entries are different.

++EntriesIt) {
EntriesIt = EntriesIt->Increment()) {

auto name = EntriesIt->GetName();

// Skip creating unique kernel ID if it is a service kernel.
// SYCL service kernels are identified by having
// __sycl_service_kernel__ in the mangled name, primarily as part of
// the namespace of the name type.
if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get()));
if (std::strstr(name, "__sycl_service_kernel__")) {
m_ServiceKernels.insert(std::make_pair(name, Img.get()));
continue;
}

// Skip creating unique kernel ID if it is an exported device
// function. Exported device functions appear in the offload entries
// among kernels, but are identifiable by being listed in properties.
if (m_ExportedSymbolImages.find(EntriesIt->name) !=
m_ExportedSymbolImages.end())
if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end())
continue;

// ... and create a unique kernel ID for the entry
auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
auto It = m_KernelName2KernelIDs.find(name);
if (It == m_KernelName2KernelIDs.end()) {
std::shared_ptr<detail::kernel_id_impl> KernelIDImpl =
std::make_shared<detail::kernel_id_impl>(EntriesIt->name);
std::make_shared<detail::kernel_id_impl>(name);
sycl::kernel_id KernelID =
detail::createSyclObjFromImpl<sycl::kernel_id>(KernelIDImpl);

It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID);
It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID);
}
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
m_BinImg2KernelIDs[Img.get()]->push_back(It->second);
Expand Down Expand Up @@ -2020,25 +2021,25 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {

// Unmap the unique kernel IDs for the offload entries
for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE;
++EntriesIt) {
EntriesIt = EntriesIt->Increment()) {

// Drop entry for service kernel
if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) {
m_ServiceKernels.erase(EntriesIt->name);
if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) {
m_ServiceKernels.erase(EntriesIt->GetName());
continue;
}

// Exported device functions won't have a kernel ID
if (m_ExportedSymbolImages.find(EntriesIt->name) !=
if (m_ExportedSymbolImages.find(EntriesIt->GetName()) !=
m_ExportedSymbolImages.end()) {
continue;
}

// remove everything associated with this KernelName
m_KernelUsesAssert.erase(EntriesIt->name);
m_KernelImplicitLocalArgPos.erase(EntriesIt->name);
m_KernelUsesAssert.erase(EntriesIt->GetName());
m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName());

if (auto It = m_KernelName2KernelIDs.find(EntriesIt->name);
if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName());
It != m_KernelName2KernelIDs.end()) {
m_KernelName2KernelIDs.erase(It);
m_KernelIDs2BinImage.erase(It->second);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,8 @@ TEST_P(PersistentDeviceCodeCache, MultipleImages) {
std::sort(Imgs.begin(), Imgs.end(),
[](const detail::RTDeviceBinaryImage *A,
const detail::RTDeviceBinaryImage *B) {
return std::strcmp(A->getRawData().EntriesBegin->name,
B->getRawData().EntriesBegin->name) < 0;
return std::strcmp(A->getRawData().EntriesBegin->GetName(),
B->getRawData().EntriesBegin->GetName()) < 0;
});
std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath(
Dev, Imgs, {}, BuildOptions);
Expand Down
Loading