Skip to content

[OffloadBundler] Expose function to parse compressed bundle headers #130284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 10 additions & 40 deletions clang/include/clang/Driver/OffloadBundler.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,50 +107,20 @@ struct OffloadTargetInfo {
// - Compressed Data (variable length).
class CompressedOffloadBundle {
private:
static inline const size_t MagicSize = 4;
static inline const size_t VersionFieldSize = sizeof(uint16_t);
static inline const size_t MethodFieldSize = sizeof(uint16_t);
// Legacy size fields for V1/V2
static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
// New size fields for V3
static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
static inline const size_t HashFieldSize = sizeof(uint64_t);

// Keep V1 header size for backward compatibility
static inline const size_t V1HeaderSize =
MagicSize + VersionFieldSize + MethodFieldSize +
UncompressedSizeFieldSizeV2 + HashFieldSize;

// Keep V2 header size for backward compatibility
static inline const size_t V2HeaderSize =
MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
UncompressedSizeFieldSizeV2 + HashFieldSize;

// Add V3 header size with 64-bit fields
static inline const size_t V3HeaderSize =
MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
UncompressedSizeFieldSizeV3 + HashFieldSize;

static inline const llvm::StringRef MagicNumber = "CCOB";

public:
static inline const uint16_t DefaultVersion = 2;
struct CompressedBundleHeader {
unsigned Version;
llvm::compression::Format CompressionFormat;
std::optional<size_t> FileSize;
size_t UncompressedFileSize;
uint64_t Hash;

// Helper method to get header size based on version
static size_t getHeaderSize(uint16_t Version) {
switch (Version) {
case 1:
return V1HeaderSize;
case 2:
return V2HeaderSize;
case 3:
return V3HeaderSize;
default:
llvm_unreachable("Unsupported version");
}
}
static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
};

static inline const uint16_t DefaultVersion = 2;

static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
Expand Down
178 changes: 117 additions & 61 deletions clang/lib/Driver/OffloadBundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
Expand Down Expand Up @@ -1121,13 +1122,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}

// Use packed structs to avoid padding, so that the struct layouts match the
// serialized header format byte for byte.
LLVM_PACKED_START
// Raw, on-disk view of a compressed offload bundle header. Each nested struct
// describes the header layout of one format version; the union lets a single
// buffer be filled with memcpy and then read through whichever layout matches
// the Version field. The declaration sits between LLVM_PACKED_START and
// LLVM_PACKED_END, so field order and field widths here define the layout.
union RawCompressedBundleHeader {
// Leading fields shared by every header version (4 + 2 + 2 bytes).
struct CommonFields {
// Magic bytes identifying a compressed offload bundle.
uint32_t Magic;
// Header format version; selects which of V1/V2/V3 below applies.
uint16_t Version;
// Compression method, compared against llvm::compression::Format values.
uint16_t Method;
};

// Version 1: no total file size field, 32-bit uncompressed size.
struct V1Header {
CommonFields Common;
uint32_t UncompressedFileSize;
uint64_t Hash;
};

// Version 2: adds a 32-bit total file size ahead of the uncompressed size.
struct V2Header {
CommonFields Common;
uint32_t FileSize;
uint32_t UncompressedFileSize;
uint64_t Hash;
};

// Version 3: same fields as version 2, with both sizes widened to 64 bits.
struct V3Header {
CommonFields Common;
uint64_t FileSize;
uint64_t UncompressedFileSize;
uint64_t Hash;
};

// Overlapping views of the same bytes. Common is valid for every version;
// read V1/V2/V3 only after checking Common.Version.
CommonFields Common;
V1Header V1;
V2Header V2;
V3Header V3;
};
LLVM_PACKED_END

// Returns the number of bytes occupied by the serialized header of the given
// format version. Only versions 1, 2 and 3 are supported; callers must not
// pass any other value, since that path is marked llvm_unreachable.
static size_t getHeaderSize(uint16_t Version) {
  if (Version == 1)
    return sizeof(RawCompressedBundleHeader::V1Header);
  if (Version == 2)
    return sizeof(RawCompressedBundleHeader::V2Header);
  if (Version == 3)
    return sizeof(RawCompressedBundleHeader::V3Header);

  llvm_unreachable("Unsupported version");
}

// Parses the header at the start of a compressed offload bundle and returns
// it in version-independent form.
//
// Preconditions (asserted): Blob holds at least the common header fields and
// its magic identifies a compressed offload bundle.
//
// Returns an error when
//   - the version field is not 1, 2 or 3,
//   - Blob is shorter than the header of the stated version, or
//   - the compression method is neither zlib nor zstd.
//
// FileSize is left unset for version 1 headers, which have no such field.
Expected<CompressedOffloadBundle::CompressedBundleHeader>
CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
  assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
  assert(llvm::identify_magic(Blob) ==
         llvm::file_magic::offload_bundle_compressed);

  // Copy at most sizeof(Header) bytes. If Blob is shorter, the tail of Header
  // stays uninitialized; it is only read after the size check below passes.
  // NOTE(review): fields are read in host byte order — confirm this matches
  // the byte order used by the writer.
  RawCompressedBundleHeader Header;
  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));

  CompressedBundleHeader Normalized;
  Normalized.Version = Header.Common.Version;

  // The version comes straight from the input, so validate it before calling
  // getHeaderSize(), which is llvm_unreachable for anything other than 1..3.
  if (Normalized.Version < 1 || Normalized.Version > 3)
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressed bundle version");

  size_t RequiredSize = getHeaderSize(Normalized.Version);
  if (Blob.size() < RequiredSize)
    return createStringError(inconvertibleErrorCode(),
                             "Compressed bundle header size too small");

  switch (Normalized.Version) {
  case 1:
    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
    Normalized.Hash = Header.V1.Hash;
    break;
  case 2:
    Normalized.FileSize = Header.V2.FileSize;
    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
    Normalized.Hash = Header.V2.Hash;
    break;
  case 3:
    Normalized.FileSize = Header.V3.FileSize;
    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
    Normalized.Hash = Header.V3.Hash;
    break;
  default:
    // Not reachable after the range check above; kept so the switch stays
    // safe if the two ever drift apart.
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressed bundle version");
  }

  // Determine compression format
  switch (Header.Common.Method) {
  case static_cast<uint16_t>(compression::Format::Zlib):
  case static_cast<uint16_t>(compression::Format::Zstd):
    Normalized.CompressionFormat =
        static_cast<compression::Format>(Header.Common.Method);
    break;
  default:
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressing method");
  }

  return Normalized;
}

llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
bool Verbose) {
StringRef Blob = Input.getBuffer();

// Check minimum header size (using V1 as it's the smallest)
if (Blob.size() < V1HeaderSize)
if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
return llvm::MemoryBuffer::getMemBufferCopy(Blob);

if (llvm::identify_magic(Blob) !=
Expand All @@ -1137,76 +1241,28 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}

size_t CurrentOffset = MagicSize;

// Read version
uint16_t ThisVersion;
memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += VersionFieldSize;

// Verify header size based on version
if (ThisVersion >= 2 && ThisVersion <= 3) {
size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
if (Blob.size() < RequiredSize)
return createStringError(inconvertibleErrorCode(),
"Compressed bundle header size too small");
}

// Read compression method
uint16_t CompressionMethod;
memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += MethodFieldSize;

// Read total file size (version 2+)
uint64_t TotalFileSize = 0;
if (ThisVersion >= 2) {
if (ThisVersion == 2) {
uint32_t TotalFileSize32;
memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
TotalFileSize = TotalFileSize32;
CurrentOffset += FileSizeFieldSizeV2;
} else { // Version 3
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += FileSizeFieldSizeV3;
}
}
Expected<CompressedBundleHeader> HeaderOrErr =
CompressedBundleHeader::tryParse(Blob);
if (!HeaderOrErr)
return HeaderOrErr.takeError();

// Read uncompressed size
uint64_t UncompressedSize = 0;
if (ThisVersion <= 2) {
uint32_t UncompressedSize32;
memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
UncompressedSize = UncompressedSize32;
CurrentOffset += UncompressedSizeFieldSizeV2;
} else { // Version 3
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += UncompressedSizeFieldSizeV3;
}
const CompressedBundleHeader &Normalized = *HeaderOrErr;
unsigned ThisVersion = Normalized.Version;
size_t HeaderSize = getHeaderSize(ThisVersion);

// Read hash
uint64_t StoredHash;
memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += HashFieldSize;
llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;

// Determine compression format
llvm::compression::Format CompressionFormat;
if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zlib))
CompressionFormat = llvm::compression::Format::Zlib;
else if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zstd))
CompressionFormat = llvm::compression::Format::Zstd;
else
return createStringError(inconvertibleErrorCode(),
"Unknown compressing method");
size_t TotalFileSize = Normalized.FileSize.value_or(0);
size_t UncompressedSize = Normalized.UncompressedFileSize;
auto StoredHash = Normalized.Hash;

llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
*ClangOffloadBundlerTimerGroup);
if (Verbose)
DecompressTimer.startTimer();

SmallVector<uint8_t, 0> DecompressedData;
StringRef CompressedData = Blob.substr(CurrentOffset);
StringRef CompressedData = Blob.substr(HeaderSize);
if (llvm::Error DecompressionError = llvm::compression::decompress(
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))
Expand Down