Skip to content

[ClangOffloadBundler] Add file size to header #88827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion clang/docs/ClangOffloadBundler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,14 @@ The compressed offload bundle begins with a header followed by the compressed bi
This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).

- **Version Number (16-bit unsigned int)**:
This denotes the version of the compressed offload bundle format. The current version is `1`.
This denotes the version of the compressed offload bundle format. The current version is `2`.

- **Compression Method (16-bit unsigned int)**:
This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.

- **Total File Size (32-bit unsigned int)**:
This is the total size (in bytes) of the file, including the header. This field is present only in version 2 and above; version 1 headers do not contain it.

- **Uncompressed Binary Size (32-bit unsigned int)**:
This is the size (in bytes) of the binary data before it was compressed.

Expand Down
17 changes: 11 additions & 6 deletions clang/include/clang/Driver/OffloadBundler.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ struct OffloadTargetInfo {
// - Version (2 bytes)
// - Compression Method (2 bytes) - Uses the values from
// llvm::compression::Format.
// - Total file size (4 bytes), including the header. Present only in version 2 and above.
// - Uncompressed Size (4 bytes).
// - Truncated MD5 Hash (8 bytes).
// - Compressed Data (variable length).
Expand All @@ -109,13 +110,17 @@ class CompressedOffloadBundle {
static inline const size_t MagicSize = 4;
static inline const size_t VersionFieldSize = sizeof(uint16_t);
static inline const size_t MethodFieldSize = sizeof(uint16_t);
static inline const size_t SizeFieldSize = sizeof(uint32_t);
static inline const size_t HashFieldSize = 8;
static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
MethodFieldSize + SizeFieldSize +
HashFieldSize;
static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
static inline const size_t HashFieldSize = sizeof(uint64_t);
static inline const size_t V1HeaderSize =
MagicSize + VersionFieldSize + MethodFieldSize +
UncompressedSizeFieldSize + HashFieldSize;
// Size of a version-2 header. The addends are listed in the same order the
// fields appear in the header: magic, version, compression method, total file
// size, uncompressed size, hash.
static inline const size_t V2HeaderSize =
MagicSize + VersionFieldSize + MethodFieldSize + FileSizeFieldSize +
UncompressedSizeFieldSize + HashFieldSize;
static inline const llvm::StringRef MagicNumber = "CCOB";
static inline const uint16_t Version = 1;
static inline const uint16_t Version = 2;

public:
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
Expand Down
54 changes: 38 additions & 16 deletions clang/lib/Driver/OffloadBundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,13 +1010,19 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,

uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
uint32_t UncompressedSize = Input.getBuffer().size();
uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
sizeof(Version) + sizeof(CompressionMethod) +
sizeof(UncompressedSize) + sizeof(TruncatedHash) +
CompressedBuffer.size();

SmallVector<char, 0> FinalBuffer;
llvm::raw_svector_ostream OS(FinalBuffer);
OS << MagicNumber;
OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
OS.write(reinterpret_cast<const char *>(&CompressionMethod),
sizeof(CompressionMethod));
OS.write(reinterpret_cast<const char *>(&TotalFileSize),
sizeof(TotalFileSize));
OS.write(reinterpret_cast<const char *>(&UncompressedSize),
sizeof(UncompressedSize));
OS.write(reinterpret_cast<const char *>(&TruncatedHash),
Expand All @@ -1034,6 +1040,8 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;

llvm::errs() << "Compressed bundle format version: " << Version << "\n"
<< "Total file size (including headers): "
<< formatWithCommas(TotalFileSize) << " bytes\n"
<< "Compression method used: " << MethodUsed << "\n"
<< "Compression level: " << P.level << "\n"
<< "Binary size before compression: "
Expand All @@ -1059,31 +1067,42 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,

StringRef Blob = Input.getBuffer();

if (Blob.size() < HeaderSize) {
if (Blob.size() < V1HeaderSize)
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}

if (llvm::identify_magic(Blob) !=
llvm::file_magic::offload_bundle_compressed) {
if (Verbose)
llvm::errs() << "Uncompressed bundle.\n";
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}

size_t CurrentOffset = MagicSize;

uint16_t ThisVersion;
memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += VersionFieldSize;

uint16_t CompressionMethod;
memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += MethodFieldSize;

uint32_t TotalFileSize;
if (ThisVersion >= 2) {
if (Blob.size() < V2HeaderSize)
return createStringError(inconvertibleErrorCode(),
"Compressed bundle header size too small");
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
CurrentOffset += FileSizeFieldSize;
}

uint32_t UncompressedSize;
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
CurrentOffset += UncompressedSizeFieldSize;

uint64_t StoredHash;
memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
sizeof(uint16_t));
memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
sizeof(uint16_t));
memcpy(&UncompressedSize,
Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
sizeof(uint32_t));
memcpy(&StoredHash,
Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
SizeFieldSize,
sizeof(uint64_t));
memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += HashFieldSize;

llvm::compression::Format CompressionFormat;
if (CompressionMethod ==
Expand All @@ -1102,7 +1121,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
DecompressTimer.startTimer();

SmallVector<uint8_t, 0> DecompressedData;
StringRef CompressedData = Blob.substr(HeaderSize);
StringRef CompressedData = Blob.substr(CurrentOffset);
if (llvm::Error DecompressionError = llvm::compression::decompress(
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))
Expand Down Expand Up @@ -1135,8 +1154,11 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
double DecompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;

llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
<< "Decompression method: "
llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
if (ThisVersion >= 2)
llvm::errs() << "Total file size (from header): "
<< formatWithCommas(TotalFileSize) << " bytes\n";
llvm::errs() << "Decompression method: "
<< (CompressionFormat == llvm::compression::Format::Zlib
? "zlib"
: "zstd")
Expand Down
19 changes: 11 additions & 8 deletions clang/test/Driver/clang-offload-bundler-zstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,22 @@
// Check compression/decompression of offload bundle.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
// RUN: FileCheck -check-prefix=COMPRESS %s
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose >%t.1.txt 2>&1
// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
// RUN: FileCheck -check-prefix=DECOMPRESS %s
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose >%t.2.txt 2>&1
// RUN: cat %t.1.txt %t.2.txt | FileCheck %s
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compression method used: zstd
// COMPRESS: Compression level: 3
// DECOMPRESS: Decompression method: zstd
// DECOMPRESS: Hashes match: Yes
// The total file size is printed through formatWithCommas, so the capture
// accepts ',' separators as well as digits and must be non-empty. The
// decompression output must then report exactly the same text.
// CHECK: Compressed bundle format version: 2
// CHECK: Total file size (including headers): [[SIZE:[0-9,]+]] bytes
// CHECK: Compression method used: zstd
// CHECK: Compression level: 3
// CHECK: Compressed bundle format version: 2
// CHECK: Total file size (from header): [[SIZE]] bytes
// CHECK: Decompression method: zstd
// CHECK: Hashes match: Yes
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
Expand Down