Skip to content

[OffloadBundler] Compress bundles over 4GB #122307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ClangOffloadBundler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,5 @@ The compressed offload bundle begins with a header followed by the compressed bi

- **Compressed Data**:
The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.

> **Note**: Version 3 of the format is under development. It uses 64-bit fields for Total File Size and Uncompressed Binary Size to support files larger than 4GB. To experiment with version 3, set the environment variable `COMPRESSED_BUNDLE_FORMAT_VERSION=3`. This support is experimental and not recommended for production use.
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,7 @@ RISC-V Support
CUDA/HIP Language Changes
^^^^^^^^^^^^^^^^^^^^^^^^^
- Fixed a bug where overriding a constexpr pure-virtual member function with a non-constexpr virtual member function caused a compilation failure when including the standard C++ header `format`.
- Added initial support for version 3 of the compressed offload bundle format, which uses 64-bit fields for Total File Size and Uncompressed Binary Size. This enables support for files larger than 4GB. The support is currently experimental and can be enabled by setting the environment variable `COMPRESSED_BUNDLE_FORMAT_VERSION=3`.

CUDA Support
^^^^^^^^^^^^
Expand Down
50 changes: 39 additions & 11 deletions clang/include/clang/Driver/OffloadBundler.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class OffloadBundlerConfig {
bool Verbose = false;
llvm::compression::Format CompressionFormat;
int CompressionLevel;
uint16_t CompressedBundleVersion;

unsigned BundleAlignment = 1;
unsigned HostInputIndex = ~0u;
Expand Down Expand Up @@ -100,36 +101,63 @@ struct OffloadTargetInfo {
// - Version (2 bytes)
// - Compression Method (2 bytes) - Uses the values from
// llvm::compression::Format.
// - Total file size (4 bytes). Available in version 2 and above.
// - Uncompressed Size (4 bytes).
// - Total file size (4 bytes in V2, 8 bytes in V3).
// - Uncompressed Size (4 bytes in V1/V2, 8 bytes in V3).
// - Truncated MD5 Hash (8 bytes).
// - Compressed Data (variable length).

class CompressedOffloadBundle {
private:
static inline const size_t MagicSize = 4;
static inline const size_t VersionFieldSize = sizeof(uint16_t);
static inline const size_t MethodFieldSize = sizeof(uint16_t);
static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
// Legacy size fields for V1/V2
static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
// New size fields for V3
static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
static inline const size_t HashFieldSize = sizeof(uint64_t);

// Keep V1 header size for backward compatibility
static inline const size_t V1HeaderSize =
MagicSize + VersionFieldSize + MethodFieldSize +
UncompressedSizeFieldSize + HashFieldSize;
UncompressedSizeFieldSizeV2 + HashFieldSize;

// Keep V2 header size for backward compatibility
static inline const size_t V2HeaderSize =
MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
UncompressedSizeFieldSize + HashFieldSize;
MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
UncompressedSizeFieldSizeV2 + HashFieldSize;

// Add V3 header size with 64-bit fields
static inline const size_t V3HeaderSize =
MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
UncompressedSizeFieldSizeV3 + HashFieldSize;

static inline const llvm::StringRef MagicNumber = "CCOB";
static inline const uint16_t Version = 2;

public:
static inline const uint16_t DefaultVersion = 2;

// Returns the total header length in bytes for the given compressed bundle
// format version (1, 2 or 3). Any other version is treated as a programming
// error and hits llvm_unreachable.
static size_t getHeaderSize(uint16_t Version) {
  if (Version == 1)
    return V1HeaderSize;
  if (Version == 2)
    return V2HeaderSize;
  if (Version == 3)
    return V3HeaderSize;
  llvm_unreachable("Unsupported version");
}

static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
bool Verbose = false);
uint16_t Version, bool Verbose = false);
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
};

} // namespace clang

#endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
138 changes: 106 additions & 32 deletions clang/lib/Driver/OffloadBundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,8 @@ CreateFileHandler(MemoryBuffer &FirstInput,
"'" + FilesType + "': invalid file type specified");
}

OffloadBundlerConfig::OffloadBundlerConfig() {
OffloadBundlerConfig::OffloadBundlerConfig()
: CompressedBundleVersion(CompressedOffloadBundle::DefaultVersion) {
if (llvm::compression::zstd::isAvailable()) {
CompressionFormat = llvm::compression::Format::Zstd;
// Compression level 3 is usually sufficient for zstd since long distance
Expand All @@ -951,16 +952,13 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
return;

auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
if (VerboseEnvVarOpt.has_value())
Verbose = VerboseEnvVarOpt.value() == "1";

auto CompressEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
if (CompressEnvVarOpt.has_value())
Compress = CompressEnvVarOpt.value() == "1";

auto CompressionLevelEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
if (CompressionLevelEnvVarOpt.has_value()) {
Expand All @@ -973,6 +971,26 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
<< "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
<< CompressionLevelStr.str() << ". Ignoring it.\n";
}
auto CompressedBundleFormatVersionOpt =
llvm::sys::Process::GetEnv("COMPRESSED_BUNDLE_FORMAT_VERSION");
if (CompressedBundleFormatVersionOpt.has_value()) {
llvm::StringRef VersionStr = CompressedBundleFormatVersionOpt.value();
uint16_t Version;
if (!VersionStr.getAsInteger(10, Version)) {
if (Version >= 2 && Version <= 3)
CompressedBundleVersion = Version;
else
llvm::errs()
<< "Warning: Invalid value for COMPRESSED_BUNDLE_FORMAT_VERSION: "
<< VersionStr.str()
<< ". Valid values are 2 or 3. Using default version "
<< CompressedBundleVersion << ".\n";
} else
llvm::errs()
<< "Warning: Invalid value for COMPRESSED_BUNDLE_FORMAT_VERSION: "
<< VersionStr.str() << ". Using default version "
<< CompressedBundleVersion << ".\n";
}
}

// Utility function to format numbers with commas
Expand All @@ -989,12 +1007,11 @@ static std::string formatWithCommas(unsigned long long Value) {
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::compress(llvm::compression::Params P,
const llvm::MemoryBuffer &Input,
bool Verbose) {
uint16_t Version, bool Verbose) {
if (!llvm::compression::zstd::isAvailable() &&
!llvm::compression::zlib::isAvailable())
return createStringError(llvm::inconvertibleErrorCode(),
"Compression not supported");

llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
*ClangOffloadBundlerTimerGroup);
if (Verbose)
Expand All @@ -1011,7 +1028,6 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
auto BufferUint8 = llvm::ArrayRef<uint8_t>(
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
Input.getBuffer().size());

llvm::Timer CompressTimer("Compression Timer", "Compression time",
*ClangOffloadBundlerTimerGroup);
if (Verbose)
Expand All @@ -1021,22 +1037,54 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
CompressTimer.stopTimer();

uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
uint32_t UncompressedSize = Input.getBuffer().size();
uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
sizeof(Version) + sizeof(CompressionMethod) +
sizeof(UncompressedSize) + sizeof(TruncatedHash) +
CompressedBuffer.size();

// Store sizes in 64-bit variables first
uint64_t UncompressedSize64 = Input.getBuffer().size();
uint64_t TotalFileSize64;

// Calculate total file size based on version
if (Version == 2) {
// For V2, ensure the sizes don't exceed 32-bit limit
if (UncompressedSize64 > std::numeric_limits<uint32_t>::max())
return createStringError(llvm::inconvertibleErrorCode(),
"Uncompressed size exceeds version 2 limit");
if ((MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
sizeof(CompressionMethod) + sizeof(uint32_t) + sizeof(TruncatedHash) +
CompressedBuffer.size()) > std::numeric_limits<uint32_t>::max())
return createStringError(llvm::inconvertibleErrorCode(),
"Total file size exceeds version 2 limit");

TotalFileSize64 = MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
sizeof(CompressionMethod) + sizeof(uint32_t) +
sizeof(TruncatedHash) + CompressedBuffer.size();
} else { // Version 3
TotalFileSize64 = MagicNumber.size() + sizeof(uint64_t) + sizeof(Version) +
sizeof(CompressionMethod) + sizeof(uint64_t) +
sizeof(TruncatedHash) + CompressedBuffer.size();
}

SmallVector<char, 0> FinalBuffer;
llvm::raw_svector_ostream OS(FinalBuffer);
OS << MagicNumber;
OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
OS.write(reinterpret_cast<const char *>(&CompressionMethod),
sizeof(CompressionMethod));
OS.write(reinterpret_cast<const char *>(&TotalFileSize),
sizeof(TotalFileSize));
OS.write(reinterpret_cast<const char *>(&UncompressedSize),
sizeof(UncompressedSize));

// Write size fields according to version
if (Version == 2) {
uint32_t TotalFileSize32 = static_cast<uint32_t>(TotalFileSize64);
uint32_t UncompressedSize32 = static_cast<uint32_t>(UncompressedSize64);
OS.write(reinterpret_cast<const char *>(&TotalFileSize32),
sizeof(TotalFileSize32));
OS.write(reinterpret_cast<const char *>(&UncompressedSize32),
sizeof(UncompressedSize32));
} else { // Version 3
OS.write(reinterpret_cast<const char *>(&TotalFileSize64),
sizeof(TotalFileSize64));
OS.write(reinterpret_cast<const char *>(&UncompressedSize64),
sizeof(UncompressedSize64));
}

OS.write(reinterpret_cast<const char *>(&TruncatedHash),
sizeof(TruncatedHash));
OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
Expand All @@ -1046,18 +1094,17 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
auto MethodUsed =
P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
double CompressionRate =
static_cast<double>(UncompressedSize) / CompressedBuffer.size();
static_cast<double>(UncompressedSize64) / CompressedBuffer.size();
double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
double CompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;

(UncompressedSize64 / (1024.0 * 1024.0)) / CompressionTimeSeconds;
llvm::errs() << "Compressed bundle format version: " << Version << "\n"
<< "Total file size (including headers): "
<< formatWithCommas(TotalFileSize) << " bytes\n"
<< formatWithCommas(TotalFileSize64) << " bytes\n"
<< "Compression method used: " << MethodUsed << "\n"
<< "Compression level: " << P.level << "\n"
<< "Binary size before compression: "
<< formatWithCommas(UncompressedSize) << " bytes\n"
<< formatWithCommas(UncompressedSize64) << " bytes\n"
<< "Binary size after compression: "
<< formatWithCommas(CompressedBuffer.size()) << " bytes\n"
<< "Compression rate: "
Expand All @@ -1069,16 +1116,17 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
<< "Truncated MD5 hash: "
<< llvm::format_hex(TruncatedHash, 16) << "\n";
}

return llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}

llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
bool Verbose) {

StringRef Blob = Input.getBuffer();

// Check minimum header size (using V1 as it's the smallest)
if (Blob.size() < V1HeaderSize)
return llvm::MemoryBuffer::getMemBufferCopy(Blob);

Expand All @@ -1091,31 +1139,56 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,

size_t CurrentOffset = MagicSize;

// Read version
uint16_t ThisVersion;
memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += VersionFieldSize;

// Verify header size based on version
if (ThisVersion >= 2 && ThisVersion <= 3) {
size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
if (Blob.size() < RequiredSize)
return createStringError(inconvertibleErrorCode(),
"Compressed bundle header size too small");
}

// Read compression method
uint16_t CompressionMethod;
memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += MethodFieldSize;

uint32_t TotalFileSize;
// Read total file size (version 2+)
uint64_t TotalFileSize = 0;
if (ThisVersion >= 2) {
if (Blob.size() < V2HeaderSize)
return createStringError(inconvertibleErrorCode(),
"Compressed bundle header size too small");
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
CurrentOffset += FileSizeFieldSize;
if (ThisVersion == 2) {
uint32_t TotalFileSize32;
memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
TotalFileSize = TotalFileSize32;
CurrentOffset += FileSizeFieldSizeV2;
} else { // Version 3
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += FileSizeFieldSizeV3;
}
}

uint32_t UncompressedSize;
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
CurrentOffset += UncompressedSizeFieldSize;
// Read uncompressed size
uint64_t UncompressedSize = 0;
if (ThisVersion <= 2) {
uint32_t UncompressedSize32;
memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
UncompressedSize = UncompressedSize32;
CurrentOffset += UncompressedSizeFieldSizeV2;
} else { // Version 3
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += UncompressedSizeFieldSizeV3;
}

// Read hash
uint64_t StoredHash;
memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += HashFieldSize;

// Determine compression format
llvm::compression::Format CompressionFormat;
if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zlib))
Expand Down Expand Up @@ -1381,7 +1454,8 @@ Error OffloadBundler::BundleFiles() {
auto CompressionResult = CompressedOffloadBundle::compress(
{BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
/*zstdEnableLdm=*/true},
*BufferMemory, BundlerConfig.Verbose);
*BufferMemory, BundlerConfig.CompressedBundleVersion,
BundlerConfig.Verbose);
if (auto Error = CompressionResult.takeError())
return Error;

Expand Down
24 changes: 24 additions & 0 deletions clang/test/Driver/clang-offload-bundler-zlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,30 @@
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

// Check compression/decompression of offload bundle using version 3 format.
//
// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 COMPRESSED_BUNDLE_FORMAT_VERSION=3 \
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
// RUN: FileCheck -check-prefix=COMPRESS %s
// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
// RUN: FileCheck -check-prefix=DECOMPRESS %s
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compressed bundle format version: 3
// COMPRESS: Compression method used: zlib
// COMPRESS: Compression level: 6
// DECOMPRESS: Compressed bundle format version: 3
// DECOMPRESS: Decompression method: zlib
// DECOMPRESS: Hashes match: Yes
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906

// Check -compression-level= option

// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
Expand Down
Loading