Skip to content

Commit fd559db

Browse files
committed
[ClangOffloadBundler] Add file size to header (llvm#88827)
__hipRegisterFatBinary accepts only one pointer argument, so it is expected to obtain the fat binary size from the bundle header. This patch adds a total file size field to the header of the compressed offload bundle (llvm#88827). This patch also cherry-picks the dependent changes: e9901d8 124d0b7 e733d7e 78dca4a Change-Id: Ia49f3b4a6c81a27b7959c5d9b437496e8bce6657
1 parent 6b1795c commit fd559db

File tree

18 files changed

+270
-88
lines changed

18 files changed

+270
-88
lines changed

clang/docs/ClangOffloadBundler.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,11 +518,14 @@ The compressed offload bundle begins with a header followed by the compressed bi
518518
This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
519519

520520
- **Version Number (16-bit unsigned int)**:
521-
This denotes the version of the compressed offload bundle format. The current version is `1`.
521+
This denotes the version of the compressed offload bundle format. The current version is `2`.
522522

523523
- **Compression Method (16-bit unsigned int)**:
524524
This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
525525

526+
- **Total File Size (32-bit unsigned int)**:
527+
This is the total size (in bytes) of the file, including the header. Available in version 2 and above.
528+
526529
- **Uncompressed Binary Size (32-bit unsigned int)**:
527530
This is the size (in bytes) of the binary data before it was compressed.
528531

clang/include/clang/Driver/OffloadBundler.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
1818
#define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
1919

20+
#include "llvm/Support/Compression.h"
2021
#include "llvm/Support/Error.h"
2122
#include "llvm/TargetParser/Triple.h"
2223
#include <llvm/Support/MemoryBuffer.h>
@@ -36,6 +37,8 @@ class OffloadBundlerConfig {
3637
bool HipOpenmpCompatible = false;
3738
bool Compress = false;
3839
bool Verbose = false;
40+
llvm::compression::Format CompressionFormat;
41+
int CompressionLevel;
3942

4043
unsigned BundleAlignment = 1;
4144
unsigned HostInputIndex = ~0u;
@@ -97,6 +100,7 @@ struct OffloadTargetInfo {
97100
// - Version (2 bytes)
98101
// - Compression Method (2 bytes) - Uses the values from
99102
// llvm::compression::Format.
103+
// - Total file size (4 bytes). Available in version 2 and above.
100104
// - Uncompressed Size (4 bytes).
101105
// - Truncated MD5 Hash (8 bytes).
102106
// - Compressed Data (variable length).
@@ -106,17 +110,22 @@ class CompressedOffloadBundle {
106110
static inline const size_t MagicSize = 4;
107111
static inline const size_t VersionFieldSize = sizeof(uint16_t);
108112
static inline const size_t MethodFieldSize = sizeof(uint16_t);
109-
static inline const size_t SizeFieldSize = sizeof(uint32_t);
110-
static inline const size_t HashFieldSize = 8;
111-
static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
112-
MethodFieldSize + SizeFieldSize +
113-
HashFieldSize;
113+
static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
114+
static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
115+
static inline const size_t HashFieldSize = sizeof(uint64_t);
116+
static inline const size_t V1HeaderSize =
117+
MagicSize + VersionFieldSize + MethodFieldSize +
118+
UncompressedSizeFieldSize + HashFieldSize;
119+
static inline const size_t V2HeaderSize =
120+
MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
121+
UncompressedSizeFieldSize + HashFieldSize;
114122
static inline const llvm::StringRef MagicNumber = "CCOB";
115-
static inline const uint16_t Version = 1;
123+
static inline const uint16_t Version = 2;
116124

117125
public:
118126
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
119-
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
127+
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
128+
bool Verbose = false);
120129
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
121130
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
122131
};

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
12321232
def offload_compress : Flag<["--"], "offload-compress">,
12331233
HelpText<"Compress offload device binaries (HIP only)">;
12341234
def no_offload_compress : Flag<["--"], "no-offload-compress">;
1235+
1236+
def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
1237+
Flags<[HelpHidden]>,
1238+
HelpText<"Compression level for offload device binaries (HIP only)">;
12351239
}
12361240

12371241
// CUDA options

clang/lib/Driver/OffloadBundler.cpp

Lines changed: 123 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,17 @@ CreateFileHandler(MemoryBuffer &FirstInput,
906906
}
907907

908908
OffloadBundlerConfig::OffloadBundlerConfig() {
909+
if (llvm::compression::zstd::isAvailable()) {
910+
CompressionFormat = llvm::compression::Format::Zstd;
911+
// Compression level 3 is usually sufficient for zstd since long distance
912+
// matching is enabled.
913+
CompressionLevel = 3;
914+
} else if (llvm::compression::zlib::isAvailable()) {
915+
CompressionFormat = llvm::compression::Format::Zlib;
916+
// Use default level for zlib since higher level does not have significant
917+
// improvement.
918+
CompressionLevel = llvm::compression::zlib::DefaultCompression;
919+
}
909920
auto IgnoreEnvVarOpt =
910921
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
911922
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
@@ -919,11 +930,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
919930
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
920931
if (CompressEnvVarOpt.has_value())
921932
Compress = CompressEnvVarOpt.value() == "1";
933+
934+
auto CompressionLevelEnvVarOpt =
935+
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
936+
if (CompressionLevelEnvVarOpt.has_value()) {
937+
llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
938+
int Level;
939+
if (!CompressionLevelStr.getAsInteger(10, Level))
940+
CompressionLevel = Level;
941+
else
942+
llvm::errs()
943+
<< "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
944+
<< CompressionLevelStr.str() << ". Ignoring it.\n";
945+
}
946+
}
947+
948+
// Formats an unsigned integer as a decimal string with a comma between every
// group of three digits, counted from the right (e.g. 1234567 -> "1,234,567").
// Values below 1000 are returned without any separator.
static std::string formatWithCommas(unsigned long long Value) {
  // std::to_string always yields at least one digit, so Digits is never empty.
  const std::string Digits = std::to_string(Value);

  std::string Result;
  // One separator is needed for every full group of three after the first.
  Result.reserve(Digits.size() + (Digits.size() - 1) / 3);

  // Length of the leading group, which may be short: always 1, 2 or 3 digits.
  const size_t LeadLen = (Digits.size() - 1) % 3 + 1;
  Result.append(Digits, 0, LeadLen);

  // Every remaining group is exactly three digits, each preceded by a comma.
  // Building left to right avoids the size_t -> int narrowing and the repeated
  // mid-string insertions of a right-to-left insert loop.
  for (size_t Pos = LeadLen; Pos < Digits.size(); Pos += 3) {
    Result.push_back(',');
    Result.append(Digits, Pos, 3);
  }
  return Result;
}
923958

924959
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
925-
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
960+
CompressedOffloadBundle::compress(llvm::compression::Params P,
961+
const llvm::MemoryBuffer &Input,
926962
bool Verbose) {
963+
if (!llvm::compression::zstd::isAvailable() &&
964+
!llvm::compression::zlib::isAvailable())
965+
return createStringError(llvm::inconvertibleErrorCode(),
966+
"Compression not supported");
967+
927968
llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
928969
ClangOffloadBundlerTimerGroup);
929970
if (Verbose)
@@ -941,33 +982,29 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
941982
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
942983
Input.getBuffer().size());
943984

944-
llvm::compression::Format CompressionFormat;
945-
946-
if (llvm::compression::zstd::isAvailable())
947-
CompressionFormat = llvm::compression::Format::Zstd;
948-
else if (llvm::compression::zlib::isAvailable())
949-
CompressionFormat = llvm::compression::Format::Zlib;
950-
else
951-
return createStringError(llvm::inconvertibleErrorCode(),
952-
"Compression not supported");
953-
954985
llvm::Timer CompressTimer("Compression Timer", "Compression time",
955986
ClangOffloadBundlerTimerGroup);
956987
if (Verbose)
957988
CompressTimer.startTimer();
958-
llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
989+
llvm::compression::compress(P, BufferUint8, CompressedBuffer);
959990
if (Verbose)
960991
CompressTimer.stopTimer();
961992

962-
uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
993+
uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
963994
uint32_t UncompressedSize = Input.getBuffer().size();
995+
uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
996+
sizeof(Version) + sizeof(CompressionMethod) +
997+
sizeof(UncompressedSize) + sizeof(TruncatedHash) +
998+
CompressedBuffer.size();
964999

9651000
SmallVector<char, 0> FinalBuffer;
9661001
llvm::raw_svector_ostream OS(FinalBuffer);
9671002
OS << MagicNumber;
9681003
OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
9691004
OS.write(reinterpret_cast<const char *>(&CompressionMethod),
9701005
sizeof(CompressionMethod));
1006+
OS.write(reinterpret_cast<const char *>(&TotalFileSize),
1007+
sizeof(TotalFileSize));
9711008
OS.write(reinterpret_cast<const char *>(&UncompressedSize),
9721009
sizeof(UncompressedSize));
9731010
OS.write(reinterpret_cast<const char *>(&TruncatedHash),
@@ -977,17 +1014,31 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
9771014

9781015
if (Verbose) {
9791016
auto MethodUsed =
980-
CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
1017+
P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
1018+
double CompressionRate =
1019+
static_cast<double>(UncompressedSize) / CompressedBuffer.size();
1020+
double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
1021+
double CompressionSpeedMBs =
1022+
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
1023+
9811024
llvm::errs() << "Compressed bundle format version: " << Version << "\n"
1025+
<< "Total file size (including headers): "
1026+
<< formatWithCommas(TotalFileSize) << " bytes\n"
9821027
<< "Compression method used: " << MethodUsed << "\n"
983-
<< "Binary size before compression: " << UncompressedSize
984-
<< " bytes\n"
985-
<< "Binary size after compression: " << CompressedBuffer.size()
986-
<< " bytes\n"
1028+
<< "Compression level: " << P.level << "\n"
1029+
<< "Binary size before compression: "
1030+
<< formatWithCommas(UncompressedSize) << " bytes\n"
1031+
<< "Binary size after compression: "
1032+
<< formatWithCommas(CompressedBuffer.size()) << " bytes\n"
1033+
<< "Compression rate: "
1034+
<< llvm::format("%.2lf", CompressionRate) << "\n"
1035+
<< "Compression ratio: "
1036+
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
1037+
<< "Compression speed: "
1038+
<< llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
9871039
<< "Truncated MD5 hash: "
9881040
<< llvm::format_hex(TruncatedHash, 16) << "\n";
9891041
}
990-
9911042
return llvm::MemoryBuffer::getMemBufferCopy(
9921043
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
9931044
}
@@ -998,31 +1049,42 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
9981049

9991050
StringRef Blob = Input.getBuffer();
10001051

1001-
if (Blob.size() < HeaderSize) {
1052+
if (Blob.size() < V1HeaderSize)
10021053
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
1003-
}
1054+
10041055
if (llvm::identify_magic(Blob) !=
10051056
llvm::file_magic::offload_bundle_compressed) {
10061057
if (Verbose)
10071058
llvm::errs() << "Uncompressed bundle.\n";
10081059
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
10091060
}
10101061

1062+
size_t CurrentOffset = MagicSize;
1063+
10111064
uint16_t ThisVersion;
1065+
memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
1066+
CurrentOffset += VersionFieldSize;
1067+
10121068
uint16_t CompressionMethod;
1069+
memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
1070+
CurrentOffset += MethodFieldSize;
1071+
1072+
uint32_t TotalFileSize;
1073+
if (ThisVersion >= 2) {
1074+
if (Blob.size() < V2HeaderSize)
1075+
return createStringError(inconvertibleErrorCode(),
1076+
"Compressed bundle header size too small");
1077+
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
1078+
CurrentOffset += FileSizeFieldSize;
1079+
}
1080+
10131081
uint32_t UncompressedSize;
1082+
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
1083+
CurrentOffset += UncompressedSizeFieldSize;
1084+
10141085
uint64_t StoredHash;
1015-
memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
1016-
sizeof(uint16_t));
1017-
memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
1018-
sizeof(uint16_t));
1019-
memcpy(&UncompressedSize,
1020-
Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
1021-
sizeof(uint32_t));
1022-
memcpy(&StoredHash,
1023-
Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
1024-
SizeFieldSize,
1025-
sizeof(uint64_t));
1086+
memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
1087+
CurrentOffset += HashFieldSize;
10261088

10271089
llvm::compression::Format CompressionFormat;
10281090
if (CompressionMethod ==
@@ -1041,7 +1103,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
10411103
DecompressTimer.startTimer();
10421104

10431105
SmallVector<uint8_t, 0> DecompressedData;
1044-
StringRef CompressedData = Blob.substr(HeaderSize);
1106+
StringRef CompressedData = Blob.substr(CurrentOffset);
10451107
if (llvm::Error DecompressionError = llvm::compression::decompress(
10461108
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
10471109
DecompressedData, UncompressedSize))
@@ -1052,7 +1114,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
10521114
if (Verbose) {
10531115
DecompressTimer.stopTimer();
10541116

1055-
// Recalculate MD5 hash
1117+
double DecompressionTimeSeconds =
1118+
DecompressTimer.getTotalTime().getWallTime();
1119+
1120+
// Recalculate MD5 hash for integrity check
10561121
llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
10571122
"Hash recalculation time",
10581123
ClangOffloadBundlerTimerGroup);
@@ -1066,16 +1131,30 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
10661131
HashRecalcTimer.stopTimer();
10671132
bool HashMatch = (StoredHash == RecalculatedHash);
10681133

1069-
llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
1070-
<< "Decompression method: "
1134+
double CompressionRate =
1135+
static_cast<double>(UncompressedSize) / CompressedData.size();
1136+
double DecompressionSpeedMBs =
1137+
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
1138+
1139+
llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
1140+
if (ThisVersion >= 2)
1141+
llvm::errs() << "Total file size (from header): "
1142+
<< formatWithCommas(TotalFileSize) << " bytes\n";
1143+
llvm::errs() << "Decompression method: "
10711144
<< (CompressionFormat == llvm::compression::Format::Zlib
10721145
? "zlib"
10731146
: "zstd")
10741147
<< "\n"
1075-
<< "Size before decompression: " << CompressedData.size()
1076-
<< " bytes\n"
1077-
<< "Size after decompression: " << UncompressedSize
1078-
<< " bytes\n"
1148+
<< "Size before decompression: "
1149+
<< formatWithCommas(CompressedData.size()) << " bytes\n"
1150+
<< "Size after decompression: "
1151+
<< formatWithCommas(UncompressedSize) << " bytes\n"
1152+
<< "Compression rate: "
1153+
<< llvm::format("%.2lf", CompressionRate) << "\n"
1154+
<< "Compression ratio: "
1155+
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
1156+
<< "Decompression speed: "
1157+
<< llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
10791158
<< "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
10801159
<< "Recalculated hash: "
10811160
<< llvm::format_hex(RecalculatedHash, 16) << "\n"
@@ -1269,8 +1348,10 @@ Error OffloadBundler::BundleFiles() {
12691348
std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
12701349
llvm::MemoryBuffer::getMemBufferCopy(
12711350
llvm::StringRef(Buffer.data(), Buffer.size()));
1272-
auto CompressionResult =
1273-
CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
1351+
auto CompressionResult = CompressedOffloadBundle::compress(
1352+
{BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
1353+
/*zstdEnableLdm=*/true},
1354+
*BufferMemory, BundlerConfig.Verbose);
12741355
if (auto Error = CompressionResult.takeError())
12751356
return Error;
12761357

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8627,7 +8627,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
86278627
}
86288628

86298629
// Begin OffloadBundler
8630-
86318630
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
86328631
const InputInfo &Output,
86338632
const InputInfoList &Inputs,
@@ -8712,11 +8711,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
87128711
}
87138712
CmdArgs.push_back(TCArgs.MakeArgString(UB));
87148713
}
8715-
if (TCArgs.hasFlag(options::OPT_offload_compress,
8716-
options::OPT_no_offload_compress, false))
8717-
CmdArgs.push_back("-compress");
8718-
if (TCArgs.hasArg(options::OPT_v))
8719-
CmdArgs.push_back("-verbose");
8714+
addOffloadCompressArgs(TCArgs, CmdArgs);
87208715
// All the inputs are encoded as commands.
87218716
C.addCommand(std::make_unique<Command>(
87228717
JA, *this, ResponseFileSupport::None(),
@@ -9356,6 +9351,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
93569351
for (const char *LinkArg : LinkCommand->getArguments())
93579352
CmdArgs.push_back(LinkArg);
93589353

9354+
addOffloadCompressArgs(Args, CmdArgs);
9355+
93599356
const char *Exec =
93609357
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
93619358

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2904,3 +2904,15 @@ void tools::addHIPRuntimeLibArgs(const ToolChain &TC, Compilation &C,
29042904
}
29052905
}
29062906
}
2907+
2908+
// Translates the driver's offload-compression related options into the
// corresponding single-dash arguments and appends them to CmdArgs:
//   --offload-compress (unless overridden by --no-offload-compress)
//                                    -> "-compress"
//   -v                               -> "-verbose"
//   --offload-compression-level=<n>  -> "-compression-level=<n>"
// The last occurrence of the compression-level option wins; its value is
// forwarded verbatim without validation.
void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
                                   llvm::opt::ArgStringList &CmdArgs) {
  const bool CompressRequested =
      TCArgs.hasFlag(options::OPT_offload_compress,
                     options::OPT_no_offload_compress, /*Default=*/false);
  if (CompressRequested)
    CmdArgs.push_back("-compress");

  const bool VerboseRequested = TCArgs.hasArg(options::OPT_v);
  if (VerboseRequested)
    CmdArgs.push_back("-verbose");

  const auto *LevelArg =
      TCArgs.getLastArg(options::OPT_offload_compression_level_EQ);
  if (!LevelArg)
    return;

  CmdArgs.push_back(TCArgs.MakeArgString(Twine("-compression-level=") +
                                         LevelArg->getValue()));
}

0 commit comments

Comments
 (0)