Skip to content

Commit 124d0b7

Browse files
authored
[HIP] add --offload-compression-level= option (#83605)
Added the --offload-compression-level= option to clang and the -compression-level= option to clang-offload-bundler for controlling the compression level. Added support for long distance matching (LDM) to llvm::zstd, where it is off by default. It is enabled by default for clang-offload-bundler, since it generally improves the compression rate. Changed the default zstd compression level for clang-offload-bundler to 3, since it works well for bundle entry sizes from 1KB to 32MB, which should cover most clang-offload-bundler usage. Users can still specify the compression level with the -compression-level= option if necessary.
1 parent 83fe0b1 commit 124d0b7

File tree

17 files changed

+204
-64
lines changed

17 files changed

+204
-64
lines changed

clang/include/clang/Driver/OffloadBundler.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
1818
#define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
1919

20+
#include "llvm/Support/Compression.h"
2021
#include "llvm/Support/Error.h"
2122
#include "llvm/TargetParser/Triple.h"
2223
#include <llvm/Support/MemoryBuffer.h>
@@ -36,6 +37,8 @@ class OffloadBundlerConfig {
3637
bool HipOpenmpCompatible = false;
3738
bool Compress = false;
3839
bool Verbose = false;
40+
llvm::compression::Format CompressionFormat;
41+
int CompressionLevel;
3942

4043
unsigned BundleAlignment = 1;
4144
unsigned HostInputIndex = ~0u;
@@ -116,7 +119,8 @@ class CompressedOffloadBundle {
116119

117120
public:
118121
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
119-
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
122+
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
123+
bool Verbose = false);
120124
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
121125
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
122126
};

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
12641264
def offload_compress : Flag<["--"], "offload-compress">,
12651265
HelpText<"Compress offload device binaries (HIP only)">;
12661266
def no_offload_compress : Flag<["--"], "no-offload-compress">;
1267+
1268+
// Driver option --offload-compression-level=<value>. Declared as a Joined
// option with the "--" prefix and flagged HelpHidden; the help text describes
// it as the compression level for offload device binaries (HIP only).
def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
1269+
Flags<[HelpHidden]>,
1270+
HelpText<"Compression level for offload device binaries (HIP only)">;
12671271
}
12681272

12691273
// CUDA options

clang/lib/Driver/OffloadBundler.cpp

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -924,6 +924,17 @@ CreateFileHandler(MemoryBuffer &FirstInput,
924924
}
925925

926926
OffloadBundlerConfig::OffloadBundlerConfig() {
927+
if (llvm::compression::zstd::isAvailable()) {
928+
CompressionFormat = llvm::compression::Format::Zstd;
929+
// Compression level 3 is usually sufficient for zstd since long distance
930+
// matching is enabled.
931+
CompressionLevel = 3;
932+
} else if (llvm::compression::zlib::isAvailable()) {
933+
CompressionFormat = llvm::compression::Format::Zlib;
934+
// Use the default level for zlib since higher levels do not yield a
935+
// significant improvement.
936+
CompressionLevel = llvm::compression::zlib::DefaultCompression;
937+
}
927938
auto IgnoreEnvVarOpt =
928939
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
929940
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
@@ -937,11 +948,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
937948
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
938949
if (CompressEnvVarOpt.has_value())
939950
Compress = CompressEnvVarOpt.value() == "1";
951+
952+
auto CompressionLevelEnvVarOpt =
953+
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
954+
if (CompressionLevelEnvVarOpt.has_value()) {
955+
llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
956+
int Level;
957+
if (!CompressionLevelStr.getAsInteger(10, Level))
958+
CompressionLevel = Level;
959+
else
960+
llvm::errs()
961+
<< "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
962+
<< CompressionLevelStr.str() << ". Ignoring it.\n";
963+
}
964+
}
965+
966+
// Utility function to format numbers with commas.
// Converts Value to its decimal representation and inserts a "," before each
// group of three digits, counting from the right (e.g. 1234567 becomes
// "1,234,567"). Numbers of three digits or fewer are returned without commas.
967+
static std::string formatWithCommas(unsigned long long Value) {
968+
std::string Num = std::to_string(Value);
969+
// Position of the right-most comma. Kept in a signed int so it can reach zero
// or go below it, which is what terminates the loop; for inputs shorter than
// three digits the unsigned subtraction wraps and is expected to convert to a
// negative int, so no comma is inserted.
int InsertPosition = Num.length() - 3;
970+
// Step leftwards three digits at a time. The "> 0" bound stops before
// position 0, so a comma is never placed in front of the leading digit.
while (InsertPosition > 0) {
971+
Num.insert(InsertPosition, ",");
972+
InsertPosition -= 3;
973+
}
974+
return Num;
940975
}
941976

942977
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
943-
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
978+
CompressedOffloadBundle::compress(llvm::compression::Params P,
979+
const llvm::MemoryBuffer &Input,
944980
bool Verbose) {
981+
if (!llvm::compression::zstd::isAvailable() &&
982+
!llvm::compression::zlib::isAvailable())
983+
return createStringError(llvm::inconvertibleErrorCode(),
984+
"Compression not supported");
985+
945986
llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
946987
ClangOffloadBundlerTimerGroup);
947988
if (Verbose)
@@ -959,25 +1000,15 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
9591000
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
9601001
Input.getBuffer().size());
9611002

962-
llvm::compression::Format CompressionFormat;
963-
964-
if (llvm::compression::zstd::isAvailable())
965-
CompressionFormat = llvm::compression::Format::Zstd;
966-
else if (llvm::compression::zlib::isAvailable())
967-
CompressionFormat = llvm::compression::Format::Zlib;
968-
else
969-
return createStringError(llvm::inconvertibleErrorCode(),
970-
"Compression not supported");
971-
9721003
llvm::Timer CompressTimer("Compression Timer", "Compression time",
9731004
ClangOffloadBundlerTimerGroup);
9741005
if (Verbose)
9751006
CompressTimer.startTimer();
976-
llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
1007+
llvm::compression::compress(P, BufferUint8, CompressedBuffer);
9771008
if (Verbose)
9781009
CompressTimer.stopTimer();
9791010

980-
uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
1011+
uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
9811012
uint32_t UncompressedSize = Input.getBuffer().size();
9821013

9831014
SmallVector<char, 0> FinalBuffer;
@@ -995,17 +1026,29 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
9951026

9961027
if (Verbose) {
9971028
auto MethodUsed =
998-
CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
1029+
P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
1030+
double CompressionRate =
1031+
static_cast<double>(UncompressedSize) / CompressedBuffer.size();
1032+
double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
1033+
double CompressionSpeedMBs =
1034+
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
1035+
9991036
llvm::errs() << "Compressed bundle format version: " << Version << "\n"
10001037
<< "Compression method used: " << MethodUsed << "\n"
1001-
<< "Binary size before compression: " << UncompressedSize
1002-
<< " bytes\n"
1003-
<< "Binary size after compression: " << CompressedBuffer.size()
1004-
<< " bytes\n"
1038+
<< "Compression level: " << P.level << "\n"
1039+
<< "Binary size before compression: "
1040+
<< formatWithCommas(UncompressedSize) << " bytes\n"
1041+
<< "Binary size after compression: "
1042+
<< formatWithCommas(CompressedBuffer.size()) << " bytes\n"
1043+
<< "Compression rate: "
1044+
<< llvm::format("%.2lf", CompressionRate) << "\n"
1045+
<< "Compression ratio: "
1046+
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
1047+
<< "Compression speed: "
1048+
<< llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
10051049
<< "Truncated MD5 hash: "
10061050
<< llvm::format_hex(TruncatedHash, 16) << "\n";
10071051
}
1008-
10091052
return llvm::MemoryBuffer::getMemBufferCopy(
10101053
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
10111054
}
@@ -1070,7 +1113,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
10701113
if (Verbose) {
10711114
DecompressTimer.stopTimer();
10721115

1073-
// Recalculate MD5 hash
1116+
double DecompressionTimeSeconds =
1117+
DecompressTimer.getTotalTime().getWallTime();
1118+
1119+
// Recalculate MD5 hash for integrity check
10741120
llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
10751121
"Hash recalculation time",
10761122
ClangOffloadBundlerTimerGroup);
@@ -1084,16 +1130,27 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
10841130
HashRecalcTimer.stopTimer();
10851131
bool HashMatch = (StoredHash == RecalculatedHash);
10861132

1133+
double CompressionRate =
1134+
static_cast<double>(UncompressedSize) / CompressedData.size();
1135+
double DecompressionSpeedMBs =
1136+
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
1137+
10871138
llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
10881139
<< "Decompression method: "
10891140
<< (CompressionFormat == llvm::compression::Format::Zlib
10901141
? "zlib"
10911142
: "zstd")
10921143
<< "\n"
1093-
<< "Size before decompression: " << CompressedData.size()
1094-
<< " bytes\n"
1095-
<< "Size after decompression: " << UncompressedSize
1096-
<< " bytes\n"
1144+
<< "Size before decompression: "
1145+
<< formatWithCommas(CompressedData.size()) << " bytes\n"
1146+
<< "Size after decompression: "
1147+
<< formatWithCommas(UncompressedSize) << " bytes\n"
1148+
<< "Compression rate: "
1149+
<< llvm::format("%.2lf", CompressionRate) << "\n"
1150+
<< "Compression ratio: "
1151+
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
1152+
<< "Decompression speed: "
1153+
<< llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
10971154
<< "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
10981155
<< "Recalculated hash: "
10991156
<< llvm::format_hex(RecalculatedHash, 16) << "\n"
@@ -1287,8 +1344,10 @@ Error OffloadBundler::BundleFiles() {
12871344
std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
12881345
llvm::MemoryBuffer::getMemBufferCopy(
12891346
llvm::StringRef(Buffer.data(), Buffer.size()));
1290-
auto CompressionResult =
1291-
CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
1347+
auto CompressionResult = CompressedOffloadBundle::compress(
1348+
{BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
1349+
/*zstdEnableLdm=*/true},
1350+
*BufferMemory, BundlerConfig.Verbose);
12921351
if (auto Error = CompressionResult.takeError())
12931352
return Error;
12941353

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8529,7 +8529,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
85298529
}
85308530

85318531
// Begin OffloadBundler
8532-
85338532
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
85348533
const InputInfo &Output,
85358534
const InputInfoList &Inputs,
@@ -8627,11 +8626,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
86278626
}
86288627
CmdArgs.push_back(TCArgs.MakeArgString(UB));
86298628
}
8630-
if (TCArgs.hasFlag(options::OPT_offload_compress,
8631-
options::OPT_no_offload_compress, false))
8632-
CmdArgs.push_back("-compress");
8633-
if (TCArgs.hasArg(options::OPT_v))
8634-
CmdArgs.push_back("-verbose");
8629+
addOffloadCompressArgs(TCArgs, CmdArgs);
86358630
// All the inputs are encoded as commands.
86368631
C.addCommand(std::make_unique<Command>(
86378632
JA, *this, ResponseFileSupport::None(),
@@ -8900,9 +8895,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
89008895
for (const char *LinkArg : LinkCommand->getArguments())
89018896
CmdArgs.push_back(LinkArg);
89028897

8903-
if (Args.hasFlag(options::OPT_offload_compress,
8904-
options::OPT_no_offload_compress, false))
8905-
CmdArgs.push_back("--compress");
8898+
addOffloadCompressArgs(Args, CmdArgs);
89068899

89078900
const char *Exec =
89088901
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2863,3 +2863,15 @@ void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
28632863
CmdArgs.push_back("+outline-atomics");
28642864
}
28652865
}
2866+
2867+
// Translates the driver's offload-compression arguments in TCArgs into
// arguments appended to CmdArgs:
//   "-compress"              if --offload-compress is in effect for the
//                            --offload-compress / --no-offload-compress pair
//                            (off when neither is given),
//   "-verbose"               if -v is present (independent of compression),
//   "-compression-level=<N>" if --offload-compression-level=<N> is present;
//                            the last occurrence is used and its value is
//                            forwarded as written, without validation here.
void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
2868+
llvm::opt::ArgStringList &CmdArgs) {
2869+
if (TCArgs.hasFlag(options::OPT_offload_compress,
2870+
options::OPT_no_offload_compress, false))
2871+
CmdArgs.push_back("-compress");
2872+
if (TCArgs.hasArg(options::OPT_v))
2873+
CmdArgs.push_back("-verbose");
2874+
// MakeArgString is used so the concatenated string is a stable C string that
// can be stored in CmdArgs.
// NOTE(review): presumably owned by TCArgs for its lifetime — confirm.
if (auto *Arg = TCArgs.getLastArg(options::OPT_offload_compression_level_EQ))
2875+
CmdArgs.push_back(
2876+
TCArgs.MakeArgString(Twine("-compression-level=") + Arg->getValue()));
2877+
}

clang/lib/Driver/ToolChains/CommonArgs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
221221
const llvm::opt::ArgList &Args,
222222
llvm::opt::ArgStringList &CmdArgs,
223223
const llvm::Triple &Triple);
224+
void addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
225+
llvm::opt::ArgStringList &CmdArgs);
224226

225227
} // end namespace tools
226228
} // end namespace driver

clang/lib/Driver/ToolChains/HIPUtility.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "HIPUtility.h"
10+
#include "Clang.h"
1011
#include "CommonArgs.h"
1112
#include "clang/Driver/Compilation.h"
1213
#include "clang/Driver/Options.h"
@@ -258,11 +259,7 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
258259
Args.MakeArgString(std::string("-output=").append(Output));
259260
BundlerArgs.push_back(BundlerOutputArg);
260261

261-
if (Args.hasFlag(options::OPT_offload_compress,
262-
options::OPT_no_offload_compress, false))
263-
BundlerArgs.push_back("-compress");
264-
if (Args.hasArg(options::OPT_v))
265-
BundlerArgs.push_back("-verbose");
262+
addOffloadCompressArgs(Args, BundlerArgs);
266263

267264
const char *Bundler = Args.MakeArgString(
268265
T.getToolChain().GetProgramPath("clang-offload-bundler"));

clang/test/Driver/clang-offload-bundler-zlib.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: zlib
1+
// REQUIRES: zlib && !zstd
22
// REQUIRES: x86-registered-target
33
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
44

@@ -34,13 +34,28 @@
3434
// RUN: diff %t.tgt2 %t.res.tgt2
3535

3636
//
37-
// COMPRESS: Compression method used:
38-
// DECOMPRESS: Decompression method:
37+
// COMPRESS: Compression method used: zlib
38+
// COMPRESS: Compression level: 6
39+
// DECOMPRESS: Decompression method: zlib
40+
// DECOMPRESS: Hashes match: Yes
3941
// NOHOST-NOT: host-
4042
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
4143
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
4244
//
4345

46+
// Check -compression-level= option
47+
48+
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
49+
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
50+
// RUN: FileCheck -check-prefix=LEVEL %s
51+
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
52+
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
53+
// RUN: diff %t.tgt1 %t.res.tgt1
54+
// RUN: diff %t.tgt2 %t.res.tgt2
55+
//
56+
// LEVEL: Compression method used: zlib
57+
// LEVEL: Compression level: 9
58+
4459
//
4560
// Check -bundle-align option.
4661
//

clang/test/Driver/clang-offload-bundler-zstd.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,28 @@
3131
// RUN: diff %t.tgt1 %t.res.tgt1
3232
// RUN: diff %t.tgt2 %t.res.tgt2
3333
//
34-
// COMPRESS: Compression method used
35-
// DECOMPRESS: Decompression method
34+
// COMPRESS: Compression method used: zstd
35+
// COMPRESS: Compression level: 3
36+
// DECOMPRESS: Decompression method: zstd
37+
// DECOMPRESS: Hashes match: Yes
3638
// NOHOST-NOT: host-
3739
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
3840
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
3941
//
4042

43+
// Check -compression-level= option
44+
45+
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
46+
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
47+
// RUN: FileCheck -check-prefix=LEVEL %s
48+
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
49+
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
50+
// RUN: diff %t.tgt1 %t.res.tgt1
51+
// RUN: diff %t.tgt2 %t.res.tgt2
52+
//
53+
// LEVEL: Compression method used: zstd
54+
// LEVEL: Compression level: 9
55+
4156
//
4257
// Check -bundle-align option.
4358
//

clang/test/Driver/hip-offload-compress-zlib.hip

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: zlib
1+
// REQUIRES: zlib && !zstd
22
// REQUIRES: x86-registered-target
33
// REQUIRES: amdgpu-registered-target
44

@@ -9,13 +9,14 @@
99
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
1010
// RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
1111
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
12-
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
12+
// RUN: --offload-compress --offload-compression-level=9 \
13+
// RUN: --offload-device-only --gpu-bundle-output \
1314
// RUN: -o %t.bc \
1415
// RUN: 2>&1 | FileCheck %s
1516

1617
// CHECK: clang-offload-bundler{{.*}} -type=bc
1718
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
18-
// CHECK-SAME: -compress -verbose
19+
// CHECK-SAME: -compress -verbose -compression-level=9
1920
// CHECK: Compressed bundle format
2021

2122
// Test uncompress of bundled bitcode.

0 commit comments

Comments
 (0)