Skip to content

[HIP] add --offload-compression-level= option #83605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion clang/include/clang/Driver/OffloadBundler.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
#define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H

#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/Triple.h"
#include <llvm/Support/MemoryBuffer.h>
Expand All @@ -36,6 +37,8 @@ class OffloadBundlerConfig {
bool HipOpenmpCompatible = false;
bool Compress = false;
bool Verbose = false;
llvm::compression::Format CompressionFormat;
int CompressionLevel;

unsigned BundleAlignment = 1;
unsigned HostInputIndex = ~0u;
Expand Down Expand Up @@ -116,7 +119,8 @@ class CompressedOffloadBundle {

public:
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
bool Verbose = false);
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
};
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
def offload_compress : Flag<["--"], "offload-compress">,
HelpText<"Compress offload device binaries (HIP only)">;
def no_offload_compress : Flag<["--"], "no-offload-compress">;

// Joined option: the value follows the '=' directly, e.g.
// --offload-compression-level=9. The driver passes the value through to the
// bundler as -compression-level=<value>.
// NOTE(review): HelpHidden presumably keeps this out of --help output -- confirm.
def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
Flags<[HelpHidden]>,
HelpText<"Compression level for offload device binaries (HIP only)">;
}

// CUDA options
Expand Down
111 changes: 85 additions & 26 deletions clang/lib/Driver/OffloadBundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,17 @@ CreateFileHandler(MemoryBuffer &FirstInput,
}

OffloadBundlerConfig::OffloadBundlerConfig() {
if (llvm::compression::zstd::isAvailable()) {
CompressionFormat = llvm::compression::Format::Zstd;
// Compression level 3 is usually sufficient for zstd since long distance
// matching is enabled.
CompressionLevel = 3;
} else if (llvm::compression::zlib::isAvailable()) {
CompressionFormat = llvm::compression::Format::Zlib;
// Use default level for zlib since higher level does not have significant
// improvement.
CompressionLevel = llvm::compression::zlib::DefaultCompression;
}
auto IgnoreEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
Expand All @@ -937,11 +948,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
if (CompressEnvVarOpt.has_value())
Compress = CompressEnvVarOpt.value() == "1";

auto CompressionLevelEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
if (CompressionLevelEnvVarOpt.has_value()) {
llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
int Level;
if (!CompressionLevelStr.getAsInteger(10, Level))
CompressionLevel = Level;
else
llvm::errs()
<< "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
<< CompressionLevelStr.str() << ". Ignoring it.\n";
}
}

// Renders Value in decimal with a comma between every group of three digits,
// counted from the least significant end (e.g. 1234567 -> "1,234,567").
// Values with three or fewer digits are returned unchanged.
static std::string formatWithCommas(unsigned long long Value) {
  std::string Digits = std::to_string(Value);
  // Start three characters in from the right edge and walk toward the front;
  // inserting right-to-left keeps the positions still to be visited valid.
  for (int Pos = Digits.length() - 3; Pos > 0; Pos -= 3)
    Digits.insert(Pos, ",");
  return Digits;
}

llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
CompressedOffloadBundle::compress(llvm::compression::Params P,
const llvm::MemoryBuffer &Input,
bool Verbose) {
if (!llvm::compression::zstd::isAvailable() &&
!llvm::compression::zlib::isAvailable())
return createStringError(llvm::inconvertibleErrorCode(),
"Compression not supported");

llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
Expand All @@ -959,25 +1000,15 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
Input.getBuffer().size());

llvm::compression::Format CompressionFormat;

if (llvm::compression::zstd::isAvailable())
CompressionFormat = llvm::compression::Format::Zstd;
else if (llvm::compression::zlib::isAvailable())
CompressionFormat = llvm::compression::Format::Zlib;
else
return createStringError(llvm::inconvertibleErrorCode(),
"Compression not supported");

llvm::Timer CompressTimer("Compression Timer", "Compression time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
CompressTimer.startTimer();
llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
llvm::compression::compress(P, BufferUint8, CompressedBuffer);
if (Verbose)
CompressTimer.stopTimer();

uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
uint32_t UncompressedSize = Input.getBuffer().size();

SmallVector<char, 0> FinalBuffer;
Expand All @@ -995,17 +1026,29 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,

if (Verbose) {
auto MethodUsed =
CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
double CompressionRate =
static_cast<double>(UncompressedSize) / CompressedBuffer.size();
double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
double CompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;

llvm::errs() << "Compressed bundle format version: " << Version << "\n"
<< "Compression method used: " << MethodUsed << "\n"
<< "Binary size before compression: " << UncompressedSize
<< " bytes\n"
<< "Binary size after compression: " << CompressedBuffer.size()
<< " bytes\n"
<< "Compression level: " << P.level << "\n"
<< "Binary size before compression: "
<< formatWithCommas(UncompressedSize) << " bytes\n"
<< "Binary size after compression: "
<< formatWithCommas(CompressedBuffer.size()) << " bytes\n"
<< "Compression rate: "
<< llvm::format("%.2lf", CompressionRate) << "\n"
<< "Compression ratio: "
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
<< "Compression speed: "
<< llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
<< "Truncated MD5 hash: "
<< llvm::format_hex(TruncatedHash, 16) << "\n";
}

return llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}
Expand Down Expand Up @@ -1070,7 +1113,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
if (Verbose) {
DecompressTimer.stopTimer();

// Recalculate MD5 hash
double DecompressionTimeSeconds =
DecompressTimer.getTotalTime().getWallTime();

// Recalculate MD5 hash for integrity check
llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
"Hash recalculation time",
ClangOffloadBundlerTimerGroup);
Expand All @@ -1084,16 +1130,27 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
HashRecalcTimer.stopTimer();
bool HashMatch = (StoredHash == RecalculatedHash);

double CompressionRate =
static_cast<double>(UncompressedSize) / CompressedData.size();
double DecompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;

llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
<< "Decompression method: "
<< (CompressionFormat == llvm::compression::Format::Zlib
? "zlib"
: "zstd")
<< "\n"
<< "Size before decompression: " << CompressedData.size()
<< " bytes\n"
<< "Size after decompression: " << UncompressedSize
<< " bytes\n"
<< "Size before decompression: "
<< formatWithCommas(CompressedData.size()) << " bytes\n"
<< "Size after decompression: "
<< formatWithCommas(UncompressedSize) << " bytes\n"
<< "Compression rate: "
<< llvm::format("%.2lf", CompressionRate) << "\n"
<< "Compression ratio: "
<< llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
<< "Decompression speed: "
<< llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
<< "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
<< "Recalculated hash: "
<< llvm::format_hex(RecalculatedHash, 16) << "\n"
Expand Down Expand Up @@ -1287,8 +1344,10 @@ Error OffloadBundler::BundleFiles() {
std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(Buffer.data(), Buffer.size()));
auto CompressionResult =
CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
auto CompressionResult = CompressedOffloadBundle::compress(
{BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
/*zstdEnableLdm=*/true},
*BufferMemory, BundlerConfig.Verbose);
if (auto Error = CompressionResult.takeError())
return Error;

Expand Down
11 changes: 2 additions & 9 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8524,7 +8524,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
}

// Begin OffloadBundler

void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
Expand Down Expand Up @@ -8622,11 +8621,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
if (TCArgs.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("-compress");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");
addOffloadCompressArgs(TCArgs, CmdArgs);
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
Expand Down Expand Up @@ -8895,9 +8890,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
for (const char *LinkArg : LinkCommand->getArguments())
CmdArgs.push_back(LinkArg);

if (Args.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("--compress");
addOffloadCompressArgs(Args, CmdArgs);

const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2863,3 +2863,15 @@ void tools::addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
CmdArgs.push_back("+outline-atomics");
}
}

/// Translates the driver's offload-compression related arguments in \p TCArgs
/// into the corresponding tool flags and appends them to \p CmdArgs.
///
/// Emits, in this order and only when applicable: "-compress", "-verbose",
/// and "-compression-level=<value>".
void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
                                   llvm::opt::ArgStringList &CmdArgs) {
  // --offload-compress / --no-offload-compress, with a default of false.
  const bool WantCompress = TCArgs.hasFlag(
      options::OPT_offload_compress, options::OPT_no_offload_compress, false);
  if (WantCompress)
    CmdArgs.push_back("-compress");

  // A -v on the driver command line is forwarded as -verbose.
  const bool WantVerbose = TCArgs.hasArg(options::OPT_v);
  if (WantVerbose)
    CmdArgs.push_back("-verbose");

  // Forward the value of the last --offload-compression-level= occurrence.
  auto *LevelArg = TCArgs.getLastArg(options::OPT_offload_compression_level_EQ);
  if (LevelArg) {
    const char *LevelFlag = TCArgs.MakeArgString(Twine("-compression-level=") +
                                                 LevelArg->getValue());
    CmdArgs.push_back(LevelFlag);
  }
}
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const llvm::Triple &Triple);
/// Appends the offload compression flags derived from \p TCArgs to
/// \p CmdArgs: "-compress" when --offload-compress is in effect, "-verbose"
/// when -v is given, and "-compression-level=<value>" when
/// --offload-compression-level= is given.
void addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
llvm::opt::ArgStringList &CmdArgs);

} // end namespace tools
} // end namespace driver
Expand Down
7 changes: 2 additions & 5 deletions clang/lib/Driver/ToolChains/HIPUtility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "HIPUtility.h"
#include "Clang.h"
#include "CommonArgs.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Options.h"
Expand Down Expand Up @@ -258,11 +259,7 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
Args.MakeArgString(std::string("-output=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);

if (Args.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
BundlerArgs.push_back("-compress");
if (Args.hasArg(options::OPT_v))
BundlerArgs.push_back("-verbose");
addOffloadCompressArgs(Args, BundlerArgs);

const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
Expand Down
21 changes: 18 additions & 3 deletions clang/test/Driver/clang-offload-bundler-zlib.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: zlib
// REQUIRES: zlib && !zstd
Copy link
Member

@MaskRay MaskRay Mar 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since zstd configurations are becoming more popular, zlib && !zstd would essentially disable the test for increasingly more bots. But I guess this cannot be improved.

// REQUIRES: x86-registered-target
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}

Expand Down Expand Up @@ -34,13 +34,28 @@
// RUN: diff %t.tgt2 %t.res.tgt2

//
// COMPRESS: Compression method used:
// DECOMPRESS: Decompression method:
// COMPRESS: Compression method used: zlib
// COMPRESS: Compression level: 6
// DECOMPRESS: Decompression method: zlib
// DECOMPRESS: Hashes match: Yes
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

// Check -compression-level= option

// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
// RUN: FileCheck -check-prefix=LEVEL %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// LEVEL: Compression method used: zlib
// LEVEL: Compression level: 9

//
// Check -bundle-align option.
//
Expand Down
19 changes: 17 additions & 2 deletions clang/test/Driver/clang-offload-bundler-zstd.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,28 @@
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compression method used
// DECOMPRESS: Decompression method
// COMPRESS: Compression method used: zstd
// COMPRESS: Compression level: 3
// DECOMPRESS: Decompression method: zstd
// DECOMPRESS: Hashes match: Yes
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//

// Check -compression-level= option

// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
// RUN: FileCheck -check-prefix=LEVEL %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// LEVEL: Compression method used: zstd
// LEVEL: Compression level: 9

//
// Check -bundle-align option.
//
Expand Down
7 changes: 4 additions & 3 deletions clang/test/Driver/hip-offload-compress-zlib.hip
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: zlib
// REQUIRES: zlib && !zstd
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

Expand All @@ -9,13 +9,14 @@
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
// RUN: --offload-compress --offload-compression-level=9 \
// RUN: --offload-device-only --gpu-bundle-output \
// RUN: -o %t.bc \
// RUN: 2>&1 | FileCheck %s

// CHECK: clang-offload-bundler{{.*}} -type=bc
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// CHECK-SAME: -compress -verbose
// CHECK-SAME: -compress -verbose -compression-level=9
// CHECK: Compressed bundle format

// Test uncompress of bundled bitcode.
Expand Down
Loading