|
24 | 24 | #if LLVM_ENABLE_ZLIB
|
25 | 25 | #include <zlib.h>
|
26 | 26 | #endif
|
| 27 | +#if LLVM_ENABLE_ZSTD |
| 28 | +#include <zstd.h> |
| 29 | +#endif |
27 | 30 |
|
28 | 31 | using namespace llvm;
|
29 | 32 | using namespace llvm::dwarf;
|
@@ -331,25 +334,60 @@ template <class ELFT> void OutputSection::maybeCompress() {
|
331 | 334 | llvm::TimeTraceScope timeScope("Compress debug sections");
|
332 | 335 | compressed.uncompressedSize = size;
|
333 | 336 | auto buf = std::make_unique<uint8_t[]>(size);
|
| 337 | + // Write uncompressed data to a temporary zero-initialized buffer. |
| 338 | + { |
| 339 | + parallel::TaskGroup tg; |
| 340 | + writeTo<ELFT>(buf.get(), tg); |
| 341 | + } |
| 342 | + |
| 343 | +#if LLVM_ENABLE_ZSTD |
| 344 | + // Use ZSTD's streaming compression API, which permits parallel workers to |
| 345 | + // work on the stream. See http://facebook.github.io/zstd/zstd_manual.html |
| 346 | + // "Streaming compression - HowTo". |
334 | 347 | if (config->compressDebugSections == DebugCompressionType::Zstd) {
|
335 |
| - { |
336 |
| - parallel::TaskGroup tg; |
337 |
| - writeTo<ELFT>(buf.get(), tg); |
338 |
| - } |
| 348 | + // Allocate a buffer of half of the input size, and grow it by 1.5x if |
| 349 | + // insufficient. |
339 | 350 | compressed.shards = std::make_unique<SmallVector<uint8_t, 0>[]>(1);
|
340 |
| - compression::zstd::compress(makeArrayRef(buf.get(), size), |
341 |
| - compressed.shards[0]); |
342 |
| - size = sizeof(Elf_Chdr) + compressed.shards[0].size(); |
| 351 | + SmallVector<uint8_t, 0> &out = compressed.shards[0]; |
| 352 | + out.resize_for_overwrite(std::max<size_t>(size / 2, 32)); |
| 353 | + size_t pos = 0; |
| 354 | + |
| 355 | + ZSTD_CCtx *cctx = ZSTD_createCCtx(); |
| 356 | + size_t ret = ZSTD_CCtx_setParameter( |
| 357 | + cctx, ZSTD_c_nbWorkers, parallel::strategy.compute_thread_count()); |
| 358 | + if (ZSTD_isError(ret)) |
| 359 | + fatal(Twine("ZSTD_CCtx_setParameter: ") + ZSTD_getErrorName(ret)); |
| 360 | + ZSTD_outBuffer zob = {out.data(), out.size(), 0}; |
| 361 | + ZSTD_EndDirective directive = ZSTD_e_continue; |
| 362 | + const size_t blockSize = ZSTD_CStreamInSize(); |
| 363 | + do { |
| 364 | + const size_t n = std::min(size - pos, blockSize); |
| 365 | + if (n == size - pos) |
| 366 | + directive = ZSTD_e_end; |
| 367 | + ZSTD_inBuffer zib = {buf.get() + pos, n, 0}; |
| 368 | + size_t bytesRemaining = 0; |
| 369 | + while (zib.pos != zib.size || |
| 370 | + (directive == ZSTD_e_end && bytesRemaining != 0)) { |
| 371 | + if (zob.pos == zob.size) { |
| 372 | + out.resize_for_overwrite(out.size() * 3 / 2); |
| 373 | + zob.dst = out.data(); |
| 374 | + zob.size = out.size(); |
| 375 | + } |
| 376 | + bytesRemaining = ZSTD_compressStream2(cctx, &zob, &zib, directive); |
| 377 | + assert(!ZSTD_isError(bytesRemaining)); |
| 378 | + } |
| 379 | + pos += n; |
| 380 | + } while (directive != ZSTD_e_end); |
| 381 | + out.resize(zob.pos); |
| 382 | + ZSTD_freeCCtx(cctx); |
| 383 | + |
| 384 | + size = sizeof(Elf_Chdr) + out.size(); |
343 | 385 | flags |= SHF_COMPRESSED;
|
344 | 386 | return;
|
345 | 387 | }
|
| 388 | +#endif |
346 | 389 |
|
347 | 390 | #if LLVM_ENABLE_ZLIB
|
348 |
| - // Write uncompressed data to a temporary zero-initialized buffer. |
349 |
| - { |
350 |
| - parallel::TaskGroup tg; |
351 |
| - writeTo<ELFT>(buf.get(), tg); |
352 |
| - } |
353 | 391 | // We chose 1 (Z_BEST_SPEED) as the default compression level because it is
|
354 | 392 | // the fastest. If -O2 is given, we use level 6 to compress debug info more by
|
355 | 393 | // ~15%. We found that level 7 to 9 doesn't make much difference (~1% more
|
|
0 commit comments