|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 | 8 |
|
9 | 9 | #include "DeviceCompilation.h"
|
| 10 | +#include "ESIMD.h" |
10 | 11 |
|
11 | 12 | #include <clang/Basic/DiagnosticDriver.h>
|
12 | 13 | #include <clang/Basic/Version.h>
|
|
27 | 28 | #include <llvm/IRReader/IRReader.h>
|
28 | 29 | #include <llvm/Linker/Linker.h>
|
29 | 30 | #include <llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h>
|
| 31 | +#include <llvm/SYCLLowerIR/ESIMD/LowerESIMD.h> |
| 32 | +#include <llvm/SYCLLowerIR/LowerInvokeSimd.h> |
30 | 33 | #include <llvm/SYCLLowerIR/ModuleSplitter.h>
|
31 | 34 | #include <llvm/SYCLLowerIR/SYCLJointMatrixTransform.h>
|
32 | 35 | #include <llvm/Support/PropertySetIO.h>
|
@@ -432,42 +435,84 @@ template <class PassClass> static bool runModulePass(llvm::Module &M) {
|
432 | 435 | return !Res.areAllPreserved();
|
433 | 436 | }
|
434 | 437 |
|
435 |
| -Expected<RTCBundleInfo> jit_compiler::performPostLink( |
436 |
| - llvm::Module &Module, [[maybe_unused]] const InputArgList &UserArgList) { |
| 438 | +llvm::Expected<PostLinkResult> jit_compiler::performPostLink( |
| 439 | + std::unique_ptr<llvm::Module> Module, |
| 440 | + [[maybe_unused]] const llvm::opt::InputArgList &UserArgList) { |
437 | 441 | // This is a simplified version of `processInputModule` in
|
438 | 442 | // `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality
|
439 | 443 | // left out of the algorithm for now.
|
440 | 444 |
|
441 |
| - assert(!Module.getGlobalVariable("llvm.used") && |
442 |
| - !Module.getGlobalVariable("llvm.compiler.used")); |
| 445 | + // TODO: SplitMode can be controlled by the user. |
| 446 | + const auto SplitMode = SPLIT_NONE; |
| 447 | + |
| 448 | + // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
| 449 | + // `shouldEmitOnlyKernelsAsEntryPoints` in |
| 450 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 451 | + const bool EmitOnlyKernelsAsEntryPoints = true; |
| 452 | + |
| 453 | + // TODO: The optlevel passed to `sycl-post-link` is determined by |
| 454 | + // `getSYCLPostLinkOptimizationLevel` in |
| 455 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 456 | + const bool PerformOpts = true; |
| 457 | + |
| 458 | + // Propagate ESIMD attribute to wrapper functions to prevent spurious splits |
| 459 | + // and kernel link errors. |
| 460 | + runModulePass<SYCLFixupESIMDKernelWrapperMDPass>(*Module); |
| 461 | + |
| 462 | + assert(!Module->getGlobalVariable("llvm.used") && |
| 463 | + !Module->getGlobalVariable("llvm.compiler.used")); |
443 | 464 | // Otherwise: Port over the `removeSYCLKernelsConstRefArray` and
|
444 | 465 | // `removeDeviceGlobalFromCompilerUsed` methods.
|
445 | 466 |
|
446 |
| - assert(!isModuleUsingAsan(Module)); |
| 467 | + assert(!isModuleUsingAsan(*Module)); |
447 | 468 | // Otherwise: Need to instrument each image scope device globals if the module
|
448 | 469 | // has been instrumented by sanitizer pass.
|
449 | 470 |
|
450 | 471 | // Transform Joint Matrix builtin calls to align them with SPIR-V friendly
|
451 | 472 | // LLVM IR specification.
|
452 |
| - runModulePass<SYCLJointMatrixTransformPass>(Module); |
| 473 | + runModulePass<SYCLJointMatrixTransformPass>(*Module); |
| 474 | + |
| 475 | + // Do invoke_simd processing before splitting because this: |
| 476 | + // - saves processing time (the pass is run once, even though on larger IR) |
| 477 | + // - doing it before SYCL/ESIMD splitting is required for correctness |
| 478 | + if (runModulePass<SYCLLowerInvokeSimdPass>(*Module)) { |
| 479 | + return createStringError("`invoke_simd` calls detected"); |
| 480 | + } |
453 | 481 |
|
454 | 482 | // TODO: Implement actual device code splitting. We're just using the splitter
|
455 | 483 | // to obtain additional information about the module for now.
|
456 |
| - // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
457 |
| - // `shouldEmitOnlyKernelsAsEntryPoints` in |
458 |
| - // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 484 | + |
459 | 485 | std::unique_ptr<ModuleSplitterBase> Splitter = getDeviceCodeSplitter(
|
460 |
| - ModuleDesc{std::unique_ptr<llvm::Module>{&Module}}, SPLIT_NONE, |
461 |
| - /*IROutputOnly=*/false, |
462 |
| - /*EmitOnlyKernelsAsEntryPoints=*/true); |
463 |
| - assert(Splitter->remainingSplits() == 1); |
| 486 | + ModuleDesc{std::move(Module)}, SplitMode, |
| 487 | + /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); |
| 488 | + assert(Splitter->hasMoreSplits()); |
| 489 | + if (Splitter->remainingSplits() > 1) { |
| 490 | + return createStringError("Device code requires splitting"); |
| 491 | + } |
464 | 492 |
|
465 | 493 | // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall
|
466 | 494 | // be processed.
|
467 | 495 |
|
468 |
| - assert(Splitter->hasMoreSplits()); |
469 | 496 | ModuleDesc MDesc = Splitter->nextSplit();
|
470 |
| - assert(&Module == &MDesc.getModule()); |
| 497 | + |
| 498 | + // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when |
| 499 | + // `invoke_simd` is supported. |
| 500 | + |
| 501 | + SmallVector<ModuleDesc, 2> ESIMDSplits = |
| 502 | + splitByESIMD(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); |
| 503 | + assert(!ESIMDSplits.empty()); |
| 504 | + if (ESIMDSplits.size() > 1) { |
| 505 | + return createStringError("Mixing SYCL and ESIMD code is unsupported"); |
| 506 | + } |
| 507 | + MDesc = std::move(ESIMDSplits.front()); |
| 508 | + |
| 509 | + if (MDesc.isESIMD()) { |
| 510 | + // `sycl-post-link` has a `-lower-esimd` option, but there's no clang driver |
| 511 | + // option to influence it. Rather, the driver sets it unconditionally in the |
| 512 | + // multi-file output mode, which we are mimicking here. |
| 513 | + lowerEsimdConstructs(MDesc, PerformOpts); |
| 514 | + } |
| 515 | + |
471 | 516 | MDesc.saveSplitInformationAsMetadata();
|
472 | 517 |
|
473 | 518 | RTCBundleInfo BundleInfo;
|
@@ -504,10 +549,7 @@ Expected<RTCBundleInfo> jit_compiler::performPostLink(
|
504 | 549 | }
|
505 | 550 | };
|
506 | 551 |
|
507 |
| - // Regain ownership of the module. |
508 |
| - MDesc.releaseModulePtr().release(); |
509 |
| - |
510 |
| - return std::move(BundleInfo); |
| 552 | + return PostLinkResult{std::move(BundleInfo), MDesc.releaseModulePtr()}; |
511 | 553 | }
|
512 | 554 |
|
513 | 555 | Expected<InputArgList>
|
@@ -569,11 +611,9 @@ jit_compiler::parseUserArgs(View<const char *> UserArgs) {
|
569 | 611 | return createStringError("Device code splitting is not yet supported");
|
570 | 612 | }
|
571 | 613 |
|
572 |
| - if (AL.hasArg(OPT_fsycl_device_code_split_esimd, |
573 |
| - OPT_fno_sycl_device_code_split_esimd)) { |
574 |
| - // TODO: There are more ESIMD-related options. |
575 |
| - return createStringError( |
576 |
| - "Runtime compilation of ESIMD kernels is not yet supported"); |
| 614 | + if (!AL.hasFlag(OPT_fsycl_device_code_split_esimd, |
| 615 | + OPT_fno_sycl_device_code_split_esimd, true)) { |
| 616 | + return createStringError("ESIMD device code split cannot be deactivated"); |
577 | 617 | }
|
578 | 618 |
|
579 | 619 | if (AL.hasFlag(OPT_fsycl_dead_args_optimization,
|
|
0 commit comments