Skip to content

Commit 3d2adc7

Browse files
[sycl-post-link] Enable ESIMD-specific lowering (#3195)
This is the first patch in the effort of moving ESIMD-specific lowering passes from clang FE (BackendUtils.cpp) to the sycl-post-link tool after SYCL-ESIMD splitting. Co-authored-by: kbobrovs <[email protected]>
1 parent eab4791 commit 3d2adc7

File tree

3 files changed

+191
-4
lines changed

3 files changed

+191
-4
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; This is a basic test for lowering ESIMD constructs after splitting.
2+
; This test also implicitly checks that input module is not reused
3+
; for ESIMD kernels in any case.
4+
5+
; No lowering
6+
; RUN: sycl-post-link -split-esimd -S %s -o %t.table
7+
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-NO-LOWERING
8+
9+
; Default lowering
10+
; RUN: sycl-post-link -split-esimd -lower-esimd -S %s -o %t.table
11+
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2
12+
13+
; -O2 lowering
14+
; RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %s -o %t.table
15+
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2
16+
17+
; -O0 lowering
18+
; RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %s -o %t.table
19+
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O0
20+
21+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
22+
target triple = "spir64-unknown-linux-sycldevice"
23+
24+
declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv()
25+
26+
define dso_local spir_kernel void @ESIMD_kernel() #0 !sycl_explicit_simd !3 {
27+
entry:
28+
%call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv()
29+
ret void
30+
}
31+
32+
attributes #0 = { "sycl-module-id"="a.cpp" }
33+
34+
!llvm.module.flags = !{!0}
35+
!opencl.spir.version = !{!1}
36+
!spirv.Source = !{!2}
37+
38+
!0 = !{i32 1, !"wchar_size", i32 4}
39+
!1 = !{i32 1, i32 2}
40+
!2 = !{i32 0, i32 100000}
41+
!3 = !{}
42+
43+
; Without -lower-esimd, no lowering is performed
44+
; CHECK-NO-LOWERING: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv()
45+
; CHECK-NO-LOWERING: define dso_local spir_kernel void @ESIMD_kernel()
46+
; CHECK-NO-LOWERING: entry:
47+
; CHECK-NO-LOWERING: %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv()
48+
; CHECK-NO-LOWERING: ret void
49+
; CHECK-NO-LOWERING: }
50+
51+
; With -O0, only the ESIMD lowering is performed; no other optimizations are run
52+
; CHECK-O0: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv()
53+
; CHECK-O0: define dso_local spir_kernel void @ESIMD_kernel() #1 !sycl_explicit_simd !3 !intel_reqd_sub_group_size !4 {
54+
; CHECK-O0: entry:
55+
; CHECK-O0: call <3 x i32> @llvm.genx.local.id.v3i32()
56+
; CHECK-O0: call <3 x i32> @llvm.genx.local.size.v3i32()
57+
; CHECK-O0: call i32 @llvm.genx.group.id.x()
58+
; CHECK-O0: ret void
59+
; CHECK-O0: }
60+
61+
; With -O2 (and with -lower-esimd alone, when no -O option is given), the unused call is optimized away
62+
; CHECK-O2: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv()
63+
; CHECK-O2: define dso_local spir_kernel void @ESIMD_kernel()
64+
; CHECK-O2: entry:
65+
; CHECK-O2: ret void
66+
; CHECK-O2: }

llvm/tools/sycl-post-link/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,26 @@ set(LLVM_LINK_COMPONENTS
55
IRReader
66
Support
77
TransformUtils
8+
SYCLLowerIR
89
)
910

11+
get_property(LLVMGenXIntrinsics_SOURCE_DIR GLOBAL PROPERTY LLVMGenXIntrinsics_SOURCE_PROP)
12+
get_property(LLVMGenXIntrinsics_BINARY_DIR GLOBAL PROPERTY LLVMGenXIntrinsics_BINARY_PROP)
13+
14+
include_directories(
15+
${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include
16+
${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include)
17+
1018
add_llvm_tool(sycl-post-link
1119
sycl-post-link.cpp
1220
SPIRKernelParamOptInfo.cpp
1321
SpecConstants.cpp
1422

23+
ADDITIONAL_HEADER_DIRS
24+
${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include
25+
${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include
26+
1527
DEPENDS
1628
intrinsics_gen
29+
LLVMGenXIntrinsics
1730
)

llvm/tools/sycl-post-link/sycl-post-link.cpp

Lines changed: 112 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@
1919
#include "llvm/ADT/SetVector.h"
2020
#include "llvm/ADT/Triple.h"
2121
#include "llvm/Bitcode/BitcodeWriterPass.h"
22+
#include "llvm/GenXIntrinsics/GenXSPIRVWriterAdaptor.h"
2223
#include "llvm/IR/IRPrintingPasses.h"
2324
#include "llvm/IR/InstIterator.h"
2425
#include "llvm/IR/Instructions.h"
2526
#include "llvm/IR/LLVMContext.h"
2627
#include "llvm/IR/LegacyPassManager.h"
2728
#include "llvm/IR/Module.h"
2829
#include "llvm/IRReader/IRReader.h"
30+
#include "llvm/SYCLLowerIR/LowerESIMD.h"
2931
#include "llvm/Support/CommandLine.h"
3032
#include "llvm/Support/InitLLVM.h"
3133
#include "llvm/Support/Path.h"
@@ -35,6 +37,8 @@
3537
#include "llvm/Support/WithColor.h"
3638
#include "llvm/Transforms/IPO.h"
3739
#include "llvm/Transforms/IPO/GlobalDCE.h"
40+
#include "llvm/Transforms/InstCombine/InstCombine.h"
41+
#include "llvm/Transforms/Scalar.h"
3842
#include "llvm/Transforms/Utils/Cloning.h"
3943

4044
#include <memory>
@@ -92,6 +96,45 @@ static cl::opt<bool> SplitEsimd{"split-esimd",
9296
cl::desc("Split SYCL and ESIMD kernels"),
9397
cl::cat(PostLinkCat)};
9498

99+
// TODO Design note: sycl-post-link should probably separate different kinds of
100+
// its functionality on logical and source level:
101+
// - LLVM IR module splitting
102+
// - Running LLVM IR passes on resulting modules
103+
// - Generating additional files (like spec constants, dead arg info,...)
104+
// The tool itself could be just a "driver" creating needed pipelines from the
105+
// above actions. This could help make the tool structure clearer and more
106+
// maintainable.
107+
108+
static cl::opt<bool> LowerEsimd{
109+
"lower-esimd", cl::desc("Lower ESIMD constructs"), cl::cat(PostLinkCat)};
110+
111+
static cl::opt<bool>
112+
OptLevelO0("O0", cl::desc("Optimization level 0. Similar to clang -O0"),
113+
cl::cat(PostLinkCat));
114+
115+
static cl::opt<bool>
116+
OptLevelO1("O1", cl::desc("Optimization level 1. Similar to clang -O1"),
117+
cl::cat(PostLinkCat));
118+
119+
static cl::opt<bool>
120+
OptLevelO2("O2", cl::desc("Optimization level 2. Similar to clang -O2"),
121+
cl::cat(PostLinkCat));
122+
123+
static cl::opt<bool> OptLevelOs(
124+
"Os",
125+
cl::desc(
126+
"Like -O2 with extra optimizations for size. Similar to clang -Os"),
127+
cl::cat(PostLinkCat));
128+
129+
static cl::opt<bool> OptLevelOz(
130+
"Oz",
131+
cl::desc("Like -Os but reduces code size further. Similar to clang -Oz"),
132+
cl::cat(PostLinkCat));
133+
134+
static cl::opt<bool>
135+
OptLevelO3("O3", cl::desc("Optimization level 3. Similar to clang -O3"),
136+
cl::cat(PostLinkCat));
137+
95138
enum IRSplitMode {
96139
SPLIT_PER_TU, // one module per translation unit
97140
SPLIT_PER_KERNEL, // one module per kernel
@@ -625,6 +668,59 @@ static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists,
625668
} \
626669
}
627670

671+
// Helper function for creating the Inliner pass that matches the -O* option
// given on the command line. The flags are tested in the fixed order
// O0, O1, O2, Os, Oz, O3 and the first one that is set wins; if none is set,
// the argument-less createFunctionInliningPass() is returned.
672+
// The approach is taken from the opt tool.
// NOTE(review): the three arguments are presumably (OptLevel, SizeOptLevel,
// DisableInlineHotCallSite) as in opt - confirm against llvm/Transforms/IPO.h.
673+
static Pass *createFunctionInliningPassHelper() {
674+
if (OptLevelO0)
675+
return createFunctionInliningPass(0, 0, false);
676+
677+
if (OptLevelO1)
678+
return createFunctionInliningPass(1, 0, false);
679+
680+
if (OptLevelO2)
681+
return createFunctionInliningPass(2, 0, false);
682+
683+
if (OptLevelOs)
684+
return createFunctionInliningPass(2, 1, false); // same first argument as -O2
685+
686+
if (OptLevelOz)
687+
return createFunctionInliningPass(2, 2, false); // same first argument as -O2
688+
689+
if (OptLevelO3)
690+
return createFunctionInliningPass(3, 0, false);
691+
692+
return createFunctionInliningPass(); // no -O* flag was given
693+
}
694+
695+
// Builds a legacy pass pipeline that lowers ESIMD constructs and runs it on
// module M, modifying M in place.
696+
// This is safe only once ESIMD code has been separated from the regular SYCL
// code; the visible caller invokes it only when both IsEsimd and -lower-esimd
// are set.
697+
// TODO: support options like -debug-pass, -print-[before|after], and others
698+
static void LowerEsimdConstructs(Module &M) {
699+
legacy::PassManager MPM;
700+
MPM.add(createSYCLLowerESIMDPass()); // always the first pass in the pipeline
701+
if (!OptLevelO0) {
702+
// Inlining and SROA passes are required to make
703+
// ESIMD/accessor_gather_scatter.cpp test work.
704+
MPM.add(createFunctionInliningPassHelper());
705+
MPM.add(createSROAPass());
706+
}
707+
MPM.add(createESIMDLowerVecArgPass()); // added regardless of the -O level
708+
MPM.add(createESIMDLowerLoadStorePass()); // added regardless of the -O level
709+
if (!OptLevelO0) { // skipped at -O0: SROA/EarlyCSE/InstCombine/DCE, then the inliner, then the same four passes again
710+
MPM.add(createSROAPass());
711+
MPM.add(createEarlyCSEPass(true));
712+
MPM.add(createInstructionCombiningPass());
713+
MPM.add(createDeadCodeEliminationPass());
714+
MPM.add(createFunctionInliningPassHelper());
715+
MPM.add(createSROAPass());
716+
MPM.add(createEarlyCSEPass(true));
717+
MPM.add(createInstructionCombiningPass());
718+
MPM.add(createDeadCodeEliminationPass());
719+
}
720+
MPM.add(createGenXSPIRVWriterAdaptorPass()); // always the last pass in the pipeline
721+
MPM.run(M);
722+
}
723+
628724
using TableFiles = std::map<StringRef, string_vector>;
629725

630726
static TableFiles processOneModule(std::unique_ptr<Module> M, bool IsEsimd,
@@ -633,6 +729,9 @@ static TableFiles processOneModule(std::unique_ptr<Module> M, bool IsEsimd,
633729
if (!M)
634730
return TblFiles;
635731

732+
if (IsEsimd && LowerEsimd)
733+
LowerEsimdConstructs(*M);
734+
636735
std::map<StringRef, std::vector<Function *>> GlobalsSet;
637736

638737
bool DoSplit = SplitMode.getNumOccurrences() > 0;
@@ -685,11 +784,16 @@ static TableFiles processOneModule(std::unique_ptr<Module> M, bool IsEsimd,
685784
ResultModules.push_back(std::move(M));
686785

687786
{
688-
// reuse input module if there were no spec constants and no splitting
787+
// Reuse input module with only regular SYCL kernels if there were
788+
// no spec constants and no splitting.
789+
// We cannot reuse input module for ESIMD code since it was transformed.
790+
bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) &&
791+
!SyclAndEsimdKernels && !IsEsimd;
689792
string_vector Files =
690-
SpecConstsMet || (ResultModules.size() > 1) || SyclAndEsimdKernels
691-
? saveResultModules(ResultModules, IsEsimd ? "esimd_" : "")
692-
: string_vector{InputFilename};
793+
CanReuseInputModule
794+
? string_vector{InputFilename}
795+
: saveResultModules(ResultModules, IsEsimd ? "esimd_" : "");
796+
693797
// "Code" column is always output
694798
std::copy(Files.begin(), Files.end(),
695799
std::back_inserter(TblFiles[COL_CODE]));
@@ -816,6 +920,10 @@ int main(int argc, char **argv) {
816920
"- Specialization constant intrinsic transformer. Replaces symbolic\n"
817921
" ID-based intrinsics to integer ID-based ones to make them friendly\n"
818922
" for the SPIRV translator\n"
923+
"When the tool splits input module into regular SYCL and ESIMD kernels,\n"
924+
"it performs a set of specific lowering and transformation passes on\n"
925+
"ESIMD module, which is enabled by the '-lower-esimd' option. Regular\n"
926+
"optimization level options are supported, e.g. -O[0|1|2|3|s|z].\n"
819927
"Normally, the tool generates a number of files and \"file table\"\n"
820928
"file listing all generated files in a table manner. For example, if\n"
821929
"the input file 'example.bc' contains two kernels, then the command\n"

0 commit comments

Comments
 (0)