-
Notifications
You must be signed in to change notification settings - Fork 787
[sycl-post-link] Enable ESIMD-specific lowering #3195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
; This is a basic test for Lowering ESIMD constructs after splitting. | ||
; This test also implicitly checks that input module is not reused | ||
; for ESIMD kernels in any case. | ||
|
||
; No lowering | ||
; RUN: sycl-post-link -split-esimd -S %s -o %t.table | ||
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-NO-LOWERING | ||
|
||
; Default lowering | ||
; RUN: sycl-post-link -split-esimd -lower-esimd -S %s -o %t.table | ||
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2 | ||
|
||
; -O2 lowering | ||
; RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %s -o %t.table | ||
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2 | ||
|
||
; -O0 lowering | ||
; RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %s -o %t.table | ||
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O0 | ||
|
||
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
target triple = "spir64-unknown-linux-sycldevice" | ||
|
||
declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() | ||
|
||
define dso_local spir_kernel void @ESIMD_kernel() #0 !sycl_explicit_simd !3 { | ||
entry: | ||
%call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() | ||
ret void | ||
} | ||
|
||
attributes #0 = { "sycl-module-id"="a.cpp" } | ||
|
||
!llvm.module.flags = !{!0} | ||
!opencl.spir.version = !{!1} | ||
!spirv.Source = !{!2} | ||
|
||
!0 = !{i32 1, !"wchar_size", i32 4} | ||
!1 = !{i32 1, i32 2} | ||
!2 = !{i32 0, i32 100000} | ||
!3 = !{} | ||
|
||
; By default, no lowering is performed | ||
; CHECK-NO-LOWERING: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() | ||
; CHECK-NO-LOWERING: define dso_local spir_kernel void @ESIMD_kernel() | ||
; CHECK-NO-LOWERING: entry: | ||
; CHECK-NO-LOWERING: %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() | ||
; CHECK-NO-LOWERING: ret void | ||
; CHECK-NO-LOWERING: } | ||
|
||
; With -O0, we only lower ESIMD code, but no other optimizations | ||
; CHECK-O0: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() | ||
; CHECK-O0: define dso_local spir_kernel void @ESIMD_kernel() #1 !sycl_explicit_simd !3 !intel_reqd_sub_group_size !4 { | ||
; CHECK-O0: entry: | ||
; CHECK-O0: call <3 x i32> @llvm.genx.local.id.v3i32() | ||
; CHECK-O0: call <3 x i32> @llvm.genx.local.size.v3i32() | ||
; CHECK-O0: call i32 @llvm.genx.group.id.x() | ||
; CHECK-O0: ret void | ||
; CHECK-O0: } | ||
|
||
; With -O2, unused call was optimized away | ||
; CHECK-O2: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() | ||
; CHECK-O2: define dso_local spir_kernel void @ESIMD_kernel() | ||
; CHECK-O2: entry: | ||
; CHECK-O2: ret void | ||
; CHECK-O2: } |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,13 +19,15 @@ | |
#include "llvm/ADT/SetVector.h" | ||
#include "llvm/ADT/Triple.h" | ||
#include "llvm/Bitcode/BitcodeWriterPass.h" | ||
#include "llvm/GenXIntrinsics/GenXSPIRVWriterAdaptor.h" | ||
#include "llvm/IR/IRPrintingPasses.h" | ||
#include "llvm/IR/InstIterator.h" | ||
#include "llvm/IR/Instructions.h" | ||
#include "llvm/IR/LLVMContext.h" | ||
#include "llvm/IR/LegacyPassManager.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/IRReader/IRReader.h" | ||
#include "llvm/SYCLLowerIR/LowerESIMD.h" | ||
#include "llvm/Support/CommandLine.h" | ||
#include "llvm/Support/InitLLVM.h" | ||
#include "llvm/Support/Path.h" | ||
|
@@ -35,6 +37,8 @@ | |
#include "llvm/Support/WithColor.h" | ||
#include "llvm/Transforms/IPO.h" | ||
#include "llvm/Transforms/IPO/GlobalDCE.h" | ||
#include "llvm/Transforms/InstCombine/InstCombine.h" | ||
#include "llvm/Transforms/Scalar.h" | ||
#include "llvm/Transforms/Utils/Cloning.h" | ||
|
||
#include <memory> | ||
|
@@ -92,6 +96,45 @@ static cl::opt<bool> SplitEsimd{"split-esimd", | |
cl::desc("Split SYCL and ESIMD kernels"), | ||
cl::cat(PostLinkCat)}; | ||
|
||
// TODO Design note: sycl-post-link should probably separate different kinds of | ||
// its functionality on logical and source level: | ||
// - LLVM IR module splitting | ||
// - Running LLVM IR passes on resulting modules | ||
// - Generating additional files (like spec constants, dead arg info,...) | ||
// The tool itself could be just a "driver" creating needed pipelines from the | ||
// above actions. This could help make the tool structure clearer and more | ||
// maintainable. | ||
|
||
static cl::opt<bool> LowerEsimd{ | ||
DenisBakhvalov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"lower-esimd", cl::desc("Lower ESIMD constructs"), cl::cat(PostLinkCat)}; | ||
|
||
static cl::opt<bool> | ||
OptLevelO0("O0", cl::desc("Optimization level 0. Similar to clang -O0"), | ||
cl::cat(PostLinkCat)); | ||
|
||
static cl::opt<bool> | ||
OptLevelO1("O1", cl::desc("Optimization level 1. Similar to clang -O1"), | ||
cl::cat(PostLinkCat)); | ||
|
||
static cl::opt<bool> | ||
OptLevelO2("O2", cl::desc("Optimization level 2. Similar to clang -O2"), | ||
cl::cat(PostLinkCat)); | ||
|
||
static cl::opt<bool> OptLevelOs( | ||
"Os", | ||
cl::desc( | ||
"Like -O2 with extra optimizations for size. Similar to clang -Os"), | ||
cl::cat(PostLinkCat)); | ||
|
||
static cl::opt<bool> OptLevelOz( | ||
"Oz", | ||
cl::desc("Like -Os but reduces code size further. Similar to clang -Oz"), | ||
cl::cat(PostLinkCat)); | ||
|
||
static cl::opt<bool> | ||
OptLevelO3("O3", cl::desc("Optimization level 3. Similar to clang -O3"), | ||
cl::cat(PostLinkCat)); | ||
Comment on lines
+111
to
+136
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How these are expected to be used? Will they be propagated to the tool based on the same options passed to clang driver? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I will submit the patch for that later and put the link here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is the driver portion that will pass the optimization level to sycl-post-link: #3207. |
||
|
||
enum IRSplitMode { | ||
SPLIT_PER_TU, // one module per translation unit | ||
SPLIT_PER_KERNEL, // one module per kernel | ||
|
@@ -627,6 +670,59 @@ static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists, | |
} \ | ||
} | ||
|
||
// Helper function for creating Inliner pass. | ||
// The approach is taken from opt tool. | ||
static Pass *createFunctionInliningPassHelper() { | ||
bader marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (OptLevelO0) | ||
return createFunctionInliningPass(0, 0, false); | ||
|
||
if (OptLevelO1) | ||
return createFunctionInliningPass(1, 0, false); | ||
|
||
if (OptLevelO2) | ||
return createFunctionInliningPass(2, 0, false); | ||
|
||
if (OptLevelOs) | ||
return createFunctionInliningPass(2, 1, false); | ||
|
||
if (OptLevelOz) | ||
return createFunctionInliningPass(2, 2, false); | ||
|
||
if (OptLevelO3) | ||
return createFunctionInliningPass(3, 0, false); | ||
|
||
return createFunctionInliningPass(); | ||
} | ||
|
||
// When ESIMD code was separated from the regular SYCL code, | ||
// we can safely process ESIMD part. | ||
Comment on lines
+697
to
+698
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is probably an architecture question/concern: why can't we leave a single optimization pipeline which is executed before I understand that IGC VC backend most likely expects LLVM IR in some particular form to do its job right, but can't it be improved? It seems a bit strange to me that we have to maintain two optimization pipelines in two different places. Moreover, I guess that the first pipeline before Note: I'm okay if we have to launch a few more passes in order to launch some There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @AlexeySachkov, yes, those are ESIMD-specific transformations that should be applied to ESIMD code only, otherwise, it will break regular SYCL code. So, it should be done after splitting. Or we should clone the code that is shared between ESIMD and regular SYCL kernels and then do the ESIMD lowering. Splitting first and then transform, eliminates the need to clone shared code, so I guess this is definitely an advantage. Yes, we have general-purposes clean-up passes after ESIMD lowering. So, yes, we have 2 sets of general-purpose passes running on ESIMD code. I believe we have to have the second set after ESIMD lowering. @cmc-rep can confirm. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm not against ESIMD-specific transformation, I'm concerned about additional optimization passes we launch in
To me it seems better if those post-processing is moved into vc backend as pre-processing passes, because this is a thing specific to a single backend, not even to a language constructs. |
||
// TODO: support options like -debug-pass, -print-[before|after], and others | ||
static void LowerEsimdConstructs(Module &M) { | ||
legacy::PassManager MPM; | ||
MPM.add(createSYCLLowerESIMDPass()); | ||
if (!OptLevelO0) { | ||
// Inlining and SROA passes are required to make | ||
// ESIMD/accessor_gather_scatter.cpp test work. | ||
MPM.add(createFunctionInliningPassHelper()); | ||
MPM.add(createSROAPass()); | ||
} | ||
MPM.add(createESIMDLowerVecArgPass()); | ||
MPM.add(createESIMDLowerLoadStorePass()); | ||
if (!OptLevelO0) { | ||
MPM.add(createSROAPass()); | ||
MPM.add(createEarlyCSEPass(true)); | ||
MPM.add(createInstructionCombiningPass()); | ||
MPM.add(createDeadCodeEliminationPass()); | ||
MPM.add(createFunctionInliningPassHelper()); | ||
MPM.add(createSROAPass()); | ||
MPM.add(createEarlyCSEPass(true)); | ||
MPM.add(createInstructionCombiningPass()); | ||
MPM.add(createDeadCodeEliminationPass()); | ||
} | ||
MPM.add(createGenXSPIRVWriterAdaptorPass()); | ||
MPM.run(M); | ||
} | ||
|
||
using TableFiles = std::map<StringRef, string_vector>; | ||
|
||
static TableFiles processOneModule(std::unique_ptr<Module> M, bool IsEsimd, | ||
|
@@ -635,6 +731,9 @@ static TableFiles processOneModule(std::unique_ptr<Module> M, bool IsEsimd, | |
if (!M) | ||
return TblFiles; | ||
|
||
if (IsEsimd && LowerEsimd) | ||
LowerEsimdConstructs(*M); | ||
|
||
std::map<StringRef, std::vector<Function *>> GlobalsSet; | ||
|
||
bool DoSplit = SplitMode.getNumOccurrences() > 0; | ||
|
@@ -687,11 +786,16 @@ static TableFiles processOneModule(std::unique_ptr<Module> M, bool IsEsimd, | |
ResultModules.push_back(std::move(M)); | ||
|
||
{ | ||
// reuse input module if there were no spec constants and no splitting | ||
// Reuse input module with only regular SYCL kernels if there were | ||
// no spec constants and no splitting. | ||
// We cannot reuse input module for ESIMD code since it was transformed. | ||
bool CanReuseInputModule = !SpecConstsMet && (ResultModules.size() == 1) && | ||
!SyclAndEsimdKernels && !IsEsimd; | ||
string_vector Files = | ||
SpecConstsMet || (ResultModules.size() > 1) || SyclAndEsimdKernels | ||
? saveResultModules(ResultModules, IsEsimd ? "esimd_" : "") | ||
: string_vector{InputFilename}; | ||
CanReuseInputModule | ||
? string_vector{InputFilename} | ||
: saveResultModules(ResultModules, IsEsimd ? "esimd_" : ""); | ||
|
||
// "Code" column is always output | ||
std::copy(Files.begin(), Files.end(), | ||
std::back_inserter(TblFiles[COL_CODE])); | ||
|
@@ -818,6 +922,10 @@ int main(int argc, char **argv) { | |
"- Specialization constant intrinsic transformer. Replaces symbolic\n" | ||
" ID-based intrinsics to integer ID-based ones to make them friendly\n" | ||
" for the SPIRV translator\n" | ||
"When the tool splits input module into regular SYCL and ESIMD kernels,\n" | ||
"it performs a set of specific lowering and transformation passes on\n" | ||
"ESIMD module, which is enabled by the '-lower-esimd' option. Regular\n" | ||
"optimization level options are supported, e.g. -O[0|1|2|3|s|z].\n" | ||
"Normally, the tool generates a number of files and \"file table\"\n" | ||
"file listing all generated files in a table manner. For example, if\n" | ||
"the input file 'example.bc' contains two kernels, then the command\n" | ||
|
Uh oh!
There was an error while loading. Please reload this page.