Skip to content

Commit 2ffe840

Browse files
[ESIMD] Move ESIMD-specific passes to sycl-post-link (#3222)
This patch is a continuation of prior series of patches to allow having ESIMD and regular SYCL kernels in the same source code and in the same program. To mix two types of kernels we need to postpone lowering of ESIMD specific constructs and do that after we split ESIMD from regular SYCL code, which is done in sycl-post-link. This is a finalizing patch that only flips the switch for doing ESIMD-specific transformations in sycl-post-link and removes those passes from the common optimization pipeline in FE (BackendUtil.cpp). All the preliminary patches have already been committed.
1 parent 3ab344b commit 2ffe840

File tree

10 files changed

+52
-62
lines changed

10 files changed

+52
-62
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
#include "llvm/CodeGen/RegAllocRegistry.h"
2929
#include "llvm/CodeGen/SchedulerRegistry.h"
3030
#include "llvm/CodeGen/TargetSubtargetInfo.h"
31-
#include "llvm/GenXIntrinsics/GenXSPIRVWriterAdaptor.h"
3231
#include "llvm/IR/DataLayout.h"
3332
#include "llvm/IR/IRPrintingPasses.h"
3433
#include "llvm/IR/LegacyPassManager.h"
@@ -42,7 +41,6 @@
4241
#include "llvm/Passes/PassBuilder.h"
4342
#include "llvm/Passes/PassPlugin.h"
4443
#include "llvm/Passes/StandardInstrumentations.h"
45-
#include "llvm/SYCLLowerIR/LowerESIMD.h"
4644
#include "llvm/Support/BuryPointer.h"
4745
#include "llvm/Support/CommandLine.h"
4846
#include "llvm/Support/MemoryBuffer.h"
@@ -839,25 +837,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
839837

840838
PMBuilder.populateFunctionPassManager(FPM);
841839
PMBuilder.populateModulePassManager(MPM);
842-
843-
// Customize the tail of the module passes list for the ESIMD extension.
844-
if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD &&
845-
CodeGenOpts.OptimizationLevel != 0) {
846-
MPM.add(createESIMDLowerVecArgPass());
847-
MPM.add(createESIMDLowerLoadStorePass());
848-
MPM.add(createSROAPass());
849-
MPM.add(createEarlyCSEPass(true));
850-
MPM.add(createInstructionCombiningPass());
851-
MPM.add(createDeadCodeEliminationPass());
852-
MPM.add(createFunctionInliningPass(
853-
CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize,
854-
(!CodeGenOpts.SampleProfileFile.empty() &&
855-
CodeGenOpts.PrepareForThinLTO)));
856-
MPM.add(createSROAPass());
857-
MPM.add(createEarlyCSEPass(true));
858-
MPM.add(createInstructionCombiningPass());
859-
MPM.add(createDeadCodeEliminationPass());
860-
}
861840
}
862841

863842
static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
@@ -954,11 +933,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
954933
PerFunctionPasses.add(
955934
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
956935

957-
// ESIMD extension always requires lowering of certain IR constructs, such as
958-
// ESIMD C++ intrinsics, as the last FE step.
959-
if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD)
960-
PerModulePasses.add(createSYCLLowerESIMDPass());
961-
962936
CreatePasses(PerModulePasses, PerFunctionPasses);
963937

964938
legacy::PassManager CodeGenPasses;
@@ -976,9 +950,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
976950
!LangOpts.SYCLExplicitSIMD && LangOpts.EnableDAEInSpirKernels)
977951
PerModulePasses.add(createDeadArgEliminationSYCLPass());
978952

979-
if (LangOpts.SYCLIsDevice && LangOpts.SYCLExplicitSIMD)
980-
PerModulePasses.add(createGenXSPIRVWriterAdaptorPass());
981-
982953
switch (Action) {
983954
case Backend_EmitNothing:
984955
break;

clang/lib/CodeGen/CMakeLists.txt

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,6 @@ set(LLVM_LINK_COMPONENTS
2727
TransformUtils
2828
)
2929

30-
31-
get_property(LLVMGenXIntrinsics_SOURCE_DIR GLOBAL PROPERTY LLVMGenXIntrinsics_SOURCE_PROP)
32-
get_property(LLVMGenXIntrinsics_BINARY_DIR GLOBAL PROPERTY LLVMGenXIntrinsics_BINARY_PROP)
33-
34-
include_directories(
35-
${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include
36-
${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include)
37-
3830
add_clang_library(clangCodeGen
3931
BackendUtil.cpp
4032
CGAtomic.cpp
@@ -96,14 +88,9 @@ add_clang_library(clangCodeGen
9688
TargetInfo.cpp
9789
VarBypassDetector.cpp
9890

99-
ADDITIONAL_HEADER_DIRS
100-
${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include
101-
${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include
102-
10391
DEPENDS
10492
${codegen_deps}
10593
intrinsics_gen
106-
LLVMGenXIntrinsics
10794

10895
LINK_LIBS
10996
clangAnalysis
@@ -112,5 +99,4 @@ add_clang_library(clangCodeGen
11299
clangFrontend
113100
clangLex
114101
clangSerialization
115-
LLVMGenXIntrinsics
116102
)

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8348,7 +8348,7 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
83488348
options::OPT_fno_sycl_device_code_split_esimd, true))
83498349
addArgs(CmdArgs, TCArgs, {"-split-esimd"});
83508350
if (TCArgs.hasFlag(options::OPT_fsycl_device_code_lower_esimd,
8351-
options::OPT_fno_sycl_device_code_lower_esimd, false))
8351+
options::OPT_fno_sycl_device_code_lower_esimd, true))
83528352
addArgs(CmdArgs, TCArgs, {"-lower-esimd"});
83538353
}
83548354
addArgs(CmdArgs, TCArgs,

clang/test/CodeGenSYCL/esimd-accessor-ptr-md.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
// TODO: previously code generation and ESIMD lowering was
2+
// a part of the same %clang_cc1 invocation, but now it is
3+
// separate. So, we can split this test into 2, where one
4+
// will be testing code generation and the second ESIMD lowering.
5+
//
16
// RUN: %clang_cc1 -fsycl -fsycl-explicit-simd -fsycl-is-device \
27
// RUN: -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice \
3-
// RUN: -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
8+
// RUN: -disable-llvm-passes -emit-llvm %s -o %t
9+
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
10+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
411

512
// This test checks
613
// 1) proper metadata generation for accessors used in ESIMD
Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,27 @@
1+
// TODO: previously code generation and ESIMD lowering was
2+
// a part of the same %clang_cc1 invocation, but now it is
3+
// separate. So, we can split this test into 2, where one
4+
// will be testing code generation and the second ESIMD lowering.
5+
//
16
// RUN: %clang_cc1 -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice \
2-
// RUN: -fsycl -fsycl-is-device -fsycl-explicit-simd -emit-llvm %s -o - | \
3-
// RUN: FileCheck %s
7+
// RUN: -fsycl -fsycl-is-device -fsycl-explicit-simd -emit-llvm %s -o %t
8+
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
9+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
410

511
// This test checks that FE allows globals with register_num attribute in ESIMD mode.
612

713
__attribute__((opencl_private)) __attribute__((register_num(17))) int vc;
814
// CHECK: @vc = {{.+}} i32 0, align 4 #0
915

10-
SYCL_EXTERNAL void init_vc(int x) {
11-
vc = x;
12-
// CHECK: store i32 %{{[0-9a-zA-Z_]+}}, i32* @vc
16+
template <typename name, typename Func>
17+
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
18+
kernelFunc();
19+
}
20+
21+
void init_vc(int x) {
22+
kernel<class kernel_esimd>([=]() __attribute__((sycl_explicit_simd)) {
23+
vc = x;
24+
// CHECK: store i32 %{{[0-9a-zA-Z_]+}}, i32* @vc
25+
});
1326
}
1427
// CHECK: attributes #0 = {{.*"VCByteOffset"="17".*"VCVolatile"}}

clang/test/Driver/sycl-device-lib.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@
121121
// RUN: | FileCheck %s -check-prefix=SYCL_LLVM_LINK_NO_DEVICE_LIB
122122
// SYCL_LLVM_LINK_NO_DEVICE_LIB: clang{{.*}} "-cc1" {{.*}} "-fsycl-is-device"
123123
// SYCL_LLVM_LINK_NO_DEVICE_LIB-NOT: llvm-link{{.*}} "-only-needed"
124-
// SYCL_LLVM_LINK_NO_DEVICE_LIB: sycl-post-link{{.*}} "-symbols" "-split-esimd" "-O2" "-spec-const=rt" "-o" "{{.*}}.table" "{{.*}}.bc"
124+
// SYCL_LLVM_LINK_NO_DEVICE_LIB: sycl-post-link{{.*}} "-symbols" "-split-esimd" "-lower-esimd" "-O2" "-spec-const=rt" "-o" "{{.*}}.table" "{{.*}}.bc"
125125

126126
/// ###########################################################################
127127
/// test llvm-link behavior for special user input whose filename resembles SYCL device library

clang/test/Driver/sycl-offload-with-split.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,8 @@
318318
// CHK-NO-ESIMD-SPLIT-NOT: sycl-post-link{{.*}} "-split-esimd"
319319

320320
// Check lowering of ESIMD device code.
321-
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-LOWER
322-
// RUN: %clang_cl -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-LOWER
321+
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
322+
// RUN: %clang_cl -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
323323
// RUN: %clang -### -fsycl -fsycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
324324
// RUN: %clang_cl -### -fsycl -fsycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
325325
// RUN: %clang -### -fsycl -fno-sycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-LOWER

sycl/test/esimd/glob.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
// RUN: %clangxx -fsycl -fsycl-explicit-simd -c -fsycl-device-only -Xclang -emit-llvm %s -o - | \
2-
// RUN: FileCheck %s
1+
// RUN: %clangxx -fsycl -fsycl-explicit-simd -c -fsycl-device-only -Xclang -emit-llvm %s -o %t
2+
// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table
3+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
34

45
// This test checks that globals with register attribute are allowed in ESIMD
56
// mode, can be accessed in functions and correct LLVM IR is generated
@@ -9,7 +10,6 @@
910
#include <CL/sycl/INTEL/esimd.hpp>
1011
#include <iostream>
1112

12-
using namespace cl::sycl;
1313
using namespace sycl::INTEL::gpu;
1414

1515
constexpr unsigned VL = 16;
@@ -22,7 +22,16 @@ ESIMD_PRIVATE ESIMD_REGISTER(17 + VL) simd<int, VL> vc1;
2222
// CHECK-DAG: @vc1 = {{.+}} <16 x i32> zeroinitializer, align 64 #1
2323
// CHECK-DAG: attributes #1 = { {{.*}}"VCByteOffset"="33" "VCGlobalVariable" "VCVolatile"{{.*}} }
2424

25-
SYCL_EXTERNAL ESIMD_NOINLINE void init_vc(int x) {
25+
template <typename name, typename Func>
26+
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
27+
kernelFunc();
28+
}
29+
30+
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL ESIMD_NOINLINE void init_vc(int x) {
2631
vc1 = vc + 1;
2732
vc = x;
2833
}
34+
35+
void caller(int x) {
36+
kernel<class kernel_esimd>([=]() SYCL_ESIMD_KERNEL { init_vc(x); });
37+
}

sycl/test/esimd/intrins_trans.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
// RUN: %clangxx -O0 -fsycl -fsycl-explicit-simd -fsycl-device-only -Xclang -emit-llvm %s -o - | \
2-
// RUN: FileCheck %s
1+
// RUN: %clangxx -O0 -fsycl -fsycl-explicit-simd -fsycl-device-only -Xclang -emit-llvm %s -o %t
2+
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
3+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
34

45
// Checks ESIMD intrinsic translation.
56
// NOTE: must be run in -O0, as optimizer optimizes away some of the code
@@ -109,7 +110,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
109110
// CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %[[SI2]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
110111

111112
auto ee = __esimd_vload<int, 16>((vector_type_t<int, 16> *)(&vg));
112-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* {{.*}})
113+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p0v16i32(<16 x i32>* {{.*}})
113114
__esimd_vstore<int, 32>(&vc, va.data());
114115
// CHECK: store <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> addrspace(4)* {{.*}}
115116

sycl/test/esimd/spirv_intrins_trans.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
// RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -O0 -S -emit-llvm -x c++ %s -o - | FileCheck %s
1+
// RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -O0 -S -emit-llvm -x c++ %s -o %t
2+
// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table
3+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
4+
25
// This test checks that all SPIR-V intrinsics are correctly
3-
// translated into GenX counterparts (implemented in LowerCM.cpp)
6+
// translated into GenX counterparts (implemented in LowerESIMD.cpp)
47

58
#include <CL/sycl.hpp>
69
#include <CL/sycl/INTEL/esimd.hpp>

0 commit comments

Comments
 (0)