Skip to content

Commit 3d6bc0f

Browse files
authored
[SYCL][ESIMD] Add hidden compiler option to not compile host code (#11037)
ESIMD code can only be run on an Intel GPU. Previously we had the ESMID emulator that called into host implementations of some of the GPU intrinsics, but that has been deprecated and will be removed. Even when the emulator was supported, it was seldom used by customers. To summarize, the host compile for ESIMD code had little value to customers previous, and will soon have zero value once the emulator is removed. However, the host compile can be slow. We have a bug report from a customer reporting slow compilation time for an ESIMD kernel with a large vector. The reason for the slowdown was investigated by the X86 backend team and they found for the host compile, the vector exceeds the maximum legal vector size on X86 and instruction selection needs to spend a lot of time doing heavy lifting to generate the right assembly. This is a total waste of the user's time as that code will never and can never be run. As a workaround for these cases, add a new hidden option, `-fsycl-esimd-build-host-code`/`-fno-sycl-esimd-build-host-code`, default `-fsycl-esimd-build-host-code`, to try to speed up the host compile. This is mostly done through a new pass which runs early and removes all IR for ESIMD functions and replaces them with a single return statement. This allows later optimizations to do even more cleanup. Also, I modified some of the ESIMD macros to generate IR that is more likely to be optimized out for the host, just disabling inlining and setting internal linkage. We do not intend to advertise this option as there should be no issue in most cases, but we would like to be able to provide a workaround if users hit an issue. I have manually verified this option fixes the slowdown reported by the customer. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 0ef26d3 commit 3d6bc0f

File tree

14 files changed

+166
-2
lines changed

14 files changed

+166
-2
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for
281281
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
282282
LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point")
283283
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
284+
LANGOPT(SYCLESIMDBuildHostCode, 1, 1, "Build the host implementation of ESIMD functions")
284285
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
285286
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
286287
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")

clang/include/clang/Driver/Options.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3634,7 +3634,14 @@ defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-me
36343634
"Disabled by default. (experimental)">,
36353635
NegFlag<SetFalse, [], [ClangOption], "Do not enforce using stateless memory accesses. (experimental)">,
36363636
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], "">>;
3637-
3637+
// TODO: Remove this option once ESIMD headers are updated to
3638+
// guard vectors to be device only.
3639+
defm sycl_esimd_build_host_code : BoolFOption<"sycl-esimd-build-host-code",
3640+
LangOpts<"SYCLESIMDBuildHostCode">, DefaultTrue,
3641+
PosFlag<SetTrue, [], [ClangOption], "Build the host implementation of ESIMD functions."
3642+
"Enabled by default.">,
3643+
NegFlag<SetFalse, [], [ClangOption], "Don't build the host implementation of ESIMD functions.">,
3644+
BothFlags<[HelpHidden], [ClangOption, CLOption, DXCOption, CC1Option], "">>;
36383645
def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[NoXarchOption]>,
36393646
Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>,
36403647
HelpText<"Specify comma-separated list of triples SYCL offloading targets to be supported">;

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/Passes/StandardInstrumentations.h"
4646
#include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h"
4747
#include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h"
48+
#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h"
4849
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
4950
#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h"
5051
#include "llvm/SYCLLowerIR/PrepareSYCLNativeCPU.h"
@@ -965,6 +966,11 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
965966
MPM.addPass(
966967
SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{"fp64"}));
967968
});
969+
else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode)
970+
PB.registerPipelineStartEPCallback(
971+
[&](ModulePassManager &MPM, OptimizationLevel Level) {
972+
MPM.addPass(ESIMDRemoveHostCodePass());
973+
});
968974

969975
// Add the InferAddressSpaces pass for all the SPIR[V] targets
970976
if (TargetTriple.isSPIR() || TargetTriple.isSPIRV()) {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5429,6 +5429,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
54295429
else {
54305430
for (auto &Macro : D.getSYCLTargetMacroArgs())
54315431
CmdArgs.push_back(Args.MakeArgString(Macro));
5432+
if (!Args.hasFlag(options::OPT_fsycl_esimd_build_host_code,
5433+
options::OPT_fno_sycl_esimd_build_host_code, true))
5434+
CmdArgs.push_back("-fno-sycl-esimd-build-host-code");
54325435
}
54335436
if (Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
54345437
options::OPT_fno_sycl_esimd_force_stateless_mem, false))

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
13391339
Builder.defineMacro("__ENABLE_USM_ADDR_SPACE__");
13401340
Builder.defineMacro("SYCL_DISABLE_FALLBACK_ASSERT");
13411341
}
1342+
} else if (LangOpts.SYCLIsHost && LangOpts.SYCLESIMDBuildHostCode) {
1343+
Builder.defineMacro("__ESIMD_BUILD_HOST_CODE");
13421344
}
13431345
if (LangOpts.SYCLUnnamedLambda)
13441346
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__");
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
/// Verify that the driver option is translated to corresponding options
3+
/// to host
4+
// RUN: %clang -### -fsycl -fno-sycl-esimd-build-host-code \
5+
// RUN: %s 2>&1 | FileCheck -check-prefix=CHECK-PASS-TO-COMPS %s
6+
// CHECK-PASS-TO-COMPS: clang
7+
// CHECK-PASS-TO-COMPS-NOT: "-fno-sycl-esimd-build-host-code"
8+
// CHECK-PASS-TO-COMPS: sycl-post-link{{.*}}
9+
// CHECK-PASS-TO-COMPS: clang{{.*}} "-fsycl-is-host" {{.*}}"-fno-sycl-esimd-build-host-code"
10+
11+
12+
/// Verify that removing host code is not enabled by default
13+
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck -implicit-check-not "-fno-sycl-esimd-build-host-code" %s

llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ class SYCLFixupESIMDKernelWrapperMDPass
8080
public:
8181
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
8282
};
83+
84+
class ESIMDRemoveHostCodePass : public PassInfoMixin<ESIMDRemoveHostCodePass> {
85+
public:
86+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
87+
};
88+
8389
} // namespace llvm
8490

8591
#endif // LLVM_SYCLLOWERIR_LOWERESIMD_H

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ MODULE_PASS("SPIRITTAnnotations", SPIRITTAnnotationsPass())
137137
MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass())
138138
MODULE_PASS("sycllowerwglocalmemory", SYCLLowerWGLocalMemoryPass())
139139
MODULE_PASS("lower-esimd-kernel-attrs", SYCLFixupESIMDKernelWrapperMDPass())
140+
MODULE_PASS("esimd-remove-host-code", ESIMDRemoveHostCodePass());
140141
MODULE_PASS("sycl-propagate-aspects-usage", SYCLPropagateAspectsUsagePass())
141142
MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass())
142143
MODULE_PASS("compile-time-properties", CompileTimePropertiesPass())

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
5050
ESIMD/ESIMDOptimizeVecArgCallConv.cpp
5151
ESIMD/ESIMDUtils.cpp
5252
ESIMD/ESIMDVerifier.cpp
53+
ESIMD/ESIMDRemoveHostCode.cpp
5354
ESIMD/LowerESIMD.cpp
5455
ESIMD/LowerESIMDKernelAttrs.cpp
5556
CompileTimePropertiesPass.cpp
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//===-- ESIMDRemoveHostCode.cpp - remove host code for ESIMD -----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// ESIMD code is not often run on the host, but we still compile for the host.
9+
// If requested by the user, remove the implementations of all ESIMD functions
10+
// to possibly speed up host compilation time.
11+
//===----------------------------------------------------------------------===//
12+
13+
#define DEBUG_TYPE "ESIMDRemoveHostCodePass"
14+
15+
#include "llvm/Demangle/Demangle.h"
16+
#include "llvm/Demangle/ItaniumDemangle.h"
17+
#include "llvm/IR/Function.h"
18+
#include "llvm/IR/Module.h"
19+
#include "llvm/Pass.h"
20+
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
21+
#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h"
22+
#include "llvm/Support/Debug.h"
23+
#include "llvm/TargetParser/Triple.h"
24+
25+
using namespace llvm;
26+
using namespace llvm::esimd;
27+
namespace id = itanium_demangle;
28+
PreservedAnalyses ESIMDRemoveHostCodePass::run(Module &M,
29+
ModuleAnalysisManager &) {
30+
// TODO: Remove this pass once ESIMD headers are updated to
31+
// guard vectors to be device only.
32+
bool Modified = false;
33+
assert(!Triple(M.getTargetTriple()).isSPIR() &&
34+
"Pass should not be run for SPIR targets");
35+
for (auto &F : M.functions()) {
36+
if (F.isDeclaration())
37+
continue;
38+
StringRef MangledName = F.getName();
39+
id::ManglingParser<SimpleAllocator> Parser(MangledName.begin(),
40+
MangledName.end());
41+
id::Node *AST = Parser.parse();
42+
if (!AST || AST->getKind() != id::Node::KFunctionEncoding)
43+
continue;
44+
45+
auto *FE = static_cast<id::FunctionEncoding *>(AST);
46+
const id::Node *NameNode = FE->getName();
47+
if (!NameNode)
48+
continue;
49+
50+
id::OutputBuffer NameBuf;
51+
NameNode->print(NameBuf);
52+
StringRef Name(NameBuf.getBuffer(), NameBuf.getCurrentPosition());
53+
if (!Name.startswith("sycl::_V1::ext::intel::esimd::") &&
54+
!Name.startswith("sycl::_V1::ext::intel::experimental::esimd::"))
55+
continue;
56+
SmallVector<BasicBlock *> BBV;
57+
for (BasicBlock &BB : F) {
58+
BB.dropAllReferences();
59+
BBV.push_back(&BB);
60+
}
61+
for (auto *BB : BBV)
62+
BB->eraseFromParent();
63+
64+
Value *Ret = nullptr;
65+
Type *RetTy = F.getFunctionType()->getReturnType();
66+
if (!RetTy->isVoidTy())
67+
Ret = Constant::getNullValue(RetTy);
68+
69+
LLVMContext &Ctx = F.getParent()->getContext();
70+
BasicBlock *BB = BasicBlock::Create(Ctx, "", &F);
71+
ReturnInst::Create(Ctx, Ret, BB);
72+
Modified = true;
73+
}
74+
return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
75+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: opt -passes=esimd-remove-host-code -S < %s | FileCheck %s
2+
3+
; This test checks that ESIMDRemoveHostCode removes all code from ESIMD
4+
; functions and leaves others untouched.
5+
6+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
7+
target triple = "x86_64-unknown-linux-gnu"
8+
9+
; Function Attrs: alwaysinline mustprogress uwtable
10+
define linkonce_odr dso_local void @foo() {
11+
; CHECK: foo
12+
; CHECK-NEXT: %1 = alloca double, align 8
13+
; CHECK-NEXT: ret void
14+
%1 = alloca double, align 8
15+
ret void
16+
}
17+
18+
; Function Attrs: alwaysinline mustprogress uwtable
19+
define linkonce_odr dso_local void @_ZN4sycl3_V13ext5intel12experimental5esimd15lsc_block_storeIdLi64ELNS4_13lsc_data_sizeE0ELNS4_10cache_hintE0ELS7_0ENS2_5esimd6detail26dqword_element_aligned_tagEEENSt9enable_ifIXsr4sycl3ext5intel5esimdE19is_simd_flag_type_vIT4_EEvE4typeEPT_NS8_4simdISF_XT0_EEENS9_14simd_mask_implItLi1EEESC_() {
20+
; CHECK: lsc_block_store
21+
; CHECK-NEXT: ret void
22+
%1 = alloca double, align 8
23+
ret void
24+
}
25+
26+
; Function Attrs: alwaysinline mustprogress uwtable
27+
define linkonce_odr dso_local ptr @_ZN4sycl3_V13ext5intel12experimental5esimd15lsc_block_fobarIdLi64ELNS4_13lsc_data_sizeE0ELNS4_10cache_hintE0ELS7_0ENS2_5esimd6detail26dqword_element_aligned_tagEEENSt9enable_ifIXsr4sycl3ext5intel5esimdE19is_simd_flag_type_vIT4_EEvE4typeEPT_NS8_4simdISF_XT0_EEENS9_14simd_mask_implItLi1EEESC_() {
28+
; CHECK: lsc_block_fobar
29+
; CHECK-NEXT: ret ptr null
30+
%1 = alloca double, align 8
31+
ret ptr %1
32+
}

sycl/include/sycl/ext/intel/esimd/detail/defines_elementary.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,11 @@
3737
// each work-item is mapped to a separate OS thread on host device.
3838
#define ESIMD_PRIVATE thread_local
3939
#define ESIMD_REGISTER(n)
40-
40+
#ifdef __ESIMD_BUILD_HOST_CODE
4141
#define __ESIMD_API ESIMD_INLINE
42+
#else // __ESIMD_BUILD_HOST_CODE
43+
#define __ESIMD_API ESIMD_NOINLINE __attribute__((internal_linkage))
44+
#endif // __ESIMD_BUILD_HOST_CODE
4245
#endif // __SYCL_DEVICE_ONLY__
4346

4447
// Mark a function being noinline

sycl/include/sycl/ext/intel/esimd/detail/util.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#ifdef __SYCL_DEVICE_ONLY__
2121
#define __ESIMD_INTRIN __DPCPP_SYCL_EXTERNAL SYCL_ESIMD_FUNCTION
22+
#elif !defined(__ESIMD_BUILD_HOST_CODE)
23+
#define __ESIMD_INTRIN ESIMD_NOINLINE __attribute__((internal_linkage))
2224
#else
2325
#define __ESIMD_INTRIN inline
2426
#endif // __SYCL_DEVICE_ONLY__

sycl/test-e2e/ESIMD/no_host_code.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
//==---------------- no_host_code.cpp - DPC++ ESIMD on-device test --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// UNSUPPORTED: esimd_emulator
9+
// RUN: %{build} -fno-sycl-esimd-build-host-code -o %t.out
10+
// RUN: %{run} %t.out
11+
12+
#include "BitonicSortK.hpp"

0 commit comments

Comments
 (0)