Skip to content

Commit 8083f8a

Browse files
authored
[SYCL] Enable Auto GRF by default on PVC on Linux (#11967)
Enable auto GRF mode by default on PVC on Linux. We are not doing this for Windows because the driver doesn't support it. We have run extensive internal testing and are convinced this will not cause performance regressions, and will improve performance in some cases. You can revert to the previous behavior with `-ftarget-register-alloc-mode=pvc:default`. We will monitor performance after this change and react as necessary. Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 69e2b91 commit 8083f8a

File tree

10 files changed

+52
-22
lines changed

10 files changed

+52
-22
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9785,7 +9785,8 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
97859785
if (TC.getTriple().getSubArch() == llvm::Triple::NoSubArch) {
97869786
// Only store compile/link opts in the image descriptor for the SPIR-V
97879787
// target; AOT compilation has already been performed otherwise.
9788-
TC.AddImpliedTargetArgs(TT, TCArgs, BuildArgs, JA);
9788+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
9789+
TC.AddImpliedTargetArgs(TT, TCArgs, BuildArgs, JA, *HostTC);
97899790
TC.TranslateBackendTargetArgs(TT, TCArgs, BuildArgs);
97909791
createArgString("-compile-opts=");
97919792
BuildArgs.clear();

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,9 @@ void SYCL::fpga::BackendCompiler::constructOpenCLAOTCommand(
593593
// Add any implied arguments before user defined arguments.
594594
const toolchains::SYCLToolChain &TC =
595595
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
596+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
596597
llvm::Triple CPUTriple("spir64_x86_64");
597-
TC.AddImpliedTargetArgs(CPUTriple, Args, CmdArgs, JA);
598+
TC.AddImpliedTargetArgs(CPUTriple, Args, CmdArgs, JA, *HostTC);
598599
// Add the target args passed in
599600
TC.TranslateBackendTargetArgs(CPUTriple, Args, CmdArgs);
600601
TC.TranslateLinkerTargetArgs(CPUTriple, Args, CmdArgs);
@@ -751,7 +752,9 @@ void SYCL::fpga::BackendCompiler::ConstructJob(
751752
Twine("-output-report-folder=") + ReportOptArg));
752753

753754
// Add any implied arguments before user defined arguments.
754-
TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA);
755+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
756+
TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA,
757+
*HostTC);
755758

756759
// Add -Xsycl-target* options.
757760
TC.TranslateBackendTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
@@ -812,7 +815,9 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
812815
// Add -Xsycl-target* options.
813816
const toolchains::SYCLToolChain &TC =
814817
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
815-
TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA);
818+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
819+
TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA,
820+
*HostTC);
816821
TC.TranslateBackendTargetArgs(getToolChain().getTriple(), Args, CmdArgs,
817822
Device);
818823
TC.TranslateLinkerTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
@@ -981,8 +986,9 @@ void SYCL::x86_64::BackendCompiler::ConstructJob(
981986
// Add -Xsycl-target* options.
982987
const toolchains::SYCLToolChain &TC =
983988
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
984-
985-
TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA);
989+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
990+
TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA,
991+
*HostTC);
986992
TC.TranslateBackendTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
987993
TC.TranslateLinkerTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
988994
SmallString<128> ExecPath(
@@ -1146,7 +1152,8 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
11461152
void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
11471153
const llvm::opt::ArgList &Args,
11481154
llvm::opt::ArgStringList &CmdArgs,
1149-
const JobAction &JA) const {
1155+
const JobAction &JA,
1156+
const ToolChain &HostTC) const {
11501157
// Current implied args are for debug information and disabling of
11511158
// optimizations. They are passed along to the respective areas as follows:
11521159
// FPGA: -g -cl-opt-disable
@@ -1201,6 +1208,19 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
12011208
RegAllocModeVal.split(RegAllocModeArgs, ',');
12021209
for (StringRef Elem : RegAllocModeArgs)
12031210
ProcessElement(Elem);
1211+
} else if (!HostTC.getTriple().isWindowsMSVCEnvironment()) {
1212+
// If -ftarget-register-alloc-mode is not specified, the default is
1213+
// pvc:default on Windows and pvc:auto otherwise.
1214+
StringRef DeviceName = "pvc";
1215+
StringRef BackendOptName = SYCL::gen::getGenGRFFlag("auto");
1216+
if (IsGen)
1217+
PerDeviceArgs.push_back(
1218+
{DeviceName, Args.MakeArgString("-options " + BackendOptName)});
1219+
else if (Triple.isSPIR() &&
1220+
Triple.getSubArch() == llvm::Triple::NoSubArch) {
1221+
BeArgs.push_back(Args.MakeArgString(RegAllocModeOptName + DeviceName +
1222+
":" + BackendOptName));
1223+
}
12041224
}
12051225
if (IsGen) {
12061226
// For GEN (spir64_gen) we have implied -device settings given usage

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
173173
void AddImpliedTargetArgs(const llvm::Triple &Triple,
174174
const llvm::opt::ArgList &Args,
175175
llvm::opt::ArgStringList &CmdArgs,
176-
const JobAction &JA) const;
176+
const JobAction &JA, const ToolChain &HostTC) const;
177177
void TranslateBackendTargetArgs(const llvm::Triple &Triple,
178178
const llvm::opt::ArgList &Args,
179179
llvm::opt::ArgStringList &CmdArgs,

clang/test/Driver/ftarget-compile-fast.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717
// RUN: -ftarget-compile-fast %s 2>&1 \
1818
// RUN: | FileCheck -check-prefix=TARGET_COMPILE_FAST_JIT %s
1919

20-
// TARGET_COMPILE_FAST_JIT: clang-offload-wrapper{{.*}} "-compile-opts=-ftarget-compile-fast
20+
// TARGET_COMPILE_FAST_JIT: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-ftarget-compile-fast

clang/test/Driver/sycl-ftarget-register-alloc-mode.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
// RUN: -fsycl-targets=spir64_gen -ftarget-register-alloc-mode=pvc:default %s 2>&1 \
1717
// RUN: | FileCheck -check-prefix=DEFAULT_AOT %s
1818

19+
// RUN: %clang -### -fsycl \
20+
// RUN: -fsycl-targets=spir64_gen %s 2>&1 \
21+
// RUN: | FileCheck %if system-windows %{ -check-prefix=DEFAULT_AOT %} %else %{ -check-prefix=AUTO_AOT %} %s
22+
1923
// RUN: %clang -### -fsycl \
2024
// RUN: -fsycl-targets=spir64_gen -ftarget-register-alloc-mode=pvc:small,pvc:large %s 2>&1 \
2125
// RUN: | FileCheck -check-prefix=MULTIPLE_ARGS_AOT %s
@@ -36,6 +40,9 @@
3640
// RUN: -ftarget-register-alloc-mode=pvc:default %s 2>&1 \
3741
// RUN: | FileCheck -check-prefix=DEFAULT_JIT %s
3842

43+
// RUN: %clang -### -fsycl %s 2>&1 \
44+
// RUN: | FileCheck %if system-windows %{ -check-prefix=DEFAULT_JIT %} %else %{ -check-prefix=AUTO_JIT %} %s
45+
3946
// RUN: %clang -### -fsycl \
4047
// RUN: -ftarget-register-alloc-mode=pvc:small,pvc:large %s 2>&1 \
4148
// RUN: | FileCheck -check-prefix=MULTIPLE_ARGS_JIT %s

clang/test/Driver/sycl-offload-static-lib-2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,6 @@
240240
// STATIC_LIB_NOSRC-CUDA: llvm-foreach{{.*}}ptxas{{.*}} "--output-file" "[[CUBINLIST:.+]]"{{.*}} "[[PTXLIST]]"
241241
// STATIC_LIB_NOSRC-CUDA: llvm-foreach{{.*}}fatbin{{.*}} "--create" "[[OBJLIST:.+]]"{{.*}} "--image={{.*}}[[PTXLIST]]" "--image={{.*}}[[CUBINLIST]]"
242242
// STATIC_LIB_NOSRC: file-table-tform{{.*}} "-o" "[[TABLE1:.+\.table]]" "[[TABLE]]" "[[OBJLIST]]"
243-
// STATIC_LIB_NOSRC: clang-offload-wrapper{{.*}} "-o=[[BCFILE2:.+\.bc]]" "-host=x86_64-unknown-linux-gnu" "-target=[[TARGET]]" "-kind=sycl" "-batch" "[[TABLE1]]"
243+
// STATIC_LIB_NOSRC: clang-offload-wrapper{{.*}} "-o=[[BCFILE2:.+\.bc]]" "-host=x86_64-unknown-linux-gnu"{{.*}}"-target=[[TARGET]]" "-kind=sycl" "-batch" "[[TABLE1]]"
244244
// STATIC_LIB_NOSRC: llc{{.*}} "-filetype=obj" "-o" "[[FINALOBJ:.+\.o]]" "[[BCFILE2]]"
245245
// STATIC_LIB_NOSRC: ld{{.*}} "-L/dummy/dir" {{.*}} "{{.*}}_lib.{{(a|lo)}}" "[[FINALOBJ]]"

clang/test/Driver/sycl-offload.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,14 @@
405405

406406
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
407407
// RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS %s
408-
// CHK-TOOLS-OPTS: clang-offload-wrapper{{.*}} "-compile-opts=-DFOO1 -DFOO2"
408+
// CHK-TOOLS-OPTS: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-DFOO1 -DFOO2"
409409

410410
/// Check for implied options (-g -O0)
411411
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown -g -O0 -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
412412
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-OPTS %s
413413
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64-unknown-unknown -Zi -Od -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
414414
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-OPTS %s
415-
// CHK-TOOLS-IMPLIED-OPTS: clang-offload-wrapper{{.*}} "-compile-opts=-g -DFOO1 -DFOO2"
415+
// CHK-TOOLS-IMPLIED-OPTS: clang-offload-wrapper{{.*}} "-compile-opts=-g{{.*}}-DFOO1 -DFOO2"
416416

417417
/// Check for implied options (-O0)
418418
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64 -O0 %s 2>&1 \

clang/test/Driver/sycl-offload.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@
120120
// RUN: | FileCheck -check-prefixes=SYCL_TARGET_OPT %s
121121
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -Xsycl-target-backend=spir64 -DFOO -Xsycl-target-linker=spir64 -DFOO2 %S/Inputs/SYCL/objlin64.o 2>&1 \
122122
// RUN: | FileCheck -check-prefixes=SYCL_TARGET_OPT %s
123-
// SYCL_TARGET_OPT: clang-offload-wrapper{{.*}} "-compile-opts=-DFOO" "-link-opts=-DFOO2"
123+
// SYCL_TARGET_OPT: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-DFOO" "-link-opts=-DFOO2"
124124
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_x86_64 -Xsycl-target-backend -DFOO %S/Inputs/SYCL/objlin64.o 2>&1 \
125125
// RUN: | FileCheck -check-prefixes=SYCL_TARGET_OPT_AOT %s
126126
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend -DFOO %S/Inputs/SYCL/objlin64.o 2>&1 \

clang/test/Driver/sycl-oneapi-gpu.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -403,9 +403,9 @@
403403
// RUN: -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \
404404
// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
405405
// RUN: FileCheck %s --check-prefix=CHECK_TOOLS_BEOPTS
406-
// CHECK_TOOLS_BEOPTS: ocloc{{.*}} "-device" "dg1" "-DDG1"
407-
// CHECK_TOOLS_BEOPTS: ocloc{{.*}} "-device" "skl" "-DSKL"
408-
// CHECK_TOOLS_BEOPTS: ocloc{{.*}} "-device" "skl" "-DSKL2"
406+
// CHECK_TOOLS_BEOPTS: ocloc{{.*}} "-device" "dg1"{{.*}}"-DDG1"
407+
// CHECK_TOOLS_BEOPTS: ocloc{{.*}} "-device" "skl"{{.*}}"-DSKL"
408+
// CHECK_TOOLS_BEOPTS: ocloc{{.*}} "-device" "skl"{{.*}}"-DSKL2"
409409

410410
/// Check that ocloc backend option settings only occur for the expected
411411
/// toolchains when mixing intel_gpu and non-spir64_gen targets
@@ -416,7 +416,7 @@
416416
// RUN: -fno-sycl-device-lib=all -fno-sycl-instrument-device-code \
417417
// RUN: -target x86_64-unknown-linux-gnu -### %s 2>&1 | \
418418
// RUN: FileCheck %s --check-prefix=CHECK_TOOLS_BEOPTS_MIX
419-
// CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "dg1" "-DDG1"
419+
// CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "dg1"{{.*}}"-DDG1"
420420
// CHECK_TOOLS_BEOPTS_MIX: opencl-aot{{.*}} "-DCPU"
421421
// CHECK_TOOLS_BEOPTS_MIX-NOT: "-DDG1"
422-
// CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "skl" "-DSKL2"
422+
// CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "skl"{{.*}}"-DSKL2"

sycl/test-e2e/KernelAndProgram/target_register_alloc_mode.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
// RUN: %{build} -ftarget-register-alloc-mode=pvc:auto -o %t_with.out
44
// RUN: %{build} -o %t_without.out
5+
// RUN: %{build} -ftarget-register-alloc-mode=pvc:default -o %t_default.out
56

6-
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_with.out 2>&1 | FileCheck --check-prefix=CHECK-WITH %s
7-
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_without.out 2>&1 | FileCheck --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %s
7+
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_with.out 2>&1 | FileCheck --check-prefix=CHECK-OPT %s
8+
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_without.out 2>&1 | FileCheck %if system-windows %{ --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %} %else %{ --check-prefix=CHECK-OPT %} %s
9+
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_default.out 2>&1 | FileCheck --implicit-check-not=-ze-intel-enable-auto-large-GRF-mode %s
810

9-
// CHECK-WITH: ---> piProgramBuild(
10-
// CHECK-WITH: -ze-intel-enable-auto-large-GRF-mode
11+
// CHECK-OPT: ---> piProgramBuild(
12+
// CHECK-OPT: -ze-intel-enable-auto-large-GRF-mode
1113

1214
#include <sycl/sycl.hpp>
1315

0 commit comments

Comments
 (0)