Skip to content

Commit 92855be

Browse files
authored
[SYCL] Support -ftarget-compile-fast in JIT mode (#9516)
This option is currently only supported for AOT mode, and it's implemented in the driver by passing an `-igc_opts` string to ocloc. Support this in JIT to by passing the FE option to `clang-offload-wrapper` so it's stored in the binary and will get used at runtime. At runtime, check with the backend to get the correct backend flag, and use that. If the device is not an Intel GPU, strip the FE flag. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 453ffdc commit 92855be

File tree

8 files changed

+95
-9
lines changed

8 files changed

+95
-9
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -963,7 +963,6 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
963963
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
964964
if (A->getOption().matches(options::OPT_O0))
965965
BeArgs.push_back("-cl-opt-disable");
966-
967966
if (IsGen) {
968967
// For GEN (spir64_gen) we have implied -device settings given usage
969968
// of intel_gpu_ as a target. Handle those here, and also check that no
@@ -991,10 +990,14 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
991990
CmdArgs.push_back("-device");
992991
CmdArgs.push_back(Args.MakeArgString(DepInfo));
993992
}
994-
// -ftarget-compile-fast
993+
// -ftarget-compile-fast AOT
995994
if (Args.hasArg(options::OPT_ftarget_compile_fast)) {
996995
BeArgs.push_back("-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'");
997996
}
997+
} else if (Triple.getSubArch() == llvm::Triple::NoSubArch &&
998+
Triple.isSPIR()) {
999+
// -ftarget-compile-fast JIT
1000+
Args.AddLastArg(BeArgs, options::OPT_ftarget_compile_fast);
9981001
}
9991002
if (BeArgs.empty())
10001003
return;

clang/test/Driver/ftarget-compile-fast.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,12 @@
99

1010
// TARGET_COMPILE_FAST_GEN: ocloc{{.*}} "-output"
1111
// TARGET_COMPILE_FAST_GEN: "-options" "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"
12+
13+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
14+
// RUN: -ftarget-compile-fast %s 2>&1 \
15+
// RUN: | FileCheck -check-prefix=TARGET_COMPILE_FAST_JIT %s
16+
// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -fsycl \
17+
// RUN: -ftarget-compile-fast %s 2>&1 \
18+
// RUN: | FileCheck -check-prefix=TARGET_COMPILE_FAST_JIT %s
19+
20+
// TARGET_COMPILE_FAST_JIT: clang-offload-wrapper{{.*}} "-compile-opts=-ftarget-compile-fast

sycl/doc/UsersManual.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,10 @@ and not recommended to use in production environment.
410410
Also, some of Intel GPUs or GPU run-time/drivers may support only
411411
"stateless" memory accesses.
412412

413+
**`-ftarget-compile-fast`** [EXPERIMENTAL]
414+
Instructs the target backend to reduce compilation time, potentially
415+
at the cost of runtime performance. Currently only supported on Intel GPUs.
416+
413417
# Example: SYCL device code compilation
414418

415419
To invoke SYCL device compiler set `-fsycl-device-only` flag.

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,11 @@ pi_result piPluginGetLastError(char **message) {
5757
}
5858

5959
// Returns plugin specific backend option.
60-
// Current support is only for optimization options.
6160
// Return '-ze-opt-disable' for frontend_option = -O0.
6261
// Return '-ze-opt-level=1' for frontend_option = -O1 or -O2.
6362
// Return '-ze-opt-level=2' for frontend_option = -O3.
63+
// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for
64+
// frontend_option = -ftarget-compile-fast.
6465
pi_result piPluginGetBackendOption(pi_platform platform,
6566
const char *frontend_option,
6667
const char **backend_option) {

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,6 @@ pi_result piPluginGetLastError(char **message) {
9999
}
100100

101101
// Returns plugin specific backend option.
102-
// Current support is only for optimization options.
103-
// Return '-cl-opt-disable' for frontend_option = -O0 and '' for others.
104102
pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option,
105103
const char **backend_option) {
106104
using namespace std::literals;
@@ -110,6 +108,7 @@ pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option,
110108
*backend_option = "";
111109
return PI_SUCCESS;
112110
}
111+
// Return '-cl-opt-disable' for frontend_option = -O0 and '' for others.
113112
if (!strcmp(frontend_option, "-O0")) {
114113
*backend_option = "-cl-opt-disable";
115114
return PI_SUCCESS;
@@ -119,6 +118,10 @@ pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option,
119118
*backend_option = "";
120119
return PI_SUCCESS;
121120
}
121+
if (frontend_option == "-ftarget-compile-fast"sv) {
122+
*backend_option = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'";
123+
return PI_SUCCESS;
124+
}
122125
return PI_ERROR_INVALID_VALUE;
123126
}
124127

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_platform.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,8 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() {
550550
// Return '-ze-opt-disable' for frontend_option = -O0.
551551
// Return '-ze-opt-level=1' for frontend_option = -O1 or -O2.
552552
// Return '-ze-opt-level=2' for frontend_option = -O3.
553+
// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for
554+
// frontend_option=-ftarget-compile-fast.
553555
UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption(
554556
ur_platform_handle_t Platform, ///< [in] handle of the platform instance.
555557
const char *FrontendOption, ///< [in] string containing the frontend option.
@@ -578,5 +580,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption(
578580
*PlatformOption = "-ze-opt-level=2";
579581
return UR_RESULT_SUCCESS;
580582
}
583+
if (FrontendOption == "-ftarget-compile-fast"sv) {
584+
*PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'";
585+
return UR_RESULT_SUCCESS;
586+
}
581587
return UR_RESULT_ERROR_INVALID_VALUE;
582588
}

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -456,17 +456,34 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts,
456456
CompileOpts += std::string(backend_option);
457457
}
458458
}
459-
if ((PlatformImpl->getBackend() == backend::ext_oneapi_level_zero ||
459+
bool IsIntelGPU =
460+
(PlatformImpl->getBackend() == backend::ext_oneapi_level_zero ||
460461
PlatformImpl->getBackend() == backend::opencl) &&
461-
std::all_of(Devs.begin(), Devs.end(),
462-
[](const device &Dev) { return Dev.is_gpu(); }) &&
463-
Img.getDeviceGlobals().size() != 0) {
462+
std::all_of(Devs.begin(), Devs.end(), [](const device &Dev) {
463+
return Dev.is_gpu() &&
464+
Dev.get_info<info::device::vendor_id>() == 0x8086;
465+
});
466+
if (IsIntelGPU && Img.getDeviceGlobals().size() != 0) {
464467
// If the image has device globals we need to add the
465468
// -ze-take-global-address option to tell IGC to record addresses of these.
466469
if (!CompileOpts.empty())
467470
CompileOpts += " ";
468471
CompileOpts += "-ze-take-global-address";
469472
}
473+
if (!CompileOptsEnv) {
474+
static const char *TargetCompileFast = "-ftarget-compile-fast";
475+
if (auto Pos = CompileOpts.find(TargetCompileFast);
476+
Pos != std::string::npos) {
477+
const char *BackendOption = nullptr;
478+
if (IsIntelGPU)
479+
PlatformImpl->getBackendOption(TargetCompileFast, &BackendOption);
480+
auto OptLen = strlen(TargetCompileFast);
481+
if (IsIntelGPU && BackendOption && BackendOption[0] != '\0')
482+
CompileOpts.replace(Pos, OptLen, BackendOption);
483+
else
484+
CompileOpts.erase(Pos, OptLen);
485+
}
486+
}
470487
}
471488

472489
static void applyOptionsFromImage(std::string &CompileOpts,
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %{build} -ftarget-compile-fast -o %t_with.out
2+
// RUN: %{build} -o %t_without.out
3+
4+
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_with.out 2>&1 | FileCheck %if !gpu || ext_oneapi_hip || ext_oneapi_cuda %{ --check-prefix=CHECK-WITHOUT %} %else %{ --check-prefix=CHECK-INTEL-GPU-WITH %} %s
5+
// RUN: env SYCL_PI_TRACE=-1 %{run} %t_without.out 2>&1 | FileCheck --implicit-check-not=-igc_opts %s
6+
7+
// CHECK-INTEL-GPU-WITH: ---> piProgramBuild(
8+
// CHECK-INTEL-GPU-WITH: -igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'
9+
10+
// CHECK-WITHOUT: ---> piProgramBuild(
11+
// CHECK-WITHOUT-NOT: -igc_opts
12+
// CHECK-WITHOUT: ) ---> pi_result : PI_SUCCESS
13+
14+
#include <sycl/sycl.hpp>
15+
16+
int main() {
17+
sycl::buffer<size_t, 1> Buffer(4);
18+
19+
sycl::queue Queue;
20+
21+
sycl::range<1> NumOfWorkItems{Buffer.size()};
22+
23+
Queue.submit([&](sycl::handler &cgh) {
24+
sycl::accessor Accessor{Buffer, cgh, sycl::write_only};
25+
cgh.parallel_for<class FillBuffer>(NumOfWorkItems, [=](sycl::id<1> WIid) {
26+
Accessor[WIid] = WIid.get(0);
27+
});
28+
});
29+
30+
sycl::host_accessor HostAccessor{Buffer, sycl::read_only};
31+
32+
bool MismatchFound = false;
33+
for (size_t I = 0; I < Buffer.size(); ++I) {
34+
if (HostAccessor[I] != I) {
35+
std::cout << "The result is incorrect for element: " << I
36+
<< " , expected: " << I << " , got: " << HostAccessor[I]
37+
<< std::endl;
38+
MismatchFound = true;
39+
}
40+
}
41+
42+
return MismatchFound;
43+
}

0 commit comments

Comments
 (0)