Skip to content

Commit ee585e9

Browse files
committed
[SYCL] Add support for -fsycl -fsycl-targets and -Xsycl-target.
These are just option hooks and some underlying support but full fledged support is not yet there for -Xsycl-target. This enables an end to end compilation solution to a fat binary containing host object and sycldevice binary. Supports compilation for multiple source files. Signed-off-by: Vladimir Lazarev <[email protected]>
1 parent 08b20c1 commit ee585e9

File tree

17 files changed

+905
-41
lines changed

17 files changed

+905
-41
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ def err_drv_Xopenmp_target_missing_triple : Error<
8181
"cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=<triple>">;
8282
def err_drv_invalid_Xopenmp_target_with_args : Error<
8383
"invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">;
84+
def err_drv_Xsycl_target_missing_triple : Error<
85+
"cannot deduce implicit triple value for -Xsycl-target, specify triple using -Xsycl-target=<triple>">;
86+
def err_drv_invalid_Xsycl_target_with_args : Error<
87+
"invalid -Xsycl-target argument: '%0', options requiring arguments are unsupported">;
8488
def err_drv_argument_only_allowed_with : Error<
8589
"invalid argument '%0' only allowed with '%1'">;
8690
def err_drv_argument_not_allowed_with : Error<
@@ -204,15 +208,21 @@ def err_drv_optimization_remark_pattern : Error<
204208
"%0 in '%1'">;
205209
def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
206210
def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
211+
def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">;
207212
def err_drv_omp_host_ir_file_not_found : Error<
208213
"The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">;
209214
def err_drv_omp_host_target_not_supported : Error<
210215
"The target '%0' is not a supported OpenMP host target.">;
211216
def err_drv_expecting_fopenmp_with_fopenmp_targets : Error<
212217
"The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.">;
218+
def err_drv_expecting_fsycl_with_fsycl_targets : Error<
219+
"The option -fsycl-targets must be used in conjunction with -fsycl to enable offloading.">;
213220
def warn_drv_omp_offload_target_duplicate : Warning<
214221
"The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
215222
InGroup<OpenMPTarget>;
223+
def warn_drv_sycl_offload_target_duplicate : Warning<
224+
"The SYCL offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
225+
InGroup<SyclTarget>;
216226
def warn_drv_omp_offload_target_missingbcruntime : Warning<
217227
"No library '%0' found in the default clang lib directory or in LIBRARY_PATH. Expect degraded performance due to no inlining of runtime functions on target devices.">,
218228
InGroup<OpenMPTarget>;

clang/include/clang/Basic/DiagnosticGroups.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,9 @@ def OpenMPClauses : DiagGroup<"openmp-clauses">;
995995
def OpenMPLoopForm : DiagGroup<"openmp-loop-form">;
996996
def OpenMPTarget : DiagGroup<"openmp-target">;
997997

998+
// SYCL warnings
999+
def SyclTarget : DiagGroup<"sycl-target">;
1000+
9981001
// Backend warnings.
9991002
def BackendInlineAsm : DiagGroup<"inline-asm">;
10001003
def BackendFrameLargerThanEQ : DiagGroup<"frame-larger-than=">;

clang/include/clang/Driver/Action.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class Action {
9090
OFK_Cuda = 0x02,
9191
OFK_OpenMP = 0x04,
9292
OFK_HIP = 0x08,
93+
OFK_SYCL = 0x10
9394
};
9495

9596
static const char *getClassName(ActionClass AC);

clang/include/clang/Driver/Options.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,11 @@ def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
473473
def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
474474
HelpText<"Pass <arg> to the target offloading toolchain identified by <triple>.">,
475475
MetaVarName<"<triple> <arg>">;
476+
def Xsycl_target : Separate<["-"], "Xsycl-target">,
477+
HelpText<"Pass <arg> to the SYCL based target offloading toolchain.">, MetaVarName<"<arg>">;
478+
def Xsycl_target_EQ : JoinedAndSeparate<["-"], "Xsycl-target=">,
479+
HelpText<"Pass <arg> to the SYCL based target offloading toolchain identified by <triple>.">,
480+
MetaVarName<"<triple> <arg>">;
476481
def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
477482
HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
478483
Group<Link_Group>;
@@ -1677,6 +1682,11 @@ def fstrict_vtable_pointers: Flag<["-"], "fstrict-vtable-pointers">,
16771682
HelpText<"Enable optimizations based on the strict rules for overwriting "
16781683
"polymorphic C++ objects">;
16791684
def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group<f_Group>;
1685+
def fsycl : Flag<["-"], "fsycl">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
1686+
HelpText<"generate SYCL code.">;
1687+
def fno_sycl : Flag<["-"], "fno-sycl">, Group<f_Group>, Flags<[NoArgumentUnused]>;
1688+
def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[DriverOption, CC1Option]>,
1689+
HelpText<"Specify comma-separated list of triples SYCL offloading targets to be supported">;
16801690
def fsyntax_only : Flag<["-"], "fsyntax-only">,
16811691
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
16821692
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;

clang/include/clang/Driver/ToolChain.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -273,12 +273,14 @@ class ToolChain {
273273
return nullptr;
274274
}
275275

276-
/// TranslateOpenMPTargetArgs - Create a new derived argument list for
277-
/// that contains the OpenMP target specific flags passed via
276+
/// TranslateOffloadTargetArgs - Create a new derived argument list for
277+
/// that contains the Offload target specific flags passed via
278278
/// -Xopenmp-target -opt=val OR -Xopenmp-target=<triple> -opt=val
279-
virtual llvm::opt::DerivedArgList *TranslateOpenMPTargetArgs(
279+
/// Also handles -Xsycl-target OR -Xsycl-target=<triple>
280+
virtual llvm::opt::DerivedArgList *TranslateOffloadTargetArgs(
280281
const llvm::opt::DerivedArgList &Args, bool SameTripleAsHost,
281-
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;
282+
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs,
283+
Action::OffloadKind DeviceOffloadKind) const;
282284

283285
/// Choose a tool to use to handle the action \p JA.
284286
///

clang/include/clang/Driver/Types.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,6 @@ TYPE("dSYM", dSYM, INVALID, "dSYM", "A")
102102
TYPE("dependencies", Dependencies, INVALID, "d", "")
103103
TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin","A")
104104
TYPE("spirv", SPIRV, INVALID, "spv", "")
105+
TYPE("sycl-header", SYCL_Header, INVALID, "h", "")
105106
TYPE("hip-fatbin", HIP_FATBIN, INVALID, "hipfb", "A")
106107
TYPE("none", Nothing, INVALID, nullptr, "u")

clang/lib/Driver/Action.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ std::string Action::getOffloadingKindPrefix() const {
9999
return "device-openmp";
100100
case OFK_HIP:
101101
return "device-hip";
102+
case OFK_SYCL:
103+
return "device-sycl";
102104

103105
// TODO: Add other programming models here.
104106
}
@@ -116,6 +118,8 @@ std::string Action::getOffloadingKindPrefix() const {
116118
Res += "-hip";
117119
if (ActiveOffloadKindMask & OFK_OpenMP)
118120
Res += "-openmp";
121+
if (ActiveOffloadKindMask & OFK_SYCL)
122+
Res += "-sycl";
119123

120124
// TODO: Add other programming models here.
121125

@@ -152,6 +156,8 @@ StringRef Action::GetOffloadKindName(OffloadKind Kind) {
152156
return "openmp";
153157
case OFK_HIP:
154158
return "hip";
159+
case OFK_SYCL:
160+
return "sycl";
155161

156162
// TODO: Add other programming models here.
157163
}

clang/lib/Driver/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ add_clang_library(clangDriver
6464
ToolChains/TCE.cpp
6565
ToolChains/WebAssembly.cpp
6666
ToolChains/XCore.cpp
67+
ToolChains/SYCL.cpp
6768
Types.cpp
6869
XRayArgs.cpp
6970

clang/lib/Driver/Compilation.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,14 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
6969
if (!Entry) {
7070
SmallVector<Arg *, 4> AllocatedArgs;
7171
DerivedArgList *OpenMPArgs = nullptr;
72-
// Translate OpenMP toolchain arguments provided via the -Xopenmp-target flags.
73-
if (DeviceOffloadKind == Action::OFK_OpenMP) {
72+
// Translate offload toolchain arguments provided via the -Xopenmp-target
73+
// or -Xsycl-target flags.
74+
if (DeviceOffloadKind == Action::OFK_OpenMP ||
75+
DeviceOffloadKind == Action::OFK_SYCL) {
7476
const ToolChain *HostTC = getSingleOffloadToolChain<Action::OFK_Host>();
7577
bool SameTripleAsHost = (TC->getTriple() == HostTC->getTriple());
76-
OpenMPArgs = TC->TranslateOpenMPTargetArgs(
77-
*TranslatedArgs, SameTripleAsHost, AllocatedArgs);
78+
OpenMPArgs = TC->TranslateOffloadTargetArgs(
79+
*TranslatedArgs, SameTripleAsHost, AllocatedArgs, DeviceOffloadKind);
7880
}
7981

8082
if (!OpenMPArgs) {

clang/lib/Driver/Driver.cpp

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "ToolChains/TCE.h"
4444
#include "ToolChains/WebAssembly.h"
4545
#include "ToolChains/XCore.h"
46+
#include "ToolChains/SYCL.h"
4647
#include "clang/Basic/Version.h"
4748
#include "clang/Config/config.h"
4849
#include "clang/Driver/Action.h"
@@ -693,6 +694,61 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
693694
<< OpenMPTargets->getAsString(C.getInputArgs());
694695
}
695696

697+
//
698+
// SYCL
699+
//
700+
// We need to generate a SYCL toolchain if the user specified targets with
701+
// the -fsycl-targets option.
702+
if (Arg *SYCLTargets =
703+
C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ)) {
704+
if (SYCLTargets->getNumValues()) {
705+
// We expect that -fsycl-targets is always used in conjunction with the
706+
// -fsycl option
707+
bool HasValidSYCLRuntime = C.getInputArgs().hasFlag(
708+
options::OPT_fsycl, options::OPT_fno_sycl, false);
709+
710+
if (HasValidSYCLRuntime) {
711+
llvm::StringMap<const char *> FoundNormalizedTriples;
712+
for (const char *Val : SYCLTargets->getValues()) {
713+
llvm::Triple TT(Val);
714+
std::string NormalizedName = TT.normalize();
715+
716+
// Make sure we don't have a duplicate triple.
717+
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
718+
if (Duplicate != FoundNormalizedTriples.end()) {
719+
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
720+
<< Val << Duplicate->second;
721+
continue;
722+
}
723+
724+
// Store the current triple so that we can check for duplicates in the
725+
// following iterations.
726+
FoundNormalizedTriples[NormalizedName] = Val;
727+
728+
// If the specified target is invalid, emit a diagnostic.
729+
if (TT.getArch() == llvm::Triple::UnknownArch)
730+
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
731+
else {
732+
const ToolChain *HostTC =
733+
C.getSingleOffloadToolChain<Action::OFK_Host>();
734+
const llvm::Triple &HostTriple = HostTC->getTriple();
735+
// Use the SYCL and host triples as the key into the ToolChains map,
736+
// because the device toolchain we create depends on both.
737+
auto &SYCLTC = ToolChains[TT.str() + "/" + HostTriple.str()];
738+
if (!SYCLTC) {
739+
SYCLTC = llvm::make_unique<toolchains::SYCLToolChain>(
740+
*this, TT, *HostTC, C.getInputArgs());
741+
}
742+
C.addOffloadDeviceToolChain(SYCLTC.get(), Action::OFK_SYCL);
743+
}
744+
}
745+
} else
746+
Diag(clang::diag::err_drv_expecting_fsycl_with_fsycl_targets);
747+
} else
748+
Diag(clang::diag::warn_drv_empty_joined_argument)
749+
<< SYCLTargets->getAsString(C.getInputArgs());
750+
}
751+
696752
//
697753
// TODO: Add support for other offloading programming models here.
698754
//
@@ -2852,6 +2908,154 @@ class OffloadingActionBuilder final {
28522908
}
28532909
};
28542910

2911+
/// SYCL action builder. The host bitcode is passed to the device frontend
2912+
/// and all the device linked images are passed to the host link phase.
2913+
/// SPIR related device images are wrapped before being added to the fat
/// binary (the wrapping itself is not performed in this class).
2914+
class SYCLActionBuilder final : public DeviceActionBuilder {
2915+
/// The SYCL actions for the current input, one entry per SYCL toolchain.
2916+
ActionList SYCLDeviceActions;
2917+
2918+
/// The linker inputs obtained for each toolchain.
2919+
SmallVector<ActionList, 8> DeviceLinkerInputs;
2920+
2921+
/// Scratch pointer for the header-generation compile action created in the
/// Compile phase; it is reset to nullptr once the action has been added as a
/// dependence.
2922+
Action * DeviceCompilerInput = nullptr;
2923+
2924+
public:
2925+
/// Forwards all arguments to DeviceActionBuilder and associates this builder
/// with the Action::OFK_SYCL offload kind.
SYCLActionBuilder(Compilation &C, DerivedArgList &Args,
2926+
const Driver::InputList &Inputs)
2927+
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {}
2928+
2929+
/// Advance the pending SYCL device actions to \p CurPhase and record in
/// \p DA whatever the host action depends on for that phase.
///
/// - Compile: a TY_SYCL_Header compile action is added to \p DA.
/// - Link: each pending device action is moved into its toolchain's
///   DeviceLinkerInputs entry and the pending list is cleared.
/// - Any phase that does not return early: each pending action is replaced
///   by the result of Driver::ConstructPhaseAction for \p CurPhase.
///
/// \p FinalPhase and \p Phases are not used by this implementation.
/// Every path returns ABRT_Success.
ActionBuilderReturnCode
2930+
getDeviceDependences(OffloadAction::DeviceDependences &DA,
2931+
phases::ID CurPhase, phases::ID FinalPhase,
2932+
PhasesTy &Phases) override {
2933+
2934+
// There must be exactly one pending device action per SYCL toolchain.
2935+
assert(SYCLDeviceActions.size() == ToolChains.size() &&
2936+
"Number of SYCL actions and toolchains do not match.");
2937+
2938+
// FIXME: This adds the integrated header generation pass before the
2939+
// Host compilation pass so the Host can use the header generated. This
2940+
// can be improved upon to where the header generation and spv generation
2941+
// is done in the same step. Currently, it's not too efficient.
2942+
// The host depends on the generated integrated header from the device
2943+
// compilation.
2944+
if (CurPhase == phases::Compile) {
2945+
// NOTE(review): every iteration overwrites DeviceCompilerInput, so only the
// header action built from the last device action reaches DA.add below, and
// it is always paired with ToolChains.front(). Confirm this is intended
// when more than one SYCL target is given.
for (Action *&A : SYCLDeviceActions) {
2946+
DeviceCompilerInput =
2947+
C.MakeAction<CompileJobAction>(A, types::TY_SYCL_Header);
2948+
}
2949+
// NOTE(review): if SYCLDeviceActions were empty here, DeviceCompilerInput
// would still be nullptr and ToolChains (same size per the assert above)
// would have no front element - confirm callers never reach this state.
DA.add(*DeviceCompilerInput, *ToolChains.front(), /*BoundArch=*/nullptr,
2950+
Action::OFK_SYCL);
2951+
// Clear the input file, it is already a dependence to a host
2952+
// action.
2953+
DeviceCompilerInput = nullptr;
// There is no return here: for the Compile phase control also reaches the
// generic ConstructPhaseAction loop at the end of this function.
2954+
}
2955+
2956+
// The host only depends on device action in the linking phase, when all
2957+
// the device images have to be embedded in the host image.
2958+
if (CurPhase == phases::Link) {
2959+
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
2960+
"Toolchains and linker inputs sizes do not match.");
2961+
// Pair the i-th pending action with the i-th toolchain's linker inputs.
auto LI = DeviceLinkerInputs.begin();
2962+
for (auto *A : SYCLDeviceActions) {
2963+
LI->push_back(A);
2964+
++LI;
2965+
}
2966+
2967+
// We passed the device action as a host dependence, so we don't need to
2968+
// do anything else with them.
2969+
SYCLDeviceActions.clear();
2970+
return ABRT_Success;
2971+
}
2972+
2973+
// By default, we produce an action for each device arch.
2974+
// A is taken by reference so each slot is overwritten with its new action.
for (Action *&A : SYCLDeviceActions) {
2975+
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
2976+
AssociatedOffloadKind);
2977+
}
2978+
2979+
return ABRT_Success;
2980+
}
2981+
2982+
/// Seed the pending device actions from \p HostAction.
///
/// An InputAction is replicated once per SYCL toolchain; an
/// OffloadUnbundlingJobAction is shared by all toolchains and told about each
/// of them. Any other action kind leaves the pending list untouched.
/// Always returns ABRT_Success.
/// (The spelling "Depences" is kept as is because this is an override -
/// presumably it matches the DeviceActionBuilder declaration; confirm.)
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
2983+
2984+
// If this is an input action replicate it for each SYCL toolchain.
2985+
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
2986+
SYCLDeviceActions.clear();
2987+
for (unsigned I = 0; I < ToolChains.size(); ++I)
2988+
SYCLDeviceActions.push_back(
2989+
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
2990+
return ABRT_Success;
2991+
}
2992+
2993+
// If this is an unbundling action use it as is for each SYCL toolchain.
2994+
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
2995+
SYCLDeviceActions.clear();
2996+
for (unsigned I = 0; I < ToolChains.size(); ++I) {
2997+
// The same unbundling action object is stored once per toolchain.
SYCLDeviceActions.push_back(UA);
2998+
UA->registerDependentActionInfo(
2999+
ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_SYCL);
3000+
}
3001+
return ABRT_Success;
3002+
}
3003+
return ABRT_Success;
3004+
}
3005+
3006+
/// Wrap every pending device action in an OffloadAction bound to its
/// toolchain and append it to \p AL, then drop the pending list. Does nothing
/// when no device action is pending.
void appendTopLevelActions(ActionList &AL) override {
3007+
if (SYCLDeviceActions.empty())
3008+
return;
3009+
3010+
// There must be exactly one pending device action per SYCL toolchain.
3011+
assert(SYCLDeviceActions.size() == ToolChains.size() &&
3012+
"Number of SYCL actions and toolchains do not match.");
3013+
3014+
// Append all device actions followed by the proper offload action.
3015+
auto TI = ToolChains.begin();
3016+
for (auto *A : SYCLDeviceActions) {
3017+
OffloadAction::DeviceDependences Dep;
3018+
Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_SYCL);
3019+
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
3020+
++TI;
3021+
}
3022+
// We no longer need the action stored in this builder.
3023+
SYCLDeviceActions.clear();
3024+
}
3025+
3026+
/// Create one LinkJobAction of type TY_Image per SYCL toolchain from the
/// linker inputs collected for it, and add each to \p DA.
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
3027+
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
3028+
"Toolchains and linker inputs sizes do not match.");
3029+
3030+
// Append a new link action for each device.
3031+
auto TC = ToolChains.begin();
3032+
for (auto &LI : DeviceLinkerInputs) {
3033+
auto *DeviceLinkAction =
3034+
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
3035+
DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
3036+
Action::OFK_SYCL);
3037+
++TC;
3038+
}
3039+
}
3040+
3041+
/// Collect the toolchains registered for Action::OFK_SYCL and size
/// DeviceLinkerInputs to match.
/// NOTE(review): the meaning of the returned 'false' is defined by
/// DeviceActionBuilder::initialize, which is not visible here - presumably
/// "no error"; confirm.
bool initialize() override {
3042+
// Get the SYCL toolchains. If we don't get any, the action builder will
3043+
// know there is nothing to do related to SYCL offloading.
3044+
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
3045+
for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE;
3046+
++TI)
3047+
ToolChains.push_back(TI->second);
3048+
3049+
DeviceLinkerInputs.resize(ToolChains.size());
3050+
return false;
3051+
}
3052+
3053+
/// Unconditionally reports that this builder can work with bundled files.
bool canUseBundlerUnbundler() const override {
3054+
// SYCL should use bundled files whenever possible.
3055+
return true;
3056+
}
3057+
};
3058+
28553059
///
28563060
/// TODO: Add the implementation for other specialized builders here.
28573061
///
@@ -2879,6 +3083,9 @@ class OffloadingActionBuilder final {
28793083
// Create a specialized builder for OpenMP.
28803084
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
28813085

3086+
// Create a specialized builder for SYCL.
3087+
SpecializedBuilders.push_back(new SYCLActionBuilder(C, Args, Inputs));
3088+
28823089
//
28833090
// TODO: Build other specialized builders here.
28843091
//

0 commit comments

Comments
 (0)