Skip to content

Commit ed3568d

Browse files
committed
[SYCL] Add ahead-of-time compilation support via separate compilation
Adds new options -fsycl-link-targets and -fsycl-add-targets. -fsycl-link-targets gathers the spv device binaries to be used in a separate compilation. The resulting device binary is then inserted back in with -fsycl-add-targets to create the final binary. Signed-off-by: Vladimir Lazarev <[email protected]>
1 parent 5c727bb commit ed3568d

File tree

5 files changed

+282
-58
lines changed

5 files changed

+282
-58
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,14 +209,16 @@ def err_drv_optimization_remark_pattern : Error<
209209
def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
210210
def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
211211
def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">;
212+
def err_drv_sycl_target_conflict : Error<"The option -fsycl-targets conflicts with -fsycl-link-targets">;
213+
def err_drv_sycl_add_link_conflict : Error<"The option -fsycl-link-targets conflicts with -fsycl-add-targets">;
212214
def err_drv_omp_host_ir_file_not_found : Error<
213215
"The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">;
214216
def err_drv_omp_host_target_not_supported : Error<
215217
"The target '%0' is not a supported OpenMP host target.">;
216218
def err_drv_expecting_fopenmp_with_fopenmp_targets : Error<
217219
"The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.">;
218220
def err_drv_expecting_fsycl_with_fsycl_targets : Error<
219-
"The option -fsycl-targets must be used in conjunction with -fsycl to enable offloading.">;
221+
"The option -fsycl%0targets must be used in conjunction with -fsycl to enable offloading.">;
220222
def warn_drv_omp_offload_target_duplicate : Warning<
221223
"The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
222224
InGroup<OpenMPTarget>;

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,10 @@ def fsycl : Flag<["-"], "fsycl">, Group<f_Group>, Flags<[CC1Option, NoArgumentUn
16871687
def fno_sycl : Flag<["-"], "fno-sycl">, Group<f_Group>, Flags<[NoArgumentUnused]>;
16881688
def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[DriverOption, CC1Option]>,
16891689
HelpText<"Specify comma-separated list of triples SYCL offloading targets to be supported">;
1690+
def fsycl_add_targets_EQ : CommaJoined<["-"], "fsycl-add-targets=">, Flags<[DriverOption]>,
1691+
HelpText<"Specify comma-separated list of triple and device binary image pairs to add to the final SYCL binary">;
1692+
def fsycl_link_targets_EQ : CommaJoined<["-"], "fsycl-link-targets=">, Flags<[DriverOption, CC1Option]>,
1693+
HelpText<"Specify comma-separated list of triples SYCL offloading targets to produce linked device images">;
16901694
def fsyntax_only : Flag<["-"], "fsyntax-only">,
16911695
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
16921696
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;

clang/include/clang/Driver/Types.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,5 +103,6 @@ TYPE("dependencies", Dependencies, INVALID, "d", "")
103103
TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin","A")
104104
TYPE("spirv", SPIRV, INVALID, "spv", "")
105105
TYPE("sycl-header", SYCL_Header, INVALID, "h", "")
106+
TYPE("sycl-fatbin", SYCL_FATBIN, INVALID, nullptr, "")
106107
TYPE("hip-fatbin", HIP_FATBIN, INVALID, "hipfb", "A")
107108
TYPE("none", Nothing, INVALID, nullptr, "u")

clang/lib/Driver/Driver.cpp

Lines changed: 197 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -698,18 +698,49 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
698698
// SYCL
699699
//
700700
// We need to generate a SYCL toolchain if the user specified targets with
701-
// the -fsycl-targets option. If -fsycl is supplied without -fsycl-targets
702-
// we will assume SPIR-V
701+
// the -fsycl-targets, -fsycl-add-targets or -fsycl-link-targets option.
702+
// If -fsycl is supplied without any of these we will assume SPIR-V
703703
bool HasValidSYCLRuntime = C.getInputArgs().hasFlag(options::OPT_fsycl,
704704
options::OPT_fno_sycl, false);
705-
if (Arg *SYCLTargets =
706-
C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ)) {
707-
if (SYCLTargets->getNumValues()) {
705+
706+
Arg *SYCLTargets =
707+
C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ);
708+
Arg *SYCLLinkTargets =
709+
C.getInputArgs().getLastArg(options::OPT_fsycl_link_targets_EQ);
710+
Arg *SYCLAddTargets =
711+
C.getInputArgs().getLastArg(options::OPT_fsycl_add_targets_EQ);
712+
// -fsycl-targets cannot be used with -fsycl-link-targets
713+
if (SYCLTargets && SYCLLinkTargets)
714+
Diag(clang::diag::err_drv_sycl_target_conflict);
715+
// -fsycl-link-targets and -fsycl-add-targets cannot be used together
716+
if (SYCLLinkTargets && SYCLAddTargets)
717+
Diag(clang::diag::err_drv_sycl_add_link_conflict);
718+
719+
// -fsycl-add-targets is a list of paired items (Triple and file) which are
720+
// gathered and used to be linked into the final device binary. This can
721+
// be used with -fsycl-targets to put together the final conglomerate binary
722+
if (SYCLAddTargets) {
723+
if (SYCLAddTargets->getNumValues()) {
724+
// -fsycl-add-targets should be used with -fsycl
725+
if (HasValidSYCLRuntime) {
726+
// Use of -fsycl-add-targets adds additional files to the SYCL device
727+
// link step. Regular offload processing occurs below
728+
} else
729+
Diag(clang::diag::err_drv_expecting_fsycl_with_fsycl_targets)
730+
<< "-add-";
731+
} else
732+
Diag(clang::diag::warn_drv_empty_joined_argument)
733+
<< SYCLAddTargets->getAsString(C.getInputArgs());
734+
}
735+
if (SYCLTargets || SYCLLinkTargets) {
736+
// At this point, we know we have a valid -fsycl*target option passed
737+
Arg * SYCLTargetsValues = SYCLTargets ? SYCLTargets : SYCLLinkTargets;
738+
if (SYCLTargetsValues->getNumValues()) {
708739
// We expect that -fsycl-targets is always used in conjunction with the
709740
// -fsycl option
710741
if (HasValidSYCLRuntime) {
711742
llvm::StringMap<const char *> FoundNormalizedTriples;
712-
for (const char *Val : SYCLTargets->getValues()) {
743+
for (const char *Val : SYCLTargetsValues->getValues()) {
713744
llvm::Triple TT(Val);
714745
std::string NormalizedName = TT.normalize();
715746

@@ -742,11 +773,14 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
742773
C.addOffloadDeviceToolChain(SYCLTC.get(), Action::OFK_SYCL);
743774
}
744775
}
745-
} else
746-
Diag(clang::diag::err_drv_expecting_fsycl_with_fsycl_targets);
776+
} else {
777+
const char *syclArg = SYCLTargets ? "-" : "-link-";
778+
Diag(clang::diag::err_drv_expecting_fsycl_with_fsycl_targets)
779+
<< syclArg;
780+
}
747781
} else
748782
Diag(clang::diag::warn_drv_empty_joined_argument)
749-
<< SYCLTargets->getAsString(C.getInputArgs());
783+
<< SYCLTargetsValues->getAsString(C.getInputArgs());
750784
} else {
751785
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V
752786
if (HasValidSYCLRuntime) {
@@ -2933,9 +2967,18 @@ class OffloadingActionBuilder final {
29332967
/// and all the device linked images are passed to the host link phase.
29342968
/// SPIR related are wrapped before added to the fat binary
29352969
class SYCLActionBuilder final : public DeviceActionBuilder {
2970+
/// Flag to signal if the user requested device-only compilation.
2971+
bool CompileDeviceOnly = false;
2972+
29362973
/// The SYCL actions for the current input.
29372974
ActionList SYCLDeviceActions;
29382975

2976+
/// The SYCL link binary if it was generated for the current input.
2977+
Action *SYCLLinkBinary = nullptr;
2978+
2979+
/// SYCL ahead of time compilation inputs
2980+
SmallVector<std::pair<llvm::Triple, const char *>, 8> SYCLAOTInputs;
2981+
29392982
/// The linker inputs obtained for each toolchain.
29402983
SmallVector<ActionList, 8> DeviceLinkerInputs;
29412984

@@ -2952,9 +2995,44 @@ class OffloadingActionBuilder final {
29522995
phases::ID CurPhase, phases::ID FinalPhase,
29532996
PhasesTy &Phases) override {
29542997

2955-
// We should always have an action for each input.
2956-
assert(SYCLDeviceActions.size() == ToolChains.size() &&
2957-
"Number of SYCL actions and toolchains do not match.");
2998+
// With -fsycl-link-targets, we will take the unbundled binaries
2999+
// for each device and link them together to a single binary that will
3000+
// be used in a split compilation step.
3001+
if (CompileDeviceOnly) {
3002+
ActionList DeviceActions;
3003+
for (auto Ph : Phases) {
3004+
// Skip the phases that were already dealt with.
3005+
if (Ph < CurPhase)
3006+
continue;
3007+
// We have to be consistent with the host final phase.
3008+
if (Ph > FinalPhase || Ph == phases::Link)
3009+
break;
3010+
for (Action *&A : SYCLDeviceActions) {
3011+
A = C.getDriver().ConstructPhaseAction(C, Args, Ph, A,
3012+
Action::OFK_SYCL);
3013+
}
3014+
}
3015+
for (Action *&A : SYCLDeviceActions) {
3016+
OffloadAction::DeviceDependences DDep;
3017+
DDep.add(*A, *ToolChains.front(), /*BoundArch*/ nullptr,
3018+
Action::OFK_SYCL);
3019+
DeviceActions.push_back(
3020+
C.MakeAction<OffloadAction>(DDep, A->getType()));
3021+
}
3022+
3023+
// We generate the fat binary if we have device input actions.
3024+
if (!DeviceActions.empty()) {
3025+
SYCLLinkBinary =
3026+
C.MakeAction<LinkJobAction>(DeviceActions, types::TY_Image);
3027+
3028+
// Remove the SYCL actions as they are already connected to an host
3029+
// action or fat binary.
3030+
SYCLDeviceActions.clear();
3031+
}
3032+
3033+
// We avoid creating host action in device-only mode.
3034+
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
3035+
}
29583036

29593037
// FIXME: This adds the integrated header generation pass before the
29603038
// Host compilation pass so the Host can use the header generated. This
@@ -3005,6 +3083,11 @@ class OffloadingActionBuilder final {
30053083
// If this is an input action replicate it for each SYCL toolchain.
30063084
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
30073085
SYCLDeviceActions.clear();
3086+
3087+
// libraries are not replicated for SYCL
3088+
if (!types::isSrcFile(IA->getType()))
3089+
return ABRT_Inactive;
3090+
30083091
for (unsigned I = 0; I < ToolChains.size(); ++I)
30093092
SYCLDeviceActions.push_back(
30103093
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
@@ -3025,6 +3108,17 @@ class OffloadingActionBuilder final {
30253108
}
30263109

30273110
void appendTopLevelActions(ActionList &AL) override {
3111+
3112+
if (SYCLLinkBinary) {
3113+
OffloadAction::DeviceDependences Dep;
3114+
Dep.add(*SYCLLinkBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
3115+
Action::OFK_SYCL);
3116+
AL.push_back(C.MakeAction<OffloadAction>(Dep, SYCLLinkBinary->getType()));
3117+
SYCLDeviceActions.clear();
3118+
SYCLLinkBinary = nullptr;
3119+
return;
3120+
}
3121+
30283122
if (SYCLDeviceActions.empty())
30293123
return;
30303124

@@ -3048,19 +3142,46 @@ class OffloadingActionBuilder final {
30483142
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
30493143
"Toolchains and linker inputs sizes do not match.");
30503144

3051-
// Append a new link action for each device.
3052-
auto TC = ToolChains.begin();
3053-
for (auto &LI : DeviceLinkerInputs) {
3054-
auto *DeviceLinkAction =
3055-
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
3056-
3057-
// After the Link, wrap the files before the final host link
3058-
auto *DeviceWrappingAction =
3059-
C.MakeAction<OffloadWrappingJobAction>(DeviceLinkAction,
3060-
types::TY_Object);
3061-
DA.add(*DeviceWrappingAction, **TC, /*BoundArch=*/nullptr,
3062-
Action::OFK_SYCL);
3063-
++TC;
3145+
// FIXME - If -fsycl-add-targets is provided, do not link in the regular
3146+
// device binaries - only pull in the add-targets variants. We are doing
3147+
// this to allow for a specific device only binary to be created until
3148+
// we have the ability to resolve multiple devices
3149+
if (SYCLAOTInputs.empty()) {
3150+
// Append a new link action for each device.
3151+
auto TC = ToolChains.begin();
3152+
for (auto &LI : DeviceLinkerInputs) {
3153+
auto *DeviceLinkAction =
3154+
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
3155+
3156+
// After the Link, wrap the files before the final host link
3157+
auto *DeviceWrappingAction =
3158+
C.MakeAction<OffloadWrappingJobAction>(DeviceLinkAction,
3159+
types::TY_Object);
3160+
DA.add(*DeviceWrappingAction, **TC, /*BoundArch=*/nullptr,
3161+
Action::OFK_SYCL);
3162+
++TC;
3163+
}
3164+
} else {
3165+
// Perform additional wraps against -fsycl-add-targets
3166+
// FIXME - The triple is currently not used from the AOT inputs, these
3167+
// will eventually be added to a manifest that is built into the final
3168+
// binary
3169+
ActionList AddInputs;
3170+
for (auto SAI : SYCLAOTInputs) {
3171+
std::string FN(SAI.second);
3172+
const char * FNStr = Args.MakeArgString(FN);
3173+
Arg *myArg = Args.MakeSeparateArg(nullptr,
3174+
C.getDriver().getOpts().getOption(options::OPT_INPUT), FNStr);
3175+
Action *SYCLAdd = C.MakeAction<InputAction>(*myArg,
3176+
types::TY_SYCL_FATBIN);
3177+
AddInputs.push_back(SYCLAdd);
3178+
}
3179+
for (auto &LI : AddInputs) {
3180+
auto *DeviceWrappingAction =
3181+
C.MakeAction<OffloadWrappingJobAction>(LI, types::TY_Object);
3182+
DA.add(*DeviceWrappingAction, *ToolChains.front(),
3183+
/*BoundArch=*/nullptr, Action::OFK_SYCL);
3184+
}
30643185
}
30653186
}
30663187

@@ -3072,6 +3193,36 @@ class OffloadingActionBuilder final {
30723193
++TI)
30733194
ToolChains.push_back(TI->second);
30743195

3196+
Arg *SYCLLinkTargets = Args.getLastArg(
3197+
options::OPT_fsycl_link_targets_EQ);
3198+
CompileDeviceOnly = SYCLLinkTargets &&
3199+
SYCLLinkTargets->getOption().matches(
3200+
options::OPT_fsycl_link_targets_EQ);
3201+
Arg *SYCLAddTargets = Args.getLastArg(
3202+
options::OPT_fsycl_add_targets_EQ);
3203+
if (SYCLAddTargets) {
3204+
for (StringRef Val : SYCLAddTargets->getValues()) {
3205+
// Parse out the Triple and Input (triple:binary) and create a
3206+
// ToolChain for each entry. Each of these will be wrapped and fed
3207+
// into the final binary
3208+
// Populate the pairs, expects format of 'triple:file', any other
3209+
// format will not be accepted
3210+
std::pair<StringRef, StringRef> I = Val.split(':');
3211+
llvm::Triple TT;
3212+
const char * TF;
3213+
if (!I.first.empty() && !I.second.empty()) {
3214+
TT = llvm::Triple(I.first);
3215+
TF = C.getArgs().MakeArgString(I.second);
3216+
// populate the input vector
3217+
SYCLAOTInputs.push_back(std::make_pair(TT, TF));
3218+
} else {
3219+
// No colon found, do not use the input
3220+
C.getDriver().Diag(diag::err_drv_unsupported_option_argument)
3221+
<< SYCLAddTargets->getOption().getName() << Val;
3222+
}
3223+
}
3224+
}
3225+
30753226
DeviceLinkerInputs.resize(ToolChains.size());
30763227
return false;
30773228
}
@@ -4324,9 +4475,18 @@ InputInfo Driver::BuildJobsForActionNoCache(
43244475
Arch = UI.DependentBoundArch;
43254476
} else
43264477
Arch = BoundArch;
4478+
// When unbundling for SYCL and there is no Target offload, assume
4479+
// Host as the dependent offload, as the host path has been stripped
4480+
// in this instance
4481+
Action::OffloadKind DependentOffloadKind;
4482+
if (UI.DependentOffloadKind == Action::OFK_SYCL &&
4483+
TargetDeviceOffloadKind == Action::OFK_None)
4484+
DependentOffloadKind = Action::OFK_Host;
4485+
else
4486+
DependentOffloadKind = UI.DependentOffloadKind;
43274487

43284488
CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch,
4329-
UI.DependentOffloadKind)}] =
4489+
DependentOffloadKind)}] =
43304490
CurI;
43314491
}
43324492

@@ -4340,12 +4500,20 @@ InputInfo Driver::BuildJobsForActionNoCache(
43404500
} else if (JA->getType() == types::TY_Nothing)
43414501
Result = InputInfo(A, BaseInput);
43424502
else {
4343-
// We only have to generate a prefix for the host if this is not a top-level
4344-
// action.
4345-
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
4503+
std::string OffloadingPrefix;
4504+
// When generating binaries with -fsycl-link-target, the output file prefix
4505+
// is the triple arch only
4506+
if (Args.getLastArg(options::OPT_fsycl_link_targets_EQ)) {
4507+
OffloadingPrefix = "-";
4508+
OffloadingPrefix += TC->getTriple().getArchName();
4509+
} else {
4510+
// We only have to generate a prefix for the host if this is not a
4511+
// top-level action.
4512+
OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
43464513
A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
43474514
/*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
43484515
!AtTopLevel);
4516+
}
43494517
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
43504518
AtTopLevel, MultipleArchs,
43514519
OffloadingPrefix),

0 commit comments

Comments
 (0)