Skip to content

Commit 55ebcae

Browse files
mdtoguchi authored and bader committed
[SYCL] Add FPGA Command line support for Windows
Allows FPGA CLI modifications to work on Windows by enabling the use of archives/libraries instead of partially linked objects. This also changes the Linux side of the implementation to use archives instead of objects. Adds support for -fsycl-link from source, which previously required a two-step compilation: first to an object, then FPGA device generation. Enables additional aoc options for dependency (dep) input and output report. Allows the link step on Windows to use tempfile list args. When unbundling with an aocr/aocx archive, ensures that any additional objects are linked and wrapped before being passed to the final link. Modifies -fintelfpga unbundling of FPGA archives to do single-host and single-target unbundles. Signed-off-by: Michael D Toguchi <[email protected]>
1 parent ac544f9 commit 55ebcae

File tree

10 files changed

+289
-156
lines changed

10 files changed

+289
-156
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 95 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -3131,9 +3131,6 @@ class OffloadingActionBuilder final {
31313131
/// The linker inputs obtained for each toolchain.
31323132
SmallVector<ActionList, 8> DeviceLinkerInputs;
31333133

3134-
/// Host object list
3135-
ActionList HostObjectList;
3136-
31373134
/// The compiler inputs obtained for each toolchain
31383135
Action * DeviceCompilerInput = nullptr;
31393136

@@ -3146,7 +3143,7 @@ class OffloadingActionBuilder final {
31463143
unsigned FPGArCount = 0;
31473144

31483145
/// Type of output file for FPGA device compilation.
3149-
types::ID FPGAOutType = types::TY_Image;
3146+
types::ID FPGAOutType = types::TY_FPGA_AOCX;
31503147

31513148
public:
31523149
SYCLActionBuilder(Compilation &C, DerivedArgList &Args,
@@ -3191,51 +3188,15 @@ class OffloadingActionBuilder final {
31913188
// for each device and link them together to a single binary that will
31923189
// be used in a split compilation step.
31933190
if (CompileDeviceOnly && !SYCLDeviceActions.empty()) {
3194-
bool SYCLAOTCompile = false;
3195-
unsigned I = 0;
3196-
for (auto SDA : SYCLDeviceActions) {
3191+
for (auto SDA : SYCLDeviceActions)
31973192
SYCLLinkBinaryList.push_back(SDA);
3198-
auto TT = SYCLTripleList[I];
3199-
if (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga ||
3200-
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen)
3201-
SYCLAOTCompile = true;
3202-
I++;
3203-
}
3204-
if (WrapDeviceOnlyBinary && !SYCLAOTCompile) {
3193+
if (WrapDeviceOnlyBinary) {
32053194
auto *DeviceLinkAction =
32063195
C.MakeAction<LinkJobAction>(SYCLLinkBinaryList, types::TY_Image);
32073196
// Wrap the binary when -fsycl-link is given
32083197
SYCLLinkBinary =
32093198
C.MakeAction<OffloadWrappingJobAction>(DeviceLinkAction,
32103199
types::TY_Object);
3211-
} else if (SYCLAOTCompile) {
3212-
// All inputs have corresponding dependency files when built with
3213-
// -fintelfpga. Gather those here.
3214-
auto *LinkAction = C.MakeAction<LinkJobAction>(SYCLLinkBinaryList,
3215-
types::TY_SPIRV);
3216-
// Do the additional Ahead of Time compilation when the specific
3217-
// triple calls for it (provided a valid subarch).
3218-
auto *DeviceBECompileAction =
3219-
C.MakeAction<BackendCompileJobAction>(LinkAction, FPGAOutType);
3220-
3221-
// When performing -fsycl-link with FPGA, we will take the
3222-
// generated device binary and bundle that with all of the host
3223-
// objects (partially linked together).
3224-
if (!HostObjectList.empty()) {
3225-
// Bundling job only takes a single host and target object, so
3226-
// perform the partial link and send that into the bundler
3227-
auto *PartialLinkAction =
3228-
C.MakeAction<LinkJobAction>(HostObjectList,
3229-
types::TY_Object);
3230-
// Add the host action to the list in order to create the
3231-
// bundling action.
3232-
ActionList OffloadAL;
3233-
OffloadAL.push_back(DeviceBECompileAction);
3234-
OffloadAL.push_back(PartialLinkAction);
3235-
3236-
SYCLLinkBinary =
3237-
C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
3238-
}
32393200
} else
32403201
SYCLLinkBinary = C.MakeAction<LinkJobAction>(SYCLLinkBinaryList,
32413202
types::TY_Image);
@@ -3303,11 +3264,6 @@ class OffloadingActionBuilder final {
33033264
if (IA->getType() == types::TY_Object ||
33043265
IA->getType() == types::TY_FPGA_AOCX ||
33053266
IA->getType() == types::TY_FPGA_AOCR) {
3306-
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
3307-
if (Args.hasArg(options::OPT_fsycl_link_EQ)) {
3308-
HostObjectList.push_back(
3309-
C.MakeAction<InputAction>(IA->getInputArg(), types::TY_Object));
3310-
}
33113267
// Keep track of the number of FPGA devices encountered
33123268
// Only one of these is allowed at a single time.
33133269
if (IA->getType() == types::TY_FPGA_AOCX)
@@ -3379,20 +3335,23 @@ class OffloadingActionBuilder final {
33793335
// Perform a check for device kernels. This is done for FPGA when an
33803336
// aocx or aocr based file is found.
33813337
if (FPGAxCount || FPGArCount) {
3338+
ActionList DeviceObjects;
33823339
for (const auto &I : LI) {
33833340
if (I->getType() == types::TY_Object) {
3384-
auto *DeviceCheckAction =
3385-
C.MakeAction<SPIRCheckJobAction>(I, types::TY_Image);
3386-
DA.add(*DeviceCheckAction, **TC, /*BoundArch=*/nullptr,
3387-
Action::OFK_SYCL);
3341+
// FIXME - Checker does not work well inline with the tool
3342+
// chain, but it needs to be here for real time checking
3343+
//auto *DeviceCheckAction =
3344+
//C.MakeAction<SPIRCheckJobAction>(I, types::TY_Object);
3345+
//DeviceObjects.push_back(DeviceCheckAction);
3346+
DeviceObjects.push_back(I);
33883347
} else {
33893348
// Do not perform a device link and only pass the aocr
33903349
// file to the offline compilation before wrapping. Just
33913350
// wrap an aocx file.
33923351
Action * DeviceWrappingAction;
33933352
if (I->getType() == types::TY_FPGA_AOCR) {
33943353
auto *DeviceBECompileAction =
3395-
C.MakeAction<BackendCompileJobAction>(I, types::TY_Image);
3354+
C.MakeAction<BackendCompileJobAction>(I, FPGAOutType);
33963355
DeviceWrappingAction =
33973356
C.MakeAction<OffloadWrappingJobAction>(
33983357
DeviceBECompileAction, types::TY_Object);
@@ -3403,6 +3362,17 @@ class OffloadingActionBuilder final {
34033362
Action::OFK_SYCL);
34043363
}
34053364
}
3365+
if (!DeviceObjects.empty()) {
3366+
// link and wrap the device binary, but do not perform the
3367+
// backend compile.
3368+
auto *DeviceLinkAction =
3369+
C.MakeAction<LinkJobAction>(DeviceObjects, types::TY_SPIRV);
3370+
auto *DeviceWrappingAction =
3371+
C.MakeAction<OffloadWrappingJobAction>(DeviceLinkAction,
3372+
types::TY_Object);
3373+
DA.add(*DeviceWrappingAction, **TC, /*BoundArch=*/nullptr,
3374+
Action::OFK_SYCL);
3375+
}
34063376
continue;
34073377
}
34083378
auto *DeviceLinkAction =
@@ -3417,7 +3387,7 @@ class OffloadingActionBuilder final {
34173387
if (SYCLAOTCompile) {
34183388
types::ID OutType = types::TY_Image;
34193389
if (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga)
3420-
OutType = types::TY_FPGA_AOCX;
3390+
OutType = FPGAOutType;
34213391
// Do the additional Ahead of Time compilation when the specific
34223392
// triple calls for it (provided a valid subarch).
34233393
auto *DeviceBECompileAction =
@@ -3474,10 +3444,10 @@ class OffloadingActionBuilder final {
34743444
Arg *SYCLLinkTargets = Args.getLastArg(
34753445
options::OPT_fsycl_link_targets_EQ);
34763446
WrapDeviceOnlyBinary = Args.hasArg(options::OPT_fsycl_link_EQ);
3477-
CompileDeviceOnly = (SYCLLinkTargets &&
3478-
SYCLLinkTargets->getOption().matches(
3479-
options::OPT_fsycl_link_targets_EQ)) ||
3480-
WrapDeviceOnlyBinary;
3447+
// Device only compilation for -fsycl-link (no FPGA) and
3448+
// -fsycl-link-targets
3449+
CompileDeviceOnly = (SYCLLinkTargets || (WrapDeviceOnlyBinary &&
3450+
!Args.hasArg(options::OPT_fintelfpga)));
34813451
Arg *SYCLAddTargets = Args.getLastArg(
34823452
options::OPT_fsycl_add_targets_EQ);
34833453
if (SYCLAddTargets) {
@@ -3687,12 +3657,13 @@ class OffloadingActionBuilder final {
36873657

36883658
// Checking uses -check-section option with the input file, no output
36893659
// file and the target triple being looked for.
3690-
const char *Targets = C.getArgs().MakeArgString(Twine("-targets=fpga-") +
3660+
const char *Targets = C.getArgs().MakeArgString(Twine("-targets=sycl-") +
36913661
TT.str());
36923662
const char *Inputs = C.getArgs().MakeArgString(Twine("-inputs=") +
36933663
Object);
3694-
// Always use -type=o for aocx/aocr bundle checking.
3695-
const char *Type = C.getArgs().MakeArgString("-type=o");
3664+
// Always use -type=ao for aocx/aocr bundle checking. The 'bundles' are
3665+
// actually archives.
3666+
const char *Type = C.getArgs().MakeArgString("-type=ao");
36963667
std::vector<StringRef> BundlerArgs = { "clang-offload-bundler",
36973668
Type,
36983669
Targets,
@@ -3715,8 +3686,6 @@ class OffloadingActionBuilder final {
37153686
else
37163687
llvm::errs() << A << " ";
37173688
llvm::errs() << '\n';
3718-
if (OutputOnly)
3719-
return false;
37203689
}
37213690
if (BundlerBinary.getError())
37223691
return false;
@@ -3764,19 +3733,22 @@ class OffloadingActionBuilder final {
37643733
types::TY_Object &&
37653734
Args.hasArg(options::OPT_foffload_static_lib_EQ))) {
37663735
ActionList HostActionList;
3736+
Action * A(HostAction);
37673737
// Only check for FPGA device information when using fpga SubArch.
37683738
if (Args.hasArg(options::OPT_fintelfpga) &&
3769-
HasFPGADeviceBinary(C, InputArg->getAsString(Args), true)) {
3770-
Action * FPGAAction =
3771-
C.MakeAction<InputAction>(*InputArg, types::TY_FPGA_AOCX);
3772-
HostActionList.push_back(FPGAAction);
3773-
} else if (Args.hasArg(options::OPT_fintelfpga) &&
3774-
HasFPGADeviceBinary(C, InputArg->getAsString(Args))) {
3775-
Action * FPGAAction =
3776-
C.MakeAction<InputAction>(*InputArg, types::TY_FPGA_AOCR);
3777-
HostActionList.push_back(FPGAAction);
3778-
} else
3779-
HostActionList.push_back(HostAction);
3739+
HostAction->getType() != types::TY_FPGA_AOCR &&
3740+
HostAction->getType() != types::TY_FPGA_AOCX &&
3741+
!(HostAction->getType() == types::TY_Object &&
3742+
llvm::sys::path::has_extension(InputName) &&
3743+
types::lookupTypeForExtension(
3744+
llvm::sys::path::extension(InputName).drop_front()) ==
3745+
types::TY_Object)) {
3746+
if (HasFPGADeviceBinary(C, InputArg->getAsString(Args), true))
3747+
A = C.MakeAction<InputAction>(*InputArg, types::TY_FPGA_AOCX);
3748+
else if (HasFPGADeviceBinary(C, InputArg->getAsString(Args)))
3749+
A = C.MakeAction<InputAction>(*InputArg, types::TY_FPGA_AOCR);
3750+
}
3751+
HostActionList.push_back(A);
37803752
if (!HostActionList.empty()) {
37813753
auto UnbundlingHostAction =
37823754
C.MakeAction<OffloadUnbundlingJobAction>(HostActionList);
@@ -4230,10 +4202,35 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
42304202
Current, InputArg, phases::Link, FinalPhase, PL);
42314203
}
42324204
}
4205+
// For an FPGA archive, we add the unbundling step above to take care of
4206+
// the device side, but also unbundle here to extract the host side
4207+
for (const auto &LI : LinkerInputs) {
4208+
Action *UnbundlerInput = nullptr;
4209+
if (auto *IA = dyn_cast<InputAction>(LI)) {
4210+
if (IA->getType() == types::TY_FPGA_AOCR ||
4211+
IA->getType() == types::TY_FPGA_AOCX) {
4212+
// Add to unbundler.
4213+
UnbundlerInput = LI;
4214+
}
4215+
}
4216+
if (UnbundlerInput) {
4217+
if (auto *IA = dyn_cast<InputAction>(UnbundlerInput)) {
4218+
std::string FileName = IA->getInputArg().getAsString(Args);
4219+
Arg *InputArg = MakeInputArg(Args, *Opts, FileName);
4220+
OffloadBuilder.addHostDependenceToDeviceActions(UnbundlerInput,
4221+
InputArg, Args);
4222+
OffloadBuilder.addDeviceDependencesToHostAction(UnbundlerInput,
4223+
InputArg, phases::Link, FinalPhase, PL);
4224+
}
4225+
}
4226+
}
42334227

42344228
// Add a link action if necessary.
42354229
if (!LinkerInputs.empty()) {
4236-
Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
4230+
types::ID LinkType(types::TY_Image);
4231+
if (Args.hasArg(options::OPT_fsycl_link_EQ))
4232+
LinkType = types::TY_Archive;
4233+
Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, LinkType);
42374234
LA = OffloadBuilder.processHostLinkAction(LA);
42384235
Actions.push_back(LA);
42394236
}
@@ -5011,17 +5008,33 @@ InputInfo Driver::BuildJobsForActionNoCache(
50115008
C.addTempFile(C.getArgs().MakeArgString(TmpFileName),
50125009
types::TY_Tempfilelist);
50135010
CurI = InputInfo(types::TY_Tempfilelist, TmpFile, TmpFile);
5014-
} else if (UI.DependentOffloadKind == Action::OFK_Host &&
5015-
EffectiveTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga &&
5016-
(JA->getType() == types::TY_FPGA_AOCX ||
5017-
JA->getType() == types::TY_FPGA_AOCR)) {
5018-
// Output file from unbundle is FPGA device. Name the file accordingly.
5011+
} else if (JA->getType() == types::TY_FPGA_AOCX ||
5012+
JA->getType() == types::TY_FPGA_AOCR) {
5013+
std::string Ext(types::getTypeTempSuffix(JA->getType()));
5014+
types::ID TI = types::TY_Object;
5015+
if (EffectiveTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
5016+
// Output file from unbundle is FPGA device. Name the file
5017+
// accordingly.
5018+
if (UI.DependentOffloadKind == Action::OFK_Host) {
5019+
// Do not add the current info for Host with FPGA device. The host
5020+
// side isn't used
5021+
continue;
5022+
}
5023+
} else if (EffectiveTriple.getSubArch() !=
5024+
llvm::Triple::SPIRSubArch_fpga) {
5025+
if (UI.DependentOffloadKind == Action::OFK_SYCL) {
5026+
// Do not add the current info for device with FPGA device. The
5027+
// device side isn't used
5028+
continue;
5029+
}
5030+
TI = types::TY_Tempfilelist;
5031+
Ext = "txt";
5032+
}
50195033
std::string TmpFileName =
5020-
C.getDriver().GetTemporaryPath(llvm::sys::path::stem(BaseInput),
5021-
"o");
5034+
C.getDriver().GetTemporaryPath(llvm::sys::path::stem(BaseInput), Ext);
50225035
const char *TmpFile =
50235036
C.addTempFile(C.getArgs().MakeArgString(TmpFileName));
5024-
CurI = InputInfo(types::TY_Object, TmpFile, TmpFile);
5037+
CurI = InputInfo(TI, TmpFile, TmpFile);
50255038
} else {
50265039
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
50275040
UI.DependentOffloadKind,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6494,8 +6494,12 @@ void OffloadBundler::ConstructJobMultipleOutputs(
64946494
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, LinkArgs, Inputs));
64956495
} else if (Input.getType() == types::TY_FPGA_AOCX ||
64966496
Input.getType() == types::TY_FPGA_AOCR) {
6497-
// Override type with object type.
6498-
TypeArg = "o";
6497+
// Override type with archive object
6498+
if (getToolChain().getTriple().getSubArch() ==
6499+
llvm::Triple::SPIRSubArch_fpga)
6500+
TypeArg = "ao";
6501+
else
6502+
TypeArg = "aoo";
64996503
}
65006504
if (C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() &&
65016505
Input.getType() == types::TY_Archive)
@@ -6509,26 +6513,34 @@ void OffloadBundler::ConstructJobMultipleOutputs(
65096513
Triples += "-targets=";
65106514
auto DepInfo = UA.getDependentActionsInfo();
65116515
for (unsigned I = 0; I < DepInfo.size(); ++I) {
6512-
if (I)
6513-
Triples += ',';
6514-
65156516
auto &Dep = DepInfo[I];
65166517
// FPGA device triples are 'transformed' for the bundler when creating
6517-
// aocx or aocr type bundles.
6518-
if (Dep.DependentToolChain->getTriple().getSubArch() ==
6519-
llvm::Triple::SPIRSubArch_fpga &&
6520-
(Input.getType() == types::TY_FPGA_AOCX ||
6521-
Input.getType() == types::TY_FPGA_AOCR)) {
6522-
llvm::Triple TT;
6523-
TT.setArchName(Input.getType() == types::TY_FPGA_AOCX ? "fpga_aocx"
6524-
: "fpga_aocr");
6525-
TT.setVendorName("intel");
6526-
TT.setOS(llvm::Triple(llvm::sys::getProcessTriple()).getOS());
6527-
TT.setEnvironment(llvm::Triple::SYCLDevice);
6528-
Triples += "fpga-";
6529-
Triples += TT.normalize();
6518+
// aocx or aocr type bundles. Also, we only do a specific target
6519+
// unbundling, skipping the host side or device side.
6520+
if (Input.getType() == types::TY_FPGA_AOCX ||
6521+
Input.getType() == types::TY_FPGA_AOCR) {
6522+
if (getToolChain().getTriple().getSubArch() ==
6523+
llvm::Triple::SPIRSubArch_fpga &&
6524+
Dep.DependentOffloadKind == Action::OFK_SYCL) {
6525+
llvm::Triple TT;
6526+
TT.setArchName(Input.getType() == types::TY_FPGA_AOCX ? "fpga_aocx"
6527+
: "fpga_aocr");
6528+
TT.setVendorName("intel");
6529+
TT.setOS(llvm::Triple(llvm::sys::getProcessTriple()).getOS());
6530+
TT.setEnvironment(llvm::Triple::SYCLDevice);
6531+
Triples += "sycl-";
6532+
Triples += TT.normalize();
6533+
} else if (getToolChain().getTriple().getSubArch() !=
6534+
llvm::Triple::SPIRSubArch_fpga &&
6535+
Dep.DependentOffloadKind == Action::OFK_Host) {
6536+
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
6537+
Triples += '-';
6538+
Triples += Dep.DependentToolChain->getTriple().normalize();
6539+
}
65306540
continue;
65316541
}
6542+
if (I)
6543+
Triples += ',';
65326544
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
65336545
Triples += '-';
65346546
Triples += Dep.DependentToolChain->getTriple().normalize();
@@ -6593,6 +6605,23 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
65936605
SmallString<128> HostTripleOpt("-host=");
65946606
HostTripleOpt += getToolChain().getAuxTriple()->str();
65956607
WrapperArgs.push_back(C.getArgs().MakeArgString(HostTripleOpt));
6608+
// When wrapping an FPGA device binary, we need to be sure to apply the
6609+
// appropriate triple that corresponds (fpga_aoc[xr]-intel-<os>-sycldevice)
6610+
// to the target triple setting.
6611+
if (getToolChain().getTriple().getSubArch() ==
6612+
llvm::Triple::SPIRSubArch_fpga &&
6613+
TCArgs.hasArg(options::OPT_fsycl_link_EQ)) {
6614+
llvm::Triple TT;
6615+
auto *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ);
6616+
TT.setArchName((A->getValue() == StringRef("early")) ? "fpga_aocr"
6617+
: "fpga_aocx");
6618+
TT.setVendorName("intel");
6619+
TT.setOS(llvm::Triple(llvm::sys::getProcessTriple()).getOS());
6620+
TT.setEnvironment(llvm::Triple::SYCLDevice);
6621+
SmallString<128> TargetTripleOpt("-target=");
6622+
TargetTripleOpt += TT.str();
6623+
WrapperArgs.push_back(C.getArgs().MakeArgString(TargetTripleOpt));
6624+
}
65966625

65976626
// TODO forcing offload kind is a simplification which assumes wrapper used
65986627
// only with SYCL. Device binary format (-format=xxx) option should also come

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,14 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
160160
// Don't try to pass LLVM inputs unless we have native support.
161161
D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString();
162162

163+
if (II.getType() == types::TY_Tempfilelist) {
164+
// Take the list file and pass it in with '@'.
165+
std::string FileName(II.getFilename());
166+
const char * ArgFile = Args.MakeArgString("@" + FileName);
167+
CmdArgs.push_back(ArgFile);
168+
continue;
169+
}
170+
163171
// Add filenames immediately.
164172
if (II.isFilename()) {
165173
CmdArgs.push_back(II.getFilename());

0 commit comments

Comments
 (0)