Skip to content

Commit d9ce963

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (#3)
2 parents 8f7d3e7 + 6431be6 commit d9ce963

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+1552
-689
lines changed

clang/include/clang/Driver/Job.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "clang/Basic/LLVM.h"
1313
#include "llvm/ADT/ArrayRef.h"
14+
#include "llvm/ADT/DenseMap.h"
1415
#include "llvm/ADT/Optional.h"
1516
#include "llvm/ADT/SmallVector.h"
1617
#include "llvm/ADT/StringRef.h"
@@ -39,6 +40,10 @@ struct CrashReportInfo {
3940
/// Command - An executable path/name and argument vector to
4041
/// execute.
4142
class Command {
43+
public:
44+
using ErrorCodeDiagMapTy = llvm::DenseMap<int, std::string>;
45+
46+
private:
4247
/// Source - The action which caused the creation of this job.
4348
const Action &Source;
4449

@@ -48,6 +53,18 @@ class Command {
4853
/// The executable to run.
4954
const char *Executable;
5055

56+
/// The container for custom driver-set diagnostic messages that are
57+
/// produced upon particular error codes returned by the command.
58+
/// In order to add such a diagnostic for an external tool, consider the
59+
/// following criteria:
60+
/// 1) Does the command's executable return different codes upon different
61+
/// types of errors?
62+
/// 2) If the executable provides a single error code for various error types,
63+
/// is only a certain type of failure expected to occur within the driver
64+
/// flow? E.g. the driver guarantees a valid input to the tool, so any
65+
/// "invalid input" error can be ruled out.
66+
ErrorCodeDiagMapTy ErrorCodeDiagMap;
67+
5168
/// The list of program arguments (not including the implicit first
5269
/// argument, which will be the executable).
5370
llvm::opt::ArgStringList Arguments;
@@ -100,6 +117,15 @@ class Command {
100117
virtual int Execute(ArrayRef<Optional<StringRef>> Redirects,
101118
std::string *ErrMsg, bool *ExecutionFailed) const;
102119

120+
/// Store a custom driver diagnostic message for a particular error code
121+
/// returned by the command.
122+
void addDiagForErrorCode(int ErrorCode, StringRef CustomDiag);
123+
124+
/// Get the custom driver diagnostic message for a particular error code
125+
/// if one was stored. Returns an empty string if no diagnostic message
126+
/// was found for the given error code.
127+
StringRef getDiagForErrorCode(int ErrorCode) const;
128+
103129
/// getSource - Return the Action which caused the creation of this job.
104130
const Action &getSource() const { return Source; }
105131

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
843843
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
844844

845845
if (LangOpts.SYCLIsDevice)
846-
PerModulePasses.add(createSYCLLowerWGScopePass());
846+
PerFunctionPasses.add(createSYCLLowerWGScopePass());
847847

848848
CreatePasses(PerModulePasses, PerFunctionPasses);
849849

clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp

Lines changed: 76 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ enum class MemorySemantics : unsigned {
190190

191191
Instruction *genWGBarrier(Instruction &Before);
192192
Value *genLinearLocalID(Instruction &Before);
193-
Value *createWGLocalVariable(Module &M, Type *T, const Twine &Name);
193+
GlobalVariable *createWGLocalVariable(Module &M, Type *T, const Twine &Name);
194194
} // namespace spirv
195195

196196
static bool isCallToAFuncMarkedWithMD(const Instruction *I, const char *MD) {
@@ -375,16 +375,25 @@ namespace {
375375
using LocalsSet = SmallPtrSet<AllocaInst *, 4>;
376376
}
377377

378-
static void copyBetweenLocalAndShadow(AllocaInst *L, GlobalVariable *Shadow,
379-
IRBuilder<> &Builder, bool Loc2Shadow) {
380-
Type *T = L->getAllocatedType();
378+
static void copyBetweenPrivateAndShadow(Value *L, GlobalVariable *Shadow,
379+
IRBuilder<> &Builder, bool Loc2Shadow) {
380+
Type *T = nullptr;
381+
int LocAlignN = 0;
382+
383+
if (const auto *AI = dyn_cast<AllocaInst>(L)) {
384+
T = AI->getAllocatedType();
385+
LocAlignN = AI->getAlignment();
386+
} else {
387+
T = cast<Argument>(L)->getParamByValType();
388+
LocAlignN = cast<Argument>(L)->getParamAlignment();
389+
}
381390

382391
if (T->isAggregateType()) {
383392
// TODO: we should use methods which directly return MaybeAlign once such
384393
// are added to LLVM for AllocaInst and GlobalVariable
385-
auto LocAlign = MaybeAlign(L->getAlignment());
394+
auto LocAlign = MaybeAlign(LocAlignN);
386395
auto ShdAlign = MaybeAlign(Shadow->getAlignment());
387-
Module &M = *L->getModule();
396+
Module &M = *Shadow->getParent();
388397
auto SizeVal = M.getDataLayout().getTypeStoreSize(T);
389398
auto Size = ConstantInt::get(getSizeTTy(M), SizeVal);
390399
if (Loc2Shadow)
@@ -434,9 +443,9 @@ static void copyBetweenLocalAndShadow(AllocaInst *L, GlobalVariable *Shadow,
434443
//
435444
static void materializeLocalsInWIScopeBlocksImpl(
436445
const DenseMap<BasicBlock *, std::unique_ptr<LocalsSet>> &BB2MatLocals,
437-
const DenseMap<AllocaInst *, Value *> &Local2Shadow) {
446+
const DenseMap<AllocaInst *, GlobalVariable *> &Local2Shadow) {
438447
for (auto &P : BB2MatLocals) {
439-
// generate LeaderBB and local<->shadow copies in proper BBs
448+
// generate LeaderBB and private<->shadow copies in proper BBs
440449
BasicBlock *LeaderBB = P.first;
441450
BasicBlock *BB = LeaderBB->splitBasicBlock(&LeaderBB->front(), "LeaderMat");
442451
// Add a barrier to the original block:
@@ -445,18 +454,19 @@ static void materializeLocalsInWIScopeBlocksImpl(
445454
for (AllocaInst *L : *P.second.get()) {
446455
auto MapEntry = Local2Shadow.find(L);
447456
assert(MapEntry != Local2Shadow.end() && "local must have a shadow");
448-
auto *Shadow = dyn_cast<GlobalVariable>(MapEntry->second);
457+
auto *Shadow = MapEntry->second;
449458
LLVMContext &Ctx = L->getContext();
450459
IRBuilder<> Builder(Ctx);
451460
// fill the leader BB:
452461
// fetch data from leader's private copy (which is always up to date) into
453462
// the corresponding shadow variable
454463
Builder.SetInsertPoint(&LeaderBB->front());
455-
copyBetweenLocalAndShadow(L, Shadow, Builder, true /*local->shadow*/);
464+
copyBetweenPrivateAndShadow(L, Shadow, Builder, true /*private->shadow*/);
456465
// store data to the local variable - effectively "refresh" the value of
457466
// the local in each work item in the work group
458467
Builder.SetInsertPoint(At);
459-
copyBetweenLocalAndShadow(L, Shadow, Builder, false /*shadow->local*/);
468+
copyBetweenPrivateAndShadow(L, Shadow, Builder,
469+
false /*shadow->private*/);
460470
}
461471
// now generate the TestBB and the leader WI guard
462472
BasicBlock *TestBB =
@@ -528,7 +538,7 @@ void materializeLocalsInWIScopeBlocks(
528538
SmallPtrSetImpl<AllocaInst *> &Locals,
529539
SmallPtrSetImpl<BasicBlock *> &WIScopeBBs) {
530540
// maps local variable to its "shadow" workgroup-shared global:
531-
DenseMap<AllocaInst *, Value *> Local2Shadow;
541+
DenseMap<AllocaInst *, GlobalVariable *> Local2Shadow;
532542
// records which locals must be materialized at the beginning of a block:
533543
DenseMap<BasicBlock *, std::unique_ptr<LocalsSet>> BB2MatLocals;
534544

@@ -543,7 +553,7 @@ void materializeLocalsInWIScopeBlocks(
543553
continue;
544554
if (Local2Shadow.find(L) == Local2Shadow.end()) {
545555
// lazily create a "shadow" for current local:
546-
Value *Shadow = spirv::createWGLocalVariable(
556+
GlobalVariable *Shadow = spirv::createWGLocalVariable(
547557
*BB->getModule(), L->getAllocatedType(), "WGCopy");
548558
Local2Shadow.insert(std::make_pair(L, Shadow));
549559
}
@@ -667,6 +677,47 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) {
667677
}
668678
}
669679

680+
// Go through "byval" parameters which are passed as AS(0) pointers
681+
// and: (1) create local shadows for them, (2) initialize them from the
682+
// leader's copy, and (3) replace usages with a pointer to the shadow
683+
static void shareByValParams(Function &F) {
684+
// split the entry block into three blocks: entry, "leader" and "merge"
685+
BasicBlock *EntryBB = &F.getEntryBlock();
686+
BasicBlock *LeaderBB = EntryBB->splitBasicBlock(&EntryBB->front(), "leader");
687+
BasicBlock *MergeBB = LeaderBB->splitBasicBlock(&LeaderBB->front(), "merge");
688+
689+
// 1) rewire the above basic blocks so that LeaderBB is executed only for the
690+
// leader workitem
691+
guardBlockWithIsLeaderCheck(EntryBB, LeaderBB, MergeBB,
692+
EntryBB->back().getDebugLoc());
693+
Instruction &At = LeaderBB->back();
694+
695+
for (auto &Arg : F.args()) {
696+
if (!Arg.hasByValAttr())
697+
continue;
698+
assert(Arg.getType()->getPointerAddressSpace() ==
699+
asUInt(spirv::AddrSpace::Private));
700+
Type *T = Arg.getParamByValType();
701+
702+
// 2) create the shared copy - "shadow" - for current byval arg
703+
GlobalVariable *Shadow =
704+
spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow");
705+
706+
// 3) replace argument with shadow in all uses
707+
for (auto *U : Arg.users())
708+
U->replaceUsesOfWith(&Arg, Shadow);
709+
710+
// 4) fill the shadow from the argument for the leader WI only
711+
LLVMContext &Ctx = At.getContext();
712+
IRBuilder<> Builder(Ctx);
713+
Builder.SetInsertPoint(&LeaderBB->front());
714+
copyBetweenPrivateAndShadow(&Arg, Shadow, Builder,
715+
true /*private->shadow*/);
716+
}
717+
// 5) make sure workers use up-to-date shared values written by the leader
718+
spirv::genWGBarrier(MergeBB->front());
719+
}
720+
670721
PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
671722
FunctionAnalysisManager &FAM) {
672723
if (!F.getMetadata(WG_SCOPE_MD))
@@ -729,7 +780,13 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
729780
}
730781
}
731782
#ifndef NDEBUG
732-
bool HaveChanges = (Ranges.size() > 0) || (Allocas.size() > 0);
783+
int NByval = 0;
784+
for (const auto &Arg : F.args()) {
785+
if (Arg.hasByValAttr())
786+
NByval++;
787+
}
788+
789+
bool HaveChanges = (Ranges.size() > 0) || (Allocas.size() > 0) || NByval > 0;
733790

734791
if (HaveChanges && Debug > 1) {
735792
dumpIR(F, "before");
@@ -762,6 +819,9 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
762819
for (auto *PFWICall : PFWICalls)
763820
fixupPrivateMemoryPFWILambdaCaptures(PFWICall);
764821

822+
// Finally, create shadows for and replace usages of byval pointer params
823+
shareByValParams(F);
824+
765825
#ifndef NDEBUG
766826
if (HaveChanges && Debug > 0)
767827
verifyModule(*F.getParent(), &llvm::errs());
@@ -773,7 +833,8 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F,
773833
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
774834
}
775835

776-
Value *spirv::createWGLocalVariable(Module &M, Type *T, const Twine &Name) {
836+
GlobalVariable *spirv::createWGLocalVariable(Module &M, Type *T,
837+
const Twine &Name) {
777838
GlobalVariable *G =
778839
new GlobalVariable(M, // module
779840
T, // type

clang/lib/Driver/Driver.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1685,7 +1685,7 @@ int Driver::ExecuteCompilation(
16851685
// diagnostics, so always print the diagnostic there.
16861686
const Tool &FailingTool = FailingCommand->getCreator();
16871687

1688-
if (!FailingCommand->getCreator().hasGoodDiagnostics() || CommandRes != 1) {
1688+
if (!FailingTool.hasGoodDiagnostics() || CommandRes != 1) {
16891689
// FIXME: See FIXME above regarding result code interpretation.
16901690
if (CommandRes < 0)
16911691
Diag(clang::diag::err_drv_command_signalled)
@@ -1694,6 +1694,10 @@ int Driver::ExecuteCompilation(
16941694
Diag(clang::diag::err_drv_command_failed)
16951695
<< FailingTool.getShortName() << CommandRes;
16961696
}
1697+
1698+
auto CustomDiag = FailingCommand->getDiagForErrorCode(CommandRes);
1699+
if (!CustomDiag.empty())
1700+
Diag(clang::diag::note_drv_command_failed_diag_msg) << CustomDiag;
16971701
}
16981702
return Res;
16991703
}

clang/lib/Driver/Job.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,17 @@ void Command::buildArgvForResponseFile(
173173
}
174174
}
175175

176+
void Command::addDiagForErrorCode(int ErrorCode, StringRef CustomDiag) {
177+
ErrorCodeDiagMap[ErrorCode] = CustomDiag.str();
178+
}
179+
180+
StringRef Command::getDiagForErrorCode(int ErrorCode) const {
181+
auto ErrorCodeDiagIt = ErrorCodeDiagMap.find(ErrorCode);
182+
if (ErrorCodeDiagIt != ErrorCodeDiagMap.end())
183+
return ErrorCodeDiagIt->second;
184+
return StringRef();
185+
}
186+
176187
/// Rewrite relative include-like flag paths to absolute ones.
177188
static void
178189
rewriteIncludes(const llvm::ArrayRef<const char *> &Args, size_t Idx,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7440,18 +7440,29 @@ void SPIRCheck::ConstructJob(Compilation &C, const JobAction &JA,
74407440
// we need to exit. The expected output is the input as this is just an
74417441
// intermediate check with no functional change.
74427442
ArgStringList CheckArgs;
7443-
for (auto I : Inputs) {
7444-
CheckArgs.push_back(I.getFilename());
7445-
}
7443+
assert(Inputs.size() == 1 && "Unexpected number of inputs to the tool");
7444+
const InputInfo &InputFile = Inputs.front();
7445+
CheckArgs.push_back(InputFile.getFilename());
74467446

74477447
// Add output file, which is just a copy of the input to better fit in the
74487448
// toolchain flow.
74497449
CheckArgs.push_back("-o");
74507450
CheckArgs.push_back(Output.getFilename());
7451-
7452-
C.addCommand(std::make_unique<Command>(JA, *this,
7451+
auto Cmd = std::make_unique<Command>(
7452+
JA, *this,
74537453
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
7454-
CheckArgs, None));
7454+
CheckArgs, None);
7455+
7456+
if (getToolChain().getTriple().getSubArch() ==
7457+
llvm::Triple::SPIRSubArch_fpga) {
7458+
const char *Msg = TCArgs.MakeArgString(
7459+
Twine("The FPGA image does not include all device kernels from ") +
7460+
Twine(InputFile.getBaseInput()) +
7461+
Twine(". Please re-generate the image"));
7462+
Cmd->addDiagForErrorCode(/*ErrorCode*/ 1, Msg);
7463+
}
7464+
7465+
C.addCommand(std::move(Cmd));
74557466
}
74567467

74577468
void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,

clang/lib/Driver/ToolChains/Clang.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ class LLVM_LIBRARY_VISIBILITY SYCLPostLink final : public Tool {
199199
: Tool("SYCL post link", "sycl-post-link", TC) {}
200200

201201
bool hasIntegratedCPP() const override { return false; }
202+
bool hasGoodDiagnostics() const override { return true; }
202203
void ConstructJob(Compilation &C, const JobAction &JA,
203204
const InputInfo &Output, const InputInfoList &Inputs,
204205
const llvm::opt::ArgList &TCArgs,

clang/test/CodeGenSYCL/Inputs/sycl.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ class sampler {
2323

2424
template <int dimensions = 1>
2525
class group {
26+
public:
27+
group() = default; // fake constructor
2628
};
2729

2830
namespace access {
@@ -251,6 +253,12 @@ kernel_parallel_for(KernelType KernelFunc) {
251253
KernelFunc(id<Dims>());
252254
}
253255

256+
template <typename KernelName, typename KernelType, int Dims>
257+
ATTR_SYCL_KERNEL void
258+
kernel_parallel_for_work_group(KernelType KernelFunc) {
259+
KernelFunc(group<Dims>());
260+
}
261+
254262
class handler {
255263
public:
256264
template <typename KernelName = auto_name, typename KernelType, int Dims>
@@ -263,6 +271,17 @@ class handler {
263271
#endif
264272
}
265273

274+
template <typename KernelName = auto_name, typename KernelType, int Dims>
275+
void parallel_for_work_group(range<Dims> numWorkGroups, range<Dims> WorkGroupSize, KernelType kernelFunc) {
276+
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
277+
#ifdef __SYCL_DEVICE_ONLY__
278+
kernel_parallel_for_work_group<NameT, KernelType, Dims>(kernelFunc);
279+
#else
280+
group<Dims> G;
281+
kernelFunc(G);
282+
#endif
283+
}
284+
266285
template <typename KernelName = auto_name, typename KernelType>
267286
void single_task(KernelType kernelFunc) {
268287
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;

0 commit comments

Comments
 (0)