Skip to content

Commit 4c16f02

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into zeKernelCreateForLinkedModule
2 parents b26c47f + eb17836 commit 4c16f02

File tree

58 files changed

+1637
-2343
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1637
-2343
lines changed

.github/CODEOWNERS

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,18 +76,18 @@ xpti/ @tovinkere @andykaylor
7676
xptifw/ @tovinkere @andykaylor
7777

7878
# DPC++ tools
79-
llvm/**/file-table-tform/ @mlychkov @AlexeySachkov
79+
llvm/**/file-table-tform/ @mlychkov @AlexeySachkov @kbobrovs
8080
llvm/**/llvm-foreach/ @AlexeySachkov @Fznamznon
8181
llvm/**/llvm-no-spir-kernel/ @AGindinson @AlexeySachkov
82-
llvm/**/sycl-post-link/ @mlychkov @AlexeySachkov
83-
llvm/include/llvm/Support/PropertySetIO.h @mlychkov @AlexeySachkov
84-
llvm/lib/Support/PropertySetIO.cpp @mlychkov @AlexeySachkov
85-
llvm/unittests/Support/PropertySetIOTest.cpp @mlychkov @AlexeySachkov
86-
llvm/lib/Support/Base64.cpp @mlychkov @AlexeySachkov
82+
llvm/**/sycl-post-link/ @mlychkov @AlexeySachkov @kbobrovs
83+
llvm/include/llvm/Support/PropertySetIO.h @mlychkov @AlexeySachkov @kbobrovs
84+
llvm/lib/Support/PropertySetIO.cpp @mlychkov @AlexeySachkov @kbobrovs
85+
llvm/unittests/Support/PropertySetIOTest.cpp @mlychkov @AlexeySachkov @kbobrovs
86+
llvm/lib/Support/Base64.cpp @mlychkov @AlexeySachkov @kbobrovs
8787

8888
# Clang offload tools
8989
clang/tools/clang-offload-bundler/ @mlychkov @sndmitriev @AlexeySachkov
90-
clang/tools/clang-offload-wrapper/ @mlychkov @sndmitriev @AlexeySachkov
90+
clang/tools/clang-offload-wrapper/ @mlychkov @sndmitriev @AlexeySachkov @kbobrovs
9191
clang/tools/clang-offload-deps/ @sndmitriev @mlychkov @AlexeySachkov
9292

9393
# Explicit SIMD

buildbot/dependency.conf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ ocl_cpu_rt_ver_win=2021.11.3.0.09
77
# https://github.com/intel/compute-runtime/releases/tag/21.13.19438
88
ocl_gpu_rt_ver=21.13.19438
99
# Same GPU driver supports Level Zero and OpenCL
10-
# https://downloadmirror.intel.com/30266/a08/igfx_win10_100.9316.zip
11-
ocl_gpu_rt_ver_win=27.20.100.9316
10+
# https://downloadmirror.intel.com/30381/a08/igfx_win10_100.9466.zip
11+
ocl_gpu_rt_ver_win=27.20.100.9466
1212
intel_sycl_ver=build
1313

1414
# TBB binaries can be built from sources following instructions under
@@ -31,7 +31,7 @@ ocloc_ver_win=27.20.100.9168
3131
cpu_driver_lin=2021.11.3.0.09
3232
cpu_driver_win=2021.11.3.0.09
3333
gpu_driver_lin=21.13.19438
34-
gpu_driver_win=27.20.100.9316
34+
gpu_driver_win=27.20.100.9466
3535
fpga_driver_lin=2021.11.3.0.09
3636
fpga_driver_win=2021.11.3.0.09
3737
# NVidia CUDA driver

clang/include/clang/Basic/Attr.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1975,6 +1975,21 @@ def : MutualExclusions<[SYCLIntelFPGAIVDep,
19751975
def : MutualExclusions<[SYCLIntelFPGAMaxConcurrency,
19761976
SYCLIntelFPGADisableLoopPipelining]>;
19771977

1978+
// Statement attribute carrying a loop trip-count hint.
// A single attribute class backs three C++11 spellings:
// [[intel::loop_count_min]], [[intel::loop_count_max]] and
// [[intel::loop_count_avg]]. The isMin/isMax/isAvg accessors are each tied to
// one of those spellings, so users of the attribute can tell which was written.
// It takes one expression argument (NTripCount) and its subject list is limited
// to 'for', range-based 'for', 'while', and 'do' statements.
def SYCLIntelFPGALoopCount : StmtAttr {
1979+
let Spellings = [CXX11<"intel", "loop_count_min">,
1980+
CXX11<"intel", "loop_count_max">,
1981+
CXX11<"intel", "loop_count_avg">];
1982+
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
1983+
ErrorDiag, "'for', 'while', and 'do' statements">;
1984+
let Accessors = [Accessor<"isMin", [CXX11<"intel", "loop_count_min">]>,
1985+
Accessor<"isMax", [CXX11<"intel", "loop_count_max">]>,
1986+
Accessor<"isAvg", [CXX11<"intel", "loop_count_avg">]>];
1987+
let Args = [ExprArgument<"NTripCount">];
1988+
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
1989+
let IsStmtDependent = 1;
1990+
let Documentation = [SYCLIntelFPGALoopCountAttrDocs];
1991+
}
1992+
19781993
def : MutualExclusions<[SYCLIntelFPGAMaxConcurrency,
19791994
SYCLIntelFPGADisableLoopPipelining]>;
19801995

clang/include/clang/Basic/AttrDocs.td

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2951,6 +2951,41 @@ or ``ivdep``.
29512951
}];
29522952
}
29532953

2954+
def SYCLIntelFPGALoopCountAttrDocs : Documentation {
2955+
let Category = DocCatVariable;
2956+
let Heading = "intel::loop_count_min, intel::loop_count_max, intel::loop_count_avg";
2957+
let Content = [{
2958+
The loop count attributes specify the minimum, maximum, or average number of
2959+
iterations of a ``for``, ``while``, or ``do`` loop. These are hints that the
2960+
user specifies and that some loop optimizations can use to make decisions, such
2961+
as whether the loop should be unrolled. It is a way for the user to provide some
2962+
information without using PGO.
2963+
2964+
.. code-block:: c++
2965+
2966+
void foo(int *array, size_t n) {
2967+
[[intel::loop_count_min(4)]] for (int i = 0; i < n; ++i) array[i] = 0;
2968+
}
2969+
2970+
void zoo(int *array, size_t n) {
2971+
[[intel::loop_count_max(10)]] for (int i = 0; i < n; ++i) array[i] = 0;
2972+
}
2973+
2974+
void goo(int *array, size_t n) {
2975+
[[intel::loop_count_min(3)]]
2976+
[[intel::loop_count_max(10)]]
2977+
[[intel::loop_count_avg(5)]]
2978+
for (int i = 0; i < n; ++i) array[i] = 0;
2979+
}
2980+
2981+
template<int N>
2982+
void bar() {
2983+
[[intel::loop_count_avg(N)]] for(;;) { }
2984+
}
2985+
2986+
}];
2987+
}
2988+
29542989
def SYCLIntelFPGAMaxInterleavingAttrDocs : Documentation {
29552990
let Category = DocCatVariable;
29562991
let Heading = "intel::max_interleaving";

clang/include/clang/Sema/Sema.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2121,6 +2121,9 @@ class Sema final {
21212121
OpenCLUnrollHintAttr *
21222122
BuildOpenCLLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E);
21232123

2124+
SYCLIntelFPGALoopCountAttr *
2125+
BuildSYCLIntelFPGALoopCount(const AttributeCommonInfo &CI, Expr *E);
2126+
21242127
bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc);
21252128

21262129
bool CheckFunctionReturnType(QualType T, SourceLocation Loc);
@@ -13439,7 +13442,8 @@ FPGALoopAttrT *Sema::BuildSYCLIntelFPGALoopAttr(const AttributeCommonInfo &A,
1343913442
A.getParsedKind() ==
1344013443
ParsedAttr::AT_SYCLIntelFPGAMaxInterleaving ||
1344113444
A.getParsedKind() ==
13442-
ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations) {
13445+
ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations ||
13446+
A.getParsedKind() == ParsedAttr::AT_SYCLIntelFPGALoopCount) {
1344313447
if (Val < 0) {
1344413448
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
1344513449
<< A.getAttrName() << /* non-negative */ 1;

clang/lib/CodeGen/CGLoopInfo.cpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,12 @@ MDNode *LoopInfo::createMetadata(
605605
LoopProperties.push_back(MDNode::get(Ctx, Vals));
606606
}
607607

608+
for (auto &VC : Attrs.SYCLIntelFPGAVariantCount) {
609+
Metadata *Vals[] = {MDString::get(Ctx, VC.first),
610+
ConstantAsMetadata::get(ConstantInt::get(
611+
llvm::Type::getInt32Ty(Ctx), VC.second))};
612+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
613+
}
608614
LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
609615
AdditionalLoopProperties.end());
610616
return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms);
@@ -621,10 +627,11 @@ LoopAttributes::LoopAttributes(bool IsParallel)
621627
SYCLLoopCoalesceNLevels(0), SYCLLoopPipeliningDisable(false),
622628
SYCLMaxInterleavingEnable(false), SYCLMaxInterleavingNInvocations(0),
623629
SYCLSpeculatedIterationsEnable(false),
624-
SYCLSpeculatedIterationsNIterations(0), UnrollCount(0),
625-
UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified),
626-
PipelineDisabled(false), PipelineInitiationInterval(0),
627-
SYCLNofusionEnable(false), MustProgress(false) {}
630+
SYCLSpeculatedIterationsNIterations(0), SYCLIntelFPGAVariantCount(false),
631+
UnrollCount(0), UnrollAndJamCount(0),
632+
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
633+
PipelineInitiationInterval(0), SYCLNofusionEnable(false),
634+
MustProgress(false) {}
628635

629636
void LoopAttributes::clear() {
630637
IsParallel = false;
@@ -643,6 +650,7 @@ void LoopAttributes::clear() {
643650
SYCLMaxInterleavingNInvocations = 0;
644651
SYCLSpeculatedIterationsEnable = false;
645652
SYCLSpeculatedIterationsNIterations = 0;
653+
SYCLIntelFPGAVariantCount.clear();
646654
UnrollCount = 0;
647655
UnrollAndJamCount = 0;
648656
VectorizeEnable = LoopAttributes::Unspecified;
@@ -680,8 +688,9 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
680688
Attrs.SYCLMaxInterleavingNInvocations == 0 &&
681689
Attrs.SYCLSpeculatedIterationsEnable == false &&
682690
Attrs.SYCLSpeculatedIterationsNIterations == 0 &&
683-
Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 &&
684-
!Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 &&
691+
Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 &&
692+
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
693+
Attrs.PipelineInitiationInterval == 0 &&
685694
Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
686695
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
687696
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
@@ -1030,6 +1039,19 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10301039
->getSExtValue());
10311040
}
10321041

1042+
if (const auto *IntelFPGALoopCountAvg =
1043+
dyn_cast<SYCLIntelFPGALoopCountAttr>(A)) {
1044+
unsigned int Count = IntelFPGALoopCountAvg->getNTripCount()
1045+
->getIntegerConstantExpr(Ctx)
1046+
->getSExtValue();
1047+
const char *Var = IntelFPGALoopCountAvg->isMax()
1048+
? "llvm.loop.intel.loopcount_max"
1049+
: IntelFPGALoopCountAvg->isMin()
1050+
? "llvm.loop.intel.loopcount_min"
1051+
: "llvm.loop.intel.loopcount_avg";
1052+
setSYCLIntelFPGAVariantCount(Var, Count);
1053+
}
1054+
10331055
if (const auto *IntelFPGALoopCoalesce =
10341056
dyn_cast<SYCLIntelFPGALoopCoalesceAttr>(A)) {
10351057
if (auto *LCE = IntelFPGALoopCoalesce->getNExpr())

clang/lib/CodeGen/CGLoopInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ struct LoopAttributes {
117117
/// Value for llvm.loop.max_concurrency.count metadata.
118118
unsigned SYCLMaxConcurrencyNThreads;
119119

120+
/// Value for count variant (min/max/avg) and count metadata.
121+
llvm::SmallVector<std::pair<const char *, unsigned int>, 2>
122+
SYCLIntelFPGAVariantCount;
123+
120124
/// Flag for llvm.loop.coalesce metadata.
121125
bool SYCLLoopCoalesceEnable;
122126

@@ -404,6 +408,11 @@ class LoopInfoStack {
404408
StagedAttrs.SYCLSpeculatedIterationsNIterations = C;
405409
}
406410

411+
/// Set value of variant and loop count for the next loop pushed.
///
/// Appends the pair (\p Var, \p Count) to
/// StagedAttrs.SYCLIntelFPGAVariantCount. Each call adds a new entry instead
/// of overwriting an earlier one, so several variants can be staged for the
/// same loop.
///
/// \param Var   String stored as the first element of the pair. Only the
///              pointer is stored, not a copy of the characters.
/// \param Count Value stored as the second element of the pair.
412+
void setSYCLIntelFPGAVariantCount(const char *Var, unsigned int Count) {
413+
StagedAttrs.SYCLIntelFPGAVariantCount.push_back({Var, Count});
414+
}
415+
407416
/// Set the unroll count for the next loop pushed.
408417
void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; }
409418

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -657,8 +657,7 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
657657
// To support the SYCL 2020 spelling with no propagation, only emit for
658658
// kernel-or-device when that spelling is used; else keep the old behavior.
659659
if (ReqSubGroup && (IsKernelOrDevice || !ReqSubGroup->isSYCL2020Spelling())) {
660-
const auto *CE = dyn_cast<ConstantExpr>(ReqSubGroup->getValue());
661-
assert(CE && "Not an integer constant expression");
660+
const auto *CE = cast<ConstantExpr>(ReqSubGroup->getValue());
662661
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
663662
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
664663
Builder.getInt32(ArgVal->getSExtValue()))};
@@ -705,32 +704,26 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
705704
llvm::MDNode::get(Context, AttrMDArgs));
706705
}
707706

708-
if (const SYCLIntelNumSimdWorkItemsAttr *A =
709-
FD->getAttr<SYCLIntelNumSimdWorkItemsAttr>()) {
710-
const auto *CE = dyn_cast<ConstantExpr>(A->getValue());
711-
assert(CE && "Not an integer constant expression");
707+
if (const auto *A = FD->getAttr<SYCLIntelNumSimdWorkItemsAttr>()) {
708+
const auto *CE = cast<ConstantExpr>(A->getValue());
712709
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
713710
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
714711
Builder.getInt32(ArgVal->getSExtValue()))};
715712
Fn->setMetadata("num_simd_work_items",
716713
llvm::MDNode::get(Context, AttrMDArgs));
717714
}
718715

719-
if (const SYCLIntelSchedulerTargetFmaxMhzAttr *A =
720-
FD->getAttr<SYCLIntelSchedulerTargetFmaxMhzAttr>()) {
721-
const auto *CE = dyn_cast<ConstantExpr>(A->getValue());
722-
assert(CE && "Not an integer constant expression");
716+
if (const auto *A = FD->getAttr<SYCLIntelSchedulerTargetFmaxMhzAttr>()) {
717+
const auto *CE = cast<ConstantExpr>(A->getValue());
723718
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
724719
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
725720
Builder.getInt32(ArgVal->getSExtValue()))};
726721
Fn->setMetadata("scheduler_target_fmax_mhz",
727722
llvm::MDNode::get(Context, AttrMDArgs));
728723
}
729724

730-
if (const SYCLIntelMaxGlobalWorkDimAttr *A =
731-
FD->getAttr<SYCLIntelMaxGlobalWorkDimAttr>()) {
732-
const auto *CE = dyn_cast<ConstantExpr>(A->getValue());
733-
assert(CE && "Not an integer constant expression");
725+
if (const auto *A = FD->getAttr<SYCLIntelMaxGlobalWorkDimAttr>()) {
726+
const auto *CE = cast<ConstantExpr>(A->getValue());
734727
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
735728
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
736729
Builder.getInt32(ArgVal->getSExtValue()))};
@@ -760,12 +753,8 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
760753
llvm::MDNode::get(Context, AttrMDArgs));
761754
}
762755

763-
if (const SYCLIntelNoGlobalWorkOffsetAttr *A =
764-
FD->getAttr<SYCLIntelNoGlobalWorkOffsetAttr>()) {
765-
const Expr *Arg = A->getValue();
766-
assert(Arg && "Got an unexpected null argument");
767-
const auto *CE = dyn_cast<ConstantExpr>(Arg);
768-
assert(CE && "Not an integer constant expression");
756+
if (const auto *A = FD->getAttr<SYCLIntelNoGlobalWorkOffsetAttr>()) {
757+
const auto *CE = cast<ConstantExpr>(A->getValue());
769758
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
770759
if (ArgVal->getBoolValue())
771760
Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {}));

clang/lib/Driver/Driver.cpp

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6213,7 +6213,6 @@ InputInfo Driver::BuildJobsForActionNoCache(
62136213

62146214
// Only use pipes when there is exactly one input.
62156215
InputInfoList InputInfos;
6216-
bool JobForPreprocessToStdout = false;
62176216
for (const Action *Input : Inputs) {
62186217
// Treat dsymutil and verify sub-jobs as being at the top-level too, they
62196218
// shouldn't get temporary output names.
@@ -6225,11 +6224,6 @@ InputInfo Driver::BuildJobsForActionNoCache(
62256224
SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults,
62266225
A->getOffloadingDeviceKind()));
62276226
}
6228-
// Check if we are in sub-work for preprocessing for host side. If so we will
6229-
// add another job to print information to terminal later.
6230-
if (!AtTopLevel && A->getKind() == Action::PreprocessJobClass &&
6231-
C.getJobs().size() == 1)
6232-
JobForPreprocessToStdout = true;
62336227

62346228
// Always use the first input as the base input.
62356229
const char *BaseInput = InputInfos[0].getBaseInput();
@@ -6264,7 +6258,6 @@ InputInfo Driver::BuildJobsForActionNoCache(
62646258

62656259
// Determine the place to write output to, if any.
62666260
InputInfo Result;
6267-
InputInfo ResultForPreprocessToStdout;
62686261
InputInfoList UnbundlingResults;
62696262
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(JA)) {
62706263
// If we have an unbundling job, we need to create results for all the
@@ -6469,8 +6462,6 @@ InputInfo Driver::BuildJobsForActionNoCache(
64696462
AtTopLevel, MultipleArchs,
64706463
OffloadingPrefix),
64716464
BaseInput);
6472-
if (JobForPreprocessToStdout)
6473-
ResultForPreprocessToStdout = InputInfo(A, "-", BaseInput);
64746465
}
64756466

64766467
if (CCCPrintBindings && !CCGenDiagnostics) {
@@ -6493,19 +6484,12 @@ InputInfo Driver::BuildJobsForActionNoCache(
64936484
llvm::errs() << "] \n";
64946485
}
64956486
} else {
6496-
if (UnbundlingResults.empty()) {
6487+
if (UnbundlingResults.empty())
64976488
T->ConstructJob(
64986489
C, *JA, Result, InputInfos,
64996490
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
65006491
LinkingOutput);
6501-
// Add another job to print information to terminal for host side.
6502-
if (JobForPreprocessToStdout) {
6503-
T->ConstructJob(
6504-
C, *JA, ResultForPreprocessToStdout, InputInfos,
6505-
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
6506-
LinkingOutput);
6507-
}
6508-
} else
6492+
else
65096493
T->ConstructJobMultipleOutputs(
65106494
C, *JA, UnbundlingResults, InputInfos,
65116495
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8440,7 +8440,7 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
84408440
TranslatorArgs.push_back(Output.getFilename());
84418441
if (getToolChain().getTriple().isSYCLDeviceEnvironment()) {
84428442
TranslatorArgs.push_back("-spirv-max-version=1.3");
8443-
TranslatorArgs.push_back("-spirv-debug-info-version=legacy");
8443+
TranslatorArgs.push_back("-spirv-debug-info-version=ocl-100");
84448444
// Prevent crash in the translator if input IR contains DIExpression
84458445
// operations which don't have mapping to OpenCL.DebugInfo.100 spec.
84468446
TranslatorArgs.push_back("-spirv-allow-extra-diexpressions");

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ const char *SYCL::Linker::constructLLVMSpirvCommand(
4141
} else {
4242
CmdArgs.push_back("-spirv-max-version=1.3");
4343
CmdArgs.push_back("-spirv-ext=+all");
44-
CmdArgs.push_back("-spirv-debug-info-version=legacy");
44+
CmdArgs.push_back("-spirv-debug-info-version=ocl-100");
4545
CmdArgs.push_back("-spirv-allow-extra-diexpressions");
4646
CmdArgs.push_back("-spirv-allow-unknown-intrinsics=llvm.genx.");
4747
CmdArgs.push_back("-o");

0 commit comments

Comments
 (0)