Skip to content

Commit 60ec6f1

Browse files
Merge remote-tracking branch 'intel_llvm/sycl' into syclTypeAttr
2 parents 95beeec + 90fa5bb commit 60ec6f1

File tree

186 files changed

+13316
-10242
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

186 files changed

+13316
-10242
lines changed

.github/workflows/sycl_precommit.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88
paths-ignore:
99
- '.github/ISSUE_TEMPLATE/**'
1010
- '.github/CODEOWNERS'
11+
- '.github/workflows/sycl_update_gpu_driver.yml'
1112
- 'devops/containers/**'
1213
- 'devops/scripts/install_drivers.sh'
1314
- 'devops/scripts/install_build_tools.sh'
@@ -26,11 +27,13 @@ jobs:
2627
container:
2728
image: ghcr.io/intel/llvm/sycl_ubuntu2004_nightly:no-drivers
2829
steps:
30+
- name: 'PR commits + 1'
31+
run: echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 1 ))" >> "${GITHUB_ENV}"
2932
- uses: actions/checkout@v2
3033
with:
3134
ref: ${{ github.event.pull_request.head.sha }}
3235
persist-credentials: false
33-
fetch-depth: 2
36+
fetch-depth: ${{ env.PR_FETCH_DEPTH }}
3437
- name: Run clang-format
3538
uses: ./devops/actions/clang-format
3639

.github/workflows/sycl_update_gpu_driver.yml

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,41 +18,13 @@ jobs:
1818
- name: Create Pull Request
1919
env:
2020
BRANCH: ci/update_gpu_driver-linux-${{ env.NEW_DRIVER_VERSION }}
21-
LLVMBOT_TOKEN: ${{ secrets.LLVM_MAIN_SYNC_BBSYCL_TOKEN }}
22-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21+
GITHUB_TOKEN: ${{ secrets.LLVM_MAIN_SYNC_BBSYCL_TOKEN }}
2322
run: |
2423
cd $GITHUB_WORKSPACE
25-
# Set fake identity to fulfil git requirements
2624
git config --global user.name "GitHub Actions"
2725
git config --global user.email "[email protected]"
2826
git checkout -B $BRANCH
2927
git add -u
30-
git commit -m "[GHA] Uplift GPU RT version for Linux CI" || exit 0 # exit if commit is empty
31-
git push https://[email protected]/${{ github.repository }} ${BRANCH}
32-
gh pr create --head $BRANCH --title "[GHA] Uplift GPU RT version for Linux CI" --body "Uplift GPU RT version for Linux to $NEW_DRIVER_VERSION"
33-
34-
update_driver_linux_staging:
35-
runs-on: ubuntu-latest
36-
if: github.repository == 'intel/llvm'
37-
steps:
38-
- uses: actions/checkout@v2
39-
- name: Update dependencies file
40-
run: |
41-
version="$(python3 devops/scripts/update_drivers.py linux_staging)"
42-
echo 'NEW_DRIVER_VERSION='$version >> $GITHUB_ENV
43-
- name: Update sycl Branch
44-
env:
45-
BRANCH: ci/update_gpu_driver-linux_staging-${{ env.NEW_DRIVER_VERSION }}
46-
LLVMBOT_TOKEN: ${{ secrets.LLVM_MAIN_SYNC_BBSYCL_TOKEN }}
47-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
48-
run: |
49-
cd $GITHUB_WORKSPACE
50-
# Set fake identity to fulfil git requirements
51-
git config --global user.name "GitHub Actions"
52-
git config --global user.email "[email protected]"
53-
git checkout -B $BRANCH
54-
git add -u
55-
git commit -m "[GHA] Uplift GPU RT version for Nightly Builds" || exit 0 # exit if commit is empty
56-
git push https://[email protected]/${{ github.repository }} ${BRANCH}
57-
gh pr create --head $BRANCH --title "[GHA] Uplift GPU RT version for Nightly Builds" --body "Uplift GPU RT version for Linux to $NEW_DRIVER_VERSION"
58-
28+
git commit -m "[GHA] Uplift Linux GPU RT version to $NEW_DRIVER_VERSION" || exit 0 # exit if commit is empty
29+
git push https://[email protected]/${{ github.repository }} ${BRANCH}
30+
gh pr create --head $BRANCH --title "[GHA] Uplift Linux GPU RT version to $NEW_DRIVER_VERSION" --body "Scheduled drivers uplift"

clang/include/clang/Basic/Attr.td

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,18 +1247,18 @@ def CUDAShared : InheritableAttr {
12471247
}
12481248
def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
12491249

1250+
def GlobalStorageNonLocalVar : SubsetSubject<Var,
1251+
[{S->hasGlobalStorage() &&
1252+
!S->isLocalVarDeclOrParm()}],
1253+
"global variables">;
1254+
12501255
def SYCLDevice : InheritableAttr {
12511256
let Spellings = [GNU<"sycl_device">];
1252-
let Subjects = SubjectList<[Function]>;
1257+
let Subjects = SubjectList<[Function, GlobalStorageNonLocalVar]>;
12531258
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
12541259
let Documentation = [SYCLDeviceDocs];
12551260
}
12561261

1257-
def GlobalStorageNonLocalVar : SubsetSubject<Var,
1258-
[{S->hasGlobalStorage() &&
1259-
!S->isLocalVarDeclOrParm()}],
1260-
"global variables">;
1261-
12621262
def SYCLGlobalVar : InheritableAttr {
12631263
let Spellings = [GNU<"sycl_global_var">];
12641264
let Subjects = SubjectList<[GlobalStorageNonLocalVar], ErrorDiag>;
@@ -2401,6 +2401,18 @@ def SYCLIntelFPGANofusion : StmtAttr {
24012401
let Documentation = [SYCLIntelFPGANofusionAttrDocs];
24022402
}
24032403

2404+
def SYCLIntelFPGAMaxReinvocationDelay : StmtAttr {
2405+
let Spellings = [CXX11<"intel", "max_reinvocation_delay">];
2406+
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
2407+
ErrorDiag, "'for', 'while', and 'do' statements">;
2408+
let Args = [ExprArgument<"NExpr">];
2409+
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
2410+
let IsStmtDependent = 1;
2411+
let Documentation = [SYCLIntelFPGAMaxReinvocationDelayAttrDocs];
2412+
}
2413+
def : MutualExclusions<[SYCLIntelFPGADisableLoopPipelining,
2414+
SYCLIntelFPGAMaxReinvocationDelay]>;
2415+
24042416
def IntelFPGALocalNonConstVar : SubsetSubject<Var,
24052417
[{S->hasLocalStorage() &&
24062418
S->getKind() != Decl::ImplicitParam &&

clang/include/clang/Basic/AttrDocs.td

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3314,7 +3314,7 @@ disables pipelining of the loop or function data path, causing the loop
33143314
or function to be executed serially. Cannot be used on the same loop or
33153315
function, or in conjunction with ``max_interleaving``,
33163316
``speculated_iterations``, ``max_concurrency``, ``initiation_interval``,
3317-
or ``ivdep``.
3317+
``ivdep``, or ``max_reinvocation_delay``.
33183318

33193319
.. code-block:: c++
33203320

@@ -3447,6 +3447,31 @@ loop should not be fused with any adjacent loop.
34473447
}];
34483448
}
34493449

3450+
def SYCLIntelFPGAMaxReinvocationDelayAttrDocs : Documentation {
3451+
let Category = DocCatVariable;
3452+
let Heading = "intel::max_reinvocation_delay";
3453+
let Content = [{
3454+
This attribute applies to a loop. Specifies the maximum number of cycles allowed
3455+
on the delay between the launch of the last iteration of a loop invocation and
3456+
the launch of the first iteration of a new loop invocation. Parameter N is
3457+
mandatory, and is a positive integer. Cannot be used on the same loop in
3458+
conjunction with disable_loop_pipelining.
3459+
3460+
.. code-block:: c++
3461+
3462+
void foo() {
3463+
int var = 0;
3464+
[[intel::max_reinvocation_delay(1)]]
3465+
for (int i = 0; sycl::log10((float)(x)) < 10; i++) var++;
3466+
}
3467+
3468+
template<int N>
3469+
void bar() {
3470+
[[intel::max_reinvocation_delay(N)]] for(;;) { }
3471+
}
3472+
}];
3473+
}
3474+
34503475
def SYCLIntelLoopFuseDocs : Documentation {
34513476
let Category = DocCatFunction;
34523477
let Heading = "loop_fuse, loop_fuse_independent";

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
342342
def warn_drv_sycl_offload_target_duplicate : Warning<
343343
"SYCL offloading target '%0' is similar to target '%1' already specified; "
344344
"will be ignored">, InGroup<SyclTarget>;
345+
def warn_drv_sycl_target_missing : Warning<
346+
"linked binaries do not contain expected '%0' target; found targets: '%1'">,
347+
InGroup<SyclTarget>;
345348
def err_drv_failed_to_deduce_target_from_arch : Error<
346349
"failed to deduce triple for target architecture '%0'; specify the triple "
347350
"using '-fopenmp-targets' and '-Xopenmp-target' instead.">;

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11727,6 +11727,9 @@ def err_sycl_restrict : Error<
1172711727
"|use a const static or global variable that is neither zero-initialized "
1172811728
"nor constant-initialized"
1172911729
"}0">;
11730+
def err_sycl_external_global : Error<
11731+
"invalid reference to 'device_global' variable; external 'device_global'"
11732+
" variable must be marked with SYCL_EXTERNAL macro">;
1173011733
def warn_sycl_kernel_too_big_args : Warning<
1173111734
"size of kernel arguments (%0 bytes) may exceed the supported maximum "
1173211735
"of %1 bytes on some devices">, InGroup<SyclStrict>, ShowInSystemHeader;
@@ -11758,9 +11761,12 @@ def err_sycl_function_attribute_mismatch : Error<
1175811761
"SYCL kernel without %0 attribute can't call a function with this attribute">;
1175911762
def err_sycl_x_y_z_arguments_must_be_one : Error<
1176011763
"all %0 attribute arguments must be '1' when the %1 attribute argument is '0'">;
11761-
def err_sycl_attribute_internal_function
11764+
def err_sycl_attribute_internal_decl
1176211765
: Error<"%0 attribute cannot be applied to a "
11763-
"static function or function in an anonymous namespace">;
11766+
"static %select{function|variable}1 or %select{function|variable}1 "
11767+
"in an anonymous namespace">;
11768+
def err_sycl_attribute_not_device_global
11769+
: Error<"%0 attribute can only be applied to 'device_global' variables">;
1176411770
def err_sycl_compiletime_property_duplication : Error<
1176511771
"can't apply %0 property twice to the same accessor">;
1176611772
def err_sycl_invalid_property_list_param_number : Error<

clang/include/clang/Driver/Driver.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,10 @@ class Driver {
769769
bool checkForOffloadStaticLib(Compilation &C,
770770
llvm::opt::DerivedArgList &Args) const;
771771

772+
/// Checks for any mismatch of targets and provided input binaries.
773+
void checkForOffloadMismatch(Compilation &C,
774+
llvm::opt::DerivedArgList &Args) const;
775+
772776
/// Track filename used for the FPGA dependency info.
773777
mutable llvm::StringMap<const std::string> FPGATempDepFiles;
774778

clang/include/clang/Driver/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2867,6 +2867,12 @@ defm sycl_instrument_device_code
28672867
BothFlags<[CC1Option, CoreOption], " Instrumentation and Tracing "
28682868
"Technology (ITT) instrumentation intrinsics calls "
28692869
"(experimental)">>;
2870+
def fsycl_link_huge_device_code : Flag<["-"], "fsycl-link-huge-device-code">,
2871+
Group<sycl_Group>, HelpText<"Generate and use a custom linker script for huge"
2872+
" device code sections">;
2873+
def fno_sycl_link_huge_device_code : Flag<["-"], "fno-sycl-link-huge-device-code">,
2874+
Group<sycl_Group>, HelpText<"Do not generate or use a custom linker script"
2875+
" for huge device code sections (default)">;
28702876
defm sycl_id_queries_fit_in_int: BoolFOption<"sycl-id-queries-fit-in-int",
28712877
LangOpts<"SYCLValueFitInMaxInt">, DefaultTrue,
28722878
PosFlag<SetTrue, [], "Assume">, NegFlag<SetFalse, [], "Do not assume">,

clang/include/clang/Sema/Sema.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,6 +2287,9 @@ class Sema final {
22872287
Expr *E);
22882288
SYCLIntelFPGALoopCoalesceAttr *
22892289
BuildSYCLIntelFPGALoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E);
2290+
SYCLIntelFPGAMaxReinvocationDelayAttr *
2291+
BuildSYCLIntelFPGAMaxReinvocationDelayAttr(const AttributeCommonInfo &CI,
2292+
Expr *E);
22902293

22912294
bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc);
22922295

clang/lib/CodeGen/CGLoopInfo.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,15 @@ MDNode *LoopInfo::createMetadata(
611611
llvm::Type::getInt32Ty(Ctx), VC.second))};
612612
LoopProperties.push_back(MDNode::get(Ctx, Vals));
613613
}
614+
615+
if (Attrs.SYCLMaxReinvocationDelayNCycles) {
616+
Metadata *Vals[] = {
617+
MDString::get(Ctx, "llvm.loop.intel.max_reinvocation_delay.count"),
618+
ConstantAsMetadata::get(
619+
ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
620+
*Attrs.SYCLMaxReinvocationDelayNCycles))};
621+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
622+
}
614623

615624
LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
616625
AdditionalLoopProperties.end());
@@ -645,6 +654,7 @@ void LoopAttributes::clear() {
645654
SYCLMaxInterleavingNInvocations.reset();
646655
SYCLSpeculatedIterationsNIterations.reset();
647656
SYCLIntelFPGAVariantCount.clear();
657+
SYCLMaxReinvocationDelayNCycles.reset();
648658
UnrollCount = 0;
649659
UnrollAndJamCount = 0;
650660
VectorizeEnable = LoopAttributes::Unspecified;
@@ -681,6 +691,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
681691
!Attrs.SYCLMaxInterleavingNInvocations &&
682692
!Attrs.SYCLSpeculatedIterationsNIterations &&
683693
Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 &&
694+
!Attrs.SYCLMaxReinvocationDelayNCycles &&
684695
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
685696
Attrs.PipelineInitiationInterval == 0 &&
686697
Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
@@ -1012,6 +1023,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10121023
// emitted
10131024
// For attribute nofusion:
10141025
// 'llvm.loop.fusion.disable' metadata will be emitted
1026+
// For attribute max_reinvocation_delay:
1027+
// n - 'llvm.loop.intel.max_reinvocation_delay.count, i32 n' metadata will be
1028+
// emitted
10151029
for (const auto *A : Attrs) {
10161030
if (const auto *IntelFPGAIVDep = dyn_cast<SYCLIntelFPGAIVDepAttr>(A))
10171031
addSYCLIVDepInfo(Header->getContext(), IntelFPGAIVDep->getSafelenValue(),
@@ -1076,6 +1090,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10761090

10771091
if (isa<SYCLIntelFPGANofusionAttr>(A))
10781092
setSYCLNofusionEnable();
1093+
1094+
if (const auto *IntelFPGAMaxReinvocationDelay =
1095+
dyn_cast<SYCLIntelFPGAMaxReinvocationDelayAttr>(A)) {
1096+
const auto *CE = cast<ConstantExpr>(
1097+
IntelFPGAMaxReinvocationDelay->getNExpr());
1098+
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
1099+
setSYCLMaxReinvocationDelayNCycles(ArgVal.getSExtValue());
1100+
}
10791101
}
10801102

10811103
setMustProgress(MustProgress);

clang/lib/CodeGen/CGLoopInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ struct LoopAttributes {
134134
/// Value for llvm.loop.intel.speculated.iterations.count metadata.
135135
llvm::Optional<unsigned> SYCLSpeculatedIterationsNIterations;
136136

137+
// Value for llvm.loop.intel.max_reinvocation_delay metadata.
138+
llvm::Optional<unsigned> SYCLMaxReinvocationDelayNCycles;
139+
137140
/// llvm.unroll.
138141
unsigned UnrollCount;
139142

@@ -410,6 +413,11 @@ class LoopInfoStack {
410413
/// Set no progress for the next loop pushed.
411414
void setMustProgress(bool P) { StagedAttrs.MustProgress = P; }
412415

416+
/// Set value of max reinvocation delay for the next loop pushed.
417+
void setSYCLMaxReinvocationDelayNCycles(unsigned C) {
418+
StagedAttrs.SYCLMaxReinvocationDelayNCycles = C;
419+
}
420+
413421
private:
414422
/// Returns true if there is LoopInfo on the stack.
415423
bool hasInfo() const { return !Active.empty(); }

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -840,13 +840,15 @@ void CodeGenModule::Release() {
840840
llvm::MDString::get(Ctx, CodeGenOpts.MemoryProfileOutput));
841841
}
842842

843-
if ((LangOpts.CUDAIsDevice || LangOpts.isSYCL()) && getTriple().isNVPTX()) {
843+
if ((LangOpts.CUDAIsDevice || LangOpts.SYCLIsDevice) && getTriple().isNVPTX()) {
844844
// Indicate whether __nvvm_reflect should be configured to flush denormal
845845
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
846846
// property.)
847847
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
848-
CodeGenOpts.FP32DenormalMode.Output !=
849-
llvm::DenormalMode::IEEE);
848+
(CodeGenOpts.FP32DenormalMode.Output !=
849+
llvm::DenormalMode::IEEE) ||
850+
(CodeGenOpts.FPDenormalMode.Output !=
851+
llvm::DenormalMode::IEEE));
850852
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-prec-sqrt",
851853
getTarget().getTargetOpts().NVVMCudaPrecSqrt);
852854
}

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,54 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
5151
StringRef suffix) {
5252
SmallString<256> TypeName;
5353
llvm::raw_svector_ostream OS(TypeName);
54+
// If RD is spirv_JointMatrixINTEL type, mangle differently.
55+
if (CGM.getTriple().isSPIRV() || CGM.getTriple().isSPIR()) {
56+
if (RD->getQualifiedNameAsString() == "__spv::__spirv_JointMatrixINTEL") {
57+
if (auto TemplateDecl = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
58+
ArrayRef<TemplateArgument> TemplateArgs =
59+
TemplateDecl->getTemplateArgs().asArray();
60+
OS << "spirv.JointMatrixINTEL.";
61+
for (auto &TemplateArg : TemplateArgs) {
62+
OS << "_";
63+
if (TemplateArg.getKind() == TemplateArgument::Type) {
64+
llvm::Type *TTy = ConvertType(TemplateArg.getAsType());
65+
if (TTy->isIntegerTy()) {
66+
switch (TTy->getIntegerBitWidth()) {
67+
case 8:
68+
OS << "char";
69+
break;
70+
case 16:
71+
OS << "short";
72+
break;
73+
case 32:
74+
OS << "int";
75+
break;
76+
case 64:
77+
OS << "long";
78+
break;
79+
default:
80+
OS << "i" << TTy->getIntegerBitWidth();
81+
break;
82+
}
83+
} else if (TTy->isBFloatTy())
84+
OS << "bfloat16";
85+
else if (TTy->isStructTy()) {
86+
StringRef LlvmTyName = TTy->getStructName();
87+
// Emit half/bfloat16 for sycl[::*]::{half,bfloat16}
88+
if (LlvmTyName.startswith("class.sycl::") ||
89+
LlvmTyName.startswith("class.__sycl_internal::"))
90+
LlvmTyName = LlvmTyName.rsplit("::").second;
91+
OS << LlvmTyName;
92+
} else
93+
TTy->print(OS, false, true);
94+
} else if (TemplateArg.getKind() == TemplateArgument::Integral)
95+
OS << TemplateArg.getAsIntegral();
96+
}
97+
Ty->setName(OS.str());
98+
return;
99+
}
100+
}
101+
}
54102
OS << RD->getKindName() << '.';
55103

56104
// FIXME: We probably want to make more tweaks to the printing policy. For

0 commit comments

Comments
 (0)