Skip to content

Commit 311ba07

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into private/asachkov/fix-integer-vec-conversions
This commit also fixes conversions to/from `std::byte`
2 parents e0d167c + 096676e commit 311ba07

File tree

325 files changed

+9618
-8682
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

325 files changed

+9618
-8682
lines changed

.github/CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ xptifw/ @intel/llvm-reviewers-runtime
5959

6060
# DPC++ tools
6161
llvm/ @intel/dpcpp-tools-reviewers
62+
clang/lib/CodeGen/BackendUtil.cpp @intel/dpcpp-cfe-reviewers @intel/dpcpp-tools-reviewers
63+
clang/include/clang/CodeGen/BackendUtil.h @intel/dpcpp-cfe-reviewers @intel/dpcpp-tools-reviewers
6264

6365
# OpenCL CPU RT
6466
opencl-cpu/ @intel/ocl-cpu-rt-write

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ guidelines.
8181

8282
## Late-outline OpenMP\* and OpenMP\* Offload
8383

84-
See [openmp](/tree/openmp) branch.
84+
See [openmp](/openmp) branch.
8585

8686
# License
8787

clang/include/clang/Basic/DiagnosticCommonKinds.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,4 +436,10 @@ def warn_try_not_valid_on_target : Warning<
436436
"target '%0' does not support exception handling;"
437437
" 'catch' block is ignored">,
438438
InGroup<OpenMPTargetException>;
439+
440+
// Launch bound
441+
def warn_launch_bounds_missing_attr: Warning<
442+
"%0 attribute ignored, as it requires: maximum work group size"
443+
"%select{| and minimum work groups per compute unit}1 to be also specified">,
444+
InGroup<IgnoredAttributes>;
439445
}

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -245,29 +245,47 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
245245
// And kernel functions are not subject to inlining
246246
F->addFnAttr(llvm::Attribute::NoInline);
247247
}
248+
bool HasMaxWorkGroupSize = false;
249+
bool HasMinWorkGroupPerCU = false;
248250
if (const auto *MWGS = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
249251
auto MaxThreads = (*MWGS->getZDimVal()).getExtValue() *
250252
(*MWGS->getYDimVal()).getExtValue() *
251253
(*MWGS->getXDimVal()).getExtValue();
252-
if (MaxThreads > 0)
254+
if (MaxThreads > 0) {
253255
addNVVMMetadata(F, "maxntidx", MaxThreads);
256+
HasMaxWorkGroupSize = true;
257+
}
258+
}
254259

255-
auto attrValue = [&](Expr *E) {
256-
const auto *CE = cast<ConstantExpr>(E);
257-
std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
258-
return Val->getZExtValue();
259-
};
260-
261-
if (const auto *MWGPCU =
262-
FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
260+
auto attrValue = [&](Expr *E) {
261+
const auto *CE = cast<ConstantExpr>(E);
262+
std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
263+
return Val->getZExtValue();
264+
};
265+
266+
if (const auto *MWGPCU =
267+
FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
268+
if (!HasMaxWorkGroupSize && FD->hasAttr<OpenCLKernelAttr>()) {
269+
M.getDiags().Report(D->getLocation(),
270+
diag::warn_launch_bounds_missing_attr)
271+
<< MWGPCU << 0;
272+
} else {
263273
// The value is guaranteed to be > 0, pass it to the metadata.
264274
addNVVMMetadata(F, "minnctapersm", attrValue(MWGPCU->getValue()));
275+
HasMinWorkGroupPerCU = true;
276+
}
277+
}
265278

266-
if (const auto *MWGPMP =
267-
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
268-
// The value is guaranteed to be > 0, pass it to the metadata.
269-
addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
270-
}
279+
if (const auto *MWGPMP =
280+
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
281+
if ((!HasMaxWorkGroupSize || !HasMinWorkGroupPerCU) &&
282+
FD->hasAttr<OpenCLKernelAttr>()) {
283+
M.getDiags().Report(D->getLocation(),
284+
diag::warn_launch_bounds_missing_attr)
285+
<< MWGPMP << 1;
286+
} else {
287+
// The value is guaranteed to be > 0, pass it to the metadata.
288+
addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
271289
}
272290
}
273291
}

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5398,8 +5398,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
53985398
break;
53995399
}
54005400
}
5401-
if (HasFPGA) {
5401+
// At -O0, imply -fsycl-disable-range-rounding.
5402+
bool DisableRangeRounding = false;
5403+
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
5404+
if (A->getOption().matches(options::OPT_O0))
5405+
DisableRangeRounding = true;
5406+
}
5407+
if (DisableRangeRounding || HasFPGA)
54025408
CmdArgs.push_back("-fsycl-disable-range-rounding");
5409+
5410+
if (HasFPGA) {
54035411
// Pass -fintelfpga to both the host and device SYCL compilations if set.
54045412
CmdArgs.push_back("-fintelfpga");
54055413
}
@@ -6106,10 +6114,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
61066114
A->render(Args, CmdArgs);
61076115
}
61086116

6109-
if (Arg *A = Args.getLastArg(options::OPT_faltmathlib_EQ)) {
6110-
StringRef Name = A->getValue();
6117+
if (Arg *A = Args.getLastArg(options::OPT_faltmathlib_EQ))
61116118
A->render(Args, CmdArgs);
6112-
}
61136119

61146120
if (Args.hasFlag(options::OPT_fmerge_all_constants,
61156121
options::OPT_fno_merge_all_constants, false))

clang/test/Driver/sycl-offload.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,26 @@
428428
// RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s
429429
// CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2"
430430

431+
/// -fsycl-disable-range-rounding settings
432+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
433+
// RUN: -fsycl-targets=spir64 -O0 %s 2>&1 \
434+
// RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s
435+
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 -Od %s 2>&1 \
436+
// RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s
437+
// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-disable-range-rounding"
438+
439+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
440+
// RUN: -fsycl-targets=spir64 -O2 %s 2>&1 \
441+
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
442+
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 -O2 %s 2>&1 \
443+
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
444+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
445+
// RUN: -fsycl-targets=spir64 %s 2>&1 \
446+
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
447+
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 %s 2>&1 \
448+
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
449+
// CHK-RANGE-ROUNDING-NOT: "-fsycl-disable-range-rounding"
450+
431451
/// ###########################################################################
432452

433453
/// Verify that triple-boundarch pairs are correct with multi-targetting

clang/test/SemaSYCL/lb_sm_90.cpp

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
// RUN: %clang_cc1 -internal-isystem %S/Inputs %s -triple nvptx64-nvidia-cuda -target-cpu sm_90 -fsycl-is-device -fsyntax-only -Wno-c++23-extensions -verify
2-
// expected-no-diagnostics
1+
// RUN: %clang_cc1 -internal-isystem %S/Inputs %s -triple nvptx64-nvidia-cuda -target-cpu sm_90 -fsycl-is-device -fsyntax-only -Wno-c++23-extensions -verify -S
32

43
// Maximum work groups per multi-processor, mapped to maxclusterrank PTX
5-
// directive, is an SM_90 feature, make sure that no warnings/errors are issued.
4+
// directive, is an SM_90 feature. Attributes need to be used in sequence:
5+
// max_work_group_size, min_work_groups_per_cu, max_work_groups_per_mp, warn on
6+
// missing attributes in sequences.
67

78
#include "sycl.hpp"
89

@@ -13,19 +14,44 @@ template <int N1, int N2, int N3> class Functor {
1314
operator()() const {}
1415
};
1516

17+
// expected-warning@+1 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
18+
template <int N1, int N2> class Functor_2 {
19+
public:
20+
[[intel::max_work_group_size(1, 1, N1),
21+
intel::max_work_groups_per_mp(N2)]] void
22+
operator()() const {}
23+
};
24+
1625
int main() {
1726
sycl::queue Q{};
1827

1928
Q.submit([&](sycl::handler &cgh) {
20-
cgh.single_task<class T1>( [=] [[intel::max_work_group_size(1, 1, 256),
21-
intel::min_work_groups_per_cu(2),
22-
intel::max_work_groups_per_mp(4)]] (
23-
) { volatile int A = 42; });
29+
cgh.single_task<class T1>(
30+
[=] [[intel::max_work_group_size(1, 1, 256),
31+
intel::min_work_groups_per_cu(2),
32+
intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });
33+
34+
// expected-warning@+2 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
35+
cgh.single_task<class T2>(
36+
[=] [[intel::max_work_group_size(1, 1, 256),
37+
intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });
38+
39+
// expected-warning@+2 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
40+
cgh.single_task<class T3>(
41+
[=] [[intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });
42+
43+
// expected-warning@+2 {{'min_work_groups_per_cu' attribute ignored, as it requires: maximum work group size to be also specified}} cgh.single_task<class T4>(
44+
cgh.single_task<class T4>(
45+
[=] [[intel::min_work_groups_per_cu(4)]] () { volatile int A = 42; });
2446
});
2547

2648
Q.submit([&](sycl::handler &cgh) {
2749
cgh.single_task<class F>(Functor<512, 8, 16>{});
2850
});
2951

52+
Q.submit([&](sycl::handler &cgh) {
53+
cgh.single_task<class F2>(Functor_2<512, 8>{});
54+
});
55+
3056
return 0;
3157
}

llvm/lib/SYCLLowerIR/CompileTimeProperties.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,8 @@ SYCL_COMPILE_TIME_PROPERTY("sycl-bi-directional-ports-true", 5885,
6060
// SPIR-V Spec: https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/INTEL/SPV_INTEL_cache_controls.asciidoc
6161
SYCL_COMPILE_TIME_PROPERTY("sycl-prefetch-hint", 6442, DecorValueTy::uint32)
6262
SYCL_COMPILE_TIME_PROPERTY("sycl-prefetch-hint-nt", 6442, DecorValueTy::uint32)
63+
64+
// The corresponding SPIR-V OpCodes for cache control properties
65+
SYCL_COMPILE_TIME_PROPERTY("sycl-cache-read-hint", 6442, DecorValueTy::uint32)
66+
SYCL_COMPILE_TIME_PROPERTY("sycl-cache-read-assertion", 6442, DecorValueTy::uint32)
67+
SYCL_COMPILE_TIME_PROPERTY("sycl-cache-write-hint", 6443, DecorValueTy::uint32)

0 commit comments

Comments
 (0)