Skip to content

Commit ab66ccc

Browse files
committed
Merge from 'sycl' to 'sycl-web' (4 commits)
2 parents c281123 + 3d73d9b commit ab66ccc

File tree

27 files changed

+412
-187
lines changed

27 files changed

+412
-187
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
#include "llvm/SYCLLowerIR/RecordSYCLAspectNames.h"
5757
#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h"
5858
#include "llvm/SYCLLowerIR/SYCLConditionalCallOnDevice.h"
59+
#include "llvm/SYCLLowerIR/SYCLCreateNVVMAnnotations.h"
5960
#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h"
6061
#include "llvm/SYCLLowerIR/SYCLPropagateJointMatrixUsage.h"
6162
#include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h"
@@ -1156,6 +1157,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
11561157
// and before cleaning up metadata)
11571158
MPM.addPass(RecordSYCLAspectNamesPass());
11581159

1160+
if (TargetTriple.isNVPTX())
1161+
MPM.addPass(SYCLCreateNVVMAnnotationsPass());
1162+
11591163
// Remove SYCL metadata added by the frontend, like sycl_aspects
11601164
// Note, this pass should be at the end of the pipeline
11611165
MPM.addPass(CleanupSYCLMetadataPass());

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -297,80 +297,6 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
297297
addNVVMMetadata(F, "grid_constant", GridConstantParamIdxs);
298298
}
299299
}
300-
bool HasMaxWorkGroupSize = false;
301-
bool HasMinWorkGroupPerCU = false;
302-
if (const auto *MWGS = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
303-
HasMaxWorkGroupSize = true;
304-
// We must index-flip between SYCL's notation, X,Y,Z (aka dim0,dim1,dim2)
305-
// with the fastest-moving dimension rightmost, to CUDA's, where X is the
306-
// fastest-moving dimension.
307-
addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal());
308-
addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal());
309-
addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal());
310-
}
311-
312-
if (const auto *RWGS = FD->getAttr<SYCLReqdWorkGroupSizeAttr>()) {
313-
llvm::SmallVector<std::optional<int64_t>, 3> Ops;
314-
// Index-flip and pad out any missing elements. Note the misleading
315-
// nomenclature of the methods: getXDimVal doesn't return the X dimension;
316-
// it returns the left-most dimension (dim0). This could correspond to
317-
// CUDA's X, Y, or Z, depending on the number of operands provided.
318-
if (auto Dim0 = RWGS->getXDimVal())
319-
Ops.push_back(Dim0->getExtValue());
320-
if (auto Dim1 = RWGS->getYDimVal())
321-
Ops.push_back(Dim1->getExtValue());
322-
if (auto Dim2 = RWGS->getZDimVal())
323-
Ops.push_back(Dim2->getExtValue());
324-
std::reverse(Ops.begin(), Ops.end());
325-
Ops.append(3 - Ops.size(), std::nullopt);
326-
327-
// Work-group sizes (in NVVM annotations) must be positive and less than
328-
// INT32_MAX, whereas SYCL can allow for larger work-group sizes (see
329-
// -fno-sycl-id-queries-fit-in-int). If any dimension is too large for
330-
// NVPTX, don't emit any annotation at all.
331-
if (llvm::all_of(Ops, [](std::optional<int64_t> V) {
332-
return !V || llvm::isUInt<31>(*V);
333-
})) {
334-
if (auto X = Ops[0])
335-
addNVVMMetadata(F, "reqntidx", *X);
336-
if (auto Y = Ops[1])
337-
addNVVMMetadata(F, "reqntidy", *Y);
338-
if (auto Z = Ops[2])
339-
addNVVMMetadata(F, "reqntidz", *Z);
340-
}
341-
}
342-
343-
auto attrValue = [&](Expr *E) {
344-
const auto *CE = cast<ConstantExpr>(E);
345-
std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
346-
return Val->getZExtValue();
347-
};
348-
349-
if (const auto *MWGPCU =
350-
FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
351-
if (!HasMaxWorkGroupSize && FD->hasAttr<OpenCLKernelAttr>()) {
352-
M.getDiags().Report(D->getLocation(),
353-
diag::warn_launch_bounds_missing_attr)
354-
<< MWGPCU << 0;
355-
} else {
356-
// The value is guaranteed to be > 0, pass it to the metadata.
357-
addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue()));
358-
HasMinWorkGroupPerCU = true;
359-
}
360-
}
361-
362-
if (const auto *MWGPMP =
363-
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
364-
if ((!HasMaxWorkGroupSize || !HasMinWorkGroupPerCU) &&
365-
FD->hasAttr<OpenCLKernelAttr>()) {
366-
M.getDiags().Report(D->getLocation(),
367-
diag::warn_launch_bounds_missing_attr)
368-
<< MWGPMP << 1;
369-
} else {
370-
// The value is guaranteed to be > 0, pass it to the metadata.
371-
addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
372-
}
373-
}
374300
}
375301

376302
// Perform special handling in CUDA mode.

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11271,6 +11271,14 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1127111271
CmdArgs.push_back(Args.MakeArgString(
1127211272
Twine("-sycl-device-library-location=") + DeviceLibDir));
1127311273

11274+
if (C.getDriver().isDumpDeviceCodeEnabled()) {
11275+
SmallString<128> DumpDir;
11276+
Arg *A = C.getArgs().getLastArg(options::OPT_fsycl_dump_device_code_EQ);
11277+
DumpDir = A ? A->getValue() : "";
11278+
CmdArgs.push_back(
11279+
Args.MakeArgString(Twine("-sycl-dump-device-code=") + DumpDir));
11280+
}
11281+
1127411282
auto appendOption = [](SmallString<128> &OptString, StringRef AddOpt) {
1127511283
if (!OptString.empty())
1127611284
OptString += " ";

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4086,11 +4086,17 @@ bool static check32BitInt(const Expr *E, Sema &S, llvm::APSInt &I,
40864086

40874087
void Sema::AddSYCLIntelMinWorkGroupsPerComputeUnitAttr(
40884088
Decl *D, const AttributeCommonInfo &CI, Expr *E) {
4089-
if (Context.getLangOpts().SYCLIsDevice &&
4090-
!Context.getTargetInfo().getTriple().isNVPTX()) {
4091-
Diag(E->getBeginLoc(), diag::warn_launch_bounds_is_cuda_specific)
4092-
<< CI << E->getSourceRange();
4093-
return;
4089+
if (Context.getLangOpts().SYCLIsDevice) {
4090+
if (!Context.getTargetInfo().getTriple().isNVPTX()) {
4091+
Diag(E->getBeginLoc(), diag::warn_launch_bounds_is_cuda_specific)
4092+
<< CI << E->getSourceRange();
4093+
return;
4094+
}
4095+
4096+
if (!D->hasAttr<SYCLIntelMaxWorkGroupSizeAttr>()) {
4097+
Diag(CI.getLoc(), diag::warn_launch_bounds_missing_attr) << CI << 0;
4098+
return;
4099+
}
40944100
}
40954101
if (!E->isValueDependent()) {
40964102
// Validate that we have an integer constant expression and then store the
@@ -4152,6 +4158,12 @@ void Sema::AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr(
41524158
<< OffloadArchToString(SM) << CI << E->getSourceRange();
41534159
return;
41544160
}
4161+
4162+
if (!D->hasAttr<SYCLIntelMaxWorkGroupSizeAttr>() ||
4163+
!D->hasAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
4164+
Diag(CI.getLoc(), diag::warn_launch_bounds_missing_attr) << CI << 1;
4165+
return;
4166+
}
41554167
}
41564168
if (!E->isValueDependent()) {
41574169
// Validate that we have an integer constant expression and then store the

clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -48,37 +48,6 @@ int main() {
4848
// CHECK: define dso_local void @{{.*}}kernel_name2() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC:[0-9]+]] !max_work_groups_per_mp ![[MWGPM:[0-9]+]] !max_work_group_size ![[MWGS:[0-9]+]]
4949
// CHECK: define dso_local void @{{.*}}kernel_name3() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM]] !max_work_group_size ![[MWGS_2:[0-9]+]]
5050

51-
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 8}
52-
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidy", i32 4}
53-
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidz", i32 2}
54-
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minctasm", i32 2}
55-
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxclusterrank", i32 4}
56-
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 8}
57-
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidy", i32 4}
58-
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidz", i32 2}
59-
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minctasm", i32 2}
60-
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxclusterrank", i32 4}
61-
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 8}
62-
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidy", i32 4}
63-
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidz", i32 2}
64-
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minctasm", i32 2}
65-
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxclusterrank", i32 4}
66-
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 8}
67-
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidy", i32 4}
68-
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidz", i32 2}
69-
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minctasm", i32 2}
70-
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxclusterrank", i32 4}
71-
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 8}
72-
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidy", i32 4}
73-
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidz", i32 6}
74-
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minctasm", i32 6}
75-
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxclusterrank", i32 6}
76-
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 8}
77-
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidy", i32 4}
78-
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidz", i32 6}
79-
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minctasm", i32 6}
80-
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxclusterrank", i32 6}
81-
8251
// CHECK: ![[MWGPC]] = !{i32 2}
8352
// CHECK: ![[MWGPM]] = !{i32 4}
8453
// CHECK: ![[MWGS]] = !{i32 8, i32 4, i32 2}

clang/test/CodeGenSYCL/reqd-work-group-size.cpp

Lines changed: 2 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
22
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
3-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX
4-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX
3+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
55

66
#include "sycl.hpp"
77

@@ -123,55 +123,6 @@ int main() {
123123
// CHECK: define {{.*}} void @{{.*}}kernel_name22() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]]
124124
// CHECK: define {{.*}} void @{{.*}}kernel_name24() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]]
125125

126-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidx", i32 16}
127-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidy", i32 16}
128-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidz", i32 32}
129-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidx", i32 8}
130-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidy", i32 8}
131-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidz", i32 8}
132-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidx", i32 2}
133-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidy", i32 2}
134-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidz", i32 2}
135-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidx", i32 2}
136-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidy", i32 8}
137-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidz", i32 1}
138-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidx", i32 16}
139-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidy", i32 16}
140-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidz", i32 32}
141-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidx", i32 8}
142-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidy", i32 8}
143-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidz", i32 8}
144-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidx", i32 2}
145-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidy", i32 2}
146-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidz", i32 2}
147-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidx", i32 2}
148-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidy", i32 8}
149-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidz", i32 1}
150-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidx", i32 16}
151-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidy", i32 32}
152-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name13, !"reqntidz"
153-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidx", i32 8}
154-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidy", i32 8}
155-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name15, !"reqntidz"
156-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidx", i32 2}
157-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidy", i32 2}
158-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name16, !"reqntidz"
159-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidx", i32 8}
160-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidy", i32 1}
161-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name18, !"reqntidz"
162-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name19, !"reqntidx", i32 32}
163-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidy",
164-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidz",
165-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name21, !"reqntidx", i32 8}
166-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidy",
167-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidz",
168-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name22, !"reqntidx", i32 2}
169-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidy",
170-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidz",
171-
// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name24, !"reqntidx", i32 1}
172-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidy",
173-
// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidz",
174-
175126
// CHECK: ![[NDRWGS3D]] = !{i32 3}
176127
// CHECK: ![[WGSIZE3D32]] = !{i32 16, i32 16, i32 32}
177128
// CHECK: ![[WGSIZE3D88]] = !{i32 8, i32 8, i32 8}

clang/test/Driver/linker-wrapper-sycl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@
4040
// RUN: clang-linker-wrapper -sycl-device-libraries=%t.devicelib.o -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-unknown-linux-gnu" "--triple=spir64" "--linker-path=/usr/bin/ld" -shared "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-SHARED %s
4141
// CHK-SHARED: "{{.*}}llc"{{.*}} -relocation-model=pic
4242

43+
// RUN: rm %T/linker_wrapper_dump || true
44+
// RUN: clang-linker-wrapper -sycl-dump-device-code=%T/linker_wrapper_dump -sycl-device-libraries=%t.devicelib.o "--host-triple=x86_64-unknown-linux-gnu" "--triple=spir64" "--linker-path=/usr/bin/ld" -shared "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t.o --dry-run
45+
// RUN: ls %T/linker_wrapper_dump | FileCheck -check-prefix=CHK-SYCL-DUMP-DEVICE %s
46+
// CHK-SYCL-DUMP-DEVICE: {{.*}}.spv
47+
4348
/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for Intel GPU)
4449
// -------
4550
// Generate .o file as linker wrapper input.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// SYCL offloading tests using -fsycl-dump-device-code
2+
3+
// Verify that -fsycl-dump-device-code passes the option to
4+
// clang-linker-wrapper in the new offload model.
5+
6+
// clang -fsycl --offload-new-driver -target x86_64-unknown-linux-gnu
7+
// RUN: %clang -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -target x86_64-unknown-linux-gnu -fsycl-dump-device-code=/user/input/path %s -### 2>&1 \
8+
// RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-DUMP-DEVICE-CODE-NEW-OFFLOAD
9+
10+
// clang -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown
11+
// RUN: %clang -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown -target x86_64-unknown-linux-gnu -fsycl-dump-device-code=/user/input/path %s -### 2>&1 \
12+
// RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-DUMP-DEVICE-CODE-NEW-OFFLOAD
13+
14+
// clang --driver-mode=g++
15+
// RUN: %clangxx -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown -target x86_64-unknown-linux-gnu -fsycl-dump-device-code=/user/input/path %s -### 2>&1 \
16+
// RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-DUMP-DEVICE-CODE-NEW-OFFLOAD
17+
18+
// Windows
19+
// RUN: %clang_cl -fsycl --offload-new-driver -fsycl-dump-device-code=/user/input/path %s -### 2>&1 \
20+
// RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-DUMP-DEVICE-CODE-WIN-NEW-OFFLOAD
21+
22+
// CHK-FSYCL-DUMP-DEVICE-CODE-NEW-OFFLOAD: clang-linker-wrapper{{.*}} "-sycl-dump-device-code=/user/input/path"
23+
// CHK-FSYCL-DUMP-DEVICE-CODE-WIN-NEW-OFFLOAD: clang-linker-wrapper{{.*}} "-sycl-dump-device-code=/user/input/path"
24+
25+
// Linux
26+
// RUN: %clang -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown -target x86_64-unknown-linux-gnu -fsycl-dump-device-code= %s -### 2>&1 \
27+
// RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-DUMP-DEVICE-CODE-CWD-NEW-OFFLOAD
28+
29+
// Windows
30+
// RUN: %clang_cl -fsycl --offload-new-driver -fsycl-dump-device-code= %s -### 2>&1 \
31+
// RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-DUMP-DEVICE-CODE-WIN-CWD-NEW-OFFLOAD
32+
33+
// CHK-FSYCL-DUMP-DEVICE-CODE-CWD-NEW-OFFLOAD: clang-linker-wrapper{{.*}} "-sycl-dump-device-code="
34+
// CHK-FSYCL-DUMP-DEVICE-CODE-WIN-CWD-NEW-OFFLOAD: clang-linker-wrapper{{.*}} "-sycl-dump-device-code="

clang/test/SemaSYCL/lb_sm_70.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,14 @@ int main() {
4848
intel::min_work_groups_per_cu(4),
4949
intel::min_work_groups_per_cu(8)]] () { volatile int A = 42; });
5050

51-
// expected-error@+2 {{'min_work_groups_per_cu' attribute requires a non-negative integral compile time constant expression}}
51+
// expected-warning@+2 {{'min_work_groups_per_cu' attribute ignored, as it requires: maximum work group size to be also specified}}
5252
cgh.single_task<class T5>(
53-
[=] [[intel::min_work_groups_per_cu(-8)]] () { volatile int A = 42; });
53+
[=] [[intel::min_work_groups_per_cu(8)]] () { volatile int A = 42; });
54+
55+
// expected-error@+3 {{'min_work_groups_per_cu' attribute requires a non-negative integral compile time constant expression}}
56+
cgh.single_task<class T5>(
57+
[=] [[intel::max_work_group_size(1, 1, 256),
58+
intel::min_work_groups_per_cu(-8)]] () { volatile int A = 42; });
5459
});
5560

5661
Q.submit([&](sycl::handler &cgh) {

clang/test/SemaSYCL/lb_sm_90.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ template <int N1, int N2, int N3> class Functor {
1515
operator()() const {}
1616
};
1717

18-
// expected-warning@+1 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
1918
template <int N1, int N2> class Functor_2 {
2019
public:
20+
// expected-warning@+2 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
2121
[[intel::max_work_group_size(1, 1, N1),
2222
intel::max_work_groups_per_mp(N2)]] void
2323
operator()() const {}
@@ -32,7 +32,7 @@ int main() {
3232
intel::min_work_groups_per_cu(2),
3333
intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });
3434

35-
// expected-warning@+2 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
35+
// expected-warning@+3 {{'max_work_groups_per_mp' attribute ignored, as it requires: maximum work group size and minimum work groups per compute unit to be also specified}}
3636
cgh.single_task<class T2>(
3737
[=] [[intel::max_work_group_size(1, 1, 256),
3838
intel::max_work_groups_per_mp(4)]] () { volatile int A = 42; });

0 commit comments

Comments
 (0)