Skip to content

Commit 9431687

Browse files
committed
PR feedback
1 parent 27d973e commit 9431687

File tree

7 files changed

+120
-127
lines changed

7 files changed

+120
-127
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -758,23 +758,23 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
758758
llvm::MDNode::get(Context, AttrMDArgs));
759759
}
760760

761-
if (const auto *A = FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
762-
const auto *CE = cast<ConstantExpr>(A->getValue());
761+
auto attrAsMDArg = [&](Expr *E) {
762+
const auto *CE = cast<ConstantExpr>(E);
763763
std::optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
764-
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
765-
Builder.getInt32(ArgVal->getSExtValue()))};
764+
assert(ArgVal.has_value() && "Failed to obtain attribute value.");
765+
return llvm::ConstantAsMetadata::get(
766+
Builder.getInt32(ArgVal->getSExtValue()));
767+
};
768+
769+
if (const auto *A = FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
766770
Fn->setMetadata("min_work_groups_per_cu",
767-
llvm::MDNode::get(Context, AttrMDArgs));
771+
llvm::MDNode::get(Context, {attrAsMDArg(A->getValue())}));
768772
}
769773

770774
if (const auto *A =
771775
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
772-
const auto *CE = cast<ConstantExpr>(A->getValue());
773-
std::optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
774-
llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
775-
Builder.getInt32(ArgVal->getSExtValue()))};
776776
Fn->setMetadata("max_work_groups_per_mp",
777-
llvm::MDNode::get(Context, AttrMDArgs));
777+
llvm::MDNode::get(Context, {attrAsMDArg(A->getValue())}));
778778
}
779779

780780
if (const SYCLIntelMaxWorkGroupSizeAttr *A =

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -251,23 +251,24 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
251251
(*MWGS->getXDimVal()).getExtValue();
252252
if (MaxThreads > 0)
253253
addNVVMMetadata(F, "maxntidx", MaxThreads);
254-
}
255-
if (const auto *MWGPCU =
256-
FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
257-
auto *MinWorkGroups = MWGPCU->getValue();
258-
if (const auto *CE = dyn_cast<ConstantExpr>(MinWorkGroups)) {
259-
auto MinVal = CE->getResultAsAPSInt();
260-
// The value is guaranteed to be > 0, pass it to the metadata.
261-
addNVVMMetadata(F, "minnctapersm", MinVal.getExtValue());
262-
}
263-
}
264-
if (const auto *MWGPMP =
265-
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
266-
auto *MaxWorkGroups = MWGPMP->getValue();
267-
if (const auto *CE = dyn_cast<ConstantExpr>(MaxWorkGroups)) {
268-
auto MaxVal = CE->getResultAsAPSInt();
254+
255+
auto attrValue = [&](Expr *E) {
256+
const auto *CE = cast<ConstantExpr>(E);
257+
std::optional<llvm::APInt> Val = CE->getResultAsAPSInt();
258+
assert(Val.has_value() && "Failed to get attribute value.");
259+
return Val->getZExtValue();
260+
};
261+
262+
if (const auto *MWGPCU =
263+
FD->getAttr<SYCLIntelMinWorkGroupsPerComputeUnitAttr>()) {
269264
// The value is guaranteed to be > 0, pass it to the metadata.
270-
addNVVMMetadata(F, "maxclusterrank", MaxVal.getExtValue());
265+
addNVVMMetadata(F, "minnctapersm", attrValue(MWGPCU->getValue()));
266+
267+
if (const auto *MWGPMP =
268+
FD->getAttr<SYCLIntelMaxWorkGroupsPerMultiprocessorAttr>()) {
269+
// The value is guaranteed to be > 0, pass it to the metadata.
270+
addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
271+
}
271272
}
272273
}
273274
}

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 23 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "clang/AST/Mangle.h"
2323
#include "clang/AST/RecursiveASTVisitor.h"
2424
#include "clang/AST/Type.h"
25+
#include "clang/Basic/AttributeCommonInfo.h"
2526
#include "clang/Basic/CharInfo.h"
2627
#include "clang/Basic/Cuda.h"
2728
#include "clang/Basic/DarwinSDKInfo.h"
@@ -200,13 +201,18 @@ static unsigned getNumAttributeArgs(const ParsedAttr &AL) {
200201
return AL.getNumArgs() + AL.hasParsedType();
201202
}
202203

203-
/// A helper function to provide Attribute Location for the Attr types
204-
/// AND the ParsedAttr.
205-
template <typename AttrInfo>
206-
static std::enable_if_t<std::is_base_of_v<Attr, AttrInfo>, SourceLocation>
207-
getAttrLoc(const AttrInfo &AL) {
204+
/// Helper functions to provide Attribute Location for the Attr types,
205+
/// AttributeCommonInfo AND the ParsedAttr.
206+
template <typename T>
207+
static std::enable_if_t<std::is_base_of_v<Attr, T>, SourceLocation>
208+
getAttrLoc(const T &AL) {
208209
return AL.getLocation();
209210
}
211+
template <typename T,
212+
std::enable_if_t<std::is_same_v<AttributeCommonInfo, T>, bool> = true>
213+
static SourceLocation getAttrLoc(const T &AL) {
214+
return AL.getScopeLoc();
215+
}
210216
static SourceLocation getAttrLoc(const ParsedAttr &AL) { return AL.getLoc(); }
211217

212218
/// If Expr is a valid integer constant, get the value of the integer
@@ -4447,26 +4453,6 @@ void Sema::AddSYCLIntelMaxGlobalWorkDimAttr(Decl *D,
44474453
D->addAttr(::new (Context) SYCLIntelMaxGlobalWorkDimAttr(Context, CI, E));
44484454
}
44494455

4450-
// Check that the attribute is an integer constant that can fit in 32-bits.
4451-
// Issue correct error message and return false on failure.
4452-
bool static check32BitInt(const Expr *E, const AttributeCommonInfo &CI,
4453-
Sema &S) {
4454-
std::optional<llvm::APSInt> I = llvm::APSInt(64);
4455-
if (!(I = E->getIntegerConstantExpr(S.Context))) {
4456-
S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
4457-
<< CI << 0 << AANT_ArgumentIntegerConstant << E->getSourceRange();
4458-
return false;
4459-
}
4460-
// Make sure we can fit it in 32 bits.
4461-
if (!I->isIntN(32)) {
4462-
S.Diag(E->getExprLoc(), diag::err_ice_too_large)
4463-
<< toString(*I, 10, false) << 32 << /* Unsigned */ 1;
4464-
return false;
4465-
}
4466-
4467-
return true;
4468-
}
4469-
44704456
void Sema::AddSYCLIntelMinWorkGroupsPerComputeUnitAttr(
44714457
Decl *D, const AttributeCommonInfo &CI, Expr *E) {
44724458
if (Context.getLangOpts().SYCLIsDevice &&
@@ -4476,24 +4462,22 @@ void Sema::AddSYCLIntelMinWorkGroupsPerComputeUnitAttr(
44764462
return;
44774463
}
44784464
if (!E->isValueDependent()) {
4479-
if (!check32BitInt(E, CI, *this))
4465+
uint32_t Val;
4466+
if (!checkUInt32Argument(*this, CI, E, Val, UINT_MAX /* Idx */,
4467+
true /* StrictlyUnsigned */))
44804468
return;
4469+
44814470
// Validate that we have an integer constant expression and then store the
44824471
// converted constant expression into the semantic attribute so that we
44834472
// don't have to evaluate it again later.
44844473
llvm::APSInt ArgVal;
44854474
ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal);
44864475
if (Res.isInvalid())
44874476
return;
4477+
if (Val != ArgVal)
4478+
llvm_unreachable("Values must not differ.");
44884479
E = Res.get();
44894480

4490-
// This attribute must be greater than 0.
4491-
if (ArgVal <= 0) {
4492-
Diag(E->getBeginLoc(), diag::err_attribute_argument_is_zero)
4493-
<< CI << E->getSourceRange();
4494-
return;
4495-
}
4496-
44974481
// Check to see if there's a duplicate attribute with different values
44984482
// already applied to the declaration.
44994483
if (const auto *DeclAttr =
@@ -4543,8 +4527,11 @@ void Sema::AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr(
45434527
}
45444528
}
45454529
if (!E->isValueDependent()) {
4546-
if (!check32BitInt(E, CI, *this))
4530+
uint32_t Val;
4531+
if (!checkUInt32Argument(*this, CI, E, Val, UINT_MAX /* Idx */,
4532+
true /* StrictlyUnsigned */))
45474533
return;
4534+
45484535
// Validate that we have an integer constant expression and then store the
45494536
// converted constant expression into the semantic attribute so that we
45504537
// don't have to evaluate it again later.
@@ -4553,13 +4540,8 @@ void Sema::AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr(
45534540
if (Res.isInvalid())
45544541
return;
45554542
E = Res.get();
4556-
4557-
// This attribute must be greater than 0.
4558-
if (ArgVal <= 0) {
4559-
Diag(E->getBeginLoc(), diag::err_attribute_argument_is_zero)
4560-
<< CI << E->getSourceRange();
4561-
return;
4562-
}
4543+
if (Val != ArgVal)
4544+
llvm_unreachable("Values must not differ.");
45634545

45644546
// Check to see if there's a duplicate attribute with different values
45654547
// already applied to the declaration.

clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
// REQUIRES: cuda
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -triple nvptx-unknown-unknown -target-cpu sm_90 -disable-llvm-passes -S -emit-llvm -o - %s | FileCheck %s
22

3-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -triple nvptx-unknown-unknown -target-cpu sm_90 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
3+
// Test correct handling of maximum work group size, minimum work groups per
4+
// compute unit and maximum work groups per multi-processor attributes, that
5+
// correspond to CUDA's launch bounds. Expect max_work_group_size,
6+
// min_work_groups_per_cu and max_work_groups_per_mp that are mapped to
7+
// maxntidx, minnctapersm, maxclusterrank PTX directives respectively.
48

59
#include "sycl.hpp"
610

Lines changed: 51 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,51 @@
1-
// REQUIRES: cuda
2-
3-
// RUN: %clangxx -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_90 -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
4-
// RUN: %clangxx -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_90 -fsycl -fsyntax-only -Xclang -verify %s
5-
// expected-no-diagnostics
6-
7-
#include <sycl/sycl.hpp>
8-
9-
template <int N1, int N2, int N3> class Functor {
10-
public:
11-
[[intel::max_work_group_size(1, 1, N1), intel::min_work_groups_per_cu(N2),
12-
intel::max_work_groups_per_mp(N3)]] void
13-
operator()() const {}
14-
};
15-
16-
int main() {
17-
sycl::queue Q{};
18-
19-
sycl::range<1> Gws(32);
20-
sycl::range<1> Lws(32);
21-
22-
Q.submit([&](sycl::handler &cgh) {
23-
cgh.parallel_for(sycl::nd_range<1>(Gws, Lws),
24-
[=](sycl::id<1>) [[intel::max_work_group_size(1, 1, 256),
25-
intel::min_work_groups_per_cu(2),
26-
intel::max_work_groups_per_mp(4)]] {
27-
volatile int A = 42;
28-
});
29-
}).wait_and_throw();
30-
// CHECK-IR: !min_work_groups_per_cu [[MWGPCU:![0-9]+]]
31-
// CHECK-IR: !max_work_groups_per_mp [[MWGPMP:![0-9]+]]
32-
// CHECK-IR: !max_work_group_size [[MWGS:![0-9]+]]
33-
34-
Q.single_task<class F>(Functor<512, 8, 16>{}).wait();
35-
// CHECK-IR: !min_work_groups_per_cu [[MWGPCU_F:![0-9]+]]
36-
// CHECK-IR: !max_work_groups_per_mp [[MWGPMP_F:![0-9]+]]
37-
// CHECK-IR: !max_work_group_size [[MWGS_F:![0-9]+]]
38-
39-
// CHECK-IR: [[MWGPCU]] = !{i32 2}
40-
// CHECK-IR: [[MWGPMP]] = !{i32 4}
41-
// CHECK-IR: [[MWGS]] = !{i32 256, i32 1, i32 1}
42-
43-
// CHECK-IR: [[MWGPCU_F]] = !{i32 8}
44-
// CHECK-IR: [[MWGPMP_F]] = !{i32 16}
45-
// CHECK-IR: [[MWGS_F]] = !{i32 512, i32 1, i32 1}
46-
47-
return 0;
48-
}
1+
// RUN: %clang_cc1 -internal-isystem %S/Inputs -triple nvptx-unknown-unknown -target-cpu sm_90 -fsycl-is-device -S -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
2+
// RUN: %clang_cc1 -internal-isystem %S/Inputs %s -triple nvptx64-nvidia-cuda -target-cpu sm_90 -fsycl-is-device -fsyntax-only -verify
3+
// expected-no-diagnostics
4+
5+
// Maximum work groups per multi-processor, mapped to maxclusterrank PTX
6+
// directive, is an SM_90 feature, make sure that correct metadata is generated
7+
// and no warnings/errors are issued.
8+
9+
#include "sycl.hpp"
10+
11+
template <int N1, int N2, int N3> class Functor {
12+
public:
13+
[[intel::max_work_group_size(1, 1, N1), intel::min_work_groups_per_cu(N2),
14+
intel::max_work_groups_per_mp(N3)]] void
15+
operator()() const {}
16+
};
17+
18+
int main() {
19+
sycl::queue Q{};
20+
21+
sycl::range<1> Gws(32);
22+
23+
Q.submit([&](sycl::handler &cgh) {
24+
cgh.parallel_for<class K1>(Gws,
25+
[=](sycl::id<1>) [[intel::max_work_group_size(1, 1, 256),
26+
intel::min_work_groups_per_cu(2),
27+
intel::max_work_groups_per_mp(4)]] {
28+
volatile int A = 42;
29+
});
30+
});
31+
// CHECK-IR: !min_work_groups_per_cu [[MWGPCU:![0-9]+]]
32+
// CHECK-IR: !max_work_groups_per_mp [[MWGPMP:![0-9]+]]
33+
// CHECK-IR: !max_work_group_size [[MWGS:![0-9]+]]
34+
35+
Q.submit([&](sycl::handler &cgh) {
36+
cgh.single_task<class F>(Functor<512, 8, 16>{});
37+
});
38+
// CHECK-IR: !min_work_groups_per_cu [[MWGPCU_F:![0-9]+]]
39+
// CHECK-IR: !max_work_groups_per_mp [[MWGPMP_F:![0-9]+]]
40+
// CHECK-IR: !max_work_group_size [[MWGS_F:![0-9]+]]
41+
42+
// CHECK-IR: [[MWGPCU]] = !{i32 2}
43+
// CHECK-IR: [[MWGPMP]] = !{i32 4}
44+
// CHECK-IR: [[MWGS]] = !{i32 256, i32 1, i32 1}
45+
46+
// CHECK-IR: [[MWGPCU_F]] = !{i32 8}
47+
// CHECK-IR: [[MWGPMP_F]] = !{i32 16}
48+
// CHECK-IR: [[MWGS_F]] = !{i32 512, i32 1, i32 1}
49+
50+
return 0;
51+
}

clang/test/SemaSYCL/lb_sm_70.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1-
// REQUIRES: cuda
1+
// RUN: %clang_cc1 -internal-isystem %S/Inputs -triple nvptx-unknown-unknown -target-cpu sm_70 -fsycl-is-device -S -emit-llvm %s -o -ferror-limit=100 -fsyntax-only -verify %s
22

3-
// RUN: %clangxx -ferror-limit=100 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -fsycl-device-only -fsyntax-only -Xclang -verify %s
3+
// Maximum work groups per multi-processor, mapped to maxclusterrank PTX
4+
// directive, is an SM_90 feature, make sure that correct warning is issued on
5+
// architectures lower than that. Furthermore, warn/error incorrect values
6+
// specified for max_work_groups_per_mp and min_work_groups_per_cu.
47

5-
#include <sycl/sycl.hpp>
8+
#include "sycl.hpp"
69

710
template <int N1, int N2, int N3> class Functor {
811
public:
@@ -24,15 +27,15 @@ int main() {
2427
intel::max_work_groups_per_mp(4)]] { volatile int A = 42; });
2528

2629
constexpr float A = 2.0;
27-
// expected-error@+5 {{'min_work_groups_per_cu' attribute requires parameter 0 to be an integer constant}}
30+
// expected-error@+5 {{'min_work_groups_per_cu' attribute requires an integer constant}}
2831
// expected-warning@+5 {{'maxclusterrank' requires sm_90 or higher, CUDA arch provided: sm_70, ignoring 'max_work_groups_per_mp' attribute}}
2932
cgh.single_task<class T2>(
3033
[=]()
3134
[[intel::max_work_group_size(1, 1, 256),
3235
intel::min_work_groups_per_cu(A),
3336
intel::max_work_groups_per_mp(4)]] { volatile int A = 42; });
3437

35-
// expected-error@+3 {{'min_work_groups_per_cu' attribute requires parameter 0 to be an integer constant}}
38+
// expected-error@+3 {{'min_work_groups_per_cu' attribute requires an integer constant}}
3639
cgh.single_task<class T3>(
3740
[=]() [[intel::max_work_group_size(1, 1, 256),
3841
intel::min_work_groups_per_cu(2147483647 + 1)]]
@@ -46,13 +49,15 @@ int main() {
4649
volatile int A = 42;
4750
});
4851

49-
// expected-error@+1 {{'min_work_groups_per_cu' attribute must be greater than 0}}
52+
// expected-error@+1 {{'min_work_groups_per_cu' attribute requires a non-negative integral compile time constant expression}}
5053
cgh.single_task<class T5>([=]() [[intel::min_work_groups_per_cu(-8)]] {
5154
volatile int A = 42;
5255
});
53-
}).wait_and_throw();
56+
});
5457

55-
Q.single_task<class F>(Functor<512, 8, 16>{}).wait();
58+
Q.submit([&](sycl::handler &cgh) {
59+
cgh.single_task<class F>(Functor<512, 8, 16>{});
60+
});
5661

5762
return 0;
5863
}

clang/test/SemaSYCL/lb_sm_90_ast.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
// REQUIERS: cuda
2-
31
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -ast-dump -triple nvptx-unknown-unknown -target-cpu sm_90 %s | FileCheck %s
42

53
// Tests for AST of Intel max_work_group_size, min_work_groups_per_cu and

0 commit comments

Comments
 (0)