Skip to content

Commit 0afbcb2

Browse files
committed
Revert "[NVPTX] Add support for maxclusterrank in launch_bounds (#66496)"
This reverts commit dfab31b because SemaDeclAttr.cpp cannot depend on Basic's private headers (lib/Basic/Targets/NVPTX.h).
1 parent 13c603a commit 0afbcb2

File tree

15 files changed

+61
-262
lines changed

15 files changed

+61
-262
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,8 +1267,7 @@ def CUDAInvalidTarget : InheritableAttr {
12671267

12681268
def CUDALaunchBounds : InheritableAttr {
12691269
let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">];
1270-
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>,
1271-
ExprArgument<"MaxBlocks", 1>];
1270+
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
12721271
let LangOpts = [CUDA];
12731272
let Subjects = SubjectList<[ObjCMethod, FunctionLike]>;
12741273
// An AST node is created for this attribute, but is not used by other parts

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11853,10 +11853,6 @@ def err_sycl_special_type_num_init_method : Error<
1185311853
"types with 'sycl_special_class' attribute must have one and only one '__init' "
1185411854
"method defined">;
1185511855

11856-
def warn_cuda_maxclusterrank_sm_90 : Warning<
11857-
"maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
11858-
"%1 attribute">, InGroup<IgnoredAttributes>;
11859-
1186011856
def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must "
1186111857
"have a bit size of at least %select{2|1}0">;
1186211858
def err_bit_int_max_size : Error<"%select{signed|unsigned}0 _BitInt of bit "

clang/include/clang/Sema/Sema.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11061,13 +11061,12 @@ class Sema final {
1106111061
/// Create an CUDALaunchBoundsAttr attribute.
1106211062
CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI,
1106311063
Expr *MaxThreads,
11064-
Expr *MinBlocks,
11065-
Expr *MaxBlocks);
11064+
Expr *MinBlocks);
1106611065

1106711066
/// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
1106811067
/// declaration.
1106911068
void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
11070-
Expr *MaxThreads, Expr *MinBlocks, Expr *MaxBlocks);
11069+
Expr *MaxThreads, Expr *MinBlocks);
1107111070

1107211071
/// AddModeAttr - Adds a mode attribute to a particular declaration.
1107311072
void AddModeAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Name,

clang/lib/Basic/Targets/NVPTX.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
181181

182182
bool hasBitIntType() const override { return true; }
183183
bool hasBFloat16Type() const override { return true; }
184-
185-
CudaArch getGPU() const { return GPU; }
186184
};
187185
} // namespace targets
188186
} // namespace clang

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
296296
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
297297
MaxThreads.getExtValue());
298298

299-
// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
300-
// was not specified in __launch_bounds__ or if the user specified a 0 value,
299+
// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
300+
// not specified in __launch_bounds__ or if the user specified a 0 value,
301301
// we don't have to add a PTX directive.
302302
if (Attr->getMinBlocks()) {
303303
llvm::APSInt MinBlocks(32);
@@ -307,14 +307,6 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
307307
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
308308
MinBlocks.getExtValue());
309309
}
310-
if (Attr->getMaxBlocks()) {
311-
llvm::APSInt MaxBlocks(32);
312-
MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
313-
if (MaxBlocks > 0)
314-
// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
315-
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
316-
MaxBlocks.getExtValue());
317-
}
318310
}
319311

320312
std::unique_ptr<TargetCodeGenInfo>

clang/lib/Parse/ParseOpenMP.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3739,8 +3739,7 @@ OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) {
37393739
continue;
37403740
if (auto *A = Actions.CreateLaunchBoundsAttr(
37413741
PA, PA.getArgAsExpr(0),
3742-
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr,
3743-
PA.getNumArgs() > 2 ? PA.getArgAsExpr(2) : nullptr))
3742+
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
37443743
Attrs.push_back(A);
37453744
continue;
37463745
default:

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 9 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
#include "../Basic/Targets/NVPTX.h"
1413
#include "clang/AST/ASTConsumer.h"
1514
#include "clang/AST/ASTContext.h"
1615
#include "clang/AST/ASTMutationListener.h"
@@ -5609,13 +5608,6 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
56095608
return false;
56105609
}
56115610

5612-
// Helper to get CudaArch.
5613-
static CudaArch getCudaArch(const TargetInfo &TI) {
5614-
if (!TI.getTriple().isNVPTX())
5615-
llvm_unreachable("getCudaArch is only valid for NVPTX triple");
5616-
return static_cast<const targets::NVPTXTargetInfo *>(&TI)->getGPU();
5617-
}
5618-
56195611
// Checks whether an argument of launch_bounds attribute is
56205612
// acceptable, performs implicit conversion to Rvalue, and returns
56215613
// non-nullptr Expr result on success. Otherwise, it returns nullptr
@@ -5659,51 +5651,34 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,
56595651

56605652
CUDALaunchBoundsAttr *
56615653
Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
5662-
Expr *MinBlocks, Expr *MaxBlocks) {
5663-
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
5654+
Expr *MinBlocks) {
5655+
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks);
56645656
MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
5665-
if (!MaxThreads)
5657+
if (MaxThreads == nullptr)
56665658
return nullptr;
56675659

56685660
if (MinBlocks) {
56695661
MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
5670-
if (!MinBlocks)
5662+
if (MinBlocks == nullptr)
56715663
return nullptr;
56725664
}
56735665

5674-
if (MaxBlocks) {
5675-
// '.maxclusterrank' ptx directive requires .target sm_90 or higher.
5676-
auto SM = getCudaArch(Context.getTargetInfo());
5677-
if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
5678-
Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
5679-
<< CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
5680-
// Ignore it by setting MaxBlocks to null;
5681-
MaxBlocks = nullptr;
5682-
} else {
5683-
MaxBlocks = makeLaunchBoundsArgExpr(*this, MaxBlocks, TmpAttr, 2);
5684-
if (!MaxBlocks)
5685-
return nullptr;
5686-
}
5687-
}
5688-
56895666
return ::new (Context)
5690-
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
5667+
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks);
56915668
}
56925669

56935670
void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
5694-
Expr *MaxThreads, Expr *MinBlocks,
5695-
Expr *MaxBlocks) {
5696-
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks, MaxBlocks))
5671+
Expr *MaxThreads, Expr *MinBlocks) {
5672+
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks))
56975673
D->addAttr(Attr);
56985674
}
56995675

57005676
static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
5701-
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 3))
5677+
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 2))
57025678
return;
57035679

57045680
S.AddLaunchBoundsAttr(D, AL, AL.getArgAsExpr(0),
5705-
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
5706-
AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
5681+
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr);
57075682
}
57085683

57095684
static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -302,15 +302,7 @@ static void instantiateDependentCUDALaunchBoundsAttr(
302302
MinBlocks = Result.getAs<Expr>();
303303
}
304304

305-
Expr *MaxBlocks = nullptr;
306-
if (Attr.getMaxBlocks()) {
307-
Result = S.SubstExpr(Attr.getMaxBlocks(), TemplateArgs);
308-
if (Result.isInvalid())
309-
return;
310-
MaxBlocks = Result.getAs<Expr>();
311-
}
312-
313-
S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks, MaxBlocks);
305+
S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks);
314306
}
315307

316308
static void
clang/test/CodeGenCUDA/launch_bounds.cu

Lines changed: 0 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
2-
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -target-cpu sm_90 -DUSE_MAX_BLOCKS -fcuda-is-device -emit-llvm -o - | FileCheck -check-prefix=CHECK_MAX_BLOCKS %s
32

43
#include "Inputs/cuda.h"
54

65
#define MAX_THREADS_PER_BLOCK 256
76
#define MIN_BLOCKS_PER_MP 2
8-
#ifdef USE_MAX_BLOCKS
9-
#define MAX_BLOCKS_PER_MP 4
10-
#endif
117

128
// Test both max threads per block and Min cta per sm.
139
extern "C" {
@@ -21,21 +17,6 @@ Kernel1()
2117
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
2218
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
2319

24-
#ifdef USE_MAX_BLOCKS
25-
// Test max threads per block and min/max cta per sm.
26-
extern "C" {
27-
__global__ void
28-
__launch_bounds__( MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP )
29-
Kernel1_sm_90()
30-
{
31-
}
32-
}
33-
34-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
35-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
36-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
37-
#endif // USE_MAX_BLOCKS
38-
3920
// Test only max threads per block. Min cta per sm defaults to 0, and
4021
// CodeGen doesn't output a zero value for minctasm.
4122
extern "C" {
@@ -69,20 +50,6 @@ template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
6950
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
7051
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
7152

72-
#ifdef USE_MAX_BLOCKS
73-
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
74-
__global__ void
75-
__launch_bounds__(max_threads_per_block, min_blocks_per_mp, max_blocks_per_mp)
76-
Kernel4_sm_90()
77-
{
78-
}
79-
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
80-
81-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
82-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
83-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
84-
#endif //USE_MAX_BLOCKS
85-
8653
const int constint = 100;
8754
template <int max_threads_per_block, int min_blocks_per_mp>
8855
__global__ void
@@ -96,23 +63,6 @@ template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
9663
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
9764
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
9865

99-
#ifdef USE_MAX_BLOCKS
100-
101-
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
102-
__global__ void
103-
__launch_bounds__(max_threads_per_block + constint,
104-
min_blocks_per_mp + max_threads_per_block,
105-
max_blocks_per_mp + max_threads_per_block)
106-
Kernel5_sm_90()
107-
{
108-
}
109-
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
110-
111-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
112-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
113-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
114-
#endif //USE_MAX_BLOCKS
115-
11666
// Make sure we don't emit negative launch bounds values.
11767
__global__ void
11868
__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
@@ -130,26 +80,7 @@ Kernel7()
13080
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
13181
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",
13282

133-
#ifdef USE_MAX_BLOCKS
134-
__global__ void
135-
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP )
136-
Kernel7_sm_90()
137-
{
138-
}
139-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx",
140-
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm",
141-
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank",
142-
#endif // USE_MAX_BLOCKS
143-
14483
const char constchar = 12;
14584
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
14685
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
14786
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
148-
149-
#ifdef USE_MAX_BLOCKS
150-
const char constchar_2 = 14;
151-
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
152-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
153-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
154-
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
155-
#endif // USE_MAX_BLOCKS

clang/test/SemaCUDA/launch_bounds.cu

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_75 -verify %s
1+
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
22

33
#include "Inputs/cuda.h"
44

@@ -11,9 +11,8 @@ __launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-
1111

1212
__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
1313
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
14-
__launch_bounds__(128, 2, -8) void TestNegArg2(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}
1514

16-
__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
15+
__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
1716
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
1817

1918
int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}
@@ -48,5 +47,3 @@ __launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error
4847

4948
template <int... Args>
5049
__launch_bounds__(1, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}
51-
52-
__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}

clang/test/SemaCUDA/launch_bounds_sm_90.cu

Lines changed: 0 additions & 57 deletions
This file was deleted.

0 commit comments

Comments
 (0)