Skip to content

Commit fa84297

Browse files
authored
[clang][CUDA] Add 'noconvergent' function and statement attribute
- For languages following SPMD/SIMT programming model, functions and call sites are marked 'convergent' by default. 'noconvergent' is added in this patch to allow developers to remove that 'convergent' attribute when it's safe. Reviewers: nhaehnle, Sirraide, yxsamliu, Artem-B, ilovepi, jayfoad, ssahasra, arsenm Reviewed By: arsenm Pull Request: #100637
1 parent 78b4b5c commit fa84297

File tree

9 files changed

+200
-37
lines changed

9 files changed

+200
-37
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2050,6 +2050,17 @@ def Convergent : InheritableAttr {
20502050
let SimpleHandler = 1;
20512051
}
20522052

2053+
def NoConvergent : InheritableAttr {
2054+
let Spellings = [Clang<"noconvergent">, Declspec<"noconvergent">];
2055+
let Subjects = SubjectList<[Function, Stmt], WarnDiag,
2056+
"functions and statements">;
2057+
let LangOpts = [CUDA];
2058+
let Documentation = [NoConvergentDocs];
2059+
let SimpleHandler = 1;
2060+
}
2061+
2062+
def : MutualExclusions<[Convergent, NoConvergent]>;
2063+
20532064
def NoInline : DeclOrStmtAttr {
20542065
let Spellings = [CustomKeyword<"__noinline__">, GCC<"noinline">,
20552066
CXX11<"clang", "noinline">, C23<"clang", "noinline">,

clang/include/clang/Basic/AttrDocs.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,6 +1382,34 @@ Sample usage:
13821382
}];
13831383
}
13841384

1385+
def NoConvergentDocs : Documentation {
1386+
let Category = DocCatFunction;
1387+
let Content = [{
1388+
This attribute prevents a function from being treated as convergent, which
1389+
means that optimizations can only move calls to that function to
1390+
control-equivalent blocks. If a statement is marked as ``noconvergent`` and
1391+
contains calls, it also prevents those calls from being treated as convergent.
1392+
In other words, those calls are not restricted to only being moved to
1393+
control-equivalent blocks.
1394+
1395+
In languages following SPMD/SIMT programming model, e.g., CUDA/HIP, function
1396+
declarations and calls are treated as convergent by default for correctness.
1397+
This ``noconvergent`` attribute is helpful for developers to prevent them from
1398+
being treated as convergent when it's safe.
1399+
1400+
.. code-block:: c
1401+
1402+
__device__ float bar(float);
1403+
__device__ float foo(float) __attribute__((noconvergent)) {}
1404+
1405+
__device__ int example(void) {
1406+
float x;
1407+
[[clang::noconvergent]] x = bar(x);
1408+
}
1409+
1410+
}];
1411+
}
1412+
13851413
def NoSplitStackDocs : Documentation {
13861414
let Category = DocCatFunction;
13871415
let Content = [{

clang/lib/CodeGen/CGCall.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2522,6 +2522,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
25222522
}
25232523
}
25242524
}
2525+
// Remove 'convergent' if requested.
2526+
if (TargetDecl->hasAttr<NoConvergentAttr>())
2527+
FuncAttrs.removeAttribute(llvm::Attribute::Convergent);
25252528
}
25262529

25272530
// Add "sample-profile-suffix-elision-policy" attribute for internal linkage
@@ -5636,6 +5639,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
56365639
Attrs =
56375640
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
56385641

5642+
// Remove call-site convergent attribute if requested.
5643+
if (InNoConvergentAttributedStmt)
5644+
Attrs =
5645+
Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Convergent);
5646+
56395647
// Apply some call-site-specific attributes.
56405648
// TODO: work this into building the attribute set.
56415649

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
723723
bool nomerge = false;
724724
bool noinline = false;
725725
bool alwaysinline = false;
726+
bool noconvergent = false;
726727
const CallExpr *musttail = nullptr;
727728

728729
for (const auto *A : S.getAttrs()) {
@@ -738,6 +739,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
738739
case attr::AlwaysInline:
739740
alwaysinline = true;
740741
break;
742+
case attr::NoConvergent:
743+
noconvergent = true;
744+
break;
741745
case attr::MustTail: {
742746
const Stmt *Sub = S.getSubStmt();
743747
const ReturnStmt *R = cast<ReturnStmt>(Sub);
@@ -756,6 +760,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
756760
SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge);
757761
SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
758762
SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
763+
SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, noconvergent);
759764
SaveAndRestore save_musttail(MustTailCall, musttail);
760765
EmitStmt(S.getSubStmt(), S.getAttrs());
761766
}
@@ -2465,7 +2470,8 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
24652470

24662471
static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
24672472
bool HasUnwindClobber, bool ReadOnly,
2468-
bool ReadNone, bool NoMerge, const AsmStmt &S,
2473+
bool ReadNone, bool NoMerge, bool NoConvergent,
2474+
const AsmStmt &S,
24692475
const std::vector<llvm::Type *> &ResultRegTypes,
24702476
const std::vector<llvm::Type *> &ArgElemTypes,
24712477
CodeGenFunction &CGF,
@@ -2506,11 +2512,11 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
25062512
llvm::ConstantAsMetadata::get(Loc)));
25072513
}
25082514

2509-
if (CGF.getLangOpts().assumeFunctionsAreConvergent())
2515+
if (!NoConvergent && CGF.getLangOpts().assumeFunctionsAreConvergent())
25102516
// Conservatively, mark all inline asm blocks in CUDA or OpenCL as
25112517
// convergent (meaning, they may call an intrinsically convergent op, such
25122518
// as bar.sync, and so can't have certain optimizations applied around
2513-
// them).
2519+
// them) unless it's explicitly marked 'noconvergent'.
25142520
Result.addFnAttr(llvm::Attribute::Convergent);
25152521
// Extract all of the register value results from the asm.
25162522
if (ResultRegTypes.size() == 1) {
@@ -3040,9 +3046,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
30403046
if (IsGCCAsmGoto) {
30413047
CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
30423048
EmitBlock(Fallthrough);
3043-
UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone,
3044-
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
3045-
*this, RegResults);
3049+
UpdateAsmCallInst(*CBR, HasSideEffect, /*HasUnwindClobber=*/false, ReadOnly,
3050+
ReadNone, InNoMergeAttributedStmt,
3051+
InNoConvergentAttributedStmt, S, ResultRegTypes,
3052+
ArgElemTypes, *this, RegResults);
30463053
// Because we are emitting code top to bottom, we don't have enough
30473054
// information at this point to know precisely whether we have a critical
30483055
// edge. If we have outputs, split all indirect destinations.
@@ -3070,15 +3077,17 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
30703077
}
30713078
} else if (HasUnwindClobber) {
30723079
llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, "");
3073-
UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone,
3074-
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
3075-
*this, RegResults);
3080+
UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/true,
3081+
ReadOnly, ReadNone, InNoMergeAttributedStmt,
3082+
InNoConvergentAttributedStmt, S, ResultRegTypes,
3083+
ArgElemTypes, *this, RegResults);
30763084
} else {
30773085
llvm::CallInst *Result =
30783086
Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
3079-
UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone,
3080-
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
3081-
*this, RegResults);
3087+
UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/false,
3088+
ReadOnly, ReadNone, InNoMergeAttributedStmt,
3089+
InNoConvergentAttributedStmt, S, ResultRegTypes,
3090+
ArgElemTypes, *this, RegResults);
30823091
}
30833092

30843093
EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes,

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,9 @@ class CodeGenFunction : public CodeGenTypeCache {
612612
/// True if the current statement has always_inline attribute.
613613
bool InAlwaysInlineAttributedStmt = false;
614614

615+
/// True if the current statement has noconvergent attribute.
616+
bool InNoConvergentAttributedStmt = false;
617+
615618
// The CallExpr within the current statement that the musttail attribute
616619
// applies to. nullptr if there is no 'musttail' on the current statement.
617620
const CallExpr *MustTailCall = nullptr;

clang/lib/Sema/SemaStmtAttr.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,19 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A,
229229
return ::new (S.Context) NoMergeAttr(S.Context, A);
230230
}
231231

232+
static Attr *handleNoConvergentAttr(Sema &S, Stmt *St, const ParsedAttr &A,
233+
SourceRange Range) {
234+
CallExprFinder CEF(S, St);
235+
236+
if (!CEF.foundCallExpr() && !CEF.foundAsmStmt()) {
237+
S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt)
238+
<< A;
239+
return nullptr;
240+
}
241+
242+
return ::new (S.Context) NoConvergentAttr(S.Context, A);
243+
}
244+
232245
template <typename OtherAttr, int DiagIdx>
233246
static bool CheckStmtInlineAttr(Sema &SemaRef, const Stmt *OrigSt,
234247
const Stmt *CurSt,
@@ -664,6 +677,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
664677
return handleCodeAlignAttr(S, St, A);
665678
case ParsedAttr::AT_MSConstexpr:
666679
return handleMSConstexprAttr(S, St, A, Range);
680+
case ParsedAttr::AT_NoConvergent:
681+
return handleNoConvergentAttr(S, St, A, Range);
667682
default:
668683
// N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a
669684
// declaration attribute is not written on a statement, but this code is

clang/test/CodeGenCUDA/convergent.cu

Lines changed: 79 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
12
// REQUIRES: x86-registered-target
23
// REQUIRES: nvptx-registered-target
34

@@ -10,36 +11,89 @@
1011

1112
#include "Inputs/cuda.h"
1213

13-
// DEVICE: Function Attrs:
14-
// DEVICE-SAME: convergent
15-
// DEVICE-NEXT: define{{.*}} void @_Z3foov
14+
// DEVICE-LABEL: define dso_local void @_Z3foov(
15+
// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
16+
// DEVICE-NEXT: [[ENTRY:.*:]]
17+
// DEVICE-NEXT: ret void
18+
//
1619
__device__ void foo() {}
20+
// DEVICE-LABEL: define dso_local void @_Z3baxv(
21+
// DEVICE-SAME: ) #[[ATTR1:[0-9]+]] {
22+
// DEVICE-NEXT: [[ENTRY:.*:]]
23+
// DEVICE-NEXT: ret void
24+
//
25+
[[clang::noconvergent]] __device__ void bax() {}
1726

18-
// HOST: Function Attrs:
19-
// HOST-NOT: convergent
20-
// HOST-NEXT: define{{.*}} void @_Z3barv
21-
// DEVICE: Function Attrs:
22-
// DEVICE-SAME: convergent
23-
// DEVICE-NEXT: define{{.*}} void @_Z3barv
2427
__host__ __device__ void baz();
28+
29+
__host__ __device__ float aliasf0(int) asm("something");
30+
__host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingelse");
31+
32+
// DEVICE-LABEL: define dso_local void @_Z3barv(
33+
// DEVICE-SAME: ) #[[ATTR0]] {
34+
// DEVICE-NEXT: [[ENTRY:.*:]]
35+
// DEVICE-NEXT: [[X:%.*]] = alloca i32, align 4
36+
// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR4:[0-9]+]]
37+
// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META3:![0-9]+]]
38+
// DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
39+
// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META4:![0-9]+]]
40+
// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META5:![0-9]+]]
41+
// DEVICE-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
42+
// DEVICE-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]]
43+
// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
44+
// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]]
45+
// DEVICE-NEXT: ret void
46+
//
47+
// HOST-LABEL: define dso_local void @_Z3barv(
48+
// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
49+
// HOST-NEXT: [[ENTRY:.*:]]
50+
// HOST-NEXT: [[X:%.*]] = alloca i32, align 4
51+
// HOST-NEXT: call void @_Z3bazv()
52+
// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
53+
// HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
54+
// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META3:![0-9]+]]
55+
// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
56+
// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
57+
// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]])
58+
// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
59+
// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]])
60+
// HOST-NEXT: ret void
61+
//
2562
__host__ __device__ void bar() {
26-
// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
2763
baz();
28-
// DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
2964
int x;
30-
asm ("trap;" : "=l"(x));
31-
// DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
32-
asm volatile ("trap;");
65+
asm ("trap" : "=l"(x));
66+
asm volatile ("trap");
67+
[[clang::noconvergent]] { asm volatile ("nop"); }
68+
aliasf0(x);
69+
aliasf1(x);
3370
}
3471

35-
// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
36-
// DEVICE: attributes [[BAZ_ATTR]] = {
37-
// DEVICE-SAME: convergent
38-
// DEVICE-SAME: }
39-
// DEVICE-DAG: attributes [[CALL_ATTR]] = { convergent
40-
// DEVICE-DAG: attributes [[ASM_ATTR]] = { convergent
41-
42-
// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
43-
// HOST: attributes [[BAZ_ATTR]] = {
44-
// HOST-NOT: convergent
45-
// HOST-SAME: }
72+
73+
//.
74+
// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
75+
// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
76+
// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
77+
// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
78+
// DEVICE: attributes #[[ATTR4]] = { convergent nounwind }
79+
// DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) }
80+
// DEVICE: attributes #[[ATTR6]] = { nounwind }
81+
//.
82+
// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
83+
// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
84+
// HOST: attributes #[[ATTR2]] = { nounwind memory(none) }
85+
// HOST: attributes #[[ATTR3]] = { nounwind }
86+
//.
87+
// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
88+
// DEVICE: [[META1:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
89+
// DEVICE: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
90+
// DEVICE: [[META3]] = !{i64 3120}
91+
// DEVICE: [[META4]] = !{i64 3155}
92+
// DEVICE: [[META5]] = !{i64 3206}
93+
//.
94+
// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
95+
// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
96+
// HOST: [[META2]] = !{i64 3120}
97+
// HOST: [[META3]] = !{i64 3155}
98+
// HOST: [[META4]] = !{i64 3206}
99+
//.

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
// CHECK-NEXT: Naked (SubjectMatchRule_function)
110110
// CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
111111
// CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
112+
// CHECK-NEXT: NoConvergent (SubjectMatchRule_function)
112113
// CHECK-NEXT: NoDebug (SubjectMatchRule_type_alias, SubjectMatchRule_hasType_functionType, SubjectMatchRule_objc_method, SubjectMatchRule_variable_not_is_parameter)
113114
// CHECK-NEXT: NoDestroy (SubjectMatchRule_variable)
114115
// CHECK-NEXT: NoDuplicate (SubjectMatchRule_function)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
2+
3+
#include "Inputs/cuda.h"
4+
5+
__device__ float f0(float) __attribute__((noconvergent));
6+
__device__ __attribute__((noconvergent)) float f1(float);
7+
[[clang::noconvergent]] __device__ float f2(float);
8+
9+
__device__ [[clang::noconvergent(1)]] float f3(float);
10+
// expected-error@-1 {{'noconvergent' attribute takes no arguments}}
11+
12+
__device__ [[clang::noconvergent]] float g0;
13+
// expected-warning@-1 {{'noconvergent' attribute only applies to functions and statements}}
14+
15+
__device__ __attribute__((convergent)) __attribute__((noconvergent)) float f4(float);
16+
// expected-error@-1 {{'noconvergent' and 'convergent' attributes are not compatible}}
17+
// expected-note@-2 {{conflicting attribute is here}}
18+
19+
__device__ [[clang::noconvergent]] float f5(float);
20+
__device__ [[clang::convergent]] float f5(float);
21+
// expected-error@-1 {{'convergent' and 'noconvergent' attributes are not compatible}}
22+
// expected-note@-3 {{conflicting attribute is here}}
23+
24+
__device__ float f5(float x) {
25+
[[clang::noconvergent]] float y;
26+
// expected-warning@-1 {{'noconvergent' attribute only applies to functions and statements}}
27+
28+
float z;
29+
30+
[[clang::noconvergent]] z = 1;
31+
// expected-warning@-1 {{'noconvergent' attribute is ignored because there exists no call expression inside the statement}}
32+
33+
[[clang::noconvergent]] z = f0(x);
34+
}

0 commit comments

Comments
 (0)