-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[clang][CUDA] Add 'noconvergent' function and statement attribute #100637
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][CUDA] Add 'noconvergent' function and statement attribute #100637
Conversation
Created using spr 1.3.4
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: None (darkbuck) Changes
Full diff: https://github.com/llvm/llvm-project/pull/100637.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 4825979a974d2..c3bcaa5d5f235 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2042,7 +2042,8 @@ def NoDuplicate : InheritableAttr {
def Convergent : InheritableAttr {
let Spellings = [Clang<"convergent">];
- let Subjects = SubjectList<[Function]>;
+ let Subjects = SubjectList<[Function, Stmt], WarnDiag,
+ "functions and statements">;
let Documentation = [ConvergentDocs];
let SimpleHandler = 1;
}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2f3dd5d01fa6c..d73feb4382acd 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5636,6 +5636,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Attrs =
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
+ // Add call-site convergent attribute if exists.
+ if (InConvergentAttributedStmt)
+ Attrs =
+ Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent);
+
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index aa97f685ac7a9..99559dfe075fb 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -723,6 +723,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
bool nomerge = false;
bool noinline = false;
bool alwaysinline = false;
+ bool convergent = false;
const CallExpr *musttail = nullptr;
for (const auto *A : S.getAttrs()) {
@@ -738,6 +739,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
case attr::AlwaysInline:
alwaysinline = true;
break;
+ case attr::Convergent:
+ convergent = true;
+ break;
case attr::MustTail: {
const Stmt *Sub = S.getSubStmt();
const ReturnStmt *R = cast<ReturnStmt>(Sub);
@@ -756,6 +760,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge);
SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
+ SaveAndRestore save_convergent(InConvergentAttributedStmt, convergent);
SaveAndRestore save_musttail(MustTailCall, musttail);
EmitStmt(S.getSubStmt(), S.getAttrs());
}
@@ -2465,7 +2470,8 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
bool HasUnwindClobber, bool ReadOnly,
- bool ReadNone, bool NoMerge, const AsmStmt &S,
+ bool ReadNone, bool NoMerge, bool Convergent,
+ const AsmStmt &S,
const std::vector<llvm::Type *> &ResultRegTypes,
const std::vector<llvm::Type *> &ArgElemTypes,
CodeGenFunction &CGF,
@@ -2475,6 +2481,10 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
if (NoMerge)
Result.addFnAttr(llvm::Attribute::NoMerge);
+
+ if (Convergent)
+ Result.addFnAttr(llvm::Attribute::Convergent);
+
// Attach readnone and readonly attributes.
if (!HasSideEffect) {
if (ReadNone)
@@ -3037,9 +3047,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
if (IsGCCAsmGoto) {
CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
EmitBlock(Fallthrough);
- UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone,
- InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
- *this, RegResults);
+ UpdateAsmCallInst(*CBR, HasSideEffect, /*HasUnwindClobber=*/false, ReadOnly,
+ ReadNone, InNoMergeAttributedStmt,
+ InConvergentAttributedStmt, S, ResultRegTypes,
+ ArgElemTypes, *this, RegResults);
// Because we are emitting code top to bottom, we don't have enough
// information at this point to know precisely whether we have a critical
// edge. If we have outputs, split all indirect destinations.
@@ -3067,15 +3078,17 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
}
} else if (HasUnwindClobber) {
llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, "");
- UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone,
- InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
- *this, RegResults);
+ UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/true,
+ ReadOnly, ReadNone, InNoMergeAttributedStmt,
+ InConvergentAttributedStmt, S, ResultRegTypes,
+ ArgElemTypes, *this, RegResults);
} else {
llvm::CallInst *Result =
Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
- UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone,
- InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
- *this, RegResults);
+ UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/false,
+ ReadOnly, ReadNone, InNoMergeAttributedStmt,
+ InConvergentAttributedStmt, S, ResultRegTypes,
+ ArgElemTypes, *this, RegResults);
}
EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 67e3019565cd0..329120b70fd49 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -612,6 +612,9 @@ class CodeGenFunction : public CodeGenTypeCache {
/// True if the current statement has always_inline attribute.
bool InAlwaysInlineAttributedStmt = false;
+ /// True if the current statement has convergent attribute.
+ bool InConvergentAttributedStmt = false;
+
// The CallExpr within the current statement that the musttail attribute
// applies to. nullptr if there is no 'musttail' on the current statement.
const CallExpr *MustTailCall = nullptr;
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 7f452d177c16f..ff743d9f9df20 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -230,6 +230,20 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A,
return ::new (S.Context) NoMergeAttr(S.Context, A);
}
+static Attr *handleConvergentAttr(Sema &S, Stmt *St, const ParsedAttr &A,
+ SourceRange Range) {
+ NoMergeAttr NMA(S.Context, A);
+ CallExprFinder CEF(S, St);
+
+ if (!CEF.foundCallExpr() && !CEF.foundAsmStmt()) {
+ S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt)
+ << A;
+ return nullptr;
+ }
+
+ return ::new (S.Context) ConvergentAttr(S.Context, A);
+}
+
template <typename OtherAttr, int DiagIdx>
static bool CheckStmtInlineAttr(Sema &SemaRef, const Stmt *OrigSt,
const Stmt *CurSt,
@@ -672,6 +686,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
return handleCodeAlignAttr(S, St, A);
case ParsedAttr::AT_MSConstexpr:
return handleMSConstexprAttr(S, St, A, Range);
+ case ParsedAttr::AT_Convergent:
+ return handleConvergentAttr(S, St, A, Range);
default:
// N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a
// declaration attribute is not written on a statement, but this code is
diff --git a/clang/test/SemaOpenCL/convergent.cl b/clang/test/SemaOpenCL/convergent.cl
index 1b7fda41fc0c8..a00e65cea0176 100644
--- a/clang/test/SemaOpenCL/convergent.cl
+++ b/clang/test/SemaOpenCL/convergent.cl
@@ -4,9 +4,9 @@ void f1(void) __attribute__((convergent));
void f2(void) __attribute__((convergent(1))); // expected-error {{'convergent' attribute takes no arguments}}
-void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions}}
+void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions and statements}}
void f4(void) {
- int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions}}
+ int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions and statements}}
}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
One motivation for this patch is to be able to mark individual inline asms with 'convergent.' So far, CUDA/HIP assumes all calls and inline-asms are convergent and marks all of them with 'convergent.' This guarantees correctness but loses possible optimizations for inline-asms with ALU instructions only. |
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems logical and useful to me.
This PR will need more tests: specifically, please make sure you have test coverage for the diagnostic, and for LLVM IR generation for both convergent calls and convergent inline asm.
Please also update the documentation for the attribute and the release notes.
Missing testcase for the generated IR. |
Created using spr 1.3.4
clang/test/CodeGen/convergent.cpp
Outdated
@@ -0,0 +1,99 @@ | |||
// RUN: %clang_cc1 -emit-llvm %s -triple x86_64-unknown-linux-gnu -o - | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider using update_cc_test_checks.py.
I'd like to see checks that we emit convergence tokens on targets where that's the default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider using update_cc_test_checks.py.
I'd like to see checks that we emit convergence tokens on targets where that's the default.
However, it seems that the script doesn't generate checks on attribute definitions. I tried '--check-attributes`, but it only added function attributes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I hacked llvm/utils/UpdateTestChecks/common.py
a little bit to generate attribute def checks. I may ask for another review on that hack on update tools.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please don't merge an "auto-generated" file that isn't actually autogenerated by the in-tree version.
(Is --check-attributes not what you want?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please don't merge an "auto-generated" file that isn't actually autogenerated by the in-tree version.
(Is --check-attributes not what you want?)
yeah, --check-attributes
only enable function attributes output per function. There's no still no attr definition output. I have to make the following change:
$ git diff
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index eb212ed304e9..2161a3589966 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -2156,10 +2156,6 @@ def filter_globals_according_to_preference(
return global_val_lines_w_index
assert global_check_setting == "smart"
- if nameless_value.check_key == "#":
- # attribute sets are usually better checked by --check-attributes
- return []
-
def extract(line, nv):
p = (
"^"
Shall I use the original test check? We need to check attribute definitions including 'convergent'.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the test again with '--check-globals=all'. That output is the expected one.
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should be removing the source level convergent attribute, not adding more uses of it. It was a mistake to define it in the positive direction, and it is completely unusable for end users. Every piece of code that transitively calls the function in the entire program must also be marked convergent. The only way to practically achieve this is if the target/language has convergent operations, every operation must be marked convergent by default (i.e. the ConvergentFunctions language property should be set)
For optimization hints, we could have a source level noconvergent attribute to opt-out, which would be the inverse of this.
On the IR level we should pick https://reviews.llvm.org/D69498 back up.
I agree with and revise PR to replace it with |
Created using spr 1.3.4
The patch is revised to add a new |
The ``convergent`` attribute can be placed on a function declaration. It is | ||
translated into the LLVM ``convergent`` attribute, which indicates that the call | ||
instructions of a function with this attribute cannot be made control-dependent | ||
on any additional values. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe should just deprecate this, since it's kind of useless
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe should just deprecate this, since it's kind of useless
For languages other that CUDA/HIP, OpenCL, and etc., this attribute may still be helpful to mark functions convergent to restrict code motion to control-equivalent blocks. However, that restriction may not be necessary for those languages.
The ``noconvergent`` attribute removes the LLVM ``convergent`` attribute if | ||
present. If a statement is marked ``noconvergent`` and contains calls, | ||
``convergent`` attributes on those calls are removed as well. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is describing an implementation detail, which doesn't belong in the user facing documentation
// Remove call-site convergent attribute if requested. | ||
if (InNoConvergentAttributedStmt) | ||
Attrs = | ||
Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Convergent); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it easy to avoid adding this in the first place?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it easy to avoid adding this in the first place?
getTrivialDefaultFunctionAttributes
needs an extra argument to avoid that. However, that helper is called in several places, and that extra info is not always available. Forcing a default value in those places seems unreasonable or incorrect.
Created using spr 1.3.4
@arsenm, any further comment? |
It looks like you may have overlooked the request to add release notes for this new feature. |
@darkbuck, please revert and address the documentation! |
A revert isn't necessary just for the missing release note; a followup PR is fine. |
call sites are marked 'convergent' by default. 'noconvergent' is added
in this patch to allow developers to remove that 'convergent'
attribute when it's safe.