-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang] Generate the GEP instead of adding AST nodes #73730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Bill Wendling (bwendling) Changes: This is an alternative to #73465. It generates the GEP directly. It's not tested well, but it might be a nicer solution rather than adding AST nodes. PTAL Full diff: https://github.com/llvm/llvm-project/pull/73730.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c83ea966fdeadc6..487bd14244e531c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -916,8 +916,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
// Build a load of the counted_by field.
bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
- const Expr *CountedByExpr = BuildCountedByFieldExpr(Base, CountedByFD);
- Value *CountedByInst = EmitAnyExprToTemp(CountedByExpr).getScalarVal();
+ Value *CountedByInst = BuildCountedByFieldExpr(Base, CountedByFD);
llvm::Type *CountedByTy = CountedByInst->getType();
// Build a load of the index and subtract it from the count.
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9fe8f1d7da780c8..b77d9d8904dfc2d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -26,6 +26,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/NSAPI.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceManager.h"
@@ -940,8 +941,7 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
if (const ValueDecl *VD = CGF.FindCountedByField(Base)) {
IndexedType = Base->getType();
- const Expr *E = CGF.BuildCountedByFieldExpr(Base, VD);
- return CGF.EmitAnyExprToTemp(E).getScalarVal();
+ return CGF.BuildCountedByFieldExpr(Base, VD);
}
}
@@ -956,42 +956,77 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
return nullptr;
}
-const Expr *
+namespace {
+
+struct MemberExprBaseVisitor
+ : public StmtVisitor<MemberExprBaseVisitor, Expr *> {
+ MemberExprBaseVisitor() = default;
+
+ //===--------------------------------------------------------------------===//
+ // Visitor Methods
+ //===--------------------------------------------------------------------===//
+
+ Expr *Visit(Expr *E) {
+ return StmtVisitor<MemberExprBaseVisitor, Expr *>::Visit(E);
+ }
+
+ Expr *VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
+ return Visit(E->getBase());
+ }
+ Expr *VisitCastExpr(CastExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ Expr *VisitDeclRefExpr(DeclRefExpr *E) {
+ return E;
+ }
+ Expr *VisitMemberExpr(MemberExpr *E) {
+ return Visit(E->getBase());
+ }
+ Expr *VisitParenExpr(ParenExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ Expr *VisitUnaryOperator(UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+};
+
+} // end anonymous namespace
+
+llvm::Value *
CodeGenFunction::BuildCountedByFieldExpr(const Expr *Base,
const ValueDecl *CountedByVD) {
// Find the outer struct expr (i.e. p in p->a.b.c.d).
- Expr *CountedByExpr = const_cast<Expr *>(Base)->IgnoreParenImpCasts();
-
- // Work our way up the expression until we reach the DeclRefExpr.
- while (!isa<DeclRefExpr>(CountedByExpr))
- if (const auto *ME = dyn_cast<MemberExpr>(CountedByExpr))
- CountedByExpr = ME->getBase()->IgnoreParenImpCasts();
-
- // Add back an implicit cast to create the required pr-value.
- CountedByExpr = ImplicitCastExpr::Create(
- getContext(), CountedByExpr->getType(), CK_LValueToRValue, CountedByExpr,
- nullptr, VK_PRValue, FPOptionsOverride());
-
- if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountedByVD)) {
- // The counted_by field is inside an anonymous struct / union. The
- // IndirectFieldDecl has the correct order of FieldDecls to build this
- // easily. (Yay!)
- for (NamedDecl *ND : IFD->chain()) {
- auto *VD = cast<ValueDecl>(ND);
- CountedByExpr =
- MemberExpr::CreateImplicit(getContext(), CountedByExpr,
- CountedByExpr->getType()->isPointerType(),
- VD, VD->getType(), VK_LValue, OK_Ordinary);
+ Expr *CountedByExpr = MemberExprBaseVisitor().Visit(const_cast<Expr *>(Base));
+
+ llvm::Value *Res =
+ CountedByExpr->getType()->isPointerType()
+ ? EmitPointerWithAlignment(CountedByExpr).getPointer()
+ : EmitDeclRefLValue(cast<DeclRefExpr>(CountedByExpr)).getPointer(*this);
+
+ auto *Zero = llvm::ConstantInt::get(Int32Ty, 0);
+ SmallVector<llvm::Value *, 4> Indices{Zero};
+ if (const auto *FD = dyn_cast<FieldDecl>(CountedByVD)) {
+ Indices.emplace_back(llvm::ConstantInt::get(Int32Ty, FD->getFieldIndex()));
+ } else if (const auto *I = dyn_cast<IndirectFieldDecl>(CountedByVD)) {
+ for (auto *ND : I->chain()) {
+ if (auto *FD = dyn_cast<FieldDecl>(ND)) {
+ Indices.emplace_back(llvm::ConstantInt::get(Int32Ty, FD->getFieldIndex()));
+ }
}
- } else {
- CountedByExpr = MemberExpr::CreateImplicit(
- getContext(), const_cast<Expr *>(CountedByExpr),
- CountedByExpr->getType()->isPointerType(),
- const_cast<ValueDecl *>(CountedByVD), CountedByVD->getType(), VK_LValue,
- OK_Ordinary);
}
- return CountedByExpr;
+ const DeclContext *DC = CountedByVD->getLexicalDeclContext();
+ const auto *CountedByRD = cast<RecordDecl>(DC);
+
+ llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(CountedByRD->getTypeForDecl(), 0));
+ Res = Builder.CreateGEP(Ty, Res, Indices, "bork");
+
+ QualType CountedByTy(CountedByVD->getType());
+ TypeInfo TI = getContext().getTypeInfo(CountedByTy);
+ Ty = CGM.getTypes().ConvertType(CountedByTy);
+ Address Addr(Res, Ty, CharUnits::fromQuantity(TI.Align / getContext().getCharWidth()));
+
+ return Builder.CreateLoad(Addr, Res, "fork");
}
const ValueDecl *
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 618e78809db408b..e3137eb65dbad23 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3032,8 +3032,8 @@ class CodeGenFunction : public CodeGenTypeCache {
const ValueDecl *FindCountedByField(const Expr *Base);
/// Build an expression accessing the "counted_by" field.
- const Expr *BuildCountedByFieldExpr(const Expr *Base,
- const ValueDecl *CountedByVD);
+ llvm::Value *BuildCountedByFieldExpr(const Expr *Base,
+ const ValueDecl *CountedByVD);
llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre);
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hey, that looks pretty good! I'm surprised it wasn't too much more work to convert to generating the GEP directly. Nice job!
Does this change result in codegen differences in IR? (was expecting test changes that used "fork" loads and "bork" GEPs)
Thanks. There will be. There's an issue I need to hammer out with the "union_of_fams". I wanted to get everyone's opinion on this method though before I invested a ton more time on it first. :-) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added the codegen code owners for opinions, but personally, I prefer the direction this patch is heading over the one in #73465
This does seem to be a cleaner way of doing things. Thanks, @nickdesaulniers, for the suggestion. :-) |
@AaronBallman It's not letting me comment on your comment. While a compound literal doesn't have a DRE, it's also not a flexible array, so it's not supposed to get this far. Note that this method should only be executed if the flexible array member exists and has the |
…EP and non-volatile load.
Sorry, I was being lazy with my examples and leaving stuff off.
|
Ah! I see what you mean now. I'll make sure we can catch this. |
This should be ready for a final review now. |
CodeGenFunction::FindCountedByField finds a field with a corresponding base expression. Currently, it throws away the base expression. And the code you've just written tries to recompute the base. Instead of doing this dance, can we just make CodeGenFunction::FindCountedByField return both the field and the base expression? I'm extremely suspicious of the way MemberExprBaseVisitor works; it's basically never right in CodeGen to treat all CastExprs the same way, or handle MemberExprs without checking isArrow(). |
Maybe...
I initially had a check to see if a cast had the same For the |
I'd like to see a few tests involving multiple arrows in the same expression. (If my understanding is correct, you want to cut the recursion when you see an arrow member.) Looking at the code again, I guess FindCountedByField doesn't explicitly compute the base expression, so maybe that doesn't work as-is. But tying the recursion over the expression to the recursion over the record would be a good way to ensure the base expression is actually the expression you want. The way the code is currently written seems to strongly assume that if there's a relevant counted_by field, it's usable no matter how the expression is actually structured. |
Correct. I'll add the code and some testcases.
There are Sema checks to ensure that |
return E; | ||
return Visit(E->getBase()); | ||
} | ||
Expr *VisitCastExpr(CastExpr *E) { return Visit(E->getSubExpr()); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this seems to look through cast expressions. Does it mean casts to/from flexible base would work as below? Are these intended?
struct foo {
int c;
int fam[__counted_by(c)];
};
void bar(void) {
char buf[100];
__bdos((struct foo *)buf, 0); // the base foo will not be identified. is it intended?
};
struct not_foo {
int c;
};
void baz(struct foo *p) {
__bdos((struct not_foo*)p, 0); // the base will be identified as `struct foo *`. is it intended?
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In a way, yes. I'm being extra careful because this is a security feature. Perhaps I'm being too cautious?
auto *Idx = EmitIdxAfterBase(/*Promote*/true); | ||
|
||
if (SanOpts.has(SanitizerKind::ArrayBounds)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make it easier if you use CodeGenFunction::EmitLValueForField
instead to get the fields?
… pointer. Also add CallExpr to the visitor.
auto *Idx = EmitIdxAfterBase(/*Promote*/true); | ||
|
||
if (SanOpts.has(SanitizerKind::ArrayBounds)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Recursively skipping over all GEPs seems, at best, extremely fragile; there could be GEPs for reasons other than the member expressions you care about.
@efriedma-quic, @nickdesaulniers, @rapidsna: This is the latest version of this patch. I believe it's ready to submit. I really tried to implement @efriedma-quic's idea, but there were too many corner cases that made it much trickier than first imagined. I know that it's not impossible to implement it, but the further I went along the more I realized that the infrastructure just isn't sufficient to deal with it. I would much rather use It would be beneficial to rework This patch takes advantage of the fact that the Clang front-end appends the "source element type" to the GEPs it generates. So it's therefore possible to find the pointer to the correct struct base. PTAL. |
… and doesn't have side-effects.
ME->getMemberDecl()->hasAttr<CountedByAttr>()) { | ||
RecordDecl *RD = ME->getMemberDecl() | ||
->getDeclContext() | ||
->getOuterLexicalRecordContext(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. Is the use of getOuterLexicalRecordContext()
just trying to look through anonymous records? It makes sense that counted_by
paths for members of anonymous records would be specified starting from the containing non-anonymous type, so we need to find that type. But I think getOuterLexicalRecordContext()
does too much here: it's actively wrong whenever you have a record defined lexically within some other record, which doesn't always look like the anonymous record pattern. For example:
// C++
struct A { // <- the outer lexical record context
struct B {
int member[] __counted_by(...);
};
};
// C
struct A { // <- the outer lexical record context, I'm pretty sure
struct {
int member[] __counted_by(...);
} *ptr;
};
I think getNonTransparentContext()
would be right, but you should also be able to get the base type of the counted_by
path from the attribute itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first example is why I'm using getOuterLexicalRecordContext
, because we want to start looking for the count field from the beginning of the most-enclosing struct.
The second example isn't currently allowed (you'll get an error unless the count
field is in the outer structure). It's probably a bug when sema checking the attribute. I'll need to address that, but I'd like to do it as a separate patch. If the second example is an anonymous struct, then it's iffy if you can define that as a flexible array on its own. If it's a "named" struct, then it should be quite possible to do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is the first example supposed to work? Is the expectation that __counted_by
will only be enforced if the access expression happens to previously pass through some instance of A
, e.g. someA->x.y.z.someB->member
, and it better be the instance we have in mind? What happens if I just do someB->member
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Basically, yes. If you have something like:
void foo(struct B *b) {
bar(b->member[12]);
}
Then there won't be a check, because we won't be able to "see" the count
in struct A
. I know it's restrictive, but it's the best we can do given that someB
may not be allocated within an enclosing struct A
.
auto *Idx = EmitIdxAfterBase(/*Promote*/true); | ||
|
||
if (SanOpts.has(SanitizerKind::ArrayBounds)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we not just reverse the member access to get back to the root of the counted_by
path? Presumably the root of the counted_by
path for a field is always the non-transparent context of the field. This should always be at a constant offset from the address point of the field — there's no such thing as an anonymous array. Or am I misunderstanding something important about the constraints on counted_by
paths?
… This change also restricts the 'counted_by' attribute to a top-level flexible array member. or: "Hal, have you replaced that lightbulb in the kitchen?" "WHAT DOES IT LOOK LIKE I'M DOING?!"
The latest version of this change causes test failures for me:
I don't mean for this next bit to sound as aggressive or irritated as it might but #73168 has been open for almost a month now and it is actively preventing us from testing the Linux kernel against tip of tree LLVM because (for better or worse...) the kernel has started deploying |
Apparently, the
This supposed bug fix snowballed out of control. I'd be happy to revert to a previous version and then submit the newer work as a improvements. |
Linux kernel builds have been broken since the initial feature landed. With timing around the holidays, I wonder if we should consider backing everything out. If the base commit ships in clang-18, then the linux kernel feature test macros will have to be converted to version test macros to work around issues with the initial implementation. Either this fix needs to land BEFORE release/18.x branches, or the entire feature backed out. |
That's another option. I'll prepare a revert. |
Here's a revert of the feature. |
!isa<RecordDecl>(RD->getLexicalParent())) | ||
break; | ||
DC = RD->getLexicalParent(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
while (true) {
const auto *RD = dyn_cast<RecordDecl>(DC);
if (!RD || !RD->isAnonymousStructOrUnion())
break;
DC = RD->getLexicalParent();
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll do it, but could you explain why this is better? Is it a style guide issue?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's computing and casting things once instead of twice. It's just generally much clearer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really. I'm checking to make sure that the RecordDecl
's lexical parent is also a RecordDecl
. I need to do this, otherwise DC
will end up as TranslationUnit
in some cases.
DC = RD->getLexicalParent(); | ||
} | ||
|
||
if (DC != FD->getDeclContext()->getOuterLexicalRecordContext()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to figure out this thing about getOuterLexicalRecordContext()
before we can go forward here. It sounds like you might be concerned about the established behavior of some existing attribute that isn't the -fbounds-safety
__counted_by
, but which people are trying to merge with __counted_by
on general principle?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this to ensure that we're not dealing with FAMs in sub-structures. I need to work with @rapidsna's work to make sure that what they do is what this attribute will do. It's not really a perfect check, but I removed the code in CodeGen that handles FAMs in substructs. So I think we can table this discussion if this patch goes on.
|
||
if (isa<FieldDecl>(D)) | ||
++FieldNo; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am deeply confused by the code in this function. Why are we visiting every member of the record?
It looks like the caller has the exact field path you need, but it's throwing that away and doing a brute-force recursive search. Just take a NamedDecl
here and require it to be either a FieldDecl
or an IndirectFieldDecl
. Or better yet, add that as a method to ASTRecordLayout
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you call getFieldIndex
for a FieldDecl
that's not in a top-level of the RecordDecl
, it will assert. So I need to go through each field one at a time, because the method is broken.
I was planning on adding that to the record layout contexts, but because this patch has been through the wringer, I didn't want to throw yet another reason for people to hate it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's start by breaking that out as a separate patch. Just make a method on ASTRecordLayout
that takes an IndirectFieldDecl
and recursively adds up the offsets for every step in the path.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could I please just do that as a follow-up patch? I don't have a great understanding of ASTRecordLayout
and don't want to get bogged down in yet another endless review cycle.
There are many issues that popped up with the counted_by feature. The patch #73730 has grown too large and approval is blocking Linux testing. Includes reverts of: commit 769bc11 ("[Clang] Implement the 'counted_by' attribute (#68750)") commit bc09ec6 ("[CodeGen] Revamp counted_by calculations (#70606)") commit 1a09cfb ("[Clang] counted_by attr can apply only to C99 flexible array members (#72347)") commit a76adfb ("[NFC][Clang] Refactor code to calculate flexible array member size (#72790)") commit d8447c7 ("[Clang] Correct handling of negative and out-of-bounds indices (#71877)") Partial commit b31cd07 ("[Clang] Regenerate test checks (NFC)") Closes #73168 Closes #75173
#75857 has been merged in. This PR is no longer valid. |
Couldn't you just rebase it like:
It's a bit of a matter of taste obviously how to deal with such situations. I tend to prefer not to split the context across several PRs... |
Since the entire feature has been reverted, I don't think the discussion here is particularly useful. |
This is an alternative to #73465. It generates the GEP directly. It's not tested well, but it might be a nicer solution rather than adding AST nodes.
PTAL