Skip to content

[Safe Buffers][BoundsSafety] Fix a bug in the interop analysis that can cause infinite loops #10129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 65 additions & 36 deletions clang/lib/Analysis/UnsafeBufferUsage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,23 +426,37 @@ getDependentValuesFromCall(const CountAttributedType *CAT,
// Other kinds of function calls are not supported, so an expression of the
// form `f(...)` is not supported.
struct CompatibleCountExprVisitor
: public ConstStmtVisitor<CompatibleCountExprVisitor, bool, const Expr *> {
: public ConstStmtVisitor<CompatibleCountExprVisitor, bool, const Expr *,
bool> {
// The third 'bool' type parameter for each visit method indicates whether the
// current visiting expression is the result of the formal parameter to actual
// argument substitution. Since the argument expression may contain DREs
// referring back to those parameters (in cases of recursive calls), the
// analysis may hit an infinite loop if it does not know whether the
// substitution has already happened. A typical example that could introduce
// an infinite loop without this knowledge is shown below.
// ```
// void f(int * __counted_by(n) p, size_t n) {
// f(p, n);
// }
// ```
using BaseVisitor =
ConstStmtVisitor<CompatibleCountExprVisitor, bool, const Expr *>;
ConstStmtVisitor<CompatibleCountExprVisitor, bool, const Expr *, bool>;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be simpler to keep this flag as a member variable and then use llvm::SaveAndRestore to set the flag to true for recursive calls? In this case, you wouldn't have to propagate it in each call.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, using a global flag results in much less code change.
Adding a parameter to those visit methods is my personal preference because I feel it naturally indicates that the flag only affects sub-expressions during the visit, whereas using a global flag requires the programmer to remember to manage the flag whenever they attempt to make a substitution.

Considering this is a somewhat urgent fix, shall we merge it as is for now? If this visitor grows more complicated later, we can re-evaluate which approach is better.


const Expr *MemberBase;
const DependentValuesTy *DependentValues;
ASTContext &Ctx;

// If `Deref` has the form `*&e`, return `e`; otherwise return nullptr.
const Expr *trySimplifyDerefAddressof(const UnaryOperator *Deref) {
const Expr *trySimplifyDerefAddressof(const UnaryOperator *Deref,
bool hasBeenSubstituted) {
const Expr *DerefOperand = Deref->getSubExpr()->IgnoreParenImpCasts();

if (const auto *UO = dyn_cast<UnaryOperator>(DerefOperand))
if (UO->getOpcode() == UO_AddrOf)
return UO->getSubExpr();
if (const auto *DRE = dyn_cast<DeclRefExpr>(DerefOperand)) {
if (!DependentValues)
if (!DependentValues || hasBeenSubstituted)
return nullptr;

auto I = DependentValues->find(DRE->getDecl());
Expand All @@ -460,18 +474,22 @@ struct CompatibleCountExprVisitor
ASTContext &Ctx)
: MemberBase(MemberBase), DependentValues(DependentValues), Ctx(Ctx) {}

bool VisitStmt(const Stmt *S, const Expr *E) { return false; }
bool VisitStmt(const Stmt *S, const Expr *E, bool hasBeenSubstituted) {
return false;
}

bool VisitImplicitCastExpr(const ImplicitCastExpr *SelfICE,
const Expr *Other) {
return Visit(SelfICE->getSubExpr(), Other);
bool VisitImplicitCastExpr(const ImplicitCastExpr *SelfICE, const Expr *Other,
bool hasBeenSubstituted) {
return Visit(SelfICE->getSubExpr(), Other, hasBeenSubstituted);
}

bool VisitParenExpr(const ParenExpr *SelfPE, const Expr *Other) {
return Visit(SelfPE->getSubExpr(), Other);
bool VisitParenExpr(const ParenExpr *SelfPE, const Expr *Other,
bool hasBeenSubstituted) {
return Visit(SelfPE->getSubExpr(), Other, hasBeenSubstituted);
}

bool VisitIntegerLiteral(const IntegerLiteral *SelfIL, const Expr *Other) {
bool VisitIntegerLiteral(const IntegerLiteral *SelfIL, const Expr *Other,
bool hasBeenSubstituted) {
if (const auto *IntLit =
dyn_cast<IntegerLiteral>(Other->IgnoreParenImpCasts())) {
return SelfIL == IntLit ||
Expand All @@ -481,7 +499,8 @@ struct CompatibleCountExprVisitor
}

bool VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *Self,
const Expr *Other) {
const Expr *Other,
bool hasBeenSubstituted) {
// If `Self` is a `sizeof` expression, try to evaluate and compare the two
// expressions as constants:
if (Self->getKind() == UnaryExprOrTypeTrait::UETT_SizeOf) {
Expand All @@ -498,17 +517,19 @@ struct CompatibleCountExprVisitor
return false;
}

bool VisitCXXThisExpr(const CXXThisExpr *SelfThis, const Expr *Other) {
bool VisitCXXThisExpr(const CXXThisExpr *SelfThis, const Expr *Other,
bool hasBeenSubstituted) {
return isa<CXXThisExpr>(Other->IgnoreParenImpCasts());
}

bool VisitDeclRefExpr(const DeclRefExpr *SelfDRE, const Expr *Other) {
bool VisitDeclRefExpr(const DeclRefExpr *SelfDRE, const Expr *Other,
bool hasBeenSubstituted) {
const ValueDecl *SelfVD = SelfDRE->getDecl();

if (DependentValues) {
if (DependentValues && !hasBeenSubstituted) {
const auto It = DependentValues->find(SelfVD);
if (It != DependentValues->end())
return Visit(It->second, Other);
return Visit(It->second, Other, true);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me what the iteration order is, could you explain what this class does? Does it visit everything as intended in something like this? If nothing else, I think it would be good to have test cases with shared variables, and the same variable appearing multiple times in the same expression.

static void foo(int * __counted_by(n + n * m) p, size_t n, int * __counted_by(m * m + o) q, int * __counted_by(o) r, size_t m, size_t o) {
  foo(p, n, q, r, m, o);
}

Copy link
Author

@ziqingluo-90 ziqingluo-90 Mar 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added this test.

Let me try to explain it with more context.
We need to compare an expression after applying a mapping from formal parameters to actual arguments against another expression. In your example, one of the comparisons is

(n + n * m)[n->n, m->m]  vs. n + n * m

where the left-hand side represents the expected length of the formal parameter p of the callee, into which the actual arguments need to be substituted (i.e., parameter n maps to argument n, etc.); the right-hand side is the actual length of the argument p inferred by our analysis through p's counted-by type.
(Note that symbols like `p`, `n`, and `m` play different roles---parameter or argument---depending on the context in this recursive situation.)

Our implementation uses a visitor to traverse the two expressions being compared, e1 and e2, and applies the substitution when it visits a DRE of a formal parameter in e1. Naturally, it must make sure each reference to a formal parameter in e1 gets substituted exactly once; otherwise it will enter an infinite loop in the example above. This is the bug.

The fix lets the visitor take an extra argument representing whether a sub-expression being visited is the result of substitution. If so, no substitution shall happen during the visit of the sub-expression.
For example, suppose we are visiting an expression v + v with a mapping {v -> a + a, a -> b}.
The visitor first visits the LHS v and replaces it with a + a. It then visits a + a with the knowledge that no substitution should happen for a + a. Without the fix, the visitor will erroneously replace a with b. Since the information is passed by an argument, it will not affect the visits of other AST branches. So when the visitor goes back to visit the RHS v, it knows correctly that v needs to be substituted to a + a again.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, that makes sense to me!

}

const auto *O = Other->IgnoreParenImpCasts();
Expand All @@ -523,58 +544,63 @@ struct CompatibleCountExprVisitor
const auto *OtherME = dyn_cast<MemberExpr>(O);
if (MemberBase && OtherME) {
return OtherME->getMemberDecl() == SelfVD &&
Visit(OtherME->getBase(), MemberBase);
Visit(OtherME->getBase(), MemberBase, hasBeenSubstituted);
}

return false;
}

bool VisitMemberExpr(const MemberExpr *Self, const Expr *Other) {
bool VisitMemberExpr(const MemberExpr *Self, const Expr *Other,
bool hasBeenSubstituted) {
// Even though we don't support member expression in counted-by, actual
// arguments can be member expressions.
if (Self == Other)
return true;
if (const auto *DRE = dyn_cast<DeclRefExpr>(Other->IgnoreParenImpCasts()))
return MemberBase && Self->getMemberDecl() == DRE->getDecl() &&
Visit(Self->getBase(), MemberBase);
Visit(Self->getBase(), MemberBase, hasBeenSubstituted);
if (const auto *OtherME =
dyn_cast<MemberExpr>(Other->IgnoreParenImpCasts())) {
return Self->getMemberDecl() == OtherME->getMemberDecl() &&
Visit(Self->getBase(), OtherME->getBase());
Visit(Self->getBase(), OtherME->getBase(), hasBeenSubstituted);
}
return false;
}

bool VisitUnaryOperator(const UnaryOperator *SelfUO, const Expr *Other) {
bool VisitUnaryOperator(const UnaryOperator *SelfUO, const Expr *Other,
bool hasBeenSubstituted) {
if (SelfUO->getOpcode() != UO_Deref)
return false; // We don't support any other unary operator

if (const auto *OtherUO =
dyn_cast<UnaryOperator>(Other->IgnoreParenImpCasts())) {
if (SelfUO->getOpcode() == OtherUO->getOpcode())
return Visit(SelfUO->getSubExpr(), OtherUO->getSubExpr());
return Visit(SelfUO->getSubExpr(), OtherUO->getSubExpr(),
hasBeenSubstituted);
}
// If `Other` is not a dereference expression, try to simplify `SelfUO`:
if (const auto *SimplifiedSelf = trySimplifyDerefAddressof(SelfUO)) {
return Visit(SimplifiedSelf, Other);
if (const auto *SimplifiedSelf =
trySimplifyDerefAddressof(SelfUO, hasBeenSubstituted)) {
return Visit(SimplifiedSelf, Other, hasBeenSubstituted);
}
return false;
}

bool VisitBinaryOperator(const BinaryOperator *SelfBO, const Expr *Other) {
bool VisitBinaryOperator(const BinaryOperator *SelfBO, const Expr *Other,
bool hasBeenSubstituted) {
const auto *OtherBO =
dyn_cast<BinaryOperator>(Other->IgnoreParenImpCasts());
if (OtherBO && OtherBO->getOpcode() == SelfBO->getOpcode()) {
return Visit(SelfBO->getLHS(), OtherBO->getLHS()) &&
Visit(SelfBO->getRHS(), OtherBO->getRHS());
return Visit(SelfBO->getLHS(), OtherBO->getLHS(), hasBeenSubstituted) &&
Visit(SelfBO->getRHS(), OtherBO->getRHS(), hasBeenSubstituted);
}

return false;
}

// Support any overloaded operator[] so long as it is a const method.
bool VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *SelfOpCall,
const Expr *Other) {
const Expr *Other, bool hasBeenSubstituted) {
if (SelfOpCall->getOperator() != OverloadedOperatorKind::OO_Subscript)
return false;

Expand All @@ -585,8 +611,10 @@ struct CompatibleCountExprVisitor
if (const auto *OtherOpCall =
dyn_cast<CXXOperatorCallExpr>(Other->IgnoreParenImpCasts()))
if (SelfOpCall->getOperator() == OtherOpCall->getOperator()) {
return Visit(SelfOpCall->getArg(0), OtherOpCall->getArg(0)) &&
Visit(SelfOpCall->getArg(1), OtherOpCall->getArg(1));
return Visit(SelfOpCall->getArg(0), OtherOpCall->getArg(0),
hasBeenSubstituted) &&
Visit(SelfOpCall->getArg(1), OtherOpCall->getArg(1),
hasBeenSubstituted);
}
return false;
}
Expand All @@ -595,17 +623,17 @@ struct CompatibleCountExprVisitor
// considered unsafe, they can be safely used on constant arrays with
// known-safe literal indexes.
bool VisitArraySubscriptExpr(const ArraySubscriptExpr *SelfAS,
const Expr *Other) {
const Expr *Other, bool hasBeenSubstituted) {
if (const auto *OtherAS =
dyn_cast<ArraySubscriptExpr>(Other->IgnoreParenImpCasts()))
return Visit(SelfAS->getLHS(), OtherAS->getLHS()) &&
Visit(SelfAS->getRHS(), OtherAS->getRHS());
return Visit(SelfAS->getLHS(), OtherAS->getLHS(), hasBeenSubstituted) &&
Visit(SelfAS->getRHS(), OtherAS->getRHS(), hasBeenSubstituted);
return false;
}

// Support non-static member call:
bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *SelfCall,
const Expr *Other) {
const Expr *Other, bool hasBeenSubstituted) {
const CXXMethodDecl *MD = SelfCall->getMethodDecl();

// The callee member function must be a const function with no parameter:
Expand All @@ -614,7 +642,8 @@ struct CompatibleCountExprVisitor
dyn_cast<CXXMemberCallExpr>(Other->IgnoreParenImpCasts())) {
return OtherCall->getMethodDecl() == MD &&
Visit(SelfCall->getImplicitObjectArgument(),
OtherCall->getImplicitObjectArgument());
OtherCall->getImplicitObjectArgument(),
hasBeenSubstituted);
}
}
return false;
Expand Down Expand Up @@ -660,7 +689,7 @@ bool isCompatibleWithCountExpr(const Expr *E, const Expr *ExpectedCountExpr,
const DependentValuesTy *DependentValues,
ASTContext &Ctx) {
CompatibleCountExprVisitor Visitor(MemberBase, DependentValues, Ctx);
return Visitor.Visit(ExpectedCountExpr, E);
return Visitor.Visit(ExpectedCountExpr, E, /* hasBeenSubstituted*/ false);
}

// Returns if a pair of expressions contain method calls to .data()/.c_str() and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -598,3 +598,21 @@ namespace output_param_test {
};

} // namespace output_param_test


// Regression case: a recursive call that forwards its own `__counted_by(n)`
// pointer `p` together with its count `n`. As the function name records, this
// shape previously sent the analysis into an infinite loop. No diagnostics are
// annotated on the call.
static void previous_infinite_loop(int * __counted_by(n) p, size_t n) {
previous_infinite_loop(p, n);
}

// Same recursive-forwarding shape as a plain `__counted_by(n)` parameter, but
// the count is the compound expression `n + 10`, so the parameter `n` appears
// inside a binary operator. No diagnostics are annotated on the call.
static void previous_infinite_loop2(int * __counted_by(n + 10) p, size_t n) {
previous_infinite_loop2(p, n);
}

// Recursive calls where several parameters have count expressions that share
// variables (`n`, `m`, `o`) and where a variable occurs more than once within
// a single count expression (`n + n * m`, `m * m + o`).
static void previous_infinite_loop3(int * __counted_by(n + n * m) p, size_t n,
// expected-note@+1 {{consider using 'std::span' and passing '.first(...).data()' to the parameter 'q'}}
int * __counted_by(m * m + o) q,
// expected-note@+1 {{consider using a safe container and passing '.data()' to the parameter 'r' and '.size()' to its dependent parameter 'o' or 'std::span' and passing '.first(...).data()' to the parameter 'r'}}
int * __counted_by(o) r, size_t m, size_t o) {
// Every argument is forwarded unchanged; no diagnostic is annotated here.
previous_infinite_loop3(p, n, q, r, m, o);
// The last argument is `o + 1` rather than `o`; two warnings are annotated on
// this call (presumably one each for `q` and `r`, whose counts mention `o`).
previous_infinite_loop3(p, n, q, r, m, o + 1); // expected-warning 2{{unsafe assignment to function parameter of count-attributed type}}
}