Skip to content

[clang][analyzer]Add C++ polymorphic ptr arithmetic checker #82977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions clang/docs/analyzer/checkers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,39 @@ store combinations of flag values:

Projects that use this pattern should not enable this optin checker.

.. _optin-cplusplus-PolymorphicPtrArithmetic:

optin.cplusplus.PolymorphicPtrArithmetic (C++)
""""""""""""""""""""""""""""""""""""""""""""""

This checker reports pointer arithmetic operations on arrays of polymorphic
objects, where the array has the type of its base class.
Deleting an array where the array's static type is different from its dynamic
type is undefined.

This checker corresponds to the CERT rule `CTR56-CPP: Do not use pointer arithmetic on polymorphic objects <https://wiki.sei.cmu.edu/confluence/display/cplusplus/CTR56-CPP.+Do+not+use+pointer+arithmetic+on+polymorphic+objects>`_.

.. code-block:: cpp

class Base {
public:
int member = 0;
virtual ~Base() {}
};
class Derived : public Base {}

Base *create() {
Base *x = new Derived[10]; // note: Casting from 'Derived' to 'Base' here
return x;
}

void foo() {
Base *x = create();
(x + 3)->member += 1; // warn: Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined
x[3].member += 1; // warn: Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined
delete[] static_cast<Derived*>(x);
}

.. _optin-cplusplus-UninitializedObject:

optin.cplusplus.UninitializedObject (C++)
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,11 @@ def PureVirtualCallChecker : Checker<"PureVirtualCall">,

let ParentPackage = CplusplusOptIn in {

def PolymorphicPtrArithmeticChecker : Checker<"PolymorphicPtrArithmetic">,
HelpText<"Reports doing pointer arithmetic on arrays of polymorphic objects "
"with the type of their base class">,
Documentation<HasDocumentation>;

def UninitializedObjectChecker: Checker<"UninitializedObject">,
HelpText<"Reports uninitialized fields after object construction">,
CheckerOptions<[
Expand Down
1 change: 1 addition & 0 deletions clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ add_clang_library(clangStaticAnalyzerCheckers
ContainerModeling.cpp
ConversionChecker.cpp
CXXDeleteChecker.cpp
PolymorphicPtrArithmetic.cpp
CXXSelfAssignmentChecker.cpp
DeadStoresChecker.cpp
DebugCheckers.cpp
Expand Down
178 changes: 178 additions & 0 deletions clang/lib/StaticAnalyzer/Checkers/PolymorphicPtrArithmetic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
//=== PolymorphicPtrArithmetic.cpp ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker reports pointer arithmetic operations on arrays of
// polymorphic objects, where the array has the type of its base class.
// Corresponds to the CTR56-CPP. Do not use pointer arithmetic on
// polymorphic objects
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"

using namespace clang;
using namespace ento;

namespace {
class PolymorphicPtrArithmeticChecker
: public Checker<check::PreStmt<BinaryOperator>,
check::PreStmt<ArraySubscriptExpr>> {
const BugType BT{this,
"Pointer arithmetic on polymorphic objects is undefined"};

protected:
class PtrCastVisitor : public BugReporterVisitor {
public:
void Profile(llvm::FoldingSetNodeID &ID) const override {
static int X = 0;
ID.AddPointer(&X);
}
PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
BugReporterContext &BRC,
PathSensitiveBugReport &BR) override;
};

void checkTypedExpr(const Expr *E, CheckerContext &C) const;

public:
void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
void checkPreStmt(const ArraySubscriptExpr *B, CheckerContext &C) const;
};
} // namespace

void PolymorphicPtrArithmeticChecker::checkPreStmt(const BinaryOperator *B,
CheckerContext &C) const {
if (!B->isAdditiveOp())
return;

bool IsLHSPtr = B->getLHS()->getType()->isAnyPointerType();
bool IsRHSPtr = B->getRHS()->getType()->isAnyPointerType();
if (!IsLHSPtr && !IsRHSPtr)
return;

const Expr *E = IsLHSPtr ? B->getLHS() : B->getRHS();

checkTypedExpr(E, C);
Comment on lines +62 to +69
Copy link
Contributor

@NagyDonat NagyDonat Feb 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool IsLHSPtr = B->getLHS()->getType()->isAnyPointerType();
bool IsRHSPtr = B->getRHS()->getType()->isAnyPointerType();
if (!IsLHSPtr && !IsRHSPtr)
return;
const Expr *E = IsLHSPtr ? B->getLHS() : B->getRHS();
checkTypedExpr(E, C);
if (B->getLHS()->getType()->isAnyPointerType())
checkTypedExpr(B->getLHS(), C);
if (B->getRHS()->getType()->isAnyPointerType())
checkTypedExpr(B->getRHS(), C);

If the additive operator is a subtraction, then it's possible that both operands are pointers that should be checked.

}

void PolymorphicPtrArithmeticChecker::checkPreStmt(const ArraySubscriptExpr *B,
CheckerContext &C) const {
bool IsLHSPtr = B->getLHS()->getType()->isAnyPointerType();
bool IsRHSPtr = B->getRHS()->getType()->isAnyPointerType();
if (!IsLHSPtr && !IsRHSPtr)
return;

const Expr *E = IsLHSPtr ? B->getLHS() : B->getRHS();

checkTypedExpr(E, C);
Comment on lines +74 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool IsLHSPtr = B->getLHS()->getType()->isAnyPointerType();
bool IsRHSPtr = B->getRHS()->getType()->isAnyPointerType();
if (!IsLHSPtr && !IsRHSPtr)
return;
const Expr *E = IsLHSPtr ? B->getLHS() : B->getRHS();
checkTypedExpr(E, C);
checkTypedExpr(B->getBase(), C);

Array subscript expressions have a method getBase() which returns the pointer operand (either the LHS or the RHS).

}

void PolymorphicPtrArithmeticChecker::checkTypedExpr(const Expr *E,
CheckerContext &C) const {
const MemRegion *MR = C.getSVal(E).getAsRegion();
if (!MR)
return;

const auto *BaseClassRegion = MR->getAs<TypedValueRegion>();
const auto *DerivedClassRegion = MR->getBaseRegion()->getAs<SymbolicRegion>();
if (!BaseClassRegion || !DerivedClassRegion)
return;

const auto *BaseClass = BaseClassRegion->getValueType()->getAsCXXRecordDecl();
const auto *DerivedClass =
DerivedClassRegion->getSymbol()->getType()->getPointeeCXXRecordDecl();
if (!BaseClass || !DerivedClass)
return;

if (!BaseClass->hasDefinition() || !DerivedClass->hasDefinition())
return;

if (!DerivedClass->isDerivedFrom(BaseClass))
return;

ExplodedNode *N = C.generateNonFatalErrorNode();
if (!N)
return;

SmallString<256> Buf;
llvm::raw_svector_ostream OS(Buf);

QualType SourceType = BaseClassRegion->getValueType();
QualType TargetType =
DerivedClassRegion->getSymbol()->getType()->getPointeeType();

OS << "Doing pointer arithmetic with '" << TargetType.getAsString()
<< "' objects as their base class '"
<< SourceType.getAsString(C.getASTContext().getPrintingPolicy())
Comment on lines +118 to +120
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for using the printing policy on just one of the two getAsString() calls?

<< "' is undefined";

auto R = std::make_unique<PathSensitiveBugReport>(BT, OS.str(), N);

// Mark region of problematic base class for later use in the BugVisitor.
R->markInteresting(BaseClassRegion);
R->addVisitor<PtrCastVisitor>();
C.emitReport(std::move(R));
}

PathDiagnosticPieceRef
PolymorphicPtrArithmeticChecker::PtrCastVisitor::VisitNode(
const ExplodedNode *N, BugReporterContext &BRC,
PathSensitiveBugReport &BR) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
PathSensitiveBugReport &BR) {
PathSensitiveBugReport &BR) {
// Only emit this diagnostic piece on reports created by this checker
if (BR.getBugType() != &BT)
return;

I presume that this path diagnostic piece should not appear on bug reports emitted by other checkers (a derived -> base cast is not especially important for most situations); and the address of the bug type acts as a convenient identifier to ensure this.

const Stmt *S = N->getStmtForDiagnostics();
if (!S)
return nullptr;

const auto *CastE = dyn_cast<CastExpr>(S);
if (!CastE)
return nullptr;

// FIXME: This way of getting base types does not support reference types.
QualType SourceType = CastE->getSubExpr()->getType()->getPointeeType();
QualType TargetType = CastE->getType()->getPointeeType();

if (SourceType.isNull() || TargetType.isNull() || SourceType == TargetType)
return nullptr;

// Region associated with the current cast expression.
const MemRegion *M = N->getSVal(CastE).getAsRegion();
if (!M)
return nullptr;

// Check if target region was marked as problematic previously.
if (!BR.isInteresting(M))
return nullptr;

SmallString<256> Buf;
llvm::raw_svector_ostream OS(Buf);

OS << "Casting from '" << SourceType.getAsString() << "' to '"
<< TargetType.getAsString() << "' here";

PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
N->getLocationContext());
return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(),
/*addPosRange=*/true);
}

void ento::registerPolymorphicPtrArithmeticChecker(CheckerManager &mgr) {
mgr.registerChecker<PolymorphicPtrArithmeticChecker>();
}

bool ento::shouldRegisterPolymorphicPtrArithmeticChecker(
const CheckerManager &mgr) {
return true;
}
141 changes: 141 additions & 0 deletions clang/test/Analysis/PolymorphicPtrArithmetic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// RUN: %clang_cc1 -analyze -analyzer-checker=optin.cplusplus.PolymorphicPtrArithmetic -std=c++11 -verify -analyzer-output=text %s

struct Base {
int x = 0;
virtual ~Base() = default;
};

struct Derived : public Base {};

struct DoubleDerived : public Derived {};

Derived *get();

Base *create() {
Base *b = new Derived[3]; // expected-note{{Casting from 'Derived' to 'Base' here}}
return b;
}

void sink(Base *b) {
b[1].x++; // expected-warning{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
delete[] static_cast<Derived*>(b);
}

void sink_cast(Base *b) {
static_cast<Derived*>(b)[1].x++; // no-warning
delete[] static_cast<Derived*>(b);
}

void sink_derived(Derived *d) {
d[1].x++; // no-warning
delete[] static_cast<Derived*>(d);
}

void same_function() {
Base *sd = new Derived[10]; // expected-note{{Casting from 'Derived' to 'Base' here}}
// expected-note@-1{{Casting from 'Derived' to 'Base' here}}
sd[1].x++; // expected-warning{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
(sd + 1)->x++; // expected-warning{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
delete[] static_cast<Derived*>(sd);

Base *dd = new DoubleDerived[10]; // expected-note{{Casting from 'DoubleDerived' to 'Base' here}}
dd[1].x++; // expected-warning{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Base' is undefined}}
delete[] static_cast<DoubleDerived*>(dd);

}

void different_function() {
Base *assigned = get(); // expected-note{{Casting from 'Derived' to 'Base' here}}
assigned[1].x++; // expected-warning{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
delete[] static_cast<Derived*>(assigned);

Base *indirect;
indirect = get(); // expected-note{{Casting from 'Derived' to 'Base' here}}
indirect[1].x++; // expected-warning{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
delete[] static_cast<Derived*>(indirect);

Base *created = create(); // expected-note{{Calling 'create'}}
// expected-note@-1{{Returning from 'create'}}
created[1].x++; // expected-warning{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'Derived' objects as their base class 'Base' is undefined}}
delete[] static_cast<Derived*>(created);

Base *sb = new Derived[10]; // expected-note{{Casting from 'Derived' to 'Base' here}}
sink(sb); // expected-note{{Calling 'sink'}}
}

void safe_function() {
Derived *d = new Derived[10];
d[1].x++; // no-warning
delete[] static_cast<Derived*>(d);

Base *b = new Derived[10];
static_cast<Derived*>(b)[1].x++; // no-warning
delete[] static_cast<Derived*>(b);

Base *sb = new Derived[10];
sink_cast(sb); // no-warning

Derived *sd = new Derived[10];
sink_derived(sd); // no-warning
}

void multiple_derived() {
Base *b = new DoubleDerived[10]; // expected-note{{Casting from 'DoubleDerived' to 'Base' here}}
b[1].x++; // expected-warning{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Base' is undefined}}
delete[] static_cast<DoubleDerived*>(b);

Base *b2 = new DoubleDerived[10]; // expected-note{{Casting from 'DoubleDerived' to 'Base' here}}
Derived *d2 = static_cast<Derived*>(b2); // expected-note{{Casting from 'Base' to 'Derived' here}}
d2[1].x++; // expected-warning{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Derived' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Derived' is undefined}}
delete[] static_cast<DoubleDerived*>(d2);

Derived *d3 = new DoubleDerived[10]; // expected-note{{Casting from 'DoubleDerived' to 'Derived' here}}
Base *b3 = d3; // expected-note{{Casting from 'Derived' to 'Base' here}}
b3[1].x++; // expected-warning{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Base' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Base' is undefined}}
delete[] static_cast<DoubleDerived*>(b3);

Base *b4 = new DoubleDerived[10];
Derived *d4 = static_cast<Derived*>(b4);
DoubleDerived *dd4 = static_cast<DoubleDerived*>(d4);
dd4[1].x++; // no-warning
delete[] static_cast<DoubleDerived*>(dd4);

Base *b5 = new DoubleDerived[10]; // expected-note{{Casting from 'DoubleDerived' to 'Base' here}}
DoubleDerived *dd5 = static_cast<DoubleDerived*>(b5); // expected-note{{Casting from 'Base' to 'DoubleDerived' here}}
Derived *d5 = dd5; // expected-note{{Casting from 'DoubleDerived' to 'Derived' here}}
d5[1].x++; // expected-warning{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Derived' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Derived' is undefined}}
delete[] static_cast<DoubleDerived*>(d5);
}

void other_operators() {
Derived *d = new Derived[10];
Base *b1 = d;
Base *b2 = d + 1;
Base *b3 = new Derived[10];
if (b1 < b2) return; // no-warning
if (b1 == b3) return; // no-warning
}

void unrelated_casts() {
Base *b = new DoubleDerived[10]; // expected-note{{Casting from 'DoubleDerived' to 'Base' here}}
Base &b2 = *b; // no-note: See the FIXME.

// FIXME: Displaying casts of reference types is not supported.
Derived &d2 = static_cast<Derived&>(b2); // no-note: See the FIXME.

Derived *d = &d2; // no-note: See the FIXME.
d[1].x++; // expected-warning{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Derived' is undefined}}
// expected-note@-1{{Doing pointer arithmetic with 'DoubleDerived' objects as their base class 'Derived' is undefined}}
delete[] static_cast<DoubleDerived*>(d);
}