Skip to content

[clang][analyzer] Stable order for SymbolRef-keyed containers #121551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace ento {

class MemRegion;

using SymbolID = unsigned;

/// Symbolic value. These values used to capture symbolic execution of
/// the program.
class SymExpr : public llvm::FoldingSetNode {
Expand All @@ -39,9 +41,19 @@ class SymExpr : public llvm::FoldingSetNode {

private:
Kind K;
/// A unique identifier for this symbol.
///
/// It is useful for SymbolData to easily differentiate multiple symbols, but
/// also for "ephemeral" symbols, such as binary operations, because this id
/// can be used for arranging constraints or equivalence classes instead of
/// unstable pointer values.
///
/// Note, however, that it can't be used in Profile because SymbolManager
/// needs to compute Profile before allocating SymExpr.
const SymbolID Sym;

protected:
SymExpr(Kind k) : K(k) {}
SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}

static bool isValidTypeForSymbol(QualType T) {
// FIXME: Depending on whether we choose to deprecate structural symbols,
Expand All @@ -56,6 +68,14 @@ class SymExpr : public llvm::FoldingSetNode {

Kind getKind() const { return K; }

/// Get a unique identifier for this symbol.
/// The ID is unique across all SymExprs in a SymbolManager.
/// They reflect the allocation order of these SymExprs,
/// and are likely stable across runs.
/// Used as a key in SymbolRef containers and as part of identity
/// for SymbolData, e.g. SymbolConjured with ID = 7 is "conj_$7".
SymbolID getSymbolID() const { return Sym; }

virtual void dump() const;

virtual void dumpToStream(raw_ostream &os) const {}
Expand Down Expand Up @@ -112,28 +132,21 @@ inline raw_ostream &operator<<(raw_ostream &os,

using SymbolRef = const SymExpr *;
using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
using SymbolID = unsigned;

/// A symbol representing data which can be stored in a memory location
/// (region).
class SymbolData : public SymExpr {
const SymbolID Sym;

void anchor() override;

protected:
SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
assert(classof(this));
}
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }

public:
~SymbolData() override = default;

/// Get a string representation of the kind of the region.
virtual StringRef getKindStr() const = 0;

SymbolID getSymbolID() const { return Sym; }

unsigned computeComplexity() const override {
return 1;
};
Expand Down
100 changes: 78 additions & 22 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
Expand All @@ -43,15 +44,16 @@ class StoreManager;
class SymbolRegionValue : public SymbolData {
const TypedValueRegion *R;

public:
friend class SymExprAllocator;
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
: SymbolData(SymbolRegionValueKind, sym), R(r) {
assert(r);
assert(isValidTypeForSymbol(r->getValueType()));
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
const TypedValueRegion* getRegion() const { return R; }
const TypedValueRegion *getRegion() const { return R; }

static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
profile.AddInteger((unsigned) SymbolRegionValueKind);
Expand Down Expand Up @@ -84,7 +86,7 @@ class SymbolConjured : public SymbolData {
const LocationContext *LCtx;
const void *SymbolTag;

public:
friend class SymExprAllocator;
SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
QualType t, unsigned count, const void *symbolTag)
: SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
Expand All @@ -98,6 +100,7 @@ class SymbolConjured : public SymbolData {
assert(isValidTypeForSymbol(t));
}

public:
/// It might return null.
const Stmt *getStmt() const { return S; }
unsigned getCount() const { return Count; }
Expand Down Expand Up @@ -137,14 +140,15 @@ class SymbolDerived : public SymbolData {
SymbolRef parentSymbol;
const TypedValueRegion *R;

public:
friend class SymExprAllocator;
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
: SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
assert(parent);
assert(r);
assert(isValidTypeForSymbol(r->getValueType()));
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
SymbolRef getParentSymbol() const { return parentSymbol; }
LLVM_ATTRIBUTE_RETURNS_NONNULL
Expand Down Expand Up @@ -180,12 +184,13 @@ class SymbolDerived : public SymbolData {
class SymbolExtent : public SymbolData {
const SubRegion *R;

public:
friend class SymExprAllocator;
SymbolExtent(SymbolID sym, const SubRegion *r)
: SymbolData(SymbolExtentKind, sym), R(r) {
assert(r);
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
const SubRegion *getRegion() const { return R; }

Expand Down Expand Up @@ -222,7 +227,7 @@ class SymbolMetadata : public SymbolData {
unsigned Count;
const void *Tag;

public:
friend class SymExprAllocator;
SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
const LocationContext *LCtx, unsigned count, const void *tag)
: SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
Expand All @@ -234,6 +239,7 @@ class SymbolMetadata : public SymbolData {
assert(tag);
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
const MemRegion *getRegion() const { return R; }

Expand Down Expand Up @@ -286,15 +292,16 @@ class SymbolCast : public SymExpr {
/// The type of the result.
QualType ToTy;

public:
SymbolCast(const SymExpr *In, QualType From, QualType To)
: SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
friend class SymExprAllocator;
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
: SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) {
assert(In);
assert(isValidTypeForSymbol(From));
// FIXME: GenericTaintChecker creates symbols of void type.
// Otherwise, 'To' should also be a valid type.
}

public:
unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity = 1 + Operand->computeComplexity();
Expand Down Expand Up @@ -332,9 +339,10 @@ class UnarySymExpr : public SymExpr {
UnaryOperator::Opcode Op;
QualType T;

public:
UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T)
: SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) {
friend class SymExprAllocator;
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
QualType T)
: SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) {
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
// modeled as x + 1.
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
Expand All @@ -345,6 +353,7 @@ class UnarySymExpr : public SymExpr {
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
}

public:
unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity = 1 + Operand->computeComplexity();
Expand Down Expand Up @@ -381,8 +390,8 @@ class BinarySymExpr : public SymExpr {
QualType T;

protected:
BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
: SymExpr(k), Op(op), T(t) {
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
: SymExpr(k, Sym), Op(op), T(t) {
assert(classof(this));
// Binary expressions are results of arithmetic. Pointer arithmetic is not
// handled by binary expressions, but it is instead handled by applying
Expand Down Expand Up @@ -425,14 +434,15 @@ class BinarySymExprImpl : public BinarySymExpr {
LHSTYPE LHS;
RHSTYPE RHS;

public:
BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
QualType t)
: BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
friend class SymExprAllocator;
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
RHSTYPE rhs, QualType t)
: BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
assert(getPointer(lhs));
assert(getPointer(rhs));
}

public:
void dumpToStream(raw_ostream &os) const override {
dumpToStreamImpl(os, LHS);
dumpToStreamImpl(os, getOpcode());
Expand Down Expand Up @@ -478,6 +488,21 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *,
using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
SymExpr::Kind::SymSymExprKind>;

class SymExprAllocator {
SymbolID NextSymbolID = 0;
llvm::BumpPtrAllocator &Alloc;

public:
explicit SymExprAllocator(llvm::BumpPtrAllocator &Alloc) : Alloc(Alloc) {}

template <class SymT, typename... ArgsT> SymT *make(ArgsT &&...Args) {
return new (Alloc) SymT(nextID(), std::forward<ArgsT>(Args)...);
}

private:
SymbolID nextID() { return NextSymbolID++; }
};

class SymbolManager {
using DataSetTy = llvm::FoldingSet<SymExpr>;
using SymbolDependTy =
Expand All @@ -489,15 +514,14 @@ class SymbolManager {
/// alive as long as the key is live.
SymbolDependTy SymbolDependencies;

unsigned SymbolCounter = 0;
llvm::BumpPtrAllocator& BPAlloc;
SymExprAllocator Alloc;
BasicValueFactory &BV;
ASTContext &Ctx;

public:
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
llvm::BumpPtrAllocator& bpalloc)
: SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
llvm::BumpPtrAllocator &bpalloc)
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}

static bool canSymbolicate(QualType T);

Expand Down Expand Up @@ -687,4 +711,36 @@ class SymbolVisitor {

} // namespace clang

// Override the default definition that would use pointer values of SymbolRefs
// to order them, which is unstable due to ASLR.
// Use the SymbolID instead which reflect the order in which the symbols were
// allocated. This is usually stable across runs leading to the stability of
// ConstraintMap and other containers using SymbolRef as keys.
template <>
struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef>
: public ImutProfileInfo<clang::ento::SymbolRef> {
using value_type = clang::ento::SymbolRef;
using value_type_ref = clang::ento::SymbolRef;
using key_type = value_type;
using key_type_ref = value_type_ref;
using data_type = bool;
using data_type_ref = bool;

static key_type_ref KeyOfValue(value_type_ref D) { return D; }
static data_type_ref DataOfValue(value_type_ref) { return true; }

static bool isEqual(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
return LHS->getSymbolID() == RHS->getSymbolID();
}

static bool isLess(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
return LHS->getSymbolID() < RHS->getSymbolID();
}

// This might seem redundant, but it is required because of the way
// ImmutableSet is implemented through AVLTree:
// same as ImmutableMap, but with a non-informative "data".
static bool isDataEqual(data_type_ref, data_type_ref) { return true; }
};

#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
Loading
Loading