Skip to content

[analyzer] Refine LCV handling in Store for better taint propagation #114835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/lib/StaticAnalyzer/Checkers/Taint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,14 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
if (const MemRegion *Reg = V.getAsRegion())
return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);

if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
StoreManager &StoreMgr = State->getStateManager().getStoreManager();
if (auto DefaultVal = StoreMgr.getDefaultBinding(*LCV)) {
return getTaintedSymbolsImpl(State, *DefaultVal, Kind, returnFirstOnly);
}
}

return {};
}

Expand Down
26 changes: 9 additions & 17 deletions clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,15 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred,
Bldr.takeNodes(Pred);

assert(ThisRD);
if (!ThisRD->isEmpty()) {
// Load the source value only for non-empty classes.
// Otherwise it'd retrieve an UnknownVal
// and bind it and RegionStore would think that the actual value
// in this region at this offset is unknown.
SVal V = Call.getArgSVal(0);

// If the value being copied is not unknown, load from its location to get
// an aggregate rvalue.
if (std::optional<Loc> L = V.getAs<Loc>())
V = Pred->getState()->getSVal(*L);
else
assert(V.isUnknownOrUndef());
evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
} else {
Dst.Add(Pred);
}
SVal V = Call.getArgSVal(0);

// If the value being copied is not unknown, load from its location to get
// an aggregate rvalue.
if (std::optional<Loc> L = V.getAs<Loc>())
V = Pred->getState()->getSVal(*L);
else
assert(V.isUnknownOrUndef());
evalBind(Dst, CallExpr, Pred, ThisVal, V, true);

PostStmt PS(CallExpr, LCtx);
for (ExplodedNode *N : Dst) {
Expand Down
139 changes: 117 additions & 22 deletions clang/lib/StaticAnalyzer/Core/RegionStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ class BindingKey {
isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) &&
"Not a base");
}
public:

public:
bool isDirect() const { return P.getInt() & Direct; }
bool isDefault() const { return !isDirect(); }
bool hasSymbolicOffset() const { return P.getInt() & Symbolic; }

const MemRegion *getRegion() const { return P.getPointer(); }
Expand Down Expand Up @@ -232,27 +233,86 @@ class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *,

void printJson(raw_ostream &Out, const char *NL = "\n",
unsigned int Space = 0, bool IsDot = false) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
// TODO: We might need a .printJson for I.getKey() as well.
using namespace llvm;
DenseMap<const MemRegion *, std::string> StringifyCache;
auto ToString = [&StringifyCache](const MemRegion *R) {
auto [Place, Inserted] = StringifyCache.try_emplace(R);
if (!Inserted)
return Place->second;
std::string Res;
raw_string_ostream OS(Res);
OS << R;
Place->second = Res;
return Res;
};

using Cluster =
std::pair<const MemRegion *, ImmutableMap<BindingKey, SVal>>;
using Binding = std::pair<BindingKey, SVal>;

const auto MemSpaceBeforeRegionName = [&ToString](const Cluster *L,
const Cluster *R) {
if (isa<MemSpaceRegion>(L->first) && !isa<MemSpaceRegion>(R->first))
return true;
if (!isa<MemSpaceRegion>(L->first) && isa<MemSpaceRegion>(R->first))
return false;
return ToString(L->first) < ToString(R->first);
};

const auto SymbolicBeforeOffset = [&ToString](const BindingKey &L,
const BindingKey &R) {
if (L.hasSymbolicOffset() && !R.hasSymbolicOffset())
return true;
if (!L.hasSymbolicOffset() && R.hasSymbolicOffset())
return false;
if (L.hasSymbolicOffset() && R.hasSymbolicOffset())
return ToString(L.getRegion()) < ToString(R.getRegion());
return L.getOffset() < R.getOffset();
};

const auto DefaultBindingBeforeDirectBindings =
[&SymbolicBeforeOffset](const Binding *LPtr, const Binding *RPtr) {
const BindingKey &L = LPtr->first;
const BindingKey &R = RPtr->first;
if (L.isDefault() && !R.isDefault())
return true;
if (!L.isDefault() && R.isDefault())
return false;
assert(L.isDefault() == R.isDefault());
return SymbolicBeforeOffset(L, R);
};

const auto AddrOf = [](const auto &Item) { return &Item; };

std::vector<const Cluster *> SortedClusters;
SortedClusters.reserve(std::distance(begin(), end()));
append_range(SortedClusters, map_range(*this, AddrOf));
llvm::sort(SortedClusters, MemSpaceBeforeRegionName);

for (auto [Idx, C] : llvm::enumerate(SortedClusters)) {
const auto &[BaseRegion, Bindings] = *C;
Indent(Out, Space, IsDot)
<< "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \""
<< (const void *)I.getKey() << "\", \"items\": [" << NL;
<< "{ \"cluster\": \"" << BaseRegion << "\", \"pointer\": \""
<< (const void *)BaseRegion << "\", \"items\": [" << NL;

std::vector<const Binding *> SortedBindings;
SortedBindings.reserve(std::distance(Bindings.begin(), Bindings.end()));
append_range(SortedBindings, map_range(Bindings, AddrOf));
llvm::sort(SortedBindings, DefaultBindingBeforeDirectBindings);

++Space;
const ClusterBindings &CB = I.getData();
for (ClusterBindings::iterator CI = CB.begin(), CE = CB.end(); CI != CE;
++CI) {
Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": ";
CI.getData().printJson(Out, /*AddQuotes=*/true);
for (auto [Idx, B] : llvm::enumerate(SortedBindings)) {
const auto &[Key, Value] = *B;
Indent(Out, Space, IsDot) << "{ " << Key << ", \"value\": ";
Value.printJson(Out, /*AddQuotes=*/true);
Out << " }";
if (std::next(CI) != CE)
if (Idx != SortedBindings.size() - 1)
Out << ',';
Out << NL;
}

--Space;
Indent(Out, Space, IsDot) << "]}";
if (std::next(I) != E)
if (Idx != SortedClusters.size() - 1)
Out << ',';
Out << NL;
}
Expand Down Expand Up @@ -548,6 +608,12 @@ class RegionStoreManager : public StoreManager {
return getBinding(getRegionBindings(S), L, T);
}

/// Returns the value of the default binding of region \p BaseR
/// if and only if that is the unique binding in the cluster of \p BaseR.
/// \p BaseR must be a base region.
std::optional<SVal> getUniqueDefaultBinding(Store S,
const MemRegion *BaseR) const;

std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
// Default bindings are always applied over a base region so look up the
Expand Down Expand Up @@ -2276,20 +2342,16 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
}

static bool isRecordEmpty(const RecordDecl *RD) {
if (!RD->field_empty())
return false;
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
return CRD->getNumBases() == 0;
return true;
}

SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
const TypedValueRegion *R) {
const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
if (!RD->getDefinition() || isRecordEmpty(RD))
if (!RD->getDefinition())
return UnknownVal();

// We also create a LCV for copying empty structs because then the store
// behavior doesn't depend on the struct layout.
// This way even an empty struct can carry taint, no matter if creduce drops
// the last field member or not.
return createLazyBinding(B, R);
}

Expand Down Expand Up @@ -2549,9 +2611,42 @@ RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B,
return NewB;
}

std::optional<SVal>
RegionStoreManager::getUniqueDefaultBinding(Store S,
const MemRegion *BaseR) const {
assert(BaseR == BaseR->getBaseRegion() && "Expecting a base region");
const auto *Cluster = getRegionBindings(S).lookup(BaseR);
if (!Cluster || !llvm::hasSingleElement(*Cluster))
return std::nullopt;

const auto [Key, Value] = *Cluster->begin();
return Key.isDirect() ? std::optional<SVal>{} : Value;
}

std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct(
RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD,
nonloc::LazyCompoundVal LCV) {
// If we try to copy a Conjured value representing the value of the whole
// struct, don't try to element-wise copy each field.
// That would unnecessarily bind Derived symbols slicing off the subregion for
// the field from the whole Conjured symbol.
//
// struct Window { int width; int height; };
// Window getWindow(); <-- opaque fn.
// Window w = getWindow(); <-- conjures a new Window.
// Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct"
//
// We should not end up with a new Store for "w2" like this:
// Direct [ 0..31]: Derived{Conj{}, w.width}
// Direct [32..63]: Derived{Conj{}, w.height}
// Instead, we should just bind that Conjured value instead.
if (LCV.getRegion()->getBaseRegion() == LCV.getRegion()) {
if (auto Val = getUniqueDefaultBinding(LCV.getStore(), LCV.getRegion())) {
return B.addBinding(BindingKey::Make(R, BindingKey::Default),
Val.value());
}
}

FieldVector Fields;

if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
Expand Down
102 changes: 95 additions & 7 deletions clang/test/Analysis/ctor-trivial-copy.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \
// RUN: 2>&1 | FileCheck %s


template<typename T>
void clang_analyzer_dump(T&);
void clang_analyzer_printState();
template<typename T> void clang_analyzer_dump_lref(T&);
template<typename T> void clang_analyzer_dump_val(T);
template <typename T> T conjure();
template <typename... Ts> void nop(const Ts &... args) {}

struct aggr {
int x;
Expand All @@ -15,20 +19,104 @@ struct empty {
void test_copy_return() {
aggr s1 = {1, 2};
aggr const& cr1 = aggr(s1);
clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}

empty s2;
empty const& cr2 = empty{s2};
clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
}

void test_assign_return() {
aggr s1 = {1, 2};
aggr d1;
clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }}
clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }}

empty s2;
empty d2;
clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown
clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown
}


namespace trivial_struct_copy {

void _01_empty_structs() {
clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{lazyCompoundVal}}
empty Empty = conjure<empty>();
empty Empty2 = Empty;
empty Empty3 = Empty2;
// All of these should refer to the exact same LCV, because all of
// these trivial copies refer to the original conjured value.
// There were Unknown before:
clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}

// We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3".
clang_analyzer_printState();
// CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
// CHECK-NEXT: ]}
// CHECK-NEXT: ]},

nop(Empty, Empty2, Empty3);
}

void _02_structs_with_members() {
clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{lazyCompoundVal}}
aggr Aggr = conjure<aggr>();
aggr Aggr2 = Aggr;
aggr Aggr3 = Aggr2;
// All of these should refer to the exact same LCV, because all of
// these trivial copies refer to the original conjured value.
clang_analyzer_dump_val(Aggr); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}}

// We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3".
// We used to have Derived symbols for the individual fields that were
// copied as part of copying the whole struct.
clang_analyzer_printState();
// CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
// CHECK-NEXT: ]}
// CHECK-NEXT: ]},

nop(Aggr, Aggr2, Aggr3);
}

// Tests that use `clang_analyzer_printState()` must share the analysis entry
// point, and have a strict ordering between. This is to meet the different
// `clang_analyzer_printState()` calls in a fixed relative ordering, thus
// FileCheck could check the stdouts.
void entrypoint() {
_01_empty_structs();
_02_structs_with_members();
}

} // namespace trivial_struct_copy
Loading
Loading