-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[analyzer] Trigger copy event when copying empty structs (3/4) #115918
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-static-analyzer-1 Author: Balazs Benics (steakhal) ChangesPreviously, ExprEngine would just skip copying empty structs. Split from #114835 Full diff: https://github.com/llvm/llvm-project/pull/115918.diff 3 Files Affected:
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index ccc3097e8d2f97..17ee1f7c945edd 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -68,23 +68,15 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred,
Bldr.takeNodes(Pred);
assert(ThisRD);
- if (!ThisRD->isEmpty()) {
- // Load the source value only for non-empty classes.
- // Otherwise it'd retrieve an UnknownVal
- // and bind it and RegionStore would think that the actual value
- // in this region at this offset is unknown.
- SVal V = Call.getArgSVal(0);
-
- // If the value being copied is not unknown, load from its location to get
- // an aggregate rvalue.
- if (std::optional<Loc> L = V.getAs<Loc>())
- V = Pred->getState()->getSVal(*L);
- else
- assert(V.isUnknownOrUndef());
- evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
- } else {
- Dst.Add(Pred);
- }
+ SVal V = Call.getArgSVal(0);
+
+ // If the value being copied is not unknown, load from its location to get
+ // an aggregate rvalue.
+ if (std::optional<Loc> L = V.getAs<Loc>())
+ V = Pred->getState()->getSVal(*L);
+ else
+ assert(V.isUnknownOrUndef());
+ evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
PostStmt PS(CallExpr, LCtx);
for (ExplodedNode *N : Dst) {
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 6bad9a93a30169..b54ba43ff42414 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -608,6 +608,12 @@ class RegionStoreManager : public StoreManager {
return getBinding(getRegionBindings(S), L, T);
}
+ /// Returns the value of the default binding of region \p BaseR
+ /// if and only if that is the unique binding in the cluster of \p BaseR.
+ /// \p BaseR must be a base region.
+ std::optional<SVal> getUniqueDefaultBinding(Store S,
+ const MemRegion *BaseR) const;
+
std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
// Default bindings are always applied over a base region so look up the
@@ -2336,20 +2342,16 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
}
-static bool isRecordEmpty(const RecordDecl *RD) {
- if (!RD->field_empty())
- return false;
- if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
- return CRD->getNumBases() == 0;
- return true;
-}
-
SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
const TypedValueRegion *R) {
const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
- if (!RD->getDefinition() || isRecordEmpty(RD))
+ if (!RD->getDefinition())
return UnknownVal();
+ // We also create a LCV for copying empty structs because then the store
+ // behavior doesn't depend on the struct layout.
+ // This way even an empty struct can carry taint, no matter if creduce drops
+ // the last field member or not.
return createLazyBinding(B, R);
}
@@ -2609,9 +2611,42 @@ RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B,
return NewB;
}
+std::optional<SVal>
+RegionStoreManager::getUniqueDefaultBinding(Store S,
+ const MemRegion *BaseR) const {
+ assert(BaseR == BaseR->getBaseRegion() && "Expecting a base region");
+ const auto *Cluster = getRegionBindings(S).lookup(BaseR);
+ if (!Cluster || !llvm::hasSingleElement(*Cluster))
+ return std::nullopt;
+
+ const auto [Key, Value] = *Cluster->begin();
+ return Key.isDirect() ? std::optional<SVal>{} : Value;
+}
+
std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct(
RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD,
nonloc::LazyCompoundVal LCV) {
+ // If we try to copy a Conjured value representing the value of the whole
+ // struct, don't try to element-wise copy each field.
+ // That would unnecessarily bind Derived symbols slicing off the subregion for
+ // the field from the whole Conjured symbol.
+ //
+ // struct Window { int width; int height; };
+ // Window getWindow(); <-- opaque fn.
+ // Window w = getWindow(); <-- conjures a new Window.
+ // Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct"
+ //
+ // We should not end up with a new Store for "w2" like this:
+ // Direct [ 0..31]: Derived{Conj{}, w.width}
+ // Direct [32..63]: Derived{Conj{}, w.height}
+ // Instead, we should just bind that Conjured value instead.
+ if (LCV.getRegion()->getBaseRegion() == LCV.getRegion()) {
+ if (auto Val = getUniqueDefaultBinding(LCV.getStore(), LCV.getRegion())) {
+ return B.addBinding(BindingKey::Make(R, BindingKey::Default),
+ Val.value());
+ }
+ }
+
FieldVector Fields;
if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp
index 5ed188aa8f1eae..41d0d97161bba1 100644
--- a/clang/test/Analysis/ctor-trivial-copy.cpp
+++ b/clang/test/Analysis/ctor-trivial-copy.cpp
@@ -1,8 +1,12 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \
+// RUN: 2>&1 | FileCheck %s
-template<typename T>
-void clang_analyzer_dump(T&);
+void clang_analyzer_printState();
+template<typename T> void clang_analyzer_dump_lref(T&);
+template<typename T> void clang_analyzer_dump_val(T);
+template <typename T> T conjure();
+template <typename... Ts> void nop(const Ts &... args) {}
struct aggr {
int x;
@@ -15,20 +19,104 @@ struct empty {
void test_copy_return() {
aggr s1 = {1, 2};
aggr const& cr1 = aggr(s1);
- clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
+ clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
empty s2;
empty const& cr2 = empty{s2};
- clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
+ clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
}
void test_assign_return() {
aggr s1 = {1, 2};
aggr d1;
- clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }}
+ clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }}
empty s2;
empty d2;
- clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown
+ clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown
}
+
+namespace trivial_struct_copy {
+
+void _01_empty_structs() {
+ clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{lazyCompoundVal}}
+ empty Empty = conjure<empty>();
+ empty Empty2 = Empty;
+ empty Empty3 = Empty2;
+ // All of these should refer to the exact same LCV, because all of
+ // these trivial copies refer to the original conjured value.
+ // There were Unknown before:
+ clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}
+
+ // We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3".
+ clang_analyzer_printState();
+ // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
+ // CHECK-NEXT: ]}
+ // CHECK-NEXT: ]},
+
+ nop(Empty, Empty2, Empty3);
+}
+
+void _02_structs_with_members() {
+ clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{lazyCompoundVal}}
+ aggr Aggr = conjure<aggr>();
+ aggr Aggr2 = Aggr;
+ aggr Aggr3 = Aggr2;
+ // All of these should refer to the exact same LCV, because all of
+ // these trivial copies refer to the original conjured value.
+ clang_analyzer_dump_val(Aggr); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}}
+
+ // We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3".
+ // We used to have Derived symbols for the individual fields that were
+ // copied as part of copying the whole struct.
+ clang_analyzer_printState();
+ // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Aggr", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
+ // CHECK-NEXT: ]}
+ // CHECK-NEXT: ]},
+
+ nop(Aggr, Aggr2, Aggr3);
+}
+
+// Tests that use `clang_analyzer_printState()` must share the analysis entry
+// point, and have a strict ordering between. This is to meet the different
+// `clang_analyzer_printState()` calls in a fixed relative ordering, thus
+// FileCheck could check the stdouts.
+void entrypoint() {
+ _01_empty_structs();
+ _02_structs_with_members();
+}
+
+} // namespace trivial_struct_copy
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, clean little patch, I don't have anything to say here.
Previously, ExprEngine would just skip copying empty structs. Let's make trigger the copy event even for empty structs.
Previously, ExprEngine would just skip copying empty structs.
Let's make trigger the copy event even for empty structs.
Split from #114835