-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][dataflow] Add new join
API and replace existing merge
implementations.
#80361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: Yitzhak Mandelbaum (ymand) ChangesThis patch adds a new interface for the join operation, now properly called In the process, it drops an odd (and often misused) aspect of Full diff: https://github.com/llvm/llvm-project/pull/80361.diff 4 Files Affected:
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 1543f900e401d..79100cccafccf 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -19,7 +19,6 @@
#include "clang/AST/DeclBase.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
-#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/Formula.h"
@@ -31,7 +30,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include <memory>
#include <type_traits>
#include <utility>
@@ -81,6 +79,8 @@ class Environment {
return ComparisonResult::Unknown;
}
+ /// DEPRECATED. Override `join` and/or `widen`, instead.
+ ///
/// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could
/// be a strict lattice join or a more general widening operation.
///
@@ -105,6 +105,27 @@ class Environment {
return true;
}
+ /// Modifies `JoinedVal` to approximate both `Val1` and `Val2`. This should
+ /// obey the properties of a lattice join.
+ ///
+ /// `Env1` and `Env2` can be used to query child values and path condition
+ /// implications of `Val1` and `Val2` respectively.
+ ///
+ /// Requirements:
+ ///
+ /// `Val1` and `Val2` must be distinct.
+ ///
+ /// `Val1`, `Val2`, and `JoinedVal` must model values of type `Type`.
+ ///
+ /// `Val1` and `Val2` must be assigned to the same storage location in
+ /// `Env1` and `Env2` respectively.
+ virtual void join(QualType Type, const Value &Val1, const Environment &Env1,
+ const Value &Val2, const Environment &Env2,
+ Value &JoinedVal, Environment &JoinedEnv) {
+ assert(merge(Type, Val1, Env1, Val2, Env2, JoinedVal, JoinedEnv) &&
+ "dropping merged value is unsupported");
+ }
+
/// This function may widen the current value -- replace it with an
/// approximation that can reach a fixed point more quickly than iterated
/// application of the transfer function alone. The previous value is
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 01db65866d135..26a155f5de28c 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -86,14 +86,14 @@ static bool compareDistinctValues(QualType Type, Value &Val1,
llvm_unreachable("All cases covered in switch");
}
-/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
-/// respectively, of the same type `Type`. Merging generally produces a single
+/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`,
+/// respectively, of the same type `Type`. Joining generally produces a single
/// value that (soundly) approximates the two inputs, although the actual
/// meaning depends on `Model`.
-static Value *mergeDistinctValues(QualType Type, Value &Val1,
+static Value *joinDistinctValues(QualType Type, Value &Val1,
const Environment &Env1, Value &Val2,
const Environment &Env2,
- Environment &MergedEnv,
+ Environment &JoinedEnv,
Environment::ValueModel &Model) {
// Join distinct boolean values preserving information about the constraints
// in the respective path conditions.
@@ -113,17 +113,17 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
// ```
auto &Expr1 = cast<BoolValue>(Val1).formula();
auto &Expr2 = cast<BoolValue>(Val2).formula();
- auto &A = MergedEnv.arena();
- auto &MergedVal = A.makeAtomRef(A.makeAtom());
- MergedEnv.assume(
+ auto &A = JoinedEnv.arena();
+ auto &JoinedVal = A.makeAtomRef(A.makeAtom());
+ JoinedEnv.assume(
A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
- A.makeEquals(MergedVal, Expr1)),
+ A.makeEquals(JoinedVal, Expr1)),
A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
- A.makeEquals(MergedVal, Expr2))));
- return &A.makeBoolValue(MergedVal);
+ A.makeEquals(JoinedVal, Expr2))));
+ return &A.makeBoolValue(JoinedVal);
}
- Value *MergedVal = nullptr;
+ Value *JoinedVal = nullptr;
if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
auto *RecordVal2 = cast<RecordValue>(&Val2);
@@ -131,24 +131,21 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
// `RecordVal1` and `RecordVal2` may have different properties associated
// with them. Create a new `RecordValue` with the same location but
// without any properties so that we soundly approximate both values. If a
- // particular analysis needs to merge properties, it should do so in
- // `DataflowAnalysis::merge()`.
- MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
+ // particular analysis needs to join properties, it should do so in
+ // `DataflowAnalysis::join()`.
+ JoinedVal = &JoinedEnv.create<RecordValue>(RecordVal1->getLoc());
else
// If the locations for the two records are different, need to create a
// completely new value.
- MergedVal = MergedEnv.createValue(Type);
+ JoinedVal = JoinedEnv.createValue(Type);
} else {
- MergedVal = MergedEnv.createValue(Type);
+ JoinedVal = JoinedEnv.createValue(Type);
}
- // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
- // returns false to avoid storing unneeded values in `DACtx`.
- if (MergedVal)
- if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
- return MergedVal;
+ if (JoinedVal)
+ Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv);
- return nullptr;
+ return JoinedVal;
}
// When widening does not change `Current`, return value will equal `&Prev`.
@@ -240,9 +237,9 @@ joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
continue;
}
- if (Value *MergedVal = mergeDistinctValues(
+ if (Value *JoinedVal = joinDistinctValues(
Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
- Result.insert({Loc, MergedVal});
+ Result.insert({Loc, JoinedVal});
}
}
@@ -657,10 +654,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
// cast.
auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
assert(Func != nullptr);
- if (Value *MergedVal =
- mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
- *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
- JoinedEnv.ReturnVal = MergedVal;
+ if (Value *JoinedVal =
+ joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
+ *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
+ JoinedEnv.ReturnVal = JoinedVal;
}
if (EnvA.ReturnLoc == EnvB.ReturnLoc)
diff --git a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
index a6f4c45504fa6..08e2132afd46b 100644
--- a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
@@ -364,18 +364,17 @@ class SignPropagationAnalysis
LatticeTransferState State(L, Env);
TransferMatchSwitch(Elt, getASTContext(), State);
}
- bool merge(QualType Type, const Value &Val1, const Environment &Env1,
- const Value &Val2, const Environment &Env2, Value &MergedVal,
- Environment &MergedEnv) override;
+ void join(QualType Type, const Value &Val1, const Environment &Env1,
+ const Value &Val2, const Environment &Env2, Value &MergedVal,
+ Environment &MergedEnv) override;
private:
CFGMatchSwitch<TransferState<NoopLattice>> TransferMatchSwitch;
};
-// Copied from crubit.
-BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
+BoolValue &joinBoolValues(BoolValue &Bool1, const Environment &Env1,
BoolValue &Bool2, const Environment &Env2,
- Environment &MergedEnv) {
+ Environment &JoinedEnv) {
if (&Bool1 == &Bool2) {
return Bool1;
}
@@ -383,41 +382,40 @@ BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
auto &B1 = Bool1.formula();
auto &B2 = Bool2.formula();
- auto &A = MergedEnv.arena();
- auto &MergedBool = MergedEnv.makeAtomicBoolValue();
+ auto &A = JoinedEnv.arena();
+ auto &JoinedBool = JoinedEnv.makeAtomicBoolValue();
// If `Bool1` and `Bool2` is constrained to the same true / false value,
- // `MergedBool` can be constrained similarly without needing to consider the
- // path taken - this simplifies the flow condition tracked in `MergedEnv`.
+ // `JoinedBool` can be constrained similarly without needing to consider the
+ // path taken - this simplifies the flow condition tracked in `JoinedEnv`.
// Otherwise, information about which path was taken is used to associate
- // `MergedBool` with `Bool1` and `Bool2`.
+ // `JoinedBool` with `Bool1` and `Bool2`.
if (Env1.proves(B1) && Env2.proves(B2)) {
- MergedEnv.assume(MergedBool.formula());
+ JoinedEnv.assume(JoinedBool.formula());
} else if (Env1.proves(A.makeNot(B1)) && Env2.proves(A.makeNot(B2))) {
- MergedEnv.assume(A.makeNot(MergedBool.formula()));
+ JoinedEnv.assume(A.makeNot(JoinedBool.formula()));
}
- return MergedBool;
+ return JoinedBool;
}
-bool SignPropagationAnalysis::merge(QualType Type, const Value &Val1,
- const Environment &Env1, const Value &Val2,
- const Environment &Env2, Value &MergedVal,
- Environment &MergedEnv) {
+void SignPropagationAnalysis::join(QualType Type, const Value &Val1,
+ const Environment &Env1, const Value &Val2,
+ const Environment &Env2, Value &JoinedVal,
+ Environment &JoinedEnv) {
if (!Type->isIntegerType())
- return false;
+ return;
SignProperties Ps1 = getSignProperties(Val1, Env1);
SignProperties Ps2 = getSignProperties(Val2, Env2);
if (!Ps1.Neg || !Ps2.Neg)
- return false;
- BoolValue &MergedNeg =
- mergeBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, MergedEnv);
- BoolValue &MergedZero =
- mergeBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, MergedEnv);
- BoolValue &MergedPos =
- mergeBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, MergedEnv);
- setSignProperties(MergedVal,
- SignProperties{&MergedNeg, &MergedZero, &MergedPos});
- return true;
+ return;
+ BoolValue &JoinedNeg =
+ joinBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, JoinedEnv);
+ BoolValue &JoinedZero =
+ joinBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, JoinedEnv);
+ BoolValue &JoinedPos =
+ joinBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, JoinedEnv);
+ setSignProperties(JoinedVal,
+ SignProperties{&JoinedNeg, &JoinedZero, &JoinedPos});
}
template <typename Matcher>
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index 466d33358fd38..1fee73ad02999 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -672,26 +672,23 @@ class NullPointerAnalysis final
: ComparisonResult::Different;
}
- bool merge(QualType Type, const Value &Val1, const Environment &Env1,
- const Value &Val2, const Environment &Env2, Value &MergedVal,
- Environment &MergedEnv) override {
- // Nothing to say about a value that is not a pointer.
+ void join(QualType Type, const Value &Val1, const Environment &Env1,
+ const Value &Val2, const Environment &Env2, Value &JoinedVal,
+ Environment &JoinedEnv) override {
+ // Nothing to say about a value that is not a pointer...
if (!Type->isPointerType())
- return false;
+ return;
+ // ... or, a pointer without the `is_null` property.
auto *IsNull1 = cast_or_null<BoolValue>(Val1.getProperty("is_null"));
- if (IsNull1 == nullptr)
- return false;
-
auto *IsNull2 = cast_or_null<BoolValue>(Val2.getProperty("is_null"));
- if (IsNull2 == nullptr)
- return false;
+ if (IsNull1 == nullptr || IsNull2 == nullptr)
+ return;
if (IsNull1 == IsNull2)
- MergedVal.setProperty("is_null", *IsNull1);
+ JoinedVal.setProperty("is_null", *IsNull1);
else
- MergedVal.setProperty("is_null", MergedEnv.makeTopBoolValue());
- return true;
+ JoinedVal.setProperty("is_null", JoinedEnv.makeTopBoolValue());
}
};
@@ -1176,7 +1173,7 @@ TEST_F(FlowConditionTest, Join) {
// Note: currently, arbitrary function calls are uninterpreted, so the test
// exercises this case. If and when we change that, this test will not add to
// coverage (although it may still test a valuable case).
-TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFlowConditionJoinsToOpaqueBool) {
std::string Code = R"(
bool foo();
@@ -1211,7 +1208,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
// the first instance), so the test exercises this case. If and when we change
// that, this test will not add to coverage (although it may still test a
// valuable case).
-TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFieldFlowConditionJoinsToOpaqueBool) {
std::string Code = R"(
struct Rec {
Rec* Next;
@@ -1249,7 +1246,7 @@ TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
// condition is not meaningfully interpreted. Adds to above by nesting the
// interestnig case inside a normal branch. This protects against degenerate
// solutions which only test for empty flow conditions, for example.
-TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchJoinsToOpaqueBool) {
std::string Code = R"(
bool foo();
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
This patch adds a new interface for the join operation, now properly called `join`. Originally, the framework offered a single `merge` operation, which could serve either as a join or a widening. In practice, though we found this conflation didn't work for non-trivial anlyses, and split of the widening operation (`widen`). This change completes the transition by introducing a proper `join` with strict join semantics. In the process, it drops an odd (and often misused) aspect of `merge` wherein callees could implictly instruct the framework to drop the current entry by returning `false`. This features was never used correctly in analyses and doesn't belong in a join operation, so it is omitted.
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
Outdated
Show resolved
Hide resolved
Co-authored-by: Dmitri Gribenko <[email protected]>
…ronment.h clang format
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
Outdated
Show resolved
Hide resolved
Co-authored-by: martinboehme <[email protected]>
This patch adds a new interface for the join operation, now properly called
join
. Originally, the framework offered a singlemerge
operation, which could serve either as a join or a widening. In practice, though we found this conflation didn't work for non-trivial anlyses, and split of the widening operation (widen
). This change completes the transition by introducing a properjoin
with strict join semantics.In the process, it drops an odd (and often misused) aspect of
merge
wherein callees could implictly instruct the framework to drop the current entry by returningfalse
. This features was never used correctly in analyses and doesn't belong in a join operation, so it is omitted.