Skip to content

[clang][dataflow] Add new join API and replace existing merge implementations. #80361

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "clang/AST/DeclBase.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/Formula.h"
Expand All @@ -31,7 +30,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <memory>
#include <type_traits>
#include <utility>

Expand Down Expand Up @@ -81,6 +79,8 @@ class Environment {
return ComparisonResult::Unknown;
}

/// DEPRECATED. Override `join` and/or `widen`, instead.
///
/// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could
/// be a strict lattice join or a more general widening operation.
///
Expand All @@ -105,6 +105,28 @@ class Environment {
return true;
}

/// Modifies `JoinedVal` to approximate both `Val1` and `Val2`. This should
/// obey the properties of a lattice join.
///
/// `Env1` and `Env2` can be used to query child values and path condition
/// implications of `Val1` and `Val2` respectively.
///
/// Requirements:
///
/// `Val1` and `Val2` must be distinct.
///
/// `Val1`, `Val2`, and `JoinedVal` must model values of type `Type`.
///
/// `Val1` and `Val2` must be assigned to the same storage location in
/// `Env1` and `Env2` respectively.
virtual void join(QualType Type, const Value &Val1, const Environment &Env1,
const Value &Val2, const Environment &Env2,
Value &JoinedVal, Environment &JoinedEnv) {
[[maybe_unused]] bool ShouldKeep =
merge(Type, Val1, Env1, Val2, Env2, JoinedVal, JoinedEnv);
assert(ShouldKeep && "dropping merged value is unsupported");
}

/// This function may widen the current value -- replace it with an
/// approximation that can reach a fixed point more quickly than iterated
/// application of the transfer function alone. The previous value is
Expand Down
59 changes: 28 additions & 31 deletions clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ static bool compareDistinctValues(QualType Type, Value &Val1,
llvm_unreachable("All cases covered in switch");
}

/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
/// respectively, of the same type `Type`. Merging generally produces a single
/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`,
/// respectively, of the same type `Type`. Joining generally produces a single
/// value that (soundly) approximates the two inputs, although the actual
/// meaning depends on `Model`.
static Value *mergeDistinctValues(QualType Type, Value &Val1,
const Environment &Env1, Value &Val2,
const Environment &Env2,
Environment &MergedEnv,
Environment::ValueModel &Model) {
static Value *joinDistinctValues(QualType Type, Value &Val1,
const Environment &Env1, Value &Val2,
const Environment &Env2,
Environment &JoinedEnv,
Environment::ValueModel &Model) {
// Join distinct boolean values preserving information about the constraints
// in the respective path conditions.
if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
Expand All @@ -113,42 +113,39 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
// ```
auto &Expr1 = cast<BoolValue>(Val1).formula();
auto &Expr2 = cast<BoolValue>(Val2).formula();
auto &A = MergedEnv.arena();
auto &MergedVal = A.makeAtomRef(A.makeAtom());
MergedEnv.assume(
auto &A = JoinedEnv.arena();
auto &JoinedVal = A.makeAtomRef(A.makeAtom());
JoinedEnv.assume(
A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
A.makeEquals(MergedVal, Expr1)),
A.makeEquals(JoinedVal, Expr1)),
A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
A.makeEquals(MergedVal, Expr2))));
return &A.makeBoolValue(MergedVal);
A.makeEquals(JoinedVal, Expr2))));
return &A.makeBoolValue(JoinedVal);
}

Value *MergedVal = nullptr;
Value *JoinedVal = nullptr;
if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
auto *RecordVal2 = cast<RecordValue>(&Val2);

if (&RecordVal1->getLoc() == &RecordVal2->getLoc())
// `RecordVal1` and `RecordVal2` may have different properties associated
// with them. Create a new `RecordValue` with the same location but
// without any properties so that we soundly approximate both values. If a
// particular analysis needs to merge properties, it should do so in
// `DataflowAnalysis::merge()`.
MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
// particular analysis needs to join properties, it should do so in
// `DataflowAnalysis::join()`.
JoinedVal = &JoinedEnv.create<RecordValue>(RecordVal1->getLoc());
else
// If the locations for the two records are different, need to create a
// completely new value.
MergedVal = MergedEnv.createValue(Type);
JoinedVal = JoinedEnv.createValue(Type);
} else {
MergedVal = MergedEnv.createValue(Type);
JoinedVal = JoinedEnv.createValue(Type);
}

// FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
// returns false to avoid storing unneeded values in `DACtx`.
if (MergedVal)
if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
return MergedVal;
if (JoinedVal)
Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv);

return nullptr;
return JoinedVal;
}

// When widening does not change `Current`, return value will equal `&Prev`.
Expand Down Expand Up @@ -240,9 +237,9 @@ joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
continue;
}

if (Value *MergedVal = mergeDistinctValues(
if (Value *JoinedVal = joinDistinctValues(
Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
Result.insert({Loc, MergedVal});
Result.insert({Loc, JoinedVal});
}
}

Expand Down Expand Up @@ -657,10 +654,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
// cast.
auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
assert(Func != nullptr);
if (Value *MergedVal =
mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
*EnvB.ReturnVal, EnvB, JoinedEnv, Model))
JoinedEnv.ReturnVal = MergedVal;
if (Value *JoinedVal =
joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
*EnvB.ReturnVal, EnvB, JoinedEnv, Model))
JoinedEnv.ReturnVal = JoinedVal;
}

if (EnvA.ReturnLoc == EnvB.ReturnLoc)
Expand Down
58 changes: 28 additions & 30 deletions clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,60 +364,58 @@ class SignPropagationAnalysis
LatticeTransferState State(L, Env);
TransferMatchSwitch(Elt, getASTContext(), State);
}
bool merge(QualType Type, const Value &Val1, const Environment &Env1,
const Value &Val2, const Environment &Env2, Value &MergedVal,
Environment &MergedEnv) override;
void join(QualType Type, const Value &Val1, const Environment &Env1,
const Value &Val2, const Environment &Env2, Value &MergedVal,
Environment &MergedEnv) override;

private:
CFGMatchSwitch<TransferState<NoopLattice>> TransferMatchSwitch;
};

// Copied from crubit.
BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
BoolValue &Bool2, const Environment &Env2,
Environment &MergedEnv) {
BoolValue &joinBoolValues(BoolValue &Bool1, const Environment &Env1,
BoolValue &Bool2, const Environment &Env2,
Environment &JoinedEnv) {
if (&Bool1 == &Bool2) {
return Bool1;
}

auto &B1 = Bool1.formula();
auto &B2 = Bool2.formula();

auto &A = MergedEnv.arena();
auto &MergedBool = MergedEnv.makeAtomicBoolValue();
auto &A = JoinedEnv.arena();
auto &JoinedBool = JoinedEnv.makeAtomicBoolValue();

// If `Bool1` and `Bool2` is constrained to the same true / false value,
// `MergedBool` can be constrained similarly without needing to consider the
// path taken - this simplifies the flow condition tracked in `MergedEnv`.
// `JoinedBool` can be constrained similarly without needing to consider the
// path taken - this simplifies the flow condition tracked in `JoinedEnv`.
// Otherwise, information about which path was taken is used to associate
// `MergedBool` with `Bool1` and `Bool2`.
// `JoinedBool` with `Bool1` and `Bool2`.
if (Env1.proves(B1) && Env2.proves(B2)) {
MergedEnv.assume(MergedBool.formula());
JoinedEnv.assume(JoinedBool.formula());
} else if (Env1.proves(A.makeNot(B1)) && Env2.proves(A.makeNot(B2))) {
MergedEnv.assume(A.makeNot(MergedBool.formula()));
JoinedEnv.assume(A.makeNot(JoinedBool.formula()));
}
return MergedBool;
return JoinedBool;
}

bool SignPropagationAnalysis::merge(QualType Type, const Value &Val1,
const Environment &Env1, const Value &Val2,
const Environment &Env2, Value &MergedVal,
Environment &MergedEnv) {
void SignPropagationAnalysis::join(QualType Type, const Value &Val1,
const Environment &Env1, const Value &Val2,
const Environment &Env2, Value &JoinedVal,
Environment &JoinedEnv) {
if (!Type->isIntegerType())
return false;
return;
SignProperties Ps1 = getSignProperties(Val1, Env1);
SignProperties Ps2 = getSignProperties(Val2, Env2);
if (!Ps1.Neg || !Ps2.Neg)
return false;
BoolValue &MergedNeg =
mergeBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, MergedEnv);
BoolValue &MergedZero =
mergeBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, MergedEnv);
BoolValue &MergedPos =
mergeBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, MergedEnv);
setSignProperties(MergedVal,
SignProperties{&MergedNeg, &MergedZero, &MergedPos});
return true;
return;
BoolValue &JoinedNeg =
joinBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, JoinedEnv);
BoolValue &JoinedZero =
joinBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, JoinedEnv);
BoolValue &JoinedPos =
joinBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, JoinedEnv);
setSignProperties(JoinedVal,
SignProperties{&JoinedNeg, &JoinedZero, &JoinedPos});
}

template <typename Matcher>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -672,26 +672,23 @@ class NullPointerAnalysis final
: ComparisonResult::Different;
}

bool merge(QualType Type, const Value &Val1, const Environment &Env1,
const Value &Val2, const Environment &Env2, Value &MergedVal,
Environment &MergedEnv) override {
// Nothing to say about a value that is not a pointer.
void join(QualType Type, const Value &Val1, const Environment &Env1,
const Value &Val2, const Environment &Env2, Value &JoinedVal,
Environment &JoinedEnv) override {
// Nothing to say about a value that is not a pointer...
if (!Type->isPointerType())
return false;
return;

// ... or, a pointer without the `is_null` property.
auto *IsNull1 = cast_or_null<BoolValue>(Val1.getProperty("is_null"));
if (IsNull1 == nullptr)
return false;

auto *IsNull2 = cast_or_null<BoolValue>(Val2.getProperty("is_null"));
if (IsNull2 == nullptr)
return false;
if (IsNull1 == nullptr || IsNull2 == nullptr)
return;

if (IsNull1 == IsNull2)
MergedVal.setProperty("is_null", *IsNull1);
JoinedVal.setProperty("is_null", *IsNull1);
else
MergedVal.setProperty("is_null", MergedEnv.makeTopBoolValue());
return true;
JoinedVal.setProperty("is_null", JoinedEnv.makeTopBoolValue());
}
};

Expand Down Expand Up @@ -1176,7 +1173,7 @@ TEST_F(FlowConditionTest, Join) {
// Note: currently, arbitrary function calls are uninterpreted, so the test
// exercises this case. If and when we change that, this test will not add to
// coverage (although it may still test a valuable case).
TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
TEST_F(FlowConditionTest, OpaqueFlowConditionJoinsToOpaqueBool) {
std::string Code = R"(
bool foo();

Expand Down Expand Up @@ -1211,7 +1208,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
// the first instance), so the test exercises this case. If and when we change
// that, this test will not add to coverage (although it may still test a
// valuable case).
TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
TEST_F(FlowConditionTest, OpaqueFieldFlowConditionJoinsToOpaqueBool) {
std::string Code = R"(
struct Rec {
Rec* Next;
Expand Down Expand Up @@ -1249,7 +1246,7 @@ TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
// condition is not meaningfully interpreted. Adds to above by nesting the
// interestnig case inside a normal branch. This protects against degenerate
// solutions which only test for empty flow conditions, for example.
TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchMergesToOpaqueBool) {
TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchJoinsToOpaqueBool) {
std::string Code = R"(
bool foo();

Expand Down