Skip to content

Commit 672fb27

Browse files
ymandgribozavrmartinboehme
authored
[clang][dataflow] Add new join API and replace existing merge implementations. (#80361)
This patch adds a new interface for the join operation, now properly called `join`. Originally, the framework offered a single `merge` operation, which could serve either as a join or a widening. In practice, though we found this conflation didn't work for non-trivial anlyses, and split of the widening operation (`widen`). This change completes the transition by introducing a proper `join` with strict join semantics. In the process, it drops an odd (and often misused) aspect of `merge` wherein callees could implictly instruct the framework to drop the current entry by returning `false`. This features was never used correctly in analyses and doesn't belong in a join operation, so it is omitted. --------- Co-authored-by: Dmitri Gribenko <[email protected]> Co-authored-by: martinboehme <[email protected]>
1 parent 90e8dc0 commit 672fb27

File tree

4 files changed

+93
-79
lines changed

4 files changed

+93
-79
lines changed

clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "clang/AST/DeclBase.h"
2020
#include "clang/AST/Expr.h"
2121
#include "clang/AST/Type.h"
22-
#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
2322
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
2423
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
2524
#include "clang/Analysis/FlowSensitive/Formula.h"
@@ -31,7 +30,6 @@
3130
#include "llvm/ADT/MapVector.h"
3231
#include "llvm/Support/Compiler.h"
3332
#include "llvm/Support/ErrorHandling.h"
34-
#include <memory>
3533
#include <type_traits>
3634
#include <utility>
3735

@@ -81,6 +79,8 @@ class Environment {
8179
return ComparisonResult::Unknown;
8280
}
8381

82+
/// DEPRECATED. Override `join` and/or `widen`, instead.
83+
///
8484
/// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could
8585
/// be a strict lattice join or a more general widening operation.
8686
///
@@ -105,6 +105,28 @@ class Environment {
105105
return true;
106106
}
107107

108+
/// Modifies `JoinedVal` to approximate both `Val1` and `Val2`. This should
109+
/// obey the properties of a lattice join.
110+
///
111+
/// `Env1` and `Env2` can be used to query child values and path condition
112+
/// implications of `Val1` and `Val2` respectively.
113+
///
114+
/// Requirements:
115+
///
116+
/// `Val1` and `Val2` must be distinct.
117+
///
118+
/// `Val1`, `Val2`, and `JoinedVal` must model values of type `Type`.
119+
///
120+
/// `Val1` and `Val2` must be assigned to the same storage location in
121+
/// `Env1` and `Env2` respectively.
122+
virtual void join(QualType Type, const Value &Val1, const Environment &Env1,
123+
const Value &Val2, const Environment &Env2,
124+
Value &JoinedVal, Environment &JoinedEnv) {
125+
[[maybe_unused]] bool ShouldKeep =
126+
merge(Type, Val1, Env1, Val2, Env2, JoinedVal, JoinedEnv);
127+
assert(ShouldKeep && "dropping merged value is unsupported");
128+
}
129+
108130
/// This function may widen the current value -- replace it with an
109131
/// approximation that can reach a fixed point more quickly than iterated
110132
/// application of the transfer function alone. The previous value is

clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ static bool compareDistinctValues(QualType Type, Value &Val1,
8686
llvm_unreachable("All cases covered in switch");
8787
}
8888

89-
/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90-
/// respectively, of the same type `Type`. Merging generally produces a single
89+
/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90+
/// respectively, of the same type `Type`. Joining generally produces a single
9191
/// value that (soundly) approximates the two inputs, although the actual
9292
/// meaning depends on `Model`.
93-
static Value *mergeDistinctValues(QualType Type, Value &Val1,
94-
const Environment &Env1, Value &Val2,
95-
const Environment &Env2,
96-
Environment &MergedEnv,
97-
Environment::ValueModel &Model) {
93+
static Value *joinDistinctValues(QualType Type, Value &Val1,
94+
const Environment &Env1, Value &Val2,
95+
const Environment &Env2,
96+
Environment &JoinedEnv,
97+
Environment::ValueModel &Model) {
9898
// Join distinct boolean values preserving information about the constraints
9999
// in the respective path conditions.
100100
if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
@@ -113,42 +113,39 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
113113
// ```
114114
auto &Expr1 = cast<BoolValue>(Val1).formula();
115115
auto &Expr2 = cast<BoolValue>(Val2).formula();
116-
auto &A = MergedEnv.arena();
117-
auto &MergedVal = A.makeAtomRef(A.makeAtom());
118-
MergedEnv.assume(
116+
auto &A = JoinedEnv.arena();
117+
auto &JoinedVal = A.makeAtomRef(A.makeAtom());
118+
JoinedEnv.assume(
119119
A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
120-
A.makeEquals(MergedVal, Expr1)),
120+
A.makeEquals(JoinedVal, Expr1)),
121121
A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
122-
A.makeEquals(MergedVal, Expr2))));
123-
return &A.makeBoolValue(MergedVal);
122+
A.makeEquals(JoinedVal, Expr2))));
123+
return &A.makeBoolValue(JoinedVal);
124124
}
125125

126-
Value *MergedVal = nullptr;
126+
Value *JoinedVal = nullptr;
127127
if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
128128
auto *RecordVal2 = cast<RecordValue>(&Val2);
129129

130130
if (&RecordVal1->getLoc() == &RecordVal2->getLoc())
131131
// `RecordVal1` and `RecordVal2` may have different properties associated
132132
// with them. Create a new `RecordValue` with the same location but
133133
// without any properties so that we soundly approximate both values. If a
134-
// particular analysis needs to merge properties, it should do so in
135-
// `DataflowAnalysis::merge()`.
136-
MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
134+
// particular analysis needs to join properties, it should do so in
135+
// `DataflowAnalysis::join()`.
136+
JoinedVal = &JoinedEnv.create<RecordValue>(RecordVal1->getLoc());
137137
else
138138
// If the locations for the two records are different, need to create a
139139
// completely new value.
140-
MergedVal = MergedEnv.createValue(Type);
140+
JoinedVal = JoinedEnv.createValue(Type);
141141
} else {
142-
MergedVal = MergedEnv.createValue(Type);
142+
JoinedVal = JoinedEnv.createValue(Type);
143143
}
144144

145-
// FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
146-
// returns false to avoid storing unneeded values in `DACtx`.
147-
if (MergedVal)
148-
if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
149-
return MergedVal;
145+
if (JoinedVal)
146+
Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv);
150147

151-
return nullptr;
148+
return JoinedVal;
152149
}
153150

154151
// When widening does not change `Current`, return value will equal `&Prev`.
@@ -240,9 +237,9 @@ joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
240237
continue;
241238
}
242239

243-
if (Value *MergedVal = mergeDistinctValues(
240+
if (Value *JoinedVal = joinDistinctValues(
244241
Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
245-
Result.insert({Loc, MergedVal});
242+
Result.insert({Loc, JoinedVal});
246243
}
247244
}
248245

@@ -657,10 +654,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
657654
// cast.
658655
auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
659656
assert(Func != nullptr);
660-
if (Value *MergedVal =
661-
mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
662-
*EnvB.ReturnVal, EnvB, JoinedEnv, Model))
663-
JoinedEnv.ReturnVal = MergedVal;
657+
if (Value *JoinedVal =
658+
joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
659+
*EnvB.ReturnVal, EnvB, JoinedEnv, Model))
660+
JoinedEnv.ReturnVal = JoinedVal;
664661
}
665662

666663
if (EnvA.ReturnLoc == EnvB.ReturnLoc)

clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -364,60 +364,58 @@ class SignPropagationAnalysis
364364
LatticeTransferState State(L, Env);
365365
TransferMatchSwitch(Elt, getASTContext(), State);
366366
}
367-
bool merge(QualType Type, const Value &Val1, const Environment &Env1,
368-
const Value &Val2, const Environment &Env2, Value &MergedVal,
369-
Environment &MergedEnv) override;
367+
void join(QualType Type, const Value &Val1, const Environment &Env1,
368+
const Value &Val2, const Environment &Env2, Value &MergedVal,
369+
Environment &MergedEnv) override;
370370

371371
private:
372372
CFGMatchSwitch<TransferState<NoopLattice>> TransferMatchSwitch;
373373
};
374374

375-
// Copied from crubit.
376-
BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
377-
BoolValue &Bool2, const Environment &Env2,
378-
Environment &MergedEnv) {
375+
BoolValue &joinBoolValues(BoolValue &Bool1, const Environment &Env1,
376+
BoolValue &Bool2, const Environment &Env2,
377+
Environment &JoinedEnv) {
379378
if (&Bool1 == &Bool2) {
380379
return Bool1;
381380
}
382381

383382
auto &B1 = Bool1.formula();
384383
auto &B2 = Bool2.formula();
385384

386-
auto &A = MergedEnv.arena();
387-
auto &MergedBool = MergedEnv.makeAtomicBoolValue();
385+
auto &A = JoinedEnv.arena();
386+
auto &JoinedBool = JoinedEnv.makeAtomicBoolValue();
388387

389388
// If `Bool1` and `Bool2` is constrained to the same true / false value,
390-
// `MergedBool` can be constrained similarly without needing to consider the
391-
// path taken - this simplifies the flow condition tracked in `MergedEnv`.
389+
// `JoinedBool` can be constrained similarly without needing to consider the
390+
// path taken - this simplifies the flow condition tracked in `JoinedEnv`.
392391
// Otherwise, information about which path was taken is used to associate
393-
// `MergedBool` with `Bool1` and `Bool2`.
392+
// `JoinedBool` with `Bool1` and `Bool2`.
394393
if (Env1.proves(B1) && Env2.proves(B2)) {
395-
MergedEnv.assume(MergedBool.formula());
394+
JoinedEnv.assume(JoinedBool.formula());
396395
} else if (Env1.proves(A.makeNot(B1)) && Env2.proves(A.makeNot(B2))) {
397-
MergedEnv.assume(A.makeNot(MergedBool.formula()));
396+
JoinedEnv.assume(A.makeNot(JoinedBool.formula()));
398397
}
399-
return MergedBool;
398+
return JoinedBool;
400399
}
401400

402-
bool SignPropagationAnalysis::merge(QualType Type, const Value &Val1,
403-
const Environment &Env1, const Value &Val2,
404-
const Environment &Env2, Value &MergedVal,
405-
Environment &MergedEnv) {
401+
void SignPropagationAnalysis::join(QualType Type, const Value &Val1,
402+
const Environment &Env1, const Value &Val2,
403+
const Environment &Env2, Value &JoinedVal,
404+
Environment &JoinedEnv) {
406405
if (!Type->isIntegerType())
407-
return false;
406+
return;
408407
SignProperties Ps1 = getSignProperties(Val1, Env1);
409408
SignProperties Ps2 = getSignProperties(Val2, Env2);
410409
if (!Ps1.Neg || !Ps2.Neg)
411-
return false;
412-
BoolValue &MergedNeg =
413-
mergeBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, MergedEnv);
414-
BoolValue &MergedZero =
415-
mergeBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, MergedEnv);
416-
BoolValue &MergedPos =
417-
mergeBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, MergedEnv);
418-
setSignProperties(MergedVal,
419-
SignProperties{&MergedNeg, &MergedZero, &MergedPos});
420-
return true;
410+
return;
411+
BoolValue &JoinedNeg =
412+
joinBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, JoinedEnv);
413+
BoolValue &JoinedZero =
414+
joinBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, JoinedEnv);
415+
BoolValue &JoinedPos =
416+
joinBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, JoinedEnv);
417+
setSignProperties(JoinedVal,
418+
SignProperties{&JoinedNeg, &JoinedZero, &JoinedPos});
421419
}
422420

423421
template <typename Matcher>

clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -672,26 +672,23 @@ class NullPointerAnalysis final
672672
: ComparisonResult::Different;
673673
}
674674

675-
bool merge(QualType Type, const Value &Val1, const Environment &Env1,
676-
const Value &Val2, const Environment &Env2, Value &MergedVal,
677-
Environment &MergedEnv) override {
678-
// Nothing to say about a value that is not a pointer.
675+
void join(QualType Type, const Value &Val1, const Environment &Env1,
676+
const Value &Val2, const Environment &Env2, Value &JoinedVal,
677+
Environment &JoinedEnv) override {
678+
// Nothing to say about a value that is not a pointer...
679679
if (!Type->isPointerType())
680-
return false;
680+
return;
681681

682+
// ... or, a pointer without the `is_null` property.
682683
auto *IsNull1 = cast_or_null<BoolValue>(Val1.getProperty("is_null"));
683-
if (IsNull1 == nullptr)
684-
return false;
685-
686684
auto *IsNull2 = cast_or_null<BoolValue>(Val2.getProperty("is_null"));
687-
if (IsNull2 == nullptr)
688-
return false;
685+
if (IsNull1 == nullptr || IsNull2 == nullptr)
686+
return;
689687

690688
if (IsNull1 == IsNull2)
691-
MergedVal.setProperty("is_null", *IsNull1);
689+
JoinedVal.setProperty("is_null", *IsNull1);
692690
else
693-
MergedVal.setProperty("is_null", MergedEnv.makeTopBoolValue());
694-
return true;
691+
JoinedVal.setProperty("is_null", JoinedEnv.makeTopBoolValue());
695692
}
696693
};
697694

@@ -1176,7 +1173,7 @@ TEST_F(FlowConditionTest, Join) {
11761173
// Note: currently, arbitrary function calls are uninterpreted, so the test
11771174
// exercises this case. If and when we change that, this test will not add to
11781175
// coverage (although it may still test a valuable case).
1179-
TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
1176+
TEST_F(FlowConditionTest, OpaqueFlowConditionJoinsToOpaqueBool) {
11801177
std::string Code = R"(
11811178
bool foo();
11821179
@@ -1211,7 +1208,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
12111208
// the first instance), so the test exercises this case. If and when we change
12121209
// that, this test will not add to coverage (although it may still test a
12131210
// valuable case).
1214-
TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
1211+
TEST_F(FlowConditionTest, OpaqueFieldFlowConditionJoinsToOpaqueBool) {
12151212
std::string Code = R"(
12161213
struct Rec {
12171214
Rec* Next;
@@ -1249,7 +1246,7 @@ TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
12491246
// condition is not meaningfully interpreted. Adds to above by nesting the
12501247
// interestnig case inside a normal branch. This protects against degenerate
12511248
// solutions which only test for empty flow conditions, for example.
1252-
TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchMergesToOpaqueBool) {
1249+
TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchJoinsToOpaqueBool) {
12531250
std::string Code = R"(
12541251
bool foo();
12551252

0 commit comments

Comments
 (0)