Skip to content

Commit 2fd614e

Browse files
committed
[dataflow] Add dedicated representation of boolean formulas
This is the first step in untangling the two current jobs of BoolValue. === Desired end-state: === - BoolValue will model C++ booleans e.g. held in StorageLocations. this includes describing uncertainty (e.g. "top" is a Value concern) - Formula describes analysis-level assertions in terms of SAT atoms. These can still be linked together: a BoolValue may have a corresponding SAT atom which is constrained by formulas. === Done in this patch: === BoolValue is left intact, Formula is just the input type to the SAT solver, and we build formulas as needed to invoke the solver. === Incidental changes to debug string printing: === - variables renamed from B0 etc to V0 etc B0 collides with the names of basic blocks, which is confusing when debugging flow conditions. - debug printing of formulas (Formula and Atom) uses operator<< rather than debugString(), so works with gtest. Therefore moved out of DebugSupport.h - Did the same to Solver::Result, and some helper changes to SolverTest, so that we get useful messages on unit test failures - formulas are now printed as infix expressions on one line, rather than wrapped/indented S-exprs. My experience is that this is easier to scan FCs for small examples, and large ones are unreadable either way. - most of the several debugString() functions for constraints/results are unused, so removed them rather than updating tests. Inlined the one that was actually used into its callsite. Differential Revision: https://reviews.llvm.org/D153366
1 parent 61bcaae commit 2fd614e

File tree

15 files changed

+587
-918
lines changed

15 files changed

+587
-918
lines changed

clang/include/clang/Analysis/FlowSensitive/Arena.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE__ARENA_H
99
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE__ARENA_H
1010

11+
#include "clang/Analysis/FlowSensitive/Formula.h"
1112
#include "clang/Analysis/FlowSensitive/StorageLocation.h"
1213
#include "clang/Analysis/FlowSensitive/Value.h"
1314
#include <vector>
@@ -104,7 +105,17 @@ class Arena {
104105
return create<AtomicBoolValue>();
105106
}
106107

108+
/// Gets the boolean formula equivalent of a BoolValue.
109+
/// Each unique Top values is translated to an Atom.
110+
/// TODO: migrate to storing Formula directly in Values instead.
111+
const Formula &getFormula(const BoolValue &);
112+
113+
/// Returns a new atomic boolean variable, distinct from any other.
114+
Atom makeAtom() { return static_cast<Atom>(NextAtom++); };
115+
107116
private:
117+
llvm::BumpPtrAllocator Alloc;
118+
108119
// Storage for the state of a program.
109120
std::vector<std::unique_ptr<StorageLocation>> Locs;
110121
std::vector<std::unique_ptr<Value>> Vals;
@@ -122,6 +133,9 @@ class Arena {
122133
llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *>
123134
BiconditionalVals;
124135

136+
llvm::DenseMap<const BoolValue *, const Formula *> ValToFormula;
137+
unsigned NextAtom = 0;
138+
125139
AtomicBoolValue &TrueVal;
126140
AtomicBoolValue &FalseVal;
127141
};

clang/include/clang/Analysis/FlowSensitive/DebugSupport.h

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#include "clang/Analysis/FlowSensitive/Solver.h"
2121
#include "clang/Analysis/FlowSensitive/Value.h"
22-
#include "llvm/ADT/DenseMap.h"
2322
#include "llvm/ADT/StringRef.h"
2423

2524
namespace clang {
@@ -28,52 +27,9 @@ namespace dataflow {
2827
/// Returns a string representation of a value kind.
2928
llvm::StringRef debugString(Value::Kind Kind);
3029

31-
/// Returns a string representation of a boolean assignment to true or false.
32-
llvm::StringRef debugString(Solver::Result::Assignment Assignment);
33-
3430
/// Returns a string representation of the result status of a SAT check.
3531
llvm::StringRef debugString(Solver::Result::Status Status);
3632

37-
/// Returns a string representation for the boolean value `B`.
38-
///
39-
/// Atomic booleans appearing in the boolean value `B` are assigned to labels
40-
/// either specified in `AtomNames` or created by default rules as B0, B1, ...
41-
///
42-
/// Requirements:
43-
///
44-
/// Names assigned to atoms should not be repeated in `AtomNames`.
45-
std::string debugString(
46-
const BoolValue &B,
47-
llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}});
48-
49-
/// Returns a string representation for `Constraints` - a collection of boolean
50-
/// formulas.
51-
///
52-
/// Atomic booleans appearing in the boolean value `Constraints` are assigned to
53-
/// labels either specified in `AtomNames` or created by default rules as B0,
54-
/// B1, ...
55-
///
56-
/// Requirements:
57-
///
58-
/// Names assigned to atoms should not be repeated in `AtomNames`.
59-
std::string debugString(
60-
const llvm::ArrayRef<BoolValue *> Constraints,
61-
llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}});
62-
63-
/// Returns a string representation for `Constraints` - a collection of boolean
64-
/// formulas and the `Result` of satisfiability checking.
65-
///
66-
/// Atomic booleans appearing in `Constraints` and `Result` are assigned to
67-
/// labels either specified in `AtomNames` or created by default rules as B0,
68-
/// B1, ...
69-
///
70-
/// Requirements:
71-
///
72-
/// Names assigned to atoms should not be repeated in `AtomNames`.
73-
std::string debugString(
74-
ArrayRef<BoolValue *> Constraints, const Solver::Result &Result,
75-
llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = {{}});
76-
7733
} // namespace dataflow
7834
} // namespace clang
7935

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//===- Formula.h - Boolean formulas -----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_FORMULA_H
10+
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_FORMULA_H
11+
12+
#include "clang/Basic/LLVM.h"
13+
#include "llvm/ADT/ArrayRef.h"
14+
#include "llvm/ADT/DenseMap.h"
15+
#include "llvm/ADT/DenseMapInfo.h"
16+
#include "llvm/ADT/STLFunctionalExtras.h"
17+
#include "llvm/Support/Allocator.h"
18+
#include "llvm/Support/raw_ostream.h"
19+
#include <cassert>
20+
#include <string>
21+
#include <type_traits>
22+
23+
namespace clang::dataflow {
24+
25+
/// Identifies an atomic boolean variable such as "V1".
26+
///
27+
/// This often represents an assertion that is interesting to the analysis but
28+
/// cannot immediately be proven true or false. For example:
29+
/// - V1 may mean "the program reaches this point",
30+
/// - V2 may mean "the parameter was null"
31+
///
32+
/// We can use these variables in formulas to describe relationships we know
33+
/// to be true: "if the parameter was null, the program reaches this point".
34+
/// We also express hypotheses as formulas, and use a SAT solver to check
35+
/// whether they are consistent with the known facts.
36+
enum class Atom : unsigned {};
37+
38+
/// A boolean expression such as "true" or "V1 & !V2".
39+
/// Expressions may refer to boolean atomic variables. These should take a
40+
/// consistent true/false value across the set of formulas being considered.
41+
///
42+
/// (Formulas are always expressions in terms of boolean variables rather than
43+
/// e.g. integers because our underlying model is SAT rather than e.g. SMT).
44+
///
45+
/// Simple formulas such as "true" and "V1" are self-contained.
46+
/// Compound formulas connect other formulas, e.g. "(V1 & V2) || V3" is an 'or'
47+
/// formula, with pointers to its operands "(V1 & V2)" and "V3" stored as
48+
/// trailing objects.
49+
/// For this reason, Formulas are Arena-allocated and over-aligned.
50+
class Formula;
51+
class alignas(const Formula *) Formula {
52+
public:
53+
enum Kind : unsigned {
54+
/// A reference to an atomic boolean variable.
55+
/// We name these e.g. "V3", where 3 == atom identity == Value.
56+
AtomRef,
57+
// FIXME: add const true/false rather than modeling them as variables
58+
59+
Not, /// True if its only operand is false
60+
61+
// These kinds connect two operands LHS and RHS
62+
And, /// True if LHS and RHS are both true
63+
Or, /// True if either LHS or RHS is true
64+
Implies, /// True if LHS is false or RHS is true
65+
Equal, /// True if LHS and RHS have the same truth value
66+
};
67+
Kind kind() const { return FormulaKind; }
68+
69+
Atom getAtom() const {
70+
assert(kind() == AtomRef);
71+
return static_cast<Atom>(Value);
72+
}
73+
74+
ArrayRef<const Formula *> operands() const {
75+
return ArrayRef(reinterpret_cast<Formula *const *>(this + 1),
76+
numOperands(kind()));
77+
}
78+
79+
using AtomNames = llvm::DenseMap<Atom, std::string>;
80+
// Produce a stable human-readable representation of this formula.
81+
// For example: (V3 | !(V1 & V2))
82+
// If AtomNames is provided, these override the default V0, V1... names.
83+
void print(llvm::raw_ostream &OS, const AtomNames * = nullptr) const;
84+
85+
// Allocate Formulas using Arena rather than calling this function directly.
86+
static Formula &create(llvm::BumpPtrAllocator &Alloc, Kind K,
87+
ArrayRef<const Formula *> Operands,
88+
unsigned Value = 0);
89+
90+
private:
91+
Formula() = default;
92+
Formula(const Formula &) = delete;
93+
Formula &operator=(const Formula &) = delete;
94+
95+
static unsigned numOperands(Kind K) {
96+
switch (K) {
97+
case AtomRef:
98+
return 0;
99+
case Not:
100+
return 1;
101+
case And:
102+
case Or:
103+
case Implies:
104+
case Equal:
105+
return 2;
106+
}
107+
}
108+
109+
Kind FormulaKind;
110+
// Some kinds of formula have scalar values, e.g. AtomRef's atom number.
111+
unsigned Value;
112+
};
113+
114+
// The default names of atoms are V0, V1 etc in order of creation.
115+
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Atom A) {
116+
return OS << 'V' << static_cast<unsigned>(A);
117+
}
118+
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Formula &F) {
119+
F.print(OS);
120+
return OS;
121+
}
122+
123+
} // namespace clang::dataflow
124+
namespace llvm {
125+
template <> struct DenseMapInfo<clang::dataflow::Atom> {
126+
using Atom = clang::dataflow::Atom;
127+
using Underlying = std::underlying_type_t<Atom>;
128+
129+
static inline Atom getEmptyKey() { return Atom(Underlying(-1)); }
130+
static inline Atom getTombstoneKey() { return Atom(Underlying(-2)); }
131+
static unsigned getHashValue(const Atom &Val) {
132+
return DenseMapInfo<Underlying>::getHashValue(Underlying(Val));
133+
}
134+
static bool isEqual(const Atom &LHS, const Atom &RHS) { return LHS == RHS; }
135+
};
136+
} // namespace llvm
137+
#endif

clang/include/clang/Analysis/FlowSensitive/Solver.h

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,10 @@
1414
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_SOLVER_H
1515
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_SOLVER_H
1616

17-
#include "clang/Analysis/FlowSensitive/Value.h"
17+
#include "clang/Analysis/FlowSensitive/Formula.h"
1818
#include "clang/Basic/LLVM.h"
1919
#include "llvm/ADT/ArrayRef.h"
2020
#include "llvm/ADT/DenseMap.h"
21-
#include "llvm/ADT/DenseSet.h"
22-
#include "llvm/Support/Compiler.h"
2321
#include <optional>
2422
#include <vector>
2523

@@ -49,8 +47,7 @@ class Solver {
4947

5048
/// Constructs a result indicating that the queried boolean formula is
5149
/// satisfiable. The result will hold a solution found by the solver.
52-
static Result
53-
Satisfiable(llvm::DenseMap<AtomicBoolValue *, Assignment> Solution) {
50+
static Result Satisfiable(llvm::DenseMap<Atom, Assignment> Solution) {
5451
return Result(Status::Satisfiable, std::move(Solution));
5552
}
5653

@@ -68,19 +65,17 @@ class Solver {
6865

6966
/// Returns a truth assignment to boolean values that satisfies the queried
7067
/// boolean formula if available. Otherwise, an empty optional is returned.
71-
std::optional<llvm::DenseMap<AtomicBoolValue *, Assignment>>
72-
getSolution() const {
68+
std::optional<llvm::DenseMap<Atom, Assignment>> getSolution() const {
7369
return Solution;
7470
}
7571

7672
private:
77-
Result(
78-
enum Status SATCheckStatus,
79-
std::optional<llvm::DenseMap<AtomicBoolValue *, Assignment>> Solution)
73+
Result(Status SATCheckStatus,
74+
std::optional<llvm::DenseMap<Atom, Assignment>> Solution)
8075
: SATCheckStatus(SATCheckStatus), Solution(std::move(Solution)) {}
8176

8277
Status SATCheckStatus;
83-
std::optional<llvm::DenseMap<AtomicBoolValue *, Assignment>> Solution;
78+
std::optional<llvm::DenseMap<Atom, Assignment>> Solution;
8479
};
8580

8681
virtual ~Solver() = default;
@@ -91,14 +86,12 @@ class Solver {
9186
/// Requirements:
9287
///
9388
/// All elements in `Vals` must not be null.
94-
virtual Result solve(llvm::ArrayRef<BoolValue *> Vals) = 0;
95-
96-
LLVM_DEPRECATED("Pass ArrayRef for determinism", "")
97-
virtual Result solve(llvm::DenseSet<BoolValue *> Vals) {
98-
return solve(ArrayRef(std::vector<BoolValue *>(Vals.begin(), Vals.end())));
99-
}
89+
virtual Result solve(llvm::ArrayRef<const Formula *> Vals) = 0;
10090
};
10191

92+
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Solver::Result &);
93+
llvm::raw_ostream &operator<<(llvm::raw_ostream &, Solver::Result::Assignment);
94+
10295
} // namespace dataflow
10396
} // namespace clang
10497

clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_WATCHEDLITERALSSOLVER_H
1515
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_WATCHEDLITERALSSOLVER_H
1616

17+
#include "clang/Analysis/FlowSensitive/Formula.h"
1718
#include "clang/Analysis/FlowSensitive/Solver.h"
18-
#include "clang/Analysis/FlowSensitive/Value.h"
1919
#include "llvm/ADT/ArrayRef.h"
2020
#include <limits>
2121

@@ -46,7 +46,7 @@ class WatchedLiteralsSolver : public Solver {
4646
explicit WatchedLiteralsSolver(std::int64_t WorkLimit)
4747
: MaxIterations(WorkLimit) {}
4848

49-
Result solve(llvm::ArrayRef<BoolValue *> Vals) override;
49+
Result solve(llvm::ArrayRef<const Formula *> Vals) override;
5050
};
5151

5252
} // namespace dataflow

clang/lib/Analysis/FlowSensitive/Arena.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,50 @@ IntegerValue &Arena::makeIntLiteral(llvm::APInt Value) {
7676
return *It->second;
7777
}
7878

79+
const Formula &Arena::getFormula(const BoolValue &B) {
80+
auto It = ValToFormula.find(&B);
81+
if (It != ValToFormula.end())
82+
return *It->second;
83+
Formula &F = [&]() -> Formula & {
84+
switch (B.getKind()) {
85+
case Value::Kind::Integer:
86+
case Value::Kind::Reference:
87+
case Value::Kind::Pointer:
88+
case Value::Kind::Struct:
89+
llvm_unreachable("not a boolean");
90+
case Value::Kind::TopBool:
91+
case Value::Kind::AtomicBool:
92+
// TODO: assign atom numbers on creation rather than in getFormula(), so
93+
// they will be deterministic and maybe even meaningful.
94+
return Formula::create(Alloc, Formula::AtomRef, {},
95+
static_cast<unsigned>(makeAtom()));
96+
case Value::Kind::Conjunction:
97+
return Formula::create(
98+
Alloc, Formula::And,
99+
{&getFormula(cast<ConjunctionValue>(B).getLeftSubValue()),
100+
&getFormula(cast<ConjunctionValue>(B).getRightSubValue())});
101+
case Value::Kind::Disjunction:
102+
return Formula::create(
103+
Alloc, Formula::Or,
104+
{&getFormula(cast<DisjunctionValue>(B).getLeftSubValue()),
105+
&getFormula(cast<DisjunctionValue>(B).getRightSubValue())});
106+
case Value::Kind::Negation:
107+
return Formula::create(Alloc, Formula::Not,
108+
{&getFormula(cast<NegationValue>(B).getSubVal())});
109+
case Value::Kind::Implication:
110+
return Formula::create(
111+
Alloc, Formula::Implies,
112+
{&getFormula(cast<ImplicationValue>(B).getLeftSubValue()),
113+
&getFormula(cast<ImplicationValue>(B).getRightSubValue())});
114+
case Value::Kind::Biconditional:
115+
return Formula::create(
116+
Alloc, Formula::Equal,
117+
{&getFormula(cast<BiconditionalValue>(B).getLeftSubValue()),
118+
&getFormula(cast<BiconditionalValue>(B).getRightSubValue())});
119+
}
120+
}();
121+
ValToFormula.try_emplace(&B, &F);
122+
return F;
123+
}
124+
79125
} // namespace clang::dataflow

clang/lib/Analysis/FlowSensitive/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ add_clang_library(clangAnalysisFlowSensitive
33
ControlFlowContext.cpp
44
DataflowAnalysisContext.cpp
55
DataflowEnvironment.cpp
6+
Formula.cpp
67
HTMLLogger.cpp
78
Logger.cpp
89
RecordOps.cpp

0 commit comments

Comments
 (0)