Skip to content

Commit b20789c

Browse files
committed
[clang][dataflow] Add a callback run on the pre-transfer state.
At the same time, rename `PostVisitCFG` to the more descriptive `PostAnalysisCallbacks` (which emphasizes the fact that these callbacks are run after the dataflow analysis itself has converged). Before this patch, it was only possible to run a callback on the state _after_ the transfer function had been applied, but for many analyses, it's more natural to to check the state _before_ the transfer function has been applied, becaus we are usually checking the preconditions for some operation. Some checks are impossible to perform on the "after" state because we can no longer check the precondition; for example, the `++` / `--` operators on raw pointers require the operand to be nonnull, but after the transfer function for the operator has been applied, the original value of the pointer can no longer be accessed. `UncheckedOptionalAccessModelTest` has been modified to run the diagnosis callback on the "before" state. In this particular case, diagnosis can be run unchanged on either the "before" or "after" state, but we want this test to demonstrate that running diagnosis on the "before" state is usually the preferred approach. This change is backwards-compatible; all existing analyses will continue to run the callback on the "after" state.
1 parent e83adfe commit b20789c

File tree

5 files changed

+237
-103
lines changed

5 files changed

+237
-103
lines changed

clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h

Lines changed: 137 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -178,13 +178,50 @@ template <typename LatticeT> struct DataflowAnalysisState {
178178
Environment Env;
179179
};
180180

181+
/// A callback to be called with the state before or after visiting a CFG
182+
/// element.
183+
template <typename AnalysisT>
184+
using CFGEltCallback = std::function<void(
185+
const CFGElement &,
186+
const DataflowAnalysisState<typename AnalysisT::Lattice> &)>;
187+
188+
/// A pair of callbacks to be called with the state before and after visiting a
189+
/// CFG element.
190+
/// Either or both of the callbacks may be null.
191+
template <typename AnalysisT> struct CFGEltCallbacks {
192+
CFGEltCallback<AnalysisT> Before;
193+
CFGEltCallback<AnalysisT> After;
194+
};
195+
196+
/// A callback for performing diagnosis on a CFG element, called with the state
197+
/// before or after visiting that CFG element. Returns a list of diagnostics
198+
/// to emit (if any).
199+
template <typename AnalysisT, typename Diagnostic>
200+
using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>(
201+
const CFGElement &, ASTContext &,
202+
const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>;
203+
204+
/// A pair of callbacks for performing diagnosis on a CFG element, called with
205+
/// the state before and after visiting that CFG element.
206+
/// Either or both of the callbacks may be null.
207+
template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks {
208+
DiagnosisCallback<AnalysisT, Diagnostic> Before;
209+
DiagnosisCallback<AnalysisT, Diagnostic> After;
210+
};
211+
212+
/// Default for the maximum number of SAT solver iterations during analysis.
213+
inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000;
214+
215+
/// Default for the maximum number of block visits during analysis.
216+
inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000;
217+
181218
/// Performs dataflow analysis and returns a mapping from basic block IDs to
182219
/// dataflow analysis states that model the respective basic blocks. The
183220
/// returned vector, if any, will have the same size as the number of CFG
184221
/// blocks, with indices corresponding to basic block IDs. Returns an error if
185222
/// the dataflow analysis cannot be performed successfully. Otherwise, calls
186-
/// `PostVisitCFG` on each CFG element with the final analysis results at that
187-
/// program point.
223+
/// `PostAnalysisCallbacks` on each CFG element with the final analysis results
224+
/// before and after that program point.
188225
///
189226
/// `MaxBlockVisits` caps the number of block visits during analysis. See
190227
/// `runTypeErasedDataflowAnalysis` for a full description. The default value is
@@ -194,30 +231,40 @@ template <typename LatticeT> struct DataflowAnalysisState {
194231
template <typename AnalysisT>
195232
llvm::Expected<std::vector<
196233
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
197-
runDataflowAnalysis(
198-
const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
199-
std::function<void(const CFGElement &, const DataflowAnalysisState<
200-
typename AnalysisT::Lattice> &)>
201-
PostVisitCFG = nullptr,
202-
std::int32_t MaxBlockVisits = 20'000) {
203-
std::function<void(const CFGElement &,
204-
const TypeErasedDataflowAnalysisState &)>
205-
PostVisitCFGClosure = nullptr;
206-
if (PostVisitCFG) {
207-
PostVisitCFGClosure = [&PostVisitCFG](
208-
const CFGElement &Element,
209-
const TypeErasedDataflowAnalysisState &State) {
210-
auto *Lattice =
211-
llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
212-
// FIXME: we should not be copying the environment here!
213-
// Ultimately the PostVisitCFG only gets a const reference anyway.
214-
PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
215-
*Lattice, State.Env.fork()});
216-
};
234+
runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis,
235+
const Environment &InitEnv,
236+
CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks,
237+
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
238+
CFGEltCallbacksTypeErased TypeErasedCallbacks;
239+
if (PostAnalysisCallbacks.Before) {
240+
TypeErasedCallbacks.Before =
241+
[&PostAnalysisCallbacks](const CFGElement &Element,
242+
const TypeErasedDataflowAnalysisState &State) {
243+
auto *Lattice =
244+
llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
245+
// FIXME: we should not be copying the environment here!
246+
// Ultimately the `CFGEltCallback` only gets a const reference anyway.
247+
PostAnalysisCallbacks.Before(
248+
Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
249+
*Lattice, State.Env.fork()});
250+
};
251+
}
252+
if (PostAnalysisCallbacks.After) {
253+
TypeErasedCallbacks.After =
254+
[&PostAnalysisCallbacks](const CFGElement &Element,
255+
const TypeErasedDataflowAnalysisState &State) {
256+
auto *Lattice =
257+
llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
258+
// FIXME: we should not be copying the environment here!
259+
// Ultimately the `CFGEltCallback` only gets a const reference anyway.
260+
PostAnalysisCallbacks.After(
261+
Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
262+
*Lattice, State.Env.fork()});
263+
};
217264
}
218265

219266
auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
220-
ACFG, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits);
267+
ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits);
221268
if (!TypeErasedBlockStates)
222269
return TypeErasedBlockStates.takeError();
223270

@@ -239,6 +286,22 @@ runDataflowAnalysis(
239286
return std::move(BlockStates);
240287
}
241288

289+
/// Overload that takes only one post-analysis callback, which is run on the
290+
/// state after visiting the `CFGElement`. This is provided for backwards
291+
/// compatibility; new callers should call the overload taking `CFGEltCallbacks`
292+
/// instead.
293+
template <typename AnalysisT>
294+
llvm::Expected<std::vector<
295+
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
296+
runDataflowAnalysis(
297+
const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
298+
CFGEltCallback<AnalysisT> PostAnalysisCallbackAfterElt = nullptr,
299+
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
300+
return runDataflowAnalysis(ACFG, Analysis, InitEnv,
301+
{nullptr, PostAnalysisCallbackAfterElt},
302+
MaxBlockVisits);
303+
}
304+
242305
// Create an analysis class that is derived from `DataflowAnalysis`. This is an
243306
// SFINAE adapter that allows us to call two different variants of constructor
244307
// (either with or without the optional `Environment` parameter).
@@ -271,14 +334,11 @@ auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
271334
/// `runDataflowAnalysis` for a full description and explanation of the default
272335
/// value.
273336
template <typename AnalysisT, typename Diagnostic>
274-
llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
275-
const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
276-
llvm::function_ref<llvm::SmallVector<Diagnostic>(
277-
const CFGElement &, ASTContext &,
278-
const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>
279-
Diagnoser,
280-
std::int64_t MaxSATIterations = 1'000'000'000,
281-
std::int32_t MaxBlockVisits = 20'000) {
337+
llvm::Expected<llvm::SmallVector<Diagnostic>>
338+
diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
339+
DiagnosisCallbacks<AnalysisT, Diagnostic> Diagnoser,
340+
std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
341+
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
282342
llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl);
283343
if (!Context)
284344
return Context.takeError();
@@ -288,21 +348,38 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
288348
Environment Env(AnalysisContext, FuncDecl);
289349
AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
290350
llvm::SmallVector<Diagnostic> Diagnostics;
351+
CFGEltCallbacksTypeErased PostAnalysisCallbacks;
352+
if (Diagnoser.Before) {
353+
PostAnalysisCallbacks.Before =
354+
[&ASTCtx, &Diagnoser,
355+
&Diagnostics](const CFGElement &Elt,
356+
const TypeErasedDataflowAnalysisState &State) mutable {
357+
auto EltDiagnostics = Diagnoser.Before(
358+
Elt, ASTCtx,
359+
TransferStateForDiagnostics<typename AnalysisT::Lattice>(
360+
llvm::any_cast<const typename AnalysisT::Lattice &>(
361+
State.Lattice.Value),
362+
State.Env));
363+
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
364+
};
365+
}
366+
if (Diagnoser.After) {
367+
PostAnalysisCallbacks.After =
368+
[&ASTCtx, &Diagnoser,
369+
&Diagnostics](const CFGElement &Elt,
370+
const TypeErasedDataflowAnalysisState &State) mutable {
371+
auto EltDiagnostics = Diagnoser.After(
372+
Elt, ASTCtx,
373+
TransferStateForDiagnostics<typename AnalysisT::Lattice>(
374+
llvm::any_cast<const typename AnalysisT::Lattice &>(
375+
State.Lattice.Value),
376+
State.Env));
377+
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
378+
};
379+
}
291380
if (llvm::Error Err =
292-
runTypeErasedDataflowAnalysis(
293-
*Context, Analysis, Env,
294-
[&ASTCtx, &Diagnoser, &Diagnostics](
295-
const CFGElement &Elt,
296-
const TypeErasedDataflowAnalysisState &State) mutable {
297-
auto EltDiagnostics = Diagnoser(
298-
Elt, ASTCtx,
299-
TransferStateForDiagnostics<typename AnalysisT::Lattice>(
300-
llvm::any_cast<const typename AnalysisT::Lattice &>(
301-
State.Lattice.Value),
302-
State.Env));
303-
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
304-
},
305-
MaxBlockVisits)
381+
runTypeErasedDataflowAnalysis(*Context, Analysis, Env,
382+
PostAnalysisCallbacks, MaxBlockVisits)
306383
.takeError())
307384
return std::move(Err);
308385

@@ -313,6 +390,21 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
313390
return Diagnostics;
314391
}
315392

393+
/// Overload that takes only one diagnosis callback, which is run on the state
394+
/// after visiting the `CFGElement`. This is provided for backwards
395+
/// compatibility; new callers should call the overload taking
396+
/// `DiagnosisCallbacks` instead.
397+
template <typename AnalysisT, typename Diagnostic>
398+
llvm::Expected<llvm::SmallVector<Diagnostic>>
399+
diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
400+
DiagnosisCallback<AnalysisT, Diagnostic> Diagnoser,
401+
std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
402+
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
403+
DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser};
404+
return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations,
405+
MaxBlockVisits);
406+
}
407+
316408
/// Abstract base class for dataflow "models": reusable analysis components that
317409
/// model a particular aspect of program semantics in the `Environment`. For
318410
/// example, a model may capture a type and its related functions.

clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,25 @@ struct TypeErasedDataflowAnalysisState {
132132
}
133133
};
134134

135+
/// A callback to be called with the state before or after visiting a CFG
136+
/// element.
137+
using CFGEltCallbackTypeErased = std::function<void(
138+
const CFGElement &, const TypeErasedDataflowAnalysisState &)>;
139+
140+
/// A pair of callbacks to be called with the state before and after visiting a
141+
/// CFG element.
142+
/// Either or both of the callbacks may be null.
143+
struct CFGEltCallbacksTypeErased {
144+
CFGEltCallbackTypeErased Before;
145+
CFGEltCallbackTypeErased After;
146+
};
147+
135148
/// Performs dataflow analysis and returns a mapping from basic block IDs to
136149
/// dataflow analysis states that model the respective basic blocks. Indices of
137150
/// the returned vector correspond to basic block IDs. Returns an error if the
138151
/// dataflow analysis cannot be performed successfully. Otherwise, calls
139-
/// `PostVisitCFG` on each CFG element with the final analysis results at that
140-
/// program point.
152+
/// `PostAnalysisCallbacks` on each CFG element with the final analysis results
153+
/// before and after that program point.
141154
///
142155
/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
143156
/// distinguish between repeat visits to the same block and visits to distinct
@@ -148,9 +161,7 @@ llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
148161
runTypeErasedDataflowAnalysis(
149162
const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis,
150163
const Environment &InitEnv,
151-
std::function<void(const CFGElement &,
152-
const TypeErasedDataflowAnalysisState &)>
153-
PostVisitCFG,
164+
const CFGEltCallbacksTypeErased &PostAnalysisCallbacks,
154165
std::int32_t MaxBlockVisits);
155166

156167
} // namespace dataflow

clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -411,10 +411,9 @@ static void builtinTransfer(unsigned CurBlockID, const CFGElement &Elt,
411411
/// by the user-specified analysis.
412412
static TypeErasedDataflowAnalysisState
413413
transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
414-
std::function<void(const CFGElement &,
415-
const TypeErasedDataflowAnalysisState &)>
416-
PostVisitCFG = nullptr) {
417-
AC.Log.enterBlock(Block, PostVisitCFG != nullptr);
414+
const CFGEltCallbacksTypeErased &PostAnalysisCallbacks = {}) {
415+
AC.Log.enterBlock(Block, PostAnalysisCallbacks.Before != nullptr ||
416+
PostAnalysisCallbacks.After != nullptr);
418417
auto State = computeBlockInputState(Block, AC);
419418
AC.Log.recordState(State);
420419
int ElementIdx = 1;
@@ -423,6 +422,11 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
423422
ElementIdx++, "transferCFGBlock");
424423

425424
AC.Log.enterElement(Element);
425+
426+
if (PostAnalysisCallbacks.Before) {
427+
PostAnalysisCallbacks.Before(Element, State);
428+
}
429+
426430
// Built-in analysis
427431
if (AC.Analysis.builtinOptions()) {
428432
builtinTransfer(Block.getBlockID(), Element, State, AC);
@@ -431,10 +435,10 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
431435
// User-provided analysis
432436
AC.Analysis.transferTypeErased(Element, State.Lattice, State.Env);
433437

434-
// Post processing
435-
if (PostVisitCFG) {
436-
PostVisitCFG(Element, State);
438+
if (PostAnalysisCallbacks.After) {
439+
PostAnalysisCallbacks.After(Element, State);
437440
}
441+
438442
AC.Log.recordState(State);
439443
}
440444

@@ -469,9 +473,7 @@ llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
469473
runTypeErasedDataflowAnalysis(
470474
const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis,
471475
const Environment &InitEnv,
472-
std::function<void(const CFGElement &,
473-
const TypeErasedDataflowAnalysisState &)>
474-
PostVisitCFG,
476+
const CFGEltCallbacksTypeErased &PostAnalysisCallbacks,
475477
std::int32_t MaxBlockVisits) {
476478
PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis");
477479

@@ -553,12 +555,12 @@ runTypeErasedDataflowAnalysis(
553555
// FIXME: Consider evaluating unreachable basic blocks (those that have a
554556
// state set to `std::nullopt` at this point) to also analyze dead code.
555557

556-
if (PostVisitCFG) {
558+
if (PostAnalysisCallbacks.Before || PostAnalysisCallbacks.After) {
557559
for (const CFGBlock *Block : ACFG.getCFG()) {
558560
// Skip blocks that were not evaluated.
559561
if (!BlockStates[Block->getBlockID()])
560562
continue;
561-
transferCFGBlock(*Block, AC, PostVisitCFG);
563+
transferCFGBlock(*Block, AC, PostAnalysisCallbacks);
562564
}
563565
}
564566

0 commit comments

Comments
 (0)