Skip to content

[clang][dataflow] Add a callback run on the pre-transfer state. #96140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 137 additions & 45 deletions clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,50 @@ template <typename LatticeT> struct DataflowAnalysisState {
Environment Env;
};

/// A callback to be called with the state before or after visiting a CFG
/// element.
template <typename AnalysisT>
using CFGEltCallback = std::function<void(
const CFGElement &,
const DataflowAnalysisState<typename AnalysisT::Lattice> &)>;

/// A pair of callbacks to be called with the state before and after visiting a
/// CFG element.
/// Either or both of the callbacks may be null.
template <typename AnalysisT> struct CFGEltCallbacks {
CFGEltCallback<AnalysisT> Before;
CFGEltCallback<AnalysisT> After;
};

/// A callback for performing diagnosis on a CFG element, called with the state
/// before or after visiting that CFG element. Returns a list of diagnostics
/// to emit (if any).
template <typename AnalysisT, typename Diagnostic>
using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>(
const CFGElement &, ASTContext &,
const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>;

/// A pair of callbacks for performing diagnosis on a CFG element, called with
/// the state before and after visiting that CFG element.
/// Either or both of the callbacks may be null.
template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks {
DiagnosisCallback<AnalysisT, Diagnostic> Before;
DiagnosisCallback<AnalysisT, Diagnostic> After;
};

/// Default for the maximum number of SAT solver iterations during analysis.
inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000;

/// Default for the maximum number of block visits during analysis.
inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000;

/// Performs dataflow analysis and returns a mapping from basic block IDs to
/// dataflow analysis states that model the respective basic blocks. The
/// returned vector, if any, will have the same size as the number of CFG
/// blocks, with indices corresponding to basic block IDs. Returns an error if
/// the dataflow analysis cannot be performed successfully. Otherwise, calls
/// `PostVisitCFG` on each CFG element with the final analysis results at that
/// program point.
/// `PostAnalysisCallbacks` on each CFG element with the final analysis results
/// before and after that program point.
///
/// `MaxBlockVisits` caps the number of block visits during analysis. See
/// `runTypeErasedDataflowAnalysis` for a full description. The default value is
Expand All @@ -194,30 +231,40 @@ template <typename LatticeT> struct DataflowAnalysisState {
template <typename AnalysisT>
llvm::Expected<std::vector<
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
runDataflowAnalysis(
const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
std::function<void(const CFGElement &, const DataflowAnalysisState<
typename AnalysisT::Lattice> &)>
PostVisitCFG = nullptr,
std::int32_t MaxBlockVisits = 20'000) {
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
PostVisitCFGClosure = nullptr;
if (PostVisitCFG) {
PostVisitCFGClosure = [&PostVisitCFG](
const CFGElement &Element,
const TypeErasedDataflowAnalysisState &State) {
auto *Lattice =
llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
// FIXME: we should not be copying the environment here!
// Ultimately the PostVisitCFG only gets a const reference anyway.
PostVisitCFG(Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
*Lattice, State.Env.fork()});
};
runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis,
const Environment &InitEnv,
CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks,
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
CFGEltCallbacksTypeErased TypeErasedCallbacks;
if (PostAnalysisCallbacks.Before) {
TypeErasedCallbacks.Before =
[&PostAnalysisCallbacks](const CFGElement &Element,
const TypeErasedDataflowAnalysisState &State) {
auto *Lattice =
llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
// FIXME: we should not be copying the environment here!
// Ultimately the `CFGEltCallback` only gets a const reference anyway.
PostAnalysisCallbacks.Before(
Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
*Lattice, State.Env.fork()});
};
}
if (PostAnalysisCallbacks.After) {
TypeErasedCallbacks.After =
[&PostAnalysisCallbacks](const CFGElement &Element,
const TypeErasedDataflowAnalysisState &State) {
auto *Lattice =
llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
// FIXME: we should not be copying the environment here!
// Ultimately the `CFGEltCallback` only gets a const reference anyway.
PostAnalysisCallbacks.After(
Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
*Lattice, State.Env.fork()});
};
}

auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
ACFG, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits);
ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits);
if (!TypeErasedBlockStates)
return TypeErasedBlockStates.takeError();

Expand All @@ -239,6 +286,22 @@ runDataflowAnalysis(
return std::move(BlockStates);
}

/// Overload that takes only one post-analysis callback, which is run on the
/// state after visiting the `CFGElement`. This is provided for backwards
/// compatibility; new callers should call the overload taking `CFGEltCallbacks`
/// instead.
template <typename AnalysisT>
llvm::Expected<std::vector<
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
runDataflowAnalysis(
const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
CFGEltCallback<AnalysisT> PostAnalysisCallbackAfterElt = nullptr,
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
return runDataflowAnalysis(ACFG, Analysis, InitEnv,
{nullptr, PostAnalysisCallbackAfterElt},
MaxBlockVisits);
}

// Create an analysis class that is derived from `DataflowAnalysis`. This is an
// SFINAE adapter that allows us to call two different variants of constructor
// (either with or without the optional `Environment` parameter).
Expand Down Expand Up @@ -271,14 +334,11 @@ auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
/// `runDataflowAnalysis` for a full description and explanation of the default
/// value.
template <typename AnalysisT, typename Diagnostic>
llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
llvm::function_ref<llvm::SmallVector<Diagnostic>(
const CFGElement &, ASTContext &,
const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>
Diagnoser,
std::int64_t MaxSATIterations = 1'000'000'000,
std::int32_t MaxBlockVisits = 20'000) {
llvm::Expected<llvm::SmallVector<Diagnostic>>
diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
DiagnosisCallbacks<AnalysisT, Diagnostic> Diagnoser,
std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl);
if (!Context)
return Context.takeError();
Expand All @@ -288,21 +348,38 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
Environment Env(AnalysisContext, FuncDecl);
AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
llvm::SmallVector<Diagnostic> Diagnostics;
CFGEltCallbacksTypeErased PostAnalysisCallbacks;
if (Diagnoser.Before) {
PostAnalysisCallbacks.Before =
[&ASTCtx, &Diagnoser,
&Diagnostics](const CFGElement &Elt,
const TypeErasedDataflowAnalysisState &State) mutable {
auto EltDiagnostics = Diagnoser.Before(
Elt, ASTCtx,
TransferStateForDiagnostics<typename AnalysisT::Lattice>(
llvm::any_cast<const typename AnalysisT::Lattice &>(
State.Lattice.Value),
State.Env));
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
};
}
if (Diagnoser.After) {
PostAnalysisCallbacks.After =
[&ASTCtx, &Diagnoser,
&Diagnostics](const CFGElement &Elt,
const TypeErasedDataflowAnalysisState &State) mutable {
auto EltDiagnostics = Diagnoser.After(
Elt, ASTCtx,
TransferStateForDiagnostics<typename AnalysisT::Lattice>(
llvm::any_cast<const typename AnalysisT::Lattice &>(
State.Lattice.Value),
State.Env));
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
};
}
if (llvm::Error Err =
runTypeErasedDataflowAnalysis(
*Context, Analysis, Env,
[&ASTCtx, &Diagnoser, &Diagnostics](
const CFGElement &Elt,
const TypeErasedDataflowAnalysisState &State) mutable {
auto EltDiagnostics = Diagnoser(
Elt, ASTCtx,
TransferStateForDiagnostics<typename AnalysisT::Lattice>(
llvm::any_cast<const typename AnalysisT::Lattice &>(
State.Lattice.Value),
State.Env));
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
},
MaxBlockVisits)
runTypeErasedDataflowAnalysis(*Context, Analysis, Env,
PostAnalysisCallbacks, MaxBlockVisits)
.takeError())
return std::move(Err);

Expand All @@ -313,6 +390,21 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
return Diagnostics;
}

/// Overload that takes only one diagnosis callback, which is run on the state
/// after visiting the `CFGElement`. This is provided for backwards
/// compatibility; new callers should call the overload taking
/// `DiagnosisCallbacks` instead.
template <typename AnalysisT, typename Diagnostic>
llvm::Expected<llvm::SmallVector<Diagnostic>>
diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
DiagnosisCallback<AnalysisT, Diagnostic> Diagnoser,
std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser};
return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations,
MaxBlockVisits);
}

/// Abstract base class for dataflow "models": reusable analysis components that
/// model a particular aspect of program semantics in the `Environment`. For
/// example, a model may capture a type and its related functions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,25 @@ struct TypeErasedDataflowAnalysisState {
}
};

/// A callback to be called with the state before or after visiting a CFG
/// element.
using CFGEltCallbackTypeErased = std::function<void(
const CFGElement &, const TypeErasedDataflowAnalysisState &)>;

/// A pair of callbacks to be called with the state before and after visiting a
/// CFG element.
/// Either or both of the callbacks may be null.
struct CFGEltCallbacksTypeErased {
CFGEltCallbackTypeErased Before;
CFGEltCallbackTypeErased After;
};

/// Performs dataflow analysis and returns a mapping from basic block IDs to
/// dataflow analysis states that model the respective basic blocks. Indices of
/// the returned vector correspond to basic block IDs. Returns an error if the
/// dataflow analysis cannot be performed successfully. Otherwise, calls
/// `PostVisitCFG` on each CFG element with the final analysis results at that
/// program point.
/// `PostAnalysisCallbacks` on each CFG element with the final analysis results
/// before and after that program point.
///
/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
/// distinguish between repeat visits to the same block and visits to distinct
Expand All @@ -148,9 +161,7 @@ llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(
const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis,
const Environment &InitEnv,
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
PostVisitCFG,
const CFGEltCallbacksTypeErased &PostAnalysisCallbacks,
std::int32_t MaxBlockVisits);

} // namespace dataflow
Expand Down
26 changes: 14 additions & 12 deletions clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,9 @@ static void builtinTransfer(unsigned CurBlockID, const CFGElement &Elt,
/// by the user-specified analysis.
static TypeErasedDataflowAnalysisState
transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
PostVisitCFG = nullptr) {
AC.Log.enterBlock(Block, PostVisitCFG != nullptr);
const CFGEltCallbacksTypeErased &PostAnalysisCallbacks = {}) {
AC.Log.enterBlock(Block, PostAnalysisCallbacks.Before != nullptr ||
PostAnalysisCallbacks.After != nullptr);
auto State = computeBlockInputState(Block, AC);
AC.Log.recordState(State);
int ElementIdx = 1;
Expand All @@ -423,6 +422,11 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
ElementIdx++, "transferCFGBlock");

AC.Log.enterElement(Element);

if (PostAnalysisCallbacks.Before) {
PostAnalysisCallbacks.Before(Element, State);
}

// Built-in analysis
if (AC.Analysis.builtinOptions()) {
builtinTransfer(Block.getBlockID(), Element, State, AC);
Expand All @@ -431,10 +435,10 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
// User-provided analysis
AC.Analysis.transferTypeErased(Element, State.Lattice, State.Env);

// Post processing
if (PostVisitCFG) {
PostVisitCFG(Element, State);
if (PostAnalysisCallbacks.After) {
PostAnalysisCallbacks.After(Element, State);
}

AC.Log.recordState(State);
}

Expand Down Expand Up @@ -469,9 +473,7 @@ llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(
const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis,
const Environment &InitEnv,
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
PostVisitCFG,
const CFGEltCallbacksTypeErased &PostAnalysisCallbacks,
std::int32_t MaxBlockVisits) {
PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis");

Expand Down Expand Up @@ -553,12 +555,12 @@ runTypeErasedDataflowAnalysis(
// FIXME: Consider evaluating unreachable basic blocks (those that have a
// state set to `std::nullopt` at this point) to also analyze dead code.

if (PostVisitCFG) {
if (PostAnalysisCallbacks.Before || PostAnalysisCallbacks.After) {
for (const CFGBlock *Block : ACFG.getCFG()) {
// Skip blocks that were not evaluated.
if (!BlockStates[Block->getBlockID()])
continue;
transferCFGBlock(*Block, AC, PostVisitCFG);
transferCFGBlock(*Block, AC, PostAnalysisCallbacks);
}
}

Expand Down
Loading
Loading