-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][dataflow] Fix inaccuracies in buildStmtToBasicBlockMap()
.
#82496
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][dataflow] Fix inaccuracies in buildStmtToBasicBlockMap()
.
#82496
Conversation
See the comments added to the code for details on the inaccuracies that have now been fixed. The patch adds tests that fail with the old implementation.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-analysis Author: None (martinboehme) ChangesSee the comments added to the code for details on the inaccuracies that have The patch adds tests that fail with the old implementation. Full diff: https://github.com/llvm/llvm-project/pull/82496.diff 2 Files Affected:
diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp
index c9ebffe6f37801..8aed19544be6a2 100644
--- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp
@@ -39,8 +39,35 @@ buildStmtToBasicBlockMap(const CFG &Cfg) {
StmtToBlock[Stmt->getStmt()] = Block;
}
- if (const Stmt *TerminatorStmt = Block->getTerminatorStmt())
- StmtToBlock[TerminatorStmt] = Block;
+ }
+ // Some terminator conditions don't appear as a `CFGElement` anywhere else -
+ // for example, this is true if the terminator condition is a `&&` or `||`
+ // operator.
+ // We associate these conditions with the block the terminator appears in,
+ // but only if the condition has not already appeared as a regular
+ // `CFGElement`. (The `insert()` below does nothing if the key already exists
+ // in the map.)
+ for (const CFGBlock *Block : Cfg) {
+ if (Block != nullptr)
+ if (const Stmt *TerminatorCond = Block->getTerminatorCondition())
+ StmtToBlock.insert({TerminatorCond, Block});
+ }
+ // Terminator statements typically don't appear as a `CFGElement` anywhere
+ // else, so we want to associate them with the block that they terminate.
+ // However, there are some important special cases:
+ // - The conditional operator is a type of terminator, but it also appears
+ // as a regular `CFGElement`, and we want to associate it with the block
+ // in which it appears as a `CFGElement`.
+ // - The `&&` and `||` operators are types of terminators, but like the
+ // conditional operator, they can appear as a regular `CFGElement` or
+ // as a terminator condition (see above).
+ // We process terminators last to make sure that we only associate them with
+ // the block they terminate if they haven't previously occurred as a regular
+ // `CFGElement` or as a terminator condition.
+ for (const CFGBlock *Block : Cfg) {
+ if (Block != nullptr)
+ if (const Stmt *TerminatorStmt = Block->getTerminatorStmt())
+ StmtToBlock.insert({TerminatorStmt, Block});
}
return StmtToBlock;
}
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index 3bca9cced8d6f7..34f9b0b23719fe 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -77,17 +77,33 @@ class DataflowAnalysisTest : public Test {
return runDataflowAnalysis(*CFCtx, Analysis, Env);
}
+ /// Returns the `CFGBlock` containing `S` (and asserts that it exists).
+ const CFGBlock *blockForStmt(const Stmt &S) {
+ const CFGBlock *Block = CFCtx->getStmtToBlock().lookup(&S);
+ assert(Block != nullptr);
+ return Block;
+ }
+
template <typename StateT>
const StateT &
blockStateForStmt(const std::vector<std::optional<StateT>> &BlockStates,
- const Stmt *S) {
- const CFGBlock *Block = CFCtx->getStmtToBlock().lookup(S);
- assert(Block != nullptr);
- const std::optional<StateT> &MaybeState = BlockStates[Block->getBlockID()];
+ const Stmt &S) {
+ const std::optional<StateT> &MaybeState =
+ BlockStates[blockForStmt(S)->getBlockID()];
assert(MaybeState.has_value());
return *MaybeState;
}
+ /// Returns the first node that matches `Matcher` (and asserts that the match
+ /// was successful, i.e. the returned node is not null).
+ template <typename NodeT, typename MatcherT>
+ const NodeT &matchNode(MatcherT Matcher) {
+ const auto *Node = selectFirst<NodeT>(
+ "node", match(Matcher.bind("node"), AST->getASTContext()));
+ assert(Node != nullptr);
+ return *Node;
+ }
+
std::unique_ptr<ASTUnit> AST;
std::unique_ptr<ControlFlowContext> CFCtx;
std::unique_ptr<DataflowAnalysisContext> DACtx;
@@ -130,6 +146,79 @@ TEST_F(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
" (Lifetime ends)\n")));
}
+// Tests for the statement-to-block map.
+using StmtToBlockTest = DataflowAnalysisTest;
+
+TEST_F(StmtToBlockTest, ConditionalOperator) {
+ std::string Code = R"(
+ void target(bool b) {
+ int i = b ? 1 : 0;
+ }
+ )";
+ ASSERT_THAT_ERROR(runAnalysis<NoopAnalysis>(
+ Code, [](ASTContext &C) { return NoopAnalysis(C); })
+ .takeError(),
+ llvm::Succeeded());
+
+ const auto &IDecl = matchNode<DeclStmt>(declStmt(has(varDecl(hasName("i")))));
+ const auto &ConditionalOp =
+ matchNode<ConditionalOperator>(conditionalOperator());
+
+ // The conditional operator should be associated with the same block as the
+ // `DeclStmt` for `i`. (Specifically, the conditional operator should not be
+ // associated with the block for which it is the terminator.)
+ EXPECT_EQ(blockForStmt(IDecl), blockForStmt(ConditionalOp));
+}
+
+TEST_F(StmtToBlockTest, LogicalAnd) {
+ std::string Code = R"(
+ void target(bool b1, bool b2) {
+ bool b = b1 && b2;
+ }
+ )";
+ ASSERT_THAT_ERROR(runAnalysis<NoopAnalysis>(
+ Code, [](ASTContext &C) { return NoopAnalysis(C); })
+ .takeError(),
+ llvm::Succeeded());
+
+ const auto &BDecl = matchNode<DeclStmt>(declStmt(has(varDecl(hasName("b")))));
+ const auto &AndOp =
+ matchNode<BinaryOperator>(binaryOperator(hasOperatorName("&&")));
+
+ // The `&&` operator should be associated with the same block as the
+ // `DeclStmt` for `b`. (Specifically, the `&&` operator should not be
+ // associated with the block for which it is the terminator.)
+ EXPECT_EQ(blockForStmt(BDecl), blockForStmt(AndOp));
+}
+
+TEST_F(StmtToBlockTest, IfStatementWithLogicalAnd) {
+ std::string Code = R"(
+ void target(bool b1, bool b2) {
+ if (b1 && b2)
+ ;
+ }
+ )";
+ ASSERT_THAT_ERROR(runAnalysis<NoopAnalysis>(
+ Code, [](ASTContext &C) { return NoopAnalysis(C); })
+ .takeError(),
+ llvm::Succeeded());
+
+ const auto &If = matchNode<IfStmt>(ifStmt());
+ const auto &B2 =
+ matchNode<DeclRefExpr>(declRefExpr(to(varDecl(hasName("b2")))));
+ const auto &AndOp =
+ matchNode<BinaryOperator>(binaryOperator(hasOperatorName("&&")));
+
+ // The if statement is the terminator for the block that contains both `b2`
+ // and the `&&` operator (which appears only as a terminator condition, not
+ // as a regular `CFGElement`).
+ const CFGBlock *IfBlock = blockForStmt(If);
+ const CFGBlock *B2Block = blockForStmt(B2);
+ const CFGBlock *AndOpBlock = blockForStmt(AndOp);
+ EXPECT_EQ(IfBlock, B2Block);
+ EXPECT_EQ(IfBlock, AndOpBlock);
+}
+
// Tests that check we discard state for expressions correctly.
using DiscardExprStateTest = DataflowAnalysisTest;
@@ -144,25 +233,20 @@ TEST_F(DiscardExprStateTest, WhileStatement) {
auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
Code, [](ASTContext &C) { return NoopAnalysis(C); }));
- auto *NotEqOp = selectFirst<BinaryOperator>(
- "op", match(binaryOperator(hasOperatorName("!=")).bind("op"),
- AST->getASTContext()));
- ASSERT_NE(NotEqOp, nullptr);
-
- auto *CallFoo = selectFirst<CallExpr>(
- "call", match(callExpr(callee(functionDecl(hasName("foo")))).bind("call"),
- AST->getASTContext()));
- ASSERT_NE(CallFoo, nullptr);
+ const auto &NotEqOp =
+ matchNode<BinaryOperator>(binaryOperator(hasOperatorName("!=")));
+ const auto &CallFoo =
+ matchNode<CallExpr>(callExpr(callee(functionDecl(hasName("foo")))));
// In the block that evaluates the expression `p != nullptr`, this expression
// is associated with a value.
const auto &NotEqOpState = blockStateForStmt(BlockStates, NotEqOp);
- EXPECT_NE(NotEqOpState.Env.getValue(*NotEqOp), nullptr);
+ EXPECT_NE(NotEqOpState.Env.getValue(NotEqOp), nullptr);
// In the block that calls `foo(p)`, the value for `p != nullptr` is discarded
// because it is not consumed by this block.
const auto &CallFooState = blockStateForStmt(BlockStates, CallFoo);
- EXPECT_EQ(CallFooState.Env.getValue(*NotEqOp), nullptr);
+ EXPECT_EQ(CallFooState.Env.getValue(NotEqOp), nullptr);
}
TEST_F(DiscardExprStateTest, BooleanOperator) {
@@ -174,29 +258,24 @@ TEST_F(DiscardExprStateTest, BooleanOperator) {
auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
Code, [](ASTContext &C) { return NoopAnalysis(C); }));
- auto *AndOp = selectFirst<BinaryOperator>(
- "op", match(binaryOperator(hasOperatorName("&&")).bind("op"),
- AST->getASTContext()));
- ASSERT_NE(AndOp, nullptr);
-
- auto *Return = selectFirst<ReturnStmt>(
- "return", match(returnStmt().bind("return"), AST->getASTContext()));
- ASSERT_NE(Return, nullptr);
+ const auto &AndOp =
+ matchNode<BinaryOperator>(binaryOperator(hasOperatorName("&&")));
+ const auto &Return = matchNode<ReturnStmt>(returnStmt());
// In the block that evaluates the LHS of the `&&` operator, the LHS is
// associated with a value, while the right-hand side is not (unsurprisingly,
// as it hasn't been evaluated yet).
- const auto &LHSState = blockStateForStmt(BlockStates, AndOp->getLHS());
- auto *LHSValue = cast<BoolValue>(LHSState.Env.getValue(*AndOp->getLHS()));
+ const auto &LHSState = blockStateForStmt(BlockStates, *AndOp.getLHS());
+ auto *LHSValue = cast<BoolValue>(LHSState.Env.getValue(*AndOp.getLHS()));
ASSERT_NE(LHSValue, nullptr);
- EXPECT_EQ(LHSState.Env.getValue(*AndOp->getRHS()), nullptr);
+ EXPECT_EQ(LHSState.Env.getValue(*AndOp.getRHS()), nullptr);
// In the block that evaluates the RHS, the RHS is associated with a
// value. The value for the LHS has been discarded as it is not consumed by
// this block.
- const auto &RHSState = blockStateForStmt(BlockStates, AndOp->getRHS());
- EXPECT_EQ(RHSState.Env.getValue(*AndOp->getLHS()), nullptr);
- auto *RHSValue = cast<BoolValue>(RHSState.Env.getValue(*AndOp->getRHS()));
+ const auto &RHSState = blockStateForStmt(BlockStates, *AndOp.getRHS());
+ EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), nullptr);
+ auto *RHSValue = cast<BoolValue>(RHSState.Env.getValue(*AndOp.getRHS()));
ASSERT_NE(RHSValue, nullptr);
// In the block that evaluates the return statement, the expression `b1 && b2`
@@ -217,9 +296,9 @@ TEST_F(DiscardExprStateTest, BooleanOperator) {
// operands, rather than from the environment for the block that contains the
// `&&`.
const auto &ReturnState = blockStateForStmt(BlockStates, Return);
- EXPECT_EQ(ReturnState.Env.getValue(*AndOp->getLHS()), nullptr);
- EXPECT_EQ(ReturnState.Env.getValue(*AndOp->getRHS()), nullptr);
- EXPECT_EQ(ReturnState.Env.getValue(*AndOp),
+ EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getLHS()), nullptr);
+ EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getRHS()), nullptr);
+ EXPECT_EQ(ReturnState.Env.getValue(AndOp),
&ReturnState.Env.makeAnd(*LHSValue, *RHSValue));
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice work! Quite subtle...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is horrifying. It makes me think that we might want to revamp the CfgBlock interface at some point.
CI failure looks unrelated. Patch locally builds and tests cleanly. |
Yes, there a quite a few surprises in the structure of the CFG that I've only discovered gradually over time. I'd also like to eliminate the sharp edges in the API, though I suspect that existing clients rely on the idiosyncrasies of the current API -- so we might have to introduce a second, parallel API for a while so we can migrate usage over gradually. |
See the comments added to the code for details on the inaccuracies that have
now been fixed.
The patch adds tests that fail with the old implementation.