-
Notifications
You must be signed in to change notification settings - Fork 10.5k
[SIL Diagnostics] Create a mandatory pass to check correct usage of yields in generalized accessors: _read and _modify #21067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,325 @@ | ||
//===------ YieldOnceCheck.cpp - Check usage of yields in accessors ------===// | ||
// | ||
// This source file is part of the Swift.org open source project | ||
// | ||
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors | ||
// Licensed under Apache License v2.0 with Runtime Library Exception | ||
// | ||
// See https://swift.org/LICENSE.txt for license information | ||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// This pass statically verifies that yield-once coroutines, such as the | ||
// generalized accessors `read` and `modify`, yield exactly once in every | ||
// invocation, and diagnoses any violation of this property. This pass uses a | ||
// linear-time, data-flow analysis to check that every path in the control-flow | ||
// graph of the coroutine has a yield instruction before a return instruction. | ||
|
||
#define DEBUG_TYPE "yield-once-check" | ||
#include "swift/AST/ASTWalker.h" | ||
#include "swift/AST/DiagnosticsSIL.h" | ||
#include "swift/AST/Expr.h" | ||
#include "swift/AST/Stmt.h" | ||
#include "swift/SIL/BasicBlockUtils.h" | ||
#include "swift/SIL/Dominance.h" | ||
#include "swift/SILOptimizer/PassManager/Transforms.h" | ||
#include "llvm/ADT/DenseSet.h" | ||
|
||
using namespace swift; | ||
|
||
namespace { | ||
|
||
class YieldOnceCheck : public SILFunctionTransform { | ||
|
||
template <typename... T, typename... U> | ||
static InFlightDiagnostic diagnose(ASTContext &Context, SourceLoc loc, | ||
Diag<T...> diag, U &&... args) { | ||
return Context.Diags.diagnose(loc, diag, std::forward<U>(args)...); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should consider pushing this up to |
||
|
||
/// Data-flow analysis state that is associated with basic blocks. | ||
struct BBState { | ||
|
||
/// Indicates whether a basic block is encountered before seeing a yield | ||
/// (BeforeYield) or after seeing a yield (AfterYield), or in both states | ||
/// (Conflict). This enum is a semi-lattice where Conflict is the top and | ||
/// the merge of BeforeYield and AfterYield states is Conflict. | ||
enum YieldState { BeforeYield, AfterYield, Conflict } yieldState; | ||
|
||
private: | ||
// The following state is maintained for emitting diagnostics. | ||
|
||
/// For AfterYield and Conflict states, this field records the yield | ||
/// instruction that was seen while propagating the state. | ||
SILInstruction *yieldInst; | ||
|
||
BBState(YieldState yState, SILInstruction *yieldI) | ||
: yieldState(yState), yieldInst(yieldI) {} | ||
|
||
public: | ||
static BBState getInitialState() { return BBState(BeforeYield, nullptr); } | ||
|
||
static BBState getAfterYieldState(SILInstruction *yieldI) { | ||
assert(yieldI); | ||
return BBState(AfterYield, yieldI); | ||
} | ||
|
||
static BBState getConflictState(SILInstruction *yieldI) { | ||
assert(yieldI); | ||
return BBState(Conflict, yieldI); | ||
} | ||
|
||
SILInstruction *getYieldInstruction() const { | ||
assert(yieldState == AfterYield || yieldState == Conflict); | ||
return yieldInst; | ||
} | ||
}; | ||
|
||
/// A structure that records an error found during the analysis along with | ||
/// some context information that will be used by diagnostics. | ||
struct YieldError { | ||
/// The kind of error. | ||
enum Kind { MultipleYield, ReturnBeforeYield, ReturnOnConflict } errorKind; | ||
/// The termination instruction where the error should be reported. | ||
SILInstruction *termInst; | ||
/// The input state when the error is encountered. | ||
BBState inState; | ||
|
||
private: | ||
YieldError(Kind kind, SILInstruction *term, BBState state) | ||
: errorKind(kind), termInst(term), inState(state) {} | ||
|
||
public: | ||
static YieldError getMultipleYieldError(YieldInst *yield, BBState state) { | ||
assert(state.yieldState != BBState::BeforeYield); | ||
return YieldError(MultipleYield, yield, state); | ||
} | ||
|
||
static YieldError getReturnBeforeYieldError(ReturnInst *returnI, | ||
BBState state) { | ||
assert(state.yieldState == BBState::BeforeYield); | ||
return YieldError(ReturnBeforeYield, returnI, state); | ||
} | ||
|
||
static YieldError getReturnOnConflict(ReturnInst *returnI, BBState state) { | ||
assert(state.yieldState == BBState::Conflict); | ||
return YieldError(ReturnOnConflict, returnI, state); | ||
} | ||
}; | ||
|
||
/// Transfer function of the data-flow analysis. | ||
/// | ||
/// \param bb Basic block that should be processed | ||
/// \param inState BBState at the start of the basic block | ||
/// \param error out parameter that will contain information about | ||
/// an error that is detected. | ||
/// \return the state at the exit of the basic block if it can be computed | ||
/// and None otherwise. | ||
static Optional<BBState> | ||
transferStateThroughBasicBlock(SILBasicBlock *bb, BBState inState, | ||
Optional<YieldError> &error) { | ||
error = None; | ||
auto *term = bb->getTerminator(); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this initialize 'error' to None? That way if a caller reuses the passed in error it will get correctly set to None when there is no error. |
||
if (auto *returnInst = dyn_cast<ReturnInst>(term)) { | ||
if (inState.yieldState == BBState::BeforeYield) { | ||
error = YieldError::getReturnBeforeYieldError(returnInst, inState); | ||
return None; | ||
} | ||
|
||
if (inState.yieldState == BBState::Conflict) { | ||
error = YieldError::getReturnOnConflict(returnInst, inState); | ||
return None; | ||
} | ||
return inState; | ||
} | ||
|
||
if (auto *yieldInst = dyn_cast<YieldInst>(term)) { | ||
if (inState.yieldState != BBState::BeforeYield) { | ||
error = YieldError::getMultipleYieldError(yieldInst, inState); | ||
return None; | ||
} | ||
|
||
// If the current state is BeforeYield and if the basic block ends in a | ||
// yield the new state is AfterYield. | ||
return inState.getAfterYieldState(term); | ||
} | ||
|
||
// We cannot have throws within generalized accessors. | ||
assert(!isa<ThrowInst>(term)); | ||
|
||
return inState; | ||
} | ||
|
||
/// Merge operation of the data-flow analysis. | ||
/// | ||
/// \param mergeBlock the basic block that is reached with two states | ||
/// \param oldState the previous state at the entry of the basic block | ||
/// \param newState the current state at the entry of the basic block | ||
/// \return the new state obtained by merging the oldState with the newState | ||
static BBState merge(SILBasicBlock *mergeBlock, BBState oldState, | ||
BBState newState) { | ||
auto oldYieldState = oldState.yieldState; | ||
auto newYieldState = newState.yieldState; | ||
|
||
if (oldYieldState == BBState::Conflict) { | ||
return oldState; | ||
} | ||
|
||
if (newYieldState == BBState::Conflict) { | ||
return newState; | ||
} | ||
|
||
if (oldYieldState == newYieldState) { | ||
return oldState; | ||
} | ||
|
||
// Here, one state is AfterYield and the other one is BeforeYield. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add an assertion for this? That way if the lattice changes but the merge function doesn't we'll get an assertion failure. |
||
// Merging them will result in Conflict. | ||
assert((newYieldState == BBState::AfterYield && | ||
oldYieldState == BBState::BeforeYield) || | ||
(newYieldState == BBState::BeforeYield && | ||
oldYieldState == BBState::AfterYield)); | ||
|
||
SILInstruction *yieldInst = (newYieldState == BBState::AfterYield) | ||
? newState.getYieldInstruction() | ||
: oldState.getYieldInstruction(); | ||
|
||
return BBState::getConflictState(yieldInst); | ||
} | ||
|
||
/// Perform a data-flow analysis to check whether there is exactly one | ||
/// yield before a return in every path in the control-flow graph. | ||
/// Diagnostics are not reported for nodes unreachable from the entry of | ||
/// the control-flow graph. | ||
void diagnoseYieldOnceUsage(SILFunction &fun) { | ||
llvm::DenseMap<SILBasicBlock *, BBState> bbToStateMap; | ||
SmallVector<SILBasicBlock *, 16> worklist; | ||
|
||
auto *entryBB = fun.getEntryBlock(); | ||
bbToStateMap.try_emplace(entryBB, BBState::getInitialState()); | ||
worklist.push_back(entryBB); | ||
|
||
// ReturnBeforeYield errors, which denote that no paths yield before | ||
// returning, are not diagnosed until the analysis completes, in order to | ||
// distinguish them from ReturnOnConflict errors, which happen when some | ||
// paths yield and some don't. | ||
Optional<YieldError> returnBeforeYieldError = None; | ||
|
||
// The algorithm uses a worklist to propagate the state through basic | ||
// blocks until a fix point. Since the state lattice has height one, each | ||
// basic block will be visited at most twice, and at most once if there are | ||
// no conflicts (which are errors). The basic blocks are added to the | ||
// worklist in a breadth-first fashion. The order of visiting basic blocks | ||
// is not important for correctness, but it could change the errors | ||
// diagnosed when there are multiple errors. Breadth-first order diagnoses | ||
// errors along shorter paths to return. | ||
while (!worklist.empty()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we're not iterating to a fixed point, instead of using a worklist can you just use PostOrderFunctionInfo::getReversePostOrder() to do the traversal? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are a couple of issues here: (a) the analysis does visit a node more than one once, if there is a conflict i.e, in the case of error. It pushes an already visited successor on to the worklist, if the successor is seen in a state that is different from the state it was seen with before. In the worst case, it processes each node twice, but that can happen only when there is an error. (b) The use of breadth-first search prioritizes shorter error paths over longer ones when there are multiple error paths. (However, an exception to this rule is that the analysis prioritizes "multiple-yield" error over a "return-without-yield" error by deferring the reporting of the latter error until the iteration completes.) I wonder whether RPO will preserve this behavior of shortest paths? E.g. if we have a tree of the form: (where D is a successor of C) Then post-order is BDCA and I suppose reverse post-order is ACDB. So we see There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the goal of visiting conflict blocks multiple times just to improve diagnostics in some way? It's not clear to me what it gets you at the moment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A block seen in the conflict state won't be visited twice, but a block seen in a non-conflict state could be processed again if it is seen in the conflict state. This is just to determine if we have a multiple yield error or a missing yield error. E.g.
OTOH,
It is just done to distinguish between the above two scenarios. Multiple yield errors seem easier to report by pointing to the two yields. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems fair. |
||
SILBasicBlock *bb = worklist.pop_back_val(); | ||
const BBState &state = bbToStateMap.find(bb)->getSecond(); | ||
|
||
Optional<YieldError> errorResult = None; | ||
auto resultState = transferStateThroughBasicBlock(bb, state, errorResult); | ||
|
||
if (!resultState.hasValue()) { | ||
auto error = errorResult.getValue(); | ||
|
||
// ReturnBeforeYield errors will not be reported until the analysis | ||
// completes. So record it and continue. | ||
if (error.errorKind == YieldError::ReturnBeforeYield) { | ||
if (!returnBeforeYieldError.hasValue()) { | ||
returnBeforeYieldError = error; | ||
} | ||
continue; | ||
} | ||
|
||
emitDiagnostics(error, fun); | ||
return; | ||
} | ||
|
||
auto nextState = resultState.getValue(); | ||
|
||
for (auto *succBB : bb->getSuccessorBlocks()) { | ||
// Optimistically try to set the state of the successor as next state. | ||
auto insertResult = bbToStateMap.try_emplace(succBB, nextState); | ||
|
||
// If the insertion was successful, it means we are seeing the successor | ||
// for the first time. Add the successor to the worklist. | ||
if (insertResult.second) { | ||
worklist.insert(worklist.begin(), succBB); | ||
continue; | ||
} | ||
|
||
// Here the successor already has a state. Merge the current and | ||
// previous states and propagate it if it is different from the | ||
// old state. | ||
const auto &oldState = insertResult.first->second; | ||
auto mergedState = merge(succBB, oldState, nextState); | ||
|
||
if (mergedState.yieldState == oldState.yieldState) | ||
continue; | ||
|
||
// Even though at this point there has to be an error since there is an | ||
// inconsistency between states coming along two different paths, | ||
// continue propagation of this conflict state to determine | ||
// whether this results in multiple-yields error or return-on-conflict | ||
// error. | ||
insertResult.first->second = mergedState; | ||
worklist.insert(worklist.begin(), succBB); | ||
} | ||
} | ||
|
||
if (returnBeforeYieldError.hasValue()) { | ||
emitDiagnostics(returnBeforeYieldError.getValue(), fun); | ||
} | ||
} | ||
|
||
void emitDiagnostics(YieldError &error, SILFunction &fun) { | ||
ASTContext &astCtx = fun.getModule().getASTContext(); | ||
|
||
switch (error.errorKind) { | ||
case YieldError::ReturnBeforeYield: { | ||
diagnose(astCtx, error.termInst->getLoc().getSourceLoc(), | ||
diag::return_before_yield); | ||
return; | ||
} | ||
case YieldError::MultipleYield: { | ||
diagnose(astCtx, error.termInst->getLoc().getSourceLoc(), | ||
diag::multiple_yields); | ||
|
||
// Add a note that points to the previous yield. | ||
diagnose(astCtx, | ||
error.inState.getYieldInstruction()->getLoc().getSourceLoc(), | ||
diag::previous_yield); | ||
return; | ||
} | ||
case YieldError::ReturnOnConflict: { | ||
// Emit an error on the return statement. | ||
diagnose(astCtx, error.termInst->getLoc().getSourceLoc(), | ||
diag::possible_return_before_yield); | ||
|
||
// Add a note that points to the yield instruction found. | ||
auto *yieldInst = error.inState.getYieldInstruction(); | ||
diagnose(astCtx, yieldInst->getLoc().getSourceLoc(), diag::one_yield); | ||
} | ||
} | ||
} | ||
|
||
/// The entry point to the transformation. | ||
void run() override { | ||
auto *fun = getFunction(); | ||
|
||
if (fun->getLoweredFunctionType()->getCoroutineKind() != | ||
SILCoroutineKind::YieldOnce) | ||
return; | ||
|
||
diagnoseYieldOnceUsage(*fun); | ||
} | ||
}; | ||
|
||
} // end anonymous namespace | ||
|
||
SILTransform *swift::createYieldOnceCheck() { | ||
return new YieldOnceCheck(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you put a brief description here of the high-level property that the pass is checking for here and a brief overview of high-level approach it takes. Not too much detail, but enough that someone new to the project could read it and know what the purpose of this pass is.