Skip to content

Commit fe9b76a

Browse files
authored
Merge pull request swiftlang#193 from ddunbar/core-wait-for-tasks-on-cancellation
[Core] Fix a race condition in task cancellation.
2 parents c9f7a62 + b40008f commit fe9b76a

File tree

2 files changed

+46
-14
lines changed

2 files changed

+46
-14
lines changed

lib/BuildSystem/BuildSystem.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1125,7 +1125,7 @@ Rule BuildSystemEngineDelegate::lookupRule(const KeyType& keyData) {
11251125
if (node->isVirtual()) {
11261126
return Rule{
11271127
keyData,
1128-
/*Action=*/ [node](BuildEngine& engine) -> Task* {
1128+
/*Action=*/ [](BuildEngine& engine) -> Task* {
11291129
return engine.registerTask(new VirtualInputNodeTask());
11301130
},
11311131
/*IsValid=*/ [node](BuildEngine& engine, const Rule& rule,

lib/Core/BuildEngine.cpp

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,7 @@ class BuildEngineImpl {
856856
ruleInfo->keyID, ruleInfo->rule, ruleInfo->result, &error);
857857
if (!result) {
858858
delegate.error(error);
859-
completeRemainingTasks();
859+
cancelRemainingTasks();
860860
return false;
861861
}
862862
}
@@ -891,6 +891,10 @@ class BuildEngineImpl {
891891

892892
// If we haven't done any other work at this point but we have pending
893893
// tasks, we need to wait for a task to complete.
894+
//
895+
// NOTE: Cancellation also implements this process, if you modify this
896+
// code please also validate that \see cancelRemainingTasks() is still
897+
// correct.
894898
if (!didWork && numOutstandingUnfinishedTasks != 0) {
895899
TracingInterval i(EngineQueueItemKind::Waiting);
896900

@@ -901,7 +905,7 @@ class BuildEngineImpl {
901905
// of the mutex, if one has been added then we may have already missed
902906
// the condition notification and cannot safely wait.
903907
if (finishedTaskInfos.empty()) {
904-
finishedTaskInfosCondition.wait(lock);
908+
finishedTaskInfosCondition.wait(lock);
905909
}
906910

907911
didWork = true;
@@ -1054,17 +1058,45 @@ class BuildEngineImpl {
10541058
assert(!cycleList.empty());
10551059

10561060
delegate.cycleDetected(cycleList);
1057-
completeRemainingTasks();
1061+
cancelRemainingTasks();
10581062
}
10591063

1060-
// Complete all of the remaining tasks.
1061-
//
1062-
// FIXME: Should we have a task abort callback?
1063-
void completeRemainingTasks() {
1064+
// Cancel all of the remaining tasks.
1065+
void cancelRemainingTasks() {
1066+
// We need to wait for any currently running tasks to be reported as
1067+
// complete. Not doing this would mean we could get asynchronous calls
1068+
// attempting to modify the task state concurrently with the cancellation
1069+
// process, which isn't something we want to need to synchronize on.
1070+
//
1071+
// We don't process the requests at all, we simply drain them. In practice,
1072+
// we expect clients to implement cancellation in conjection with causing
1073+
// long-running tasks to also cancel and fail, so preserving those results
1074+
// is not valuable.
1075+
while (numOutstandingUnfinishedTasks != 0) {
1076+
std::unique_lock<std::mutex> lock(finishedTaskInfosMutex);
1077+
if (finishedTaskInfos.empty()) {
1078+
finishedTaskInfosCondition.wait(lock);
1079+
} else {
1080+
assert(finishedTaskInfos.size() <= numOutstandingUnfinishedTasks);
1081+
numOutstandingUnfinishedTasks -= finishedTaskInfos.size();
1082+
finishedTaskInfos.clear();
1083+
}
1084+
}
1085+
10641086
for (auto& it: taskInfos) {
1065-
// Complete the task, even though it did not update the value.
1087+
// Cancel the task, marking it incomplete.
1088+
//
1089+
// This will force it to rerun in a later build, but since it was already
1090+
// running in this build that was almost certainly going to be
1091+
// required. Technically, there are rare situations where it wouldn't have
1092+
// to rerun (e.g., if resultIsValid becomes true after being false in this
1093+
// run), and if we were willing to restore the tasks state--either by
1094+
// keeping the old one or by restoring from the database--we could ensure
1095+
// that doesn't happen.
10661096
//
1067-
// FIXME: What should we do here with the value?
1097+
// NOTE: Actually, we currently don't sync this write to the database, so
1098+
// in some cases we do actually preserve this information (if the client
1099+
// ends up cancelling, then reloading froom the database).
10681100
TaskInfo* taskInfo = &it.second;
10691101
RuleInfo* ruleInfo = taskInfo->forRuleInfo;
10701102
assert(taskInfo == ruleInfo->getPendingTaskInfo());
@@ -1176,7 +1208,7 @@ class BuildEngineImpl {
11761208
db->lookupRuleResult(ruleInfo.keyID, ruleInfo.rule, &ruleInfo.result, &error);
11771209
if (!error.empty()) {
11781210
delegate.error(error);
1179-
completeRemainingTasks();
1211+
cancelRemainingTasks();
11801212
}
11811213
}
11821214

@@ -1350,7 +1382,7 @@ class BuildEngineImpl {
13501382
// Validate the InputID.
13511383
if (inputID > BuildEngine::kMaximumInputID) {
13521384
delegate.error("error: attempt to use reserved input ID");
1353-
completeRemainingTasks();
1385+
cancelRemainingTasks();
13541386
return;
13551387
}
13561388

@@ -1368,7 +1400,7 @@ class BuildEngineImpl {
13681400

13691401
if (!taskInfo->forRuleInfo->isInProgressComputing()) {
13701402
delegate.error("error: invalid state for adding discovered dependency");
1371-
completeRemainingTasks();
1403+
cancelRemainingTasks();
13721404
return;
13731405
}
13741406

@@ -1385,7 +1417,7 @@ class BuildEngineImpl {
13851417

13861418
if (!taskInfo->forRuleInfo->isInProgressComputing()) {
13871419
delegate.error("error: invalid state for marking task complete");
1388-
completeRemainingTasks();
1420+
cancelRemainingTasks();
13891421
return;
13901422
}
13911423

0 commit comments

Comments
 (0)