Skip to content

Commit fa830fe

Browse files
committed
[BatchMode] Preserve invariant that shuffled batches are subsequences of inputs.
1 parent 03a9340 commit fa830fe

File tree

3 files changed

+95
-43
lines changed

3 files changed

+95
-43
lines changed

lib/Driver/Compilation.cpp

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "swift/AST/DiagnosticEngine.h"
1616
#include "swift/AST/DiagnosticsDriver.h"
1717
#include "swift/Basic/Program.h"
18+
#include "swift/Basic/STLExtras.h"
1819
#include "swift/Basic/Statistic.h"
1920
#include "swift/Basic/TaskQueue.h"
2021
#include "swift/Basic/Version.h"
@@ -724,31 +725,31 @@ namespace driver {
724725
Batches.push_back(Comp.addJob(std::move(J)));
725726
}
726727

727-
/// Inspect current batch \p i of the \p Partition currently being built
728-
/// and, if that batch is "full" (in the sense of holding an evenly-divided
729-
/// portion of NumJobs) then advance \p i to the next batch index in the
730-
/// partition.
731-
void maybeAdvanceToNextPartition(size_t &i,
732-
BatchPartition const &Partition,
733-
size_t NumJobs) {
734-
assert(i < Partition.size());
735-
size_t Remainder = NumJobs % Partition.size();
736-
size_t TargetSize = NumJobs / Partition.size();
737-
// Spread remainder evenly across partitions by adding 1 to the target
738-
// size of the first Remainder of them.
739-
if (i < Remainder)
740-
TargetSize++;
741-
if (Partition[i].size() >= TargetSize)
742-
++i;
743-
assert(i < Partition.size());
744-
}
745-
746-
/// Shuffle \p Batchable if -driver-batch-seed is nonzero.
747-
void maybeShuffleBatchable(std::vector<const Job *> &Batchable) {
728+
/// Build a vector of partition indices, one per Job: the i'th index says
729+
/// which batch of the partition the i'th Job will be assigned to. If we are
730+
/// shuffling due to -driver-batch-seed, the returned indices will not be
731+
/// arranged in contiguous runs. We shuffle partition-indices here, not
732+
/// elements themselves, to preserve the invariant that each batch is a
733+
/// subsequence of the full set of inputs, not just a subset.
734+
std::vector<size_t>
735+
assignJobsToPartitions(size_t PartitionSize,
736+
size_t NumJobs) {
737+
size_t Remainder = NumJobs % PartitionSize;
738+
size_t TargetSize = NumJobs / PartitionSize;
739+
std::vector<size_t> PartitionIndex;
740+
PartitionIndex.reserve(NumJobs);
741+
for (size_t P = 0; P < PartitionSize; ++P) {
742+
// Spread remainder evenly across partitions by adding 1 to the target
743+
// size of the first Remainder of them.
744+
size_t FillCount = TargetSize + ((P < Remainder) ? 1 : 0);
745+
std::fill_n(std::back_inserter(PartitionIndex), FillCount, P);
746+
}
748747
if (Comp.BatchSeed != 0) {
749748
std::minstd_rand gen(Comp.BatchSeed);
750-
std::shuffle(Batchable.begin(), Batchable.end(), gen);
749+
std::shuffle(PartitionIndex.begin(), PartitionIndex.end(), gen);
751750
}
751+
assert(PartitionIndex.size() == NumJobs);
752+
return PartitionIndex;
752753
}
753754

754755
/// Create \c NumberOfParallelCommands batches and assign each job to a
@@ -762,28 +763,28 @@ namespace driver {
762763
}
763764

764765
assert(!Partition.empty());
765-
maybeShuffleBatchable(Batchable);
766-
767-
size_t i = 0;
766+
auto PartitionIndex = assignJobsToPartitions(Partition.size(),
767+
Batchable.size());
768+
assert(PartitionIndex.size() == Batchable.size());
768769
auto const &TC = Comp.getToolChain();
769-
for (const Job *Cmd : Batchable) {
770-
maybeAdvanceToNextPartition(i, Partition, Batchable.size());
771-
std::vector<const Job*> &P = Partition[i];
772-
if (P.empty() || TC.jobsAreBatchCombinable(Comp, P[0], Cmd)) {
773-
if (Comp.ShowJobLifecycle)
774-
llvm::outs() << "Adding " << LogJob(Cmd)
775-
<< " to batch " << i << '\n';
776-
P.push_back(Cmd);
777-
} else {
778-
// Strange but theoretically possible that we have a batchable job
779-
// that's not combinable with others; tack a new batch on for it.
780-
if (Comp.ShowJobLifecycle)
781-
llvm::outs() << "Adding " << LogJob(Cmd)
782-
<< " to new batch " << Partition.size() << '\n';
783-
Partition.push_back(std::vector<const Job*>());
784-
Partition.back().push_back(Cmd);
785-
}
786-
}
770+
for_each(Batchable, PartitionIndex, [&](const Job *Cmd, size_t Idx) {
771+
assert(Idx < Partition.size());
772+
std::vector<const Job*> &P = Partition[Idx];
773+
if (P.empty() || TC.jobsAreBatchCombinable(Comp, P[0], Cmd)) {
774+
if (Comp.ShowJobLifecycle)
775+
llvm::outs() << "Adding " << LogJob(Cmd)
776+
<< " to batch " << Idx << '\n';
777+
P.push_back(Cmd);
778+
} else {
779+
// Strange but theoretically possible that we have a batchable job
780+
// that's not combinable with others; tack a new batch on for it.
781+
if (Comp.ShowJobLifecycle)
782+
llvm::outs() << "Adding " << LogJob(Cmd)
783+
<< " to new batch " << Partition.size() << '\n';
784+
Partition.push_back(std::vector<const Job*>());
785+
Partition.back().push_back(Cmd);
786+
}
787+
});
787788
}
788789

789790
// FIXME: at the moment we're not passing OutputFileMaps to frontends, so

lib/Driver/ToolChain.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,35 @@ mergeBatchInputs(ArrayRef<const Job *> jobs,
280280
return false;
281281
}
282282

283+
/// Debugging only: return whether the set of \p jobs is an ordered subsequence
284+
/// of the sequence of top-level input files in the \c Compilation \p C.
285+
static bool
286+
jobsAreSubsequenceOfCompilationInputs(ArrayRef<const Job *> jobs,
287+
Compilation &C) {
288+
llvm::SmallVector<const Job *, 16> sortedJobs;
289+
llvm::StringMap<const Job *> jobsByInput;
290+
for (const Job *J : jobs) {
291+
const CompileJobAction *CJA = cast<CompileJobAction>(&J->getSource());
292+
const InputAction* IA = findSingleSwiftInput(CJA);
293+
auto R = jobsByInput.insert(std::make_pair(IA->getInputArg().getValue(),
294+
J));
295+
assert(R.second);
296+
}
297+
for (const InputPair &P : C.getInputFiles()) {
298+
auto I = jobsByInput.find(P.second->getValue());
299+
if (I != jobsByInput.end()) {
300+
sortedJobs.push_back(I->second);
301+
}
302+
}
303+
if (sortedJobs.size() != jobs.size())
304+
return false;
305+
for (size_t i = 0; i < sortedJobs.size(); ++i) {
306+
if (sortedJobs[i] != jobs[i])
307+
return false;
308+
}
309+
return true;
310+
}
311+
283312
/// Construct a \c BatchJob by merging the constituent \p jobs' CommandOutput,
284313
/// input \c Job and \c Action members. Call through to \c constructInvocation
285314
/// on \p BatchJob, to build the \c InvocationInfo.
@@ -290,6 +319,8 @@ ToolChain::constructBatchJob(ArrayRef<const Job *> jobs,
290319
if (jobs.empty())
291320
return nullptr;
292321

322+
assert(jobsAreSubsequenceOfCompilationInputs(jobs, C));
323+
293324
// Synthetic OutputInfo is a slightly-modified version of the initial
294325
// compilation's OI.
295326
auto OI = C.getOutputInfo();
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// When multiple additional-outputs on the same command-line are no longer
2+
// supported (i.e. when we've moved to mandatory use of output file maps for
3+
// communicating multiple additional-outputs to frontends) this test will no
4+
// longer make sense, and should be removed.
5+
//
6+
// RUN: %empty-directory(%t)
7+
// RUN: touch %t/file-01.swift %t/file-02.swift %t/file-03.swift %t/file-04.swift %t/file-05.swift
8+
// RUN: touch %t/file-06.swift %t/file-07.swift %t/file-08.swift %t/file-09.swift
9+
//
10+
// RUN: %swiftc_driver -enable-batch-mode -driver-batch-seed 1 -driver-print-jobs -driver-skip-execution -j 3 -emit-module -module-name foo %t/file-01.swift %t/file-02.swift %t/file-03.swift %t/file-04.swift %t/file-05.swift %t/file-06.swift %t/file-07.swift %t/file-08.swift %t/file-09.swift >%t/out.txt
11+
// RUN: %FileCheck %s <%t/out.txt
12+
//
13+
// Each batch should get 3 primaries; check that each has 3 modules _in the same numeric order_.
14+
//
15+
// CHECK: {{.*}}/swift {{.*}}-primary-file {{[^ ]*}}/file-[[A1:[0-9]+]].swift {{.*}}-primary-file {{[^ ]*}}/file-[[A2:[0-9]+]].swift {{.*}}-primary-file {{[^ ]*}}/file-[[A3:[0-9]+]].swift
16+
// CHECK-SAME: -o {{.*}}/file-[[A1]]-{{[a-z0-9]+}}.swiftmodule -o {{.*}}/file-[[A2]]-{{[a-z0-9]+}}.swiftmodule -o {{.*}}/file-[[A3]]-{{[a-z0-9]+}}.swiftmodule
17+
// CHECK: {{.*}}/swift {{.*}}-primary-file {{[^ ]*}}/file-[[B1:[0-9]+]].swift {{.*}}-primary-file {{[^ ]*}}/file-[[B2:[0-9]+]].swift {{.*}}-primary-file {{[^ ]*}}/file-[[B3:[0-9]+]].swift
18+
// CHECK-SAME: -o {{.*}}/file-[[B1]]-{{[a-z0-9]+}}.swiftmodule -o {{.*}}/file-[[B2]]-{{[a-z0-9]+}}.swiftmodule -o {{.*}}/file-[[B3]]-{{[a-z0-9]+}}.swiftmodule
19+
// CHECK: {{.*}}/swift {{.*}}-primary-file {{[^ ]*}}/file-[[C1:[0-9]+]].swift {{.*}}-primary-file {{[^ ]*}}/file-[[C2:[0-9]+]].swift {{.*}}-primary-file {{[^ ]*}}/file-[[C3:[0-9]+]].swift
20+
// CHECK-SAME: -o {{.*}}/file-[[C1]]-{{[a-z0-9]+}}.swiftmodule -o {{.*}}/file-[[C2]]-{{[a-z0-9]+}}.swiftmodule -o {{.*}}/file-[[C3]]-{{[a-z0-9]+}}.swiftmodule

0 commit comments

Comments
 (0)