Skip to content

Commit 2e2d161

Browse files
authored
Merge pull request #14732 from graydon/batch-mode-driver-work
2 parents 6095dc9 + 263c4c5 commit 2e2d161

File tree

7 files changed

+183
-59
lines changed

7 files changed

+183
-59
lines changed

include/swift/Driver/Compilation.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ class Compilation {
161161
/// redundant work.
162162
bool EnableBatchMode;
163163

164+
/// Provides a randomization seed to batch-mode partitioning, for debugging.
165+
unsigned BatchSeed;
166+
164167
/// True if temporary files should not be deleted.
165168
bool SaveTemps;
166169

@@ -203,6 +206,7 @@ class Compilation {
203206
unsigned NumberOfParallelCommands = 1,
204207
bool EnableIncrementalBuild = false,
205208
bool EnableBatchMode = false,
209+
unsigned BatchSeed = 0,
206210
bool SkipTaskExecution = false,
207211
bool SaveTemps = false,
208212
bool ShowDriverTimeCompilation = false,

include/swift/Driver/Driver.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ class Driver {
156156
/// Indicates whether the driver should check that the input files exist.
157157
bool CheckInputFilesExist = true;
158158

159+
/// Provides a randomization seed to batch-mode partitioning, for debugging.
160+
unsigned DriverBatchSeed = 0;
161+
159162
public:
160163
Driver(StringRef DriverExecutable, StringRef Name,
161164
ArrayRef<const char *> Args, DiagnosticEngine &Diags);

include/swift/Option/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ def driver_show_job_lifecycle : Flag<["-"], "driver-show-job-lifecycle">,
9898
HelpText<"Show every step in the lifecycle of driver jobs">;
9999
def driver_use_filelists : Flag<["-"], "driver-use-filelists">,
100100
InternalDebugOpt, HelpText<"Pass input files as filelists whenever possible">;
101+
def driver_batch_seed : Separate<["-"], "driver-batch-seed">,
102+
InternalDebugOpt,
103+
HelpText<"Use the given seed value to randomize batch-mode partitions">;
101104

102105
def driver_always_rebuild_dependents :
103106
Flag<["-"], "driver-always-rebuild-dependents">, InternalDebugOpt,

lib/Driver/Compilation.cpp

Lines changed: 87 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@
4242

4343
#include "CompilationRecord.h"
4444

45+
// Batch-mode has a sub-mode for testing that randomizes batch partitions,
46+
// by user-provided seed. That is the only thing randomized here.
47+
#include <random>
48+
4549
using namespace swift;
4650
using namespace swift::sys;
4751
using namespace swift::driver;
@@ -97,6 +101,7 @@ Compilation::Compilation(DiagnosticEngine &Diags,
97101
unsigned NumberOfParallelCommands,
98102
bool EnableIncrementalBuild,
99103
bool EnableBatchMode,
104+
unsigned BatchSeed,
100105
bool SkipTaskExecution,
101106
bool SaveTemps,
102107
bool ShowDriverTimeCompilation,
@@ -112,6 +117,7 @@ Compilation::Compilation(DiagnosticEngine &Diags,
112117
SkipTaskExecution(SkipTaskExecution),
113118
EnableIncrementalBuild(EnableIncrementalBuild),
114119
EnableBatchMode(EnableBatchMode),
120+
BatchSeed(BatchSeed),
115121
SaveTemps(SaveTemps),
116122
ShowDriverTimeCompilation(ShowDriverTimeCompilation),
117123
Stats(std::move(StatsReporter)) {
@@ -120,7 +126,8 @@ Compilation::Compilation(DiagnosticEngine &Diags,
120126
static bool writeFilelistIfNecessary(const Job *job, DiagnosticEngine &diags);
121127

122128
using CommandSet = llvm::SmallPtrSet<const Job *, 16>;
123-
129+
using CommandSetVector = llvm::SetVector<const Job*>;
130+
using BatchPartition = std::vector<std::vector<const Job*>>;
124131

125132
using InputInfoMap = llvm::SmallMapVector<const llvm::opt::Arg *,
126133
CompileJobAction::InputInfo, 16>;
@@ -140,7 +147,7 @@ namespace driver {
140147
/// A temporary buffer to hold commands that were scheduled but haven't been
141148
/// added to the Task Queue yet, because we might try batching them together
142149
/// first.
143-
CommandSet PendingExecution;
150+
CommandSetVector PendingExecution;
144151

145152
/// Set of synthetic BatchJobs that serve to cluster subsets of jobs waiting
146153
/// in PendingExecution. Also used to identify (then unpack) BatchJobs back
@@ -680,7 +687,8 @@ namespace driver {
680687

681688
/// Insert all jobs in \p Cmds (of descriptive name \p Kind) to the \c
682689
/// TaskQueue, and clear \p Cmds.
683-
void transferJobsToTaskQueue(CommandSet &Cmds, StringRef Kind) {
690+
template <typename Container>
691+
void transferJobsToTaskQueue(Container &Cmds, StringRef Kind) {
684692
for (const Job *Cmd : Cmds) {
685693
if (Comp.ShowJobLifecycle)
686694
llvm::outs() << "Adding " << Kind
@@ -694,8 +702,8 @@ namespace driver {
694702
/// Partition the jobs in \c PendingExecution into those that are \p
695703
/// Batchable and those that are \p NonBatchable, clearing \p
696704
/// PendingExecution.
697-
void getPendingBatchableJobs(CommandSet &Batchable,
698-
CommandSet &NonBatchable) {
705+
void getPendingBatchableJobs(CommandSetVector &Batchable,
706+
CommandSetVector &NonBatchable) {
699707
for (const Job *Cmd : PendingExecution) {
700708
if (Comp.getToolChain().jobIsBatchable(Comp, Cmd)) {
701709
if (Comp.ShowJobLifecycle)
@@ -710,49 +718,83 @@ namespace driver {
710718
PendingExecution.clear();
711719
}
712720

713-
/// If \p CurrentBatch is nonempty, construct a new \c BatchJob from its
721+
/// If \p Batch is nonempty, construct a new \c BatchJob from its
714722
/// contents by calling \p ToolChain::constructBatchJob, then insert the
715-
/// new \c BatchJob into \p Batches and clear \p CurrentBatch.
723+
/// new \c BatchJob into \p Batches.
716724
void
717-
formBatchJobFromCurrentBatch(CommandSet &Batches,
718-
llvm::SetVector<const Job *> &CurrentBatch) {
719-
if (CurrentBatch.empty())
725+
formBatchJobFromPartitionBatch(std::vector<const Job *> &Batches,
726+
std::vector<const Job *> const &Batch) {
727+
if (Batch.empty())
720728
return;
721729
if (Comp.ShowJobLifecycle)
722730
llvm::outs() << "Forming batch job from "
723-
<< CurrentBatch.size() << " constituents\n";
731+
<< Batch.size() << " constituents\n";
724732
auto const &TC = Comp.getToolChain();
725-
auto J = TC.constructBatchJob(CurrentBatch.getArrayRef(), Comp);
733+
auto J = TC.constructBatchJob(Batch, Comp);
726734
if (J)
727-
Batches.insert(Comp.addJob(std::move(J)));
728-
CurrentBatch.clear();
735+
Batches.push_back(Comp.addJob(std::move(J)));
729736
}
730737

731-
/// Return true iff \p Cmd can be expanded by \p CurrentBatch, meaning
732-
/// that \p CurrentBatch is smaller than \p TargetBatchSize and \p Cmd
733-
/// is batch-combinable with the equivalence class of \p CurrentBatch
734-
/// (as represented by element 0 of \p CurrentBatch).
735-
bool canExpandBatch(const Job *Cmd,
736-
llvm::SetVector<const Job *> &CurrentBatch,
737-
size_t TargetBatchSize) {
738-
auto const &TC = Comp.getToolChain();
739-
return (CurrentBatch.empty() ||
740-
(TC.jobsAreBatchCombinable(Comp, Cmd, CurrentBatch[0]) &&
741-
CurrentBatch.size() < TargetBatchSize));
738+
/// Inspect current batch \p i of the \p Partition currently being built
739+
/// and, if that batch is "full" (in the sense of holding an evenly-divided
740+
/// portion of NumJobs) then advance \p i to the next batch index in the
741+
/// partition.
742+
void maybeAdvanceToNextPartition(size_t &i,
743+
BatchPartition const &Partition,
744+
size_t NumJobs) {
745+
assert(i < Partition.size());
746+
size_t Remainder = NumJobs % Partition.size();
747+
size_t TargetSize = NumJobs / Partition.size();
748+
// Spread remainder evenly across partitions by adding 1 to the target
749+
// size of the first Remainder of them.
750+
if (i < Remainder)
751+
TargetSize++;
752+
if (Partition[i].size() >= TargetSize)
753+
++i;
754+
assert(i < Partition.size());
742755
}
743756

744-
/// If \p CurrentBatch can't be expanded with \p Cmd, form a new \c BatchJob
745-
/// from \p CurrentBatch, add it to \p Batches, and reset\p CurrentBatch;
746-
/// then in either case, insert \p Cmd into \p CurrentBatch.
747-
void expandBatch(const Job *Cmd,
748-
CommandSet &Batches,
749-
llvm::SetVector<const Job *> &CurrentBatch,
750-
size_t TargetBatchSize) {
751-
if (!canExpandBatch(Cmd, CurrentBatch, TargetBatchSize)) {
752-
formBatchJobFromCurrentBatch(Batches, CurrentBatch);
757+
/// Shuffle \p Batchable if -driver-batch-seed is nonzero.
758+
void maybeShuffleBatchable(std::vector<const Job *> &Batchable) {
759+
if (Comp.BatchSeed != 0) {
760+
std::minstd_rand gen(Comp.BatchSeed);
761+
std::shuffle(Batchable.begin(), Batchable.end(), gen);
762+
}
763+
}
764+
765+
/// Create \c NumberOfParallelCommands batches and assign each job to a
766+
/// batch either filling each partition in order or, if seeded with a
767+
/// nonzero value, pseudo-randomly (but deterministically and nearly-evenly).
768+
void partitionIntoBatches(std::vector<const Job *> Batchable,
769+
BatchPartition &Partition) {
770+
if (Comp.ShowJobLifecycle) {
771+
llvm::outs() << "Found " << Batchable.size() << " batchable jobs\n";
772+
llvm::outs() << "Forming into " << Partition.size() << " batches\n";
773+
}
774+
775+
assert(Partition.size() > 0);
776+
maybeShuffleBatchable(Batchable);
777+
778+
size_t i = 0;
779+
auto const &TC = Comp.getToolChain();
780+
for (const Job *Cmd : Batchable) {
781+
maybeAdvanceToNextPartition(i, Partition, Batchable.size());
782+
std::vector<const Job*> &P = Partition[i];
783+
if (P.empty() || TC.jobsAreBatchCombinable(Comp, P[0], Cmd)) {
784+
if (Comp.ShowJobLifecycle)
785+
llvm::outs() << "Adding " << LogJob(Cmd)
786+
<< " to batch " << i << '\n';
787+
P.push_back(Cmd);
788+
} else {
789+
// Strange but theoretically possible that we have a batchable job
790+
// that's not combinable with others; tack a new batch on for it.
791+
if (Comp.ShowJobLifecycle)
792+
llvm::outs() << "Adding " << LogJob(Cmd)
793+
<< " to new batch " << Partition.size() << '\n';
794+
Partition.push_back(std::vector<const Job*>());
795+
Partition.back().push_back(Cmd);
796+
}
753797
}
754-
llvm::outs() << "Adding to batch: " << LogJob(Cmd) << "\n";
755-
CurrentBatch.insert(Cmd);
756798
}
757799

758800
/// Select jobs that are batch-combinable from \c PendingExecution, combine
@@ -768,25 +810,18 @@ namespace driver {
768810
return;
769811
}
770812

771-
// Partition the pending jobs.
772-
CommandSet Batchable, NonBatchable, Batches;
813+
// Split the batchable from non-batchable pending jobs.
814+
CommandSetVector Batchable, NonBatchable;
773815
getPendingBatchableJobs(Batchable, NonBatchable);
774-
size_t TargetBatchSize = Batchable.size() / Comp.NumberOfParallelCommands;
775816

776-
if (Comp.ShowJobLifecycle) {
777-
llvm::outs() << "Found " << Batchable.size() << " batchable jobs\n";
778-
llvm::outs() << "Aiming for batch size " << TargetBatchSize << '\n';
779-
}
780-
781-
// Batch the batchable jobs.
782-
llvm::SetVector<const Job *> CurrentBatch;
783-
for (const Job *Cmd : Batchable) {
784-
expandBatch(Cmd, Batches, CurrentBatch, TargetBatchSize);
785-
}
817+
// Partition the batchable jobs into sets.
818+
BatchPartition Partition(Comp.NumberOfParallelCommands);
819+
partitionIntoBatches(Batchable.takeVector(), Partition);
786820

787-
// Form a residual incomplete batch if any jobs remain.
788-
if (!CurrentBatch.empty()) {
789-
formBatchJobFromCurrentBatch(Batches, CurrentBatch);
821+
// Construct a BatchJob from each batch in the partition.
822+
std::vector<const Job *> Batches;
823+
for (auto const &Batch : Partition) {
824+
formBatchJobFromPartitionBatch(Batches, Batch);
790825
}
791826

792827
// Save batches so we can locate and decompose them on task-exit.

lib/Driver/Driver.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,12 @@ Driver::buildCompilation(const ToolChain &TC,
522522
ArgList->hasArg(options::OPT_driver_show_incremental);
523523
bool ShowJobLifecycle =
524524
ArgList->hasArg(options::OPT_driver_show_job_lifecycle);
525+
if (const Arg *A = ArgList->getLastArg(options::OPT_driver_batch_seed)) {
526+
if (StringRef(A->getValue()).getAsInteger(10, DriverBatchSeed)) {
527+
Diags.diagnose(SourceLoc(), diag::error_invalid_arg_value,
528+
A->getAsString(*ArgList), A->getValue());
529+
}
530+
}
525531

526532
bool Incremental = ArgList->hasArg(options::OPT_incremental);
527533
if (ArgList->hasArg(options::OPT_whole_module_optimization)) {
@@ -684,6 +690,7 @@ Driver::buildCompilation(const ToolChain &TC,
684690
NumberOfParallelCommands,
685691
Incremental,
686692
BatchMode,
693+
DriverBatchSeed,
687694
DriverSkipExecution,
688695
SaveTemps,
689696
ShowDriverTimeCompilation,

lib/Driver/Job.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -244,36 +244,44 @@ void Job::printSummary(raw_ostream &os) const {
244244
// from a JobAction that itself has InputActions sources, then we collect
245245
// those up. Otherwise it's more correct to talk about our inputs as the
246246
// outputs of our input-jobs.
247-
SmallVector<std::string, 4> Inputs;
247+
SmallVector<StringRef, 4> Inputs;
248+
SmallVector<StringRef, 4> Outputs = getOutput().getPrimaryOutputFilenames();
248249

249250
for (const Action *A : getSource().getInputs())
250251
if (const auto *IA = dyn_cast<InputAction>(A))
251252
Inputs.push_back(IA->getInputArg().getValue());
252253

253254
for (const Job *J : getInputs())
254-
for (const std::string &f : J->getOutput().getPrimaryOutputFilenames())
255+
for (StringRef f : J->getOutput().getPrimaryOutputFilenames())
255256
Inputs.push_back(f);
256257

257258
size_t limit = 3;
258-
size_t actual = Inputs.size();
259-
if (actual > limit) {
259+
size_t actual_in = Inputs.size();
260+
size_t actual_out = Outputs.size();
261+
if (actual_in > limit) {
260262
Inputs.erase(Inputs.begin() + limit, Inputs.end());
261263
}
264+
if (actual_out > limit) {
265+
Outputs.erase(Outputs.begin() + limit, Outputs.end());
266+
}
262267

263268
os << "{" << getSource().getClassName() << ": ";
264-
interleave(getOutput().getPrimaryOutputFilenames(),
269+
interleave(Outputs,
265270
[&](const std::string &Arg) {
266271
os << llvm::sys::path::filename(Arg);
267272
},
268273
[&] { os << ' '; });
274+
if (actual_out > limit) {
275+
os << " ... " << (actual_out-limit) << " more";
276+
}
269277
os << " <= ";
270278
interleave(Inputs,
271279
[&](const std::string &Arg) {
272280
os << llvm::sys::path::filename(Arg);
273281
},
274282
[&] { os << ' '; });
275-
if (actual > limit) {
276-
os << " ... " << (actual-limit) << " more";
283+
if (actual_in > limit) {
284+
os << " ... " << (actual_in-limit) << " more";
277285
}
278286
os << "}";
279287
}

0 commit comments

Comments
 (0)