|
52 | 52 | #include "llvm/Transforms/ObjCARC.h"
|
53 | 53 | #include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
54 | 54 |
|
| 55 | +#include <numeric> |
| 56 | + |
55 | 57 | using namespace llvm;
|
56 | 58 |
|
57 | 59 | #define DEBUG_TYPE "thinlto"
|
@@ -881,11 +883,24 @@ void ThinLTOCodeGenerator::run() {
|
881 | 883 | for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries)
|
882 | 884 | ExportLists[DefinedGVSummaries.first()];
|
883 | 885 |
|
| 886 | + // Compute the ordering in which we will process the inputs: the rough |
| 887 | + // heuristic here is to sort them by size so that the largest module gets |
| 888 | + // scheduled as soon as possible. This is purely a compile-time optimization. |
| 889 | + std::vector<int> ModulesOrdering; |
| 890 | + ModulesOrdering.resize(Modules.size()); |
| 891 | + std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); |
| 892 | + std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), |
| 893 | + [&](int LeftIndex, int RightIndex) { |
| 894 | + auto LSize = Modules[LeftIndex].getBufferSize(); |
| 895 | + auto RSize = Modules[RightIndex].getBufferSize(); |
| 896 | + return LSize > RSize; |
| 897 | + }); |
| 898 | + |
884 | 899 | // Parallel optimizer + codegen
|
885 | 900 | {
|
886 | 901 | ThreadPool Pool(getNumCores());
|
887 |
| - int count = 0; |
888 |
| - for (auto &ModuleBuffer : Modules) { |
| 902 | + for (auto IndexCount : ModulesOrdering) { |
| 903 | + auto &ModuleBuffer = Modules[IndexCount]; |
889 | 904 | Pool.async([&](int count) {
|
890 | 905 | auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
|
891 | 906 | auto &ExportList = ExportLists[ModuleIdentifier];
|
@@ -937,8 +952,7 @@ void ThinLTOCodeGenerator::run() {
|
937 | 952 |
|
938 | 953 | OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
|
939 | 954 | ProducedBinaries[count] = std::move(OutputBuffer);
|
940 |
| - }, count); |
941 |
| - count++; |
| 955 | + }, IndexCount); |
942 | 956 | }
|
943 | 957 | }
|
944 | 958 |
|
|
0 commit comments