-
Notifications
You must be signed in to change notification settings - Fork 787
[SYCL] Enable parallel execution of llvm-foreach commands under an option #4360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
782277c
535fb60
82b1779
516779b
f4051c0
8e709ba
8a9f664
1690ad7
2edab5f
6f9748e
6cd8cb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,9 @@ | |
#include "llvm/Support/Path.h" | ||
#include "llvm/Support/Program.h" | ||
#include "llvm/Support/SystemUtils.h" | ||
#include "llvm/Support/Threading.h" | ||
|
||
#include <list> | ||
#include <vector> | ||
|
||
using namespace llvm; | ||
|
@@ -69,6 +71,17 @@ static cl::opt<std::string> OutIncrement{ | |
"pass."), | ||
cl::init(""), cl::value_desc("R")}; | ||
|
||
static cl::opt<unsigned int> JobsInParallel{ | ||
"jobs", | ||
cl::Optional, | ||
cl::init(1), | ||
cl::desc("Specify the number of threads for launching input commands in " | ||
"parallel mode"), | ||
}; | ||
|
||
static cl::alias JobsInParallelShort{"j", cl::desc("Alias for --jobs"), | ||
cl::aliasopt(JobsInParallel)}; | ||
|
||
static void error(const Twine &Msg) { | ||
errs() << "llvm-foreach: " << Msg << '\n'; | ||
exit(1); | ||
|
@@ -79,6 +92,32 @@ static void error(std::error_code EC, const Twine &Prefix) { | |
error(Prefix + ": " + EC.message()); | ||
} | ||
|
||
// With BlockingWait=false this function just goes through the all | ||
// submitted jobs to check if some of them have finished. | ||
int checkIfJobsAreFinished(std::list<sys::ProcessInfo> &JobsSubmitted, | ||
bool BlockingWait = true) { | ||
std::string ErrMsg; | ||
auto It = JobsSubmitted.begin(); | ||
vmaksimo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
while (It != JobsSubmitted.end()) { | ||
sys::ProcessInfo WaitResult = | ||
sys::Wait(*It, 0, /*WaitUntilTerminates*/ BlockingWait, &ErrMsg); | ||
|
||
// Check if the job has finished (PID will be 0 if it's not). | ||
if (!BlockingWait && !WaitResult.Pid) { | ||
It++; | ||
continue; | ||
} | ||
vmaksimo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert(BlockingWait || WaitResult.Pid); | ||
It = JobsSubmitted.erase(It); | ||
|
||
if (WaitResult.ReturnCode != 0) { | ||
errs() << "llvm-foreach: " << ErrMsg << '\n'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally, we should have launched command in the error message so it could be launched standalone to debug it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea! We should maintain a map with the process id and the executed command (didn't find a way to get this info just from the process id) to support this with the parallel execution. I'll do it in a separate PR after this one is merged. |
||
return WaitResult.ReturnCode; | ||
} | ||
} | ||
return 0; | ||
} | ||
|
||
int main(int argc, char **argv) { | ||
cl::ParseCommandLineOptions( | ||
argc, argv, | ||
|
@@ -160,6 +199,16 @@ int main(int argc, char **argv) { | |
PrevNumOfLines = FileLists[i].size(); | ||
} | ||
|
||
if (!JobsInParallel) | ||
error("Number of parallel threads should be a positive integer"); | ||
|
||
size_t MaxSafeNumThreads = optimal_concurrency().compute_thread_count(); | ||
if (JobsInParallel > MaxSafeNumThreads) { | ||
JobsInParallel = MaxSafeNumThreads; | ||
outs() << "llvm-foreach: adjusted number of threads to " | ||
<< MaxSafeNumThreads << " (max safe available).\n"; | ||
} | ||
|
||
std::error_code EC; | ||
raw_fd_ostream OS{OutputFileList, EC, sys::fs::OpenFlags::OF_None}; | ||
if (!OutputFileList.empty()) | ||
|
@@ -170,6 +219,7 @@ int main(int argc, char **argv) { | |
std::string IncOutArg; | ||
std::vector<std::string> ResInArgs(InReplaceArgs.size()); | ||
std::string ResFileList = ""; | ||
std::list<sys::ProcessInfo> JobsSubmitted; | ||
for (size_t j = 0; j != FileLists[0].size(); ++j) { | ||
for (size_t i = 0; i < InReplaceArgs.size(); ++i) { | ||
ArgumentReplace CurReplace = InReplaceArgs[i]; | ||
|
@@ -221,17 +271,23 @@ int main(int argc, char **argv) { | |
Args[OutIncrementArg.ArgNum] = IncOutArg; | ||
} | ||
|
||
std::string ErrMsg; | ||
// TODO: Add possibility to execute commands in parallel. | ||
int Result = | ||
sys::ExecuteAndWait(Prog, Args, /*Env=*/None, /*Redirects=*/None, | ||
/*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); | ||
if (Result != 0) { | ||
errs() << "llvm-foreach: " << ErrMsg << '\n'; | ||
Res = Result; | ||
} | ||
// Do not start execution of a new job until previous one(s) are finished, | ||
// if the maximum number of parallel workers is reached. | ||
while (JobsSubmitted.size() == JobsInParallel) | ||
if (int Result = | ||
checkIfJobsAreFinished(JobsSubmitted, /*BlockingWait*/ false)) | ||
Res = Result; | ||
|
||
JobsSubmitted.emplace_back(sys::ExecuteNoWait( | ||
Prog, Args, /*Env=*/None, /*Redirects=*/None, /*MemoryLimit=*/0)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, it's a good thought, but it was done on purpose in c69a311:
I believe I have a bug here (should not assign the result to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
checkIfJobsAreFinished may return non-zero for a variety of reasons, not only due to a timeout. In fact timeout is not even possible, AFAICS, as SecondsToWait for sys::Wait is 0. So I believe the most likely reason for checkIfJobsAreFinished non-zero return is some error during tool execution, in which case the results will be discarded, and it does not seem to make sense to run any other tools after that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest that we discuss that matter separately and apply some changes (if needed) through a separate PR: apparently there was a decision to not to stop submitting further jobs if one of them failed when we executed them in serial manner. It is not clear to me why we shouldn't follow the same logic for parallel execution. Submitting further jobs after the first failed also allows us to detect all issues in all submitted jobs and not just in the first one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
that does not seem right to me, and can be resource-costly approach. All jobs in a llvm-foreach batch are similar and in most cases they will fail for the same reason, I believe. |
||
} | ||
|
||
// Wait for all commands to be executed. | ||
while (!JobsSubmitted.empty()) | ||
if (int Result = | ||
checkIfJobsAreFinished(JobsSubmitted, /*BlockingWait*/ true)) | ||
Res = Result; | ||
|
||
vmaksimo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (!OutputFileList.empty()) { | ||
OS.close(); | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.