Skip to content

Fix lldb crash while handling concurrent vfork() #81564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lldb/source/Plugins/Process/Linux/NativeThreadLinux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,9 @@ void NativeThreadLinux::SetStoppedByFork(bool is_vfork, lldb::pid_t child_pid) {
m_stop_info.signo = SIGTRAP;
m_stop_info.details.fork.child_pid = child_pid;
m_stop_info.details.fork.child_tid = child_pid;
m_stop_description = std::to_string(child_pid);
m_stop_description += " ";
m_stop_description += std::to_string(child_pid);
}

void NativeThreadLinux::SetStoppedByVForkDone() {
Expand Down
28 changes: 17 additions & 11 deletions lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,9 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp,
m_continue_C_tids(), m_continue_s_tids(), m_continue_S_tids(),
m_max_memory_size(0), m_remote_stub_max_memory_size(0),
m_addr_to_mmap_size(), m_thread_create_bp_sp(),
m_waiting_for_attach(false),
m_command_sp(), m_breakpoint_pc_offset(0),
m_waiting_for_attach(false), m_command_sp(), m_breakpoint_pc_offset(0),
m_initial_tid(LLDB_INVALID_THREAD_ID), m_allow_flash_writes(false),
m_erased_flash_ranges(), m_vfork_in_progress(false) {
m_erased_flash_ranges(), m_vfork_in_progress_count(0) {
m_async_broadcaster.SetEventName(eBroadcastBitAsyncThreadShouldExit,
"async thread should exit");
m_async_broadcaster.SetEventName(eBroadcastBitAsyncContinue,
Expand Down Expand Up @@ -5272,8 +5271,10 @@ class CommandObjectProcessGDBRemoteSpeedTest : public CommandObjectParsed {
(ProcessGDBRemote *)m_interpreter.GetExecutionContext()
.GetProcessPtr();
if (process) {
StreamSP output_stream_sp(
m_interpreter.GetDebugger().GetAsyncOutputStream());
StreamSP output_stream_sp = result.GetImmediateOutputStream();
if (!output_stream_sp)
output_stream_sp =
StreamSP(m_interpreter.GetDebugger().GetAsyncOutputStream());
result.SetImmediateOutputStream(output_stream_sp);

const uint32_t num_packets =
Expand Down Expand Up @@ -5615,8 +5616,11 @@ void ProcessGDBRemote::DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {
void ProcessGDBRemote::DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {
Log *log = GetLog(GDBRLog::Process);

assert(!m_vfork_in_progress);
m_vfork_in_progress = true;
LLDB_LOG(
log,
"ProcessGDBRemote::DidFork() called for child_pid: {0}, child_tid {1}",
child_pid, child_tid);
++m_vfork_in_progress_count;

// Disable all software breakpoints for the duration of vfork.
if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointSoftware))
Expand Down Expand Up @@ -5670,8 +5674,8 @@ void ProcessGDBRemote::DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {
}

void ProcessGDBRemote::DidVForkDone() {
assert(m_vfork_in_progress);
m_vfork_in_progress = false;
assert(m_vfork_in_progress_count > 0);
--m_vfork_in_progress_count;

// Reenable all software breakpoints that were enabled before vfork.
if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointSoftware))
Expand All @@ -5681,7 +5685,9 @@ void ProcessGDBRemote::DidVForkDone() {
void ProcessGDBRemote::DidExec() {
// If we are following children, vfork is finished by exec (rather than
// vforkdone that is submitted for parent).
if (GetFollowForkMode() == eFollowChild)
m_vfork_in_progress = false;
if (GetFollowForkMode() == eFollowChild) {
if (m_vfork_in_progress_count > 0)
--m_vfork_in_progress_count;
}
Process::DidExec();
}
3 changes: 2 additions & 1 deletion lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,8 @@ class ProcessGDBRemote : public Process,
using FlashRange = FlashRangeVector::Entry;
FlashRangeVector m_erased_flash_ranges;

bool m_vfork_in_progress;
// Number of vfork() operations being handled.
uint32_t m_vfork_in_progress_count;

// Accessors
bool IsRunning(lldb::StateType state) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
CXX_SOURCES := main.cpp
ENABLE_THREADS := YES

include Makefile.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
Make sure that the concurrent vfork() from multiple threads works correctly.
"""

import lldb
import lldbsuite.test.lldbutil as lldbutil
from lldbsuite.test.lldbtest import *
from lldbsuite.test.decorators import *


class TestConcurrentVFork(TestBase):
NO_DEBUG_INFO_TESTCASE = True

def build_run_to_breakpoint(self, use_fork, call_exec):
self.build()

args = []
if use_fork:
args.append("--fork")
if call_exec:
args.append("--exec")
launch_info = lldb.SBLaunchInfo(args)
launch_info.SetWorkingDirectory(self.getBuildDir())

return lldbutil.run_to_source_breakpoint(
self, "// break here", lldb.SBFileSpec("main.cpp")
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return all of the info from lldbutil.run_to_source_breakpoint from this function:

return (target, process, thread, bkpt)


def follow_parent_helper(self, use_fork, call_exec):
(target, process, thread, bkpt) = self.build_run_to_breakpoint(
use_fork, call_exec
)

parent_pid = target.FindFirstGlobalVariable("g_pid").GetValueAsUnsigned()
self.runCmd("settings set target.process.follow-fork-mode parent")
self.runCmd("settings set target.process.stop-on-exec False", check=False)
self.expect(
"continue", substrs=[f"Process {parent_pid} exited with status = 0"]
)

def follow_child_helper(self, use_fork, call_exec):
self.build_run_to_breakpoint(use_fork, call_exec)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(target, process, thread, bkpt) = self.build_run_to_breakpoint(use_fork, call_exec)


self.runCmd("settings set target.process.follow-fork-mode child")
self.runCmd("settings set target.process.stop-on-exec False", check=False)
# Child process exits with code "index + 10" since index is [0-4]
# so the exit code should be 1[0-4]
self.expect("continue", patterns=[r"exited with status = 1[0-4]"])

@skipUnlessPlatform(["linux"])
def test_follow_parent_vfork_no_exec(self):
"""
Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent.
And follow-parent successfully detach all child processes and exit debugger without calling exec.
"""
self.follow_parent_helper(use_fork=False, call_exec=False)

@skipUnlessPlatform(["linux"])
def test_follow_parent_fork_no_exec(self):
"""
Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-parent.
And follow-parent successfully detach all child processes and exit debugger without calling exec
"""
self.follow_parent_helper(use_fork=True, call_exec=False)

@skipUnlessPlatform(["linux"])
def test_follow_parent_vfork_call_exec(self):
"""
Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent.
And follow-parent successfully detach all child processes and exit debugger after calling exec.
"""
self.follow_parent_helper(use_fork=False, call_exec=True)

@skipUnlessPlatform(["linux"])
def test_follow_parent_fork_call_exec(self):
"""
Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent.
And follow-parent successfully detach all child processes and exit debugger after calling exec.
"""
self.follow_parent_helper(use_fork=True, call_exec=True)

@skipUnlessPlatform(["linux"])
def test_follow_child_vfork_no_exec(self):
"""
Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child.
And follow-child successfully detach parent process and exit child process with correct exit code without calling exec.
"""
self.follow_child_helper(use_fork=False, call_exec=False)

@skipUnlessPlatform(["linux"])
def test_follow_child_fork_no_exec(self):
"""
Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child.
And follow-child successfully detach parent process and exit child process with correct exit code without calling exec.
"""
self.follow_child_helper(use_fork=True, call_exec=False)

@skipUnlessPlatform(["linux"])
def test_follow_child_vfork_call_exec(self):
"""
Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child.
And follow-child successfully detach parent process and exit child process with correct exit code after calling exec.
"""
self.follow_child_helper(use_fork=False, call_exec=True)

@skipUnlessPlatform(["linux"])
def test_follow_child_fork_call_exec(self):
"""
Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child.
And follow-child successfully detach parent process and exit child process with correct exit code after calling exec.
"""
self.follow_child_helper(use_fork=True, call_exec=True)
105 changes: 105 additions & 0 deletions lldb/test/API/functionalities/fork/concurrent_vfork/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include <assert.h>
#include <iostream>
#include <mutex>
#include <sys/wait.h>
#include <thread>
#include <unistd.h>
#include <vector>

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be a good idea to create a global std::vector<pid_t> and a mutex to protect it. And any child processes that get forked get added to this vector. So maybe add:

std::mutex g_child_pids_mutex;
std::vector<pid_t> g_child_pids;

I will comment below where to use it.

pid_t g_pid = 0;
std::mutex g_child_pids_mutex;
std::vector<pid_t> g_child_pids;

const char *g_program = nullptr;
bool g_use_vfork = true; // Use vfork by default.
bool g_call_exec = false; // Does not call exec by default.

int call_vfork(int index) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably want to test with both fork() and vfork() here and we want to know if we want to call exec*() or not. If we add a const char *exec_path to the call and this is not NULL, then we call exec. So maybe change this to be:

int call_vfork(int index, bool use_vfork, const char *exec_path)

pid_t child_pid = 0;
if (g_use_vfork) {
child_pid = vfork();
} else {
child_pid = fork();
}

if (child_pid == -1) {
// Error handling
perror("vfork");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perror(use_vfork ? "vfork" : "fork");

return 1;
} else if (child_pid == 0) {
// This code is executed by the child process
g_pid = getpid();
printf("Child process: %d\n", g_pid);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add code to call exec*() here:

if (call_exec)
  execl(exec_path, exec_path, NULL); // Call this program through exec() with no args.
else
  _exit(index + 10); // Exit the child process


if (g_call_exec) {
std::string child_exit_code = std::to_string(index + 10);
execl(g_program, g_program, "--child", child_exit_code.c_str(), NULL);
} else {
_exit(index + 10);
}
} else {
// This code is executed by the parent process
printf("[Parent] Forked process id: %d\n", child_pid);
}
return 0;
}

void wait_all_children_to_exit() {
std::lock_guard<std::mutex> Lock(g_child_pids_mutex);
for (pid_t child_pid : g_child_pids) {
int child_status = 0;
pid_t pid = waitpid(child_pid, &child_status, 0);
if (child_status != 0) {
int exit_code = WEXITSTATUS(child_status);
if (exit_code > 15 || exit_code < 10) {
printf("Error: child process exits with unexpected code %d\n",
exit_code);
_exit(1); // This will let our program know that some child processes
// didn't exist with an expected exit status.
}
}
if (pid != child_pid)
_exit(2); // This will let our program know it didn't succeed
}
}

void create_threads(int num_threads) {
std::vector<std::thread> threads;
for (int i = 0; i < num_threads; ++i) {
threads.emplace_back(std::thread(call_vfork, i));
}
printf("Created %d threads, joining...\n",
num_threads); // end_of_create_threads
for (auto &thread : threads) {
thread.join();
}
wait_all_children_to_exit();
}

// Can be called in various ways:
// 1. [program]: use vfork and not call exec
// 2. [program] --fork: use fork and not call exec
// 3. [program] --fork --exec: use fork and call exec
// 4. [program] --exec: use vfork and call exec
// 5. [program] --child [exit_code]: child process
int main(int argc, char *argv[]) {
g_pid = getpid();
g_program = argv[0];

for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "--child") == 0) {
assert(i + 1 < argc);
int child_exit_code = std::stoi(argv[i + 1]);
printf("Child process: %d, exiting with code %d\n", g_pid,
child_exit_code);
_exit(child_exit_code);
} else if (strcmp(argv[i], "--fork") == 0)
g_use_vfork = false;
else if (strcmp(argv[i], "--exec") == 0)
g_call_exec = true;
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse args manually to check if we should use fork or vfork and if we should exec:

bool use_vfork = true;
bool call_exec = false;
for (int i=1; i<argc; ++i) {
  if (strcmp(argv[i], "--fork") == 0)
    use_vfork = false;
  else if (strcmp(argv[i], "--exec") == 0)
    call_exec = true;
}

int num_threads = 5; // break here
create_threads(num_threads);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pass use_vfork and exec_path into this function so we can pass it to int call_vfork(int index, bool use_vfork, const char *exec_path):

create_threads(num_threads, use_vfork, call_exec ? argv[0] : nullptr);

return 0;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now reap the children you spawned:

std::lock_guard<std::mutex> Lock(g_child_pids_mutex);
for (pid_t child_pid: g_child_pids) {
  int child_status = 0;
  pid_t pid = waitpid(child_pid, &child_status, 0);
  if (child_status != 0)
    _exit(1); // This will let our program know that some child processes didn't exist with a zero exit status.
  if (pid !=  child_pid)
    _exit(2); // This will let our program know it didn't succeed
}

This will ensure that if we stayed with the parent, then we successfully resumed all of the child processes and that they didn't crash and that they exited. If we don't resume the child processes correctly, they can get caught in limbo if they weren't resumed and this test will deadlock waiting for the child process to exit. There are some options you can do that allows you to not hang where instead of zero passed as the last parameter of waitpid(), you can specify WNOHANG, but that will get racy. We would poll for a few seconds. We will want to make sure that the test doesn't just deadlock and cause the test suite to never complete.