Skip to content

New ThreadPlanSingleThreadTimeout to resolve potential deadlock in single thread stepping #90930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions lldb/include/lldb/Target/Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -1320,10 +1320,16 @@ class Process : public std::enable_shared_from_this<Process>,

size_t GetThreadStatus(Stream &ostrm, bool only_threads_with_stop_reason,
uint32_t start_frame, uint32_t num_frames,
uint32_t num_frames_with_source,
bool stop_format);
uint32_t num_frames_with_source, bool stop_format);

void SendAsyncInterrupt();
/// Send an async interrupt request.
///
/// If \a thread is specified the async interrupt stop will be attributed to
/// the specified thread.
///
/// \param[in] thread
/// The thread the async interrupt will be attributed to.
void SendAsyncInterrupt(Thread *thread = nullptr);

// Notify this process class that modules got loaded.
//
Expand Down Expand Up @@ -2873,6 +2879,17 @@ void PruneThreadPlans();
return std::nullopt;
}

/// Handle thread specific async interrupt and return the original thread
/// that requested the async interrupt. It can be null if original thread
/// has exited.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should say what the description string is for.

///
/// \param[in] description
/// Returns the stop reason description of the async interrupt.
virtual lldb::ThreadSP
HandleThreadAsyncInterrupt(uint8_t signo, const std::string &description) {
return lldb::ThreadSP();
}

lldb::StateType GetPrivateState();

/// The "private" side of resuming a process. This doesn't alter the state
Expand Down Expand Up @@ -3159,6 +3176,11 @@ void PruneThreadPlans();
// Resume will only request a resume, using this
// flag to check.

lldb::tid_t m_interrupt_tid; /// The tid of the thread that issued the async
/// interrupt, used by thread plan timeout. It
/// can be LLDB_INVALID_THREAD_ID to indicate
/// user level async interrupt.

/// This is set at the beginning of Process::Finalize() to stop functions
/// from looking up or creating things during or after a finalize call.
std::atomic<bool> m_finalizing;
Expand Down
4 changes: 4 additions & 0 deletions lldb/include/lldb/Target/StopInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ class StopInfo : public std::enable_shared_from_this<StopInfo> {
const char *description = nullptr,
std::optional<int> code = std::nullopt);

static lldb::StopInfoSP
CreateStopReasonWithInterrupt(Thread &thread, int signo,
const char *description);

static lldb::StopInfoSP CreateStopReasonToTrace(Thread &thread);

static lldb::StopInfoSP
Expand Down
2 changes: 2 additions & 0 deletions lldb/include/lldb/Target/Thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class ThreadProperties : public Properties {
bool GetStepOutAvoidsNoDebug() const;

uint64_t GetMaxBacktraceDepth() const;

uint64_t GetSingleThreadPlanTimeout() const;
};

class Thread : public std::enable_shared_from_this<Thread>,
Expand Down
12 changes: 9 additions & 3 deletions lldb/include/lldb/Target/ThreadPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,8 @@ class ThreadPlan : public std::enable_shared_from_this<ThreadPlan>,
eKindStepInRange,
eKindRunToAddress,
eKindStepThrough,
eKindStepUntil
eKindStepUntil,
eKindSingleThreadTimeout,
};

virtual ~ThreadPlan();
Expand Down Expand Up @@ -395,6 +396,11 @@ class ThreadPlan : public std::enable_shared_from_this<ThreadPlan>,

bool IsControllingPlan() { return m_is_controlling_plan; }

// Returns true if this plan is a leaf plan, meaning the plan will be popped
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leaf plans are only auto-popped if they don't explain the stop, right? Might be worth mentioning that here.

// during each stop if it does not explain the stop and re-pushed before
// resuming to stay at the top of the stack.
virtual bool IsLeafPlan() { return false; }

bool SetIsControllingPlan(bool value) {
bool old_value = m_is_controlling_plan;
m_is_controlling_plan = value;
Expand Down Expand Up @@ -483,6 +489,8 @@ class ThreadPlan : public std::enable_shared_from_this<ThreadPlan>,
return m_takes_iteration_count;
}

virtual lldb::StateType GetPlanRunState() = 0;

protected:
// Constructors and Destructors
ThreadPlan(ThreadPlanKind kind, const char *name, Thread &thread,
Expand Down Expand Up @@ -522,8 +530,6 @@ class ThreadPlan : public std::enable_shared_from_this<ThreadPlan>,
GetThread().SetStopInfo(stop_reason_sp);
}

virtual lldb::StateType GetPlanRunState() = 0;

bool IsUsuallyUnexplainedStopReason(lldb::StopReason);

Status m_status;
Expand Down
107 changes: 107 additions & 0 deletions lldb/include/lldb/Target/ThreadPlanSingleThreadTimeout.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//===-- ThreadPlanSingleThreadTimeout.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLDB_TARGET_THREADPLANSINGLETHREADTIMEOUT_H
#define LLDB_TARGET_THREADPLANSINGLETHREADTIMEOUT_H

#include "lldb/Target/Thread.h"
#include "lldb/Target/ThreadPlan.h"
#include "lldb/Utility/Event.h"
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/State.h"

#include <chrono>
#include <thread>

namespace lldb_private {

//
// Thread plan used by single thread execution to issue timeout. This is useful
// to detect potential deadlock in single thread execution. The timeout measures
// the elapsed time from the last internal stop and gets reset by each internal
// stop to ensure we are accurately detecting execution not moving forward.
// This means this thread plan may be created/destroyed multiple times by the
// parent execution plan.
//
// When a timeout happens, the thread plan resolves the potential deadlock by
// issuing a thread specific async interrupt to enter stop state, then execution
// is resumed with all threads running to resolve the potential deadlock
//
class ThreadPlanSingleThreadTimeout : public ThreadPlan {
enum class State {
WaitTimeout, // Waiting for timeout.
AsyncInterrupt, // Async interrupt has been issued.
Done, // Finished resume all threads.
};

public:
// TODO: allow timeout to be set on per thread plan basis.
struct TimeoutInfo {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't something you need to do in this patch, but it would be good to allow different thread plan invocations to use different timeouts, and not just depend on the thread timeout. For instance, we should be able to reimplement RunThreadPlan with this infrastructure. But the timeouts you want for expression evaluation and stepping are likely to be very different (and different executions also use different timeouts). So being able to pass in the timeout, rather than relying on one thread specific one will be necessary. We could for instance add the timeout to the TimeoutInfo and plumb that so that the ThreadPlan that mixes in TimeoutResumeAll can pass in a timeout.

// Whether there is a ThreadPlanSingleThreadTimeout instance alive.
bool m_isAlive = false;
ThreadPlanSingleThreadTimeout::State m_last_state = State::WaitTimeout;
};

~ThreadPlanSingleThreadTimeout() override;

// If input \param thread is running in single thread mode, push a
// new ThreadPlanSingleThreadTimeout based on timeout setting from fresh new
// state. The reference of \param info is passed in so that when
// ThreadPlanSingleThreadTimeout got popped its last state can be stored
// in it for future resume.
static void PushNewWithTimeout(Thread &thread, TimeoutInfo &info);

// Push a new ThreadPlanSingleThreadTimeout by restoring state from
// input \param info and resume execution.
static void ResumeFromPrevState(Thread &thread, TimeoutInfo &info);

void GetDescription(Stream *s, lldb::DescriptionLevel level) override;
bool ValidatePlan(Stream *error) override { return true; }
bool WillStop() override;
void DidPop() override;

bool IsLeafPlan() override { return true; }
bool DoPlanExplainsStop(Event *event_ptr) override;

lldb::StateType GetPlanRunState() override;
static void TimeoutThreadFunc(ThreadPlanSingleThreadTimeout *self);

bool MischiefManaged() override;

bool ShouldStop(Event *event_ptr) override;
void SetStopOthers(bool new_value) override;
bool StopOthers() override;

private:
ThreadPlanSingleThreadTimeout(Thread &thread, TimeoutInfo &info);

bool IsTimeoutAsyncInterrupt(Event *event_ptr);
bool HandleEvent(Event *event_ptr);
void HandleTimeout();
uint64_t GetRemainingTimeoutMilliSeconds();

static std::string StateToString(State state);

ThreadPlanSingleThreadTimeout(const ThreadPlanSingleThreadTimeout &) = delete;
const ThreadPlanSingleThreadTimeout &
operator=(const ThreadPlanSingleThreadTimeout &) = delete;

TimeoutInfo &m_info; // Reference to controlling ThreadPlan's TimeoutInfo.
State m_state;

// Lock for m_wakeup_cv and m_exit_flag between thread plan thread and timer
// thread
std::mutex m_mutex;
std::condition_variable m_wakeup_cv;
std::thread m_timer_thread;
std::chrono::steady_clock::time_point m_timeout_start;
};

} // namespace lldb_private

#endif // LLDB_TARGET_THREADPLANSINGLETHREADTIMEOUT_H
1 change: 1 addition & 0 deletions lldb/include/lldb/Target/ThreadPlanStepOut.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class ThreadPlanStepOut : public ThreadPlan, public ThreadPlanShouldStopHere {
bool ValidatePlan(Stream *error) override;
bool ShouldStop(Event *event_ptr) override;
bool StopOthers() override;
void SetStopOthers(bool new_value) override { m_stop_others = new_value; }
lldb::StateType GetPlanRunState() override;
bool WillStop() override;
bool MischiefManaged() override;
Expand Down
7 changes: 6 additions & 1 deletion lldb/include/lldb/Target/ThreadPlanStepOverRange.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
#include "lldb/Target/StackID.h"
#include "lldb/Target/Thread.h"
#include "lldb/Target/ThreadPlanStepRange.h"
#include "lldb/Target/TimeoutResumeAll.h"

namespace lldb_private {

class ThreadPlanStepOverRange : public ThreadPlanStepRange,
ThreadPlanShouldStopHere {
ThreadPlanShouldStopHere,
TimeoutResumeAll {
public:
ThreadPlanStepOverRange(Thread &thread, const AddressRange &range,
const SymbolContext &addr_context,
Expand All @@ -27,7 +29,9 @@ class ThreadPlanStepOverRange : public ThreadPlanStepRange,
~ThreadPlanStepOverRange() override;

void GetDescription(Stream *s, lldb::DescriptionLevel level) override;
void SetStopOthers(bool new_value) override;
bool ShouldStop(Event *event_ptr) override;
void DidPush() override;

protected:
bool DoPlanExplainsStop(Event *event_ptr) override;
Expand All @@ -44,6 +48,7 @@ class ThreadPlanStepOverRange : public ThreadPlanStepRange,
bool IsEquivalentContext(const SymbolContext &context);

bool m_first_resume;
lldb::RunMode m_run_mode;

ThreadPlanStepOverRange(const ThreadPlanStepOverRange &) = delete;
const ThreadPlanStepOverRange &
Expand Down
7 changes: 7 additions & 0 deletions lldb/include/lldb/Target/ThreadPlanStepRange.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,15 @@ class ThreadPlanStepRange : public ThreadPlan {
// run' plan, then just single step.
bool SetNextBranchBreakpoint();

// Whether the input stop info is caused by the next branch breakpoint.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This routine does NOT distinguish between the cases where the next branch breakpoint is the ONLY breakpoint at this site, and where it's one of many breakpoints that share this site. Later, you distinguish between the two cases where this is used (e.g. in ThreadPlanStepRange::NextRangeBreakpointExplainsStop.
It might be interesting to see if it makes sense to incorporate the logic that does that into IsNextBranchBreakpointStop. If it does that would allow some code reuse. If it does not, you should note in the comment here that it does not distinguish between the two cases, so someone doesn't make a mistake later on.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function only checks the branch breakpoint without caring whether its site is shared by others or not. Let me add a comment.

// Note: this does not check if branch breakpoint site is shared by other
// breakpoints or not.
bool IsNextBranchBreakpointStop(lldb::StopInfoSP stop_info_sp);

void ClearNextBranchBreakpoint();

void ClearNextBranchBreakpointExplainedStop();

bool NextRangeBreakpointExplainsStop(lldb::StopInfoSP stop_info_sp);

SymbolContext m_addr_context;
Expand Down
40 changes: 40 additions & 0 deletions lldb/include/lldb/Target/TimeoutResumeAll.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//===-- TimeoutResumeAll.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLDB_TARGET_TIMEOUTRESUMEALL_H
#define LLDB_TARGET_TIMEOUTRESUMEALL_H

#include "lldb/Target/ThreadPlanSingleThreadTimeout.h"

namespace lldb_private {

// Mixin class that provides the capability for ThreadPlan to support single
// thread execution that resumes all threads after a timeout.
// Opt-in thread plan should call PushNewTimeout() in its DidPush() and
// ResumeWithTimeout() during DoWillResume().
class TimeoutResumeAll {
public:
TimeoutResumeAll(Thread &thread) : m_thread(thread) {}

void PushNewTimeout() {
ThreadPlanSingleThreadTimeout::PushNewWithTimeout(m_thread, m_timeout_info);
}

void ResumeWithTimeout() {
ThreadPlanSingleThreadTimeout::ResumeFromPrevState(m_thread,
m_timeout_info);
}

private:
Thread &m_thread;
ThreadPlanSingleThreadTimeout::TimeoutInfo m_timeout_info;
};

} // namespace lldb_private

#endif // LLDB_TARGET_TIMEOUTRESUMEALL_H
1 change: 1 addition & 0 deletions lldb/include/lldb/lldb-enumerations.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ enum StopReason {
eStopReasonFork,
eStopReasonVFork,
eStopReasonVForkDone,
eStopReasonInterrupt, ///< Thread requested interrupt
};

/// Command Return Status Types.
Expand Down
6 changes: 6 additions & 0 deletions lldb/source/API/SBThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ size_t SBThread::GetStopReasonDataCount() {
case eStopReasonSignal:
return 1;

case eStopReasonInterrupt:
return 1;

case eStopReasonException:
return 1;

Expand Down Expand Up @@ -261,6 +264,9 @@ uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) {
case eStopReasonSignal:
return stop_info_sp->GetValue();

case eStopReasonInterrupt:
return stop_info_sp->GetValue();

case eStopReasonException:
return stop_info_sp->GetValue();

Expand Down
2 changes: 1 addition & 1 deletion lldb/source/Interpreter/CommandInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2513,7 +2513,7 @@ bool CommandInterpreter::DidProcessStopAbnormally() const {
const StopReason reason = stop_info->GetStopReason();
if (reason == eStopReasonException ||
reason == eStopReasonInstrumentation ||
reason == eStopReasonProcessorTrace)
reason == eStopReasonProcessorTrace || reason == eStopReasonInterrupt)
return true;

if (reason == eStopReasonSignal) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,8 @@ static const char *GetStopReasonString(StopReason stop_reason) {
return "vfork";
case eStopReasonVForkDone:
return "vforkdone";
case eStopReasonInterrupt:
return "async interrupt";
case eStopReasonInstrumentation:
case eStopReasonInvalid:
case eStopReasonPlanComplete:
Expand Down
Loading