Skip to content

Commit 7dd93d0

Browse files
authored
Merge pull request #8274 from jasonmolenda/cp/fix-flakey-TestConcurrent-tests
Cp/fix flakey test concurrent tests
2 parents 0257505 + 471ed2b commit 7dd93d0

File tree

7 files changed

+173
-47
lines changed

7 files changed

+173
-47
lines changed

lldb/include/lldb/Target/StopInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ class StopInfo : public std::enable_shared_from_this<StopInfo> {
7979

8080
virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; }
8181

82+
/// A Continue operation can result in a false stop event
83+
/// before any execution has happened. We need to detect this
84+
/// and silently continue again one more time.
85+
virtual bool WasContinueInterrupted(Thread &thread) { return false; }
86+
8287
// Sometimes the thread plan logic will know that it wants a given stop to
8388
// stop or not, regardless of what the ordinary logic for that StopInfo would
8489
// dictate. The main example of this is the ThreadPlanCallFunction, which

lldb/include/lldb/Target/Thread.h

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <memory>
1313
#include <mutex>
14+
#include <optional>
1415
#include <string>
1516
#include <vector>
1617

@@ -1187,13 +1188,20 @@ class Thread : public std::enable_shared_from_this<Thread>,
11871188

11881189
void CalculatePublicStopInfo();
11891190

1190-
// Ask the thread subclass to set its stop info.
1191-
//
1192-
// Thread subclasses should call Thread::SetStopInfo(...) with the reason the
1193-
// thread stopped.
1194-
//
1195-
// \return
1196-
// True if Thread::SetStopInfo(...) was called, false otherwise.
1191+
/// Ask the thread subclass to set its stop info.
1192+
///
1193+
/// Thread subclasses should call Thread::SetStopInfo(...) with the reason the
1194+
/// thread stopped.
1195+
///
1196+
/// A thread that is sitting at a breakpoint site, but has not yet executed
1197+
/// the breakpoint instruction, should have a breakpoint-hit StopInfo set.
1198+
/// When execution is resumed, any thread sitting at a breakpoint site will
1199+
/// instruction-step over the breakpoint instruction silently, and we will
1200+
/// never record this breakpoint as being hit, updating the hit count,
1201+
/// possibly executing breakpoint commands or conditions.
1202+
///
1203+
/// \return
1204+
/// True if Thread::SetStopInfo(...) was called, false otherwise.
11971205
virtual bool CalculateStopInfo() = 0;
11981206

11991207
// Gets the temporary resume state for a thread.
@@ -1251,6 +1259,16 @@ class Thread : public std::enable_shared_from_this<Thread>,
12511259

12521260
lldb::ValueObjectSP GetSiginfoValue();
12531261

1262+
/// Request the pc value the thread had when previously stopped.
1263+
///
1264+
/// When the thread performs execution, it copies the current RegisterContext
1265+
/// GetPC() value. This method returns that value, if it is available.
1266+
///
1267+
/// \return
1268+
/// The PC value before execution was resumed. May not be available;
1269+
/// an empty std::optional is returned in that case.
1270+
std::optional<lldb::addr_t> GetPreviousFrameZeroPC();
1271+
12541272
protected:
12551273
friend class ThreadPlan;
12561274
friend class ThreadList;
@@ -1331,6 +1349,9 @@ class Thread : public std::enable_shared_from_this<Thread>,
13311349
///populated after a thread stops.
13321350
lldb::StackFrameListSP m_prev_frames_sp; ///< The previous stack frames from
13331351
///the last time this thread stopped.
1352+
std::optional<lldb::addr_t>
1353+
m_prev_framezero_pc; ///< Frame 0's PC the last
1354+
/// time this thread was stopped.
13341355
int m_resume_signal; ///< The signal that should be used when continuing this
13351356
///thread.
13361357
lldb::StateType m_resume_state; ///< This state is used to force a thread to

lldb/packages/Python/lldbsuite/test/concurrent_base.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,40 @@ def do_thread_actions(
264264
"Expected main thread (finish) breakpoint to be hit once",
265265
)
266266

267-
num_threads = self.inferior_process.GetNumThreads()
267+
# There should be a single active thread (the main one) which hit
268+
# the breakpoint after joining. Depending on the pthread
269+
# implementation we may have a worker thread finishing the pthread_join()
270+
# after it has returned. Filter the threads to only count those
271+
# with user functions on them from our test case file,
272+
# lldb/test/API/functionalities/thread/concurrent_events/main.cpp
273+
user_code_funcnames = [
274+
"breakpoint_func",
275+
"crash_func",
276+
"do_action_args",
277+
"dotest",
278+
"main",
279+
"register_signal_handler",
280+
"signal_func",
281+
"sigusr1_handler",
282+
"start_threads",
283+
"watchpoint_func",
284+
]
285+
num_threads_with_usercode = 0
286+
for t in self.inferior_process.threads:
287+
thread_has_user_code = False
288+
for f in t.frames:
289+
for funcname in user_code_funcnames:
290+
if funcname in f.GetDisplayFunctionName():
291+
thread_has_user_code = True
292+
break
293+
if thread_has_user_code:
294+
num_threads_with_usercode += 1
295+
268296
self.assertEqual(
269297
1,
270-
num_threads,
298+
num_threads_with_usercode,
271299
"Expecting 1 thread but seeing %d. Details:%s"
272-
% (num_threads, "\n\t".join(self.describe_threads())),
300+
% (num_threads_with_usercode, "\n\t".join(self.describe_threads())),
273301
)
274302
self.runCmd("continue")
275303

lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "lldb/Target/Thread.h"
2727
#include "lldb/Target/ThreadPlan.h"
2828
#include "lldb/Target/UnixSignals.h"
29+
#include "lldb/Utility/LLDBLog.h"
30+
#include "lldb/Utility/Log.h"
2931
#include "lldb/Utility/StreamString.h"
3032
#include <optional>
3133

@@ -600,6 +602,7 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException(
600602
if (exc_type == 0)
601603
return StopInfoSP();
602604

605+
bool not_stepping_but_got_singlestep_exception = false;
603606
uint32_t pc_decrement = 0;
604607
ExecutionContext exe_ctx(thread.shared_from_this());
605608
Target *target = exe_ctx.GetTargetPtr();
@@ -729,30 +732,8 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException(
729732
// is set
730733
is_actual_breakpoint = true;
731734
is_trace_if_actual_breakpoint_missing = true;
732-
#ifndef NDEBUG
733-
if (thread.GetTemporaryResumeState() != eStateStepping) {
734-
StreamString s;
735-
s.Printf("CreateStopReasonWithMachException got EXC_BREAKPOINT [1,0] "
736-
"indicating trace event, but thread is not tracing, it has "
737-
"ResumeState %d",
738-
thread.GetTemporaryResumeState());
739-
if (RegisterContextSP regctx = thread.GetRegisterContext()) {
740-
if (const RegisterInfo *ri = regctx->GetRegisterInfoByName("esr")) {
741-
uint32_t esr =
742-
(uint32_t)regctx->ReadRegisterAsUnsigned(ri, UINT32_MAX);
743-
if (esr != UINT32_MAX) {
744-
s.Printf(" esr value: 0x%" PRIx32, esr);
745-
}
746-
}
747-
}
748-
thread.GetProcess()->DumpPluginHistory(s);
749-
llvm::report_fatal_error(s.GetData());
750-
lldbassert(
751-
false &&
752-
"CreateStopReasonWithMachException got EXC_BREAKPOINT [1,0] "
753-
"indicating trace event, but thread was not doing a step.");
754-
}
755-
#endif
735+
if (thread.GetTemporaryResumeState() != eStateStepping)
736+
not_stepping_but_got_singlestep_exception = true;
756737
}
757738
if (exc_code == 0x102) // EXC_ARM_DA_DEBUG
758739
{
@@ -839,6 +820,56 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException(
839820
break;
840821
}
841822

842-
return StopInfoSP(new StopInfoMachException(thread, exc_type, exc_data_count,
843-
exc_code, exc_sub_code));
823+
return std::make_shared<StopInfoMachException>(
824+
thread, exc_type, exc_data_count, exc_code, exc_sub_code,
825+
not_stepping_but_got_singlestep_exception);
826+
}
827+
828+
// Detect an unusual situation on Darwin where:
829+
//
830+
// 0. We did an instruction-step before this.
831+
// 1. We have a hardware breakpoint or watchpoint set.
832+
// 2. We resumed the process, but not with an instruction-step.
833+
// 3. The thread gets an "instruction-step completed" mach exception.
834+
// 4. The pc has not advanced - it is the same as before.
835+
//
836+
// This method returns true for that combination of events.
837+
bool StopInfoMachException::WasContinueInterrupted(Thread &thread) {
838+
Log *log = GetLog(LLDBLog::Step);
839+
840+
// We got an instruction-step completed mach exception but we were not
841+
// doing an instruction step on this thread.
842+
if (!m_not_stepping_but_got_singlestep_exception)
843+
return false;
844+
845+
RegisterContextSP reg_ctx_sp(thread.GetRegisterContext());
846+
std::optional<addr_t> prev_pc = thread.GetPreviousFrameZeroPC();
847+
if (!reg_ctx_sp || !prev_pc)
848+
return false;
849+
850+
// The previous pc value and current pc value are the same.
851+
if (*prev_pc != reg_ctx_sp->GetPC())
852+
return false;
853+
854+
// We have a watchpoint -- this is the kernel bug.
855+
ProcessSP process_sp = thread.GetProcess();
856+
if (process_sp->GetWatchpointResourceList().GetSize()) {
857+
LLDB_LOGF(log,
858+
"Thread stopped with insn-step completed mach exception but "
859+
"thread was not stepping; there is a hardware watchpoint set.");
860+
return true;
861+
}
862+
863+
// We have a hardware breakpoint -- this is the kernel bug.
864+
auto &bp_site_list = process_sp->GetBreakpointSiteList();
865+
for (auto &site : bp_site_list.Sites()) {
866+
if (site->IsHardware() && site->IsEnabled()) {
867+
LLDB_LOGF(log,
868+
"Thread stopped with insn-step completed mach exception but "
869+
"thread was not stepping; there is a hardware breakpoint set.");
870+
return true;
871+
}
872+
}
873+
874+
return false;
844875
}

lldb/source/Plugins/Process/Utility/StopInfoMachException.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,12 @@ class StopInfoMachException : public StopInfo {
3131
// Constructors and Destructors
3232
StopInfoMachException(Thread &thread, uint32_t exc_type,
3333
uint32_t exc_data_count, uint64_t exc_code,
34-
uint64_t exc_subcode)
34+
uint64_t exc_subcode,
35+
bool not_stepping_but_got_singlestep_exception)
3536
: StopInfo(thread, exc_type), m_exc_data_count(exc_data_count),
36-
m_exc_code(exc_code), m_exc_subcode(exc_subcode) {}
37+
m_exc_code(exc_code), m_exc_subcode(exc_subcode),
38+
m_not_stepping_but_got_singlestep_exception(
39+
not_stepping_but_got_singlestep_exception) {}
3740

3841
~StopInfoMachException() override = default;
3942

@@ -58,10 +61,14 @@ class StopInfoMachException : public StopInfo {
5861
uint64_t exc_code, uint64_t exc_sub_code, uint64_t exc_sub_sub_code,
5962
bool pc_already_adjusted = true, bool adjust_pc_if_needed = false);
6063

64+
bool WasContinueInterrupted(Thread &thread) override;
65+
6166
protected:
6267
uint32_t m_exc_data_count;
6368
uint64_t m_exc_code;
6469
uint64_t m_exc_subcode;
70+
71+
bool m_not_stepping_but_got_singlestep_exception;
6572
};
6673

6774
} // namespace lldb_private

lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,6 +1601,26 @@ bool ProcessGDBRemote::CalculateThreadStopInfo(ThreadGDBRemote *thread) {
16011601
// has no stop reason.
16021602
thread->GetRegisterContext()->InvalidateIfNeeded(true);
16031603
if (!GetThreadStopInfoFromJSON(thread, m_jstopinfo_sp)) {
1604+
// If a thread is stopped at a breakpoint site, set that as the stop
1605+
// reason even if it hasn't executed the breakpoint instruction yet.
1606+
// We will silently step over the breakpoint when we resume execution
1607+
// and miss the fact that this thread hit the breakpoint.
1608+
const size_t num_thread_ids = m_thread_ids.size();
1609+
for (size_t i = 0; i < num_thread_ids; i++) {
1610+
if (m_thread_ids[i] == thread->GetID() && m_thread_pcs.size() > i) {
1611+
addr_t pc = m_thread_pcs[i];
1612+
lldb::BreakpointSiteSP bp_site_sp =
1613+
thread->GetProcess()->GetBreakpointSiteList().FindByAddress(pc);
1614+
if (bp_site_sp) {
1615+
if (bp_site_sp->ValidForThisThread(*thread)) {
1616+
thread->SetStopInfo(
1617+
StopInfo::CreateStopReasonWithBreakpointSiteID(
1618+
*thread, bp_site_sp->GetID()));
1619+
return true;
1620+
}
1621+
}
1622+
}
1623+
}
16041624
thread->SetStopInfo(StopInfoSP());
16051625
}
16061626
return true;
@@ -1715,7 +1735,9 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
17151735
} else {
17161736
bool handled = false;
17171737
bool did_exec = false;
1718-
if (!reason.empty()) {
1738+
// debugserver can send reason = "none" which is equivalent
1739+
// to no reason.
1740+
if (!reason.empty() && reason != "none") {
17191741
if (reason == "trace") {
17201742
addr_t pc = thread_sp->GetRegisterContext()->GetPC();
17211743
lldb::BreakpointSiteSP bp_site_sp =
@@ -1852,11 +1874,10 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
18521874
lldb::BreakpointSiteSP bp_site_sp =
18531875
thread_sp->GetProcess()->GetBreakpointSiteList().FindByAddress(pc);
18541876

1855-
// If the current pc is a breakpoint site then the StopInfo should be
1856-
// set to Breakpoint even though the remote stub did not set it as such.
1857-
// This can happen when the thread is involuntarily interrupted (e.g.
1858-
// due to stops on other threads) just as it is about to execute the
1859-
// breakpoint instruction.
1877+
// If a thread is stopped at a breakpoint site, set that as the stop
1878+
// reason even if it hasn't executed the breakpoint instruction yet.
1879+
// We will silently step over the breakpoint when we resume execution
1880+
// and miss the fact that this thread hit the breakpoint.
18601881
if (bp_site_sp && bp_site_sp->ValidForThisThread(*thread_sp)) {
18611882
thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithBreakpointSiteID(
18621883
*thread_sp, bp_site_sp->GetID()));

lldb/source/Target/Thread.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ Thread::Thread(Process &process, lldb::tid_t tid, bool use_invalid_index_id)
223223
: process.GetNextThreadIndexID(tid)),
224224
m_reg_context_sp(), m_state(eStateUnloaded), m_state_mutex(),
225225
m_frame_mutex(), m_curr_frames_sp(), m_prev_frames_sp(),
226-
m_resume_signal(LLDB_INVALID_SIGNAL_NUMBER),
226+
m_prev_framezero_pc(), m_resume_signal(LLDB_INVALID_SIGNAL_NUMBER),
227227
m_resume_state(eStateRunning), m_temporary_resume_state(eStateRunning),
228228
m_unwinder_up(), m_destroy_called(false),
229229
m_override_should_notify(eLazyBoolCalculate),
@@ -252,6 +252,7 @@ void Thread::DestroyThread() {
252252
std::lock_guard<std::recursive_mutex> guard(m_frame_mutex);
253253
m_curr_frames_sp.reset();
254254
m_prev_frames_sp.reset();
255+
m_prev_framezero_pc.reset();
255256
}
256257

257258
void Thread::BroadcastSelectedFrameChange(StackID &new_frame_id) {
@@ -440,6 +441,12 @@ lldb::StopInfoSP Thread::GetPrivateStopInfo(bool calculate) {
440441
}
441442
}
442443
}
444+
445+
// If we were resuming the process and it was interrupted,
446+
// return no stop reason. This thread would like to resume.
447+
if (m_stop_info_sp && m_stop_info_sp->WasContinueInterrupted(*this))
448+
return {};
449+
443450
return m_stop_info_sp;
444451
}
445452

@@ -1471,16 +1478,22 @@ StackFrameListSP Thread::GetStackFrameList() {
14711478
return m_curr_frames_sp;
14721479
}
14731480

1481+
std::optional<addr_t> Thread::GetPreviousFrameZeroPC() {
1482+
return m_prev_framezero_pc;
1483+
}
1484+
14741485
void Thread::ClearStackFrames() {
14751486
std::lock_guard<std::recursive_mutex> guard(m_frame_mutex);
14761487

14771488
GetUnwinder().Clear();
1489+
m_prev_framezero_pc.reset();
1490+
if (RegisterContextSP reg_ctx_sp = GetRegisterContext())
1491+
m_prev_framezero_pc = reg_ctx_sp->GetPC();
14781492

14791493
// Only store away the old "reference" StackFrameList if we got all its
14801494
// frames:
14811495
// FIXME: At some point we can try to splice in the frames we have fetched
1482-
// into
1483-
// the new frame as we make it, but let's not try that now.
1496+
// into the new frame as we make it, but let's not try that now.
14841497
if (m_curr_frames_sp && m_curr_frames_sp->GetAllFramesFetched())
14851498
m_prev_frames_sp.swap(m_curr_frames_sp);
14861499
m_curr_frames_sp.reset();

0 commit comments

Comments
 (0)