Skip to content

Commit 87bd470

Browse files
committed
Avoid disconnecting all peers if user code is slow
In the sample client (and likely other downstream users), event processing may block on slow operations (e.g. Bitcoin Core RPCs) and ChannelManager persistence may take some time. This should be fine, except that we consider this a case of possible backgrounding and disconnect all of our peers when it happens. Instead, we here avoid considering event processing time in the time between PeerManager events.
1 parent d741fb1 commit 87bd470

File tree

1 file changed

+17
-2
lines changed
  • lightning-background-processor/src

1 file changed

+17
-2
lines changed

lightning-background-processor/src/lib.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,16 +218,24 @@ impl BackgroundProcessor {
218218
let mut last_prune_call = Instant::now();
219219
let mut have_pruned = false;
220220

221+
// When considering how long its taken since the last timer tick, we don't want to
222+
// count any time spent in user code, especially since event processing can block on
223+
// disk writes. Thus, we track how long we spent in event handling here.
224+
let mut ev_handle_time_since_last_ping = Duration::from_millis(0);
221225
loop {
222-
peer_manager.process_events();
226+
let ev_handle_start = Instant::now();
227+
peer_manager.process_events(); // Note that this may block on ChannelManager's locking
223228
channel_manager.process_pending_events(&event_handler);
224229
chain_monitor.process_pending_events(&event_handler);
230+
ev_handle_time_since_last_ping += ev_handle_start.elapsed();
225231
let updates_available =
226232
channel_manager.await_persistable_update_timeout(Duration::from_millis(100));
227233
if updates_available {
234+
let persist_start = Instant::now();
228235
log_trace!(logger, "Persisting ChannelManager...");
229236
persister.persist_manager(&*channel_manager)?;
230237
log_trace!(logger, "Done persisting ChannelManager.");
238+
ev_handle_time_since_last_ping += Instant::now() - persist_start;
231239
}
232240
// Exit the loop if the background processor was requested to stop.
233241
if stop_thread.load(Ordering::Acquire) == true {
@@ -239,21 +247,28 @@ impl BackgroundProcessor {
239247
channel_manager.timer_tick_occurred();
240248
last_freshness_call = Instant::now();
241249
}
242-
if last_ping_call.elapsed().as_secs() > PING_TIMER * 2 {
250+
if (last_ping_call.elapsed() - ev_handle_time_since_last_ping).as_secs() > PING_TIMER * 2 {
243251
// On various platforms, we may be starved of CPU cycles for several reasons.
244252
// E.g. on iOS, if we've been in the background, we will be entirely paused.
245253
// Similarly, if we're on a desktop platform and the device has been asleep, we
246254
// may not get any cycles.
247255
// In any case, if we've been entirely paused for more than double our ping
248256
// timer, we should have disconnected all sockets by now (and they're probably
249257
// dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
258+
// Note that we have to take care to not get here just because user event
259+
// processing was slow at the top of the loop. For example, the sample client
260+
// may call Bitcoin Core RPCs during event handling, which very often takes
261+
// more than a handful of seconds to complete, and shouldn't disconnect all our
262+
// peers.
250263
log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers.");
251264
peer_manager.disconnect_all_peers();
252265
last_ping_call = Instant::now();
266+
ev_handle_time_since_last_ping = Duration::from_millis(0);
253267
} else if last_ping_call.elapsed().as_secs() > PING_TIMER {
254268
log_trace!(logger, "Calling PeerManager's timer_tick_occurred");
255269
peer_manager.timer_tick_occurred();
256270
last_ping_call = Instant::now();
271+
ev_handle_time_since_last_ping = Duration::from_millis(0);
257272
}
258273

259274
// Note that we want to run a graph prune once not long after startup before

0 commit comments

Comments
 (0)