Skip to content

Commit b1bdba5

Browse files
Merge pull request #1269 from TheBlueMatt/022-01-no-disconnect-on-slow-persist
Avoid disconnecting all peers if user code is slow
2 parents d741fb1 + 2d3a210 commit b1bdba5

File tree

1 file changed

+18
-7
lines changed
  • lightning-background-processor/src

1 file changed

+18
-7
lines changed

lightning-background-processor/src/lib.rs

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ const FRESHNESS_TIMER: u64 = 60;
6161
const FRESHNESS_TIMER: u64 = 1;
6262

6363
#[cfg(all(not(test), not(debug_assertions)))]
64-
const PING_TIMER: u64 = 5;
64+
const PING_TIMER: u64 = 10;
6565
/// Signature operations take a lot longer without compiler optimisations.
6666
/// Increasing the ping timer allows for this but slower devices will be disconnected if the
6767
/// timeout is reached.
@@ -219,11 +219,17 @@ impl BackgroundProcessor {
219219
let mut have_pruned = false;
220220

221221
loop {
222-
peer_manager.process_events();
222+
peer_manager.process_events(); // Note that this may block on ChannelManager's locking
223223
channel_manager.process_pending_events(&event_handler);
224224
chain_monitor.process_pending_events(&event_handler);
225+
226+
// We wait up to 100ms, but track how long it takes to detect being put to sleep,
227+
// see `await_start`'s use below.
228+
let await_start = Instant::now();
225229
let updates_available =
226230
channel_manager.await_persistable_update_timeout(Duration::from_millis(100));
231+
let await_time = await_start.elapsed();
232+
227233
if updates_available {
228234
log_trace!(logger, "Persisting ChannelManager...");
229235
persister.persist_manager(&*channel_manager)?;
@@ -239,15 +245,20 @@ impl BackgroundProcessor {
239245
channel_manager.timer_tick_occurred();
240246
last_freshness_call = Instant::now();
241247
}
242-
if last_ping_call.elapsed().as_secs() > PING_TIMER * 2 {
248+
if await_time > Duration::from_secs(1) {
243249
// On various platforms, we may be starved of CPU cycles for several reasons.
244250
// E.g. on iOS, if we've been in the background, we will be entirely paused.
245251
// Similarly, if we're on a desktop platform and the device has been asleep, we
246252
// may not get any cycles.
247-
// In any case, if we've been entirely paused for more than double our ping
248-
// timer, we should have disconnected all sockets by now (and they're probably
249-
// dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
250-
log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers.");
253+
// We detect this by checking if our max-100ms-sleep, above, ran longer than a
254+
// full second, at which point we assume sockets may have been killed (they
255+
// appear to be at least on some platforms, even if it has only been a second).
256+
// Note that we have to take care to not get here just because user event
257+
// processing was slow at the top of the loop. For example, the sample client
258+
// may call Bitcoin Core RPCs during event handling, which very often takes
259+
// more than a handful of seconds to complete, and shouldn't disconnect all our
260+
// peers.
261+
log_trace!(logger, "100ms sleep took more than a second, disconnecting peers.");
251262
peer_manager.disconnect_all_peers();
252263
last_ping_call = Instant::now();
253264
} else if last_ping_call.elapsed().as_secs() > PING_TIMER {

0 commit comments

Comments
 (0)