Commit c49d771

Always remove disconnected peers with no channels
When a peer disconnects but still has channels, the peer's `peer_state` entry in the `per_peer_state` is not removed by the `peer_disconnected` function. If the channels to that peer are later closed while it is still disconnected (e.g. force closed), we therefore need to remove the peer from `per_peer_state` separately. To do that, we push such peers onto a separate HashSet of peers awaiting removal, and drain that set on a timer to limit the negative effects on parallelism as much as possible.
1 parent a65b118 commit c49d771
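
For illustration, here is a minimal sketch of the deferred-removal pattern this commit implements, under simplified assumptions: `Manager`, `PeerId`, and `channel_count` below are hypothetical stand-ins for the real `ChannelManager`, `PublicKey`, and `channel_by_id.len()`, and the actual code uses a `FairRwLock` rather than a plain `RwLock`.

use std::collections::{HashMap, HashSet};
use std::mem;
use std::sync::{Mutex, RwLock};

// Hypothetical stand-in for `PublicKey` (33-byte compressed key).
type PeerId = [u8; 33];

struct PeerState {
	connected: bool,
	channel_count: usize, // stands in for `channel_by_id.len()`
}

struct Manager {
	per_peer_state: RwLock<HashMap<PeerId, Mutex<PeerState>>>,
	// Disconnected peers that lost their last channel; drained on a timer.
	pending_peers_awaiting_removal: Mutex<HashSet<PeerId>>,
}

impl Manager {
	// Called right after a channel is removed, with the peer's state locked:
	// queue the peer if it is disconnected and now has no channels left.
	fn add_pending_peer_to_be_removed(&self, peer: PeerId, state: &PeerState) {
		if !state.connected && state.channel_count == 0 {
			self.pending_peers_awaiting_removal.lock().unwrap().insert(peer);
		}
	}

	// Called on a timer, without any `per_peer_state` lock held. Swapping the
	// set out keeps the `pending_peers_awaiting_removal` lock brief, and the
	// `per_peer_state` write lock is only taken when there is work to do.
	fn remove_peers_awaiting_removal(&self) {
		let mut pending = HashSet::new();
		mem::swap(&mut *self.pending_peers_awaiting_removal.lock().unwrap(), &mut pending);
		if pending.is_empty() { return; }
		let mut per_peer_state = self.per_peer_state.write().unwrap();
		for peer in pending.drain() {
			// Re-check under the write lock: the peer may have reconnected or
			// gained a channel since it was queued.
			let still_removable = per_peer_state.get(&peer).map_or(false, |state| {
				let state = state.lock().unwrap();
				!state.connected && state.channel_count == 0
			});
			if still_removable {
				per_peer_state.remove(&peer);
			}
		}
	}
}

The re-check before removal matters: between the moment a peer is queued and the timer tick, the peer may have reconnected or opened a new channel, in which case its entry must be kept.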

1 file changed: +120 −17 lines changed

lightning/src/ln/channelmanager.rs

Lines changed: 120 additions & 17 deletions
@@ -466,6 +466,8 @@ pub(super) struct PeerState<Signer: Sign> {
 	/// Messages to send to the peer - pushed to in the same lock that they are generated in (except
 	/// for broadcast messages, where ordering isn't as strict).
 	pub(super) pending_msg_events: Vec<MessageSendEvent>,
+	/// Represents whether we're connected to the node or not.
+	connected: bool,
 }

 /// Stores a PaymentSecret and any other data we may need to validate an inbound payment is
@@ -595,6 +597,8 @@ pub type SimpleRefChannelManager<'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, M, T, F, L> = C
 // |  |
 // |  |__`best_block`
 // |  |
+// |  |__`pending_peers_awaiting_removal`
+// |  |
 // |  |__`pending_events`
 // |  |
 // |  |__`pending_background_events`
@@ -763,6 +767,16 @@ where

 	/// See `ChannelManager` struct-level documentation for lock order requirements.
 	pending_events: Mutex<Vec<events::Event>>,
+	/// When a peer disconnects but still has channels, the peer's `peer_state` entry in the
+	/// `per_peer_state` is not removed by the `peer_disconnected` function. If the channels to
+	/// that peer are later closed while the peer is still disconnected (e.g. force closed), we
+	/// therefore need to remove the peer from `per_peer_state` separately.
+	/// To avoid having to take the `per_peer_state` `write` lock at the point where the channels
+	/// are closed, we instead store such peers awaiting removal in this field, and remove them
+	/// on a timer to limit the negative effects on parallelism as much as possible.
+	///
+	/// See `ChannelManager` struct-level documentation for lock order requirements.
+	pending_peers_awaiting_removal: Mutex<HashSet<PublicKey>>,
 	/// See `ChannelManager` struct-level documentation for lock order requirements.
 	pending_background_events: Mutex<Vec<BackgroundEvent>>,
 	/// Used when we have to take a BIG lock to make sure everything is self-consistent.
@@ -1293,10 +1307,11 @@ macro_rules! try_chan_entry {
 }

 macro_rules! remove_channel {
-	($self: expr, $entry: expr) => {
+	($self: expr, $entry: expr, $peer_state: expr) => {
 		{
 			let channel = $entry.remove_entry().1;
 			update_maps_on_chan_removal!($self, channel);
+			$self.add_pending_peer_to_be_removed(channel.get_counterparty_node_id(), $peer_state);
 			channel
 		}
 	}
}
@@ -1470,6 +1485,7 @@ where
 			per_peer_state: FairRwLock::new(HashMap::new()),

 			pending_events: Mutex::new(Vec::new()),
+			pending_peers_awaiting_removal: Mutex::new(HashSet::new()),
 			pending_background_events: Mutex::new(Vec::new()),
 			total_consistency_lock: RwLock::new(()),
 			persistence_notifier: Notifier::new(),
@@ -1708,7 +1724,7 @@ where
 					let (result, is_permanent) =
 						handle_monitor_update_res!(self, update_res, chan_entry.get_mut(), RAACommitmentOrder::CommitmentFirst, chan_entry.key(), NO_UPDATE);
 					if is_permanent {
-						remove_channel!(self, chan_entry);
+						remove_channel!(self, chan_entry, peer_state);
 						break result;
 					}
 				}
@@ -1719,7 +1735,7 @@ where
 				});

 				if chan_entry.get().is_shutdown() {
-					let channel = remove_channel!(self, chan_entry);
+					let channel = remove_channel!(self, chan_entry, peer_state);
 					if let Ok(channel_update) = self.get_channel_update_for_broadcast(&channel) {
 						peer_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
 							msg: channel_update
@@ -1822,7 +1838,7 @@ where
 			} else {
 				self.issue_channel_close_events(chan.get(),ClosureReason::HolderForceClosed);
 			}
-			remove_channel!(self, chan)
+			remove_channel!(self, chan, peer_state)
 		} else {
 			return Err(APIError::ChannelUnavailable{ err: format!("Channel with id {} not found for the passed counterparty node_id {}", log_bytes!(*channel_id), peer_node_id) });
 		}
@@ -1861,6 +1877,13 @@ where
 		}
 	}

+	fn add_pending_peer_to_be_removed(&self, counterparty_node_id: PublicKey, peer_state: &mut PeerState<<SP::Target as SignerProvider>::Signer>) {
+		let peer_should_be_removed = !peer_state.connected && peer_state.channel_by_id.len() == 0;
+		if peer_should_be_removed {
+			self.pending_peers_awaiting_removal.lock().unwrap().insert(counterparty_node_id);
+		}
+	}
+
 	/// Force closes a channel, immediately broadcasting the latest local transaction(s) and
 	/// rejecting new HTLCs on the given channel. Fails if `channel_id` is unknown to
 	/// the manager, or if the `counterparty_node_id` isn't the counterparty of the corresponding
@@ -3278,6 +3301,34 @@ where
 		true
 	}

+	/// Removes peers which have been added to `pending_peers_awaiting_removal` and are still
+	/// disconnected and have no channels left.
+	///
+	/// Must be called without the `per_peer_state` lock acquired.
+	fn remove_peers_awaiting_removal(&self) {
+		let mut pending_peers_awaiting_removal = HashSet::new();
+		mem::swap(&mut *self.pending_peers_awaiting_removal.lock().unwrap(), &mut pending_peers_awaiting_removal);
+		if pending_peers_awaiting_removal.len() > 0 {
+			let mut per_peer_state = self.per_peer_state.write().unwrap();
+			for counterparty_node_id in pending_peers_awaiting_removal.drain() {
+				match per_peer_state.entry(counterparty_node_id) {
+					hash_map::Entry::Occupied(entry) => {
+						// Remove the entry if the peer is still disconnected and we still
+						// have no channels to the peer.
+						let remove_entry = {
+							let peer_state = entry.get().lock().unwrap();
+							!peer_state.connected && peer_state.channel_by_id.len() == 0
+						};
+						if remove_entry {
+							entry.remove_entry();
+						}
+					},
+					hash_map::Entry::Vacant(_) => { /* The PeerState has already been removed */ }
+				}
+			}
+		}
+	}
+
 	#[cfg(any(test, feature = "_test_utils"))]
 	/// Process background events, for functional testing
 	pub fn test_process_background_events(&self) {
@@ -3356,13 +3407,14 @@ where
 			let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 			let peer_state = &mut *peer_state_lock;
 			let pending_msg_events = &mut peer_state.pending_msg_events;
+			let counterparty_node_id = *counterparty_node_id;
 			peer_state.channel_by_id.retain(|chan_id, chan| {
 				let chan_needs_persist = self.update_channel_fee(chan_id, chan, new_feerate);
 				if chan_needs_persist == NotifyOption::DoPersist { should_persist = NotifyOption::DoPersist; }

 				if let Err(e) = chan.timer_check_closing_negotiation_progress() {
 					let (needs_close, err) = convert_chan_err!(self, e, chan, chan_id);
-					handle_errors.push((Err(err), *counterparty_node_id));
+					handle_errors.push((Err(err), counterparty_node_id));
 					if needs_close { return false; }
 				}

@@ -3396,8 +3448,10 @@ where

 				true
 			});
+			self.add_pending_peer_to_be_removed(counterparty_node_id, peer_state);
 		}
 	}
+	self.remove_peers_awaiting_removal();

 	self.claimable_payments.lock().unwrap().claimable_htlcs.retain(|payment_hash, (_, htlcs)| {
 		if htlcs.is_empty() {
@@ -4133,7 +4187,7 @@ where
 			}
 		};
 		peer_state.pending_msg_events.push(send_msg_err_event);
-		let _ = remove_channel!(self, channel);
+		let _ = remove_channel!(self, channel, peer_state);
 		return Err(APIError::APIMisuseError { err: "Please use accept_inbound_channel_from_trusted_peer_0conf to accept channels with zero confirmations.".to_owned() });
 	}

@@ -4419,7 +4473,7 @@ where
 					let (result, is_permanent) =
 						handle_monitor_update_res!(self, update_res, chan_entry.get_mut(), RAACommitmentOrder::CommitmentFirst, chan_entry.key(), NO_UPDATE);
 					if is_permanent {
-						remove_channel!(self, chan_entry);
+						remove_channel!(self, chan_entry, peer_state);
 						break result;
 					}
 				}
@@ -4468,7 +4522,7 @@ where
 						// also implies there are no pending HTLCs left on the channel, so we can
 						// fully delete it from tracking (the channel monitor is still around to
 						// watch for old state broadcasts)!
-						(tx, Some(remove_channel!(self, chan_entry)))
+						(tx, Some(remove_channel!(self, chan_entry, peer_state)))
 					} else { (tx, None) }
 				},
 				hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
@@ -4971,12 +5025,11 @@ where
 		if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
 			let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 			let peer_state = &mut *peer_state_lock;
-			let pending_msg_events = &mut peer_state.pending_msg_events;
 			if let hash_map::Entry::Occupied(chan_entry) = peer_state.channel_by_id.entry(funding_outpoint.to_channel_id()) {
-				let mut chan = remove_channel!(self, chan_entry);
+				let mut chan = remove_channel!(self, chan_entry, peer_state);
 				failed_channels.push(chan.force_shutdown(false));
 				if let Ok(update) = self.get_channel_update_for_broadcast(&chan) {
-					pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
+					peer_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
 						msg: update
 					});
 				}
@@ -4986,7 +5039,7 @@ where
 					ClosureReason::CommitmentTxConfirmed
 				};
 				self.issue_channel_close_events(&chan, reason);
-				pending_msg_events.push(events::MessageSendEvent::HandleError {
+				peer_state.pending_msg_events.push(events::MessageSendEvent::HandleError {
 					node_id: chan.get_counterparty_node_id(),
 					action: msgs::ErrorAction::SendErrorMessage {
 						msg: msgs::ErrorMessage { channel_id: chan.channel_id(), data: "Channel force-closed".to_owned() }
@@ -5028,7 +5081,7 @@ where
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();

-			for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+			for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -5068,6 +5121,7 @@ where
 					}
 				}
 			});
+			self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 		}
 	}

@@ -5092,7 +5146,7 @@ where
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();

-			for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+			for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -5130,6 +5184,7 @@ where
 					}
 				}
 			});
+			self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 		}
 	}

@@ -5693,7 +5748,7 @@ where
 		let mut timed_out_htlcs = Vec::new();
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();
-			for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+			for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -5777,6 +5832,7 @@ where
 					}
 					true
 				});
+				self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 			}
 		}

@@ -6023,6 +6079,7 @@ where
 			let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 			let peer_state = &mut *peer_state_lock;
 			let pending_msg_events = &mut peer_state.pending_msg_events;
+			peer_state.connected = false;
 			peer_state.channel_by_id.retain(|_, chan| {
 				chan.remove_uncommitted_htlcs_and_mark_paused(&self.logger);
 				if chan.is_shutdown() {
@@ -6088,17 +6145,20 @@ where
 					channel_by_id: HashMap::new(),
 					latest_features: init_msg.features.clone(),
 					pending_msg_events: Vec::new(),
+					connected: true,
 				}));
 			},
 			hash_map::Entry::Occupied(e) => {
-				e.get().lock().unwrap().latest_features = init_msg.features.clone();
+				let mut peer_state = e.get().lock().unwrap();
+				peer_state.latest_features = init_msg.features.clone();
+				peer_state.connected = true;
 			},
 		}
 	}

 	let per_peer_state = self.per_peer_state.read().unwrap();

-	for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+	for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 		let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 		let peer_state = &mut *peer_state_lock;
 		let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -6130,6 +6190,7 @@ where
 			}
 			retain
 		});
+		self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 	}
 	//TODO: Also re-broadcast announcement_signatures
 	Ok(())
@@ -6637,6 +6698,8 @@ where

 		write_ver_prefix!(writer, SERIALIZATION_VERSION, MIN_SERIALIZATION_VERSION);

+		self.remove_peers_awaiting_removal();
+
 		self.genesis_hash.write(writer)?;
 		{
 			let best_block = self.best_block.read().unwrap();
@@ -7102,6 +7165,7 @@ where
 			channel_by_id: peer_channels.remove(&peer_pubkey).unwrap_or(HashMap::new()),
 			latest_features: Readable::read(reader)?,
 			pending_msg_events: Vec::new(),
+			connected: false,
 		};
 		per_peer_state.insert(peer_pubkey, Mutex::new(peer_state));
 	}
@@ -7457,6 +7521,7 @@ where
 			per_peer_state: FairRwLock::new(per_peer_state),

 			pending_events: Mutex::new(pending_events_read),
+			pending_peers_awaiting_removal: Mutex::new(HashSet::new()),
 			pending_background_events: Mutex::new(pending_background_events_read),
 			total_consistency_lock: RwLock::new(()),
 			persistence_notifier: Notifier::new(),
@@ -7924,6 +7989,44 @@ mod tests {
 		}
 	}

+	#[test]
+	fn test_drop_disconnected_peers_when_removing_channels() {
+		let chanmon_cfgs = create_chanmon_cfgs(2);
+		let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+		let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+		let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+		let chan = create_announced_chan_between_nodes(&nodes, 0, 1);
+
+		nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
+		nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
+
+		nodes[0].node.force_close_broadcasting_latest_txn(&chan.2, &nodes[1].node.get_our_node_id()).unwrap();
+		check_closed_broadcast!(nodes[0], true);
+		check_added_monitors!(nodes[0], 1);
+		check_closed_event!(nodes[0], 1, ClosureReason::HolderForceClosed);
+
+		{
+			// Assert that nodes[0] is awaiting the removal of nodes[1] once nodes[1] has been
+			// disconnected and the channel between them has been force closed.
+			let nodes_0_per_peer_state = nodes[0].node.per_peer_state.read().unwrap();
+			let nodes_0_pending_peers_awaiting_removal = nodes[0].node.pending_peers_awaiting_removal.lock().unwrap();
+			assert_eq!(nodes_0_pending_peers_awaiting_removal.len(), 1);
+			assert!(nodes_0_pending_peers_awaiting_removal.get(&nodes[1].node.get_our_node_id()).is_some());
+			// Assert that nodes[1] isn't removed before `timer_tick_occurred` has been executed.
+			assert_eq!(nodes_0_per_peer_state.len(), 1);
+			assert!(nodes_0_per_peer_state.get(&nodes[1].node.get_our_node_id()).is_some());
+		}
+
+		nodes[0].node.timer_tick_occurred();
+
+		{
+			// Assert that nodes[1] has now been removed.
+			assert_eq!(nodes[0].node.per_peer_state.read().unwrap().len(), 0);
+			assert_eq!(nodes[0].node.pending_peers_awaiting_removal.lock().unwrap().len(), 0);
+		}
+	}
+
 	#[test]
 	fn bad_inbound_payment_hash() {
 		// Add coverage for checking that a user-provided payment hash matches the payment secret.
