Skip to content

Commit a44c46a

Browse files
committed
Free holding cell on monitor-updating-restored when there's no upd
If there is no pending channel update messages when monitor updating is restored (though there may be an RAA to send), and we're connected to our peer and not awaiting a remote RAA, we need to free anything in our holding cell. However, we don't want to immediately free the holding cell during channel_monitor_updated as it presents a somewhat bug-prone case of reentrancy: a) it would re-enter user code around a monitor update while being called from user code notifying us of the same monitor being updated, making deadlocs very likely (in fact, our fuzzers would have a bug here!), b) the re-entrancy only occurs in a very rare case, making it likely users will not hit it in testing, only deadlocking in production. Thus, we add a holding-cell-free pass over each channel in get_and_clear_pending_msg_events. This fits up nicely with the anticipated bug - users almost certainly need to process new network messages immediately after monitor updating has been restored to send messages which were not sent originally when the monitor updating was paused. Without this, chanmon_fail_consistency was able to find a stuck condition where we sit on an HTLC failure in our holding cell and don't ever handle it (at least until we have other actions to take which empty the holding cell).
1 parent 513c9dd commit a44c46a

File tree

2 files changed

+75
-12
lines changed

2 files changed

+75
-12
lines changed

lightning/src/ln/channel.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2300,6 +2300,13 @@ impl<Signer: Sign> Channel<Signer> {
23002300
}, commitment_signed, closing_signed, monitor_update))
23012301
}
23022302

2303+
/// Public version of the below, checking relevant preconditions first
2304+
pub fn maybe_free_holding_cell_htlcs<L: Deref>(&mut self, logger: &L) -> Result<(Option<(msgs::CommitmentUpdate, ChannelMonitorUpdate)>, Vec<(HTLCSource, PaymentHash)>), ChannelError> where L::Target: Logger {
2305+
if self.channel_state >= ChannelState::ChannelFunded as u32 &&
2306+
(self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 {
2307+
self.free_holding_cell_htlcs(logger)
2308+
} else { Ok((None, Vec::new())) }
2309+
}
23032310
/// Used to fulfill holding_cell_htlcs when we get a remote ack (or implicitly get it by them
23042311
/// fulfilling or failing the last pending HTLC)
23052312
fn free_holding_cell_htlcs<L: Deref>(&mut self, logger: &L) -> Result<(Option<(msgs::CommitmentUpdate, ChannelMonitorUpdate)>, Vec<(HTLCSource, PaymentHash)>), ChannelError> where L::Target: Logger {
@@ -2350,7 +2357,9 @@ impl<Signer: Sign> Channel<Signer> {
23502357
&HTLCUpdateAwaitingACK::ClaimHTLC { ref payment_preimage, htlc_id, .. } => {
23512358
match self.get_update_fulfill_htlc(htlc_id, *payment_preimage, logger) {
23522359
Ok((update_fulfill_msg_option, additional_monitor_update_opt)) => {
2353-
update_fulfill_htlcs.push(update_fulfill_msg_option.unwrap());
2360+
if let Some(update_fulfill_msg) = update_fulfill_msg_option { //WTF XXX
2361+
update_fulfill_htlcs.push(update_fulfill_msg);
2362+
}
23542363
if let Some(mut additional_monitor_update) = additional_monitor_update_opt {
23552364
monitor_update.updates.append(&mut additional_monitor_update.updates);
23562365
}

lightning/src/ln/channelmanager.rs

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -726,13 +726,12 @@ macro_rules! handle_monitor_err {
726726
($self: ident, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $resend_raa: expr, $resend_commitment: expr) => {
727727
handle_monitor_err!($self, $err, $channel_state, $entry, $action_type, $resend_raa, $resend_commitment, Vec::new(), Vec::new())
728728
};
729-
($self: ident, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $resend_raa: expr, $resend_commitment: expr, $failed_forwards: expr, $failed_fails: expr) => {
729+
($self: ident, $err: expr, $short_to_id: expr, $chan: expr, $action_type: path, $resend_raa: expr, $resend_commitment: expr, $failed_forwards: expr, $failed_fails: expr, $chan_id: expr) => {
730730
match $err {
731731
ChannelMonitorUpdateErr::PermanentFailure => {
732-
log_error!($self.logger, "Closing channel {} due to monitor update PermanentFailure", log_bytes!($entry.key()[..]));
733-
let (channel_id, mut chan) = $entry.remove_entry();
734-
if let Some(short_id) = chan.get_short_channel_id() {
735-
$channel_state.short_to_id.remove(&short_id);
732+
log_error!($self.logger, "Closing channel {} due to monitor update PermanentFailure", log_bytes!($chan_id[..]));
733+
if let Some(short_id) = $chan.get_short_channel_id() {
734+
$short_to_id.remove(&short_id);
736735
}
737736
// TODO: $failed_fails is dropped here, which will cause other channels to hit the
738737
// chain in a confused state! We need to move them into the ChannelMonitor which
@@ -743,12 +742,12 @@ macro_rules! handle_monitor_err {
743742
// splitting hairs we'd prefer to claim payments that were to us, but we haven't
744743
// given up the preimage yet, so might as well just wait until the payment is
745744
// retried, avoiding the on-chain fees.
746-
let res: Result<(), _> = Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure".to_owned(), channel_id, chan.force_shutdown(true), $self.get_channel_update(&chan).ok()));
747-
res
745+
let res: Result<(), _> = Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure".to_owned(), *$chan_id, $chan.force_shutdown(true), $self.get_channel_update(&$chan).ok()));
746+
(res, true)
748747
},
749748
ChannelMonitorUpdateErr::TemporaryFailure => {
750749
log_info!($self.logger, "Disabling channel {} due to monitor update TemporaryFailure. On restore will send {} and process {} forwards and {} fails",
751-
log_bytes!($entry.key()[..]),
750+
log_bytes!($chan_id[..]),
752751
if $resend_commitment && $resend_raa {
753752
match $action_type {
754753
RAACommitmentOrder::CommitmentFirst => { "commitment then RAA" },
@@ -765,11 +764,18 @@ macro_rules! handle_monitor_err {
765764
if !$resend_raa {
766765
debug_assert!($action_type == RAACommitmentOrder::CommitmentFirst || !$resend_commitment);
767766
}
768-
$entry.get_mut().monitor_update_failed($resend_raa, $resend_commitment, $failed_forwards, $failed_fails);
769-
Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor".to_owned()), *$entry.key()))
767+
$chan.monitor_update_failed($resend_raa, $resend_commitment, $failed_forwards, $failed_fails);
768+
(Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor".to_owned()), *$chan_id)), false)
770769
},
771770
}
772-
}
771+
};
772+
($self: ident, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $resend_raa: expr, $resend_commitment: expr, $failed_forwards: expr, $failed_fails: expr) => { {
773+
let (res, drop) = handle_monitor_err!($self, $err, $channel_state.short_to_id, $entry.get_mut(), $action_type, $resend_raa, $resend_commitment, $failed_forwards, $failed_fails, $entry.key());
774+
if drop {
775+
$entry.remove_entry();
776+
}
777+
res
778+
} };
773779
}
774780

775781
macro_rules! return_monitor_err {
@@ -3224,6 +3230,52 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
32243230
}
32253231
}
32263232

3233+
/// Process pending events from the `chain::Watch`.
3234+
fn check_free_holding_cells(&self) {
3235+
let mut forwarding_failed_htlcs = Vec::new();
3236+
let mut handle_errors = Vec::new();
3237+
{
3238+
let mut channel_state_lock = self.channel_state.lock().unwrap();
3239+
let channel_state = &mut *channel_state_lock;
3240+
let by_id = &mut channel_state.by_id;
3241+
let short_to_id = &mut channel_state.short_to_id;
3242+
let pending_msg_events = &mut channel_state.pending_msg_events;
3243+
3244+
by_id.retain(|channel_id, chan| {
3245+
match chan.maybe_free_holding_cell_htlcs(&self.logger) {
3246+
Ok((None, ref htlcs)) if htlcs.is_empty() => true,
3247+
Ok((commitment_opt, failed_htlcs)) => {
3248+
forwarding_failed_htlcs.push((failed_htlcs, *channel_id));
3249+
if let Some((commitment_update, monitor_update)) = commitment_opt {
3250+
if let Err(e) = self.chain_monitor.update_channel(chan.get_funding_txo().unwrap(), monitor_update) {
3251+
let (res, drop) = handle_monitor_err!(self, e, short_to_id, chan, RAACommitmentOrder::CommitmentFirst, false, true, Vec::new(), Vec::new(), channel_id);
3252+
handle_errors.push((chan.get_counterparty_node_id(), res));
3253+
if drop { return false; }
3254+
}
3255+
pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
3256+
node_id: chan.get_counterparty_node_id(),
3257+
updates: commitment_update,
3258+
});
3259+
}
3260+
true
3261+
},
3262+
Err(e) => {
3263+
let (drop, res) = convert_chan_err!(self, e, short_to_id, chan, channel_id);
3264+
handle_errors.push((chan.get_counterparty_node_id(), Err(res)));
3265+
!drop
3266+
}
3267+
}
3268+
});
3269+
}
3270+
for (failures, channel_id) in forwarding_failed_htlcs.drain(..) {
3271+
self.fail_holding_cell_htlcs(failures, channel_id);
3272+
}
3273+
3274+
for (counterparty_node_id, err) in handle_errors.drain(..) {
3275+
let _ = handle_error!(self, err, counterparty_node_id);
3276+
}
3277+
}
3278+
32273279
/// Handle a list of channel failures during a block_connected or block_disconnected call,
32283280
/// pushing the channel monitor update (if any) to the background events queue and removing the
32293281
/// Channel object.
@@ -3260,6 +3312,8 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> MessageSend
32603312
// ChannelMonitors when clearing other events.
32613313
self.process_pending_monitor_events();
32623314

3315+
self.check_free_holding_cells();
3316+
32633317
let mut ret = Vec::new();
32643318
let mut channel_state = self.channel_state.lock().unwrap();
32653319
mem::swap(&mut ret, &mut channel_state.pending_msg_events);

0 commit comments

Comments
 (0)