Commit eb0b664 (parent 6170828)

Free holding cell on monitor-updating-restored when there's no upd
If there are no pending channel update messages when monitor updating is restored (though there may be an RAA to send), and we're connected to our peer and not awaiting a remote RAA, we need to free anything in our holding cell.

Without this, chanmon_fail_consistency was able to find a stuck condition where we sit on an HTLC failure in our holding cell and don't ever handle it (at least until we have other actions to take which empty the holding cell).

Still, this approach sucks - it introduces reentrancy in a particularly dangerous form:
a) we re-enter user code around monitor updates while being called from user code around monitor updates, making deadlocks very likely (in fact, our current tests have a bug here!),
b) the re-entrancy only occurs in a very rare case, making it likely users will not hit it in testing, only deadlocking in production.

I'm not entirely sure what the alternative is, however - we could move to a world where we poll for holding cell events that can be freed on our 1-minute timer, but we'd still have a super rare reentrancy case, just in timer_chan_freshness_every_min() instead.
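A minimal, self-contained sketch of the deadlock shape described above, using hypothetical stand-in types (MonitorStore, monitor_persist_completed) rather than the real chain::Watch API: a persister that still holds its own lock while reporting a completed monitor persist, and therefore deadlocks (or panics) when the report re-enters its own update_channel path.

use std::sync::Mutex;

// Hypothetical, simplified stand-ins -- not the real `chain::Watch` API.
struct MonitorStore {
    // Lock guarding persisted monitor state.
    monitors: Mutex<Vec<String>>,
}

impl MonitorStore {
    // Plays the role of an `update_channel` implementation: takes the
    // store's lock to persist an update.
    fn update_channel(&self, update: &str) {
        let mut mons = self.monitors.lock().unwrap();
        mons.push(update.to_string());
    }

    // Plays the role of code that reports a previously-failed persist as
    // complete. After this commit, that report can re-enter `update_channel`
    // (to persist the commitment produced by freeing the holding cell).
    fn monitor_persist_completed(&self) {
        // BUG: still holding `monitors` while notifying; the re-entrant
        // `update_channel` call tries to take the same lock again.
        let _held = self.monitors.lock().unwrap();
        self.update_channel("holding-cell commitment"); // deadlocks or panics
    }
}

fn main() {
    let store = MonitorStore { monitors: Mutex::new(Vec::new()) };
    store.update_channel("initial update");
    // Uncomment to observe the hang/panic on a non-reentrant std Mutex:
    // store.monitor_persist_completed();
    println!("persisted {} update(s)", store.monitors.lock().unwrap().len());
}

With std::sync::Mutex the re-entrant lock() call never returns (it deadlocks or panics), which is exactly the failure mode warned about above: rare enough to miss in testing, fatal in production.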

2 files changed: 55 additions, 17 deletions

lightning/src/ln/channel.rs

Lines changed: 22 additions & 5 deletions
@@ -2756,7 +2756,10 @@ impl<Signer: Sign> Channel<Signer> {
 	/// Indicates that the latest ChannelMonitor update has been committed by the client
 	/// successfully and we should restore normal operation. Returns messages which should be sent
 	/// to the remote side.
-	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option<msgs::FundingLocked>) where L::Target: Logger {
+	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (
+		Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Option<ChannelMonitorUpdate>,
+		Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Vec<(HTLCSource, PaymentHash)>,
+		bool, Option<msgs::FundingLocked>) where L::Target: Logger {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32);
 		self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32);

@@ -2786,25 +2789,39 @@ impl<Signer: Sign> Channel<Signer> {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
 			self.monitor_pending_revoke_and_ack = false;
 			self.monitor_pending_commitment_signed = false;
-			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked);
+			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, None, forwards, failures, Vec::new(), needs_broadcast_safe, funding_locked);
 		}

 		let raa = if self.monitor_pending_revoke_and_ack {
 			Some(self.get_last_revoke_and_ack())
 		} else { None };
-		let commitment_update = if self.monitor_pending_commitment_signed {
+		let mut commitment_update = if self.monitor_pending_commitment_signed {
 			Some(self.get_last_commitment_update(logger))
 		} else { None };

+		let mut order = self.resend_order.clone();
 		self.monitor_pending_revoke_and_ack = false;
 		self.monitor_pending_commitment_signed = false;
-		let order = self.resend_order.clone();
+
+		let mut htlcs_failed_to_forward = Vec::new();
+		let mut chanmon_update = None;
+		if commitment_update.is_none() && self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32) == 0 {
+			order = RAACommitmentOrder::RevokeAndACKFirst;
+
+			let (update_opt, mut failed_htlcs) = self.free_holding_cell_htlcs(logger).unwrap();
+			htlcs_failed_to_forward.append(&mut failed_htlcs);
+			if let Some((com_update, mon_update)) = update_opt {
+				commitment_update = Some(com_update);
+				chanmon_update = Some(mon_update);
+			}
+		}
+
 		log_trace!(logger, "Restored monitor updating resulting in {}{} commitment update and {} RAA, with {} first",
 			if needs_broadcast_safe { "a funding broadcast safe, " } else { "" },
 			if commitment_update.is_some() { "a" } else { "no" },
 			if raa.is_some() { "an" } else { "no" },
 			match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
-		(raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked)
+		(raa, commitment_update, order, chanmon_update, forwards, failures, htlcs_failed_to_forward, needs_broadcast_safe, funding_locked)
 	}

 	pub fn update_fee<F: Deref>(&mut self, fee_estimator: &F, msg: &msgs::UpdateFee) -> Result<(), ChannelError>

lightning/src/ln/channelmanager.rs

Lines changed: 33 additions & 12 deletions
@@ -745,20 +745,30 @@ macro_rules! maybe_break_monitor_err {
 }

 macro_rules! handle_chan_restoration_locked {
-	($self: expr, $channel_lock: expr, $channel_state: expr, $channel_entry: expr,
-	 $raa: expr, $commitment_update: expr, $order: expr,
+	($self: ident, $channel_lock: expr, $channel_state: expr, $channel_entry: expr,
+	 $raa: expr, $commitment_update: expr, $order: expr, $chanmon_update: expr,
 	 $pending_forwards: expr, $broadcast_safe: expr, $funding_locked: expr) => { {
 		let mut htlc_forwards = None;
 		let mut funding_broadcast_safe = None;
 		let counterparty_node_id = $channel_entry.get().get_counterparty_node_id();
+		let channel_id = $channel_entry.get().channel_id();

-		{
+		let res = loop {
 			if !$pending_forwards.is_empty() {
 				htlc_forwards = Some(($channel_entry.get().get_short_channel_id().expect("We can't have pending forwards before funding confirmation"),
 					$channel_entry.get().get_funding_txo().unwrap(), $pending_forwards));
 			}

 			macro_rules! handle_cs { () => {
+				if let Some(monitor_update) = $chanmon_update {
+					assert!($order == RAACommitmentOrder::RevokeAndACKFirst);
+					assert!(!$broadcast_safe);
+					assert!($funding_locked.is_none());
+					assert!($commitment_update.is_some());
+					if let Err(e) = $self.chain_monitor.update_channel($channel_entry.get().get_funding_txo().unwrap(), monitor_update) {
+						break handle_monitor_err!($self, e, $channel_state, $channel_entry, RAACommitmentOrder::CommitmentFirst, false, true);
+					}
+				}
 				if let Some(update) = $commitment_update {
 					$channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
 						node_id: counterparty_node_id,

@@ -801,21 +811,26 @@ macro_rules! handle_chan_restoration_locked {
 					msg: announcement_sigs,
 				});
 			}
-			$channel_state.short_to_id.insert($channel_entry.get().get_short_channel_id().unwrap(), $channel_entry.get().channel_id());
+			$channel_state.short_to_id.insert($channel_entry.get().get_short_channel_id().unwrap(), channel_id);
 		}
-		}
-		(htlc_forwards, funding_broadcast_safe)
+			break Ok(());
+		};
+
+		(htlc_forwards, funding_broadcast_safe, res, channel_id, counterparty_node_id)
 	} }
 }

 macro_rules! post_handle_chan_restoration {
-	($self: expr, $locked_res: expr, $pending_failures: expr) => { {
-		let (htlc_forwards, funding_broadcast_safe) = $locked_res;
+	($self: ident, $locked_res: expr, $pending_failures: expr, $forwarding_failures: expr) => { {
+		let (htlc_forwards, funding_broadcast_safe, res, channel_id, counterparty_node_id) = $locked_res;
+
+		let _ = handle_error!($self, res, counterparty_node_id);

 		if let Some(ev) = funding_broadcast_safe {
 			$self.pending_events.lock().unwrap().push(ev);
 		}

+		$self.fail_holding_cell_htlcs($forwarding_failures, channel_id);
 		for failure in $pending_failures.drain(..) {
 			$self.fail_htlc_backwards_internal($self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
 		}

@@ -2332,6 +2347,12 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	/// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field
 	/// exists largely only to prevent races between this and concurrent update_monitor calls.
 	///
+	/// In some cases, this may generate a monitor update, resulting in a call to the
+	/// `chain::Watch`'s `update_channel` method for the same channel monitor which is being
+	/// notified of a successful update here. Because of this, please be very careful with
+	/// reentrancy bugs! It is incredibly easy to write an implementation of `update_channel` which
+	/// will take a lock that is also held when calling this method.
+	///
 	/// Thus, the anticipated use is, at a high level:
 	/// 1) You register a chain::Watch with this ChannelManager,
 	/// 2) it stores each update to disk, and begins updating any remote (eg watchtower) copies of

@@ -2343,7 +2364,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	pub fn channel_monitor_updated(&self, funding_txo: &OutPoint, highest_applied_update_id: u64) {
 		let _persistence_guard = PersistenceNotifierGuard::new(&self.total_consistency_lock, &self.persistence_notifier);

-		let (mut pending_failures, chan_restoration_res) = {
+		let (mut pending_failures, forwarding_failures, chan_restoration_res) = {
 			let mut channel_lock = self.channel_state.lock().unwrap();
 			let channel_state = &mut *channel_lock;
 			let mut channel = match channel_state.by_id.entry(funding_txo.to_channel_id()) {

@@ -2354,10 +2375,10 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 				return;
 			}

-			let (raa, commitment_update, order, pending_forwards, pending_failures, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
-			(pending_failures, handle_chan_restoration_locked!(self, channel_lock, channel_state, channel, raa, commitment_update, order, pending_forwards, needs_broadcast_safe, funding_locked))
+			let (raa, commitment_update, order, chanmon_update, pending_forwards, pending_failures, forwarding_failures, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
+			(pending_failures, forwarding_failures, handle_chan_restoration_locked!(self, channel_lock, channel_state, channel, raa, commitment_update, order, chanmon_update, pending_forwards, needs_broadcast_safe, funding_locked))
 		};
-		post_handle_chan_restoration!(self, chan_restoration_res, pending_failures);
+		post_handle_chan_restoration!(self, chan_restoration_res, pending_failures, forwarding_failures);
 	}

 	fn internal_open_channel(&self, counterparty_node_id: &PublicKey, their_features: InitFeatures, msg: &msgs::OpenChannel) -> Result<(), MsgHandleErrInternal> {
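As a counterpoint to the hazard above, a minimal sketch of the lock discipline the new channel_monitor_updated documentation asks for: record the completed persist and release your own locks before notifying the ChannelManager, so a re-entrant update_channel call can take them freely. AsyncPersister and on_persist_complete are hypothetical names for illustration, not LDK API.

use std::sync::Mutex;

// Hypothetical persister sketch -- not part of LDK.
struct AsyncPersister {
    // Update ids whose asynchronous persistence has completed but which
    // have not yet been reported to the ChannelManager.
    completed: Mutex<Vec<u64>>,
}

impl AsyncPersister {
    // Record a completed persist and return the ids to report. The lock is
    // released when this function returns, before the caller invokes
    // channel_monitor_updated.
    fn on_persist_complete(&self, update_id: u64) -> Vec<u64> {
        let mut completed = self.completed.lock().unwrap();
        completed.push(update_id);
        // Take the batch out while holding the lock; the guard drops on return,
        // so any re-entrant update_channel call triggered by the subsequent
        // channel_monitor_updated call can take `completed` without deadlocking.
        std::mem::take(&mut *completed)
    }
}

fn main() {
    let persister = AsyncPersister { completed: Mutex::new(vec![1]) };
    let ready = persister.on_persist_complete(2);
    // For each id in `ready`, the caller would now invoke
    // ChannelManager::channel_monitor_updated with no persister locks held.
    println!("report update ids {:?} to the ChannelManager", ready);
}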
