Commit f35c8bf

Free holding cell on monitor-updating-restored when there's no upd
If there are no pending channel update messages when monitor updating is restored (though there may be an RAA to send), and we're connected to our peer and not awaiting a remote RAA, we need to free anything in our holding cell.

Without this, chanmon_fail_consistency was able to find a stuck condition where we sit on an HTLC failure in our holding cell and don't ever handle it (at least until we have other actions to take which empty the holding cell).

Still, this approach sucks - it introduces reentrancy in a particularly dangerous form:
 a) we re-enter user code around monitor updates while being called from user code around monitor updates, making deadlocks very likely (in fact, our current tests have a bug here!),
 b) the reentrancy only occurs in a very rare case, making it likely users will not hit it in testing, only deadlocking in production.

I'm not entirely sure what the alternative is, however - we could move to a world where we poll for holding cell events that can be freed on our 1-minute timer, but we still have a super-rare reentrancy case, just in timer_chan_freshness_every_min() instead.
1 parent 4e82003 commit f35c8bf
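The reentrancy concern above is easy to miss, so here is a self-contained sketch of the failure mode. None of these types are LDK API: Watcher, Manager and persist_lock are invented stand-ins for a user's chain::Watch implementation and the ChannelManager. If the user calls channel_monitor_updated() while still holding the lock their monitor-update path takes, the holding-cell free added by this commit can call back into that path and panic or deadlock on the std Mutex.

use std::sync::Mutex;

// Stand-in for a user's chain::Watch implementation; persist_lock guards the
// user's own persistence state.
struct Watcher {
	persist_lock: Mutex<u64>,
}

// Stand-in for ChannelManager.
struct Manager;

impl Manager {
	// Models channel_monitor_updated(): with this commit it may hand the
	// watcher a *new* monitor update (the freed holding cell), i.e. it
	// re-enters user code before returning.
	fn channel_monitor_updated(&self, watcher: &Watcher) {
		watcher.update_channel();
	}
}

impl Watcher {
	// Models chain::Watch::update_channel(): takes the user's lock.
	fn update_channel(&self) {
		let _state = self.persist_lock.lock().unwrap();
		// ... persist the update ...
	}

	// User code: a deferred persist finished, so notify the manager, but the
	// lock is still held across the call.
	fn on_persist_complete(&self, manager: &Manager) {
		let _state = self.persist_lock.lock().unwrap();
		// Reentrant path: channel_monitor_updated() -> update_channel()
		// tries to take persist_lock again; std::sync::Mutex is not
		// reentrant, so this panics or deadlocks.
		manager.channel_monitor_updated(self);
	}
}

fn main() {
	let watcher = Watcher { persist_lock: Mutex::new(0) };
	watcher.on_persist_complete(&Manager); // hangs or panics by design
}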

2 files changed, +68 -33 lines

lightning/src/ln/channel.rs

Lines changed: 22 additions & 5 deletions
@@ -2605,7 +2605,10 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 	/// Indicates that the latest ChannelMonitor update has been committed by the client
 	/// successfully and we should restore normal operation. Returns messages which should be sent
 	/// to the remote side.
-	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option<msgs::FundingLocked>) where L::Target: Logger {
+	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (
+		Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Option<ChannelMonitorUpdate>,
+		Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Vec<(HTLCSource, PaymentHash)>,
+		bool, Option<msgs::FundingLocked>) where L::Target: Logger {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32);
 		self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32);
 
@@ -2635,25 +2638,39 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
 			self.monitor_pending_revoke_and_ack = false;
 			self.monitor_pending_commitment_signed = false;
-			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked);
+			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, None, forwards, failures, Vec::new(), needs_broadcast_safe, funding_locked);
 		}
 
 		let raa = if self.monitor_pending_revoke_and_ack {
 			Some(self.get_last_revoke_and_ack())
 		} else { None };
-		let commitment_update = if self.monitor_pending_commitment_signed {
+		let mut commitment_update = if self.monitor_pending_commitment_signed {
 			Some(self.get_last_commitment_update(logger))
 		} else { None };
 
+		let mut order = self.resend_order.clone();
 		self.monitor_pending_revoke_and_ack = false;
 		self.monitor_pending_commitment_signed = false;
-		let order = self.resend_order.clone();
+
+		let mut htlcs_failed_to_forward = Vec::new();
+		let mut chanmon_update = None;
+		if commitment_update.is_none() && self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32) == 0 {
+			order = RAACommitmentOrder::RevokeAndACKFirst;
+
+			let (update_opt, mut failed_htlcs) = self.free_holding_cell_htlcs(logger).unwrap();
+			htlcs_failed_to_forward.append(&mut failed_htlcs);
+			if let Some((com_update, mon_update)) = update_opt {
+				commitment_update = Some(com_update);
+				chanmon_update = Some(mon_update);
+			}
+		}
+
 		log_trace!(logger, "Restored monitor updating resulting in {}{} commitment update and {} RAA, with {} first",
 			if needs_broadcast_safe { "a funding broadcast safe, " } else { "" },
 			if commitment_update.is_some() { "a" } else { "no" },
 			if raa.is_some() { "an" } else { "no" },
 			match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
-		(raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked)
+		(raa, commitment_update, order, chanmon_update, forwards, failures, htlcs_failed_to_forward, needs_broadcast_safe, funding_locked)
 	}
 
 	pub fn update_fee<F: Deref>(&mut self, fee_estimator: &F, msg: &msgs::UpdateFee) -> Result<(), ChannelError>
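For reference, the gate this hunk adds can be read on its own: the holding cell is only freed when the peer is connected, no commitment update is already queued for resend, and we are not waiting on the counterparty's RevokeAndACK. A condensed restatement of that condition follows; the flag names mirror ChannelState in the diff above, but the bit values and the helper itself are illustrative, not LDK code.

// Illustrative channel-state bits; the real ChannelState enum defines these.
const PEER_DISCONNECTED: u32 = 1 << 7;
const AWAITING_REMOTE_REVOKE: u32 = 1 << 9;

// True when monitor_updating_restored() may attempt to free the holding cell.
fn may_free_holding_cell(channel_state: u32, commitment_update_queued: bool) -> bool {
	channel_state & PEER_DISCONNECTED == 0             // connected to our peer
		&& !commitment_update_queued                   // no pending commitment_signed resend
		&& channel_state & AWAITING_REMOTE_REVOKE == 0 // not waiting on their RAA
}

fn main() {
	assert!(may_free_holding_cell(0, false));
	assert!(!may_free_holding_cell(AWAITING_REMOTE_REVOKE, false));
	assert!(!may_free_holding_cell(PEER_DISCONNECTED, false));
	assert!(!may_free_holding_cell(0, true));
}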

lightning/src/ln/channelmanager.rs

Lines changed: 46 additions & 28 deletions
@@ -2198,6 +2198,10 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 	/// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field
 	/// exists largely only to prevent races between this and concurrent update_monitor calls.
 	///
+	/// XXX: Update to note re-entrancy - this is really terrible - the reentrancy only happens in
+	/// a really rare case making it incredibly likely users will miss it and never hit it in
+	/// testing.
+	///
 	/// Thus, the anticipated use is, at a high level:
 	///  1) You register a chain::Watch with this ChannelManager,
 	///  2) it stores each update to disk, and begins updating any remote (eg watchtower) copies of
@@ -2209,42 +2213,53 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 	pub fn channel_monitor_updated(&self, funding_txo: &OutPoint, highest_applied_update_id: u64) {
 		let _consistency_lock = self.total_consistency_lock.read().unwrap();
 
-		let mut close_results = Vec::new();
-		let mut htlc_forwards = Vec::new();
-		let mut htlc_failures = Vec::new();
-		let mut pending_events = Vec::new();
+		let mut htlc_forwards = None;
+		let mut htlc_failures;
+		let htlc_forwarding_failures;
+		let mut pending_event = None;
 
-		{
+		let (counterparty_node_id, res) = loop {
 			let mut channel_lock = self.channel_state.lock().unwrap();
 			let channel_state = &mut *channel_lock;
-			let short_to_id = &mut channel_state.short_to_id;
 			let pending_msg_events = &mut channel_state.pending_msg_events;
-			let channel = match channel_state.by_id.get_mut(&funding_txo.to_channel_id()) {
-				Some(chan) => chan,
-				None => return,
+			let mut channel = match channel_state.by_id.entry(funding_txo.to_channel_id()) {
+				hash_map::Entry::Vacant(_) => return,
+				hash_map::Entry::Occupied(e) => e,
 			};
-			if !channel.is_awaiting_monitor_update() || channel.get_latest_monitor_update_id() != highest_applied_update_id {
+			if !channel.get().is_awaiting_monitor_update() || channel.get().get_latest_monitor_update_id() != highest_applied_update_id {
				return;
 			}
+			let counterparty_node_id = channel.get().get_counterparty_node_id();
 
-			let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe, funding_locked) = channel.monitor_updating_restored(&self.logger);
+			let (raa, commitment_update, order, chanmon_update, pending_forwards, pending_failures, forwarding_failds, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
 			if !pending_forwards.is_empty() {
-				htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), funding_txo.clone(), pending_forwards));
+				htlc_forwards = Some((channel.get().get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), funding_txo.clone(), pending_forwards));
 			}
-			htlc_failures.append(&mut pending_failures);
+			htlc_failures = pending_failures;
+			htlc_forwarding_failures = forwarding_failds;
 
 			macro_rules! handle_cs { () => {
+				if let Some(monitor_update) = chanmon_update {
+					assert!(order == RAACommitmentOrder::RevokeAndACKFirst);
+					assert!(!needs_broadcast_safe);
+					assert!(funding_locked.is_none());
+					assert!(commitment_update.is_some());
+					if let Err(e) = self.chain_monitor.update_channel(*funding_txo, monitor_update) {
+						break (counterparty_node_id,
+							handle_monitor_err!(self, e, channel_state, channel, RAACommitmentOrder::CommitmentFirst, false, true));
+					}
+				}
 				if let Some(update) = commitment_update {
 					pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
-						node_id: channel.get_counterparty_node_id(),
+						node_id: counterparty_node_id,
 						updates: update,
 					});
 				}
 			} }
 			macro_rules! handle_raa { () => {
 				if let Some(revoke_and_ack) = raa {
 					pending_msg_events.push(events::MessageSendEvent::SendRevokeAndACK {
-						node_id: channel.get_counterparty_node_id(),
+						node_id: counterparty_node_id,
 						msg: revoke_and_ack,
 					});
 				}
@@ -2260,35 +2275,38 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 				},
 			}
 			if needs_broadcast_safe {
-				pending_events.push(events::Event::FundingBroadcastSafe {
-					funding_txo: channel.get_funding_txo().unwrap(),
-					user_channel_id: channel.get_user_id(),
+				pending_event = Some(events::Event::FundingBroadcastSafe {
+					funding_txo: channel.get().get_funding_txo().unwrap(),
+					user_channel_id: channel.get().get_user_id(),
 				});
 			}
 			if let Some(msg) = funding_locked {
 				pending_msg_events.push(events::MessageSendEvent::SendFundingLocked {
-					node_id: channel.get_counterparty_node_id(),
+					node_id: counterparty_node_id,
 					msg,
 				});
-				if let Some(announcement_sigs) = self.get_announcement_sigs(channel) {
+				if let Some(announcement_sigs) = self.get_announcement_sigs(channel.get()) {
 					pending_msg_events.push(events::MessageSendEvent::SendAnnouncementSignatures {
-						node_id: channel.get_counterparty_node_id(),
+						node_id: counterparty_node_id,
 						msg: announcement_sigs,
 					});
 				}
-				short_to_id.insert(channel.get_short_channel_id().unwrap(), channel.channel_id());
+				channel_state.short_to_id.insert(channel.get().get_short_channel_id().unwrap(), channel.get().channel_id());
 			}
-		}
+			break (counterparty_node_id, Ok(()));
+		};
+		let _ = handle_error!(self, res, counterparty_node_id);
 
-		self.pending_events.lock().unwrap().append(&mut pending_events);
+		if let Some(ev) = pending_event {
+			self.pending_events.lock().unwrap().push(ev);
+		}
 
+		self.fail_holding_cell_htlcs(htlc_forwarding_failures, funding_txo.to_channel_id());
 		for failure in htlc_failures.drain(..) {
 			self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
 		}
-		self.forward_htlcs(&mut htlc_forwards[..]);
-
-		for res in close_results.drain(..) {
-			self.finish_force_close_channel(res);
+		if let Some(forwards) = htlc_forwards {
+			self.forward_htlcs(&mut [forwards][..]);
 		}
 	}
 
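Given the XXX note in the diff above, the practical consequence for users is that channel_monitor_updated() may synchronously call back into their chain::Watch::update_channel() to persist the commitment generated by freeing the holding cell. Below is a minimal sketch of the defensive calling pattern, again with invented stand-in types rather than real LDK API: release your own locks before re-entering the manager.

use std::sync::Mutex;

// Stand-ins for the user's watcher and the ChannelManager; only the shape of
// the interaction is meaningful here.
struct Watcher {
	persisted_updates: Mutex<Vec<u64>>,
}
struct Manager;

impl Manager {
	// Models channel_monitor_updated(): may hand the watcher one more update
	// (the freed holding-cell commitment) before returning.
	fn channel_monitor_updated(&self, watcher: &Watcher, update_id: u64) {
		watcher.update_channel(update_id + 1);
	}
}

impl Watcher {
	// Models chain::Watch::update_channel(): records the update under the lock.
	fn update_channel(&self, update_id: u64) {
		self.persisted_updates.lock().unwrap().push(update_id);
	}

	// Safe pattern: finish local bookkeeping and drop the lock *before*
	// notifying the manager, so the nested update_channel() call can run.
	fn on_persist_complete(&self, manager: &Manager, update_id: u64) {
		{
			self.persisted_updates.lock().unwrap().push(update_id);
		} // lock released here
		manager.channel_monitor_updated(self, update_id);
	}
}

fn main() {
	let watcher = Watcher { persisted_updates: Mutex::new(Vec::new()) };
	watcher.on_persist_complete(&Manager, 1);
	assert_eq!(*watcher.persisted_updates.lock().unwrap(), vec![1, 2]);
}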
