Commit b9a1db5
Stop failing back HTLCs on peer disconnection
Previously, if we got disconnected from a peer while there were HTLCs pending forwarding in the holding cell, we'd clear them and fail them all backwards. This is largely fine, but since we now have support for handling such HTLCs on reconnect, we might as well not do so, instead relying on our timeout logic to fail them backwards if it takes too long to forward them.
1 parent 8acdbaf commit b9a1db5
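
To make the behavioral change concrete, here is a minimal, self-contained Rust sketch — simplified stand-in types, not the real Channel or HTLCUpdateAwaitingACK definitions — of what happens to holding-cell HTLC updates on disconnect before and after this commit:

#[allow(dead_code)]
enum HtlcUpdate {
	// Simplified stand-ins for HTLCUpdateAwaitingACK's variants.
	Add { payment_hash: [u8; 32] },
	Claim { payment_hash: [u8; 32] },
	Fail { payment_hash: [u8; 32] },
}

struct HoldingCell { updates: Vec<HtlcUpdate> }

impl HoldingCell {
	// Old behavior: drain pending adds on disconnect and hand them back so the
	// caller can immediately fail them backwards.
	fn on_disconnect_old(&mut self) -> Vec<[u8; 32]> {
		let mut dropped = Vec::new();
		self.updates.retain(|u| match u {
			HtlcUpdate::Add { payment_hash } => { dropped.push(*payment_hash); false },
			_ => true,
		});
		dropped
	}

	// New behavior: leave the holding cell untouched; pending adds are either
	// forwarded after reconnection or failed back later by timeout logic.
	fn on_disconnect_new(&mut self) {}
}

fn main() {
	let make_cell = || HoldingCell { updates: vec![
		HtlcUpdate::Add { payment_hash: [1; 32] },
		HtlcUpdate::Claim { payment_hash: [2; 32] },
	]};

	let mut cell = make_cell();
	assert_eq!(cell.on_disconnect_old().len(), 1); // the pending add was failed back

	let mut cell = make_cell();
	cell.on_disconnect_new();
	assert_eq!(cell.updates.len(), 2); // everything survives the disconnect
}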

2 files changed: +20 -65 lines changed

lightning/src/ln/channel.rs

Lines changed: 15 additions & 47 deletions
@@ -2695,19 +2695,16 @@ impl<Signer: Sign> Channel<Signer> {
 		}
 	}
 
-	/// Removes any uncommitted HTLCs, to be used on peer disconnection, including any pending
-	/// HTLCs that we intended to add but haven't as we were waiting on a remote revoke.
-	/// Returns the set of PendingHTLCStatuses from remote uncommitted HTLCs (which we're
-	/// implicitly dropping) and the payment_hashes of HTLCs we tried to add but are dropping.
+	/// Removes any uncommitted inbound HTLCs and resets the state of uncommitted outbound HTLC
+	/// updates, to be used on peer disconnection. After this, update_*_htlc messages need to be
+	/// resent.
 	/// No further message handling calls may be made until a channel_reestablish dance has
 	/// completed.
-	pub fn remove_uncommitted_htlcs_and_mark_paused<L: Deref>(&mut self, logger: &L) -> Vec<(HTLCSource, PaymentHash)> where L::Target: Logger {
-		let mut outbound_drops = Vec::new();
-
+	pub fn remove_uncommitted_htlcs_and_mark_paused<L: Deref>(&mut self, logger: &L) where L::Target: Logger {
 		assert_eq!(self.channel_state & ChannelState::ShutdownComplete as u32, 0);
 		if self.channel_state < ChannelState::FundingSent as u32 {
 			self.channel_state = ChannelState::ShutdownComplete as u32;
-			return outbound_drops;
+			return;
 		}
 		// Upon reconnect we have to start the closing_signed dance over, but shutdown messages
 		// will be retransmitted.
@@ -2750,23 +2747,8 @@ impl<Signer: Sign> Channel<Signer> {
 			}
 		}
 
-		self.holding_cell_htlc_updates.retain(|htlc_update| {
-			match htlc_update {
-				// Note that currently on channel reestablish we assert that there are
-				// no holding cell HTLC update_adds, so if in the future we stop
-				// dropping added HTLCs here and failing them backwards, then there will
-				// need to be corresponding changes made in the Channel's re-establish
-				// logic.
-				&HTLCUpdateAwaitingACK::AddHTLC { ref payment_hash, ref source, .. } => {
-					outbound_drops.push((source.clone(), payment_hash.clone()));
-					false
-				},
-				&HTLCUpdateAwaitingACK::ClaimHTLC {..} | &HTLCUpdateAwaitingACK::FailHTLC {..} => true,
-			}
-		});
 		self.channel_state |= ChannelState::PeerDisconnected as u32;
-		log_debug!(logger, "Peer disconnection resulted in {} remote-announced HTLC drops and {} waiting-to-locally-announced HTLC drops on channel {}", outbound_drops.len(), inbound_drop_count, log_bytes!(self.channel_id()));
-		outbound_drops
+		log_debug!(logger, "Peer disconnection resulted in {} remote-announced HTLC drops on channel {}", inbound_drop_count, log_bytes!(self.channel_id()));
 	}
 
 	/// Indicates that a ChannelMonitor update failed to be stored by the client and further
@@ -2925,7 +2907,7 @@ impl<Signer: Sign> Channel<Signer> {
 
 	/// May panic if some calls other than message-handling calls (which will all Err immediately)
 	/// have been called between remove_uncommitted_htlcs_and_mark_paused and this call.
-	pub fn channel_reestablish<L: Deref>(&mut self, msg: &msgs::ChannelReestablish, logger: &L) -> Result<(Option<msgs::FundingLocked>, Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, Option<ChannelMonitorUpdate>, RAACommitmentOrder, Option<msgs::Shutdown>), ChannelError> where L::Target: Logger {
+	pub fn channel_reestablish<L: Deref>(&mut self, msg: &msgs::ChannelReestablish, logger: &L) -> Result<(Option<msgs::FundingLocked>, Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, Option<ChannelMonitorUpdate>, RAACommitmentOrder, Vec<(HTLCSource, PaymentHash)>, Option<msgs::Shutdown>), ChannelError> where L::Target: Logger {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) == 0 {
 			// While BOLT 2 doesn't indicate explicitly we should error this channel here, it
 			// almost certainly indicates we are going to end up out-of-sync in some way, so we
@@ -2976,15 +2958,15 @@ impl<Signer: Sign> Channel<Signer> {
 					return Err(ChannelError::Close("Peer claimed they saw a revoke_and_ack but we haven't sent funding_locked yet".to_owned()));
 				}
 				// Short circuit the whole handler as there is nothing we can resend them
-				return Ok((None, None, None, None, RAACommitmentOrder::CommitmentFirst, shutdown_msg));
+				return Ok((None, None, None, None, RAACommitmentOrder::CommitmentFirst, Vec::new(), shutdown_msg));
 			}
 
 			// We have OurFundingLocked set!
 			let next_per_commitment_point = self.holder_signer.get_per_commitment_point(self.cur_holder_commitment_transaction_number, &self.secp_ctx);
 			return Ok((Some(msgs::FundingLocked {
 				channel_id: self.channel_id(),
 				next_per_commitment_point,
-			}), None, None, None, RAACommitmentOrder::CommitmentFirst, shutdown_msg));
+			}), None, None, None, RAACommitmentOrder::CommitmentFirst, Vec::new(), shutdown_msg));
 		}
 
 		let required_revoke = if msg.next_remote_commitment_number + 1 == INITIAL_COMMITMENT_NUMBER - self.cur_holder_commitment_transaction_number {
@@ -3025,14 +3007,6 @@ impl<Signer: Sign> Channel<Signer> {
 			}
 
 			if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 {
-				// Note that if in the future we no longer drop holding cell update_adds on peer
-				// disconnect, this logic will need to be updated.
-				for htlc_update in self.holding_cell_htlc_updates.iter() {
-					if let &HTLCUpdateAwaitingACK::AddHTLC { .. } = htlc_update {
-						debug_assert!(false, "There shouldn't be any add-HTLCs in the holding cell now because they should have been dropped on peer disconnect. Panic here because said HTLCs won't be handled correctly.");
-					}
-				}
-
 				// We're up-to-date and not waiting on a remote revoke (if we are our
 				// channel_reestablish should result in them sending a revoke_and_ack), but we may
 				// have received some updates while we were disconnected. Free the holding cell
@@ -3041,20 +3015,14 @@ impl<Signer: Sign> Channel<Signer> {
 					Err(ChannelError::Close(msg)) => return Err(ChannelError::Close(msg)),
 					Err(ChannelError::Ignore(_)) | Err(ChannelError::CloseDelayBroadcast(_)) => panic!("Got non-channel-failing result from free_holding_cell_htlcs"),
 					Ok((Some((commitment_update, monitor_update)), htlcs_to_fail)) => {
-						// If in the future we no longer drop holding cell update_adds on peer
-						// disconnect, we may be handed some HTLCs to fail backwards here.
-						assert!(htlcs_to_fail.is_empty());
-						return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), shutdown_msg));
+						return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), htlcs_to_fail, shutdown_msg));
 					},
 					Ok((None, htlcs_to_fail)) => {
-						// If in the future we no longer drop holding cell update_adds on peer
-						// disconnect, we may be handed some HTLCs to fail backwards here.
-						assert!(htlcs_to_fail.is_empty());
-						return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
+						return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), htlcs_to_fail, shutdown_msg));
 					},
 				}
 			} else {
-				return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
+				return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), Vec::new(), shutdown_msg));
 			}
 		} else if msg.next_local_commitment_number == next_counterparty_commitment_number - 1 {
 			if required_revoke.is_some() {
@@ -3065,10 +3033,10 @@ impl<Signer: Sign> Channel<Signer> {
 
 			if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
 				self.monitor_pending_commitment_signed = true;
-				return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), shutdown_msg));
+				return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), Vec::new(), shutdown_msg));
 			}
 
-			return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update(logger)), None, self.resend_order.clone(), shutdown_msg));
+			return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update(logger)), None, self.resend_order.clone(), Vec::new(), shutdown_msg));
 		} else {
 			return Err(ChannelError::Close("Peer attempted to reestablish channel with a very old remote commitment transaction".to_owned()));
 		}
@@ -4404,7 +4372,7 @@ impl Readable for ChannelUpdateStatus {
 impl<Signer: Sign> Writeable for Channel<Signer> {
 	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ::std::io::Error> {
 		// Note that we write out as if remove_uncommitted_htlcs_and_mark_paused had just been
-		// called but include holding cell updates (and obviously we don't modify self).
+		// called.
 
 		writer.write_all(&[SERIALIZATION_VERSION; 1])?;
 		writer.write_all(&[MIN_SERIALIZATION_VERSION; 1])?;

lightning/src/ln/channelmanager.rs

Lines changed: 5 additions & 18 deletions
@@ -3363,7 +3363,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	}
 
 	fn internal_channel_reestablish(&self, counterparty_node_id: &PublicKey, msg: &msgs::ChannelReestablish) -> Result<(), MsgHandleErrInternal> {
-		let chan_restoration_res = {
+		let (htlcs_failed_forward, chan_restoration_res) = {
 			let mut channel_state_lock = self.channel_state.lock().unwrap();
 			let channel_state = &mut *channel_state_lock;
 
@@ -3376,20 +3376,21 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 					// disconnect, so Channel's reestablish will never hand us any holding cell
 					// freed HTLCs to fail backwards. If in the future we no longer drop pending
 					// add-HTLCs on disconnect, we may be handed HTLCs to fail backwards here.
-					let (funding_locked, revoke_and_ack, commitment_update, monitor_update_opt, order, shutdown) =
+					let (funding_locked, revoke_and_ack, commitment_update, monitor_update_opt, order, htlcs_failed_forward, shutdown) =
 						try_chan_entry!(self, chan.get_mut().channel_reestablish(msg, &self.logger), channel_state, chan);
 					if let Some(msg) = shutdown {
 						channel_state.pending_msg_events.push(events::MessageSendEvent::SendShutdown {
 							node_id: counterparty_node_id.clone(),
 							msg,
 						});
 					}
-					handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked)
+					(htlcs_failed_forward, handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked))
 				},
 				hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel".to_owned(), msg.channel_id))
 			}
 		};
 		post_handle_chan_restoration!(self, chan_restoration_res);
+		self.fail_holding_cell_htlcs(htlcs_failed_forward, msg.channel_id);
 		Ok(())
 	}
 
@@ -4052,7 +4053,6 @@ impl<Signer: Sign, M: Deref , T: Deref , K: Deref , F: Deref , L: Deref >
 	fn peer_disconnected(&self, counterparty_node_id: &PublicKey, no_connection_possible: bool) {
 		let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(&self.total_consistency_lock, &self.persistence_notifier);
 		let mut failed_channels = Vec::new();
-		let mut failed_payments = Vec::new();
 		let mut no_channels_remain = true;
 		{
 			let mut channel_state_lock = self.channel_state.lock().unwrap();
@@ -4081,15 +4081,7 @@ impl<Signer: Sign, M: Deref , T: Deref , K: Deref , F: Deref , L: Deref >
 				log_debug!(self.logger, "Marking channels with {} disconnected and generating channel_updates", log_pubkey!(counterparty_node_id));
 				channel_state.by_id.retain(|_, chan| {
 					if chan.get_counterparty_node_id() == *counterparty_node_id {
-						// Note that currently on channel reestablish we assert that there are no
-						// holding cell add-HTLCs, so if in the future we stop removing uncommitted HTLCs
-						// on peer disconnect here, there will need to be corresponding changes in
-						// reestablish logic.
-						let failed_adds = chan.remove_uncommitted_htlcs_and_mark_paused(&self.logger);
-						if !failed_adds.is_empty() {
-							let chan_update = self.get_channel_update(&chan).map(|u| u.encode_with_len()).unwrap(); // Cannot add/recv HTLCs before we have a short_id so unwrap is safe
-							failed_payments.push((chan_update, failed_adds));
-						}
+						chan.remove_uncommitted_htlcs_and_mark_paused(&self.logger);
 						if chan.is_shutdown() {
 							if let Some(short_id) = chan.get_short_channel_id() {
 								short_to_id.remove(&short_id);
@@ -4133,11 +4125,6 @@ impl<Signer: Sign, M: Deref , T: Deref , K: Deref , F: Deref , L: Deref >
 		for failure in failed_channels.drain(..) {
 			self.finish_force_close_channel(failure);
 		}
-		for (chan_update, mut htlc_sources) in failed_payments {
-			for (htlc_source, payment_hash) in htlc_sources.drain(..) {
-				self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), htlc_source, &payment_hash, HTLCFailReason::Reason { failure_code: 0x1000 | 7, data: chan_update.clone() });
-			}
-		}
 	}
 
 	fn peer_connected(&self, counterparty_node_id: &PublicKey, init_msg: &msgs::Init) {
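
Putting the two files together: Channel::channel_reestablish now returns any holding-cell HTLCs that need failing backwards, and the manager fails them only after releasing the channel-state lock. A condensed sketch of that control flow — hypothetical stand-in types, not the real ChannelManager internals:

use std::sync::Mutex;

// Stand-ins for the real per-channel map and HTLC identifiers.
struct ChannelState;
type HtlcToFail = u64;

struct Manager { channel_state: Mutex<ChannelState> }

impl Manager {
	// Models Channel::channel_reestablish handing back holding-cell HTLCs
	// that could not be re-added when the holding cell was freed.
	fn channel_reestablish(&self, _state: &mut ChannelState) -> Vec<HtlcToFail> {
		vec![1, 2]
	}

	fn fail_holding_cell_htlcs(&self, htlcs: Vec<HtlcToFail>) {
		for id in htlcs {
			println!("failing HTLC {} backwards", id);
		}
	}

	fn internal_channel_reestablish(&self) {
		let htlcs_failed_forward = {
			let mut state = self.channel_state.lock().unwrap();
			self.channel_reestablish(&mut state)
			// the lock guard is dropped at the end of this block
		};
		// Failing backwards happens outside the lock, matching the ordering in
		// the real handler (post_handle_chan_restoration! first, then
		// fail_holding_cell_htlcs).
		self.fail_holding_cell_htlcs(htlcs_failed_forward);
	}
}

fn main() {
	let mgr = Manager { channel_state: Mutex::new(ChannelState) };
	mgr.internal_channel_reestablish();
}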
