Skip to content

Commit e3b186a

Browse files
committed
Handle monitor update failures in msg-recv functions
This adds a few TODOs around further message rebroadcasting which needs to be implemented as well as some loss of tracking of HTLCs on permanent channel failure which needs to get transferred over to the appropriate in-memory ChannelMonitor.
1 parent 3ef1763 commit e3b186a

File tree

2 files changed

+69
-10
lines changed

2 files changed

+69
-10
lines changed

src/ln/channel.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,7 +2154,7 @@ impl Channel {
21542154
/// commitment update or a revoke_and_ack generation). The messages which were generated from
21552155
/// that original call must *not* have been sent to the remote end, and must instead have been
21562156
/// dropped. They will be regenerated when monitor_updating_restored is called.
2157-
pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder) {
2157+
pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, [u8; 32], HTLCFailReason)>, raa_first_dropped_cs: bool) {
21582158
assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
21592159
match order {
21602160
RAACommitmentOrder::CommitmentFirst => {
@@ -2163,9 +2163,13 @@ impl Channel {
21632163
},
21642164
RAACommitmentOrder::RevokeAndACKFirst => {
21652165
self.monitor_pending_revoke_and_ack = true;
2166-
self.monitor_pending_commitment_signed = false;
2166+
self.monitor_pending_commitment_signed = raa_first_dropped_cs;
21672167
},
21682168
}
2169+
assert!(self.monitor_pending_forwards.is_empty());
2170+
mem::swap(&mut pending_forwards, &mut self.monitor_pending_forwards);
2171+
assert!(self.monitor_pending_failures.is_empty());
2172+
mem::swap(&mut pending_fails, &mut self.monitor_pending_failures);
21692173
self.monitor_pending_order = Some(order);
21702174
self.channel_state |= ChannelState::MonitorUpdateFailed as u32;
21712175
}

src/ln/channelmanager.rs

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,43 @@ macro_rules! try_chan_entry {
442442
}
443443
}
444444

445+
macro_rules! return_monitor_err {
446+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path) => {
447+
return_monitor_err!($self, $err, $channel_state, $entry, $action_type, Vec::new(), Vec::new())
448+
};
449+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $raa_first_dropped_cs: expr) => {
450+
if $action_type != RAACommitmentOrder::RevokeAndACKFirst { panic!("Bad return_monitor_err call!"); }
451+
return_monitor_err!($self, $err, $channel_state, $entry, $action_type, Vec::new(), Vec::new(), $raa_first_dropped_cs)
452+
};
453+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $failed_forwards: expr, $failed_fails: expr) => {
454+
return_monitor_err!($self, $err, $channel_state, $entry, $action_type, $failed_forwards, $failed_fails, false)
455+
};
456+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $failed_forwards: expr, $failed_fails: expr, $raa_first_dropped_cs: expr) => {
457+
match $err {
458+
ChannelMonitorUpdateErr::PermanentFailure => {
459+
let (channel_id, mut chan) = $entry.remove_entry();
460+
if let Some(short_id) = chan.get_short_channel_id() {
461+
$channel_state.short_to_id.remove(&short_id);
462+
}
463+
// TODO: $failed_fails is dropped here, which will cause other channels to hit the
464+
// chain in a confused state! We need to move them into the ChannelMonitor which
465+
// will be responsible for failing backwards once things confirm on-chain.
466+
// It's ok that we drop $failed_forwards here - at this point we'd rather they
467+
// broadcast HTLC-Timeout and pay the associated fees to get their funds back than
468+
// us bother trying to claim it just to forward on to another peer. If we're
469+
// splitting hairs we'd prefer to claim things that went *to us*, but we haven't
470+
// given up the preimage yet, so might as well just wait until the payment is
471+
// retried, avoiding the on-chain fees.
472+
return Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", channel_id, chan.force_shutdown(), $self.get_channel_update(&chan).ok()))
473+
},
474+
ChannelMonitorUpdateErr::TemporaryFailure => {
475+
$entry.get_mut().monitor_update_failed($action_type, $failed_forwards, $failed_fails, $raa_first_dropped_cs);
476+
return Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor"), *$entry.key()));
477+
},
478+
}
479+
}
480+
}
481+
445482
// Does not break in case of TemporaryFailure!
446483
macro_rules! maybe_break_monitor_err {
447484
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path) => {
@@ -454,7 +491,7 @@ macro_rules! maybe_break_monitor_err {
454491
break Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", channel_id, chan.force_shutdown(), $self.get_channel_update(&chan).ok()))
455492
},
456493
ChannelMonitorUpdateErr::TemporaryFailure => {
457-
$entry.get_mut().monitor_update_failed($action_type);
494+
$entry.get_mut().monitor_update_failed($action_type, Vec::new(), Vec::new(), false);
458495
},
459496
}
460497
}
@@ -1683,6 +1720,13 @@ impl ChannelManager {
16831720
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
16841721
match e {
16851722
ChannelMonitorUpdateErr::PermanentFailure => {
1723+
// TODO: There may be some pending HTLCs that we intended to fail
1724+
// backwards when a monitor update failed. We should make sure
1725+
// knowledge of those gets moved into the appropriate in-memory
1726+
// ChannelMonitor and they get failed backwards once we get
1727+
// on-chain confirmations.
1728+
// Note I think #198 addresses this, so once its merged a test
1729+
// should be written.
16861730
if let Some(short_id) = channel.get_short_channel_id() {
16871731
short_to_id.remove(&short_id);
16881732
}
@@ -2285,8 +2329,9 @@ impl ChannelManager {
22852329
}
22862330
let (revoke_and_ack, commitment_signed, closing_signed, chan_monitor) =
22872331
try_chan_entry!(self, chan.get_mut().commitment_signed(&msg, &*self.fee_estimator), channel_state, chan);
2288-
if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2289-
unimplemented!();
2332+
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2333+
return_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::RevokeAndACKFirst, commitment_signed.is_some());
2334+
//TODO: Rebroadcast closing_signed if present on monitor update restoration
22902335
}
22912336
channel_state.pending_msg_events.push(events::MessageSendEvent::SendRevokeAndACK {
22922337
node_id: their_node_id.clone(),
@@ -2362,8 +2407,8 @@ impl ChannelManager {
23622407
}
23632408
let (commitment_update, pending_forwards, pending_failures, closing_signed, chan_monitor) =
23642409
try_chan_entry!(self, chan.get_mut().revoke_and_ack(&msg, &*self.fee_estimator), channel_state, chan);
2365-
if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2366-
unimplemented!();
2410+
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2411+
return_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::CommitmentFirst, pending_forwards, pending_failures);
23672412
}
23682413
if let Some(updates) = commitment_update {
23692414
channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
@@ -2457,11 +2502,21 @@ impl ChannelManager {
24572502
if chan.get().get_their_node_id() != *their_node_id {
24582503
return Err(MsgHandleErrInternal::send_err_msg_no_close("Got a message for a channel from the wrong node!", msg.channel_id));
24592504
}
2460-
let (funding_locked, revoke_and_ack, commitment_update, channel_monitor, order, shutdown) =
2505+
let (funding_locked, revoke_and_ack, commitment_update, channel_monitor, mut order, shutdown) =
24612506
try_chan_entry!(self, chan.get_mut().channel_reestablish(msg), channel_state, chan);
24622507
if let Some(monitor) = channel_monitor {
2463-
if let Err(_e) = self.monitor.add_update_monitor(monitor.get_funding_txo().unwrap(), monitor) {
2464-
unimplemented!();
2508+
if let Err(e) = self.monitor.add_update_monitor(monitor.get_funding_txo().unwrap(), monitor) {
2509+
// channel_reestablish doesn't guarantee the order it returns is sensical
2510+
// for the messages it returns, but if we're setting what messages to
2511+
// re-transmit on monitor update success, we need to make sure it is sane.
2512+
if revoke_and_ack.is_none() {
2513+
order = RAACommitmentOrder::CommitmentFirst;
2514+
}
2515+
if commitment_update.is_none() {
2516+
order = RAACommitmentOrder::RevokeAndACKFirst;
2517+
}
2518+
return_monitor_err!(self, e, channel_state, chan, order);
2519+
//TODO: Resend the funding_locked if needed once we get the monitor running again
24652520
}
24662521
}
24672522
if let Some(msg) = funding_locked {

0 commit comments

Comments
 (0)