Skip to content

Commit 3d9bb59

Browse files
committed
Handle monitor update failures in msg-recv functions
This adds a few TODOs around further message rebroadcasting which needs to be implemented as well as some loss of tracking of HTLCs on permanent channel failure which needs to get transferred over to the appropriate in-memory ChannelMonitor.
1 parent 358036d commit 3d9bb59

File tree

2 files changed

+69
-10
lines changed

2 files changed

+69
-10
lines changed

src/ln/channel.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,7 +2154,7 @@ impl Channel {
21542154
/// commitment update or a revoke_and_ack generation). The messages which were generated from
21552155
/// that original call must *not* have been sent to the remote end, and must instead have been
21562156
/// dropped. They will be regenerated when monitor_updating_restored is called.
2157-
pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder) {
2157+
pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, [u8; 32], HTLCFailReason)>, raa_first_dropped_cs: bool) {
21582158
assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
21592159
match order {
21602160
RAACommitmentOrder::CommitmentFirst => {
@@ -2163,9 +2163,13 @@ impl Channel {
21632163
},
21642164
RAACommitmentOrder::RevokeAndACKFirst => {
21652165
self.monitor_pending_revoke_and_ack = true;
2166-
self.monitor_pending_commitment_signed = false;
2166+
self.monitor_pending_commitment_signed = raa_first_dropped_cs;
21672167
},
21682168
}
2169+
assert!(self.monitor_pending_forwards.is_empty());
2170+
mem::swap(&mut pending_forwards, &mut self.monitor_pending_forwards);
2171+
assert!(self.monitor_pending_failures.is_empty());
2172+
mem::swap(&mut pending_fails, &mut self.monitor_pending_failures);
21692173
self.monitor_pending_order = Some(order);
21702174
self.channel_state |= ChannelState::MonitorUpdateFailed as u32;
21712175
}

src/ln/channelmanager.rs

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,43 @@ macro_rules! try_chan_entry {
442442
}
443443
}
444444

445+
macro_rules! return_monitor_err {
446+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path) => {
447+
return_monitor_err!($self, $err, $channel_state, $entry, $action_type, Vec::new(), Vec::new())
448+
};
449+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $raa_first_dropped_cs: expr) => {
450+
if $action_type != RAACommitmentOrder::RevokeAndACKFirst { panic!("Bad return_monitor_err call!"); }
451+
return_monitor_err!($self, $err, $channel_state, $entry, $action_type, Vec::new(), Vec::new(), $raa_first_dropped_cs)
452+
};
453+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $failed_forwards: expr, $failed_fails: expr) => {
454+
return_monitor_err!($self, $err, $channel_state, $entry, $action_type, $failed_forwards, $failed_fails, false)
455+
};
456+
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $failed_forwards: expr, $failed_fails: expr, $raa_first_dropped_cs: expr) => {
457+
match $err {
458+
ChannelMonitorUpdateErr::PermanentFailure => {
459+
let (channel_id, mut chan) = $entry.remove_entry();
460+
if let Some(short_id) = chan.get_short_channel_id() {
461+
$channel_state.short_to_id.remove(&short_id);
462+
}
463+
// TODO: $failed_fails is dropped here, which will cause other channels to hit the
464+
// chain in a confused state! We need to move them into the ChannelMonitor which
465+
// will be responsible for failing backwards once things confirm on-chain.
466+
// It's ok that we drop $failed_forwards here - at this point we'd rather they
467+
// broadcast HTLC-Timeout and pay the associated fees to get their funds back than
468+
// us bother trying to claim it just to forward on to another peer. If we're
469+
// splitting hairs we'd prefer to claim things that went *to us*, but we haven't
470+
// given up the preimage yet, so might as well just wait until the payment is
471+
// retried, avoiding the on-chain fees.
472+
return Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", channel_id, chan.force_shutdown(), $self.get_channel_update(&chan).ok()))
473+
},
474+
ChannelMonitorUpdateErr::TemporaryFailure => {
475+
$entry.get_mut().monitor_update_failed($action_type, $failed_forwards, $failed_fails, $raa_first_dropped_cs);
476+
return Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor"), *$entry.key()));
477+
},
478+
}
479+
}
480+
}
481+
445482
// Does not break in case of TemporaryFailure!
446483
macro_rules! maybe_break_monitor_err {
447484
($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path) => {
@@ -454,7 +491,7 @@ macro_rules! maybe_break_monitor_err {
454491
break Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", channel_id, chan.force_shutdown(), $self.get_channel_update(&chan).ok()))
455492
},
456493
ChannelMonitorUpdateErr::TemporaryFailure => {
457-
$entry.get_mut().monitor_update_failed($action_type);
494+
$entry.get_mut().monitor_update_failed($action_type, Vec::new(), Vec::new(), false);
458495
},
459496
}
460497
}
@@ -1681,6 +1718,13 @@ impl ChannelManager {
16811718
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
16821719
match e {
16831720
ChannelMonitorUpdateErr::PermanentFailure => {
1721+
// TODO: There may be some pending HTLCs that we intended to fail
1722+
// backwards when a monitor update failed. We should make sure
1723+
// knowledge of those gets moved into the appropriate in-memory
1724+
// ChannelMonitor and they get failed backwards once we get
1725+
// on-chain confirmations.
1726+
// Note I think #198 addresses this, so once its merged a test
1727+
// should be written.
16841728
if let Some(short_id) = channel.get_short_channel_id() {
16851729
short_to_id.remove(&short_id);
16861730
}
@@ -2283,8 +2327,9 @@ impl ChannelManager {
22832327
}
22842328
let (revoke_and_ack, commitment_signed, closing_signed, chan_monitor) =
22852329
try_chan_entry!(self, chan.get_mut().commitment_signed(&msg, &*self.fee_estimator), channel_state, chan);
2286-
if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2287-
unimplemented!();
2330+
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2331+
return_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::RevokeAndACKFirst, commitment_signed.is_some());
2332+
//TODO: Rebroadcast closing_signed if present on monitor update restoration
22882333
}
22892334
channel_state.pending_msg_events.push(events::MessageSendEvent::SendRevokeAndACK {
22902335
node_id: their_node_id.clone(),
@@ -2360,8 +2405,8 @@ impl ChannelManager {
23602405
}
23612406
let (commitment_update, pending_forwards, pending_failures, closing_signed, chan_monitor) =
23622407
try_chan_entry!(self, chan.get_mut().revoke_and_ack(&msg, &*self.fee_estimator), channel_state, chan);
2363-
if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2364-
unimplemented!();
2408+
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
2409+
return_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::CommitmentFirst, pending_forwards, pending_failures);
23652410
}
23662411
if let Some(updates) = commitment_update {
23672412
channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
@@ -2455,11 +2500,21 @@ impl ChannelManager {
24552500
if chan.get().get_their_node_id() != *their_node_id {
24562501
return Err(MsgHandleErrInternal::send_err_msg_no_close("Got a message for a channel from the wrong node!", msg.channel_id));
24572502
}
2458-
let (funding_locked, revoke_and_ack, commitment_update, channel_monitor, order, shutdown) =
2503+
let (funding_locked, revoke_and_ack, commitment_update, channel_monitor, mut order, shutdown) =
24592504
try_chan_entry!(self, chan.get_mut().channel_reestablish(msg), channel_state, chan);
24602505
if let Some(monitor) = channel_monitor {
2461-
if let Err(_e) = self.monitor.add_update_monitor(monitor.get_funding_txo().unwrap(), monitor) {
2462-
unimplemented!();
2506+
if let Err(e) = self.monitor.add_update_monitor(monitor.get_funding_txo().unwrap(), monitor) {
2507+
// channel_reestablish doesn't guarantee the order it returns is sensical
2508+
// for the messages it returns, but if we're setting what messages to
2509+
// re-transmit on monitor update success, we need to make sure it is sane.
2510+
if revoke_and_ack.is_none() {
2511+
order = RAACommitmentOrder::CommitmentFirst;
2512+
}
2513+
if commitment_update.is_none() {
2514+
order = RAACommitmentOrder::RevokeAndACKFirst;
2515+
}
2516+
return_monitor_err!(self, e, channel_state, chan, order);
2517+
//TODO: Resend the funding_locked if needed once we get the monitor running again
24632518
}
24642519
}
24652520
if let Some(msg) = funding_locked {

0 commit comments

Comments
 (0)