Commit 599e1ab

Support async ChannelMonitorUpdates to closed chans at runtime
One of the largest gaps in our async persistence functionality has been preimage (claim) updates to closed channels. Here we finally implement support for this (for updates at runtime). Thanks to all the work we've built up over the past many commits, this is a well-contained patch within `claim_mpp_part`, pushing the generated `ChannelMonitorUpdate`s through the same pipeline we use for open channels. Sadly, we can't use the `handle_new_monitor_update` macro wholesale, as it also handles `Channel` resumption, which we don't do here.
1 parent 3adbe07 commit 599e1ab
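
The core of the change (visible in the `channelmanager.rs` hunks below) is stamping each closed-channel `ChannelMonitorUpdate` with the next `update_id` tracked per peer in `closed_channel_monitor_update_ids`, rather than going through the old race-prone helper. As a rough, self-contained sketch of that bookkeeping, using illustrative stand-in types (`ChannelId`, `MonitorUpdate`, `ClosedChannelUpdateIds`) rather than LDK's actual structs:

```rust
use std::collections::BTreeMap;

// Stand-ins for LDK's real types, used only to illustrate the idea.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct ChannelId(u32);

#[derive(Debug)]
struct MonitorUpdate {
    update_id: u64,
    // ...the actual update steps (e.g. a payment preimage) would live here.
}

/// Per-peer tracking of the latest `update_id` issued for each closed channel,
/// mirroring the role of `closed_channel_monitor_update_ids` in the patch.
#[derive(Default)]
struct ClosedChannelUpdateIds(BTreeMap<ChannelId, u64>);

impl ClosedChannelUpdateIds {
    /// Stamp `update` with the next monotonically increasing id for `chan`,
    /// saturating so counters already at `u64::MAX` don't wrap.
    fn stamp_next(&mut self, chan: ChannelId, update: &mut MonitorUpdate) -> Result<(), ()> {
        match self.0.get_mut(&chan) {
            Some(latest) => {
                *latest = latest.saturating_add(1);
                update.update_id = *latest;
                Ok(())
            },
            // The patch treats a missing entry as a bug (and panics); here we
            // simply surface an error.
            None => Err(()),
        }
    }
}

fn main() {
    let chan = ChannelId(42);
    let mut ids = ClosedChannelUpdateIds::default();
    ids.0.insert(chan, 7); // the last update_id the closed channel's monitor saw

    let mut preimage_update = MonitorUpdate { update_id: 0 };
    ids.stamp_next(chan, &mut preimage_update).expect("closed channel must be tracked");
    assert_eq!(preimage_update.update_id, 8);
    println!("stamped {:?}", preimage_update);
}
```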

2 files changed: +147 lines, -84 lines

lightning/src/ln/chanmon_update_fail_tests.rs (108 additions, 0 deletions)

@@ -3713,3 +3713,111 @@ fn test_partial_claim_mon_update_compl_actions() {
     send_payment(&nodes[2], &[&nodes[3]], 100_000);
     assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
 }
+
+
+#[test]
+fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() {
+    // One of the last features we implemented for async persistence was correctly blocking the
+    // RAA(s) which remove a preimage from an outbound channel for a forwarded payment until the
+    // preimage has been durably written to the closed inbound channel's `ChannelMonitor`.
+    // This tests that behavior.
+    let chanmon_cfgs = create_chanmon_cfgs(3);
+    let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+    let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+    let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+    // First open channels, route a payment, and force-close the first hop.
+    let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000);
+    let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 500_000_000);
+
+    let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+
+    nodes[0].node.force_close_broadcasting_latest_txn(&chan_a.2, &nodes[1].node.get_our_node_id(), String::new()).unwrap();
+    check_added_monitors!(nodes[0], 1);
+    let a_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
+    check_closed_event!(nodes[0], 1, a_reason, [nodes[1].node.get_our_node_id()], 1000000);
+    check_closed_broadcast!(nodes[0], true);
+
+    let as_commit_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+    assert_eq!(as_commit_tx.len(), 1);
+
+    mine_transaction(&nodes[1], &as_commit_tx[0]);
+    check_added_monitors!(nodes[1], 1);
+    check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
+    check_closed_broadcast!(nodes[1], true);
+
+    // Now that B has a pending forwarded payment across it with the inbound edge on-chain, claim
+    // the payment on C and give B the preimage for it.
+    nodes[2].node.claim_funds(payment_preimage);
+    check_added_monitors!(nodes[2], 1);
+    expect_payment_claimed!(nodes[2], payment_hash, 1_000_000);
+
+    let updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
+    chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+    nodes[1].node.handle_update_fulfill_htlc(nodes[2].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
+    check_added_monitors!(nodes[1], 1);
+    commitment_signed_dance!(nodes[1], nodes[2], updates.commitment_signed, false);
+
+    // At this point nodes[1] has the preimage and is waiting for the `ChannelMonitorUpdate` for
+    // channel A to hit disk. Until it does so, it shouldn't ever let the preimage disappear from
+    // channel B's `ChannelMonitor`.
+    assert!(get_monitor!(nodes[1], chan_b.2).get_all_current_outbound_htlcs().iter().any(|(_, (_, preimage))| *preimage == Some(payment_preimage)));
+
+    // Once we complete the `ChannelMonitorUpdate` on channel A, and the `ChannelManager` processes
+    // background events (via `get_and_clear_pending_msg_events`), the final `ChannelMonitorUpdate`
+    // will fly and we'll drop the preimage from channel B's `ChannelMonitor`. We'll also release
+    // the `Event::PaymentForwarded`.
+    check_added_monitors!(nodes[1], 0);
+    assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+    assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
+
+    nodes[1].chain_monitor.complete_sole_pending_chan_update(&chan_a.2);
+    assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+    check_added_monitors!(nodes[1], 1);
+    assert!(!get_monitor!(nodes[1], chan_b.2).get_all_current_outbound_htlcs().iter().any(|(_, (_, preimage))| *preimage == Some(payment_preimage)));
+    expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], None, true, false);
+}
+
+#[test]
+fn test_claim_to_closed_channel_blocks_claimed_event() {
+    // One of the last features we implemented for async persistence was correctly blocking the
+    // event(s) until the preimage for a claimed HTLC is durably on disk in the `ChannelMonitor`
+    // for a closed channel.
+    // This tests that behavior.
+    let chanmon_cfgs = create_chanmon_cfgs(2);
+    let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+    let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+    let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+    // First open a channel, route a payment, and force-close it.
+    let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000);
+
+    let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
+
+    nodes[0].node.force_close_broadcasting_latest_txn(&chan_a.2, &nodes[1].node.get_our_node_id(), String::new()).unwrap();
+    check_added_monitors!(nodes[0], 1);
+    let a_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
+    check_closed_event!(nodes[0], 1, a_reason, [nodes[1].node.get_our_node_id()], 1000000);
+    check_closed_broadcast!(nodes[0], true);
+
+    let as_commit_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+    assert_eq!(as_commit_tx.len(), 1);
+
+    mine_transaction(&nodes[1], &as_commit_tx[0]);
+    check_added_monitors!(nodes[1], 1);
+    check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
+    check_closed_broadcast!(nodes[1], true);
+
+    // Now that B has a pending payment with the inbound HTLC on a closed channel, claim the
+    // payment, but don't let the `ChannelMonitorUpdate` which writes the preimage to disk
+    // complete. This should prevent the `Event::PaymentClaimed` from being generated.
+    chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+    nodes[1].node.claim_funds(payment_preimage);
+    check_added_monitors!(nodes[1], 1);
+    assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
+
+    // Once we complete the `ChannelMonitorUpdate` the `Event::PaymentClaimed` will become
+    // available.
+    nodes[1].chain_monitor.complete_sole_pending_chan_update(&chan_a.2);
+    expect_payment_claimed!(nodes[1], payment_hash, 1_000_000);
+}
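
For readers skimming the tests above, the behavior they pin down can be modeled roughly as follows: while a monitor write is still reported `InProgress`, the resulting event stays queued, and only completing the write releases it. The types below are toy stand-ins for the real `ChannelManager` machinery, not LDK code:

```rust
/// A toy model of the gating behavior the two tests above exercise.
#[derive(Debug, PartialEq)]
enum Event {
    PaymentClaimed { amount_msat: u64 },
}

#[derive(Default)]
struct NodeModel {
    update_in_flight: bool,
    blocked_events: Vec<Event>,
    ready_events: Vec<Event>,
}

impl NodeModel {
    /// Claiming while the preimage write is still pending queues the event
    /// instead of surfacing it.
    fn claim(&mut self, amount_msat: u64) {
        let ev = Event::PaymentClaimed { amount_msat };
        if self.update_in_flight {
            self.blocked_events.push(ev);
        } else {
            self.ready_events.push(ev);
        }
    }

    /// Analogous to completing the pending `ChannelMonitorUpdate` in the test.
    fn complete_pending_update(&mut self) {
        self.update_in_flight = false;
        self.ready_events.append(&mut self.blocked_events);
    }
}

fn main() {
    let mut node = NodeModel { update_in_flight: true, ..Default::default() };
    node.claim(1_000_000);
    assert!(node.ready_events.is_empty()); // blocked, like the test asserts

    node.complete_pending_update();
    assert_eq!(node.ready_events, vec![Event::PaymentClaimed { amount_msat: 1_000_000 }]);
}
```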

lightning/src/ln/channelmanager.rs (39 additions, 84 deletions)

@@ -3922,39 +3922,6 @@ where
         self.close_channel_internal(channel_id, counterparty_node_id, target_feerate_sats_per_1000_weight, shutdown_script)
     }
 
-    fn set_closed_chan_next_monitor_update_id(
-        peer_state: &mut PeerState<SP>, channel_id: ChannelId, monitor_update: &mut ChannelMonitorUpdate,
-    ) {
-        match peer_state.closed_channel_monitor_update_ids.entry(channel_id) {
-            btree_map::Entry::Vacant(entry) => {
-                let is_closing_unupdated_monitor = monitor_update.update_id == 1
-                    && monitor_update.updates.len() == 1
-                    && matches!(&monitor_update.updates[0], ChannelMonitorUpdateStep::ChannelForceClosed { .. });
-                // If the ChannelMonitorUpdate is closing a channel that never got past initial
-                // funding (to have any commitment updates), we'll skip inserting in
-                // `locked_close_channel`, allowing us to avoid keeping around the PeerState for
-                // that peer. In that specific case we expect no entry in the map here. In any
-                // other cases, this is a bug, but in production we go ahead and recover by
-                // inserting the update_id and hoping its right.
-                debug_assert!(is_closing_unupdated_monitor, "Expected closing monitor against an unused channel, got {:?}", monitor_update);
-                if !is_closing_unupdated_monitor {
-                    entry.insert(monitor_update.update_id);
-                }
-            },
-            btree_map::Entry::Occupied(entry) => {
-                // If we're running in a threaded environment its possible we generate updates for
-                // a channel that is closing, then apply some preimage update, then go back and
-                // apply the close monitor update here. In order to ensure the updates are still
-                // well-ordered, we have to use the `closed_channel_monitor_update_ids` map to
-                // override the `update_id`, taking care to handle old monitors where the
-                // `latest_update_id` is already `u64::MAX`.
-                let latest_update_id = entry.into_mut();
-                *latest_update_id = latest_update_id.saturating_add(1);
-                monitor_update.update_id = *latest_update_id;
-            }
-        }
-    }
-
     /// Applies a [`ChannelMonitorUpdate`] which may or may not be for a channel which is closed.
     #[must_use]
     fn apply_post_close_monitor_update(
@@ -7220,38 +7187,53 @@ where
         let mut peer_state = peer_state_opt.expect("peer_state_opt is always Some when the counterparty_node_id is Some");
 
         let mut preimage_update = ChannelMonitorUpdate {
-            update_id: 0, // set in set_closed_chan_next_monitor_update_id
-            counterparty_node_id: prev_hop.counterparty_node_id,
+            update_id: 0, // set below
+            counterparty_node_id: Some(counterparty_node_id),
             updates: vec![ChannelMonitorUpdateStep::PaymentPreimage {
                 payment_preimage,
                 payment_info,
             }],
             channel_id: Some(prev_hop.channel_id),
         };
 
-        // Note that the below is race-y - we set the `update_id` here and then drop the peer_state
-        // lock before applying the update in `apply_post_close_monitor_update` (or via the
-        // background events pipeline). During that time, some other update could be created and
-        // then applied, resultin in `ChannelMonitorUpdate`s being applied out of order and causing
-        // a panic.
-        Self::set_closed_chan_next_monitor_update_id(&mut *peer_state, prev_hop.channel_id, &mut preimage_update);
+        if let Some(latest_update_id) = peer_state.closed_channel_monitor_update_ids.get_mut(&chan_id) {
+            *latest_update_id = latest_update_id.saturating_add(1);
+            preimage_update.update_id = *latest_update_id;
+        } else {
+            let err = "We need the latest ChannelMonitorUpdate ID to build a new update.
+This should have been checked for availability on startup but somehow it is no longer available.
+This indicates a bug inside LDK. Please report this error at https://github.com/lightningdevkit/rust-lightning/issues/new";
+            log_error!(self.logger, "{}", err);
+            panic!("{}", err);
+        }
 
-        mem::drop(peer_state);
-        mem::drop(per_peer_state);
+        // Note that we do process the completion action here. This totally could be a
+        // duplicate claim, but we have no way of knowing without interrogating the
+        // `ChannelMonitor` we've provided the above update to. Instead, note that `Event`s are
+        // generally always allowed to be duplicative (and it's specifically noted in
+        // `PaymentForwarded`).
+        let (action_opt, raa_blocker_opt) = completion_action(None, false);
+
+        if let Some(raa_blocker) = raa_blocker_opt {
+            peer_state.actions_blocking_raa_monitor_updates
+                .entry(prev_hop.channel_id)
+                .or_default()
+                .push(raa_blocker);
+        }
+
+        // Given the fact that we're in a bit of a weird edge case, it's worth hashing the preimage
+        // to include the `payment_hash` in the log metadata here.
+        let payment_hash = payment_preimage.into();
+        let logger = WithContext::from(&self.logger, Some(counterparty_node_id), Some(chan_id), Some(payment_hash));
 
         if !during_init {
-            // We update the ChannelMonitor on the backward link, after
-            // receiving an `update_fulfill_htlc` from the forward link.
-            let update_res = self.apply_post_close_monitor_update(counterparty_node_id, prev_hop.channel_id, prev_hop.funding_txo, preimage_update);
-            if update_res != ChannelMonitorUpdateStatus::Completed {
-                // TODO: This needs to be handled somehow - if we receive a monitor update
-                // with a preimage we *must* somehow manage to propagate it to the upstream
-                // channel, or we must have an ability to receive the same event and try
-                // again on restart.
-                log_error!(WithContext::from(&self.logger, None, Some(prev_hop.channel_id), None),
-                    "Critical error: failed to update channel monitor with preimage {:?}: {:?}",
-                    payment_preimage, update_res);
+            if let Some(action) = action_opt {
+                log_trace!(logger, "Tracking monitor update completion action for closed channel {}: {:?}",
+                    chan_id, action);
+                peer_state.monitor_update_blocked_actions.entry(chan_id).or_insert(Vec::new()).push(action);
             }
+
+            handle_new_monitor_update!(self, prev_hop.funding_txo, preimage_update, peer_state, peer_state, per_peer_state, logger, chan_id, POST_CHANNEL_CLOSE);
         } else {
             // If we're running during init we cannot update a monitor directly - they probably
             // haven't actually been loaded yet. Instead, push the monitor update as a background
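
The hunk above parks the completion action in `monitor_update_blocked_actions` and registers an RAA blocker so the downstream channel keeps the preimage until the closed channel's update is durably persisted. A simplified, self-contained sketch of that bookkeeping (stand-in types, not LDK's own):

```rust
use std::collections::BTreeMap;

// Illustrative stand-ins; the real types live in `channelmanager.rs`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct ChannelId(u32);

#[derive(Debug, PartialEq)]
enum CompletionAction {
    EmitPaymentForwarded,
}

#[derive(Debug)]
struct RaaBlocker;

/// Toy per-peer state mirroring the two maps the new code touches.
#[derive(Default)]
struct PeerStateModel {
    // Actions held back until the closed channel's monitor update is persisted.
    monitor_update_blocked_actions: BTreeMap<ChannelId, Vec<CompletionAction>>,
    // Blockers that keep the downstream channel from releasing the preimage via an RAA.
    actions_blocking_raa_monitor_updates: BTreeMap<ChannelId, Vec<RaaBlocker>>,
}

impl PeerStateModel {
    /// Called when a preimage claim targets a closed (upstream) channel.
    fn queue_claim(&mut self, closed_chan: ChannelId, downstream_chan: ChannelId) {
        self.actions_blocking_raa_monitor_updates
            .entry(downstream_chan)
            .or_default()
            .push(RaaBlocker);
        self.monitor_update_blocked_actions
            .entry(closed_chan)
            .or_default()
            .push(CompletionAction::EmitPaymentForwarded);
    }

    /// Called once the closed channel's monitor update is durably written:
    /// drop one blocker and release the parked actions.
    fn monitor_update_completed(
        &mut self, closed_chan: ChannelId, downstream_chan: ChannelId,
    ) -> Vec<CompletionAction> {
        if let Some(blockers) = self.actions_blocking_raa_monitor_updates.get_mut(&downstream_chan) {
            blockers.pop();
        }
        self.monitor_update_blocked_actions.remove(&closed_chan).unwrap_or_default()
    }
}

fn main() {
    let (chan_a, chan_b) = (ChannelId(1), ChannelId(2));
    let mut peer = PeerStateModel::default();
    peer.queue_claim(chan_a, chan_b);

    let released = peer.monitor_update_completed(chan_a, chan_b);
    assert_eq!(released, vec![CompletionAction::EmitPaymentForwarded]);
    assert!(peer.actions_blocking_raa_monitor_updates[&chan_b].is_empty());
}
```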
@@ -7265,39 +7247,12 @@ where
                 update: preimage_update,
             };
             self.pending_background_events.lock().unwrap().push(event);
-        }
 
-        // Note that we do process the completion action here. This totally could be a
-        // duplicate claim, but we have no way of knowing without interrogating the
-        // `ChannelMonitor` we've provided the above update to. Instead, note that `Event`s are
-        // generally always allowed to be duplicative (and it's specifically noted in
-        // `PaymentForwarded`).
-        let (action_opt, raa_blocker_opt) = completion_action(None, false);
-
-        if let Some(raa_blocker) = raa_blocker_opt {
-            // TODO: Avoid always blocking the world for the write lock here.
-            let mut per_peer_state = self.per_peer_state.write().unwrap();
-            let peer_state_mutex = per_peer_state.entry(counterparty_node_id).or_insert_with(||
-                Mutex::new(PeerState {
-                    channel_by_id: new_hash_map(),
-                    inbound_channel_request_by_id: new_hash_map(),
-                    latest_features: InitFeatures::empty(),
-                    pending_msg_events: Vec::new(),
-                    in_flight_monitor_updates: BTreeMap::new(),
-                    monitor_update_blocked_actions: BTreeMap::new(),
-                    actions_blocking_raa_monitor_updates: BTreeMap::new(),
-                    closed_channel_monitor_update_ids: BTreeMap::new(),
-                    is_connected: false,
-                }));
-            let mut peer_state = peer_state_mutex.lock().unwrap();
+            mem::drop(peer_state);
+            mem::drop(per_peer_state);
 
-            peer_state.actions_blocking_raa_monitor_updates
-                .entry(prev_hop.channel_id)
-                .or_default()
-                .push(raa_blocker);
+            self.handle_monitor_update_completion_actions(action_opt);
         }
-
-        self.handle_monitor_update_completion_actions(action_opt);
     }
 
     fn finalize_claims(&self, sources: Vec<HTLCSource>) {
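
The unchanged `during_init` branch shown in the context above takes the other path: updates generated before the monitors are loaded are queued as background events and applied once startup completes. A toy sketch of that pattern (illustrative names only, not LDK's types):

```rust
#[derive(Debug)]
struct MonitorUpdate {
    update_id: u64,
}

#[derive(Debug)]
enum BackgroundEvent {
    MonitorUpdateRegeneratedOnStartup { update: MonitorUpdate },
}

#[derive(Default)]
struct ManagerModel {
    during_init: bool,
    pending_background_events: Vec<BackgroundEvent>,
    applied: Vec<MonitorUpdate>,
}

impl ManagerModel {
    /// During init, queue the update as a background event; otherwise apply it directly.
    fn apply_or_queue(&mut self, update: MonitorUpdate) {
        if self.during_init {
            self.pending_background_events
                .push(BackgroundEvent::MonitorUpdateRegeneratedOnStartup { update });
        } else {
            self.applied.push(update);
        }
    }

    /// After startup, drain the queue and apply everything that was deferred.
    fn process_background_events(&mut self) {
        self.during_init = false;
        let events: Vec<BackgroundEvent> = self.pending_background_events.drain(..).collect();
        for ev in events {
            match ev {
                BackgroundEvent::MonitorUpdateRegeneratedOnStartup { update } => {
                    self.applied.push(update);
                },
            }
        }
    }
}

fn main() {
    let mut mgr = ManagerModel { during_init: true, ..Default::default() };
    mgr.apply_or_queue(MonitorUpdate { update_id: 9 });
    assert!(mgr.applied.is_empty()); // deferred while "during init"

    mgr.process_background_events();
    assert_eq!(mgr.applied[0].update_id, 9);
}
```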
