Skip to content

Commit cb86399

Browse files
committed
Don't pause events for chainsync persistence
We used to wait on ChannelMonitor persistence to avoid duplicate payment events. But this can still happen in cases where the ChannelMonitor handed the event to the ChannelManager and we did not persist the ChannelManager after event handling. Clients are expected to receive duplicate payment events and should handle them in an idempotent manner. Removing this hold-up of events simplifies the logic and makes it easier to not persist ChannelMonitors on every block connect.
1 parent 9a438ee commit cb86399

File tree

2 files changed

+32
-135
lines changed

2 files changed

+32
-135
lines changed

lightning/src/chain/chainmonitor.rs

Lines changed: 14 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use bitcoin::hash_types::{Txid, BlockHash};
2929
use crate::chain;
3030
use crate::chain::{ChannelMonitorUpdateStatus, Filter, WatchedOutput};
3131
use crate::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
32-
use crate::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, WithChannelMonitor, LATENCY_GRACE_PERIOD_BLOCKS};
32+
use crate::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, WithChannelMonitor};
3333
use crate::chain::transaction::{OutPoint, TransactionData};
3434
use crate::ln::ChannelId;
3535
use crate::sign::ecdsa::WriteableEcdsaChannelSigner;
@@ -209,24 +209,15 @@ struct MonitorHolder<ChannelSigner: WriteableEcdsaChannelSigner> {
209209
/// update_persisted_channel, the user returns a
210210
/// [`ChannelMonitorUpdateStatus::InProgress`], and then calls channel_monitor_updated
211211
/// immediately, racing our insertion of the pending update into the contained Vec.
212-
///
213-
/// Beyond the synchronization of updates themselves, we cannot handle user events until after
214-
/// any chain updates have been stored on disk. Thus, we scan this list when returning updates
215-
/// to the ChannelManager, refusing to return any updates for a ChannelMonitor which is still
216-
/// being persisted fully to disk after a chain update.
217-
///
218-
/// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
219-
/// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
220-
/// the pending payment entry, and then reloading before the monitor is persisted, resulting in
221-
/// the ChannelManager re-adding the same payment entry, before the same block is replayed,
222-
/// resulting in a duplicate PaymentSent event.
223212
pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
224213
/// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present
225214
/// in `pending_monitor_updates`.
226215
/// If it's been more than [`LATENCY_GRACE_PERIOD_BLOCKS`] since we started waiting on a chain
227216
/// sync event, we let monitor events return to `ChannelManager` because we cannot hold them up
228217
/// forever or we'll end up with HTLC preimages waiting to feed back into an upstream channel
229218
/// forever, risking funds loss.
219+
///
220+
/// [`LATENCY_GRACE_PERIOD_BLOCKS`]: crate::util::ser::Writeable::write
230221
last_chain_persist_height: AtomicUsize,
231222
}
232223

@@ -393,7 +384,7 @@ where C::Target: chain::Filter,
393384
chain_sync_update_id
394385
),
395386
ChannelMonitorUpdateStatus::InProgress => {
396-
log_debug!(logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
387+
log_debug!(logger, "Channel Monitor sync for channel {} in progress.", log_funding_info!(monitor));
397388
pending_monitor_updates.push(update_id);
398389
},
399390
ChannelMonitorUpdateStatus::UnrecoverableError => {
@@ -924,21 +915,12 @@ where C::Target: chain::Filter,
924915
fn release_pending_monitor_events(&self) -> Vec<(OutPoint, ChannelId, Vec<MonitorEvent>, Option<PublicKey>)> {
925916
let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
926917
for monitor_state in self.monitors.read().unwrap().values() {
927-
let logger = WithChannelMonitor::from(&self.logger, &monitor_state.monitor);
928-
let is_pending_monitor_update = monitor_state.has_pending_chainsync_updates(&monitor_state.pending_monitor_updates.lock().unwrap());
929-
if !is_pending_monitor_update || monitor_state.last_chain_persist_height.load(Ordering::Acquire) + LATENCY_GRACE_PERIOD_BLOCKS as usize <= self.highest_chain_height.load(Ordering::Acquire) {
930-
if is_pending_monitor_update {
931-
log_error!(logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS);
932-
log_error!(logger, " To avoid funds-loss, we are allowing monitor updates to be released.");
933-
log_error!(logger, " This may cause duplicate payment events to be generated.");
934-
}
935-
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
936-
if monitor_events.len() > 0 {
937-
let monitor_outpoint = monitor_state.monitor.get_funding_txo().0;
938-
let monitor_channel_id = monitor_state.monitor.channel_id();
939-
let counterparty_node_id = monitor_state.monitor.get_counterparty_node_id();
940-
pending_monitor_events.push((monitor_outpoint, monitor_channel_id, monitor_events, counterparty_node_id));
941-
}
918+
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
919+
if monitor_events.len() > 0 {
920+
let monitor_outpoint = monitor_state.monitor.get_funding_txo().0;
921+
let monitor_channel_id = monitor_state.monitor.channel_id();
922+
let counterparty_node_id = monitor_state.monitor.get_counterparty_node_id();
923+
pending_monitor_events.push((monitor_outpoint, monitor_channel_id, monitor_events, counterparty_node_id));
942924
}
943925
}
944926
pending_monitor_events
@@ -975,15 +957,12 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L
975957
#[cfg(test)]
976958
mod tests {
977959
use crate::check_added_monitors;
978-
use crate::{expect_payment_claimed, expect_payment_path_successful, get_event_msg};
979-
use crate::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
980-
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Watch};
981-
use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
960+
use crate::{expect_payment_path_successful, get_event_msg};
961+
use crate::{get_htlc_update_msgs, get_revoke_commit_msgs};
962+
use crate::chain::{ChannelMonitorUpdateStatus, Watch};
982963
use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider};
983-
use crate::ln::channelmanager::{PaymentSendFailure, PaymentId, RecipientOnionFields};
984964
use crate::ln::functional_test_utils::*;
985965
use crate::ln::msgs::ChannelMessageHandler;
986-
use crate::util::errors::APIError;
987966

988967
#[test]
989968
fn test_async_ooo_offchain_updates() {
@@ -1090,76 +1069,6 @@ mod tests {
10901069
check_added_monitors!(nodes[0], 1);
10911070
}
10921071

1093-
fn do_chainsync_pauses_events(block_timeout: bool) {
1094-
// When a chainsync monitor update occurs, any MonitorUpdates should be held before being
1095-
// passed upstream to a `ChannelManager` via `Watch::release_pending_monitor_events`. This
1096-
// tests that behavior, as well as some ways it might go wrong.
1097-
let chanmon_cfgs = create_chanmon_cfgs(2);
1098-
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1099-
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1100-
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1101-
let channel = create_announced_chan_between_nodes(&nodes, 0, 1);
1102-
1103-
// Get a route for later and rebalance the channel somewhat
1104-
send_payment(&nodes[0], &[&nodes[1]], 10_000_000);
1105-
let (route, second_payment_hash, _, second_payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], 100_000);
1106-
1107-
// First route a payment that we will claim on chain and give the recipient the preimage.
1108-
let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
1109-
nodes[1].node.claim_funds(payment_preimage);
1110-
expect_payment_claimed!(nodes[1], payment_hash, 1_000_000);
1111-
nodes[1].node.get_and_clear_pending_msg_events();
1112-
check_added_monitors!(nodes[1], 1);
1113-
let remote_txn = get_local_commitment_txn!(nodes[1], channel.2);
1114-
assert_eq!(remote_txn.len(), 2);
1115-
1116-
// Temp-fail the block connection which will hold the channel-closed event
1117-
chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
1118-
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
1119-
1120-
// Connect B's commitment transaction, but only to the ChainMonitor/ChannelMonitor. The
1121-
// channel is now closed, but the ChannelManager doesn't know that yet.
1122-
let new_header = create_dummy_header(nodes[0].best_block_info().0, 0);
1123-
nodes[0].chain_monitor.chain_monitor.transactions_confirmed(&new_header,
1124-
&[(0, &remote_txn[0]), (1, &remote_txn[1])], nodes[0].best_block_info().1 + 1);
1125-
assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
1126-
nodes[0].chain_monitor.chain_monitor.best_block_updated(&new_header, nodes[0].best_block_info().1 + 1);
1127-
assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
1128-
1129-
// If the ChannelManager tries to update the channel, however, the ChainMonitor will pass
1130-
// the update through to the ChannelMonitor which will refuse it (as the channel is closed).
1131-
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
1132-
unwrap_send_err!(nodes[0].node.send_payment_with_route(&route, second_payment_hash,
1133-
RecipientOnionFields::secret_only(second_payment_secret), PaymentId(second_payment_hash.0)
1134-
), false, APIError::MonitorUpdateInProgress, {});
1135-
check_added_monitors!(nodes[0], 1);
1136-
1137-
// However, as the ChainMonitor is still waiting for the original persistence to complete,
1138-
// it won't yet release the MonitorEvents.
1139-
assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
1140-
1141-
if block_timeout {
1142-
// After three blocks, pending MonitorEvents should be released either way.
1143-
let latest_header = create_dummy_header(nodes[0].best_block_info().0, 0);
1144-
nodes[0].chain_monitor.chain_monitor.best_block_updated(&latest_header, nodes[0].best_block_info().1 + LATENCY_GRACE_PERIOD_BLOCKS);
1145-
} else {
1146-
let persistences = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clone();
1147-
for (funding_outpoint, update_ids) in persistences {
1148-
for update_id in update_ids {
1149-
nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_outpoint, update_id).unwrap();
1150-
}
1151-
}
1152-
}
1153-
1154-
expect_payment_sent(&nodes[0], payment_preimage, None, true, false);
1155-
}
1156-
1157-
#[test]
1158-
fn chainsync_pauses_events() {
1159-
do_chainsync_pauses_events(false);
1160-
do_chainsync_pauses_events(true);
1161-
}
1162-
11631072
#[test]
11641073
#[cfg(feature = "std")]
11651074
fn update_during_chainsync_poisons_channel() {
@@ -1182,3 +1091,4 @@ mod tests {
11821091
}).is_err());
11831092
}
11841093
}
1094+

lightning/src/ln/payment_tests.rs

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,9 @@
1111
//! serialization ordering between ChannelManager/ChannelMonitors and ensuring we can still retry
1212
//! payments thereafter.
1313
14-
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen, Watch};
14+
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen};
1515
use crate::chain::channelmonitor::{ANTI_REORG_DELAY, HTLC_FAIL_BACK_BUFFER, LATENCY_GRACE_PERIOD_BLOCKS};
1616
use crate::sign::EntropySource;
17-
use crate::chain::transaction::OutPoint;
1817
use crate::events::{ClosureReason, Event, HTLCDestination, MessageSendEvent, MessageSendEventsProvider, PathFailure, PaymentFailureReason, PaymentPurpose};
1918
use crate::ln::channel::{EXPIRE_PREV_CONFIG_TICKS, commit_tx_fee_msat, get_holder_selected_channel_reserve_satoshis, ANCHOR_OUTPUT_VALUE_SATOSHI};
2019
use crate::ln::channelmanager::{BREAKDOWN_TIMEOUT, MPP_TIMEOUT_TICKS, MIN_CLTV_EXPIRY_DELTA, PaymentId, PaymentSendFailure, RecentPaymentDetails, RecipientOnionFields, HTLCForwardInfo, PendingHTLCRouting, PendingAddHTLCInfo};
@@ -1030,16 +1029,15 @@ fn test_completed_payment_not_retryable_on_reload() {
10301029
do_test_completed_payment_not_retryable_on_reload(false);
10311030
}
10321031

1033-
1034-
fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, confirm_commitment_tx: bool, payment_timeout: bool) {
1032+
fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bool, confirm_commitment_tx: bool, payment_timeout: bool) {
10351033
// When a Channel is closed, any outbound HTLCs which were relayed through it are simply
1036-
// dropped when the Channel is. From there, the ChannelManager relies on the ChannelMonitor
1037-
// having a copy of the relevant fail-/claim-back data and processes the HTLC fail/claim when
1038-
// the ChannelMonitor tells it to.
1034+
// dropped. From there, the ChannelManager relies on the ChannelMonitor having a copy of the
1035+
// relevant fail-/claim-back data and processes the HTLC fail/claim when the ChannelMonitor tells
1036+
// it to.
10391037
//
1040-
// If, due to an on-chain event, an HTLC is failed/claimed, we should avoid providing the
1041-
// ChannelManager the HTLC event until after the monitor is re-persisted. This should prevent a
1042-
// duplicate HTLC fail/claim (e.g. via a PaymentPathFailed event).
1038+
// If, due to an on-chain event, an HTLC is failed/claimed, we provide the
1039+
// ChannelManager with the HTLC event without waiting for ChannelMonitor persistence.
1040+
// This might generate duplicate HTLC fail/claim (e.g. via a PaymentPathFailed event) on reload.
10431041
let chanmon_cfgs = create_chanmon_cfgs(2);
10441042
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
10451043
let persister;
@@ -1112,14 +1110,9 @@ fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, co
11121110
connect_block(&nodes[0], &claim_block);
11131111
}
11141112

1115-
let funding_txo = OutPoint { txid: funding_tx.txid(), index: 0 };
1116-
let mon_updates: Vec<_> = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap()
1117-
.get_mut(&funding_txo).unwrap().drain().collect();
1118-
// If we are using chain::Confirm instead of chain::Listen, we will get the same update twice.
1119-
// If we're testing connection idempotency we may get substantially more.
1120-
assert!(mon_updates.len() >= 1);
1121-
assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
1122-
assert!(nodes[0].node.get_and_clear_pending_events().is_empty());
1113+
// Note that we skip persisting ChannelMonitors. We should still be generating the payment sent
1114+
// event without ChannelMonitor persistence. If we reset to a previous state on reload, the block
1115+
// should be replayed and we'll regenerate the event.
11231116

11241117
// If we persist the ChannelManager here, we should get the PaymentSent event after
11251118
// deserialization.
@@ -1128,13 +1121,7 @@ fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, co
11281121
chan_manager_serialized = nodes[0].node.encode();
11291122
}
11301123

1131-
// Now persist the ChannelMonitor and inform the ChainMonitor that we're done, generating the
1132-
// payment sent event.
1133-
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
11341124
let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
1135-
for update in mon_updates {
1136-
nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_txo, update).unwrap();
1137-
}
11381125
if payment_timeout {
11391126
expect_payment_failed!(nodes[0], payment_hash, false);
11401127
} else {
@@ -1168,13 +1155,13 @@ fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, co
11681155
}
11691156

11701157
#[test]
1171-
fn test_dup_htlc_onchain_fails_on_reload() {
1172-
do_test_dup_htlc_onchain_fails_on_reload(true, true, true);
1173-
do_test_dup_htlc_onchain_fails_on_reload(true, true, false);
1174-
do_test_dup_htlc_onchain_fails_on_reload(true, false, false);
1175-
do_test_dup_htlc_onchain_fails_on_reload(false, true, true);
1176-
do_test_dup_htlc_onchain_fails_on_reload(false, true, false);
1177-
do_test_dup_htlc_onchain_fails_on_reload(false, false, false);
1158+
fn test_dup_htlc_onchain_doesnt_fail_on_reload() {
1159+
do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true);
1160+
do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, false);
1161+
do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, false);
1162+
do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, true, true);
1163+
do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, true, false);
1164+
do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, false);
11781165
}
11791166

11801167
#[test]

0 commit comments

Comments
 (0)