Skip to content

Commit 6ef8d30

Browse files
committed
Rewrite InvoicePayer retry to correctly handle MPP partial failures
This rewrites a good chunk of the retry logic in `InvoicePayer` to address two issues: * it was not considering the return value of `send_payment` (and `retry_payment`) may indicate a failure on some paths but not others, * it was not considering that more failures may still come later when removing elements from the retry count map. This could result in us seeing an MPP-partial-failure, failing to retry, removing the retries count entry, and then retrying other parts, potentially forever.
1 parent 06b6016 commit 6ef8d30

File tree

1 file changed

+138
-78
lines changed

1 file changed

+138
-78
lines changed

lightning-invoice/src/payment.rs

Lines changed: 138 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ where
220220
if invoice.amount_milli_satoshis().is_none() {
221221
Err(PaymentError::Invoice("amount missing"))
222222
} else {
223-
self.pay_invoice_internal(invoice, None)
223+
self.pay_invoice_internal(invoice, None, 0)
224224
}
225225
}
226226

@@ -232,59 +232,142 @@ where
232232
if invoice.amount_milli_satoshis().is_some() {
233233
Err(PaymentError::Invoice("amount unexpected"))
234234
} else {
235-
self.pay_invoice_internal(invoice, Some(amount_msats))
235+
self.pay_invoice_internal(invoice, Some(amount_msats), 0)
236236
}
237237
}
238238

239239
fn pay_invoice_internal(
240-
&self, invoice: &Invoice, amount_msats: Option<u64>
240+
&self, invoice: &Invoice, amount_msats: Option<u64>, retry_count: usize
241241
) -> Result<PaymentId, PaymentError> {
242242
debug_assert!(invoice.amount_milli_satoshis().is_some() ^ amount_msats.is_some());
243243
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
244-
let mut payment_cache = self.payment_cache.lock().unwrap();
245-
match payment_cache.entry(payment_hash) {
246-
hash_map::Entry::Vacant(entry) => {
247-
let payer = self.payer.node_id();
248-
let mut payee = Payee::new(invoice.recover_payee_pub_key())
249-
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
250-
.with_route_hints(invoice.route_hints());
251-
if let Some(features) = invoice.features() {
252-
payee = payee.with_features(features.clone());
253-
}
254-
let params = RouteParameters {
255-
payee,
256-
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
257-
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
244+
let failed_paths_data = loop {
245+
let mut payment_cache = self.payment_cache.lock().unwrap();
246+
match payment_cache.entry(payment_hash) {
247+
hash_map::Entry::Vacant(entry) => {
248+
let payer = self.payer.node_id();
249+
let mut payee = Payee::new(invoice.recover_payee_pub_key())
250+
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
251+
.with_route_hints(invoice.route_hints());
252+
if let Some(features) = invoice.features() {
253+
payee = payee.with_features(features.clone());
254+
}
255+
let params = RouteParameters {
256+
payee,
257+
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
258+
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
259+
};
260+
let first_hops = self.payer.first_hops();
261+
let route = self.router.find_route(
262+
&payer,
263+
&params,
264+
Some(&first_hops.iter().collect::<Vec<_>>()),
265+
&self.scorer.lock(),
266+
).map_err(|e| PaymentError::Routing(e))?;
267+
268+
let payment_secret = Some(invoice.payment_secret().clone());
269+
let payment_id = match self.payer.send_payment(&route, payment_hash, &payment_secret) {
270+
Ok(payment_id) => payment_id,
271+
Err(PaymentSendFailure::ParameterError(e)) =>
272+
return Err(PaymentError::Sending(PaymentSendFailure::ParameterError(e))),
273+
Err(PaymentSendFailure::PathParameterError(e)) =>
274+
return Err(PaymentError::Sending(PaymentSendFailure::PathParameterError(e))),
275+
Err(PaymentSendFailure::AllFailedRetrySafe(e)) => {
276+
if retry_count >= self.retry_attempts.0 {
277+
return Err(PaymentError::Sending(PaymentSendFailure::AllFailedRetrySafe(e)))
278+
}
279+
break None;
280+
},
281+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, payment_id }) => {
282+
if let Some(retry_data) = failed_paths_retry {
283+
entry.insert(retry_count);
284+
break Some((retry_data, payment_id));
285+
} else {
286+
// This may happen if we send a payment and some paths fail, but
287+
// only due to a temporary monitor failure or the like, implying
288+
// they're really in-flight, but we haven't sent the initial
289+
// HTLC-Add messages yet.
290+
payment_id
291+
}
292+
},
293+
};
294+
entry.insert(retry_count);
295+
return Ok(payment_id);
296+
},
297+
hash_map::Entry::Occupied(_) => return Err(PaymentError::Invoice("payment pending")),
298+
}
299+
};
300+
if let Some((retry, payment_id)) = failed_paths_data {
301+
// Some paths were sent, even if we failed to send the full MPP value our recipient may
302+
// misbehave and claim the funds, at which point we have to consider the payment sent,
303+
// so return `Ok()` here, ignoring any retry errors.
304+
let _ = self.retry_payment(true, payment_id, payment_hash, &retry);
305+
Ok(payment_id)
306+
} else {
307+
self.pay_invoice_internal(invoice, amount_msats, retry_count + 1)
308+
}
309+
}
310+
311+
fn retry_payment(&self, other_paths_pending: bool, payment_id: PaymentId, payment_hash: PaymentHash, params: &RouteParameters)
312+
-> Result<(), ()> {
313+
let route;
314+
{
315+
let mut payment_cache = self.payment_cache.lock().unwrap();
316+
let entry = loop {
317+
let entry = payment_cache.entry(payment_hash);
318+
match entry {
319+
hash_map::Entry::Occupied(_) => break entry,
320+
hash_map::Entry::Vacant(entry) => entry.insert(0),
258321
};
322+
};
323+
if let hash_map::Entry::Occupied(mut entry) = entry {
324+
let max_payment_attempts = self.retry_attempts.0 + 1;
325+
let attempts = entry.get_mut();
326+
*attempts += 1;
327+
328+
if *attempts >= max_payment_attempts {
329+
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
330+
if !other_paths_pending { entry.remove(); }
331+
return Err(());
332+
} else if has_expired(params) {
333+
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
334+
if !other_paths_pending { entry.remove(); }
335+
return Err(());
336+
}
337+
338+
let payer = self.payer.node_id();
259339
let first_hops = self.payer.first_hops();
260-
let route = self.router.find_route(
261-
&payer,
262-
&params,
263-
Some(&first_hops.iter().collect::<Vec<_>>()),
264-
&self.scorer.lock(),
265-
).map_err(|e| PaymentError::Routing(e))?;
266-
267-
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
268-
let payment_secret = Some(invoice.payment_secret().clone());
269-
let payment_id = self.payer.send_payment(&route, payment_hash, &payment_secret)
270-
.map_err(|e| PaymentError::Sending(e))?;
271-
entry.insert(0);
272-
Ok(payment_id)
273-
},
274-
hash_map::Entry::Occupied(_) => Err(PaymentError::Invoice("payment pending")),
340+
route = self.router.find_route(&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()), &self.scorer.lock());
341+
if route.is_err() {
342+
log_trace!(self.logger, "Failed to find a route for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
343+
if !other_paths_pending { entry.remove(); }
344+
return Err(());
345+
}
346+
} else {
347+
unreachable!();
348+
}
275349
}
276-
}
277350

278-
fn retry_payment(
279-
&self, payment_id: PaymentId, params: &RouteParameters
280-
) -> Result<(), PaymentError> {
281-
let payer = self.payer.node_id();
282-
let first_hops = self.payer.first_hops();
283-
let route = self.router.find_route(
284-
&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()),
285-
&self.scorer.lock()
286-
).map_err(|e| PaymentError::Routing(e))?;
287-
self.payer.retry_payment(&route, payment_id).map_err(|e| PaymentError::Sending(e))
351+
let retry_res = self.payer.retry_payment(&route.unwrap(), payment_id);
352+
match retry_res {
353+
Ok(()) => Ok(()),
354+
Err(PaymentSendFailure::ParameterError(_)) |
355+
Err(PaymentSendFailure::PathParameterError(_)) => {
356+
log_trace!(self.logger, "Failed to retry for payment {} due to bogus route/payment data, not retrying.", log_bytes!(payment_hash.0));
357+
if !other_paths_pending { self.payment_cache.lock().unwrap().remove(&payment_hash); }
358+
return Err(());
359+
},
360+
Err(PaymentSendFailure::AllFailedRetrySafe(_)) => {
361+
self.retry_payment(other_paths_pending, payment_id, payment_hash, params)
362+
},
363+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, .. }) => {
364+
if let Some(retry) = failed_paths_retry {
365+
self.retry_payment(true, payment_id, payment_hash, &retry)
366+
} else {
367+
Ok(())
368+
}
369+
},
370+
}
288371
}
289372

290373
/// Removes the payment cached by the given payment hash.
@@ -316,48 +399,25 @@ where
316399
{
317400
fn handle_event(&self, event: &Event) {
318401
match event {
319-
Event::PaymentPathFailed {
320-
payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, ..
321-
} => {
402+
Event::PaymentPathFailed { all_paths_failed, payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, .. } => {
322403
if let Some(short_channel_id) = short_channel_id {
323404
self.scorer.lock().payment_path_failed(path, *short_channel_id);
324405
}
325406

326-
let mut payment_cache = self.payment_cache.lock().unwrap();
327-
let entry = loop {
328-
let entry = payment_cache.entry(*payment_hash);
329-
match entry {
330-
hash_map::Entry::Occupied(_) => break entry,
331-
hash_map::Entry::Vacant(entry) => entry.insert(0),
332-
};
333-
};
334-
if let hash_map::Entry::Occupied(mut entry) = entry {
335-
let max_payment_attempts = self.retry_attempts.0 + 1;
336-
let attempts = entry.get_mut();
337-
*attempts += 1;
338-
339-
if *rejected_by_dest {
340-
log_trace!(self.logger, "Payment {} rejected by destination; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
341-
} else if payment_id.is_none() {
342-
log_trace!(self.logger, "Payment {} has no id; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
343-
} else if *attempts >= max_payment_attempts {
344-
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
345-
} else if retry.is_none() {
346-
log_trace!(self.logger, "Payment {} missing retry params; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
347-
} else if has_expired(retry.as_ref().unwrap()) {
348-
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
349-
} else if self.retry_payment(*payment_id.as_ref().unwrap(), retry.as_ref().unwrap()).is_err() {
350-
log_trace!(self.logger, "Error retrying payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
351-
} else {
352-
log_trace!(self.logger, "Payment {} failed; retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
407+
if *rejected_by_dest {
408+
log_trace!(self.logger, "Payment {} rejected by destination; not retrying", log_bytes!(payment_hash.0));
409+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
410+
} else if payment_id.is_none() {
411+
log_trace!(self.logger, "Payment {} has no id; not retrying", log_bytes!(payment_hash.0));
412+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
413+
} else if let Some(params) = retry {
414+
if self.retry_payment(!all_paths_failed, payment_id.unwrap(), *payment_hash, params).is_ok() {
415+
// We retried at least somewhat, don't provide the PaymentPathFailed event to the user.
353416
return;
354417
}
355-
356-
// Either the payment was rejected, the maximum attempts were exceeded, or an
357-
// error occurred when attempting to retry.
358-
entry.remove();
359418
} else {
360-
unreachable!();
419+
log_trace!(self.logger, "Payment {} missing retry params; not retrying", log_bytes!(payment_hash.0));
420+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
361421
}
362422
},
363423
Event::PaymentSent { payment_hash, .. } => {

0 commit comments

Comments
 (0)