Skip to content

Commit 60b4baf

Browse files
committed
Rewrite InvoicePayer retry to correctly handle MPP partial failures
This rewrites a good chunk of the retry logic in `InvoicePayer` to address two issues: * it was not considering the return value of `send_payment` (and `retry_payment`) may indicate a failure on some paths but not others, * it was not considering that more failures may still come later when removing elements from the retry count map. This could result in us seeing an MPP-partial-failure, failing to retry, removing the retries count entry, and then retrying other parts, potentially forever.
1 parent 06b6016 commit 60b4baf

File tree

1 file changed

+137
-78
lines changed

1 file changed

+137
-78
lines changed

lightning-invoice/src/payment.rs

Lines changed: 137 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ where
220220
if invoice.amount_milli_satoshis().is_none() {
221221
Err(PaymentError::Invoice("amount missing"))
222222
} else {
223-
self.pay_invoice_internal(invoice, None)
223+
self.pay_invoice_internal(invoice, None, 0)
224224
}
225225
}
226226

@@ -232,59 +232,141 @@ where
232232
if invoice.amount_milli_satoshis().is_some() {
233233
Err(PaymentError::Invoice("amount unexpected"))
234234
} else {
235-
self.pay_invoice_internal(invoice, Some(amount_msats))
235+
self.pay_invoice_internal(invoice, Some(amount_msats), 0)
236236
}
237237
}
238238

239239
fn pay_invoice_internal(
240-
&self, invoice: &Invoice, amount_msats: Option<u64>
240+
&self, invoice: &Invoice, amount_msats: Option<u64>, retry_count: usize
241241
) -> Result<PaymentId, PaymentError> {
242242
debug_assert!(invoice.amount_milli_satoshis().is_some() ^ amount_msats.is_some());
243243
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
244-
let mut payment_cache = self.payment_cache.lock().unwrap();
245-
match payment_cache.entry(payment_hash) {
246-
hash_map::Entry::Vacant(entry) => {
247-
let payer = self.payer.node_id();
248-
let mut payee = Payee::new(invoice.recover_payee_pub_key())
249-
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
250-
.with_route_hints(invoice.route_hints());
251-
if let Some(features) = invoice.features() {
252-
payee = payee.with_features(features.clone());
253-
}
254-
let params = RouteParameters {
255-
payee,
256-
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
257-
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
244+
let failed_paths_data = loop {
245+
let mut payment_cache = self.payment_cache.lock().unwrap();
246+
match payment_cache.entry(payment_hash) {
247+
hash_map::Entry::Vacant(entry) => {
248+
let payer = self.payer.node_id();
249+
let mut payee = Payee::new(invoice.recover_payee_pub_key())
250+
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
251+
.with_route_hints(invoice.route_hints());
252+
if let Some(features) = invoice.features() {
253+
payee = payee.with_features(features.clone());
254+
}
255+
let params = RouteParameters {
256+
payee,
257+
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
258+
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
259+
};
260+
let first_hops = self.payer.first_hops();
261+
let route = self.router.find_route(
262+
&payer,
263+
&params,
264+
Some(&first_hops.iter().collect::<Vec<_>>()),
265+
).map_err(|e| PaymentError::Routing(e))?;
266+
267+
let payment_secret = Some(invoice.payment_secret().clone());
268+
let payment_id = match self.payer.send_payment(&route, payment_hash, &payment_secret) {
269+
Ok(payment_id) => payment_id,
270+
Err(PaymentSendFailure::ParameterError(e)) =>
271+
return Err(PaymentError::Sending(PaymentSendFailure::ParameterError(e))),
272+
Err(PaymentSendFailure::PathParameterError(e)) =>
273+
return Err(PaymentError::Sending(PaymentSendFailure::PathParameterError(e))),
274+
Err(PaymentSendFailure::AllFailedRetrySafe(e)) => {
275+
if retry_count >= self.retry_attempts.0 {
276+
return Err(PaymentError::Sending(PaymentSendFailure::AllFailedRetrySafe(e)))
277+
}
278+
break None;
279+
},
280+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, payment_id }) => {
281+
if let Some(retry_data) = failed_paths_retry {
282+
entry.insert(retry_count);
283+
break Some((retry_data, payment_id));
284+
} else {
285+
// This may happen if we send a payment and some paths fail, but
286+
// only due to a temporary monitor failure or the like, implying
287+
// they're really in-flight, but we haven't sent the initial
288+
// HTLC-Add messages yet.
289+
payment_id
290+
}
291+
},
292+
};
293+
entry.insert(retry_count);
294+
return Ok(payment_id);
295+
},
296+
hash_map::Entry::Occupied(_) => return Err(PaymentError::Invoice("payment pending")),
297+
}
298+
};
299+
if let Some((retry, payment_id)) = failed_paths_data {
300+
// Some paths were sent, even if we failed to send the full MPP value our recipient may
301+
// misbehave and claim the funds, at which point we have to consider the payment sent,
302+
// so return `Ok()` here, ignoring any retry errors.
303+
let _ = self.retry_payment(true, payment_id, payment_hash, &retry);
304+
Ok(payment_id)
305+
} else {
306+
self.pay_invoice_internal(invoice, amount_msats, retry_count + 1)
307+
}
308+
}
309+
310+
fn retry_payment(&self, other_paths_pending: bool, payment_id: PaymentId, payment_hash: PaymentHash, params: &RouteParameters)
311+
-> Result<(), ()> {
312+
let route;
313+
{
314+
let mut payment_cache = self.payment_cache.lock().unwrap();
315+
let entry = loop {
316+
let entry = payment_cache.entry(payment_hash);
317+
match entry {
318+
hash_map::Entry::Occupied(_) => break entry,
319+
hash_map::Entry::Vacant(entry) => entry.insert(0),
258320
};
321+
};
322+
if let hash_map::Entry::Occupied(mut entry) = entry {
323+
let max_payment_attempts = self.retry_attempts.0 + 1;
324+
let attempts = entry.get_mut();
325+
*attempts += 1;
326+
327+
if *attempts >= max_payment_attempts {
328+
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
329+
if !other_paths_pending { entry.remove(); }
330+
return Err(());
331+
} else if has_expired(params) {
332+
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
333+
if !other_paths_pending { entry.remove(); }
334+
return Err(());
335+
}
336+
337+
let payer = self.payer.node_id();
259338
let first_hops = self.payer.first_hops();
260-
let route = self.router.find_route(
261-
&payer,
262-
&params,
263-
Some(&first_hops.iter().collect::<Vec<_>>()),
264-
&self.scorer.lock(),
265-
).map_err(|e| PaymentError::Routing(e))?;
266-
267-
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
268-
let payment_secret = Some(invoice.payment_secret().clone());
269-
let payment_id = self.payer.send_payment(&route, payment_hash, &payment_secret)
270-
.map_err(|e| PaymentError::Sending(e))?;
271-
entry.insert(0);
272-
Ok(payment_id)
273-
},
274-
hash_map::Entry::Occupied(_) => Err(PaymentError::Invoice("payment pending")),
339+
route = self.router.find_route(&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()), &self.scorer.lock());
340+
if route.is_err() {
341+
log_trace!(self.logger, "Failed to find a route for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
342+
if !other_paths_pending { entry.remove(); }
343+
return Err(());
344+
}
345+
} else {
346+
unreachable!();
347+
}
275348
}
276-
}
277349

278-
fn retry_payment(
279-
&self, payment_id: PaymentId, params: &RouteParameters
280-
) -> Result<(), PaymentError> {
281-
let payer = self.payer.node_id();
282-
let first_hops = self.payer.first_hops();
283-
let route = self.router.find_route(
284-
&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()),
285-
&self.scorer.lock()
286-
).map_err(|e| PaymentError::Routing(e))?;
287-
self.payer.retry_payment(&route, payment_id).map_err(|e| PaymentError::Sending(e))
350+
let retry_res = self.payer.retry_payment(&route.unwrap(), payment_id);
351+
match retry_res {
352+
Ok(()) => Ok(()),
353+
Err(PaymentSendFailure::ParameterError(_)) |
354+
Err(PaymentSendFailure::PathParameterError(_)) => {
355+
log_trace!(self.logger, "Failed to retry for payment {} due to bogus route/payment data, not retrying.", log_bytes!(payment_hash.0));
356+
if !other_paths_pending { self.payment_cache.lock().unwrap().remove(&payment_hash); }
357+
return Err(());
358+
},
359+
Err(PaymentSendFailure::AllFailedRetrySafe(_)) => {
360+
self.retry_payment(other_paths_pending, payment_id, payment_hash, params)
361+
},
362+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, .. }) => {
363+
if let Some(retry) = failed_paths_retry {
364+
self.retry_payment(true, payment_id, payment_hash, &retry)
365+
} else {
366+
Ok(())
367+
}
368+
},
369+
}
288370
}
289371

290372
/// Removes the payment cached by the given payment hash.
@@ -316,48 +398,25 @@ where
316398
{
317399
fn handle_event(&self, event: &Event) {
318400
match event {
319-
Event::PaymentPathFailed {
320-
payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, ..
321-
} => {
401+
Event::PaymentPathFailed { all_paths_failed, payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, .. } => {
322402
if let Some(short_channel_id) = short_channel_id {
323403
self.scorer.lock().payment_path_failed(path, *short_channel_id);
324404
}
325405

326-
let mut payment_cache = self.payment_cache.lock().unwrap();
327-
let entry = loop {
328-
let entry = payment_cache.entry(*payment_hash);
329-
match entry {
330-
hash_map::Entry::Occupied(_) => break entry,
331-
hash_map::Entry::Vacant(entry) => entry.insert(0),
332-
};
333-
};
334-
if let hash_map::Entry::Occupied(mut entry) = entry {
335-
let max_payment_attempts = self.retry_attempts.0 + 1;
336-
let attempts = entry.get_mut();
337-
*attempts += 1;
338-
339-
if *rejected_by_dest {
340-
log_trace!(self.logger, "Payment {} rejected by destination; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
341-
} else if payment_id.is_none() {
342-
log_trace!(self.logger, "Payment {} has no id; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
343-
} else if *attempts >= max_payment_attempts {
344-
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
345-
} else if retry.is_none() {
346-
log_trace!(self.logger, "Payment {} missing retry params; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
347-
} else if has_expired(retry.as_ref().unwrap()) {
348-
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
349-
} else if self.retry_payment(*payment_id.as_ref().unwrap(), retry.as_ref().unwrap()).is_err() {
350-
log_trace!(self.logger, "Error retrying payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
351-
} else {
352-
log_trace!(self.logger, "Payment {} failed; retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
406+
if *rejected_by_dest {
407+
log_trace!(self.logger, "Payment {} rejected by destination; not retrying", log_bytes!(payment_hash.0));
408+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
409+
} else if payment_id.is_none() {
410+
log_trace!(self.logger, "Payment {} has no id; not retrying", log_bytes!(payment_hash.0));
411+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
412+
} else if let Some(params) = retry {
413+
if self.retry_payment(!all_paths_failed, payment_id.unwrap(), *payment_hash, params).is_ok() {
414+
// We retried at least somewhat, don't provide the PaymentPathFailed event to the user.
353415
return;
354416
}
355-
356-
// Either the payment was rejected, the maximum attempts were exceeded, or an
357-
// error occurred when attempting to retry.
358-
entry.remove();
359417
} else {
360-
unreachable!();
418+
log_trace!(self.logger, "Payment {} missing retry params; not retrying", log_bytes!(payment_hash.0));
419+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
361420
}
362421
},
363422
Event::PaymentSent { payment_hash, .. } => {

0 commit comments

Comments
 (0)