Skip to content

Commit 7112661

Browse files
committed
Rewrite InvoicePayer retry to correctly handle MPP partial failures
This rewrites a good chunk of the retry logic in `InvoicePayer` to address two issues: * it was not considering the return value of `send_payment` (and `retry_payment`) may indicate a failure on some paths but not others, * it was not considering that more failures may still come later when removing elements from the retry count map. This could result in us seeing an MPP-partial-failure, failing to retry, removing the retries count entry, and then retrying other parts, potentially forever.
1 parent e9e8a7d commit 7112661

File tree

1 file changed

+132
-76
lines changed

1 file changed

+132
-76
lines changed

lightning-invoice/src/payment.rs

Lines changed: 132 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ where
228228
if invoice.amount_milli_satoshis().is_none() {
229229
Err(PaymentError::Invoice("amount missing"))
230230
} else {
231-
self.pay_invoice_internal(invoice, None)
231+
self.pay_invoice_internal(invoice, None, 0)
232232
}
233233
}
234234

@@ -244,59 +244,138 @@ where
244244
if invoice.amount_milli_satoshis().is_some() {
245245
Err(PaymentError::Invoice("amount unexpected"))
246246
} else {
247-
self.pay_invoice_internal(invoice, Some(amount_msats))
247+
self.pay_invoice_internal(invoice, Some(amount_msats), 0)
248248
}
249249
}
250250

251251
fn pay_invoice_internal(
252-
&self, invoice: &Invoice, amount_msats: Option<u64>
252+
&self, invoice: &Invoice, amount_msats: Option<u64>, retry_count: usize
253253
) -> Result<PaymentId, PaymentError> {
254254
debug_assert!(invoice.amount_milli_satoshis().is_some() ^ amount_msats.is_some());
255255
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
256-
let mut payment_cache = self.payment_cache.lock().unwrap();
257-
match payment_cache.entry(payment_hash) {
258-
hash_map::Entry::Vacant(entry) => {
259-
let payer = self.payer.node_id();
260-
let mut payee = Payee::new(invoice.recover_payee_pub_key())
261-
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
262-
.with_route_hints(invoice.route_hints());
263-
if let Some(features) = invoice.features() {
264-
payee = payee.with_features(features.clone());
265-
}
266-
let params = RouteParameters {
267-
payee,
268-
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
269-
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
256+
let retry_data_payment_id = loop {
257+
let mut payment_cache = self.payment_cache.lock().unwrap();
258+
match payment_cache.entry(payment_hash) {
259+
hash_map::Entry::Vacant(entry) => {
260+
let payer = self.payer.node_id();
261+
let mut payee = Payee::new(invoice.recover_payee_pub_key())
262+
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
263+
.with_route_hints(invoice.route_hints());
264+
if let Some(features) = invoice.features() {
265+
payee = payee.with_features(features.clone());
266+
}
267+
let params = RouteParameters {
268+
payee,
269+
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
270+
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
271+
};
272+
let first_hops = self.payer.first_hops();
273+
let route = self.router.find_route(
274+
&payer,
275+
&params,
276+
Some(&first_hops.iter().collect::<Vec<_>>()),
277+
&self.scorer.lock(),
278+
).map_err(|e| PaymentError::Routing(e))?;
279+
280+
let payment_secret = Some(invoice.payment_secret().clone());
281+
let payment_id = match self.payer.send_payment(&route, payment_hash, &payment_secret) {
282+
Ok(payment_id) => payment_id,
283+
Err(PaymentSendFailure::ParameterError(e)) =>
284+
return Err(PaymentError::Sending(PaymentSendFailure::ParameterError(e))),
285+
Err(PaymentSendFailure::PathParameterError(e)) =>
286+
return Err(PaymentError::Sending(PaymentSendFailure::PathParameterError(e))),
287+
Err(PaymentSendFailure::AllFailedRetrySafe(e)) => {
288+
if retry_count >= self.retry_attempts.0 {
289+
return Err(PaymentError::Sending(PaymentSendFailure::AllFailedRetrySafe(e)))
290+
}
291+
break None;
292+
},
293+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, payment_id }) => {
294+
if let Some(retry_data) = failed_paths_retry {
295+
entry.insert(retry_count);
296+
break Some((retry_data, payment_id));
297+
} else {
298+
// This may happen if we send a payment and some paths fail, but
299+
// only due to a temporary monitor failure or the like, implying
300+
// they're really in-flight, but we haven't sent the initial
301+
// HTLC-Add messages yet.
302+
payment_id
303+
}
304+
},
305+
};
306+
entry.insert(retry_count);
307+
return Ok(payment_id);
308+
},
309+
hash_map::Entry::Occupied(_) => return Err(PaymentError::Invoice("payment pending")),
310+
}
311+
};
312+
if let Some((retry_data, payment_id)) = retry_data_payment_id {
313+
// Some paths were sent, even if we failed to send the full MPP value our recipient may
314+
// misbehave and claim the funds, at which point we have to consider the payment sent,
315+
// so return `Ok()` here, ignoring any retry errors.
316+
let _ = self.retry_payment(payment_id, payment_hash, &retry_data);
317+
Ok(payment_id)
318+
} else {
319+
self.pay_invoice_internal(invoice, amount_msats, retry_count + 1)
320+
}
321+
}
322+
323+
fn retry_payment(&self, payment_id: PaymentId, payment_hash: PaymentHash, params: &RouteParameters)
324+
-> Result<(), ()> {
325+
let route;
326+
{
327+
let mut payment_cache = self.payment_cache.lock().unwrap();
328+
let entry = loop {
329+
let entry = payment_cache.entry(payment_hash);
330+
match entry {
331+
hash_map::Entry::Occupied(_) => break entry,
332+
hash_map::Entry::Vacant(entry) => entry.insert(0),
270333
};
334+
};
335+
if let hash_map::Entry::Occupied(mut entry) = entry {
336+
let max_payment_attempts = self.retry_attempts.0 + 1;
337+
let attempts = entry.get_mut();
338+
*attempts += 1;
339+
340+
if *attempts >= max_payment_attempts {
341+
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
342+
return Err(());
343+
} else if has_expired(params) {
344+
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
345+
return Err(());
346+
}
347+
348+
let payer = self.payer.node_id();
271349
let first_hops = self.payer.first_hops();
272-
let route = self.router.find_route(
273-
&payer,
274-
&params,
275-
Some(&first_hops.iter().collect::<Vec<_>>()),
276-
&self.scorer.lock(),
277-
).map_err(|e| PaymentError::Routing(e))?;
278-
279-
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
280-
let payment_secret = Some(invoice.payment_secret().clone());
281-
let payment_id = self.payer.send_payment(&route, payment_hash, &payment_secret)
282-
.map_err(|e| PaymentError::Sending(e))?;
283-
entry.insert(0);
284-
Ok(payment_id)
285-
},
286-
hash_map::Entry::Occupied(_) => Err(PaymentError::Invoice("payment pending")),
350+
route = self.router.find_route(&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()), &self.scorer.lock());
351+
if route.is_err() {
352+
log_trace!(self.logger, "Failed to find a route for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
353+
return Err(());
354+
}
355+
} else {
356+
unreachable!();
357+
}
287358
}
288-
}
289359

290-
fn retry_payment(
291-
&self, payment_id: PaymentId, params: &RouteParameters
292-
) -> Result<(), PaymentError> {
293-
let payer = self.payer.node_id();
294-
let first_hops = self.payer.first_hops();
295-
let route = self.router.find_route(
296-
&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()),
297-
&self.scorer.lock()
298-
).map_err(|e| PaymentError::Routing(e))?;
299-
self.payer.retry_payment(&route, payment_id).map_err(|e| PaymentError::Sending(e))
360+
let retry_res = self.payer.retry_payment(&route.unwrap(), payment_id);
361+
match retry_res {
362+
Ok(()) => Ok(()),
363+
Err(PaymentSendFailure::ParameterError(_)) |
364+
Err(PaymentSendFailure::PathParameterError(_)) => {
365+
log_trace!(self.logger, "Failed to retry for payment {} due to bogus route/payment data, not retrying.", log_bytes!(payment_hash.0));
366+
return Err(());
367+
},
368+
Err(PaymentSendFailure::AllFailedRetrySafe(_)) => {
369+
self.retry_payment(payment_id, payment_hash, params)
370+
},
371+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, .. }) => {
372+
if let Some(retry) = failed_paths_retry {
373+
self.retry_payment(payment_id, payment_hash, &retry)
374+
} else {
375+
Ok(())
376+
}
377+
},
378+
}
300379
}
301380

302381
/// Removes the payment cached by the given payment hash.
@@ -329,48 +408,25 @@ where
329408
fn handle_event(&self, event: &Event) {
330409
match event {
331410
Event::PaymentPathFailed {
332-
payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, ..
411+
all_paths_failed, payment_id, payment_hash, rejected_by_dest, path, short_channel_id, retry, ..
333412
} => {
334413
if let Some(short_channel_id) = short_channel_id {
335414
self.scorer.lock().payment_path_failed(path, *short_channel_id);
336415
}
337416

338-
let mut payment_cache = self.payment_cache.lock().unwrap();
339-
let entry = loop {
340-
let entry = payment_cache.entry(*payment_hash);
341-
match entry {
342-
hash_map::Entry::Occupied(_) => break entry,
343-
hash_map::Entry::Vacant(entry) => entry.insert(0),
344-
};
345-
};
346-
if let hash_map::Entry::Occupied(mut entry) = entry {
347-
let max_payment_attempts = self.retry_attempts.0 + 1;
348-
let attempts = entry.get_mut();
349-
*attempts += 1;
350-
351-
if *rejected_by_dest {
352-
log_trace!(self.logger, "Payment {} rejected by destination; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
353-
} else if payment_id.is_none() {
354-
log_trace!(self.logger, "Payment {} has no id; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
355-
} else if *attempts >= max_payment_attempts {
356-
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
357-
} else if retry.is_none() {
358-
log_trace!(self.logger, "Payment {} missing retry params; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
359-
} else if has_expired(retry.as_ref().unwrap()) {
360-
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
361-
} else if self.retry_payment(*payment_id.as_ref().unwrap(), retry.as_ref().unwrap()).is_err() {
362-
log_trace!(self.logger, "Error retrying payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
363-
} else {
364-
log_trace!(self.logger, "Payment {} failed; retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
417+
if *rejected_by_dest {
418+
log_trace!(self.logger, "Payment {} rejected by destination; not retrying", log_bytes!(payment_hash.0));
419+
} else if payment_id.is_none() {
420+
log_trace!(self.logger, "Payment {} has no id; not retrying", log_bytes!(payment_hash.0));
421+
} else if let Some(params) = retry {
422+
if self.retry_payment(payment_id.unwrap(), *payment_hash, params).is_ok() {
423+
// We retried at least somewhat, don't provide the PaymentPathFailed event to the user.
365424
return;
366425
}
367-
368-
// Either the payment was rejected, the maximum attempts were exceeded, or an
369-
// error occurred when attempting to retry.
370-
entry.remove();
371426
} else {
372-
unreachable!();
427+
log_trace!(self.logger, "Payment {} missing retry params; not retrying", log_bytes!(payment_hash.0));
373428
}
429+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
374430
},
375431
Event::PaymentSent { payment_hash, .. } => {
376432
let mut payment_cache = self.payment_cache.lock().unwrap();

0 commit comments

Comments
 (0)