Skip to content

Commit 5a48fe4

Browse files
committed
Rewrite InvoicePayer retry to correctly handle MPP partial failures
This rewrites a good chunk of the retry logic in `InvoicePayer` to address two issues: * it was not considering the return value of `send_payment` (and `retry_payment`) may indicate a failure on some paths but not others, * it was not considering that more failures may still come later when removing elements from the retry count map. This could result in us seeing an MPP-partial-failure, failing to retry, removing the retries count entry, and then retrying other parts, potentially forever.
1 parent a8755e1 commit 5a48fe4

File tree

1 file changed

+137
-74
lines changed

1 file changed

+137
-74
lines changed

lightning-invoice/src/payment.rs

Lines changed: 137 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ where
201201
if invoice.amount_milli_satoshis().is_none() {
202202
Err(PaymentError::Invoice("amount missing"))
203203
} else {
204-
self.pay_invoice_internal(invoice, None)
204+
self.pay_invoice_internal(invoice, None, 0)
205205
}
206206
}
207207

@@ -213,57 +213,141 @@ where
213213
if invoice.amount_milli_satoshis().is_some() {
214214
Err(PaymentError::Invoice("amount unexpected"))
215215
} else {
216-
self.pay_invoice_internal(invoice, Some(amount_msats))
216+
self.pay_invoice_internal(invoice, Some(amount_msats), 0)
217217
}
218218
}
219219

220220
fn pay_invoice_internal(
221-
&self, invoice: &Invoice, amount_msats: Option<u64>
221+
&self, invoice: &Invoice, amount_msats: Option<u64>, retry_count: usize
222222
) -> Result<PaymentId, PaymentError> {
223223
debug_assert!(invoice.amount_milli_satoshis().is_some() ^ amount_msats.is_some());
224224
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
225-
let mut payment_cache = self.payment_cache.lock().unwrap();
226-
match payment_cache.entry(payment_hash) {
227-
hash_map::Entry::Vacant(entry) => {
228-
let payer = self.payer.node_id();
229-
let mut payee = Payee::new(invoice.recover_payee_pub_key())
230-
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
231-
.with_route_hints(invoice.route_hints());
232-
if let Some(features) = invoice.features() {
233-
payee = payee.with_features(features.clone());
234-
}
235-
let params = RouteParameters {
236-
payee,
237-
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
238-
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
225+
let failed_paths_data = loop {
226+
let mut payment_cache = self.payment_cache.lock().unwrap();
227+
match payment_cache.entry(payment_hash) {
228+
hash_map::Entry::Vacant(entry) => {
229+
let payer = self.payer.node_id();
230+
let mut payee = Payee::new(invoice.recover_payee_pub_key())
231+
.with_expiry_time(expiry_time_from_unix_epoch(&invoice).as_secs())
232+
.with_route_hints(invoice.route_hints());
233+
if let Some(features) = invoice.features() {
234+
payee = payee.with_features(features.clone());
235+
}
236+
let params = RouteParameters {
237+
payee,
238+
final_value_msat: invoice.amount_milli_satoshis().or(amount_msats).unwrap(),
239+
final_cltv_expiry_delta: invoice.min_final_cltv_expiry() as u32,
240+
};
241+
let first_hops = self.payer.first_hops();
242+
let route = self.router.find_route(
243+
&payer,
244+
&params,
245+
Some(&first_hops.iter().collect::<Vec<_>>()),
246+
).map_err(|e| PaymentError::Routing(e))?;
247+
248+
let payment_secret = Some(invoice.payment_secret().clone());
249+
let payment_id = match self.payer.send_payment(&route, payment_hash, &payment_secret) {
250+
Ok(payment_id) => payment_id,
251+
Err(PaymentSendFailure::ParameterError(e)) =>
252+
return Err(PaymentError::Sending(PaymentSendFailure::ParameterError(e))),
253+
Err(PaymentSendFailure::PathParameterError(e)) =>
254+
return Err(PaymentError::Sending(PaymentSendFailure::PathParameterError(e))),
255+
Err(PaymentSendFailure::AllFailedRetrySafe(e)) => {
256+
if retry_count >= self.retry_attempts.0 {
257+
return Err(PaymentError::Sending(PaymentSendFailure::AllFailedRetrySafe(e)))
258+
}
259+
break None;
260+
},
261+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, payment_id }) => {
262+
if let Some(retry_data) = failed_paths_retry {
263+
entry.insert(retry_count);
264+
break Some((retry_data, payment_id));
265+
} else {
266+
// This may happen if we send a payment and some paths fail, but
267+
// only due to a temporary monitor failure or the like, implying
268+
// they're really in-flight, but we haven't sent the initial
269+
// HTLC-Add messages yet.
270+
payment_id
271+
}
272+
},
273+
};
274+
entry.insert(retry_count);
275+
return Ok(payment_id);
276+
},
277+
hash_map::Entry::Occupied(_) => return Err(PaymentError::Invoice("payment pending")),
278+
}
279+
};
280+
if let Some((retry, payment_id)) = failed_paths_data {
281+
// Some paths were sent, even if we failed to send the full MPP value our recipient may
282+
// misbehave and claim the funds, at which point we have to consider the payment sent,
283+
// so return `Ok()` here, ignoring any retry errors.
284+
let _ = self.retry_payment(true, payment_id, payment_hash, &retry);
285+
Ok(payment_id)
286+
} else {
287+
self.pay_invoice_internal(invoice, amount_msats, retry_count + 1)
288+
}
289+
}
290+
291+
fn retry_payment(&self, other_paths_pending: bool, payment_id: PaymentId, payment_hash: PaymentHash, params: &RouteParameters)
292+
-> Result<(), ()> {
293+
let route;
294+
{
295+
let mut payment_cache = self.payment_cache.lock().unwrap();
296+
let entry = loop {
297+
let entry = payment_cache.entry(payment_hash);
298+
match entry {
299+
hash_map::Entry::Occupied(_) => break entry,
300+
hash_map::Entry::Vacant(entry) => entry.insert(0),
239301
};
302+
};
303+
if let hash_map::Entry::Occupied(mut entry) = entry {
304+
let max_payment_attempts = self.retry_attempts.0 + 1;
305+
let attempts = entry.get_mut();
306+
*attempts += 1;
307+
308+
if *attempts >= max_payment_attempts {
309+
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
310+
if !other_paths_pending { entry.remove(); }
311+
return Err(());
312+
} else if has_expired(params) {
313+
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
314+
if !other_paths_pending { entry.remove(); }
315+
return Err(());
316+
}
317+
318+
let payer = self.payer.node_id();
240319
let first_hops = self.payer.first_hops();
241-
let route = self.router.find_route(
242-
&payer,
243-
&params,
244-
Some(&first_hops.iter().collect::<Vec<_>>()),
245-
).map_err(|e| PaymentError::Routing(e))?;
246-
247-
let payment_hash = PaymentHash(invoice.payment_hash().clone().into_inner());
248-
let payment_secret = Some(invoice.payment_secret().clone());
249-
let payment_id = self.payer.send_payment(&route, payment_hash, &payment_secret)
250-
.map_err(|e| PaymentError::Sending(e))?;
251-
entry.insert(0);
252-
Ok(payment_id)
253-
},
254-
hash_map::Entry::Occupied(_) => Err(PaymentError::Invoice("payment pending")),
320+
route = self.router.find_route(&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>()));
321+
if route.is_err() {
322+
log_trace!(self.logger, "Failed to find a route for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
323+
if !other_paths_pending { entry.remove(); }
324+
return Err(());
325+
}
326+
} else {
327+
unreachable!();
328+
}
255329
}
256-
}
257330

258-
fn retry_payment(
259-
&self, payment_id: PaymentId, params: &RouteParameters
260-
) -> Result<(), PaymentError> {
261-
let payer = self.payer.node_id();
262-
let first_hops = self.payer.first_hops();
263-
let route = self.router.find_route(
264-
&payer, &params, Some(&first_hops.iter().collect::<Vec<_>>())
265-
).map_err(|e| PaymentError::Routing(e))?;
266-
self.payer.retry_payment(&route, payment_id).map_err(|e| PaymentError::Sending(e))
331+
let retry_res = self.payer.retry_payment(&route.unwrap(), payment_id);
332+
match retry_res {
333+
Ok(()) => Ok(()),
334+
Err(PaymentSendFailure::ParameterError(_)) |
335+
Err(PaymentSendFailure::PathParameterError(_)) => {
336+
log_trace!(self.logger, "Failed to retry for payment {} due to bogus route/payment data, not retrying.", log_bytes!(payment_hash.0));
337+
if !other_paths_pending { self.payment_cache.lock().unwrap().remove(&payment_hash); }
338+
return Err(());
339+
},
340+
Err(PaymentSendFailure::AllFailedRetrySafe(_)) => {
341+
self.retry_payment(other_paths_pending, payment_id, payment_hash, params)
342+
},
343+
Err(PaymentSendFailure::PartialFailure { results: _, failed_paths_retry, .. }) => {
344+
if let Some(retry) = failed_paths_retry {
345+
self.retry_payment(true, payment_id, payment_hash, &retry)
346+
} else {
347+
Ok(())
348+
}
349+
},
350+
}
267351
}
268352

269353
/// Removes the payment cached by the given payment hash.
@@ -294,42 +378,21 @@ where
294378
{
295379
fn handle_event(&self, event: &Event) {
296380
match event {
297-
Event::PaymentPathFailed { payment_id, payment_hash, rejected_by_dest, retry, .. } => {
298-
let mut payment_cache = self.payment_cache.lock().unwrap();
299-
let entry = loop {
300-
let entry = payment_cache.entry(*payment_hash);
301-
match entry {
302-
hash_map::Entry::Occupied(_) => break entry,
303-
hash_map::Entry::Vacant(entry) => entry.insert(0),
304-
};
305-
};
306-
if let hash_map::Entry::Occupied(mut entry) = entry {
307-
let max_payment_attempts = self.retry_attempts.0 + 1;
308-
let attempts = entry.get_mut();
309-
*attempts += 1;
310-
311-
if *rejected_by_dest {
312-
log_trace!(self.logger, "Payment {} rejected by destination; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
313-
} else if payment_id.is_none() {
314-
log_trace!(self.logger, "Payment {} has no id; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
315-
} else if *attempts >= max_payment_attempts {
316-
log_trace!(self.logger, "Payment {} exceeded maximum attempts; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
317-
} else if retry.is_none() {
318-
log_trace!(self.logger, "Payment {} missing retry params; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
319-
} else if has_expired(retry.as_ref().unwrap()) {
320-
log_trace!(self.logger, "Invoice expired for payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
321-
} else if self.retry_payment(*payment_id.as_ref().unwrap(), retry.as_ref().unwrap()).is_err() {
322-
log_trace!(self.logger, "Error retrying payment {}; not retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
323-
} else {
324-
log_trace!(self.logger, "Payment {} failed; retrying (attempts: {})", log_bytes!(payment_hash.0), attempts);
381+
Event::PaymentPathFailed { all_paths_failed, payment_id, payment_hash, rejected_by_dest, retry, .. } => {
382+
if *rejected_by_dest {
383+
log_trace!(self.logger, "Payment {} rejected by destination; not retrying", log_bytes!(payment_hash.0));
384+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
385+
} else if payment_id.is_none() {
386+
log_trace!(self.logger, "Payment {} has no id; not retrying", log_bytes!(payment_hash.0));
387+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
388+
} else if let Some(params) = retry {
389+
if self.retry_payment(!all_paths_failed, payment_id.unwrap(), *payment_hash, params).is_ok() {
390+
// We retried at least somewhat, don't provide the PaymentPathFailed event to the user.
325391
return;
326392
}
327-
328-
// Either the payment was rejected, the maximum attempts were exceeded, or an
329-
// error occurred when attempting to retry.
330-
entry.remove();
331393
} else {
332-
unreachable!();
394+
log_trace!(self.logger, "Payment {} missing retry params; not retrying", log_bytes!(payment_hash.0));
395+
if *all_paths_failed { self.payment_cache.lock().unwrap().remove(payment_hash); }
333396
}
334397
},
335398
Event::PaymentSent { payment_hash, .. } => {

0 commit comments

Comments
 (0)