Skip to content

offers: avoid panic when truncating payer_note in UTF-8 code point #3747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 68 additions & 6 deletions lightning/src/offers/invoice_request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -998,15 +998,43 @@ impl VerifiedInvoiceRequest {
InvoiceRequestFields {
payer_signing_pubkey: *payer_signing_pubkey,
quantity: *quantity,
payer_note_truncated: payer_note.clone().map(|mut s| {
s.truncate(PAYER_NOTE_LIMIT);
UntrustedString(s)
}),
payer_note_truncated: payer_note
.clone()
// Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
// down to the nearest valid UTF-8 code point boundary.
.map(|s| UntrustedString(string_truncate_safe(s, PAYER_NOTE_LIMIT))),
human_readable_name: self.offer_from_hrn().clone(),
}
}
}

/// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
/// which would leave the `String` containing invalid UTF-8. This function will
/// instead truncate the string to the next smaller code point boundary so the
/// truncated string always remains valid UTF-8.
///
/// This can still split a grapheme cluster, but that's probably fine.
/// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
/// unicode tables to find the next smaller grapheme cluster boundary.
fn string_truncate_safe(mut s: String, new_len: usize) -> String {
// Finds the largest byte index `x` not exceeding byte index `index` where
// `s.is_char_boundary(x)` is true.
// TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
let truncated_len = if new_len >= s.len() {
s.len()
} else {
// UTF-8 code points are 1-4 bytes long, so we can limit our search
// to this range: [new_len - 3, new_len]
let lower_bound_index = new_len.saturating_sub(3);
(lower_bound_index..=new_len)
.rev()
.find(|idx| s.is_char_boundary(*idx))
.unwrap_or(lower_bound_index)
Comment on lines +1026 to +1032
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha, but also

Suggested change
// UTF-8 code points are 1-4 bytes long, so we can limit our search
// to this range: [new_len - 3, new_len]
let lower_bound_index = new_len.saturating_sub(3);
(lower_bound_index..=new_len)
.rev()
.find(|idx| s.is_char_boundary(*idx))
.unwrap_or(lower_bound_index)
(0..=new_len).rev().find(|idx| s.is_char_boundary(*idx)).unwrap_or(0)

};
s.truncate(truncated_len);
s
}

impl InvoiceRequestContents {
pub(super) fn metadata(&self) -> &[u8] {
self.inner.metadata()
Expand Down Expand Up @@ -1426,6 +1454,7 @@ mod tests {
use crate::ln::inbound_payment::ExpandedKey;
use crate::ln::msgs::{DecodeError, MAX_VALUE_MSAT};
use crate::offers::invoice::{Bolt12Invoice, SIGNATURE_TAG as INVOICE_SIGNATURE_TAG};
use crate::offers::invoice_request::string_truncate_safe;
use crate::offers::merkle::{self, SignatureTlvStreamRef, TaggedHash, TlvStream};
use crate::offers::nonce::Nonce;
#[cfg(not(c_bindings))]
Expand Down Expand Up @@ -2947,14 +2976,20 @@ mod tests {
.unwrap();
assert_eq!(offer.issuer_signing_pubkey(), Some(node_id));

// UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
// because it would split a multi-byte UTF-8 code point.
let payer_note = "❤️".repeat(86);
assert_eq!(payer_note.len(), PAYER_NOTE_LIMIT + 4);
let expected_payer_note = "❤️".repeat(85);

let invoice_request = offer
.request_invoice(&expanded_key, nonce, &secp_ctx, payment_id)
.unwrap()
.chain(Network::Testnet)
.unwrap()
.quantity(1)
.unwrap()
.payer_note("0".repeat(PAYER_NOTE_LIMIT * 2))
.payer_note(payer_note)
.build_and_sign()
.unwrap();
match invoice_request.verify_using_metadata(&expanded_key, &secp_ctx) {
Expand All @@ -2966,7 +3001,7 @@ mod tests {
InvoiceRequestFields {
payer_signing_pubkey: invoice_request.payer_signing_pubkey(),
quantity: Some(1),
payer_note_truncated: Some(UntrustedString("0".repeat(PAYER_NOTE_LIMIT))),
payer_note_truncated: Some(UntrustedString(expected_payer_note)),
human_readable_name: None,
}
);
Expand All @@ -2981,4 +3016,31 @@ mod tests {
Err(_) => panic!("unexpected error"),
}
}

#[test]
fn test_string_truncate_safe() {
// We'll correctly truncate to the nearest UTF-8 code point boundary:
// ❤ variation-selector
// e29da4 efb88f
let s = String::from("❤️");
for idx in 0..(s.len() + 5) {
if idx >= s.len() {
assert_eq!(s, string_truncate_safe(s.clone(), idx));
} else if (3..s.len()).contains(&idx) {
assert_eq!("❤", string_truncate_safe(s.clone(), idx));
} else {
assert_eq!("", string_truncate_safe(s.clone(), idx));
}
}

// Every byte in an ASCII string is also a full UTF-8 code point.
let s = String::from("my ASCII string!");
for idx in 0..(s.len() + 5) {
if idx >= s.len() {
assert_eq!(s, string_truncate_safe(s.clone(), idx));
} else {
assert_eq!(s[..idx], string_truncate_safe(s.clone(), idx));
}
}
}
}
Loading