Skip to content

Commit ce744a2

Browse files
committed
offers: avoid panic when truncating payer_note in UTF-8 code point
1 parent c6921fa commit ce744a2

File tree

1 file changed

+51
-6
lines changed

1 file changed

+51
-6
lines changed

lightning/src/offers/invoice_request.rs

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -998,15 +998,54 @@ impl VerifiedInvoiceRequest {
998998
InvoiceRequestFields {
999999
payer_signing_pubkey: *payer_signing_pubkey,
10001000
quantity: *quantity,
1001-
payer_note_truncated: payer_note.clone().map(|mut s| {
1002-
s.truncate(PAYER_NOTE_LIMIT);
1003-
UntrustedString(s)
1004-
}),
1001+
payer_note_truncated: payer_note
1002+
.clone()
1003+
// Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004+
// down to the nearest valid UTF-8 code point boundary.
1005+
.map(|s| UntrustedString(string_truncate_safe(s, PAYER_NOTE_LIMIT))),
10051006
human_readable_name: self.offer_from_hrn().clone(),
10061007
}
10071008
}
10081009
}
10091010

1011+
/// `String.truncate(new_len)` panics if you split on a UTF-8 code point. This
1012+
/// function will instead truncate the string to the next smaller code point
1013+
/// boundary.
1014+
///
1015+
/// This can still split a grapheme cluster, but that's probably fine.
1016+
/// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
1017+
/// unicode tables to find the next smaller grapheme cluster boundary.
1018+
fn string_truncate_safe(mut s: String, new_len: usize) -> String {
1019+
/// Returns true if a byte is the first byte of a UTF-8 code point sequence.
1020+
// TODO(phlip9): remove when std stabilizes `str::floor_char_boundary`.
1021+
#[inline]
1022+
const fn u8_is_utf8_char_boundary(b: u8) -> bool {
1023+
// This is bit magic equivalent to: b < 128 || b >= 192
1024+
(b as i8) >= -0x40
1025+
}
1026+
1027+
/// Finds the closest `x` not exceeding `index` where `s.is_char_boundary(x)`
1028+
/// is true.
1029+
// TODO(phlip9): remove when std stabilizes `str::floor_char_boundary`.
1030+
#[inline]
1031+
fn str_floor_char_boundary(s: &str, index: usize) -> usize {
1032+
if index >= s.len() {
1033+
s.len()
1034+
} else {
1035+
let lower_bound = index.saturating_sub(3);
1036+
let new_index = s.as_bytes()[lower_bound..=index]
1037+
.iter()
1038+
.rposition(|b| u8_is_utf8_char_boundary(*b))
1039+
.unwrap_or(0);
1040+
lower_bound + new_index
1041+
}
1042+
}
1043+
1044+
let truncated_len = str_floor_char_boundary(s.as_str(), new_len);
1045+
s.truncate(truncated_len);
1046+
s
1047+
}
1048+
10101049
impl InvoiceRequestContents {
10111050
pub(super) fn metadata(&self) -> &[u8] {
10121051
self.inner.metadata()
@@ -2947,14 +2986,20 @@ mod tests {
29472986
.unwrap();
29482987
assert_eq!(offer.issuer_signing_pubkey(), Some(node_id));
29492988

2989+
// UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2990+
// because it would split a multi-byte UTF-8 code point.
2991+
let payer_note = "❤️".repeat(86);
2992+
assert_eq!(payer_note.len(), PAYER_NOTE_LIMIT + 4);
2993+
let expected_payer_note = "❤️".repeat(85);
2994+
29502995
let invoice_request = offer
29512996
.request_invoice(&expanded_key, nonce, &secp_ctx, payment_id)
29522997
.unwrap()
29532998
.chain(Network::Testnet)
29542999
.unwrap()
29553000
.quantity(1)
29563001
.unwrap()
2957-
.payer_note("0".repeat(PAYER_NOTE_LIMIT * 2))
3002+
.payer_note(payer_note)
29583003
.build_and_sign()
29593004
.unwrap();
29603005
match invoice_request.verify_using_metadata(&expanded_key, &secp_ctx) {
@@ -2966,7 +3011,7 @@ mod tests {
29663011
InvoiceRequestFields {
29673012
payer_signing_pubkey: invoice_request.payer_signing_pubkey(),
29683013
quantity: Some(1),
2969-
payer_note_truncated: Some(UntrustedString("0".repeat(PAYER_NOTE_LIMIT))),
3014+
payer_note_truncated: Some(UntrustedString(expected_payer_note)),
29703015
human_readable_name: None,
29713016
}
29723017
);

0 commit comments

Comments
 (0)