Skip to content

Commit fd123d3

Browse files
committed
fixup! offers: avoid panic when truncating payer_note in UTF-8 code point
1 parent ce744a2 commit fd123d3

File tree

1 file changed

+42
-20
lines changed

1 file changed

+42
-20
lines changed

lightning/src/offers/invoice_request.rs

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,36 +1008,30 @@ impl VerifiedInvoiceRequest {
10081008
}
10091009
}
10101010

1011-
/// `String.truncate(new_len)` panics if you split on a UTF-8 code point. This
1012-
/// function will instead truncate the string to the next smaller code point
1013-
/// boundary.
1011+
/// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
1012+
/// which would leave the `String` containing invalid UTF-8. This function will
1013+
/// instead truncate the string to the next smaller code point boundary so the
1014+
/// truncated string always remains valid UTF-8.
10141015
///
10151016
/// This can still split a grapheme cluster, but that's probably fine.
10161017
/// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
10171018
/// unicode tables to find the next smaller grapheme cluster boundary.
10181019
fn string_truncate_safe(mut s: String, new_len: usize) -> String {
1019-
/// Returns true if a byte is the first byte of a UTF-8 code point sequence.
1020-
// TODO(phlip9): remove when std stabilizes `str::floor_char_boundary`.
1021-
#[inline]
1022-
const fn u8_is_utf8_char_boundary(b: u8) -> bool {
1023-
// This is bit magic equivalent to: b < 128 || b >= 192
1024-
(b as i8) >= -0x40
1025-
}
1026-
1027-
/// Finds the closest `x` not exceeding `index` where `s.is_char_boundary(x)`
1028-
/// is true.
1029-
// TODO(phlip9): remove when std stabilizes `str::floor_char_boundary`.
1020+
/// Finds the largest byte index `x` not exceeding byte index `index` where
1021+
/// `s.is_char_boundary(x)` is true.
1022+
// TODO(phlip9): remove when `str::floor_char_boundary` stabilizes.
10301023
#[inline]
10311024
fn str_floor_char_boundary(s: &str, index: usize) -> usize {
10321025
if index >= s.len() {
10331026
s.len()
10341027
} else {
1035-
let lower_bound = index.saturating_sub(3);
1036-
let new_index = s.as_bytes()[lower_bound..=index]
1037-
.iter()
1038-
.rposition(|b| u8_is_utf8_char_boundary(*b))
1039-
.unwrap_or(0);
1040-
lower_bound + new_index
1028+
// UTF-8 code points are 1-4 bytes long, so we can limit our search
1029+
// to this range: [index - 3, index]
1030+
let lower_bound_index = index.saturating_sub(3);
1031+
(lower_bound_index..=index)
1032+
.rev()
1033+
.find(|idx| s.is_char_boundary(*idx))
1034+
.unwrap_or(lower_bound_index)
10411035
}
10421036
}
10431037

@@ -1465,6 +1459,7 @@ mod tests {
14651459
use crate::ln::inbound_payment::ExpandedKey;
14661460
use crate::ln::msgs::{DecodeError, MAX_VALUE_MSAT};
14671461
use crate::offers::invoice::{Bolt12Invoice, SIGNATURE_TAG as INVOICE_SIGNATURE_TAG};
1462+
use crate::offers::invoice_request::string_truncate_safe;
14681463
use crate::offers::merkle::{self, SignatureTlvStreamRef, TaggedHash, TlvStream};
14691464
use crate::offers::nonce::Nonce;
14701465
#[cfg(not(c_bindings))]
@@ -3026,4 +3021,31 @@ mod tests {
30263021
Err(_) => panic!("unexpected error"),
30273022
}
30283023
}
3024+
3025+
#[test]
3026+
fn test_string_truncate_safe() {
3027+
// We'll correctly truncate down to the closest UTF-8 code point boundary at or before `idx`:
3028+
// ❤ variation-selector
3029+
// e29da4 efb88f
3030+
let s = String::from("❤️");
3031+
for idx in 0..(s.len() + 5) {
3032+
if idx >= s.len() {
3033+
assert_eq!(s, string_truncate_safe(s.clone(), idx));
3034+
} else if (3..s.len()).contains(&idx) {
3035+
assert_eq!("❤", string_truncate_safe(s.clone(), idx));
3036+
} else {
3037+
assert_eq!("", string_truncate_safe(s.clone(), idx));
3038+
}
3039+
}
3040+
3041+
// Every byte in an ASCII string is also a full UTF-8 code point.
3042+
let s = String::from("my ASCII string!");
3043+
for idx in 0..(s.len() + 5) {
3044+
if idx >= s.len() {
3045+
assert_eq!(s, string_truncate_safe(s.clone(), idx));
3046+
} else {
3047+
assert_eq!(s[..idx], string_truncate_safe(s.clone(), idx));
3048+
}
3049+
}
3050+
}
30293051
}

0 commit comments

Comments
 (0)