@@ -998,15 +998,54 @@ impl VerifiedInvoiceRequest {
998
998
InvoiceRequestFields {
999
999
payer_signing_pubkey : * payer_signing_pubkey,
1000
1000
quantity : * quantity,
1001
- payer_note_truncated : payer_note. clone ( ) . map ( |mut s| {
1002
- s. truncate ( PAYER_NOTE_LIMIT ) ;
1003
- UntrustedString ( s)
1004
- } ) ,
1001
+ payer_note_truncated : payer_note
1002
+ . clone ( )
1003
+ // Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
1004
+ // down to the nearest valid UTF-8 code point boundary.
1005
+ . map ( |s| UntrustedString ( string_truncate_safe ( s, PAYER_NOTE_LIMIT ) ) ) ,
1005
1006
human_readable_name : self . offer_from_hrn ( ) . clone ( ) ,
1006
1007
}
1007
1008
}
1008
1009
}
1009
1010
1011
+ /// `String.truncate(new_len)` panics if you split on a UTF-8 code point. This
1012
+ /// function will instead truncate the string to the next smaller code point
1013
+ /// boundary.
1014
+ ///
1015
+ /// This can still split a grapheme cluster, but that's probably fine.
1016
+ /// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
1017
+ /// unicode tables to find the next smaller grapheme cluster boundary.
1018
+ fn string_truncate_safe ( mut s : String , new_len : usize ) -> String {
1019
+ /// Returns true if a byte is the first byte of a UTF-8 code point sequence.
1020
+ // TODO(phlip9): remove when std stabilizes `str::floor_char_boundary`.
1021
+ #[ inline]
1022
+ const fn u8_is_utf8_char_boundary ( b : u8 ) -> bool {
1023
+ // This is bit magic equivalent to: b < 128 || b >= 192
1024
+ ( b as i8 ) >= -0x40
1025
+ }
1026
+
1027
+ /// Finds the closest `x` not exceeding `index` where `s.is_char_boundary(x)`
1028
+ /// is true.
1029
+ // TODO(phlip9): remove when std stabilizes `str::floor_char_boundary`.
1030
+ #[ inline]
1031
+ fn str_floor_char_boundary ( s : & str , index : usize ) -> usize {
1032
+ if index >= s. len ( ) {
1033
+ s. len ( )
1034
+ } else {
1035
+ let lower_bound = index. saturating_sub ( 3 ) ;
1036
+ let new_index = s. as_bytes ( ) [ lower_bound..=index]
1037
+ . iter ( )
1038
+ . rposition ( |b| u8_is_utf8_char_boundary ( * b) )
1039
+ . unwrap_or ( 0 ) ;
1040
+ lower_bound + new_index
1041
+ }
1042
+ }
1043
+
1044
+ let truncated_len = str_floor_char_boundary ( s. as_str ( ) , new_len) ;
1045
+ s. truncate ( truncated_len) ;
1046
+ s
1047
+ }
1048
+
1010
1049
impl InvoiceRequestContents {
1011
1050
pub ( super ) fn metadata ( & self ) -> & [ u8 ] {
1012
1051
self . inner . metadata ( )
@@ -2947,14 +2986,20 @@ mod tests {
2947
2986
. unwrap ( ) ;
2948
2987
assert_eq ! ( offer. issuer_signing_pubkey( ) , Some ( node_id) ) ;
2949
2988
2989
+ // UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2990
+ // because it would split a multi-byte UTF-8 code point.
2991
+ let payer_note = "❤️" . repeat ( 86 ) ;
2992
+ assert_eq ! ( payer_note. len( ) , PAYER_NOTE_LIMIT + 4 ) ;
2993
+ let expected_payer_note = "❤️" . repeat ( 85 ) ;
2994
+
2950
2995
let invoice_request = offer
2951
2996
. request_invoice ( & expanded_key, nonce, & secp_ctx, payment_id)
2952
2997
. unwrap ( )
2953
2998
. chain ( Network :: Testnet )
2954
2999
. unwrap ( )
2955
3000
. quantity ( 1 )
2956
3001
. unwrap ( )
2957
- . payer_note ( "0" . repeat ( PAYER_NOTE_LIMIT * 2 ) )
3002
+ . payer_note ( payer_note )
2958
3003
. build_and_sign ( )
2959
3004
. unwrap ( ) ;
2960
3005
match invoice_request. verify_using_metadata ( & expanded_key, & secp_ctx) {
@@ -2966,7 +3011,7 @@ mod tests {
2966
3011
InvoiceRequestFields {
2967
3012
payer_signing_pubkey: invoice_request. payer_signing_pubkey( ) ,
2968
3013
quantity: Some ( 1 ) ,
2969
- payer_note_truncated: Some ( UntrustedString ( "0" . repeat ( PAYER_NOTE_LIMIT ) ) ) ,
3014
+ payer_note_truncated: Some ( UntrustedString ( expected_payer_note ) ) ,
2970
3015
human_readable_name: None ,
2971
3016
}
2972
3017
) ;
0 commit comments