@@ -192,7 +192,8 @@ macro_rules! impl_Debug {
192
192
}
193
193
194
194
// 2 digit decimal look up table
195
- static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"0001020304050607080910111213141516171819\
195
+ static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"\
196
+ 0001020304050607080910111213141516171819\
196
197
2021222324252627282930313233343536373839\
197
198
4041424344454647484950515253545556575859\
198
199
6061626364656667686970717273747576777879\
@@ -232,83 +233,68 @@ macro_rules! impl_Display {
232
233
233
234
#[ cfg( not( feature = "optimize_for_size" ) ) ]
234
235
impl $unsigned {
235
- fn _fmt( mut self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
236
- const SIZE : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
237
- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
238
- let mut curr = SIZE ;
239
- let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
240
- let lut_ptr = DEC_DIGITS_LUT . as_ptr( ) ;
241
-
242
- // SAFETY: Since `d1` and `d2` are always less than or equal to `198`, we
243
- // can copy from `lut_ptr[d1..d1 + 1]` and `lut_ptr[d2..d2 + 1]`. To show
244
- // that it's OK to copy into `buf_ptr`, notice that at the beginning
245
- // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at
246
- // each step this is kept the same as `n` is divided. Since `n` is always
247
- // non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]`
248
- // is safe to access.
249
- unsafe {
250
- // need at least 16 bits for the 4-characters-at-a-time to work.
251
- #[ allow( overflowing_literals) ]
252
- #[ allow( unused_comparisons) ]
253
- // This block will be removed for smaller types at compile time and in the worst
254
- // case, it will prevent to have the `10000` literal to overflow for `i8` and `u8`.
255
- if core:: mem:: size_of:: <$unsigned>( ) >= 2 {
256
- // eagerly decode 4 characters at a time
257
- while self >= 10000 {
258
- let rem = ( self % 10000 ) as usize ;
259
- self /= 10000 ;
260
-
261
- let d1 = ( rem / 100 ) << 1 ;
262
- let d2 = ( rem % 100 ) << 1 ;
263
- curr -= 4 ;
264
-
265
- // We are allowed to copy to `buf_ptr[curr..curr + 3]` here since
266
- // otherwise `curr < 0`. But then `n` was originally at least `10000^10`
267
- // which is `10^40 > 2^128 > n`.
268
- ptr:: copy_nonoverlapping( lut_ptr. add( d1 as usize ) , buf_ptr. add( curr) , 2 ) ;
269
- ptr:: copy_nonoverlapping( lut_ptr. add( d2 as usize ) , buf_ptr. add( curr + 2 ) , 2 ) ;
270
- }
271
- }
236
+ fn _fmt( self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
237
+ // Buffer decimals for $unsigned type with fixed positions. Thus
238
+ // the least significant digit is located at the last buf byte.
239
+ const MAX_DEC_N : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
240
+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
241
+ // Leading zero count & write index in buf.
242
+ let mut offset = MAX_DEC_N ;
243
+ // Consume decimals from working copy until none left.
244
+ let mut remain = self ;
245
+
246
+ // Format per four digits from the lookup table.
247
+ // Four digits need a 16-bit $unsigned or wider.
248
+ #[ allow( overflowing_literals) ]
249
+ #[ allow( unused_comparisons) ]
250
+ while offset >= 4 && remain > 999 {
251
+ let quad = remain % 100_00 ;
252
+ remain /= 100_00 ;
253
+ let p1 = ( quad / 100 ) as usize * 2 ;
254
+ let p2 = ( quad % 100 ) as usize * 2 ;
255
+ offset -= 4 ;
256
+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ p1 + 0 ] ) ;
257
+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ p1 + 1 ] ) ;
258
+ buf[ offset + 2 ] . write( DEC_DIGITS_LUT [ p2 + 0 ] ) ;
259
+ buf[ offset + 3 ] . write( DEC_DIGITS_LUT [ p2 + 1 ] ) ;
260
+ }
272
261
273
- // if we reach here numbers are <= 9999, so at most 4 chars long
274
- let mut n = self as usize ; // possibly reduce 64bit math
262
+ // Format per two digits from the lookup table.
263
+ while offset >= 2 && remain > 9 {
264
+ let p = ( remain % 100 ) as usize * 2 ;
265
+ remain /= 100 ;
266
+ offset -= 2 ;
267
+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ p + 0 ] ) ;
268
+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ p + 1 ] ) ;
269
+ }
275
270
276
- // decode 2 more chars, if > 2 chars
277
- if n >= 100 {
278
- let d1 = ( n % 100 ) << 1 ;
279
- n /= 100 ;
280
- curr -= 2 ;
281
- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
282
- }
271
+ // Format the last remaining digit, if any.
272
+ if offset != 0 && remain != 0 || offset == MAX_DEC_N {
273
+ // Either the compiler already sees that remain < 10, or the
274
+ // modulo below lets it elide the bounds check on the lookup.
275
+ let p = ( remain % 10 ) as usize * 2 ;
276
+ // not used: remain = 0;
283
277
284
- // if we reach here numbers are <= 100, so at most 2 chars long
285
- // The biggest it can be is 99, and 99 << 1 == 198, so a `u8` is enough.
286
- // decode last 1 or 2 chars
287
- if n < 10 {
288
- curr -= 1 ;
289
- * buf_ptr. add( curr) = ( n as u8 ) + b'0' ;
290
- } else {
291
- let d1 = n << 1 ;
292
- curr -= 2 ;
293
- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
294
- }
278
+ offset -= 1 ;
279
+ buf[ offset] . write( DEC_DIGITS_LUT [ p + 1 ] ) ;
295
280
}
296
281
297
- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
298
- // UTF-8 since `DEC_DIGITS_LUT` is
299
- let buf_slice = unsafe {
300
- str :: from_utf8_unchecked(
301
- slice:: from_raw_parts( buf_ptr. add( curr) , buf. len( ) - curr) )
282
+ // SAFETY: All buf content since offset is set with bytes from
283
+ // the lookup table, which consists of valid ASCII exclusively.
284
+ let decimals = unsafe {
285
+ let written = & buf[ offset..] ;
286
+ let as_init = MaybeUninit :: slice_assume_init_ref( written) ;
287
+ str :: from_utf8_unchecked( as_init)
302
288
} ;
303
- f. pad_integral( is_nonnegative, "" , buf_slice )
289
+ f. pad_integral( is_nonnegative, "" , decimals )
304
290
}
305
291
} ) *
306
292
307
293
#[ cfg( feature = "optimize_for_size" ) ]
308
294
fn $gen_name( mut n: $u, is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
309
- const SIZE : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
310
- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
311
- let mut curr = buf . len ( ) ;
295
+ const MAX_DEC_N : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
296
+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
297
+ let mut curr = MAX_DEC_N ;
312
298
let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
313
299
314
300
// SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning
0 commit comments