@@ -192,7 +192,8 @@ macro_rules! impl_Debug {
192
192
}
193
193
194
194
// 2 digit decimal look up table
195
- static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"0001020304050607080910111213141516171819\
195
+ static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"\
196
+ 0001020304050607080910111213141516171819\
196
197
2021222324252627282930313233343536373839\
197
198
4041424344454647484950515253545556575859\
198
199
6061626364656667686970717273747576777879\
@@ -232,83 +233,89 @@ macro_rules! impl_Display {
232
233
233
234
#[ cfg( not( feature = "optimize_for_size" ) ) ]
234
235
impl $unsigned {
235
- fn _fmt( mut self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
236
- const SIZE : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
237
- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
238
- let mut curr = SIZE ;
239
- let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
240
- let lut_ptr = DEC_DIGITS_LUT . as_ptr( ) ;
241
-
242
- // SAFETY: Since `d1` and `d2` are always less than or equal to `198`, we
243
- // can copy from `lut_ptr[d1..d1 + 1]` and `lut_ptr[d2..d2 + 1]`. To show
244
- // that it's OK to copy into `buf_ptr`, notice that at the beginning
245
- // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at
246
- // each step this is kept the same as `n` is divided. Since `n` is always
247
- // non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]`
248
- // is safe to access.
249
- unsafe {
250
- // need at least 16 bits for the 4-characters-at-a-time to work.
251
- #[ allow( overflowing_literals) ]
252
- #[ allow( unused_comparisons) ]
253
- // This block will be removed for smaller types at compile time and in the worst
254
- // case, it will prevent to have the `10000` literal to overflow for `i8` and `u8`.
255
- if core:: mem:: size_of:: <$unsigned>( ) >= 2 {
256
- // eagerly decode 4 characters at a time
257
- while self >= 10000 {
258
- let rem = ( self % 10000 ) as usize ;
259
- self /= 10000 ;
260
-
261
- let d1 = ( rem / 100 ) << 1 ;
262
- let d2 = ( rem % 100 ) << 1 ;
263
- curr -= 4 ;
264
-
265
- // We are allowed to copy to `buf_ptr[curr..curr + 3]` here since
266
- // otherwise `curr < 0`. But then `n` was originally at least `10000^10`
267
- // which is `10^40 > 2^128 > n`.
268
- ptr:: copy_nonoverlapping( lut_ptr. add( d1 as usize ) , buf_ptr. add( curr) , 2 ) ;
269
- ptr:: copy_nonoverlapping( lut_ptr. add( d2 as usize ) , buf_ptr. add( curr + 2 ) , 2 ) ;
270
- }
271
- }
272
-
273
- // if we reach here numbers are <= 9999, so at most 4 chars long
274
- let mut n = self as usize ; // possibly reduce 64bit math
236
+ fn _fmt( self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
237
+ const MAX_DEC_N : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
238
+ // Buffer decimals for $unsigned with right alignment.
239
+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
240
+ // Count the number of bytes in buf that are not initialized.
241
+ let mut offset = buf. len( ) ;
242
+ // Consume the least-significant decimals from a working copy.
243
+ let mut remain = self ;
244
+
245
+ // Format per four digits from the lookup table.
246
+ // Four digits need a 16-bit $unsigned or wider.
247
+ while size_of:: <Self >( ) > 1 && remain > 999 . try_into( ) . expect( "branch is not hit for types that cannot fit 999 (u8)" ) {
248
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
249
+ // and the while condition ensures at least 4 more decimals.
250
+ unsafe { core:: hint:: assert_unchecked( offset >= 4 ) }
251
+ // SAFETY: The offset counts down from its initial buf.len()
252
+ // without underflow due to the previous precondition.
253
+ unsafe { core:: hint:: assert_unchecked( offset <= buf. len( ) ) }
254
+ offset -= 4 ;
255
+
256
+ // pull two pairs
257
+ let scale: Self = 1_00_00 . try_into( ) . expect( "branch is not hit for types that cannot fit 1E4 (u8)" ) ;
258
+ let quad = remain % scale;
259
+ remain /= scale;
260
+ let pair1 = ( quad / 100 ) as usize ;
261
+ let pair2 = ( quad % 100 ) as usize ;
262
+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
263
+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
264
+ buf[ offset + 2 ] . write( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
265
+ buf[ offset + 3 ] . write( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
266
+ }
275
267
276
- // decode 2 more chars, if > 2 chars
277
- if n >= 100 {
278
- let d1 = ( n % 100 ) << 1 ;
279
- n /= 100 ;
280
- curr -= 2 ;
281
- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
282
- }
268
+ // Format per two digits from the lookup table.
269
+ if remain > 9 {
270
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
271
+ // and the if condition ensures at least 2 more decimals.
272
+ unsafe { core:: hint:: assert_unchecked( offset >= 2 ) }
273
+ // SAFETY: The offset counts down from its initial buf.len()
274
+ // without underflow due to the previous precondition.
275
+ unsafe { core:: hint:: assert_unchecked( offset <= buf. len( ) ) }
276
+ offset -= 2 ;
277
+
278
+ let pair = ( remain % 100 ) as usize ;
279
+ remain /= 100 ;
280
+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ pair * 2 + 0 ] ) ;
281
+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ pair * 2 + 1 ] ) ;
282
+ }
283
283
284
- // if we reach here numbers are <= 100, so at most 2 chars long
285
- // The biggest it can be is 99, and 99 << 1 == 198, so a `u8` is enough.
286
- // decode last 1 or 2 chars
287
- if n < 10 {
288
- curr -= 1 ;
289
- * buf_ptr. add( curr) = ( n as u8 ) + b'0' ;
290
- } else {
291
- let d1 = n << 1 ;
292
- curr -= 2 ;
293
- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
294
- }
284
+ // Format the last remaining digit, if any.
285
+ if remain != 0 || self == 0 {
286
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
287
+ // and the if condition ensures (at least) 1 more decimal.
288
+ unsafe { core:: hint:: assert_unchecked( offset >= 1 ) }
289
+ // SAFETY: The offset counts down from its initial buf.len()
290
+ // without underflow due to the previous precondition.
291
+ unsafe { core:: hint:: assert_unchecked( offset <= buf. len( ) ) }
292
+ offset -= 1 ;
293
+
294
+ // Either the compiler sees that remain < 10, or the mask below
294
+ // bounds the lookup index so that no boundary check is needed.
296
+ let last = ( remain & 15 ) as usize ;
297
+ buf[ offset] . write( DEC_DIGITS_LUT [ last * 2 + 1 ] ) ;
298
+ // not used: remain = 0;
295
299
}
296
300
297
- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
298
- // UTF-8 since `DEC_DIGITS_LUT` is
299
- let buf_slice = unsafe {
300
- str :: from_utf8_unchecked(
301
- slice:: from_raw_parts( buf_ptr. add( curr) , buf. len( ) - curr) )
301
+ // SAFETY: Every byte of buf from offset onward has been initialized.
302
+ let written = unsafe { buf. get_unchecked( offset..) } ;
303
+ // SAFETY: Writes use ASCII from the lookup table exclusively.
304
+ let as_str = unsafe {
305
+ str :: from_utf8_unchecked( slice:: from_raw_parts(
306
+ MaybeUninit :: slice_as_ptr( written) ,
307
+ written. len( ) ,
308
+ ) )
302
309
} ;
303
- f. pad_integral( is_nonnegative, "" , buf_slice )
310
+ f. pad_integral( is_nonnegative, "" , as_str )
304
311
}
305
312
} ) *
306
313
307
314
#[ cfg( feature = "optimize_for_size" ) ]
308
315
fn $gen_name( mut n: $u, is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
309
- const SIZE : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
310
- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
311
- let mut curr = buf . len ( ) ;
316
+ const MAX_DEC_N : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
317
+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
318
+ let mut curr = MAX_DEC_N ;
312
319
let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
313
320
314
321
// SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning
0 commit comments