1
+ //! Support for floating point types compatible with IEEE 754.
2
+
1
3
use crate :: { Category , ExpInt , IEK_INF , IEK_NAN , IEK_ZERO } ;
2
4
use crate :: { Float , FloatConvert , ParseError , Round , Status , StatusAnd } ;
3
5
@@ -8,6 +10,12 @@ use core::marker::PhantomData;
8
10
use core:: mem;
9
11
use core:: ops:: Neg ;
10
12
13
+ /// A floating point number that uses IEEE semantics.
14
+ ///
15
+ /// Usually you will want to use the available type aliases of this type
16
+ /// (e.g., [`Single`], [`Double`]) rather than referencing it directly.
17
+ ///
18
+ /// If `S` implements [`Semantics`], this type will implement [`Float`].
11
19
#[ must_use]
12
20
pub struct IeeeFloat < S > {
13
21
/// Absolute significand value (including the integer bit).
@@ -84,7 +92,7 @@ pub enum NonfiniteBehavior {
84
92
/// Only the Float8E4M3FN has this behavior. There is no Inf representation. A
85
93
/// value is NaN if the exponent field and the mantissa field are all 1s.
86
94
/// This behavior matches the FP8 E4M3 type described in
87
- /// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
95
+ /// <https://arxiv.org/abs/2209.05433>. We treat both signed and unsigned NaNs
88
96
/// as non-signalling, although the paper does not state whether the NaN
89
97
/// values are signalling or not.
90
98
NanOnly ,
@@ -276,46 +284,75 @@ impl<S> Clone for IeeeFloat<S> {
276
284
}
277
285
278
286
macro_rules! ieee_semantics {
279
- ( $( $name: ident = $sem: ident( $bits: tt : $exp_bits: tt) $( { $( $extra: tt) * } ) ?) ,* $( , ) ?) => {
280
- $( pub struct $sem; ) *
281
- $( pub type $name = IeeeFloat <$sem>; ) *
282
- $( impl Semantics for $sem {
283
- const BITS : usize = $bits;
284
- const EXP_BITS : usize = $exp_bits;
287
+ ( $(
288
+ $( #[ $meta: meta] ) *
289
+ $name: ident = $sem: ident( $bits: tt : $exp_bits: tt) $( { $( $extra: tt) * } ) ?
290
+ ) ,* $( , ) ?) => {
291
+ $(
292
+ #[ doc = concat!( "Floating point semantics for [`" , stringify!( $name) , "`]." ) ]
293
+ ///
294
+ /// See that type for more details.
295
+ pub struct $sem;
296
+
297
+ $( #[ $meta] ) *
298
+ pub type $name = IeeeFloat <$sem>;
285
299
286
- $( $( $extra) * ) ?
287
- } ) *
300
+ impl Semantics for $sem {
301
+ const BITS : usize = $bits;
302
+ const EXP_BITS : usize = $exp_bits;
303
+
304
+ $( $( $extra) * ) ?
305
+ }
306
+ ) *
288
307
}
289
308
}
290
309
291
310
ieee_semantics ! {
311
+ /// IEEE binary16 half-precision (16-bit) floating point number.
292
312
Half = HalfS ( 16 : 5 ) ,
313
+
314
+ /// IEEE binary32 single-precision (32-bit) floating point number.
293
315
Single = SingleS ( 32 : 8 ) ,
316
+
317
+ /// IEEE binary64 double-precision (64-bit) floating point number.
294
318
Double = DoubleS ( 64 : 11 ) ,
295
- Quad = QuadS ( 128 : 15 ) ,
296
319
297
- // Non-standard IEEE-like semantics:
320
+ /// IEEE binary128 quadruple-precision (128-bit) floating point number.
321
+ Quad = QuadS ( 128 : 15 ) ,
298
322
299
- // FIXME(eddyb) document this as "Brain Float 16" (C++ didn't have docs).
323
+ /// 16-bit brain floating point number.
324
+ ///
325
+ /// This is not an IEEE kind but uses the same semantics.
300
326
BFloat = BFloatS ( 16 : 8 ) ,
301
327
302
- // 8-bit floating point number following IEEE-754 conventions with bit
303
- // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
328
+ /// 8-bit floating point number with S1E5M2 bit layout.
329
+ ///
330
+ /// Follows IEEE-754 conventions with S1E5M2 bit layout as described in
331
+ /// <https://arxiv.org/abs/2209.05433>.
304
332
Float8E5M2 = Float8E5M2S ( 8 : 5 ) ,
305
333
306
- // 8-bit floating point number mostly following IEEE-754 conventions with
307
- // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
308
- // Unlike IEEE-754 types, there are no infinity values, and NaN is
309
- // represented with the exponent and mantissa bits set to all 1s.
334
+ /// 8-bit floating point number with S1E4M3 bit layout.
335
+ ///
336
+ /// This type mostly follows IEEE-754 conventions with a
337
+ /// bit layout S1E4M3 as described in <https://arxiv.org/abs/2209.05433>.
338
+ /// Unlike IEEE-754 types, there are no infinity values, and NaN is
339
+ /// represented with the exponent and mantissa bits set to all 1s.
310
340
Float8E4M3FN = Float8E4M3FNS ( 8 : 4 ) {
311
341
const NONFINITE_BEHAVIOR : NonfiniteBehavior = NonfiniteBehavior :: NanOnly ;
312
342
} ,
313
343
}
314
344
315
345
// FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit"
316
346
// associated `const` on `Semantics` itself.
347
+ /// Floating point semantics for [`X87DoubleExtended`].
348
+ ///
349
+ /// See that type for more details.
317
350
pub struct X87DoubleExtendedS ;
351
+
352
+ /// 80-bit floating point number that uses IEEE extended precision semantics, as used
353
+ /// by x87 `long double`.
318
354
pub type X87DoubleExtended = IeeeFloat < X87DoubleExtendedS > ;
355
+
319
356
impl Semantics for X87DoubleExtendedS {
320
357
const BITS : usize = 80 ;
321
358
const EXP_BITS : usize = 15 ;
0 commit comments