Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit bbe3aef

Browse files
committed May 6, 2024 ·
Work on bigint
- Try splitting part of 'Int' into 'MinInt' so we don't need to implement everything on u256/i256
- Add addsub test
- Add mul/div/rem tests
- Add cmp test
- Remove 32-bit div implementation
- formatting updates
- disable div tests for now
- Bigint updates
- Big update
- Fix widen mul wrapping add
- disable duplicate symbols in builtins
- Apply temporary unord fix from @beetrees #593
- tests
- add lowerhex display
- errors by ref
- tests
- fix-test
- Update big tests
- Fix core calls
- Disable widen_mul for signed
- Test adding symbols in build.rs
- Add a feature to compile intrinsics that are missing on the system for testing
- update
- Disable f128 tests on platforms without system support
- add missing build.rs file
- pull cas file from master
- testgs
- print more div values
- Add a benchmark
- Work on fixing bit widths
- Update benchmark
1 parent f3f4dec commit bbe3aef

File tree

23 files changed

+933
-191
lines changed

23 files changed

+933
-191
lines changed
 

‎build.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -479,10 +479,6 @@ mod c {
479479
("__floatsitf", "floatsitf.c"),
480480
("__floatunditf", "floatunditf.c"),
481481
("__floatunsitf", "floatunsitf.c"),
482-
("__addtf3", "addtf3.c"),
483-
("__multf3", "multf3.c"),
484-
("__subtf3", "subtf3.c"),
485-
("__divtf3", "divtf3.c"),
486482
("__powitf2", "powitf2.c"),
487483
("__fe_getround", "fp_mode.c"),
488484
("__fe_raise_inexact", "fp_mode.c"),
@@ -500,30 +496,22 @@ mod c {
500496
if target_arch == "mips64" {
501497
sources.extend(&[
502498
("__netf2", "comparetf2.c"),
503-
("__addtf3", "addtf3.c"),
504-
("__multf3", "multf3.c"),
505-
("__subtf3", "subtf3.c"),
506499
("__fixtfsi", "fixtfsi.c"),
507500
("__floatsitf", "floatsitf.c"),
508501
("__fixunstfsi", "fixunstfsi.c"),
509502
("__floatunsitf", "floatunsitf.c"),
510503
("__fe_getround", "fp_mode.c"),
511-
("__divtf3", "divtf3.c"),
512504
]);
513505
}
514506

515507
if target_arch == "loongarch64" {
516508
sources.extend(&[
517509
("__netf2", "comparetf2.c"),
518-
("__addtf3", "addtf3.c"),
519-
("__multf3", "multf3.c"),
520-
("__subtf3", "subtf3.c"),
521510
("__fixtfsi", "fixtfsi.c"),
522511
("__floatsitf", "floatsitf.c"),
523512
("__fixunstfsi", "fixunstfsi.c"),
524513
("__floatunsitf", "floatunsitf.c"),
525514
("__fe_getround", "fp_mode.c"),
526-
("__divtf3", "divtf3.c"),
527515
]);
528516
}
529517

‎src/float/add.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::float::Float;
2-
use crate::int::{CastInto, Int};
2+
use crate::int::{CastInto, Int, MinInt};
33

44
/// Returns `a + b`
55
fn add<F: Float>(a: F, b: F) -> F
@@ -57,17 +57,17 @@ where
5757
}
5858

5959
// zero + anything = anything
60-
if a_abs == Int::ZERO {
60+
if a_abs == MinInt::ZERO {
6161
// but we need to get the sign right for zero + zero
62-
if b_abs == Int::ZERO {
62+
if b_abs == MinInt::ZERO {
6363
return F::from_repr(a.repr() & b.repr());
6464
} else {
6565
return b;
6666
}
6767
}
6868

6969
// anything + zero = anything
70-
if b_abs == Int::ZERO {
70+
if b_abs == MinInt::ZERO {
7171
return a;
7272
}
7373
}
@@ -113,10 +113,10 @@ where
113113
// Shift the significand of b by the difference in exponents, with a sticky
114114
// bottom bit to get rounding correct.
115115
let align = a_exponent.wrapping_sub(b_exponent).cast();
116-
if align != Int::ZERO {
116+
if align != MinInt::ZERO {
117117
if align < bits {
118118
let sticky =
119-
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
119+
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
120120
b_significand = (b_significand >> align.cast()) | sticky;
121121
} else {
122122
b_significand = one; // sticky; b is known to be non-zero.
@@ -125,8 +125,8 @@ where
125125
if subtraction {
126126
a_significand = a_significand.wrapping_sub(b_significand);
127127
// If a == -b, return +zero.
128-
if a_significand == Int::ZERO {
129-
return F::from_repr(Int::ZERO);
128+
if a_significand == MinInt::ZERO {
129+
return F::from_repr(MinInt::ZERO);
130130
}
131131

132132
// If partial cancellation occured, we need to left-shift the result
@@ -143,8 +143,8 @@ where
143143

144144
// If the addition carried up, we need to right-shift the result and
145145
// adjust the exponent:
146-
if a_significand & implicit_bit << 4 != Int::ZERO {
147-
let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
146+
if a_significand & implicit_bit << 4 != MinInt::ZERO {
147+
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
148148
a_significand = a_significand >> 1 | sticky;
149149
a_exponent += 1;
150150
}
@@ -160,7 +160,7 @@ where
160160
// need to shift the significand.
161161
let shift = (1 - a_exponent).cast();
162162
let sticky =
163-
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
163+
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
164164
a_significand = a_significand >> shift.cast() | sticky;
165165
a_exponent = 0;
166166
}

‎src/float/cmp.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![allow(unreachable_code)]
22

33
use crate::float::Float;
4-
use crate::int::Int;
4+
use crate::int::MinInt;
55

66
#[derive(Clone, Copy)]
77
enum Result {

‎src/float/div.rs

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
#![allow(clippy::needless_return)]
44

55
use crate::float::Float;
6-
use crate::int::{CastInto, DInt, HInt, Int};
6+
use crate::int::{CastInto, DInt, HInt, Int, MinInt};
7+
8+
use super::HalfRep;
79

810
fn div32<F: Float>(a: F, b: F) -> F
911
where
@@ -37,6 +39,11 @@ where
3739
let quiet_bit = implicit_bit >> 1;
3840
let qnan_rep = exponent_mask | quiet_bit;
3941

42+
// #[inline(always)]
43+
// fn negate<T: Int>(a: T) -> T {
44+
// T::wrapping_neg(a.signe)
45+
// }
46+
4047
#[inline(always)]
4148
fn negate_u32(a: u32) -> u32 {
4249
(<i32>::wrapping_neg(a as i32)) as u32
@@ -459,10 +466,14 @@ where
459466
i32: CastInto<F::Int>,
460467
F::Int: CastInto<i32>,
461468
u64: CastInto<F::Int>,
469+
u64: CastInto<HalfRep<F>>,
470+
F::Int: CastInto<HalfRep<F>>,
471+
F::Int: From<HalfRep<F>>,
472+
F::Int: From<u8>,
462473
F::Int: CastInto<u64>,
463474
i64: CastInto<F::Int>,
464475
F::Int: CastInto<i64>,
465-
F::Int: HInt,
476+
F::Int: HInt + DInt,
466477
{
467478
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
468479
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
@@ -471,7 +482,7 @@ where
471482
let one = F::Int::ONE;
472483
let zero = F::Int::ZERO;
473484
let hw = F::BITS / 2;
474-
let lo_mask = u64::MAX >> hw;
485+
let lo_mask = F::Int::MAX >> hw;
475486

476487
let significand_bits = F::SIGNIFICAND_BITS;
477488
let max_exponent = F::EXPONENT_MAX;
@@ -616,21 +627,23 @@ where
616627

617628
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
618629
// Starting with (n-1) half-width iterations
619-
let b_uq1_hw: u32 =
620-
(CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
630+
let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
631+
CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
632+
);
621633

622634
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
623635
// with W0 being either 16 or 32 and W0 <= HW.
624636
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
625637
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
626638

627639
// HW is at least 32. Shifting into the highest bits if needed.
628-
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
640+
let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));
629641

630642
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
631643
// so x0 fits to UQ0.HW without wrapping.
632-
let x_uq0_hw: u32 = {
633-
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
644+
let x_uq0_hw: HalfRep<F> = {
645+
let mut x_uq0_hw: HalfRep<F> =
646+
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
634647
// dbg!(x_uq0_hw);
635648
// An e_0 error is comprised of errors due to
636649
// * x0 being an inherently imprecise first approximation of 1/b_hw
@@ -661,8 +674,9 @@ where
661674
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
662675
// expected to be strictly positive because b_UQ1_hw has its highest bit set
663676
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
664-
let corr_uq1_hw: u32 =
665-
0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
677+
let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
678+
((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
679+
));
666680
// dbg!(corr_uq1_hw);
667681

668682
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -677,7 +691,9 @@ where
677691
// The fact corr_UQ1_hw was virtually round up (due to result of
678692
// multiplication being **first** truncated, then negated - to improve
679693
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
680-
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
694+
x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
695+
>> (hw - 1))
696+
.cast();
681697
// dbg!(x_uq0_hw);
682698
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
683699
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
@@ -707,7 +723,7 @@ where
707723
// be not below that value (see g(x) above), so it is safe to decrement just
708724
// once after the final iteration. On the other hand, an effective value of
709725
// divisor changes after this point (from b_hw to b), so adjust here.
710-
x_uq0_hw.wrapping_sub(1_u32)
726+
x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
711727
};
712728

713729
// Error estimations for full-precision iterations are calculated just
@@ -717,7 +733,7 @@ where
717733
// Simulating operations on a twice_rep_t to perform a single final full-width
718734
// iteration. Using ad-hoc multiplication implementations to take advantage
719735
// of particular structure of operands.
720-
let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
736+
let blo: F::Int = b_uq1 & lo_mask;
721737
// x_UQ0 = x_UQ0_hw * 2^HW - 1
722738
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
723739
//
@@ -726,19 +742,20 @@ where
726742
// + [ x_UQ0_hw * blo ]
727743
// - [ b_UQ1 ]
728744
// = [ result ][.... discarded ...]
729-
let corr_uq1 = negate_u64(
730-
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
731-
); // account for *possible* carry
732-
let lo_corr = corr_uq1 & lo_mask;
733-
let hi_corr = corr_uq1 >> hw;
745+
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
746+
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
747+
.wrapping_sub(one)
748+
.wrapping_neg(); // account for *possible* carry
749+
let lo_corr: F::Int = corr_uq1 & lo_mask;
750+
let hi_corr: F::Int = corr_uq1 >> hw;
734751
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
735-
let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
736-
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
737-
.wrapping_sub(2))
738-
.cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
739-
// 1 to account for possible carry
740-
// Just like the case of half-width iterations but with possibility
741-
// of overflowing by one extra Ulp of x_UQ0.
752+
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
753+
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
754+
.wrapping_sub(F::Int::from(2u8));
755+
// 1 to account for the highest bit of corr_UQ1 can be 1
756+
// 1 to account for possible carry
757+
// Just like the case of half-width iterations but with possibility
758+
// of overflowing by one extra Ulp of x_UQ0.
742759
x_uq0 -= one;
743760
// ... and then traditional fixup by 2 should work
744761

@@ -755,8 +772,8 @@ where
755772
x_uq0
756773
} else {
757774
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
758-
let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
759-
let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
775+
let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
776+
let x_uq0: F::Int = c.wrapping_sub(b_uq1);
760777
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
761778
x_uq0
762779
};
@@ -799,14 +816,27 @@ where
799816

800817
// Add 2 to U_N due to final decrement.
801818

802-
let reciprocal_precision: <F as Float>::Int = 220.cast();
819+
let reciprocal_precision: F::Int = if F::BITS == 32
820+
&& NUMBER_OF_HALF_ITERATIONS == 2
821+
&& NUMBER_OF_FULL_ITERATIONS == 1
822+
{
823+
74.cast()
824+
} else if F::BITS == 32 && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 {
825+
10.cast()
826+
} else if F::BITS == 64 && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 {
827+
220.cast()
828+
} else if F::BITS == 128 && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 {
829+
13922.cast()
830+
} else {
831+
panic!("invalid iterations for the specified bits");
832+
};
803833

804834
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
805835
let x_uq0 = x_uq0 - reciprocal_precision;
806836
// Now 1/b - (2*P) * 2^-W < x < 1/b
807837
// FIXME Is x_UQ0 still >= 0.5?
808838

809-
let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
839+
let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
810840
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
811841

812842
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
@@ -914,13 +944,8 @@ intrinsics! {
914944
div64(a, b)
915945
}
916946

917-
// TODO: how should `HInt` be handled?
918947
pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
919-
if cfg!(target_pointer_width = "64") {
920-
div32(a, b)
921-
} else {
922-
div64(a, b)
923-
}
948+
div64(a, b)
924949
}
925950

926951
#[cfg(target_arch = "arm")]

‎src/float/extend.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::float::Float;
2-
use crate::int::{CastInto, Int};
2+
use crate::int::{CastInto, Int, MinInt};
33

44
/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
55
fn extend<F: Float, R: Float>(a: F) -> R

‎src/float/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ pub(crate) trait Float:
5959
/// A mask for the significand
6060
const SIGNIFICAND_MASK: Self::Int;
6161

62-
/// The implicit bit of the float format
62+
// The implicit bit of the float format
6363
const IMPLICIT_BIT: Self::Int;
6464

6565
/// A mask for the exponent

‎src/float/mul.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::float::Float;
2-
use crate::int::{CastInto, DInt, HInt, Int};
2+
use crate::int::{CastInto, DInt, HInt, Int, MinInt};
33

44
fn mul<F: Float>(a: F, b: F) -> F
55
where

‎src/float/trunc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::float::Float;
2-
use crate::int::{CastInto, Int};
2+
use crate::int::{CastInto, Int, MinInt};
33

44
fn trunc<F: Float, R: Float>(a: F) -> R
55
where

‎src/int/addsub.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::int::{DInt, Int};
1+
use crate::int::{DInt, Int, MinInt};
22

3-
trait UAddSub: DInt {
3+
trait UAddSub: DInt + Int {
44
fn uadd(self, other: Self) -> Self {
55
let (lo, carry) = self.lo().overflowing_add(other.lo());
66
let hi = self.hi().wrapping_add(other.hi());
@@ -22,7 +22,7 @@ impl UAddSub for u128 {}
2222

2323
trait AddSub: Int
2424
where
25-
<Self as Int>::UnsignedInt: UAddSub,
25+
<Self as MinInt>::UnsignedInt: UAddSub,
2626
{
2727
fn add(self, other: Self) -> Self {
2828
Self::from_unsigned(self.unsigned().uadd(other.unsigned()))
@@ -37,7 +37,7 @@ impl AddSub for i128 {}
3737

3838
trait Addo: AddSub
3939
where
40-
<Self as Int>::UnsignedInt: UAddSub,
40+
<Self as MinInt>::UnsignedInt: UAddSub,
4141
{
4242
fn addo(self, other: Self) -> (Self, bool) {
4343
let sum = AddSub::add(self, other);
@@ -50,7 +50,7 @@ impl Addo for u128 {}
5050

5151
trait Subo: AddSub
5252
where
53-
<Self as Int>::UnsignedInt: UAddSub,
53+
<Self as MinInt>::UnsignedInt: UAddSub,
5454
{
5555
fn subo(self, other: Self) -> (Self, bool) {
5656
let sum = AddSub::sub(self, other);

‎src/int/big.rs

Lines changed: 364 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,364 @@
1+
//! Integers used for wide operations, larger than `u128`.
2+
3+
#![allow(unused)]
4+
5+
use crate::int::{DInt, HInt, Int, MinInt};
6+
use core::{fmt, ops};
7+
8+
const WORD_LO_MASK: u64 = 0x00000000ffffffff;
9+
const WORD_HI_MASK: u64 = 0xffffffff00000000;
10+
const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
11+
const U128_LO_MASK: u128 = u64::MAX as u128;
12+
const U128_HI_MASK: u128 = (u64::MAX as u128) << 64;
13+
14+
/// A 256-bit unsigned integer represented as 4 64-bit limbs.
15+
///
16+
/// Each limb is a native-endian number, but the array is little-limb-endian.
17+
#[allow(non_camel_case_types)]
18+
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
19+
pub struct u256(pub [u64; 4]);
20+
21+
impl u256 {
22+
pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
23+
24+
/// Reinterpret as a signed integer
25+
pub fn signed(self) -> i256 {
26+
i256(self.0)
27+
}
28+
}
29+
30+
/// A 256-bit signed integer represented as 4 64-bit limbs.
31+
///
32+
/// Each limb is a native-endian number, but the array is little-limb-endian.
33+
#[allow(non_camel_case_types)]
34+
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
35+
pub struct i256(pub [u64; 4]);
36+
37+
impl i256 {
38+
/// Reinterpret as an unsigned integer
39+
pub fn unsigned(self) -> u256 {
40+
u256(self.0)
41+
}
42+
}
43+
44+
impl MinInt for u256 {
45+
type OtherSign = i256;
46+
47+
type UnsignedInt = u256;
48+
49+
const SIGNED: bool = false;
50+
const BITS: u32 = 256;
51+
const ZERO: Self = Self([0u64; 4]);
52+
const ONE: Self = Self([1, 0, 0, 0]);
53+
const MIN: Self = Self([0u64; 4]);
54+
const MAX: Self = Self([u64::MAX; 4]);
55+
}
56+
57+
impl MinInt for i256 {
58+
type OtherSign = u256;
59+
60+
type UnsignedInt = u256;
61+
62+
const SIGNED: bool = false;
63+
const BITS: u32 = 256;
64+
const ZERO: Self = Self([0u64; 4]);
65+
const ONE: Self = Self([1, 0, 0, 0]);
66+
const MIN: Self = Self([0, 0, 0, 1 << 63]);
67+
const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]);
68+
}
69+
70+
// impl Int for i256 {
71+
// fn is_zero(self) -> bool {
72+
// self == Self::ZERO
73+
// }
74+
75+
// fn wrapping_neg(self) -> Self {
76+
// Self::ZERO.wrapping_sub(self)
77+
// }
78+
79+
// fn wrapping_add(self, other: Self) -> Self {
80+
// self.overflowing_add(other).0
81+
// }
82+
//
83+
// fn overflowing_add(self, other: Self) -> (Self, bool) {
84+
// let x0 = (u128::from(self.0[0])).wrapping_add(u128::from(other.0[0]));
85+
// let v0 = x0 as u64;
86+
// let c0 = x0 >> 64;
87+
88+
// let x1 = (u128::from(self.0[1]))
89+
// .wrapping_add(u128::from(other.0[1]))
90+
// .wrapping_add(c0);
91+
// let v1 = x1 as u64;
92+
// let c1 = x1 >> 64;
93+
94+
// let x2 = (u128::from(self.0[2]))
95+
// .wrapping_add(u128::from(other.0[2]))
96+
// .wrapping_add(c1);
97+
// let v2 = x2 as u64;
98+
// let c2 = x2 >> 64;
99+
100+
// let x3 = (u128::from(self.0[3]))
101+
// .wrapping_add(u128::from(other.0[3]))
102+
// .wrapping_add(c2);
103+
// let v3 = x3 as u64;
104+
// let c3 = x3 >> 64;
105+
106+
// (Self([v0, v1, v2, v3]), c3 > 0)
107+
// }
108+
// }
109+
110+
macro_rules! impl_common {
111+
($ty:ty) => {
112+
// impl ops::Add for $ty {
113+
// type Output = Self;
114+
115+
// fn add(self, rhs: Self) -> Self::Output {
116+
// let (val, wrapped) = self.overflowing_add(rhs);
117+
// debug_assert!(!wrapped, "attempted to add with overflow");
118+
// val
119+
// }
120+
// }
121+
122+
// impl ops::AddAssign for $ty {
123+
// fn add_assign(&mut self, rhs: Self) {
124+
// *self = *self + rhs
125+
// }
126+
// }
127+
128+
// impl ops::BitAnd for $ty {
129+
// type Output = Self;
130+
131+
// fn bitand(self, rhs: Self) -> Self::Output {
132+
// Self([
133+
// self.0[0] & rhs.0[0],
134+
// self.0[1] & rhs.0[1],
135+
// self.0[2] & rhs.0[2],
136+
// self.0[3] & rhs.0[3],
137+
// ])
138+
// }
139+
// }
140+
141+
// impl ops::BitAndAssign for $ty {
142+
// fn bitand_assign(&mut self, rhs: Self) {
143+
// *self = *self & rhs
144+
// }
145+
// }
146+
147+
impl ops::BitOr for $ty {
148+
type Output = Self;
149+
150+
fn bitor(mut self, rhs: Self) -> Self::Output {
151+
self.0[0] |= rhs.0[0];
152+
self.0[1] |= rhs.0[1];
153+
self.0[2] |= rhs.0[2];
154+
self.0[3] |= rhs.0[3];
155+
self
156+
}
157+
}
158+
159+
// impl ops::BitOrAssign for $ty {
160+
// fn bitor_assign(&mut self, rhs: Self) {
161+
// *self = *self | rhs
162+
// }
163+
// }
164+
165+
// impl ops::BitXor for $ty {
166+
// type Output = Self;
167+
168+
// fn bitxor(self, rhs: Self) -> Self::Output {
169+
// Self([
170+
// self.0[0] ^ rhs.0[0],
171+
// self.0[1] ^ rhs.0[1],
172+
// self.0[2] ^ rhs.0[2],
173+
// self.0[3] ^ rhs.0[3],
174+
// ])
175+
// }
176+
// }
177+
178+
// impl ops::BitXorAssign for $ty {
179+
// fn bitxor_assign(&mut self, rhs: Self) {
180+
// *self = *self ^ rhs
181+
// }
182+
// }
183+
184+
impl ops::Not for $ty {
185+
type Output = Self;
186+
187+
fn not(self) -> Self::Output {
188+
Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
189+
}
190+
}
191+
192+
impl ops::Shl<u32> for $ty {
193+
type Output = Self;
194+
195+
fn shl(self, rhs: u32) -> Self::Output {
196+
todo!()
197+
}
198+
}
199+
};
200+
}
201+
202+
impl_common!(i256);
203+
impl_common!(u256);
204+
205+
macro_rules! word {
206+
(1, $val:expr) => {
207+
(($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
208+
};
209+
(2, $val:expr) => {
210+
(($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64
211+
};
212+
(3, $val:expr) => {
213+
(($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64
214+
};
215+
(4, $val:expr) => {
216+
(($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64
217+
};
218+
}
219+
220+
impl HInt for u128 {
221+
type D = u256;
222+
223+
fn widen(self) -> Self::D {
224+
let w0 = self & u128::from(u64::MAX);
225+
let w1 = (self >> u64::BITS) & u128::from(u64::MAX);
226+
u256([w0 as u64, w1 as u64, 0, 0])
227+
}
228+
229+
fn zero_widen(self) -> Self::D {
230+
self.widen()
231+
}
232+
233+
fn zero_widen_mul(self, rhs: Self) -> Self::D {
234+
let product11: u64 = word!(1, self) * word!(1, rhs);
235+
let product12: u64 = word!(1, self) * word!(2, rhs);
236+
let product13: u64 = word!(1, self) * word!(3, rhs);
237+
let product14: u64 = word!(1, self) * word!(4, rhs);
238+
let product21: u64 = word!(2, self) * word!(1, rhs);
239+
let product22: u64 = word!(2, self) * word!(2, rhs);
240+
let product23: u64 = word!(2, self) * word!(3, rhs);
241+
let product24: u64 = word!(2, self) * word!(4, rhs);
242+
let product31: u64 = word!(3, self) * word!(1, rhs);
243+
let product32: u64 = word!(3, self) * word!(2, rhs);
244+
let product33: u64 = word!(3, self) * word!(3, rhs);
245+
let product34: u64 = word!(3, self) * word!(4, rhs);
246+
let product41: u64 = word!(4, self) * word!(1, rhs);
247+
let product42: u64 = word!(4, self) * word!(2, rhs);
248+
let product43: u64 = word!(4, self) * word!(3, rhs);
249+
let product44: u64 = word!(4, self) * word!(4, rhs);
250+
251+
let sum0: u128 = u128::from(product44);
252+
let sum1: u128 = u128::from(product34) + u128::from(product43);
253+
let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42);
254+
let sum3: u128 = u128::from(product14)
255+
+ u128::from(product23)
256+
+ u128::from(product32)
257+
+ u128::from(product41);
258+
let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31);
259+
let sum5: u128 = u128::from(product12) + u128::from(product21);
260+
let sum6: u128 = u128::from(product11);
261+
262+
let r0: u128 =
263+
(sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32);
264+
let r1: u128 = (sum0 >> 64)
265+
+ ((sum1 >> 32) & u128::from(WORD_FULL_MASK))
266+
+ (sum2 & u128::from(WORD_FULL_MASK))
267+
+ ((sum3 << 32) & u128::from(WORD_HI_MASK));
268+
269+
let lo = r0.wrapping_add(r1 << 64);
270+
let hi = (r1 >> 64)
271+
+ (sum1 >> 96)
272+
+ (sum2 >> 64)
273+
+ (sum3 >> 32)
274+
+ sum4
275+
+ (sum5 << 32)
276+
+ (sum6 << 64);
277+
278+
u256([
279+
(lo & U128_LO_MASK) as u64,
280+
((lo >> 64) & U128_LO_MASK) as u64,
281+
(hi & U128_LO_MASK) as u64,
282+
((hi >> 64) & U128_LO_MASK) as u64,
283+
])
284+
}
285+
286+
fn widen_mul(self, rhs: Self) -> Self::D {
287+
self.zero_widen_mul(rhs)
288+
}
289+
}
290+
291+
impl HInt for i128 {
292+
type D = i256;
293+
294+
fn widen(self) -> Self::D {
295+
let mut ret = self.unsigned().zero_widen().signed();
296+
if self.is_negative() {
297+
ret.0[2] = u64::MAX;
298+
ret.0[3] = u64::MAX;
299+
}
300+
ret
301+
}
302+
303+
fn zero_widen(self) -> Self::D {
304+
self.unsigned().zero_widen().signed()
305+
}
306+
307+
fn zero_widen_mul(self, rhs: Self) -> Self::D {
308+
self.unsigned().zero_widen_mul(rhs.unsigned()).signed()
309+
}
310+
311+
fn widen_mul(self, rhs: Self) -> Self::D {
312+
unimplemented!()
313+
// let mut res = self.zero_widen_mul(rhs);
314+
// if self.is_negative() ^ rhs.is_negative() {
315+
// // Sign extend as needed
316+
// // for word in res.0.iter_mut().rev() {
317+
// // let zeroes = word.leading_zeros();
318+
// // let leading = u64::MAX << (64 - zeroes);
319+
// // *word |= leading;
320+
// // if zeroes != 64 {
321+
// // break;
322+
// // }
323+
// // }
324+
// }
325+
326+
// res
327+
}
328+
}
329+
330+
impl DInt for u256 {
331+
type H = u128;
332+
333+
fn lo(self) -> Self::H {
334+
let mut tmp = [0u8; 16];
335+
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
336+
tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
337+
u128::from_le_bytes(tmp)
338+
}
339+
340+
fn hi(self) -> Self::H {
341+
let mut tmp = [0u8; 16];
342+
tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
343+
tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
344+
u128::from_le_bytes(tmp)
345+
}
346+
}
347+
348+
impl DInt for i256 {
349+
type H = i128;
350+
351+
fn lo(self) -> Self::H {
352+
let mut tmp = [0u8; 16];
353+
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
354+
tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
355+
i128::from_le_bytes(tmp)
356+
}
357+
358+
fn hi(self) -> Self::H {
359+
let mut tmp = [0u8; 16];
360+
tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
361+
tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
362+
i128::from_le_bytes(tmp)
363+
}
364+
}

‎src/int/mod.rs

Lines changed: 125 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,29 @@ use core::ops;
33
mod specialized_div_rem;
44

55
pub mod addsub;
6+
mod big;
67
pub mod leading_zeros;
78
pub mod mul;
89
pub mod sdiv;
910
pub mod shift;
1011
pub mod udiv;
1112

12-
pub use self::leading_zeros::__clzsi2;
13+
pub use big::{i256, u256};
14+
pub use leading_zeros::__clzsi2;
1315

1416
public_test_dep! {
15-
/// Trait for some basic operations on integers
16-
pub(crate) trait Int:
17-
Copy
17+
/// Minimal integer implementations needed on all integer types, including wide integers.
18+
pub(crate) trait MinInt: Copy
1819
+ core::fmt::Debug
19-
+ PartialEq
20-
+ PartialOrd
21-
+ ops::AddAssign
22-
+ ops::SubAssign
23-
+ ops::BitAndAssign
24-
+ ops::BitOrAssign
25-
+ ops::BitXorAssign
26-
+ ops::ShlAssign<i32>
27-
+ ops::ShrAssign<u32>
28-
+ ops::Add<Output = Self>
29-
+ ops::Sub<Output = Self>
30-
+ ops::Div<Output = Self>
31-
+ ops::Shl<u32, Output = Self>
32-
+ ops::Shr<u32, Output = Self>
3320
+ ops::BitOr<Output = Self>
34-
+ ops::BitXor<Output = Self>
35-
+ ops::BitAnd<Output = Self>
3621
+ ops::Not<Output = Self>
22+
+ ops::Shl<u32, Output = Self>
3723
{
24+
3825
/// Type with the same width but other signedness
39-
type OtherSign: Int;
26+
type OtherSign: MinInt;
4027
/// Unsigned version of Self
41-
type UnsignedInt: Int;
28+
type UnsignedInt: MinInt;
4229

4330
/// If `Self` is a signed integer
4431
const SIGNED: bool;
@@ -50,13 +37,46 @@ pub(crate) trait Int:
5037
const ONE: Self;
5138
const MIN: Self;
5239
const MAX: Self;
40+
}
41+
}
5342

43+
public_test_dep! {
44+
/// Trait for some basic operations on integers
45+
pub(crate) trait Int: MinInt
46+
+ PartialEq
47+
+ PartialOrd
48+
+ ops::AddAssign
49+
+ ops::SubAssign
50+
+ ops::BitAndAssign
51+
+ ops::BitOrAssign
52+
+ ops::BitXorAssign
53+
+ ops::ShlAssign<i32>
54+
+ ops::ShrAssign<u32>
55+
+ ops::Add<Output = Self>
56+
+ ops::Sub<Output = Self>
57+
+ ops::Mul<Output = Self>
58+
+ ops::Div<Output = Self>
59+
+ ops::Shr<u32, Output = Self>
60+
+ ops::BitXor<Output = Self>
61+
+ ops::BitAnd<Output = Self>
62+
{
5463
/// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing
5564
/// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111,
5665
/// 112,119,120,125,126,127].
57-
const FUZZ_LENGTHS: [u8; 20];
66+
const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(<Self as MinInt>::BITS);
67+
5868
/// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128.
59-
const FUZZ_NUM: usize;
69+
const FUZZ_NUM: usize = {
70+
let log2 = (<Self as MinInt>::BITS - 1).count_ones() as usize;
71+
if log2 == 3 {
72+
// case for u8
73+
6
74+
} else {
75+
// 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
76+
// boundaries.
77+
8 + (4 * (log2 - 4))
78+
}
79+
};
6080

6181
fn unsigned(self) -> Self::UnsignedInt;
6282
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
@@ -83,74 +103,54 @@ pub(crate) trait Int:
83103
}
84104
}
85105

106+
pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] {
107+
let mut v = [0u8; 20];
108+
v[0] = 0;
109+
v[1] = 1;
110+
v[2] = 2; // important for parity and the iX::MIN case when reversed
111+
let mut i = 3;
112+
113+
// No need for any more until the byte boundary, because there should be no algorithms
114+
// that are sensitive to anything not next to byte boundaries after 2. We also scale
115+
// in powers of two, which is important to prevent u128 corner tests from getting too
116+
// big.
117+
let mut l = 8;
118+
loop {
119+
if l >= ((bits / 2) as u8) {
120+
break;
121+
}
122+
// get both sides of the byte boundary
123+
v[i] = l - 1;
124+
i += 1;
125+
v[i] = l;
126+
i += 1;
127+
l *= 2;
128+
}
129+
130+
if bits != 8 {
131+
// add the lower side of the middle boundary
132+
v[i] = ((bits / 2) - 1) as u8;
133+
i += 1;
134+
}
135+
136+
// We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
137+
// boundary because of algorithms that split the high part up. We reverse the scaling
138+
// as we go to Self::BITS.
139+
let mid = i;
140+
let mut j = 1;
141+
loop {
142+
v[i] = (bits as u8) - (v[mid - j]) - 1;
143+
if j == mid {
144+
break;
145+
}
146+
i += 1;
147+
j += 1;
148+
}
149+
v
150+
}
151+
86152
macro_rules! int_impl_common {
87153
($ty:ty) => {
88-
const BITS: u32 = <Self as Int>::ZERO.count_zeros();
89-
const SIGNED: bool = Self::MIN != Self::ZERO;
90-
91-
const ZERO: Self = 0;
92-
const ONE: Self = 1;
93-
const MIN: Self = <Self>::MIN;
94-
const MAX: Self = <Self>::MAX;
95-
96-
const FUZZ_LENGTHS: [u8; 20] = {
97-
let bits = <Self as Int>::BITS;
98-
let mut v = [0u8; 20];
99-
v[0] = 0;
100-
v[1] = 1;
101-
v[2] = 2; // important for parity and the iX::MIN case when reversed
102-
let mut i = 3;
103-
// No need for any more until the byte boundary, because there should be no algorithms
104-
// that are sensitive to anything not next to byte boundaries after 2. We also scale
105-
// in powers of two, which is important to prevent u128 corner tests from getting too
106-
// big.
107-
let mut l = 8;
108-
loop {
109-
if l >= ((bits / 2) as u8) {
110-
break;
111-
}
112-
// get both sides of the byte boundary
113-
v[i] = l - 1;
114-
i += 1;
115-
v[i] = l;
116-
i += 1;
117-
l *= 2;
118-
}
119-
120-
if bits != 8 {
121-
// add the lower side of the middle boundary
122-
v[i] = ((bits / 2) - 1) as u8;
123-
i += 1;
124-
}
125-
126-
// We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
127-
// boundary because of algorithms that split the high part up. We reverse the scaling
128-
// as we go to Self::BITS.
129-
let mid = i;
130-
let mut j = 1;
131-
loop {
132-
v[i] = (bits as u8) - (v[mid - j]) - 1;
133-
if j == mid {
134-
break;
135-
}
136-
i += 1;
137-
j += 1;
138-
}
139-
v
140-
};
141-
142-
const FUZZ_NUM: usize = {
143-
let log2 = (<Self as Int>::BITS - 1).count_ones() as usize;
144-
if log2 == 3 {
145-
// case for u8
146-
6
147-
} else {
148-
// 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
149-
// boundaries.
150-
8 + (4 * (log2 - 4))
151-
}
152-
};
153-
154154
fn from_bool(b: bool) -> Self {
155155
b as $ty
156156
}
@@ -203,10 +203,20 @@ macro_rules! int_impl_common {
203203

204204
macro_rules! int_impl {
205205
($ity:ty, $uty:ty) => {
206-
impl Int for $uty {
206+
impl MinInt for $uty {
207207
type OtherSign = $ity;
208208
type UnsignedInt = $uty;
209209

210+
const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
211+
const SIGNED: bool = Self::MIN != Self::ZERO;
212+
213+
const ZERO: Self = 0;
214+
const ONE: Self = 1;
215+
const MIN: Self = <Self>::MIN;
216+
const MAX: Self = <Self>::MAX;
217+
}
218+
219+
impl Int for $uty {
210220
fn unsigned(self) -> $uty {
211221
self
212222
}
@@ -228,10 +238,20 @@ macro_rules! int_impl {
228238
int_impl_common!($uty);
229239
}
230240

231-
impl Int for $ity {
241+
impl MinInt for $ity {
232242
type OtherSign = $uty;
233243
type UnsignedInt = $uty;
234244

245+
const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
246+
const SIGNED: bool = Self::MIN != Self::ZERO;
247+
248+
const ZERO: Self = 0;
249+
const ONE: Self = 1;
250+
const MIN: Self = <Self>::MIN;
251+
const MAX: Self = <Self>::MAX;
252+
}
253+
254+
impl Int for $ity {
235255
fn unsigned(self) -> $uty {
236256
self as $uty
237257
}
@@ -259,18 +279,22 @@ int_impl!(i128, u128);
259279
public_test_dep! {
260280
/// Trait for integers twice the bit width of another integer. This is implemented for all
261281
/// primitives except for `u8`, because there is not a smaller primitive.
262-
pub(crate) trait DInt: Int {
282+
pub(crate) trait DInt: MinInt {
263283
/// Integer that is half the bit width of the integer this trait is implemented for
264-
type H: HInt<D = Self> + Int;
284+
type H: HInt<D = Self>;
265285

266286
/// Returns the low half of `self`
267287
fn lo(self) -> Self::H;
268288
/// Returns the high half of `self`
269289
fn hi(self) -> Self::H;
270290
/// Returns the low and high halves of `self` as a tuple
271-
fn lo_hi(self) -> (Self::H, Self::H);
291+
fn lo_hi(self) -> (Self::H, Self::H) {
292+
(self.lo(), self.hi())
293+
}
272294
/// Constructs an integer using lower and higher half parts
273-
fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self;
295+
fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
296+
lo.zero_widen() | hi.widen_hi()
297+
}
274298
}
275299
}
276300

@@ -279,15 +303,17 @@ public_test_dep! {
279303
/// primitives except for `u128`, because it there is not a larger primitive.
280304
pub(crate) trait HInt: Int {
281305
/// Integer that is double the bit width of the integer this trait is implemented for
282-
type D: DInt<H = Self> + Int;
306+
type D: DInt<H = Self> + MinInt;
283307

284308
/// Widens (using default extension) the integer to have double bit width
285309
fn widen(self) -> Self::D;
286310
/// Widens (zero extension only) the integer to have double bit width. This is needed to get
287311
/// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable
288312
fn zero_widen(self) -> Self::D;
289313
/// Widens the integer to have double bit width and shifts the integer into the higher bits
290-
fn widen_hi(self) -> Self::D;
314+
fn widen_hi(self) -> Self::D {
315+
self.widen() << <Self as MinInt>::BITS
316+
}
291317
/// Widening multiplication with zero widening. This cannot overflow.
292318
fn zero_widen_mul(self, rhs: Self) -> Self::D;
293319
/// Widening multiplication. This cannot overflow.
@@ -305,13 +331,7 @@ macro_rules! impl_d_int {
305331
self as $X
306332
}
307333
fn hi(self) -> Self::H {
308-
(self >> <$X as Int>::BITS) as $X
309-
}
310-
fn lo_hi(self) -> (Self::H, Self::H) {
311-
(self.lo(), self.hi())
312-
}
313-
fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
314-
lo.zero_widen() | hi.widen_hi()
334+
(self >> <$X as MinInt>::BITS) as $X
315335
}
316336
}
317337
)*
@@ -330,9 +350,6 @@ macro_rules! impl_h_int {
330350
fn zero_widen(self) -> Self::D {
331351
(self as $uH) as $X
332352
}
333-
fn widen_hi(self) -> Self::D {
334-
(self as $X) << <$H as Int>::BITS
335-
}
336353
fn zero_widen_mul(self, rhs: Self) -> Self::D {
337354
self.zero_widen().wrapping_mul(rhs.zero_widen())
338355
}

‎src/int/mul.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::int::{DInt, HInt, Int};
22

3-
trait Mul: DInt
3+
trait Mul: DInt + Int
44
where
55
Self::H: DInt,
66
{
@@ -30,7 +30,7 @@ where
3030
impl Mul for u64 {}
3131
impl Mul for i128 {}
3232

33-
pub(crate) trait UMulo: Int + DInt {
33+
pub(crate) trait UMulo: DInt + Int {
3434
fn mulo(self, rhs: Self) -> (Self, bool) {
3535
match (self.hi().is_zero(), rhs.hi().is_zero()) {
3636
// overflow is guaranteed

‎src/int/shift.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::int::{DInt, HInt, Int};
1+
use crate::int::{DInt, HInt, Int, MinInt};
22

33
trait Ashl: DInt {
44
/// Returns `a << b`, requires `b < Self::BITS`

‎src/lib.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,21 @@ extern crate core;
4444

4545
#[macro_use]
4646
mod macros;
47+
/// `dbg!`-style debugging helper for this `no_std` crate: forwards the raw
/// bytes of the expression's value, labeled with its source location and
/// text, to the host-supplied `print_callback` (via `write_val`), then
/// returns the value unchanged.
macro_rules! vdbg {
    ($val:expr $(,)?) => {
        // Use of `match` here is intentional because it affects the lifetimes
        // of temporaries - https://stackoverflow.com/a/48732525/1063961
        match $val {
            tmp => {
                $crate::write_val(
                    tmp,
                    concat!("[", file!(), ":", line!(), "] ", stringify!($val), " = "),
                );
                tmp
            }
        }
    };
}
4762

4863
pub mod float;
4964
pub mod int;
@@ -80,3 +95,45 @@ pub mod x86;
8095
pub mod x86_64;
8196

8297
pub mod probestack;
98+
99+
// Hacky way to print values since we don't have `std` for the crate
mod val_print {
    extern "C" {
        // Supplied by the consumer via the `set_val_callback!` macro; receives
        // a value's raw bytes plus a label string.
        fn print_callback(val_ptr: *const u8, val_sz: usize, name_ptr: *const u8, name_len: usize);
    }

    /// Sends the raw bytes of `val`, labeled with `name`, to the external
    /// `print_callback` for display.
    pub fn write_val<T: Copy>(val: T, name: &str) {
        // SAFETY: both pointer/length pairs describe valid, initialized
        // memory that outlives the call. Soundness also relies on the linked
        // `print_callback` matching this C signature — it is expected to be
        // the one generated by `set_val_callback!`.
        unsafe {
            print_callback(
                core::ptr::addr_of!(val).cast(),
                core::mem::size_of::<T>(),
                name.as_ptr(),
                name.len(),
            )
        };
    }
}
116+
117+
pub use val_print::write_val;
118+
119+
#[macro_export]
macro_rules! set_val_callback {
    () => {
        /// Callback target for the builtins crate's `write_val`/`vdbg!`:
        /// prints a labeled value as hex, one byte at a time.
        #[no_mangle]
        unsafe extern "C" fn print_callback(
            val_ptr: *const u8,
            val_sz: usize,
            name_ptr: *const u8,
            name_len: usize,
        ) {
            // SAFETY: the caller (`write_val`) passes valid pointer/length
            // pairs; the name is UTF-8 because it originates from a `&str`.
            let val = unsafe { core::slice::from_raw_parts(val_ptr, val_sz) };
            let name_slice = unsafe { core::slice::from_raw_parts(name_ptr, name_len) };
            let name = core::str::from_utf8(name_slice).unwrap();
            print!("{}: 0x", name);
            // Iterate in reverse so the bytes read most-significant-first
            // (assumes a little-endian value layout — NOTE(review): confirm).
            for byte in val.iter().rev() {
                print!("{:02x}", byte);
            }
            println!();
        }
    };
}

‎testcrate/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,5 @@ no-asm = ["compiler_builtins/no-asm"]
3333
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
3434
mem = ["compiler_builtins/mem"]
3535
mangled-names = ["compiler_builtins/mangled-names"]
36+
# Skip tests that rely on f128 symbols being available on the system
37+
no-sys-f128 = []

‎testcrate/benches/float.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#![feature(test, f16, f128)]
2+
3+
extern crate test;
4+
use core::hint::black_box;
5+
use test::Bencher;
6+
7+
extern crate compiler_builtins;
8+
9+
/// Edge-case inputs for a float type `$ty`: extremes, specials (NaN,
/// infinities), the smallest positive normal, and the units.
macro_rules! test_values {
    ($ty:ty) => {
        &[
            <$ty>::MIN,
            <$ty>::MAX,
            <$ty>::NAN,
            <$ty>::INFINITY,
            <$ty>::NEG_INFINITY,
            <$ty>::MIN_POSITIVE,
            0.0,
            1.0,
            -1.0,
        ]
    };
}
24+
25+
/// Returns the Cartesian product of `vals` with itself, in row-major order
/// (every element paired with every element, first component varying slowest).
fn combine2<T: Copy>(vals: &[T]) -> Vec<(T, T)> {
    vals.iter()
        .flat_map(|&a| vals.iter().map(move |&b| (a, b)))
        .collect()
}
34+
35+
/// Drives `$b` (a `test::Bencher`) over `$fn` applied to every pair of
/// edge-case values of `$ty`. Inputs and output are `black_box`ed so the
/// optimizer cannot const-fold the call away; a fixed warmup runs first.
macro_rules! test_iter {
    ($b:ident, $ty:ty, $fn:path) => {{
        let vals = combine2(test_values!($ty));
        let iter_loop = || {
            for (a, b) in vals.iter().copied() {
                black_box($fn(black_box(a), black_box(b)));
            }
        };

        // Warmup
        for _ in 0..1000 {
            iter_loop();
        }

        $b.iter(iter_loop);
    }};
}
52+
53+
// NOTE(review): `foobar!` is a placeholder name; something descriptive such
// as `bench_float_binop!` would read better (renaming also requires updating
// the invocation at the bottom of this file).
/// For each `ty, rust_fn, builtin_fn, mod::sym` row, emits two `#[bench]`
/// functions for one binary float operation: `$rust_fn` benchmarks this
/// crate's implementation, `$builtin_fn` benchmarks the identically named
/// symbol resolved from the system libraries via the C ABI.
macro_rules! foobar {
    ($($ty:ty, $rust_fn:ident, $builtin_fn:ident, $mod:ident::$sym:ident);* $(;)?) => {
        $(
            #[bench]
            fn $rust_fn(b: &mut Bencher) {
                // Equalize with the builtin function which is called separately
                #[inline(never)]
                fn inline_wrapper(a: $ty, b: $ty) -> $ty {
                    compiler_builtins::float::$mod::$sym(black_box(a), black_box(b))
                }

                test_iter!(b, $ty, inline_wrapper);
            }

            #[bench]
            fn $builtin_fn(b: &mut Bencher) {
                // System-provided symbol with the same name, bound at link time.
                extern "C" {
                    fn $sym(a: $ty, b: $ty) -> $ty;
                }

                unsafe {
                    test_iter!(b, $ty, $sym);
                }
            }
        )*
    };
}
80+
81+
// Benchmark add/sub/mul/div for `f32` and `f64`: the crate's Rust
// implementation (`*_rust`) against the system builtin (`*_builtin`).
foobar! {
    f32, addsf3_rust, addsf3_builtin, add::__addsf3;
    f32, subsf3_rust, subsf3_builtin, sub::__subsf3;
    f32, mulsf3_rust, mulsf3_builtin, mul::__mulsf3;
    f32, divsf3_rust, divsf3_builtin, div::__divsf3;
    f64, adddf3_rust, adddf3_builtin, add::__adddf3;
    f64, subdf3_rust, subdf3_builtin, sub::__subdf3;
    f64, muldf3_rust, muldf3_builtin, mul::__muldf3;
    f64, divdf3_rust, divdf3_builtin, div::__divdf3;
}

‎testcrate/build.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
use std::env;

/// Build script for the test crate: detects targets whose system libraries
/// lack `f128` symbols and enables the `no-sys-f128` cfg so the corresponding
/// tests are skipped.
fn main() {
    let target = env::var("TARGET").unwrap();

    // These platforms do not have f128 symbols available in their system libraries, so
    // skip related tests.
    let lacks_sys_f128 = target.starts_with("arm-")
        || ["apple-darwin", "windows-msvc"]
            .iter()
            .any(|fragment| target.contains(fragment));

    if lacks_sys_f128 {
        println!("cargo:warning=skipping `f128` tests; system does not have relevant symbols");
        println!("cargo:rustc-cfg=feature=\"no-sys-f128\"");
    }
}

‎testcrate/src/lib.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#![no_std]
1616

1717
use compiler_builtins::float::Float;
18-
use compiler_builtins::int::Int;
18+
use compiler_builtins::int::{Int, MinInt};
1919

2020
use rand_xoshiro::rand_core::{RngCore, SeedableRng};
2121
use rand_xoshiro::Xoshiro128StarStar;
@@ -101,7 +101,10 @@ macro_rules! edge_cases {
101101

102102
/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find
103103
/// edge cases, followed by a more random fuzzer that runs `n` times.
104-
pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F) {
104+
pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F)
105+
where
106+
<I as MinInt>::UnsignedInt: Int,
107+
{
105108
// edge case tester. Calls `f` 210 times for u128.
106109
// zero gets skipped by the loop
107110
f(I::ZERO);
@@ -111,15 +114,18 @@ pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F) {
111114

112115
// random fuzzer
113116
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
114-
let mut x: I = Int::ZERO;
117+
let mut x: I = MinInt::ZERO;
115118
for _ in 0..n {
116119
fuzz_step(&mut rng, &mut x);
117120
f(x)
118121
}
119122
}
120123

121124
/// The same as `fuzz`, except `f` has two inputs.
122-
pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F) {
125+
pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F)
126+
where
127+
<I as MinInt>::UnsignedInt: Int,
128+
{
123129
// Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`.
124130
edge_cases!(I, case, {
125131
f(I::ZERO, case);
@@ -150,10 +156,10 @@ pub fn fuzz_shift<I: Int, F: Fn(I, u32)>(f: F) {
150156
// Shift functions are very simple and do not need anything other than shifting a small
151157
// set of random patterns for every fuzz length.
152158
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
153-
let mut x: I = Int::ZERO;
159+
let mut x: I = MinInt::ZERO;
154160
for i in 0..I::FUZZ_NUM {
155161
fuzz_step(&mut rng, &mut x);
156-
f(x, Int::ZERO);
162+
f(x, MinInt::ZERO);
157163
f(x, I::FUZZ_LENGTHS[i] as u32);
158164
}
159165
}

‎testcrate/tests/addsub.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#![allow(unused_macros)]
2+
#![feature(f128)]
3+
#![feature(f16)]
24

35
use testcrate::*;
46

@@ -80,13 +82,13 @@ macro_rules! float_sum {
8082
let sub1: $f = $fn_sub(x, y);
8183
if !Float::eq_repr(add0, add1) {
8284
panic!(
83-
"{}({}, {}): std: {}, builtins: {}",
85+
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
8486
stringify!($fn_add), x, y, add0, add1
8587
);
8688
}
8789
if !Float::eq_repr(sub0, sub1) {
8890
panic!(
89-
"{}({}, {}): std: {}, builtins: {}",
91+
"{:?}({:?}, {:?}): std: {:?}, builtins: {:?}",
9092
stringify!($fn_sub), x, y, sub0, sub1
9193
);
9294
}
@@ -110,6 +112,16 @@ fn float_addsub() {
110112
);
111113
}
112114

115+
#[test]
#[cfg(not(feature = "no-sys-f128"))]
fn float_addsub_f128() {
    use compiler_builtins::float::{add::__addtf3, sub::__subtf3, Float};

    // Fuzz `f128` add/sub against the host's native operations. Skipped on
    // targets without system `f128` symbols (cfg set by testcrate/build.rs).
    float_sum!(
        f128, __addtf3, __subtf3;
    );
}
124+
113125
#[cfg(target_arch = "arm")]
114126
#[test]
115127
fn float_addsub_arm() {

‎testcrate/tests/big.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
use compiler_builtins::int::{i256, u256, HInt, Int, MinInt};
2+
3+
const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
4+
5+
/// Print a `u256` as hex since we can't add format implementations
///
/// Limbs are stored least-significant first (see the `widen_u128` test), so
/// they are printed in reverse to yield one 64-nibble `0x…` string.
fn hexu(v: u256) -> String {
    format!(
        "0x{:016x}{:016x}{:016x}{:016x}",
        v.0[3], v.0[2], v.0[1], v.0[0]
    )
}
12+
13+
/// Print an `i256` as hex by reinterpreting its bit pattern as unsigned.
fn hexi(v: i256) -> String {
    hexu(v.unsigned())
}
16+
17+
#[test]
fn widen_u128() {
    // Zero-extension: the two upper limbs must stay 0.
    assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
    // A value with distinct low/high halves lands in the right limbs.
    assert_eq!(
        LOHI_SPLIT.widen(),
        u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])
    );
}
25+
26+
#[test]
fn widen_i128() {
    // Sign-extension: negative values fill the upper limbs with ones.
    assert_eq!((-1i128).widen(), u256::MAX.signed());
    assert_eq!(
        (LOHI_SPLIT as i128).widen(),
        i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
    );
    // `zero_widen` must ignore the sign and zero-extend instead.
    assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
}
35+
36+
#[test]
fn widen_mul_u128() {
    // (lhs, rhs, expected 256-bit product); limbs least-significant first.
    let tests = [
        (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
        (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
        // TODO: https://github.com/rust-lang/compiler-builtins/pull/587#issuecomment-2060543566
        // (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
        (u128::MIN, u128::MIN, u256::ZERO),
        (1234, 0, u256::ZERO),
        (0, 1234, u256::ZERO),
    ];

    let mut errors = Vec::new();
    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
        let res = a.widen_mul(b);
        let res_z = a.zero_widen_mul(b);
        // For unsigned operands the two widening multiplies must agree.
        assert_eq!(res, res_z);
        if res != exp {
            errors.push((i, a, b, exp, res));
        }
    }

    // Collect and report every failing case before asserting, so one run
    // shows all mismatches rather than stopping at the first.
    for (i, a, b, exp, res) in &errors {
        eprintln!(
            "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}",
            hexu(*exp),
            hexu(*res)
        );
    }
    assert!(errors.is_empty());
}
67+
68+
// #[test]
69+
// fn widen_mul_i128() {
70+
// let tests = [
71+
// (
72+
// i128::MAX / 2,
73+
// 2_i128,
74+
// i256([u64::MAX - 1, u64::MAX >> 1, 0, 0]),
75+
// ),
76+
// (i128::MAX, 2_i128, i256([u64::MAX - 1, u64::MAX, 0, 0])),
77+
// (i128::MIN, 2_i128, i256([0, 0, u64::MAX, u64::MAX])),
78+
// (
79+
// i128::MAX,
80+
// i128::MAX,
81+
// i256([1, 0, u64::MAX - 1, u64::MAX >> 2]),
82+
// ),
83+
// (i128::MAX, i128::MIN, i256([0, 0, 0, 0b11 << 62])),
84+
// (i128::MIN, i128::MIN, i256([0, 0, 0, 0])),
85+
// (1234, 0, i256::ZERO),
86+
// (0, 1234, i256::ZERO),
87+
// (-1234, 0, i256::ZERO),
88+
// (0, -1234, i256::ZERO),
89+
// ];
90+
91+
// let mut errors = Vec::new();
92+
// for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
93+
// let res = a.widen_mul(b);
94+
// // TODO check zero widen mul
95+
// if res != exp {
96+
// errors.push((i, a, b, exp, res));
97+
// }
98+
// }
99+
100+
// for (i, a, b, exp, res) in &errors {
101+
// eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexi(*exp), hexi(*res));
102+
// }
103+
// assert!(errors.is_empty());
104+
// }

‎testcrate/tests/cmp.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#![allow(unused_macros)]
2+
#![feature(f128)]
3+
#![feature(f16)]
24

35
use testcrate::*;
46

@@ -16,7 +18,10 @@ macro_rules! cmp {
1618
};
1719
let cmp1 = $fn($x, $y);
1820
if cmp0 != cmp1 {
19-
panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1);
21+
panic!(
22+
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
23+
stringify!($fn_builtins), $x, $y, cmp0, cmp1
24+
);
2025
}
2126
)*
2227
};
@@ -55,6 +60,26 @@ fn float_comparisons() {
5560
});
5661
}
5762

63+
#[cfg(not(feature = "no-sys-f128"))]
#[test]
fn float_comparisons_f128() {
    use compiler_builtins::float::cmp::{
        __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2,
    };

    // Fuzz `f128` comparison intrinsics against native operators. The leading
    // value in each `cmp!` row appears to be the expected result when an
    // operand is NaN (cf. `cmp2!`'s `$unordered_val`) — NOTE(review): confirm
    // against the `cmp!` definition.
    fuzz_float_2(N, |x: f128, y: f128| {
        assert_eq!(__unordtf2(x, y) != 0, x.is_nan() || y.is_nan());
        cmp!(x, y,
            1, __lttf2;
            1, __letf2;
            1, __eqtf2;
            -1, __getf2;
            -1, __gttf2;
            1, __netf2;
        );
    });
}
82+
5883
macro_rules! cmp2 {
5984
($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => {
6085
$(

‎testcrate/tests/div_rem.rs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
#![allow(unused_macros)]
2+
#![feature(f128)]
3+
#![feature(f16)]
24

35
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
46
use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc};
57
use testcrate::*;
68

9+
compiler_builtins::set_val_callback!();
10+
711
// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows
812
// that sometimes 100_000 iterations of the random fuzzer is needed.
913

@@ -107,12 +111,15 @@ macro_rules! float {
107111
($($i:ty, $fn:ident);*;) => {
108112
$(
109113
fuzz_float_2(N, |x: $i, y: $i| {
114+
dbg!(x, y);
110115
let quo0 = x / y;
116+
dbg!(quo0);
111117
let quo1: $i = $fn(x, y);
118+
dbg!(quo1);
112119
#[cfg(not(target_arch = "arm"))]
113120
if !Float::eq_repr(quo0, quo1) {
114121
panic!(
115-
"{}({}, {}): std: {}, builtins: {}",
122+
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
116123
stringify!($fn), x, y, quo0, quo1
117124
);
118125
}
@@ -122,7 +129,7 @@ macro_rules! float {
122129
if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) {
123130
if !Float::eq_repr(quo0, quo1) {
124131
panic!(
125-
"{}({}, {}): std: {}, builtins: {}",
132+
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
126133
stringify!($fn), x, y, quo0, quo1
127134
);
128135
}
@@ -146,6 +153,24 @@ fn float_div() {
146153
);
147154
}
148155

156+
#[cfg(not(feature = "no-sys-f128"))]
#[test]
fn float_div_f128() {
    use compiler_builtins::float::{div::__divtf3, Float};

    // Fuzz `f128` division against the host's native `/`. Skipped on targets
    // without system `f128` symbols (cfg set by testcrate/build.rs).
    float!(
        f128, __divtf3;
    );
}
165+
166+
// NOTE(review): debugging scaffolding from the bigint work — this prints a
// result but asserts nothing. Consider adding an assertion (smallest
// subnormal divided by itself should be 1.0) or removing before merge.
#[test]
fn div_failures() {
    use compiler_builtins::float::{div::__divtf3, Float};
    // Both operands are the smallest positive subnormal `f128` (bit pattern 0x1).
    let a = f128::from_bits(0x1);
    let b = f128::from_bits(0x1);
    dbg!(__divtf3(a, b));
}
173+
149174
#[cfg(target_arch = "arm")]
150175
#[test]
151176
fn float_div_arm() {

‎testcrate/tests/mul.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#![allow(unused_macros)]
2+
#![feature(f128)]
3+
#![feature(f16)]
24

35
use testcrate::*;
46

@@ -91,7 +93,7 @@ macro_rules! float_mul {
9193
if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) {
9294
if !Float::eq_repr(mul0, mul1) {
9395
panic!(
94-
"{}({}, {}): std: {}, builtins: {}",
96+
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
9597
stringify!($fn), x, y, mul0, mul1
9698
);
9799
}
@@ -115,6 +117,16 @@ fn float_mul() {
115117
);
116118
}
117119

120+
#[test]
#[cfg(not(feature = "no-sys-f128"))]
fn float_mul_f128() {
    use compiler_builtins::float::{mul::__multf3, Float};

    // Fuzz `f128` multiplication against the host's native `*`. Skipped on
    // targets without system `f128` symbols (cfg set by testcrate/build.rs).
    float_mul!(
        f128, __multf3;
    );
}
129+
118130
#[cfg(target_arch = "arm")]
119131
#[test]
120132
fn float_mul_arm() {

0 commit comments

Comments
 (0)
Please sign in to comment.