Skip to content

Commit 6fab367

Browse files
committed
Implement u256 with two u128s rather than u64
This produces better assembly, e.g. on aarch64:

        .globl libm::u128_wmul
        .p2align 2
    libm::u128_wmul:
    Lfunc_begin124:
        .cfi_startproc
        mul x9, x2, x0
        umulh x10, x2, x0
        umulh x11, x3, x0
        mul x12, x3, x0
        umulh x13, x2, x1
        mul x14, x2, x1
        umulh x15, x3, x1
        mul x16, x3, x1
        adds x10, x10, x14
        cinc x13, x13, hs
        adds x13, x13, x16
        cinc x14, x15, hs
        adds x10, x10, x12
        cinc x11, x11, hs
        adds x11, x13, x11
        stp x9, x10, [x8]
        cinc x9, x14, hs
        stp x11, x9, [x8, #16]
        ret

The original was ~70 instructions, so the improvement is significant. With these changes, the result is reasonably close to what LLVM generates using `u256` operands [1].

[1]: https://llvm.godbolt.org/z/re1aGdaqY
1 parent 0d9ab5c commit 6fab367

File tree

7 files changed

+298
-188
lines changed

7 files changed

+298
-188
lines changed

crates/libm-test/benches/icount.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ fn setup_u128_mul() -> Vec<(u128, u128)> {
7777
v
7878
}
7979

80-
/*
8180
fn setup_u256_add() -> Vec<(u256, u256)> {
8281
let mut v = Vec::new();
8382
for (x, y) in setup_u128_mul() {
@@ -88,7 +87,6 @@ fn setup_u256_add() -> Vec<(u256, u256)> {
8887
v.push((u256::MAX, u256::MAX));
8988
v
9089
}
91-
*/
9290

9391
fn setup_u256_shift() -> Vec<(u256, u32)> {
9492
let mut v = Vec::new();
@@ -116,7 +114,6 @@ library_benchmark_group!(
116114
benchmarks = icount_bench_u128_widen_mul
117115
);
118116

119-
/* Not yet implemented
120117
#[library_benchmark]
121118
#[bench::linspace(setup_u256_add())]
122119
fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
@@ -129,7 +126,6 @@ library_benchmark_group!(
129126
name = icount_bench_u256_add_group;
130127
benchmarks = icount_bench_u256_add
131128
);
132-
*/
133129

134130
#[library_benchmark]
135131
#[bench::linspace(setup_u256_shift())]
@@ -148,7 +144,7 @@ main!(
148144
library_benchmark_groups =
149145
// u256-related benchmarks
150146
icount_bench_u128_widen_mul_group,
151-
// icount_bench_u256_add_group,
147+
icount_bench_u256_add_group,
152148
icount_bench_u256_shr_group,
153149
// verify-apilist-start
154150
// verify-sorted-start

crates/libm-test/src/gen/random.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::run_cfg::{int_range, iteration_count};
1414

1515
pub(crate) const SEED_ENV: &str = "LIBM_SEED";
1616

17-
pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
17+
pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
1818
let s = env::var(SEED_ENV).unwrap_or_else(|_| {
1919
let mut rng = rand::thread_rng();
2020
(0..32).map(|_| rng.sample(Alphanumeric) as char).collect()

crates/libm-test/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ pub use op::{
2929
};
3030
pub use precision::{MaybeOverride, SpecialCase, default_ulp};
3131
use run_cfg::extensive_max_iterations;
32-
pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
32+
pub use run_cfg::{
33+
CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count,
34+
skip_extensive_test,
35+
};
3336
pub use test_traits::{CheckOutput, Hex, TupleCall};
3437

3538
/// Result type for tests is usually from `anyhow`. Most times there is no success value to

crates/libm-test/src/run_cfg.rs

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,6 @@ impl TestEnv {
158158
let op = id.math_op();
159159

160160
let will_run_mp = cfg!(feature = "build-mpfr");
161-
162-
// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
163-
// with a reduced number on these platforms.
164-
let slow_on_ci = crate::emulated()
165-
|| usize::BITS < 64
166-
|| cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
167-
let slow_platform = slow_on_ci && crate::ci();
168-
169161
let large_float_ty = match op.float_ty {
170162
FloatTy::F16 | FloatTy::F32 => false,
171163
FloatTy::F64 | FloatTy::F128 => true,
@@ -176,7 +168,7 @@ impl TestEnv {
176168
let input_count = op.rust_sig.args.len();
177169

178170
Self {
179-
slow_platform,
171+
slow_platform: slow_platform(),
180172
large_float_ty,
181173
should_run_extensive: will_run_extensive,
182174
mp_tests_enabled: will_run_mp,
@@ -185,6 +177,17 @@ impl TestEnv {
185177
}
186178
}
187179

180+
/// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
181+
/// with a reduced number on these platforms.
182+
fn slow_platform() -> bool {
183+
let slow_on_ci = crate::emulated()
184+
|| usize::BITS < 64
185+
|| cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
186+
187+
// If not running in CI, there is no need to reduce iteration count.
188+
slow_on_ci && crate::ci()
189+
}
190+
188191
/// The number of iterations to run for a given test.
189192
pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
190193
let t_env = TestEnv::from_env(ctx);
@@ -351,3 +354,12 @@ pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
351354
let t_env = TestEnv::from_env(ctx);
352355
!t_env.should_run_extensive
353356
}
357+
358+
/// The number of iterations to run for `u256` fuzz tests.
359+
pub fn bigint_fuzz_iteration_count() -> u64 {
360+
if !cfg!(optimizations_enabled) {
361+
return 1000;
362+
}
363+
364+
if slow_platform() { 100_000 } else { 5_000_000 }
365+
}

crates/libm-test/tests/u256.rs

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
//! Test the u256 implementation. The ops already get exercised reasonably well through the `f128`
2+
//! routines, so this only does a few million fuzz iterations against GMP.
3+
4+
#![cfg(feature = "build-mpfr")]
5+
6+
use std::sync::LazyLock;
7+
8+
use libm::support::{HInt, u256};
9+
type BigInt = rug::Integer;
10+
11+
use libm_test::bigint_fuzz_iteration_count;
12+
use libm_test::gen::random::SEED;
13+
use rand::{Rng, SeedableRng};
14+
use rand_chacha::ChaCha8Rng;
15+
use rug::Assign;
16+
use rug::integer::Order;
17+
use rug::ops::NotAssign;
18+
19+
static BIGINT_U256_MAX: LazyLock<BigInt> =
20+
LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf));
21+
22+
/// Copied from the test module.
23+
fn hexu(v: u256) -> String {
24+
format!("0x{:032x}{:032x}", v.hi, v.lo)
25+
}
26+
27+
fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
28+
let lo: u128 = rng.gen();
29+
let hi: u128 = rng.gen();
30+
u256 { lo, hi }
31+
}
32+
33+
fn assign_bigint(bx: &mut BigInt, x: u256) {
34+
bx.assign_digits(&[x.lo, x.hi], Order::Lsf);
35+
}
36+
37+
fn from_bigint(bx: &mut BigInt) -> u256 {
38+
// Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing.
39+
*bx &= &*BIGINT_U256_MAX;
40+
let mut bres = [0u128, 0];
41+
bx.write_digits(&mut bres, Order::Lsf);
42+
bx.assign(0);
43+
u256 { lo: bres[0], hi: bres[1] }
44+
}
45+
46+
fn check_one(
47+
x: impl FnOnce() -> String,
48+
y: impl FnOnce() -> Option<String>,
49+
actual: u256,
50+
expected: &mut BigInt,
51+
) {
52+
let expected = from_bigint(expected);
53+
if actual != expected {
54+
let xmsg = x();
55+
let ymsg = y().map(|y| format!("y: {y}\n")).unwrap_or_default();
56+
panic!(
57+
"Results do not match\n\
58+
input: {xmsg}\n\
59+
{ymsg}\
60+
actual: {}\n\
61+
expected: {}\
62+
",
63+
hexu(actual),
64+
hexu(expected),
65+
)
66+
}
67+
}
68+
69+
#[test]
70+
fn mp_u256_bitor() {
71+
let mut rng = ChaCha8Rng::from_seed(*SEED);
72+
let mut bx = BigInt::new();
73+
let mut by = BigInt::new();
74+
75+
for _ in 0..bigint_fuzz_iteration_count() {
76+
let x = random_u256(&mut rng);
77+
let y = random_u256(&mut rng);
78+
assign_bigint(&mut bx, x);
79+
assign_bigint(&mut by, y);
80+
let actual = x | y;
81+
bx |= &by;
82+
check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
83+
}
84+
}
85+
86+
#[test]
87+
fn mp_u256_not() {
88+
let mut rng = ChaCha8Rng::from_seed(*SEED);
89+
let mut bx = BigInt::new();
90+
91+
for _ in 0..bigint_fuzz_iteration_count() {
92+
let x = random_u256(&mut rng);
93+
assign_bigint(&mut bx, x);
94+
let actual = !x;
95+
bx.not_assign();
96+
check_one(|| hexu(x), || None, actual, &mut bx);
97+
}
98+
}
99+
100+
#[test]
101+
fn mp_u256_add() {
102+
let mut rng = ChaCha8Rng::from_seed(*SEED);
103+
let mut bx = BigInt::new();
104+
let mut by = BigInt::new();
105+
106+
for _ in 0..bigint_fuzz_iteration_count() {
107+
let x = random_u256(&mut rng);
108+
let y = random_u256(&mut rng);
109+
assign_bigint(&mut bx, x);
110+
assign_bigint(&mut by, y);
111+
let actual = x + y;
112+
bx += &by;
113+
check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
114+
}
115+
}
116+
117+
#[test]
118+
fn mp_u256_shr() {
119+
let mut rng = ChaCha8Rng::from_seed(*SEED);
120+
let mut bx = BigInt::new();
121+
122+
for _ in 0..bigint_fuzz_iteration_count() {
123+
let x = random_u256(&mut rng);
124+
let shift: u32 = rng.gen_range(0..255);
125+
assign_bigint(&mut bx, x);
126+
let actual = x >> shift;
127+
bx >>= shift;
128+
check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
129+
}
130+
}
131+
132+
#[test]
133+
fn mp_u256_widen_mul() {
134+
let mut rng = ChaCha8Rng::from_seed(*SEED);
135+
let mut bx = BigInt::new();
136+
let mut by = BigInt::new();
137+
138+
for _ in 0..bigint_fuzz_iteration_count() {
139+
let x: u128 = rng.gen();
140+
let y: u128 = rng.gen();
141+
bx.assign(x);
142+
by.assign(y);
143+
let actual = x.widen_mul(y);
144+
bx *= &by;
145+
check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx);
146+
}
147+
}

0 commit comments

Comments
 (0)