Skip to content

Commit 9ca5234

Browse files
authored
Support three parameters in the code generator and add vmla and vmls instructions (#1088)
1 parent 446ce33 commit 9ca5234

File tree

4 files changed

+864
-112
lines changed

4 files changed

+864
-112
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,6 +1601,38 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
16011601
vcvtpq_u64_f64_(a)
16021602
}
16031603

1604+
/// Floating-point multiply-add to accumulator
1605+
#[inline]
1606+
#[target_feature(enable = "neon")]
1607+
#[cfg_attr(test, assert_instr(fmul))]
1608+
pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
1609+
simd_add(a, simd_mul(b, c))
1610+
}
1611+
1612+
/// Floating-point multiply-add to accumulator
1613+
#[inline]
1614+
#[target_feature(enable = "neon")]
1615+
#[cfg_attr(test, assert_instr(fmul))]
1616+
pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
1617+
simd_add(a, simd_mul(b, c))
1618+
}
1619+
1620+
/// Floating-point multiply-subtract from accumulator
1621+
#[inline]
1622+
#[target_feature(enable = "neon")]
1623+
#[cfg_attr(test, assert_instr(fmul))]
1624+
pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
1625+
simd_sub(a, simd_mul(b, c))
1626+
}
1627+
1628+
/// Floating-point multiply-subtract from accumulator
1629+
#[inline]
1630+
#[target_feature(enable = "neon")]
1631+
#[cfg_attr(test, assert_instr(fmul))]
1632+
pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
1633+
simd_sub(a, simd_mul(b, c))
1634+
}
1635+
16041636
/// Multiply
16051637
#[inline]
16061638
#[target_feature(enable = "neon")]
@@ -3206,6 +3238,46 @@ mod test {
32063238
assert_eq!(r, e);
32073239
}
32083240

3241+
#[simd_test(enable = "neon")]
3242+
unsafe fn test_vmla_f64() {
3243+
let a: f64 = 0.;
3244+
let b: f64 = 2.;
3245+
let c: f64 = 3.;
3246+
let e: f64 = 6.;
3247+
let r: f64 = transmute(vmla_f64(transmute(a), transmute(b), transmute(c)));
3248+
assert_eq!(r, e);
3249+
}
3250+
3251+
#[simd_test(enable = "neon")]
3252+
unsafe fn test_vmlaq_f64() {
3253+
let a: f64x2 = f64x2::new(0., 1.);
3254+
let b: f64x2 = f64x2::new(2., 2.);
3255+
let c: f64x2 = f64x2::new(3., 3.);
3256+
let e: f64x2 = f64x2::new(6., 7.);
3257+
let r: f64x2 = transmute(vmlaq_f64(transmute(a), transmute(b), transmute(c)));
3258+
assert_eq!(r, e);
3259+
}
3260+
3261+
#[simd_test(enable = "neon")]
3262+
unsafe fn test_vmls_f64() {
3263+
let a: f64 = 6.;
3264+
let b: f64 = 2.;
3265+
let c: f64 = 3.;
3266+
let e: f64 = 0.;
3267+
let r: f64 = transmute(vmls_f64(transmute(a), transmute(b), transmute(c)));
3268+
assert_eq!(r, e);
3269+
}
3270+
3271+
#[simd_test(enable = "neon")]
3272+
unsafe fn test_vmlsq_f64() {
3273+
let a: f64x2 = f64x2::new(6., 7.);
3274+
let b: f64x2 = f64x2::new(2., 2.);
3275+
let c: f64x2 = f64x2::new(3., 3.);
3276+
let e: f64x2 = f64x2::new(0., 1.);
3277+
let r: f64x2 = transmute(vmlsq_f64(transmute(a), transmute(b), transmute(c)));
3278+
assert_eq!(r, e);
3279+
}
3280+
32093281
#[simd_test(enable = "neon")]
32103282
unsafe fn test_vmul_f64() {
32113283
let a: f64 = 1.0;

0 commit comments

Comments
 (0)