Skip to content

Commit 75c6ccc

Browse files
committed
optimize 32-bit aligned mem{cpy,clr,set} intrinsics for ARM
this reduces the execution time of all these routines by 40-70%
1 parent c311dee commit 75c6ccc

File tree

6 files changed

+463
-30
lines changed

6 files changed

+463
-30
lines changed

src/arm.rs

+57-25
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
use core::intrinsics;
1+
use core::{intrinsics, ptr};
22

3-
#[cfg(feature = "mem")]
4-
use mem::{memcpy, memmove, memset};
3+
use mem;
54

65
// NOTE This function and the ones below are implemented using assembly because they use a custom
76
// calling convention which can't be implemented using a normal Rust function
@@ -60,65 +59,98 @@ pub unsafe fn __aeabi_ldivmod() {
6059
intrinsics::unreachable();
6160
}
6261

63-
// TODO: These aeabi_* functions should be defined as aliases
64-
#[cfg(not(feature = "mem"))]
65-
extern "C" {
66-
fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
67-
fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
68-
fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
69-
}
70-
7162
// FIXME: The `*4` and `*8` variants should be defined as aliases.
7263

7364
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
65+
#[linkage = "weak"]
7466
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
75-
memcpy(dest, src, n);
67+
mem::memcpy(dest, src, n);
7668
}
69+
7770
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
78-
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
79-
memcpy(dest, src, n);
71+
#[linkage = "weak"]
72+
/// Copies `n` bytes from `src` to `dest` for the word-aligned AEABI entry point.
///
/// Whole 32-bit words are moved while at least four bytes remain; whatever is
/// left (fewer than four bytes) is handed to `__aeabi_memcpy`.
///
/// # Safety
///
/// Both pointers are dereferenced as `u32` pointers, so `dest` and `src` must be
/// 4-byte aligned and valid for `n` bytes.
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
73+
let mut dest = dest as *mut u32;
74+
let mut src = src as *const u32; // source is only read: keep it `const`
75+
76+
while n >= 4 {
77+
ptr::write(dest, ptr::read(src));
78+
dest = dest.offset(1);
79+
src = src.offset(1);
80+
n -= 4;
81+
}
82+
83+
__aeabi_memcpy(dest as *mut u8, src as *const u8, n); // copy the remaining tail bytes
8084
}
85+
8186
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
87+
#[linkage = "weak"]
8288
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
83-
memcpy(dest, src, n);
89+
__aeabi_memcpy4(dest, src, n);
8490
}
8591

8692
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
93+
#[linkage = "weak"]
8794
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
88-
memmove(dest, src, n);
95+
mem::memmove(dest, src, n);
8996
}
97+
9098
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
99+
#[linkage = "weak"]
91100
pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
92-
memmove(dest, src, n);
101+
__aeabi_memmove(dest, src, n);
93102
}
103+
94104
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
105+
#[linkage = "weak"]
95106
pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
96-
memmove(dest, src, n);
107+
__aeabi_memmove(dest, src, n);
97108
}
98109

99110
// Note the different argument order
100111
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
112+
#[linkage = "weak"]
101113
pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
102-
memset(dest, c, n);
114+
mem::memset(dest, c, n);
103115
}
116+
104117
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
105-
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
106-
memset(dest, c, n);
118+
#[linkage = "weak"]
119+
// Fills `n` bytes at `dest` with the low byte of `c`, writing one 32-bit word
// per iteration while at least four bytes remain and delegating the rest to
// `__aeabi_memset`. `dest` is written through a `*mut u32`, so it must be
// 4-byte aligned. Note the AEABI argument order: length first, then value.
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
120+
let mut dest = dest as *mut u32;
121+
122+
let byte = (c as u32) & 0xff; // only the low 8 bits of `c` are used as the fill value
123+
let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; // replicate the byte into all four lanes of a word
124+
125+
while n >= 4 {
126+
ptr::write(dest, c);
127+
dest = dest.offset(1);
128+
n -= 4;
129+
}
130+
131+
__aeabi_memset(dest as *mut u8, n, byte as i32); // fill the remaining tail bytes
107132
}
133+
108134
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
135+
#[linkage = "weak"]
109136
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
110-
memset(dest, c, n);
137+
__aeabi_memset4(dest, n, c);
111138
}
112139

113140
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
141+
#[linkage = "weak"]
114142
pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
115-
memset(dest, 0, n);
143+
__aeabi_memset(dest, n, 0);
116144
}
145+
117146
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
147+
#[linkage = "weak"]
118148
pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
119-
memset(dest, 0, n);
149+
__aeabi_memset4(dest, n, 0);
120150
}
151+
121152
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
153+
#[linkage = "weak"]
122154
pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
123-
memset(dest, 0, n);
155+
__aeabi_memset4(dest, n, 0);
124156
}

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#![feature(i128_type)]
1717
#![feature(repr_simd)]
1818
#![feature(abi_unadjusted)]
19+
#![feature(linkage)]
1920
#![allow(unused_features)]
2021
#![no_builtins]
2122
#![unstable(feature = "compiler_builtins_lib",
@@ -45,7 +46,6 @@ mod macros;
4546
pub mod int;
4647
pub mod float;
4748

48-
#[cfg(feature = "mem")]
4949
pub mod mem;
5050

5151
#[cfg(target_arch = "arm")]

src/mem.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ type c_int = i16;
55
#[cfg(not(target_pointer_width = "16"))]
66
type c_int = i32;
77

8-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
8+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
99
pub unsafe extern "C" fn memcpy(dest: *mut u8,
1010
src: *const u8,
1111
n: usize)
@@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
1818
dest
1919
}
2020

21-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
21+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
2222
pub unsafe extern "C" fn memmove(dest: *mut u8,
2323
src: *const u8,
2424
n: usize)
@@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
4141
dest
4242
}
4343

44-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
44+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
4545
pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
4646
let mut i = 0;
4747
while i < n {
@@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
5151
s
5252
}
5353

54-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
54+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
5555
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
5656
let mut i = 0;
5757
while i < n {

tests/aeabi_memclr.rs

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#![cfg(all(target_arch = "arm",
2+
not(any(target_env = "gnu", target_env = "musl")),
3+
target_os = "linux",
4+
feature = "mem"))]
5+
#![feature(compiler_builtins_lib)]
6+
#![no_std]
7+
8+
extern crate compiler_builtins;
9+
10+
// test runner
11+
extern crate utest_cortex_m_qemu;
12+
13+
// overrides `panic!`
14+
#[macro_use]
15+
extern crate utest_macros;
16+
17+
use core::mem;
18+
19+
macro_rules! panic {
20+
($($tt:tt)*) => {
21+
upanic!($($tt)*);
22+
};
23+
}
24+
25+
// Declarations of the intrinsics under test. The signatures mirror the
// definitions in `src/arm.rs`; in particular the fill value of
// `__aeabi_memset4` is an `i32`, not a `u32`.
extern "C" {
26+
fn __aeabi_memclr4(dest: *mut u8, n: usize);
27+
fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32);
28+
}
29+
30+
// Byte buffer whose alignment is raised to that of `u32`.
struct Aligned {
31+
array: [u8; 8],
32+
_alignment: [u32; 0], // zero-length array: adds no data, only raises the struct's alignment to that of `u32`
33+
}
34+
35+
impl Aligned {
36+
fn new() -> Self {
37+
Aligned {
38+
array: [0; 8],
39+
_alignment: [],
40+
}
41+
}
42+
}
43+
44+
#[test]
45+
// For every length 0..=8: fill the prefix with 0xff via `__aeabi_memset4`,
// clear it again via `__aeabi_memclr4`, and check the prefix is all zeros.
fn memclr4() {
46+
let mut aligned = Aligned::new();
47+
assert_eq!(mem::align_of_val(&aligned), 4);
48+
let xs = &mut aligned.array;
49+
50+
for n in 0..9 {
51+
unsafe {
52+
__aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
53+
__aeabi_memclr4(xs.as_mut_ptr(), n);
54+
}
55+
56+
assert!(xs[0..n].iter().all(|x| *x == 0));
57+
}
58+
}

tests/aeabi_memcpy.rs

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#![cfg(all(target_arch = "arm",
2+
not(any(target_env = "gnu", target_env = "musl")),
3+
target_os = "linux",
4+
feature = "mem"))]
5+
#![feature(compiler_builtins_lib)]
6+
#![no_std]
7+
8+
extern crate compiler_builtins;
9+
10+
// test runner
11+
extern crate utest_cortex_m_qemu;
12+
13+
// overrides `panic!`
14+
#[macro_use]
15+
extern crate utest_macros;
16+
17+
macro_rules! panic {
18+
($($tt:tt)*) => {
19+
upanic!($($tt)*);
20+
};
21+
}
22+
23+
extern "C" {
24+
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
25+
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
26+
}
27+
28+
// Byte buffer whose alignment is raised to that of `u32`, so it can be passed
// to the `*4` intrinsic variants.
struct Aligned {
29+
array: [u8; 8],
30+
_alignment: [u32; 0], // zero-length array: adds no data, only raises the struct's alignment to that of `u32`
31+
}
32+
33+
impl Aligned {
34+
fn new(array: [u8; 8]) -> Self {
35+
Aligned {
36+
array: array,
37+
_alignment: [],
38+
}
39+
}
40+
}
41+
42+
#[test]
43+
// Copies every prefix length, including the full buffer, and checks that the
// copied prefix matches the source.
fn memcpy() {
44+
let mut dest = [0; 4];
45+
let src = [0xde, 0xad, 0xbe, 0xef];
46+
47+
for n in 0..dest.len() + 1 { // `+ 1` so the full-length copy is exercised too
48+
dest.copy_from_slice(&[0; 4]);
49+
50+
unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }
51+
52+
assert_eq!(&dest[0..n], &src[0..n])
53+
}
54+
}
55+
56+
#[test]
57+
// Copies every prefix length, including the full buffer, through the
// word-aligned entry point. Both buffers live in `Aligned` because
// `__aeabi_memcpy4` reads and writes through `u32` pointers.
fn memcpy4() {
58+
let mut aligned = Aligned::new([0; 8]);
59+
let dest = &mut aligned.array;
60+
let src = Aligned::new([0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]);
61+
62+
for n in 0..dest.len() + 1 { // `+ 1` so the full-length copy is exercised too
63+
dest.copy_from_slice(&[0; 8]);
64+
65+
unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.array.as_ptr(), n) }
66+
67+
assert_eq!(&dest[0..n], &src.array[0..n])
68+
}
69+
}

0 commit comments

Comments
 (0)