Skip to content

Commit b0300b1

Browse files
committed
Auto merge of #164 - rust-lang-nursery:memclr, r=alexcrichton
optimize memset and memclr for ARM

This commit optimizes those routines by rewriting them in assembly and performing the memory copying in 32-bit chunks, rather than in 8-bit chunks as was done before this commit. This assembly implementation is compatible with the ARMv6 and ARMv7 architectures.

This change results in a reduction of runtime of about 40-70% in all cases that matter (the compiler will never use these intrinsics for sizes smaller than 4 bytes). See data below:

| Bytes | HEAD | this PR | diff       |
| ----- | ---- | ------- | ---------- |
| 0     | 6    | 14      | +133.3333% |
| 1     | 10   | 13      | +30%       |
| 2     | 14   | 13      | -7.1429%   |
| 3     | 18   | 13      | -27.77%    |
| 4     | 24   | 21      | -12.5%     |
| 16    | 70   | 36      | -48.5714%  |
| 64    | 263  | 97      | -63.1179%  |
| 256   | 1031 | 337     | -67.3133%  |
| 1024  | 4103 | 1297    | -68.389%   |

All times are in clock cycles. The measurements were done on a Cortex-M3 processor running at 8 MHz using the technique described [here].

[here]: http://blog.japaric.io/rtfm-overhead

---

For relevance: all pure Rust programs for Cortex-M microcontrollers use memclr to zero the .bss during startup, so this change results in a quicker boot time.

Some questions / comments:

- ~~The original code (it had a bug) comes from this [repo] and it's licensed under the ISC license. I have preserved the copyright and license text in the source code. IANAL, is that OK?~~ no longer applies. The intrinsics are written in Rust now.
- ~~I don't know whether this ARM implementation works for ARMv4 or ARMv5. @FenrirWolf and @Uvekilledkenny may want to take a look at it first.~~ no longer applies. The intrinsics are written in Rust now.
- ~~No idea whether this implementation works on processors that have no thumb instruction set. The current implementation uses 16-bit thumb instructions.~~ no longer applies. The intrinsics are written in Rust now.
- ~~The loop code can be rewritten in fewer instructions but using 32-bit thumb instructions. That 32-bit version would only work on ARMv7 though. I have yet to check whether that makes any difference in the runtime of the intrinsic.~~ no longer applies. The intrinsics are written in Rust now.
- ~~I'll look into memcpy4 next.~~ done

[repo]: https://github.com/bobbl/libaeabi-cortexm0
2 parents c311dee + b8a6620 commit b0300b1

File tree

6 files changed

+476
-31
lines changed

6 files changed

+476
-31
lines changed

src/arm.rs

+70-26
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
use core::intrinsics;
1+
use core::{intrinsics, ptr};
22

3-
#[cfg(feature = "mem")]
4-
use mem::{memcpy, memmove, memset};
3+
use mem;
54

65
// NOTE This function and the ones below are implemented using assembly because they use a custom
76
// calling convention which can't be implemented using a normal Rust function
@@ -60,65 +59,110 @@ pub unsafe fn __aeabi_ldivmod() {
6059
intrinsics::unreachable();
6160
}
6261

63-
// TODO: These aeabi_* functions should be defined as aliases
64-
#[cfg(not(feature = "mem"))]
65-
extern "C" {
66-
fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
67-
fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
68-
fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
69-
}
70-
7162
// FIXME: The `*4` and `*8` variants should be defined as aliases.
7263

64+
#[cfg(not(target_os = "ios"))]
7365
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
66+
#[cfg_attr(thumb, linkage = "weak")]
7467
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
75-
memcpy(dest, src, n);
68+
mem::memcpy(dest, src, n);
7669
}
70+
71+
#[cfg(not(target_os = "ios"))]
7772
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
78-
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
79-
memcpy(dest, src, n);
73+
#[cfg_attr(thumb, linkage = "weak")]
74+
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
75+
let mut dest = dest as *mut u32;
76+
let mut src = src as *mut u32;
77+
78+
while n >= 4 {
79+
ptr::write(dest, ptr::read(src));
80+
dest = dest.offset(1);
81+
src = src.offset(1);
82+
n -= 4;
83+
}
84+
85+
__aeabi_memcpy(dest as *mut u8, src as *const u8, n);
8086
}
87+
88+
#[cfg(not(target_os = "ios"))]
8189
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
90+
#[cfg_attr(thumb, linkage = "weak")]
8291
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
83-
memcpy(dest, src, n);
92+
__aeabi_memcpy4(dest, src, n);
8493
}
8594

95+
#[cfg(not(target_os = "ios"))]
8696
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
97+
#[cfg_attr(thumb, linkage = "weak")]
8798
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
88-
memmove(dest, src, n);
99+
mem::memmove(dest, src, n);
89100
}
101+
102+
#[cfg(not(target_os = "ios"))]
90103
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
104+
#[cfg_attr(thumb, linkage = "weak")]
91105
pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
92-
memmove(dest, src, n);
106+
__aeabi_memmove(dest, src, n);
93107
}
108+
109+
#[cfg(not(target_os = "ios"))]
94110
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
111+
#[cfg_attr(thumb, linkage = "weak")]
95112
pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
96-
memmove(dest, src, n);
113+
__aeabi_memmove(dest, src, n);
97114
}
98115

99-
// Note the different argument order
116+
#[cfg(not(target_os = "ios"))]
100117
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
118+
#[cfg_attr(thumb, linkage = "weak")]
101119
pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
102-
memset(dest, c, n);
120+
// Note the different argument order
121+
mem::memset(dest, c, n);
103122
}
123+
124+
#[cfg(not(target_os = "ios"))]
104125
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
105-
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
106-
memset(dest, c, n);
126+
#[cfg_attr(thumb, linkage = "weak")]
127+
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
128+
let mut dest = dest as *mut u32;
129+
130+
let byte = (c as u32) & 0xff;
131+
let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;
132+
133+
while n >= 4 {
134+
ptr::write(dest, c);
135+
dest = dest.offset(1);
136+
n -= 4;
137+
}
138+
139+
__aeabi_memset(dest as *mut u8, n, byte as i32);
107140
}
141+
142+
#[cfg(not(target_os = "ios"))]
108143
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
144+
#[cfg_attr(thumb, linkage = "weak")]
109145
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
110-
memset(dest, c, n);
146+
__aeabi_memset4(dest, n, c);
111147
}
112148

149+
#[cfg(not(target_os = "ios"))]
113150
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
151+
#[cfg_attr(thumb, linkage = "weak")]
114152
pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
115-
memset(dest, 0, n);
153+
__aeabi_memset(dest, n, 0);
116154
}
155+
156+
#[cfg(not(target_os = "ios"))]
117157
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
158+
#[cfg_attr(thumb, linkage = "weak")]
118159
pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
119-
memset(dest, 0, n);
160+
__aeabi_memset4(dest, n, 0);
120161
}
162+
163+
#[cfg(not(target_os = "ios"))]
121164
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
165+
#[cfg_attr(thumb, linkage = "weak")]
122166
pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
123-
memset(dest, 0, n);
167+
__aeabi_memset4(dest, n, 0);
124168
}

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#![feature(i128_type)]
1717
#![feature(repr_simd)]
1818
#![feature(abi_unadjusted)]
19+
#![feature(linkage)]
1920
#![allow(unused_features)]
2021
#![no_builtins]
2122
#![unstable(feature = "compiler_builtins_lib",
@@ -45,7 +46,6 @@ mod macros;
4546
pub mod int;
4647
pub mod float;
4748

48-
#[cfg(feature = "mem")]
4949
pub mod mem;
5050

5151
#[cfg(target_arch = "arm")]

src/mem.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ type c_int = i16;
55
#[cfg(not(target_pointer_width = "16"))]
66
type c_int = i32;
77

8-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
8+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
99
pub unsafe extern "C" fn memcpy(dest: *mut u8,
1010
src: *const u8,
1111
n: usize)
@@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
1818
dest
1919
}
2020

21-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
21+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
2222
pub unsafe extern "C" fn memmove(dest: *mut u8,
2323
src: *const u8,
2424
n: usize)
@@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
4141
dest
4242
}
4343

44-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
44+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
4545
pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
4646
let mut i = 0;
4747
while i < n {
@@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
5151
s
5252
}
5353

54-
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
54+
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
5555
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
5656
let mut i = 0;
5757
while i < n {

tests/aeabi_memclr.rs

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#![cfg(all(target_arch = "arm",
2+
not(any(target_env = "gnu", target_env = "musl")),
3+
target_os = "linux",
4+
feature = "mem"))]
5+
#![feature(compiler_builtins_lib)]
6+
#![no_std]
7+
8+
extern crate compiler_builtins;
9+
10+
// test runner
11+
extern crate utest_cortex_m_qemu;
12+
13+
// overrides `panic!`
14+
#[macro_use]
15+
extern crate utest_macros;
16+
17+
use core::mem;
18+
19+
macro_rules! panic {
20+
($($tt:tt)*) => {
21+
upanic!($($tt)*);
22+
};
23+
}
24+
25+
extern "C" {
26+
fn __aeabi_memclr4(dest: *mut u8, n: usize);
27+
fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
28+
}
29+
30+
struct Aligned {
31+
array: [u8; 8],
32+
_alignment: [u32; 0],
33+
}
34+
35+
impl Aligned {
36+
fn new() -> Self {
37+
Aligned {
38+
array: [0; 8],
39+
_alignment: [],
40+
}
41+
}
42+
}
43+
44+
#[test]
45+
fn memclr4() {
46+
let mut aligned = Aligned::new();;
47+
assert_eq!(mem::align_of_val(&aligned), 4);
48+
let xs = &mut aligned.array;
49+
50+
for n in 0..9 {
51+
unsafe {
52+
__aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
53+
__aeabi_memclr4(xs.as_mut_ptr(), n);
54+
}
55+
56+
assert!(xs[0..n].iter().all(|x| *x == 0));
57+
}
58+
}

tests/aeabi_memcpy.rs

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#![cfg(all(target_arch = "arm",
2+
not(any(target_env = "gnu", target_env = "musl")),
3+
target_os = "linux",
4+
feature = "mem"))]
5+
#![feature(compiler_builtins_lib)]
6+
#![no_std]
7+
8+
extern crate compiler_builtins;
9+
10+
// test runner
11+
extern crate utest_cortex_m_qemu;
12+
13+
// overrides `panic!`
14+
#[macro_use]
15+
extern crate utest_macros;
16+
17+
macro_rules! panic {
18+
($($tt:tt)*) => {
19+
upanic!($($tt)*);
20+
};
21+
}
22+
23+
extern "C" {
24+
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
25+
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
26+
}
27+
28+
struct Aligned {
29+
array: [u8; 8],
30+
_alignment: [u32; 0],
31+
}
32+
33+
impl Aligned {
34+
fn new(array: [u8; 8]) -> Self {
35+
Aligned {
36+
array: array,
37+
_alignment: [],
38+
}
39+
}
40+
}
41+
42+
#[test]
43+
fn memcpy() {
44+
let mut dest = [0; 4];
45+
let src = [0xde, 0xad, 0xbe, 0xef];
46+
47+
for n in 0..dest.len() {
48+
dest.copy_from_slice(&[0; 4]);
49+
50+
unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }
51+
52+
assert_eq!(&dest[0..n], &src[0..n])
53+
}
54+
}
55+
56+
#[test]
57+
fn memcpy4() {
58+
let mut aligned = Aligned::new([0; 8]);
59+
let dest = &mut aligned.array;
60+
let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d];
61+
62+
for n in 0..dest.len() {
63+
dest.copy_from_slice(&[0; 8]);
64+
65+
unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }
66+
67+
assert_eq!(&dest[0..n], &src[0..n])
68+
}
69+
}

0 commit comments

Comments
 (0)