Skip to content

Commit 90ff6f5

Browse files
committed
Implement word-sized copy
1 parent 3b050d2 commit 90ff6f5

File tree

1 file changed

+200
-16
lines changed

1 file changed

+200
-16
lines changed

src/mem/impls.rs

+200-16
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,211 @@
1+
use core::intrinsics::likely;
2+
3+
/// Size in bytes of one machine word (`usize`) on the target.
const WORD_SIZE: usize = core::mem::size_of::<usize>();
4+
/// Mask that extracts an address's (or length's) byte offset within a word,
/// i.e. `x & WORD_MASK == x % WORD_SIZE`.
const WORD_MASK: usize = WORD_SIZE - 1;
5+
6+
/// Minimum byte count at which the copy/set routines take the word-wise path
/// instead of handling every byte individually.
const WORD_COPY_THRESHOLD: usize = 2 * WORD_SIZE;
7+
8+
/// Copies `n` bytes from `src` to `dest` one byte at a time, starting at the
/// lowest address and moving upwards.
///
/// # Safety
///
/// `src` must be valid for reads of `n` bytes and `dest` must be valid for
/// writes of `n` bytes.
#[inline(always)]
unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
    // Count the bytes still to be moved instead of comparing against an end pointer.
    let mut remaining = n;
    while remaining != 0 {
        *dest = *src;
        dest = dest.add(1);
        src = src.add(1);
        remaining -= 1;
    }
}
17+
118
/// Copies `n` bytes from `src` to `dest` one `usize` word at a time, starting
/// at the lowest address and moving upwards.
///
/// # Safety
///
/// Both `dest` and `src` are dereferenced as `usize` pointers, so both must be
/// word-aligned. `n` should be a multiple of the word size: the loop always
/// moves whole words and stops once the write cursor reaches `dest + n`.
/// `src` must be valid for reads and `dest` for writes over that range.
#[inline(always)]
unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
    let mut dest_usize = dest as *mut usize;
    // The source is only ever read, so keep it a `*const` pointer.
    let mut src_usize = src as *const usize;
    let dest_end = dest.add(n) as *mut usize;

    while dest_usize < dest_end {
        *dest_usize = *src_usize;
        dest_usize = dest_usize.add(1);
        src_usize = src_usize.add(1);
    }
}
930

1031
#[inline(always)]
11-
pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) {
12-
// copy from end
13-
let mut i = n;
14-
while i != 0 {
15-
i -= 1;
16-
*dest.add(i) = *src.add(i);
32+
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
33+
let mut dest_usize = dest as *mut usize;
34+
let dest_end = dest.add(n) as *mut usize;
35+
36+
// Calculate the misalignment offset and shift needed to reassemble value.
37+
let offset = src as usize & WORD_MASK;
38+
let shift = offset * 8;
39+
40+
// Realign src
41+
let mut src_aligned = (src as usize &! WORD_MASK) as *mut usize;
42+
// XXX: Could this possibly be UB?
43+
let mut prev_word = *src_aligned;
44+
45+
while dest_usize < dest_end {
46+
src_aligned = src_aligned.add(1);
47+
let cur_word = *src_aligned;
48+
let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
49+
prev_word = cur_word;
50+
51+
*dest_usize = resembled;
52+
dest_usize = dest_usize.add(1);
53+
}
54+
}
55+
56+
#[inline(always)]
57+
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
58+
if likely(n >= WORD_COPY_THRESHOLD) {
59+
// Align dest
60+
// Because of n >= 2 * WORD_SIZE, dst_misalignment < n
61+
let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
62+
copy_forward_bytes(dest, src, dest_misalignment);
63+
dest = dest.add(dest_misalignment);
64+
src = src.add(dest_misalignment);
65+
n -= dest_misalignment;
66+
67+
let n_words = n & !WORD_MASK;
68+
let src_misalignment = src as usize & WORD_MASK;
69+
if likely(src_misalignment == 0) {
70+
copy_forward_aligned_words(dest, src, n_words);
71+
} else {
72+
copy_forward_misaligned_words(dest, src, n_words);
73+
}
74+
dest = dest.add(n_words);
75+
src = src.add(n_words);
76+
n -= n_words;
77+
}
78+
copy_forward_bytes(dest, src, n);
79+
}
80+
81+
// The following backward copy helper functions (all except the public-facing copy_backward)
82+
// take pointers one past the end as their inputs instead of pointers to the start!
83+
84+
/// Copies `n` bytes one byte at a time, starting at the highest address and
/// moving downwards.
///
/// Unlike the forward helpers, `dest` and `src` point one past the *end* of
/// their regions: the bytes copied are `[src - n, src)` into `[dest - n, dest)`.
///
/// # Safety
///
/// `[src - n, src)` must be valid for reads and `[dest - n, dest)` must be
/// valid for writes.
#[inline(always)]
unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
    // Count the bytes still to be moved instead of comparing against a start pointer.
    let mut remaining = n;
    while remaining != 0 {
        dest = dest.sub(1);
        src = src.sub(1);
        *dest = *src;
        remaining -= 1;
    }
}
93+
94+
/// Copies `n` bytes one `usize` word at a time, starting at the highest
/// address and moving downwards.
///
/// `dest` and `src` point one past the *end* of their regions: the bytes
/// copied are `[src - n, src)` into `[dest - n, dest)`.
///
/// # Safety
///
/// Both `dest` and `src` are dereferenced as `usize` pointers, so both must be
/// word-aligned. `n` should be a multiple of the word size: the loop always
/// moves whole words and stops once the write cursor reaches `dest - n`.
/// The source range must be valid for reads and the destination range for
/// writes.
#[inline(always)]
unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
    let mut dest_usize = dest as *mut usize;
    // The source is only ever read, so keep it a `*const` pointer.
    let mut src_usize = src as *const usize;
    let dest_start = dest.sub(n) as *mut usize;

    while dest_start < dest_usize {
        dest_usize = dest_usize.sub(1);
        src_usize = src_usize.sub(1);
        *dest_usize = *src_usize;
    }
}
19106

20107
#[inline(always)]
21-
pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) {
22-
let mut i = 0;
23-
while i < n {
24-
*s.add(i) = c;
25-
i += 1;
108+
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
109+
let mut dest_usize = dest as *mut usize;
110+
let dest_start = dest.sub(n) as *mut usize;
111+
112+
// Calculate the misalignment offset and shift needed to reassemble value.
113+
let offset = src as usize & WORD_MASK;
114+
let shift = offset * 8;
115+
116+
// Realign src_aligned
117+
let mut src_aligned = (src as usize &! WORD_MASK) as *mut usize;
118+
// XXX: Could this possibly be UB?
119+
let mut prev_word = *src_aligned;
120+
121+
while dest_start < dest_usize {
122+
src_aligned = src_aligned.sub(1);
123+
let cur_word = *src_aligned;
124+
let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
125+
prev_word = cur_word;
126+
127+
dest_usize = dest_usize.sub(1);
128+
*dest_usize = resembled;
129+
}
130+
}
131+
132+
#[inline(always)]
133+
pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
134+
let mut dest = dest.add(n);
135+
let mut src = src.add(n);
136+
137+
if likely(n >= WORD_COPY_THRESHOLD) {
138+
// Align dest
139+
// Because of n >= 2 * WORD_SIZE, dst_misalignment < n
140+
let dest_misalignment = dest as usize & WORD_MASK;
141+
copy_backward_bytes(dest, src, dest_misalignment);
142+
dest = dest.sub(dest_misalignment);
143+
src = src.sub(dest_misalignment);
144+
n -= dest_misalignment;
145+
146+
let n_words = n & !WORD_MASK;
147+
let src_misalignment = src as usize & WORD_MASK;
148+
if likely(src_misalignment == 0) {
149+
copy_backward_aligned_words(dest, src, n_words);
150+
} else {
151+
copy_backward_misaligned_words(dest, src, n_words);
152+
}
153+
dest = dest.sub(n_words);
154+
src = src.sub(n_words);
155+
n -= n_words;
156+
}
157+
copy_backward_bytes(dest, src, n);
158+
}
159+
160+
/// Stores the byte `c` into each of the `n` bytes starting at `s`, one byte
/// at a time in ascending address order.
///
/// # Safety
///
/// `s` must be valid for writes of `n` bytes.
#[inline(always)]
pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) {
    // Count the bytes still to be written instead of comparing against an end pointer.
    let mut remaining = n;
    while remaining != 0 {
        *s = c;
        s = s.add(1);
        remaining -= 1;
    }
}
168+
169+
#[inline(always)]
170+
pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) {
171+
let mut broadcast = c as usize;
172+
broadcast |= broadcast << 8;
173+
#[cfg(not(target_pointer_width = "16"))]
174+
{
175+
broadcast |= broadcast << 16;
176+
#[cfg(not(target_pointer_width = "32"))]
177+
{
178+
broadcast |= broadcast << 32;
179+
#[cfg(not(target_pointer_width = "64"))]
180+
{
181+
broadcast |= broadcast << 64;
182+
}
183+
}
184+
}
185+
186+
let mut s_usize = s as *mut usize;
187+
let end = s.add(n) as *mut usize;
188+
189+
while s_usize < end {
190+
*s_usize = broadcast;
191+
s_usize = s_usize.add(1);
192+
}
193+
}
194+
195+
#[inline(always)]
196+
pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
197+
if likely(n >= WORD_COPY_THRESHOLD) {
198+
// Bec// Align dest
199+
// Because of n >= 2 * WORD_SIZE, dst_misalignment < n
200+
let misalignment = (s as usize).wrapping_neg() & WORD_MASK;
201+
set_bytes_bytes(s, c, misalignment);
202+
s = s.add(misalignment);
203+
n -= misalignment;
204+
205+
let n_words = n & !WORD_MASK;
206+
set_bytes_words(s, c, n_words);
207+
s = s.add(n_words);
208+
n -= n_words;
26209
}
210+
set_bytes_bytes(s, c, n);
27211
}

0 commit comments

Comments
 (0)