
Commit 116d1a7

Auto merge of rust-lang#80824 - cuviper:heap-clones, r=kennytm
Try to avoid locals when cloning into Box/Rc/Arc

For generic `T: Clone`, we can allocate an uninitialized box beforehand, which gives the optimizer a chance to create the clone directly in the heap. For `T: Copy`, we can go further and do a simple memory copy, regardless of optimization level.

The same applies to `Rc`/`Arc::make_mut` when they must clone the data.
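The core idea can be sketched outside of std on stable Rust (1.82+, where `Box::new_uninit` and `Box::assume_init` are stable). `clone_into_box` here is a hypothetical helper for illustration, not part of the standard library; the actual patch routes this through a specialized trait instead:

```rust
// A minimal sketch of the "allocate first, then clone in place" idea.
// `clone_into_box` is a hypothetical helper, not part of std.
fn clone_into_box<T: Clone>(value: &T) -> Box<T> {
    // Allocate the uninitialized heap slot up front...
    let mut boxed = Box::<T>::new_uninit();
    unsafe {
        // ...then write the clone straight through the raw pointer, giving
        // the optimizer a chance to build the value in the heap rather than
        // constructing a stack local and moving it over.
        boxed.as_mut_ptr().write(value.clone());
        boxed.assume_init()
    }
}

fn main() {
    let big = vec![0u8; 4096];
    let boxed = clone_into_box(&big);
    assert_eq!(*boxed, big);
}
```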
2 parents 9f3998b + 1f1a3b4 commit 116d1a7

4 files changed, +65 −23 lines

library/alloc/src/alloc.rs (+23)

```diff
@@ -397,3 +397,26 @@ pub mod __alloc_error_handler {
         unsafe { oom_impl(layout) }
     }
 }
+
+/// Specialize clones into pre-allocated, uninitialized memory.
+/// Used by `Box::clone` and `Rc`/`Arc::make_mut`.
+pub(crate) trait WriteCloneIntoRaw: Sized {
+    unsafe fn write_clone_into_raw(&self, target: *mut Self);
+}
+
+impl<T: Clone> WriteCloneIntoRaw for T {
+    #[inline]
+    default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // Having allocated *first* may allow the optimizer to create
+        // the cloned value in-place, skipping the local and move.
+        unsafe { target.write(self.clone()) };
+    }
+}
+
+impl<T: Copy> WriteCloneIntoRaw for T {
+    #[inline]
+    unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // We can always copy in-place, without ever involving a local value.
+        unsafe { target.copy_from_nonoverlapping(self, 1) };
+    }
+}
```
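The `default` qualifier on the `T: Clone` impl is what lets the `T: Copy` impl override it; that relies on specialization, which is unstable outside the standard library. A rough standalone sketch of the same two-level dispatch, assuming a nightly toolchain with the incomplete `specialization` feature enabled:

```rust
// Nightly-only sketch reproducing the trait above outside of std.
#![feature(specialization)]
#![allow(incomplete_features)]

use std::mem::MaybeUninit;

trait WriteCloneIntoRaw: Sized {
    unsafe fn write_clone_into_raw(&self, target: *mut Self);
}

impl<T: Clone> WriteCloneIntoRaw for T {
    // Fallback for any Clone type: clone into the pre-allocated slot.
    default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
        unsafe { target.write(self.clone()) };
    }
}

impl<T: Copy> WriteCloneIntoRaw for T {
    // Copy types specialize to a plain memcpy, independent of opt-level.
    unsafe fn write_clone_into_raw(&self, target: *mut Self) {
        unsafe { target.copy_from_nonoverlapping(self, 1) };
    }
}

fn main() {
    // String is Clone but not Copy, so this takes the `default` path.
    let mut slot = MaybeUninit::<String>::uninit();
    let s = String::from("heap me");
    let cloned = unsafe {
        s.write_clone_into_raw(slot.as_mut_ptr());
        slot.assume_init()
    };
    assert_eq!(cloned, "heap me");
}
```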

library/alloc/src/boxed.rs (+7 −3)

```diff
@@ -151,7 +151,7 @@ use core::pin::Pin;
 use core::ptr::{self, Unique};
 use core::task::{Context, Poll};
 
-use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout};
+use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw};
 use crate::borrow::Cow;
 use crate::raw_vec::RawVec;
 use crate::str::from_boxed_utf8_unchecked;
@@ -1014,10 +1014,14 @@ impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> {
     /// // But they are unique objects
     /// assert_ne!(&*x as *const i32, &*y as *const i32);
     /// ```
-    #[rustfmt::skip]
     #[inline]
     fn clone(&self) -> Self {
-        Self::new_in((**self).clone(), self.1.clone())
+        // Pre-allocate memory to allow writing the cloned value directly.
+        let mut boxed = Self::new_uninit_in(self.1.clone());
+        unsafe {
+            (**self).write_clone_into_raw(boxed.as_mut_ptr());
+            boxed.assume_init()
+        }
     }
 
     /// Copies `source`'s contents into `self` without creating a new allocation.
```
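The rewrite should be behavior-preserving: `Box::clone` still yields an equal value at a distinct address, exactly as the doctest in the hunk asserts. A quick check on stable Rust:

```rust
fn main() {
    let x = Box::new(5);
    let y = x.clone();
    // Equal values...
    assert_eq!(x, y);
    // ...but distinct heap objects.
    assert_ne!(&*x as *const i32, &*y as *const i32);
}
```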

library/alloc/src/rc.rs (+18 −8)

```diff
@@ -263,7 +263,9 @@ use core::pin::Pin;
 use core::ptr::{self, NonNull};
 use core::slice::from_raw_parts_mut;
 
-use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
+use crate::alloc::{
+    box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
+};
 use crate::borrow::{Cow, ToOwned};
 use crate::string::String;
 use crate::vec::Vec;
@@ -1037,18 +1039,26 @@ impl<T: Clone> Rc<T> {
     #[stable(feature = "rc_unique", since = "1.4.0")]
     pub fn make_mut(this: &mut Self) -> &mut T {
         if Rc::strong_count(this) != 1 {
-            // Gotta clone the data, there are other Rcs
-            *this = Rc::new((**this).clone())
+            // Gotta clone the data, there are other Rcs.
+            // Pre-allocate memory to allow writing the cloned value directly.
+            let mut rc = Self::new_uninit();
+            unsafe {
+                let data = Rc::get_mut_unchecked(&mut rc);
+                (**this).write_clone_into_raw(data.as_mut_ptr());
+                *this = rc.assume_init();
+            }
         } else if Rc::weak_count(this) != 0 {
             // Can just steal the data, all that's left is Weaks
+            let mut rc = Self::new_uninit();
             unsafe {
-                let mut swap = Rc::new(ptr::read(&this.ptr.as_ref().value));
-                mem::swap(this, &mut swap);
-                swap.inner().dec_strong();
+                let data = Rc::get_mut_unchecked(&mut rc);
+                data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
+
+                this.inner().dec_strong();
                 // Remove implicit strong-weak ref (no need to craft a fake
                 // Weak here -- we know other Weaks can clean up for us)
-                swap.inner().dec_weak();
-                forget(swap);
+                this.inner().dec_weak();
+                ptr::write(this, rc.assume_init());
             }
         }
         // This unsafety is ok because we're guaranteed that the pointer
```
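For context, the two rewritten branches implement the documented clone-on-write contract of `Rc::make_mut`, which the patch preserves; for example:

```rust
use std::rc::Rc;

fn main() {
    // Other strong references exist: make_mut must clone, and the clone
    // is now written straight into the new allocation.
    let mut a = Rc::new(String::from("hi"));
    let b = Rc::clone(&a);
    Rc::make_mut(&mut a).push('!');
    assert_eq!(*a, "hi!");
    assert_eq!(*b, "hi");

    // Only weak references remain: the value is moved into a fresh
    // allocation instead of being cloned, and the Weaks are disassociated.
    let mut c = Rc::new(String::from("solo"));
    let w = Rc::downgrade(&c);
    Rc::make_mut(&mut c).push('!');
    assert!(w.upgrade().is_none());
    assert_eq!(*c, "solo!");
}
```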

library/alloc/src/sync.rs (+17 −12)

```diff
@@ -22,7 +22,9 @@ use core::slice::from_raw_parts_mut;
 use core::sync::atomic;
 use core::sync::atomic::Ordering::{Acquire, Relaxed, Release, SeqCst};
 
-use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
+use crate::alloc::{
+    box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
+};
 use crate::borrow::{Cow, ToOwned};
 use crate::boxed::Box;
 use crate::rc::is_dangling;
@@ -1369,8 +1371,14 @@ impl<T: Clone> Arc<T> {
         // weak count, there's no chance the ArcInner itself could be
         // deallocated.
         if this.inner().strong.compare_exchange(1, 0, Acquire, Relaxed).is_err() {
-            // Another strong pointer exists; clone
-            *this = Arc::new((**this).clone());
+            // Another strong pointer exists, so we must clone.
+            // Pre-allocate memory to allow writing the cloned value directly.
+            let mut arc = Self::new_uninit();
+            unsafe {
+                let data = Arc::get_mut_unchecked(&mut arc);
+                (**this).write_clone_into_raw(data.as_mut_ptr());
+                *this = arc.assume_init();
+            }
         } else if this.inner().weak.load(Relaxed) != 1 {
             // Relaxed suffices in the above because this is fundamentally an
             // optimization: we are always racing with weak pointers being
@@ -1386,17 +1394,14 @@ impl<T: Clone> Arc<T> {
 
             // Materialize our own implicit weak pointer, so that it can clean
             // up the ArcInner as needed.
-            let weak = Weak { ptr: this.ptr };
+            let _weak = Weak { ptr: this.ptr };
 
-            // mark the data itself as already deallocated
+            // Can just steal the data, all that's left is Weaks
+            let mut arc = Self::new_uninit();
             unsafe {
-                // there is no data race in the implicit write caused by `read`
-                // here (due to zeroing) because data is no longer accessed by
-                // other threads (due to there being no more strong refs at this
-                // point).
-                let mut swap = Arc::new(ptr::read(&weak.ptr.as_ref().data));
-                mem::swap(this, &mut swap);
-                mem::forget(swap);
+                let data = Arc::get_mut_unchecked(&mut arc);
+                data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
+                ptr::write(this, arc.assume_init());
             }
         } else {
             // We were the sole reference of either kind; bump back up the
```
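As with `Rc`, the observable contract of `Arc::make_mut` is unchanged; a quick stable-Rust check of both rewritten branches:

```rust
use std::sync::Arc;

fn main() {
    // Another strong pointer exists, so make_mut clones the data
    // (now written directly into the freshly allocated ArcInner).
    let mut a = Arc::new(vec![1, 2, 3]);
    let b = Arc::clone(&a);
    Arc::make_mut(&mut a).push(4);
    assert_eq!(*a, [1, 2, 3, 4]);
    assert_eq!(*b, [1, 2, 3]);

    // Only Weaks remain, so the value is stolen into a fresh allocation
    // and the Weak pointers are disassociated rather than cloned from.
    let mut c = Arc::new(vec![9]);
    let w = Arc::downgrade(&c);
    Arc::make_mut(&mut c).push(8);
    assert!(w.upgrade().is_none());
    assert_eq!(*c, [9, 8]);
}
```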
