Skip to content

Commit 41ccb13

Browse files
committed
Auto merge of rust-lang#122582 - scottmcm:swap-intrinsic-v2, r=oli-obk
Let codegen decide when to `mem::swap` with immediates Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea. Thus this PR introduces a new `typed_swap` intrinsic with a fallback body, and replaces that fallback implementation when swapping immediates or scalar pairs. r? oli-obk Replaces rust-lang#111744, and means we'll never need more libs PRs like rust-lang#111803 or rust-lang#107140
2 parents 1c64346 + 0f7c3b3 commit 41ccb13

File tree

4 files changed

+44
-60
lines changed

4 files changed

+44
-60
lines changed

core/src/intrinsics.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
use crate::marker::DiscriminantKind;
6767
use crate::marker::Tuple;
6868
use crate::mem::align_of;
69+
use crate::ptr;
6970

7071
pub mod mir;
7172
pub mod simd;
@@ -2638,6 +2639,27 @@ pub const fn is_val_statically_known<T: Copy>(_arg: T) -> bool {
26382639
false
26392640
}
26402641

2642+
/// Non-overlapping *typed* swap of a single value.
2643+
///
2644+
/// The codegen backends will replace this with a better implementation when
2645+
/// `T` is a simple type that can be loaded and stored as an immediate.
2646+
///
2647+
/// The stabilized form of this intrinsic is [`crate::mem::swap`].
2648+
///
2649+
/// # Safety
2650+
///
2651+
/// `x` and `y` are readable and writable as `T`, and non-overlapping.
2652+
#[rustc_nounwind]
2653+
#[inline]
2654+
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
2655+
// This has fallback `const fn` MIR, so shouldn't need stability, see #122652
2656+
#[rustc_const_unstable(feature = "const_typed_swap", issue = "none")]
2657+
pub const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
2658+
// SAFETY: The caller provided single non-overlapping items behind
2659+
// pointers, so swapping them with `count: 1` is fine.
2660+
unsafe { ptr::swap_nonoverlapping(x, y, 1) };
2661+
}
2662+
26412663
/// Returns whether we should check for library UB. This evaluate to the value of `cfg!(debug_assertions)`
26422664
/// during monomorphization.
26432665
///

core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@
170170
#![feature(const_try)]
171171
#![feature(const_type_id)]
172172
#![feature(const_type_name)]
173+
#![feature(const_typed_swap)]
173174
#![feature(const_unicode_case_lookup)]
174175
#![feature(const_unsafecell_get_mut)]
175176
#![feature(const_waker)]

core/src/mem/mod.rs

Lines changed: 3 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -726,63 +726,9 @@ pub unsafe fn uninitialized<T>() -> T {
726726
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
727727
#[rustc_diagnostic_item = "mem_swap"]
728728
pub const fn swap<T>(x: &mut T, y: &mut T) {
729-
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
730-
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
731-
// block optimization in `swap_slice` is hard to rewrite back
732-
// into the (unoptimized) direct swapping implementation, so we disable it.
733-
#[cfg(not(any(target_arch = "spirv")))]
734-
{
735-
// For types that are larger multiples of their alignment, the simple way
736-
// tends to copy the whole thing to stack rather than doing it one part
737-
// at a time, so instead treat them as one-element slices and piggy-back
738-
// the slice optimizations that will split up the swaps.
739-
if const { size_of::<T>() / align_of::<T>() > 2 } {
740-
// SAFETY: exclusive references always point to one non-overlapping
741-
// element and are non-null and properly aligned.
742-
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
743-
}
744-
}
745-
746-
// If a scalar consists of just a small number of alignment units, let
747-
// the codegen just swap those pieces directly, as it's likely just a
748-
// few instructions and anything else is probably overcomplicated.
749-
//
750-
// Most importantly, this covers primitives and simd types that tend to
751-
// have size=align where doing anything else can be a pessimization.
752-
// (This will also be used for ZSTs, though any solution works for them.)
753-
swap_simple(x, y);
754-
}
755-
756-
/// Same as [`swap`] semantically, but always uses the simple implementation.
757-
///
758-
/// Used elsewhere in `mem` and `ptr` at the bottom layer of calls.
759-
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
760-
#[inline]
761-
pub(crate) const fn swap_simple<T>(x: &mut T, y: &mut T) {
762-
// We arrange for this to typically be called with small types,
763-
// so this reads-and-writes approach is actually better than using
764-
// copy_nonoverlapping as it easily puts things in LLVM registers
765-
// directly and doesn't end up inlining allocas.
766-
// And LLVM actually optimizes it to 3×memcpy if called with
767-
// a type larger than it's willing to keep in a register.
768-
// Having typed reads and writes in MIR here is also good as
769-
// it lets Miri and CTFE understand them better, including things
770-
// like enforcing type validity for them.
771-
// Importantly, read+copy_nonoverlapping+write introduces confusing
772-
// asymmetry to the behaviour where one value went through read+write
773-
// whereas the other was copied over by the intrinsic (see #94371).
774-
// Furthermore, using only read+write here benefits limited backends
775-
// such as SPIR-V that work on an underlying *typed* view of memory,
776-
// and thus have trouble with Rust's untyped memory operations.
777-
778-
// SAFETY: exclusive references are always valid to read/write,
779-
// including being aligned, and nothing here panics so it's drop-safe.
780-
unsafe {
781-
let a = ptr::read(x);
782-
let b = ptr::read(y);
783-
ptr::write(x, b);
784-
ptr::write(y, a);
785-
}
729+
// SAFETY: `&mut` guarantees these are typed readable and writable
730+
// as well as non-overlapping.
731+
unsafe { intrinsics::typed_swap(x, y) }
786732
}
787733

788734
/// Replaces `dest` with the default value of `T`, returning the previous `dest` value.

core/src/ptr/mod.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,11 +1062,26 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
10621062
let mut i = 0;
10631063
while i < count {
10641064
// SAFETY: By precondition, `i` is in-bounds because it's below `n`
1065-
let x = unsafe { &mut *x.add(i) };
1065+
let x = unsafe { x.add(i) };
10661066
// SAFETY: By precondition, `i` is in-bounds because it's below `n`
10671067
// and it's distinct from `x` since the ranges are non-overlapping
1068-
let y = unsafe { &mut *y.add(i) };
1069-
mem::swap_simple::<MaybeUninit<T>>(x, y);
1068+
let y = unsafe { y.add(i) };
1069+
1070+
// If we end up here, it's because we're using a simple type -- like
1071+
// a small power-of-two-sized thing -- or a special type with particularly
1072+
// large alignment, particularly SIMD types.
1073+
// Thus we're fine just reading-and-writing it, as either it's small
1074+
// and that works well anyway or it's special and the type's author
1075+
// presumably wanted things to be done in the larger chunk.
1076+
1077+
// SAFETY: we're only ever given pointers that are valid to read/write,
1078+
// including being aligned, and nothing here panics so it's drop-safe.
1079+
unsafe {
1080+
let a: MaybeUninit<T> = read(x);
1081+
let b: MaybeUninit<T> = read(y);
1082+
write(x, b);
1083+
write(y, a);
1084+
}
10701085

10711086
i += 1;
10721087
}

0 commit comments

Comments
 (0)