Skip to content

Commit c1824d1

Browse files
committed
Auto merge of rust-lang#118310 - scottmcm:three-way-compare, r=davidtwco
Add `Ord::cmp` for primitives as a `BinOp` in MIR Update: most of this OP was written months ago. See rust-lang#118310 (comment) below for where we got to recently that made it ready for review. --- There are dozens of reasonable ways to implement `Ord::cmp` for integers using comparison, bit-ops, and branches. Those differences are irrelevant at the rust level, however, so we can make things better by adding `BinOp::Cmp` at the MIR level: 1. Exactly how to implement it is left up to the backends, so LLVM can use whatever pattern its optimizer best recognizes and cranelift can use whichever pattern codegens the fastest. 2. By not inlining those details for every use of `cmp`, we drastically reduce the amount of MIR generated for `derive`d `PartialOrd`, while also making it more amenable to MIR-level optimizations. Having extremely careful `if` ordering to μoptimize resource usage on broadwell (rust-lang#63767) is great, but it really feels to me like libcore is the wrong place to put that logic. Similarly, using subtraction [tricks](https://graphics.stanford.edu/~seander/bithacks.html#CopyIntegerSign) (rust-lang#105840) is arguably even nicer, but depends on the optimizer understanding it (llvm/llvm-project#73417) to be practical. Or maybe [bitor is better than add](https://discourse.llvm.org/t/representing-in-ir/67369/2?u=scottmcm)? But maybe only on a future version that [has `or disjoint` support](https://discourse.llvm.org/t/rfc-add-or-disjoint-flag/75036?u=scottmcm)? And just because one of those forms happens to be good for LLVM, there's no guarantee that it'd be the same form that GCC or Cranelift would rather see -- especially given their very different optimizers. Not to mention that if LLVM gets a spaceship intrinsic -- [which it should](https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Suboptimal.20inlining.20in.20std.20function.20.60binary_search.60/near/404250586) -- we'll need at least a rustc intrinsic to be able to call it. As for simplifying it in Rust, we now regularly inline `{integer}::partial_cmp`, but it's quite a large amount of IR. The best way to see that is with rust-lang@8811efa#diff-d134c32d028fbe2bf835fef2df9aca9d13332dd82284ff21ee7ebf717bfa4765R113 -- I added a new pre-codegen MIR test for a simple 3-tuple struct, and this PR change it from 36 locals and 26 basic blocks down to 24 locals and 8 basic blocks. Even better, as soon as the construct-`Some`-then-match-it-in-same-BB noise is cleaned up, this'll expose the `Cmp == 0` branches clearly in MIR, so that an InstCombine (rust-lang#105808) can simplify that to just a `BinOp::Eq` and thus fix some of our generated code perf issues. (Tracking that through today's `if a < b { Less } else if a == b { Equal } else { Greater }` would be *much* harder.) --- r? `@ghost` But first I should check that perf is ok with this ~~...and my true nemesis, tidy.~~
2 parents 697f9ec + 6bbd18a commit c1824d1

File tree

4 files changed

+64
-6
lines changed

4 files changed

+64
-6
lines changed

core/src/cmp.rs

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,10 @@ pub struct AssertParamIsEq<T: Eq + ?Sized> {
376376
/// ```
377377
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
378378
#[stable(feature = "rust1", since = "1.0.0")]
379+
// This is a lang item only so that `BinOp::Cmp` in MIR can return it.
380+
// It has no special behaviour, but does require that the three variants
381+
// `Less`/`Equal`/`Greater` remain `-1_i8`/`0_i8`/`+1_i8` respectively.
382+
#[cfg_attr(not(bootstrap), lang = "Ordering")]
379383
#[repr(i8)]
380384
pub enum Ordering {
381385
/// An ordering where a compared value is less than another.
@@ -1554,7 +1558,14 @@ mod impls {
15541558
impl PartialOrd for $t {
15551559
#[inline]
15561560
fn partial_cmp(&self, other: &$t) -> Option<Ordering> {
1557-
Some(self.cmp(other))
1561+
#[cfg(bootstrap)]
1562+
{
1563+
Some(self.cmp(other))
1564+
}
1565+
#[cfg(not(bootstrap))]
1566+
{
1567+
Some(crate::intrinsics::three_way_compare(*self, *other))
1568+
}
15581569
}
15591570
#[inline(always)]
15601571
fn lt(&self, other: &$t) -> bool { (*self) < (*other) }
@@ -1570,11 +1581,18 @@ mod impls {
15701581
impl Ord for $t {
15711582
#[inline]
15721583
fn cmp(&self, other: &$t) -> Ordering {
1573-
// The order here is important to generate more optimal assembly.
1574-
// See <https://github.com/rust-lang/rust/issues/63758> for more info.
1575-
if *self < *other { Less }
1576-
else if *self == *other { Equal }
1577-
else { Greater }
1584+
#[cfg(bootstrap)]
1585+
{
1586+
// The order here is important to generate more optimal assembly.
1587+
// See <https://github.com/rust-lang/rust/issues/63758> for more info.
1588+
if *self < *other { Less }
1589+
else if *self == *other { Equal }
1590+
else { Greater }
1591+
}
1592+
#[cfg(not(bootstrap))]
1593+
{
1594+
crate::intrinsics::three_way_compare(*self, *other)
1595+
}
15781596
}
15791597
}
15801598
)*)

core/src/intrinsics.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,6 +2146,18 @@ extern "rust-intrinsic" {
21462146
#[rustc_nounwind]
21472147
pub fn bitreverse<T: Copy>(x: T) -> T;
21482148

2149+
/// Does a three-way comparison between the two integer arguments.
2150+
///
2151+
/// This is included as an intrinsic as it's useful to let it be one thing
2152+
/// in MIR, rather than the multiple checks and switches that make its IR
2153+
/// large and difficult to optimize.
2154+
///
2155+
/// The stabilized version of this intrinsic is [`Ord::cmp`].
2156+
#[cfg(not(bootstrap))]
2157+
#[rustc_const_unstable(feature = "const_three_way_compare", issue = "none")]
2158+
#[rustc_safe_intrinsic]
2159+
pub fn three_way_compare<T: Copy>(lhs: T, rhs: T) -> crate::cmp::Ordering;
2160+
21492161
/// Performs checked integer addition.
21502162
///
21512163
/// Note that, unlike most intrinsics, this is safe to call;

core/tests/intrinsics.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,30 @@ fn test_const_deallocate_at_runtime() {
9999
const_deallocate(core::ptr::null_mut(), 1, 1); // nop
100100
}
101101
}
102+
103+
#[cfg(not(bootstrap))]
104+
#[test]
105+
fn test_three_way_compare_in_const_contexts() {
106+
use core::cmp::Ordering::{self, *};
107+
use core::intrinsics::three_way_compare;
108+
109+
const UNSIGNED_LESS: Ordering = three_way_compare(123_u16, 456);
110+
const UNSIGNED_EQUAL: Ordering = three_way_compare(456_u16, 456);
111+
const UNSIGNED_GREATER: Ordering = three_way_compare(789_u16, 456);
112+
const CHAR_LESS: Ordering = three_way_compare('A', 'B');
113+
const CHAR_EQUAL: Ordering = three_way_compare('B', 'B');
114+
const CHAR_GREATER: Ordering = three_way_compare('C', 'B');
115+
const SIGNED_LESS: Ordering = three_way_compare(123_i64, 456);
116+
const SIGNED_EQUAL: Ordering = three_way_compare(456_i64, 456);
117+
const SIGNED_GREATER: Ordering = three_way_compare(789_i64, 456);
118+
119+
assert_eq!(UNSIGNED_LESS, Less);
120+
assert_eq!(UNSIGNED_EQUAL, Equal);
121+
assert_eq!(UNSIGNED_GREATER, Greater);
122+
assert_eq!(CHAR_LESS, Less);
123+
assert_eq!(CHAR_EQUAL, Equal);
124+
assert_eq!(CHAR_GREATER, Greater);
125+
assert_eq!(SIGNED_LESS, Less);
126+
assert_eq!(SIGNED_EQUAL, Equal);
127+
assert_eq!(SIGNED_GREATER, Greater);
128+
}

core/tests/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#![feature(const_pointer_is_aligned)]
2323
#![feature(const_ptr_as_ref)]
2424
#![feature(const_ptr_write)]
25+
#![cfg_attr(not(bootstrap), feature(const_three_way_compare))]
2526
#![feature(const_trait_impl)]
2627
#![feature(const_likely)]
2728
#![feature(const_location_fields)]

0 commit comments

Comments
 (0)