Commit ee86f96
Auto merge of #85828 - scottmcm:raw-eq, r=oli-obk
Stop generating `alloca`s & `memcmp` for simple short array equality

Example:
```rust
pub fn demo(x: [u16; 6], y: [u16; 6]) -> bool {
    x == y
}
```

Before:
```llvm
define zeroext i1 @_ZN10playground4demo17h48537f7eac23948fE(i96 %0, i96 %1) unnamed_addr #0 {
start:
  %y = alloca [6 x i16], align 8
  %x = alloca [6 x i16], align 8
  %.0..sroa_cast = bitcast [6 x i16]* %x to i96*
  store i96 %0, i96* %.0..sroa_cast, align 8
  %.0..sroa_cast3 = bitcast [6 x i16]* %y to i96*
  store i96 %1, i96* %.0..sroa_cast3, align 8
  %_11.i.i.i = bitcast [6 x i16]* %x to i8*
  %_14.i.i.i = bitcast [6 x i16]* %y to i8*
  %bcmp.i.i.i = call i32 @bcmp(i8* nonnull dereferenceable(12) %_11.i.i.i, i8* nonnull dereferenceable(12) %_14.i.i.i, i64 12) #2, !alias.scope !2
  %2 = icmp eq i32 %bcmp.i.i.i, 0
  ret i1 %2
}
```
```x86
playground::demo: # @playground::demo
    sub rsp, 32
    mov qword ptr [rsp], rdi
    mov dword ptr [rsp + 8], esi
    mov qword ptr [rsp + 16], rdx
    mov dword ptr [rsp + 24], ecx
    xor rdi, rdx
    xor esi, ecx
    or rsi, rdi
    sete al
    add rsp, 32
    ret
```

After:
```llvm
define zeroext i1 @_ZN4mini4demo17h7a8994aaa314c981E(i96 %0, i96 %1) unnamed_addr #0 {
start:
  %2 = icmp eq i96 %0, %1
  ret i1 %2
}
```
```x86
_ZN4mini4demo17h7a8994aaa314c981E:
    xor rcx, r8
    xor edx, r9d
    or rdx, rcx
    sete al
    ret
```
2 parents 95fb131 + d064494 commit ee86f96

15 files changed, +410 -114 lines changed

Diff for: compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

+34
```diff
@@ -1115,6 +1115,40 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         );
         ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
     };
+
+    raw_eq, <T>(v lhs_ref, v rhs_ref) {
+        fn type_by_size(size: Size) -> Option<Type> {
+            Type::int(size.bits().try_into().ok()?)
+        }
+
+        let size = fx.layout_of(T).layout.size;
+        let is_eq_value =
+            if size == Size::ZERO {
+                // No bytes means they're trivially equal
+                fx.bcx.ins().iconst(types::I8, 1)
+            } else if let Some(clty) = type_by_size(size) {
+                // Can't use `trusted` for these loads; they could be unaligned.
+                let mut flags = MemFlags::new();
+                flags.set_notrap();
+                let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0);
+                let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0);
+                let eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val);
+                fx.bcx.ins().bint(types::I8, eq)
+            } else {
+                // Just call `memcmp` (like slices do in core) when the
+                // size is too large or it's not a power-of-two.
+                let ptr_ty = pointer_ty(fx.tcx);
+                let signed_bytes = i64::try_from(size.bytes()).unwrap();
+                let bytes_val = fx.bcx.ins().iconst(ptr_ty, signed_bytes);
+                let params = vec![AbiParam::new(ptr_ty); 3];
+                let returns = vec![AbiParam::new(types::I32)];
+                let args = &[lhs_ref, rhs_ref, bytes_val];
+                let cmp = fx.lib_call("memcmp", params, returns, args)[0];
+                let eq = fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0);
+                fx.bcx.ins().bint(types::I8, eq)
+            };
+        ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
+    };
 }
 
     if let Some((_, dest)) = destination {
```
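As a rough, non-authoritative illustration of the size dispatch in the Cranelift lowering above (`Type::int` only yields a Cranelift integer type for 8/16/32/64/128-bit widths), the three branches map onto byte sizes roughly as follows; the helper name is invented for the sketch:

```rust
// Illustrative sketch, not compiler code: which branch of the Cranelift
// lowering above a value of a given byte size would take.
fn raw_eq_strategy(size_in_bytes: u64) -> &'static str {
    match size_in_bytes {
        0 => "no bytes: constant `true`",
        1 | 2 | 4 | 8 | 16 => "one integer load per side, then `icmp`",
        _ => "`memcmp` libcall, result compared against 0",
    }
}

fn main() {
    // e.g. `[u16; 6]` is 12 bytes, which is not a loadable integer width:
    assert_eq!(raw_eq_strategy(12), "`memcmp` libcall, result compared against 0");
}
```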

Diff for: compiler/rustc_codegen_cranelift/src/value_and_place.rs

+4
```diff
@@ -453,6 +453,10 @@ impl<'tcx> CPlace<'tcx> {
                 ptr.store(fx, data, MemFlags::trusted());
                 ptr.load(fx, dst_ty, MemFlags::trusted())
             }
+
+            // `CValue`s should never contain SSA-only types, so if you ended
+            // up here having seen an error like `B1 -> I8`, then before
+            // calling `write_cvalue` you need to add a `bint` instruction.
             _ => unreachable!("write_cvalue_transmute: {:?} -> {:?}", src_ty, dst_ty),
         };
         //fx.bcx.set_val_label(data, cranelift_codegen::ir::ValueLabel::new(var.index()));
```

Diff for: compiler/rustc_codegen_llvm/src/context.rs

+5
```diff
@@ -500,6 +500,7 @@ impl CodegenCx<'b, 'tcx> {
         let t_i32 = self.type_i32();
         let t_i64 = self.type_i64();
         let t_i128 = self.type_i128();
+        let t_isize = self.type_isize();
         let t_f32 = self.type_f32();
         let t_f64 = self.type_f64();
 
@@ -712,6 +713,10 @@ impl CodegenCx<'b, 'tcx> {
         ifn!("llvm.assume", fn(i1) -> void);
         ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void);
 
+        // This isn't an "LLVM intrinsic", but LLVM's optimization passes
+        // recognize it like one and we assume it exists in `core::slice::cmp`
+        ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32);
+
         // variadic intrinsics
         ifn!("llvm.va_start", fn(i8p) -> void);
         ifn!("llvm.va_end", fn(i8p) -> void);
```

Diff for: compiler/rustc_codegen_llvm/src/intrinsic.rs

+38
```diff
@@ -296,6 +296,44 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
                 }
             }
 
+            sym::raw_eq => {
+                use abi::Abi::*;
+                let tp_ty = substs.type_at(0);
+                let layout = self.layout_of(tp_ty).layout;
+                let use_integer_compare = match layout.abi {
+                    Scalar(_) | ScalarPair(_, _) => true,
+                    Uninhabited | Vector { .. } => false,
+                    Aggregate { .. } => {
+                        // For rusty ABIs, small aggregates are actually passed
+                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
+                        // so we re-use that same threshold here.
+                        layout.size <= self.data_layout().pointer_size * 2
+                    }
+                };
+
+                let a = args[0].immediate();
+                let b = args[1].immediate();
+                if layout.size.bytes() == 0 {
+                    self.const_bool(true)
+                } else if use_integer_compare {
+                    let integer_ty = self.type_ix(layout.size.bits());
+                    let ptr_ty = self.type_ptr_to(integer_ty);
+                    let a_ptr = self.bitcast(a, ptr_ty);
+                    let a_val = self.load(a_ptr, layout.align.abi);
+                    let b_ptr = self.bitcast(b, ptr_ty);
+                    let b_val = self.load(b_ptr, layout.align.abi);
+                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
+                } else {
+                    let i8p_ty = self.type_i8p();
+                    let a_ptr = self.bitcast(a, i8p_ty);
+                    let b_ptr = self.bitcast(b, i8p_ty);
+                    let n = self.const_usize(layout.size.bytes());
+                    let llfn = self.get_intrinsic("memcmp");
+                    let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None);
+                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
+                }
+            }
+
             _ if name_str.starts_with("simd_") => {
                 match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
                     Ok(llval) => llval,
```
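Concretely, the `Aggregate` arm above reuses the two-pointers-worth threshold from `FnAbi::adjust_for_abi`. Assuming a 64-bit target (pointer size 8 bytes, so a 16-byte cutoff), an illustrative pair of functions landing on either side of it:

```rust
// Illustration only, assuming a 64-bit target: the cutoff for the
// integer-compare path is `2 * pointer_size` = 16 bytes.
pub fn small(x: [u16; 8], y: [u16; 8]) -> bool {
    x == y // 16 bytes: single integer load + `icmp`
}

pub fn large(x: [u16; 9], y: [u16; 9]) -> bool {
    x == y // 18 bytes: lowered to a `memcmp` call
}
```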

Diff for: compiler/rustc_mir/src/interpret/intrinsics.rs

+19
```diff
@@ -472,6 +472,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                     throw_ub_format!("`assume` intrinsic called with `false`");
                 }
             }
+            sym::raw_eq => {
+                let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
+                self.write_scalar(result, dest)?;
+            }
             _ => return Ok(false),
         }
 
@@ -559,4 +563,19 @@
 
         self.memory.copy(src, align, dst, align, size, nonoverlapping)
     }
+
+    pub(crate) fn raw_eq_intrinsic(
+        &mut self,
+        lhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
+        rhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
+    ) -> InterpResult<'tcx, Scalar<M::PointerTag>> {
+        let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?;
+        assert!(!layout.is_unsized());
+
+        let lhs = self.read_scalar(lhs)?.check_init()?;
+        let rhs = self.read_scalar(rhs)?.check_init()?;
+        let lhs_bytes = self.memory.read_bytes(lhs, layout.size)?;
+        let rhs_bytes = self.memory.read_bytes(rhs, layout.size)?;
+        Ok(Scalar::from_bool(lhs_bytes == rhs_bytes))
+    }
 }
```
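The const-eval path reads the raw bytes of both operands and compares them directly. A hypothetical stand-in with the same byte-wise semantics (illustration only; `raw_eq_sketch` is an invented name, and the real intrinsic is implemented by the compiler) could be written as:

```rust
use core::mem::size_of;

/// Hypothetical stand-in mirroring the byte-wise comparison that
/// `raw_eq_intrinsic` above performs during const evaluation.
/// Callers must ensure `T` has no padding and that byte equality
/// coincides with `PartialEq` for `T`.
unsafe fn raw_eq_sketch<T>(a: &T, b: &T) -> bool {
    let a = a as *const T as *const u8;
    let b = b as *const T as *const u8;
    for i in 0..size_of::<T>() {
        if *a.add(i) != *b.add(i) {
            return false;
        }
    }
    true
}
```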

Diff for: compiler/rustc_span/src/symbol.rs

+1
```diff
@@ -934,6 +934,7 @@ symbols! {
         quote,
         range_inclusive_new,
         raw_dylib,
+        raw_eq,
         raw_identifiers,
         raw_ref_op,
         re_rebalance_coherence,
```

Diff for: compiler/rustc_typeck/src/check/intrinsic.rs

+7
```diff
@@ -380,6 +380,13 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
 
         sym::nontemporal_store => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()),
 
+        sym::raw_eq => {
+            let br = ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) };
+            let param_ty =
+                tcx.mk_imm_ref(tcx.mk_region(ty::ReLateBound(ty::INNERMOST, br)), param(0));
+            (1, vec![param_ty; 2], tcx.types.bool)
+        }
+
         other => {
             tcx.sess.emit_err(UnrecognizedIntrinsicFunction { span: it.span, name: other });
             return;
```
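The tuple built above is `(number of type parameters, argument types, return type)`: one type parameter, two shared references to it with fresh late-bound lifetimes, and `bool`. That is the shape any declaration of the intrinsic must match; a minimal nightly-only sketch of such a declaration (the exact item in `core::intrinsics` is not shown in this excerpt) is:

```rust
#![feature(intrinsics)]

// Declaring the intrinsic with any other signature would be rejected by the
// `check_intrinsic_type` arm added above.
extern "rust-intrinsic" {
    fn raw_eq<T>(a: &T, b: &T) -> bool;
}
```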

Diff for: library/core/src/array/equality.rs

+160
@@ -0,0 +1,160 @@ (new file)

```rust
#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<[B; N]> for [A; N]
where
    A: PartialEq<B>,
{
    #[inline]
    fn eq(&self, other: &[B; N]) -> bool {
        SpecArrayEq::spec_eq(self, other)
    }
    #[inline]
    fn ne(&self, other: &[B; N]) -> bool {
        SpecArrayEq::spec_ne(self, other)
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<[B]> for [A; N]
where
    A: PartialEq<B>,
{
    #[inline]
    fn eq(&self, other: &[B]) -> bool {
        self[..] == other[..]
    }
    #[inline]
    fn ne(&self, other: &[B]) -> bool {
        self[..] != other[..]
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<[A; N]> for [B]
where
    B: PartialEq<A>,
{
    #[inline]
    fn eq(&self, other: &[A; N]) -> bool {
        self[..] == other[..]
    }
    #[inline]
    fn ne(&self, other: &[A; N]) -> bool {
        self[..] != other[..]
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<&[B]> for [A; N]
where
    A: PartialEq<B>,
{
    #[inline]
    fn eq(&self, other: &&[B]) -> bool {
        self[..] == other[..]
    }
    #[inline]
    fn ne(&self, other: &&[B]) -> bool {
        self[..] != other[..]
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<[A; N]> for &[B]
where
    B: PartialEq<A>,
{
    #[inline]
    fn eq(&self, other: &[A; N]) -> bool {
        self[..] == other[..]
    }
    #[inline]
    fn ne(&self, other: &[A; N]) -> bool {
        self[..] != other[..]
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<&mut [B]> for [A; N]
where
    A: PartialEq<B>,
{
    #[inline]
    fn eq(&self, other: &&mut [B]) -> bool {
        self[..] == other[..]
    }
    #[inline]
    fn ne(&self, other: &&mut [B]) -> bool {
        self[..] != other[..]
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<A, B, const N: usize> PartialEq<[A; N]> for &mut [B]
where
    B: PartialEq<A>,
{
    #[inline]
    fn eq(&self, other: &[A; N]) -> bool {
        self[..] == other[..]
    }
    #[inline]
    fn ne(&self, other: &[A; N]) -> bool {
        self[..] != other[..]
    }
}

// NOTE: some less important impls are omitted to reduce code bloat
// __impl_slice_eq2! { [A; $N], &'b [B; $N] }
// __impl_slice_eq2! { [A; $N], &'b mut [B; $N] }

#[stable(feature = "rust1", since = "1.0.0")]
impl<T: Eq, const N: usize> Eq for [T; N] {}

trait SpecArrayEq<Other, const N: usize>: Sized {
    fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool;
    fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool;
}

impl<T: PartialEq<Other>, Other, const N: usize> SpecArrayEq<Other, N> for T {
    default fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool {
        a[..] == b[..]
    }
    default fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool {
        a[..] != b[..]
    }
}

impl<T: PartialEq<U> + IsRawEqComparable<U>, U, const N: usize> SpecArrayEq<U, N> for T {
    #[cfg(bootstrap)]
    fn spec_eq(a: &[T; N], b: &[U; N]) -> bool {
        a[..] == b[..]
    }
    #[cfg(not(bootstrap))]
    fn spec_eq(a: &[T; N], b: &[U; N]) -> bool {
        // SAFETY: This is why `IsRawEqComparable` is an `unsafe trait`.
        unsafe {
            let b = &*b.as_ptr().cast::<[T; N]>();
            crate::intrinsics::raw_eq(a, b)
        }
    }
    fn spec_ne(a: &[T; N], b: &[U; N]) -> bool {
        !Self::spec_eq(a, b)
    }
}

/// `U` exists on here mostly because `min_specialization` didn't let me
/// repeat the `T` type parameter in the above specialization, so instead
/// the `T == U` constraint comes from the impls on this.
/// # Safety
/// - Neither `Self` nor `U` has any padding.
/// - `Self` and `U` have the same layout.
/// - `Self: PartialEq<U>` is byte-wise (this means no floats, among other things)
#[rustc_specialization_trait]
unsafe trait IsRawEqComparable<U> {}

macro_rules! is_raw_comparable {
    ($($t:ty),+) => {$(
        unsafe impl IsRawEqComparable<$t> for $t {}
    )+};
}
is_raw_comparable!(bool, char, u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
```
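A small usage sketch of what the new specialization changes (illustrative): element types listed in `is_raw_comparable!` become eligible for the `raw_eq` fast path, while types whose `PartialEq` is not byte-wise, such as floats, keep the default element-by-element comparison.

```rust
fn main() {
    // `u16` is in the `is_raw_comparable!` list, so this comparison can use
    // the `raw_eq`-based specialization.
    let a = [1u16, 2, 3, 4, 5, 6];
    let b = [1u16, 2, 3, 4, 5, 6];
    assert!(a == b);

    // Floats are deliberately excluded: `-0.0 == 0.0` holds even though the
    // byte patterns differ, so `[f32; N]` stays on the default path.
    let x = [0.0f32, -0.0];
    let y = [-0.0f32, 0.0];
    assert!(x == y);
}
```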
