Skip to content

Commit cc937f3

Browse files
committed
Auto merge of rust-lang#123878 - jwong101:inplacecollect, r=jhpratt
optimize inplace collection of Vec This PR has the following changes: 1. Using `usize::unchecked_mul` in https://github.com/rust-lang/rust/blob/79424056b05eaa9563d16dfab9b9a0c8f033f220/library/alloc/src/vec/in_place_collect.rs#L262 as LLVM, does not know that the operation can't wrap, since that's the size of the original allocation. Given the following: ```rust pub struct Foo([usize; 3]); pub fn unwrap_copy(v: Vec<Foo>) -> Vec<[usize; 3]> { v.into_iter().map(|f| f.0).collect() } ``` <details> <summary>Before this commit:</summary> ```llvm define void `@unwrap_copy(ptr` noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) { start: %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8 %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8 %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8 %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16 %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8 %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24 %0 = udiv i64 %_19.i.idx, 24 ; Unnecessary calculation %_16.i.i = mul i64 %me.sroa.0.0.copyload.i, 24 %dst_cap.i.i = udiv i64 %_16.i.i, 24 store i64 %dst_cap.i.i, ptr %_0, align 8 %1 = getelementptr inbounds i8, ptr %_0, i64 8 store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8 %2 = getelementptr inbounds i8, ptr %_0, i64 16 store i64 %0, ptr %2, align 8 ret void } ``` </details> <details> <summary>After:</summary> ```llvm define void `@unwrap_copy(ptr` noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) { start: %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8 %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8 %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, 
align 8 %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16 %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8 %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24 %0 = udiv i64 %_19.i.idx, 24 store i64 %me.sroa.0.0.copyload.i, ptr %_0, align 8 %1 = getelementptr inbounds i8, ptr %_0, i64 8 store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8 %2 = getelementptr inbounds i8, ptr %_0, i64 16 store i64 %0, ptr %2, align 8, !alias.scope !9, !noalias !14 ret void } ``` </details> Note that there is still one more `mul,udiv` pair that I couldn't get rid of. The root cause is the same issue as rust-lang#121239, the `nuw` gets stripped off of `ptr::sub_ptr`. 2. `Iterator::try_fold` gets called on the underlying Iterator in `SpecInPlaceCollect::collect_in_place` whenever it does not implement `TrustedRandomAccess`. For types that impl `Drop`, LLVM currently can't tell that the drop can never occur, when using the default `Iterator::try_fold` implementation. 
For example, given the following code from rust-lang#120493 ```rust #[repr(transparent)] struct WrappedClone { inner: String } #[no_mangle] pub fn unwrap_clone(list: Vec<WrappedClone>) -> Vec<String> { list.into_iter().map(|s| s.inner).collect() } ``` <details> <summary>The asm for the `unwrap_clone` method is currently:</summary> ```asm unwrap_clone: push rbp push r15 push r14 push r13 push r12 push rbx push rax mov rbx, rdi mov r12, qword ptr [rsi] mov rdi, qword ptr [rsi + 8] mov rax, qword ptr [rsi + 16] movabs rsi, -6148914691236517205 mov r14, r12 test rax, rax je .LBB0_10 lea rcx, [rax + 2*rax] lea r14, [r12 + 8*rcx] shl rax, 3 lea rax, [rax + 2*rax] xor ecx, ecx .LBB0_2: cmp qword ptr [r12 + rcx], 0 je .LBB0_4 add rcx, 24 cmp rax, rcx jne .LBB0_2 jmp .LBB0_10 .LBB0_4: lea rdx, [rax - 24] lea r14, [r12 + rcx] cmp rdx, rcx je .LBB0_10 mov qword ptr [rsp], rdi sub rax, rcx add rax, -24 mul rsi mov r15, rdx lea rbp, [r12 + rcx] add rbp, 32 shr r15, 4 mov r13, qword ptr [rip + __rust_dealloc@GOTPCREL] jmp .LBB0_6 .LBB0_8: add rbp, 24 dec r15 je .LBB0_9 .LBB0_6: mov rsi, qword ptr [rbp] test rsi, rsi je .LBB0_8 mov rdi, qword ptr [rbp - 8] mov edx, 1 call r13 jmp .LBB0_8 .LBB0_9: mov rdi, qword ptr [rsp] movabs rsi, -6148914691236517205 .LBB0_10: sub r14, r12 mov rax, r14 mul rsi shr rdx, 4 mov qword ptr [rbx], r12 mov qword ptr [rbx + 8], rdi mov qword ptr [rbx + 16], rdx mov rax, rbx add rsp, 8 pop rbx pop r12 pop r13 pop r14 pop r15 pop rbp ret ``` </details> <details> <summary>After this PR:</summary> ```asm unwrap_clone: mov rax, rdi movups xmm0, xmmword ptr [rsi] mov rcx, qword ptr [rsi + 16] movups xmmword ptr [rdi], xmm0 mov qword ptr [rdi + 16], rcx ret ``` </details> Fixes rust-lang#120493
2 parents 3a5f258 + c8f44c3 commit cc937f3

File tree

3 files changed

+58
-2
lines changed

3 files changed

+58
-2
lines changed

alloc/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@
160160
#![feature(tuple_trait)]
161161
#![feature(unicode_internals)]
162162
#![feature(unsize)]
163+
#![feature(unwrap_infallible)]
163164
#![feature(vec_pop_if)]
164165
// tidy-alphabetical-end
165166
//

alloc/src/vec/in_place_collect.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ where
259259
inner.cap,
260260
inner.buf.cast::<T>(),
261261
inner.end as *const T,
262-
inner.cap * mem::size_of::<I::Src>() / mem::size_of::<T>(),
262+
// SAFETY: the multiplication cannot overflow, since `inner.cap * size_of::<I::Src>()` is the size of the allocation.
263+
inner.cap.unchecked_mul(mem::size_of::<I::Src>()) / mem::size_of::<T>(),
263264
)
264265
};
265266

@@ -374,7 +375,7 @@ where
374375
// - it lets us thread the write pointer through its innards and get it back in the end
375376
let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
376377
let sink =
377-
self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
378+
self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).into_ok();
378379
// iteration succeeded, don't drop head
379380
unsafe { ManuallyDrop::new(sink).dst.sub_ptr(dst_buf) }
380381
}

alloc/src/vec/into_iter.rs

+54
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,60 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
289289
};
290290
}
291291

292+
fn fold<B, F>(mut self, mut accum: B, mut f: F) -> B
293+
where
294+
F: FnMut(B, Self::Item) -> B,
295+
{
296+
if T::IS_ZST {
297+
while self.ptr.as_ptr() != self.end.cast_mut() {
298+
// SAFETY: we just checked that `self.ptr` is in bounds.
299+
let tmp = unsafe { self.ptr.read() };
300+
// See `next` for why we subtract from `end` here.
301+
self.end = self.end.wrapping_byte_sub(1);
302+
accum = f(accum, tmp);
303+
}
304+
} else {
305+
// SAFETY: `self.end` can only be null if `T` is a ZST.
306+
while self.ptr != non_null!(self.end, T) {
307+
// SAFETY: we just checked that `self.ptr` is in bounds.
308+
let tmp = unsafe { self.ptr.read() };
309+
// SAFETY: the maximum this can be is `self.end`.
310+
// Increment `self.ptr` first to avoid double dropping in the event of a panic.
311+
self.ptr = unsafe { self.ptr.add(1) };
312+
accum = f(accum, tmp);
313+
}
314+
}
315+
accum
316+
}
317+
318+
fn try_fold<B, F, R>(&mut self, mut accum: B, mut f: F) -> R
319+
where
320+
Self: Sized,
321+
F: FnMut(B, Self::Item) -> R,
322+
R: core::ops::Try<Output = B>,
323+
{
324+
if T::IS_ZST {
325+
while self.ptr.as_ptr() != self.end.cast_mut() {
326+
// SAFETY: we just checked that `self.ptr` is in bounds.
327+
let tmp = unsafe { self.ptr.read() };
328+
// See `next` for why we subtract from `end` here.
329+
self.end = self.end.wrapping_byte_sub(1);
330+
accum = f(accum, tmp)?;
331+
}
332+
} else {
333+
// SAFETY: `self.end` can only be null if `T` is a ZST.
334+
while self.ptr != non_null!(self.end, T) {
335+
// SAFETY: we just checked that `self.ptr` is in bounds.
336+
let tmp = unsafe { self.ptr.read() };
337+
// SAFETY: the maximum this can be is `self.end`.
338+
// Increment `self.ptr` first to avoid double dropping in the event of a panic.
339+
self.ptr = unsafe { self.ptr.add(1) };
340+
accum = f(accum, tmp)?;
341+
}
342+
}
343+
R::from_output(accum)
344+
}
345+
292346
unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
293347
where
294348
Self: TrustedRandomAccessNoCoerce,

0 commit comments

Comments
 (0)