Split Vec::dedup_by into 2 cycles #118273
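In brief: this PR splits the hot loop of `Vec::dedup_by` into two cycles. The first cycle scans for the first duplicate without writing any memory; if none is found, the function returns without touching the buffer. The second cycle then compacts the rest, and because at least one element has been skipped by that point, `write` is strictly behind `read`, so the copy can use `ptr::copy_nonoverlapping` instead of `ptr::copy`. Below is a minimal illustrative sketch of the idea, using safe indexing in place of the raw-pointer code in the diff; `dedup_two_cycles` and `first_dup` are illustrative names, not part of the PR.

    // Illustrative sketch only: a safe-Rust rendition of the two-cycle structure.
    fn dedup_two_cycles<T: PartialEq>(vec: &mut Vec<T>) {
        let len = vec.len();
        if len <= 1 {
            return;
        }

        // Cycle 1: find the first duplicate; performs no writes at all.
        let mut first_dup = 1;
        while first_dup < len && vec[first_dup] != vec[first_dup - 1] {
            first_dup += 1;
        }
        if first_dup == len {
            return; // No duplicates: the whole call stays read-only.
        }

        // Cycle 2: compact the tail. `write < read` holds throughout, which is
        // what lets the real implementation use `copy_nonoverlapping`.
        let mut write = first_dup;
        for read in (first_dup + 1)..len {
            if vec[read] != vec[write - 1] {
                vec.swap(read, write);
                write += 1;
            }
        }
        // Duplicates have been swapped past `write`; truncate drops them.
        vec.truncate(write);
    }

The real implementation additionally guards against panics in `same_bucket` and in element destructors via the `FillGapOnDrop` guard visible in the diff.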

Merged
124 changes: 106 additions & 18 deletions library/alloc/benches/vec.rs
@@ -658,73 +658,161 @@ fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
buf.sort();
}

fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
// Measures performance of the slice dedup impl.
// This was used to justify a separate dedup implementation for Vec.
// This algorithm was used for Vec prior to Rust 1.52.
fn bench_dedup_slice_truncate(b: &mut Bencher, sz: usize) {
let mut template = vec![0u32; sz];
b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
random_sorted_fill(0x43, &mut template);

let mut vec = template.clone();
b.iter(|| {
let vec = black_box(&mut vec);
let len = {
let (dedup, _) = vec.partition_dedup();
dedup.len()
};
vec.truncate(len);

black_box(vec.first());
let vec = black_box(vec);
vec.clear();
vec.extend_from_slice(&template);
});
}
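
For context, the slice baseline above relies on the unstable `slice::partition_dedup` API (feature `slice_partition_dedup`), which moves duplicates toward the end of the slice and returns the deduplicated prefix and the duplicates as two halves; roughly:

    #![feature(slice_partition_dedup)]

    fn main() {
        let mut v = [1, 1, 2, 3, 3];
        let (dedup, duplicates) = v.partition_dedup();
        assert_eq!(dedup, [1, 2, 3]);
        // The removed duplicates end up after the deduplicated prefix,
        // in an unspecified order.
        assert_eq!(duplicates.len(), 2);
    }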

fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
// Measures performance of Vec::dedup on random data.
fn bench_vec_dedup_random(b: &mut Bencher, sz: usize) {
let mut template = vec![0u32; sz];
b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
random_sorted_fill(0x43, &mut template);

let mut vec = template.clone();
b.iter(|| {
let vec = black_box(&mut vec);
vec.dedup();
black_box(vec.first());
let vec = black_box(vec);
vec.clear();
vec.extend_from_slice(&template);
});
}

// Measures performance of Vec::dedup when no items are removed
fn bench_vec_dedup_none(b: &mut Bencher, sz: usize) {
let mut template = vec![0u32; sz];
b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
template.chunks_exact_mut(2).for_each(|w| {
w[0] = black_box(0);
w[1] = black_box(5);
});

let mut vec = template.clone();
b.iter(|| {
let vec = black_box(&mut vec);
vec.dedup();
black_box(vec.first());
// Unlike the other `dedup` benches,
// this one doesn't reinitialize the vec,
// because we measure how efficient dedup is
// when no memory is written.
});
}

// Measures performance of Vec::dedup when all items are removed
fn bench_vec_dedup_all(b: &mut Bencher, sz: usize) {
let mut template = vec![0u32; sz];
b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
template.iter_mut().for_each(|w| {
*w = black_box(0);
});

let mut vec = template.clone();
b.iter(|| {
let vec = black_box(&mut vec);
vec.dedup();
black_box(vec.first());
let vec = black_box(vec);
vec.clear();
vec.extend_from_slice(&template);
});
}

#[bench]
fn bench_dedup_old_100(b: &mut Bencher) {
bench_vec_dedup_old(b, 100);
fn bench_dedup_slice_truncate_100(b: &mut Bencher) {
bench_dedup_slice_truncate(b, 100);
}
#[bench]
fn bench_dedup_new_100(b: &mut Bencher) {
bench_vec_dedup_new(b, 100);
fn bench_dedup_random_100(b: &mut Bencher) {
bench_vec_dedup_random(b, 100);
}

#[bench]
fn bench_dedup_old_1000(b: &mut Bencher) {
bench_vec_dedup_old(b, 1000);
fn bench_dedup_none_100(b: &mut Bencher) {
bench_vec_dedup_none(b, 100);
}

#[bench]
fn bench_dedup_all_100(b: &mut Bencher) {
bench_vec_dedup_all(b, 100);
}

#[bench]
fn bench_dedup_slice_truncate_1000(b: &mut Bencher) {
bench_dedup_slice_truncate(b, 1000);
}
#[bench]
fn bench_dedup_random_1000(b: &mut Bencher) {
bench_vec_dedup_random(b, 1000);
}

#[bench]
fn bench_dedup_none_1000(b: &mut Bencher) {
bench_vec_dedup_none(b, 1000);
}

#[bench]
fn bench_dedup_new_1000(b: &mut Bencher) {
bench_vec_dedup_new(b, 1000);
fn bench_dedup_all_1000(b: &mut Bencher) {
bench_vec_dedup_all(b, 1000);
}

#[bench]
fn bench_dedup_old_10000(b: &mut Bencher) {
bench_vec_dedup_old(b, 10000);
fn bench_dedup_slice_truncate_10000(b: &mut Bencher) {
bench_dedup_slice_truncate(b, 10000);
}
#[bench]
fn bench_dedup_new_10000(b: &mut Bencher) {
bench_vec_dedup_new(b, 10000);
fn bench_dedup_random_10000(b: &mut Bencher) {
bench_vec_dedup_random(b, 10000);
}

#[bench]
fn bench_dedup_old_100000(b: &mut Bencher) {
bench_vec_dedup_old(b, 100000);
fn bench_dedup_none_10000(b: &mut Bencher) {
bench_vec_dedup_none(b, 10000);
}

#[bench]
fn bench_dedup_all_10000(b: &mut Bencher) {
bench_vec_dedup_all(b, 10000);
}

#[bench]
fn bench_dedup_slice_truncate_100000(b: &mut Bencher) {
bench_dedup_slice_truncate(b, 100000);
}
#[bench]
fn bench_dedup_random_100000(b: &mut Bencher) {
bench_vec_dedup_random(b, 100000);
}

#[bench]
fn bench_dedup_none_100000(b: &mut Bencher) {
bench_vec_dedup_none(b, 100000);
}

#[bench]
fn bench_dedup_new_100000(b: &mut Bencher) {
bench_vec_dedup_new(b, 100000);
fn bench_dedup_all_100000(b: &mut Bencher) {
bench_vec_dedup_all(b, 100000);
}

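(Assuming the standard rust-lang/rust workflow, these benchmarks can be run with something like `./x.py bench library/alloc --test-args bench_dedup`; the exact invocation may differ by checkout.)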
57 changes: 45 additions & 12 deletions library/alloc/src/vec/mod.rs
@@ -1775,7 +1775,32 @@ impl<T, A: Allocator> Vec<T, A> {
return;
}

/* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
// Check if we ever want to remove anything.
// This allows us to use copy_nonoverlapping in the next cycle
// and avoids any memory writes if we don't need to remove anything.
let mut first_duplicate_idx: usize = 1;
let start = self.as_mut_ptr();
while first_duplicate_idx != len {
let found_duplicate = unsafe {
// SAFETY: first_duplicate_idx is always in range [1..len).
// Note that we start iteration from 1, so we never overflow.
let prev = start.add(first_duplicate_idx.wrapping_sub(1));
let current = start.add(first_duplicate_idx);
// We explicitly say in the docs that the references are reversed.
same_bucket(&mut *current, &mut *prev)
};
if found_duplicate {
break;
}
first_duplicate_idx += 1;
}
// We don't need to remove anything.
// `first_duplicate_idx` cannot get bigger than `len`.
if first_duplicate_idx == len {
return;
}

/* INVARIANT: vec.len() > read > write > write-1 >= 0 */
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
/* Offset of the element we want to check if it is duplicate */
read: usize,
@@ -1821,31 +1846,39 @@ impl<T, A: Allocator> Vec<T, A> {
}
}

let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
let ptr = gap.vec.as_mut_ptr();

/* Drop items while going through the Vec; it should be more efficient than
* doing slice partition_dedup + truncate */

// Construct the gap first and then drop the item, to avoid memory corruption if `T::drop` panics.
let mut gap =
FillGapOnDrop { read: first_duplicate_idx + 1, write: first_duplicate_idx, vec: self };
unsafe {
// SAFETY: we checked that first_duplicate_idx is in bounds above.
// If the drop panics, `gap` will remove this item without dropping it again.
ptr::drop_in_place(start.add(first_duplicate_idx));
}

/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
* are always in-bounds and read_ptr never aliases prev_ptr */
unsafe {
while gap.read < len {
let read_ptr = ptr.add(gap.read);
let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
let read_ptr = start.add(gap.read);
let prev_ptr = start.add(gap.write.wrapping_sub(1));

if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
// We explicitly say in the docs that the references are reversed.
let found_duplicate = same_bucket(&mut *read_ptr, &mut *prev_ptr);
if found_duplicate {
// Increase `gap.read` now since the drop may panic.
gap.read += 1;
/* We have found duplicate, drop it in-place */
ptr::drop_in_place(read_ptr);
} else {
let write_ptr = ptr.add(gap.write);
let write_ptr = start.add(gap.write);

/* Because `read_ptr` can be equal to `write_ptr`, we either
* have to use `copy` or conditional `copy_nonoverlapping`.
* Looks like the first option is faster. */
ptr::copy(read_ptr, write_ptr, 1);
/* read_ptr cannot be equal to write_ptr because at this point
* we are guaranteed to have skipped at least one element (before the loop starts).
*/
ptr::copy_nonoverlapping(read_ptr, write_ptr, 1);

/* We have filled that place, so go further */
gap.write += 1;
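
For reference, the argument order that both cycles rely on follows the documented `Vec::dedup_by` contract: `same_bucket(a, b)` receives the candidate element first and the retained element second, and `a` is removed when it returns true. A minimal usage sketch:

    fn main() {
        let mut v = vec!["foo", "Foo", "bar", "BAR", "baz"];
        // `a` is the element that may be removed, `b` the one already kept.
        v.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
        assert_eq!(v, ["foo", "bar", "baz"]);
    }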