Skip to content

Commit 36cb012

Browse files
committed
.unique() and .unique_by(..) operations
These functions create iterators that yield only distinct elements, storing previously seen elements (or keys) in a hash set. `.unique_by(..)` uses the supplied function to derive keys, and `.unique()` uses `.clone()` to store copies of the elements themselves.
1 parent 84e4efa commit 36cb012

File tree

3 files changed

+113
-0
lines changed

3 files changed

+113
-0
lines changed

src/adaptors.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ use std::num::One;
1111
use std::ops::Add;
1212
use std::cmp::Ordering;
1313
use std::iter::{Fuse, Peekable};
14+
use std::collections::HashSet;
15+
use std::hash::Hash;
1416
use Itertools;
1517
use size_hint;
1618

@@ -972,3 +974,55 @@ impl<I> Iterator for Combinations<I> where I: Iterator + Clone, I::Item: Clone{
972974
size_hint::add((lo / 2, hi.map(|hi| hi / 2)), extra)
973975
}
974976
}
977+
978+
/// An iterator adapter to filter out duplicate elements.
///
/// See [*.unique()*](trait.Itertools.html#method.unique) for more information.
#[derive(Clone)]
pub struct UniqueBy<I, V, F> {
    iter: I,
    // Keys of elements already yielded. An element is emitted only when
    // inserting its key into this set succeeds (i.e. it was not seen before).
    used: HashSet<V>,
    f: F,
}

impl<I: Iterator, V, F> UniqueBy<I, V, F> where
    // `Clone` is not required here: the set stores the keys themselves,
    // and `HashSet` only needs `Eq + Hash`.
    V: Eq + Hash,
    F: FnMut(&I::Item) -> V
{
    /// Create a new **UniqueBy** iterator.
    pub fn new(iter: I, f: F) -> UniqueBy<I, V, F> {
        UniqueBy {
            iter: iter,
            used: HashSet::new(),
            f: f,
        }
    }
}

impl<I, V, F> Iterator for UniqueBy<I, V, F> where
    I: Iterator,
    V: Eq + Hash,
    F: FnMut(&I::Item) -> V
{
    type Item = I::Item;

    fn next(&mut self) -> Option<I::Item> {
        // Pull elements until one with an unseen key turns up; elements
        // whose key was already recorded are skipped.
        loop {
            match self.iter.next() {
                None => return None,
                Some(v) => {
                    let key = (self.f)(&v);
                    // `insert` returns true iff the key was not already present.
                    if self.used.insert(key) {
                        return Some(v);
                    }
                }
            }
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Forwarding the inner hint unchanged would overstate the lower
        // bound: duplicates may be filtered out, so all we can promise is
        // that a non-empty inner iterator yields at least one element.
        // The upper bound carries over unchanged.
        let (low, hi) = self.iter.size_hint();
        ((low > 0) as usize, hi)
    }
}
1025+
1026+
/// An iterator adapter to filter non-unique elements.
1027+
pub type Unique<I> where I: Iterator =
1028+
UniqueBy<I, I::Item, fn(&I::Item) -> I::Item>;

src/lib.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use std::iter::{self, IntoIterator};
3636
use std::fmt::Write;
3737
use std::cmp::Ordering;
3838
use std::fmt;
39+
use std::hash::Hash;
3940

4041
pub use adaptors::{
4142
Interleave,
@@ -54,6 +55,8 @@ pub use adaptors::{
5455
Coalesce,
5556
CoalesceFn,
5657
Combinations,
58+
Unique,
59+
UniqueBy,
5760
};
5861
#[cfg(feature = "unstable")]
5962
pub use adaptors::EnumerateFrom;
@@ -625,6 +628,45 @@ pub trait Itertools : Iterator {
625628
Coalesce::new(self, eq)
626629
}
627630

631+
    /// Return an iterator adaptor that yields only the first element of
    /// each group of elements mapping to the same key, preserving order.
    ///
    /// Elements are considered duplicates if the supplied function
    /// returns equal key values for them. The keys of elements seen so
    /// far are stored in a hash set inside the iterator, so the key type
    /// must implement `Clone + Eq + Hash`.
    ///
    /// ```
    /// use itertools::Itertools;
    ///
    /// let data = vec!["qux", "corge", "bar", "quux", "bazaar"];
    /// itertools::assert_equal(data.into_iter().unique_by(|s| s[..2].to_string()),
    ///                         vec!["qux", "corge", "bar"]);
    /// ```
    fn unique_by<V, F>(self, f: F) -> UniqueBy<Self, V, F> where
        Self: Sized,
        V: Clone + Eq + Hash,
        F: FnMut(&Self::Item) -> V
    {
        UniqueBy::new(self, f)
    }
651+
652+
    /// Return an iterator adaptor that yields each distinct element only
    /// once, keeping the first occurrence and skipping later duplicates.
    ///
    /// Clones of the elements seen so far are stored in a hash set
    /// inside the iterator, so the element type must implement
    /// `Clone + Eq + Hash`.
    ///
    /// ```
    /// use itertools::Itertools;
    ///
    /// let data = vec![10, 20, 30, 20, 40, 10, 50];
    /// itertools::assert_equal(data.into_iter().unique(),
    ///                         vec![10, 20, 30, 40, 50]);
    /// ```
    fn unique(self) -> Unique<Self> where
        Self: Sized,
        Self::Item: Clone + Eq + Hash,
    {
        self.unique_by(Clone::clone)
    }
628670

629671
/// Return an iterator adaptor that joins together adjacent slices if possible.
630672
///

tests/tests.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,23 @@ fn dedup() {
202202
it::assert_equal(ys.iter(), xs.iter().dedup());
203203
}
204204

205+
#[test]
fn unique_by() {
    // Two strings collide when their first two characters match; only
    // the first member of each collision group should survive.
    let input = ["aaa", "bbbbb", "aa", "ccc", "bbbb", "aaaaa", "cccc"];
    let expected = ["aaa", "bbbbb", "ccc"];
    it::assert_equal(expected.iter(), input.iter().unique_by(|s| s[..2].to_string()));
}
211+
212+
#[test]
fn unique() {
    // Duplicates are dropped; first occurrences are kept in order.
    let with_dups = [0, 1, 2, 3, 2, 1, 3];
    let deduped = [0, 1, 2, 3];
    it::assert_equal(deduped.iter(), with_dups.iter().unique());
    // An already-unique sequence passes through untouched.
    let already_unique = [0, 1];
    let expected = [0, 1];
    it::assert_equal(expected.iter(), already_unique.iter().unique());
}
221+
205222
#[test]
206223
fn batching() {
207224
let xs = [0, 1, 2, 1, 3];

0 commit comments

Comments
 (0)