Skip to content

Commit 3c1cd9a

Browse files
committed
Merge pull request #37 from stepancheg/unique
.unique() and .unique_by(..) operations
2 parents 84e4efa + 2e7df18 commit 3c1cd9a

File tree

3 files changed

+113
-0
lines changed

3 files changed

+113
-0
lines changed

src/adaptors.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ use std::num::One;
1111
use std::ops::Add;
1212
use std::cmp::Ordering;
1313
use std::iter::{Fuse, Peekable};
14+
use std::collections::HashSet;
15+
use std::hash::Hash;
1416
use Itertools;
1517
use size_hint;
1618

@@ -972,3 +974,55 @@ impl<I> Iterator for Combinations<I> where I: Iterator + Clone, I::Item: Clone{
972974
size_hint::add((lo / 2, hi.map(|hi| hi / 2)), extra)
973975
}
974976
}
977+
978+
/// An iterator adaptor that filters out non-unique elements.
///
/// For every element of the underlying iterator a key is computed with the
/// stored function. The element is yielded only if its key has not been
/// produced before; all keys seen so far are kept in a hash set inside the
/// adaptor.
///
/// See [*.unique_by()*](trait.Itertools.html#method.unique_by) and
/// [*.unique()*](trait.Itertools.html#method.unique) for more information.
#[derive(Clone)]
pub struct UniqueBy<I: Iterator, V, F> {
    /// The underlying iterator.
    iter: I,
    /// Keys of all elements that have been yielded so far.
    used: HashSet<V>,
    /// Maps an element to the key that decides its uniqueness.
    f: F,
}

// The key is moved into the set and never duplicated, so `V` only has to be
// hashable and comparable; no `Clone` bound is required.
impl<I: Iterator, V, F> UniqueBy<I, V, F> where V: Eq + Hash, F: FnMut(&I::Item) -> V {
    /// Create a new **UniqueBy** iterator.
    ///
    /// `iter` is the iterator to filter and `f` computes the key of each
    /// element. The set of seen keys starts out empty.
    pub fn new(iter: I, f: F) -> UniqueBy<I, V, F> {
        UniqueBy {
            iter: iter,
            used: HashSet::new(),
            f: f,
        }
    }
}

impl<I, V, F> Iterator for UniqueBy<I, V, F> where
    I: Iterator,
    V: Eq + Hash,
    F: FnMut(&I::Item) -> V
{
    type Item = I::Item;

    /// Advance the underlying iterator until an element with a new key is
    /// found and return it; return `None` once the underlying iterator is
    /// exhausted.
    fn next(&mut self) -> Option<I::Item> {
        for v in self.iter.by_ref() {
            let key = (self.f)(&v);
            // `insert` returns true only when the key was not present yet.
            if self.used.insert(key) {
                return Some(v);
            }
        }
        None
    }

    /// The upper bound is that of the underlying iterator, since every
    /// remaining element may turn out to be unique.
    ///
    /// The lower bound is 1 while no key has been recorded and the underlying
    /// iterator promises at least one more element (the very first element
    /// can never be a duplicate); otherwise it is 0 because all remaining
    /// elements may be duplicates.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (inner_lo, hi) = self.iter.size_hint();
        let lo = if inner_lo > 0 && self.used.is_empty() { 1 } else { 0 };
        (lo, hi)
    }
}
1025+
1026+
/// An iterator adaptor to filter non-unique elements.
///
/// This is a **UniqueBy** whose keys have the same type as the iterator's
/// elements and whose key function is a plain `fn` pointer.
///
/// See [*.unique()*](trait.Itertools.html#method.unique) for more information.
1027+
pub type Unique<I> where I: Iterator =
1028+
UniqueBy<I, I::Item, fn(&I::Item) -> I::Item>;

src/lib.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use std::iter::{self, IntoIterator};
3636
use std::fmt::Write;
3737
use std::cmp::Ordering;
3838
use std::fmt;
39+
use std::hash::Hash;
3940

4041
pub use adaptors::{
4142
Interleave,
@@ -54,6 +55,8 @@ pub use adaptors::{
5455
Coalesce,
5556
CoalesceFn,
5657
Combinations,
58+
Unique,
59+
UniqueBy,
5760
};
5861
#[cfg(feature = "unstable")]
5962
pub use adaptors::EnumerateFrom;
@@ -625,6 +628,45 @@ pub trait Itertools : Iterator {
625628
Coalesce::new(self, eq)
626629
}
627630

631+
/// Return an iterator adaptor that filters non-unique elements from the
/// iterator: the first occurrence of each element is yielded and every
/// later repetition of it is skipped.
632+
///
633+
/// Copies of visited elements are stored in a hash set in the
634+
/// iterator.
635+
///
636+
/// ```
637+
/// use itertools::Itertools;
638+
///
639+
/// let data = vec![10, 20, 30, 20, 40, 10, 50];
640+
/// itertools::assert_equal(data.into_iter().unique(),
641+
/// vec![10, 20, 30, 40, 50]);
642+
/// ```
643+
fn unique(self) -> Unique<Self> where
644+
Self: Sized,
645+
Self::Item: Clone + Eq + Hash,
646+
{
647+
// The key of an element is a clone of the element itself.
self.unique_by(Clone::clone)
648+
}
649+
650+
/// Return an iterator adaptor that filters non-unique elements from the
/// iterator, where uniqueness is decided by a key computed from each
/// element.
651+
///
652+
/// Elements are considered the same if the supplied function returns
653+
/// equal values for them; only the first such element is yielded. Those
654+
/// values are stored in a hash set in the iterator.
655+
///
656+
/// ```
657+
/// use itertools::Itertools;
658+
///
659+
/// let data = vec!["a", "bb", "aa", "c", "ccc"];
660+
/// itertools::assert_equal(data.into_iter().unique_by(|s| s.len()),
661+
/// vec!["a", "bb", "ccc"]);
662+
/// ```
663+
fn unique_by<V, F>(self, f: F) -> UniqueBy<Self, V, F> where
664+
Self: Sized,
665+
V: Clone + Eq + Hash,
666+
F: FnMut(&Self::Item) -> V
667+
{
668+
UniqueBy::new(self, f)
669+
}
628670

629671
/// Return an iterator adaptor that joins together adjacent slices if possible.
630672
///

tests/tests.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,23 @@ fn dedup() {
202202
it::assert_equal(ys.iter(), xs.iter().dedup());
203203
}
204204

205+
#[test]
206+
fn unique_by() {
207+
// Strings are keyed by their first two characters ("aa", "bb", "cc"),
// so only the first string for each key is expected in the output.
let xs = ["aaa", "bbbbb", "aa", "ccc", "bbbb", "aaaaa", "cccc"];
208+
let ys = ["aaa", "bbbbb", "ccc"];
209+
it::assert_equal(ys.iter(), xs.iter().unique_by(|x| x[..2].to_string()));
210+
}
211+
212+
#[test]
213+
fn unique() {
214+
// Repeated values are expected to be dropped after their first occurrence.
let xs = [0, 1, 2, 3, 2, 1, 3];
215+
let ys = [0, 1, 2, 3];
216+
it::assert_equal(ys.iter(), xs.iter().unique());
217+
// Input without duplicates is expected to pass through unchanged.
let xs = [0, 1];
218+
let ys = [0, 1];
219+
it::assert_equal(ys.iter(), xs.iter().unique());
220+
}
221+
205222
#[test]
206223
fn batching() {
207224
let xs = [0, 1, 2, 1, 3];

0 commit comments

Comments
 (0)