Skip to content

Commit 2d22ff9

Browse files
bors[bot] and orium authored
Merge #423
423: Added `Itertools::dedup_with_count()` and `Itertools::dedup_by_with_count()` r=jswrenn a=orium Fixes #393. Co-authored-by: Diogo Sousa <[email protected]>
2 parents d081998 + 2508d87 commit 2d22ff9

File tree

3 files changed: 157 additions (+157) and 12 deletions (-12)

src/adaptors/mod.rs

Lines changed: 83 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ pub use self::multi_product::*;
1010

1111
use std::fmt;
1212
use std::mem::replace;
13-
use std::iter::{Fuse, Peekable, FromIterator};
13+
use std::iter::{Fuse, Peekable, FromIterator, FusedIterator};
1414
use std::marker::PhantomData;
1515
use crate::size_hint;
1616

@@ -310,13 +310,13 @@ pub fn cartesian_product<I, J>(mut i: I, j: J) -> Product<I, J>
310310
}
311311
}
312312

313-
314313
impl<I, J> Iterator for Product<I, J>
315314
where I: Iterator,
316315
J: Clone + Iterator,
317316
I::Item: Clone
318317
{
319318
type Item = (I::Item, J::Item);
319+
320320
fn next(&mut self) -> Option<(I::Item, J::Item)> {
321321
let elt_b = match self.b.next() {
322322
None => {
@@ -607,18 +607,18 @@ impl<I, J, F> Iterator for MergeBy<I, J, F>
607607
}
608608

609609
#[derive(Clone, Debug)]
610-
pub struct CoalesceCore<I>
610+
pub struct CoalesceCore<I, T>
611611
where I: Iterator
612612
{
613613
iter: I,
614-
last: Option<I::Item>,
614+
last: Option<T>,
615615
}
616616

617-
impl<I> CoalesceCore<I>
617+
impl<I, T> CoalesceCore<I, T>
618618
where I: Iterator
619619
{
620-
fn next_with<F>(&mut self, mut f: F) -> Option<I::Item>
621-
where F: FnMut(I::Item, I::Item) -> Result<I::Item, (I::Item, I::Item)>
620+
fn next_with<F>(&mut self, mut f: F) -> Option<T>
621+
where F: FnMut(T, I::Item) -> Result<T, (T, T)>
622622
{
623623
// this fuses the iterator
624624
let mut last = match self.last.take() {
@@ -652,7 +652,7 @@ impl<I> CoalesceCore<I>
652652
pub struct Coalesce<I, F>
653653
where I: Iterator
654654
{
655-
iter: CoalesceCore<I>,
655+
iter: CoalesceCore<I, I::Item>,
656656
f: F,
657657
}
658658

@@ -705,7 +705,7 @@ impl<I, F> Iterator for Coalesce<I, F>
705705
pub struct DedupBy<I, Pred>
706706
where I: Iterator
707707
{
708-
iter: CoalesceCore<I>,
708+
iter: CoalesceCore<I, I::Item>,
709709
dedup_pred: Pred,
710710
}
711711

@@ -718,7 +718,7 @@ pub struct DedupEq;
718718

719719
impl<T: PartialEq> DedupPredicate<T> for DedupEq {
720720
fn dedup_pair(&mut self, a: &T, b: &T) -> bool {
721-
a==b
721+
a == b
722722
}
723723
}
724724

@@ -803,6 +803,79 @@ impl<I, Pred> Iterator for DedupBy<I, Pred>
803803
}
804804
}
805805

806+
/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many
807+
/// repeated elements were present. This will determine equality using a comparison function.
808+
///
809+
/// See [`.dedup_by_with_count()`](../trait.Itertools.html#method.dedup_by_with_count) or
810+
/// [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
811+
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
812+
pub struct DedupByWithCount<I, Pred>
813+
where I: Iterator
814+
{
815+
iter: CoalesceCore<I, (usize, I::Item)>,
816+
dedup_pred: Pred,
817+
}
818+
819+
/// An iterator adaptor that removes repeated duplicates, while keeping a count of how many
820+
/// repeated elements were present.
821+
///
822+
/// See [`.dedup_with_count()`](../trait.Itertools.html#method.dedup_with_count) for more information.
823+
pub type DedupWithCount<I> = DedupByWithCount<I, DedupEq>;
824+
825+
/// Create a new `DedupByWithCount`.
826+
pub fn dedup_by_with_count<I, Pred>(mut iter: I, dedup_pred: Pred) -> DedupByWithCount<I, Pred>
827+
where I: Iterator,
828+
{
829+
DedupByWithCount {
830+
iter: CoalesceCore {
831+
last: iter.next().map(|v| (1, v)),
832+
iter,
833+
},
834+
dedup_pred,
835+
}
836+
}
837+
838+
/// Create a new `DedupWithCount`.
839+
pub fn dedup_with_count<I>(iter: I) -> DedupWithCount<I>
840+
where I: Iterator
841+
{
842+
dedup_by_with_count(iter, DedupEq)
843+
}
844+
845+
impl<I, Pred> fmt::Debug for DedupByWithCount<I, Pred>
846+
where I: Iterator + fmt::Debug,
847+
I::Item: fmt::Debug,
848+
{
849+
debug_fmt_fields!(Dedup, iter);
850+
}
851+
852+
impl<I: Clone, Pred: Clone> Clone for DedupByWithCount<I, Pred>
853+
where I: Iterator,
854+
I::Item: Clone,
855+
{
856+
clone_fields!(iter, dedup_pred);
857+
}
858+
859+
impl<I, Pred> Iterator for DedupByWithCount<I, Pred>
860+
where I: Iterator,
861+
Pred: DedupPredicate<I::Item>,
862+
{
863+
type Item = (usize, I::Item);
864+
865+
fn next(&mut self) -> Option<(usize, I::Item)> {
866+
let ref mut dedup_pred = self.dedup_pred;
867+
self.iter.next_with(|(c, x), y| {
868+
if dedup_pred.dedup_pair(&x, &y) { Ok((c + 1, x)) } else { Err(((c, x), (1, y))) }
869+
})
870+
}
871+
872+
fn size_hint(&self) -> (usize, Option<usize>) {
873+
self.iter.size_hint()
874+
}
875+
}
876+
877+
impl<I: Iterator, Pred: DedupPredicate<I::Item>> FusedIterator for DedupByWithCount<I, Pred> {}
878+
806879
/// An iterator adaptor that borrows from a `Clone`-able iterator
807880
/// to only pick off elements while the predicate returns `true`.
808881
///

src/lib.rs

Lines changed: 47 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,8 @@ pub mod structs {
7878
pub use crate::adaptors::{
7979
Dedup,
8080
DedupBy,
81+
DedupWithCount,
82+
DedupByWithCount,
8183
Interleave,
8284
InterleaveShortest,
8385
Product,
@@ -827,7 +829,6 @@ pub trait Itertools : Iterator {
827829
merge_join_by(self, other, cmp_fn)
828830
}
829831

830-
831832
/// Return an iterator adaptor that flattens an iterator of iterators by
832833
/// merging them in ascending order.
833834
///
@@ -1008,7 +1009,7 @@ pub trait Itertools : Iterator {
10081009
/// use itertools::Itertools;
10091010
///
10101011
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
1011-
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1==y.1),
1012+
/// itertools::assert_equal(data.into_iter().dedup_by(|x, y| x.1 == y.1),
10121013
/// vec![(0, 1.), (0, 2.), (0, 3.), (1, 2.)]);
10131014
/// ```
10141015
fn dedup_by<Cmp>(self, cmp: Cmp) -> DedupBy<Self, Cmp>
@@ -1018,6 +1019,50 @@ pub trait Itertools : Iterator {
10181019
adaptors::dedup_by(self, cmp)
10191020
}
10201021

1022+
/// Remove duplicates from sections of consecutive identical elements, while keeping a count of
1023+
/// how many repeated elements were present.
1024+
/// If the iterator is sorted, all elements will be unique.
1025+
///
1026+
/// Iterator element type is `(usize, Self::Item)`.
1027+
///
1028+
/// This iterator is *fused*.
1029+
///
1030+
/// ```
1031+
/// use itertools::Itertools;
1032+
///
1033+
/// let data = vec![1., 1., 2., 3., 3., 2., 2.];
1034+
/// itertools::assert_equal(data.into_iter().dedup_with_count(),
1035+
/// vec![(2, 1.), (1, 2.), (2, 3.), (2, 2.)]);
1036+
/// ```
1037+
fn dedup_with_count(self) -> DedupWithCount<Self>
1038+
where Self: Sized,
1039+
{
1040+
adaptors::dedup_with_count(self)
1041+
}
1042+
1043+
/// Remove duplicates from sections of consecutive identical elements, while keeping a count of
1044+
/// how many repeated elements were present.
1045+
/// This will determine equality using a comparison function.
1046+
/// If the iterator is sorted, all elements will be unique.
1047+
///
1048+
/// Iterator element type is `(usize, Self::Item)`.
1049+
///
1050+
/// This iterator is *fused*.
1051+
///
1052+
/// ```
1053+
/// use itertools::Itertools;
1054+
///
1055+
/// let data = vec![(0, 1.), (1, 1.), (0, 2.), (0, 3.), (1, 3.), (1, 2.), (2, 2.)];
1056+
/// itertools::assert_equal(data.into_iter().dedup_by_with_count(|x, y| x.1 == y.1),
1057+
/// vec![(2, (0, 1.)), (1, (0, 2.)), (2, (0, 3.)), (2, (1, 2.))]);
1058+
/// ```
1059+
fn dedup_by_with_count<Cmp>(self, cmp: Cmp) -> DedupByWithCount<Self, Cmp>
1060+
where Self: Sized,
1061+
Cmp: FnMut(&Self::Item, &Self::Item) -> bool,
1062+
{
1063+
adaptors::dedup_by_with_count(self, cmp)
1064+
}
1065+
10211066
/// Return an iterator adaptor that filters out elements that have
10221067
/// already been produced once during the iteration. Duplicates
10231068
/// are detected using hash and equality.

tests/test_std.rs

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,33 @@ fn dedup_by() {
115115
assert_eq!(&xs_d, &ys);
116116
}
117117

118+
#[test]
119+
fn dedup_with_count() {
120+
let xs: [i32; 8] = [0, 1, 1, 1, 2, 1, 3, 3];
121+
let ys: [(usize, &i32); 5] = [(1, &0), (3, &1), (1, &2), (1, &1), (2, &3)];
122+
123+
it::assert_equal(ys.iter().cloned(), xs.iter().dedup_with_count());
124+
125+
let xs: [i32; 5] = [0, 0, 0, 0, 0];
126+
let ys: [(usize, &i32); 1] = [(5, &0)];
127+
128+
it::assert_equal(ys.iter().cloned(), xs.iter().dedup_with_count());
129+
}
130+
131+
132+
#[test]
133+
fn dedup_by_with_count() {
134+
let xs = [(0, 0), (0, 1), (1, 1), (2, 1), (0, 2), (3, 1), (0, 3), (1, 3)];
135+
let ys = [(1, &(0, 0)), (3, &(0, 1)), (1, &(0, 2)), (1, &(3, 1)), (2, &(0, 3))];
136+
137+
it::assert_equal(ys.iter().cloned(), xs.iter().dedup_by_with_count(|x, y| x.1==y.1));
138+
139+
let xs = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)];
140+
let ys = [( 5, &(0, 1))];
141+
142+
it::assert_equal(ys.iter().cloned(), xs.iter().dedup_by_with_count(|x, y| x.0==y.0));
143+
}
144+
118145
#[test]
119146
fn all_equal() {
120147
assert!("".chars().all_equal());

0 commit comments

Comments
 (0)