diff --git a/Cargo.toml b/Cargo.toml index a67c6de76..a9e9ca4f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ test = false [dependencies] either = { version = "1.0", default-features = false } +lending-iterator = { version = "0.1.6", optional = true} [dev-dependencies] rand = "0.7" @@ -39,6 +40,7 @@ quickcheck = { version = "0.9", default_features = false } default = ["use_std"] use_std = ["use_alloc", "either/use_std"] use_alloc = [] +lending_iters = ["dep:lending-iterator"] [profile] bench = { debug = true } @@ -71,6 +73,11 @@ harness = false name = "combinations" harness = false +[[bench]] +name = "combinations_lending" +harness = false +required-features = ["default", "lending_iters"] + [[bench]] name = "powerset" harness = false diff --git a/benches/combinations.rs b/benches/combinations.rs index e7433a4cb..e7261b6e0 100644 --- a/benches/combinations.rs +++ b/benches/combinations.rs @@ -1,3 +1,5 @@ +use std::collections::{HashSet, VecDeque}; + use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; @@ -110,6 +112,91 @@ fn comb_c14(c: &mut Criterion) { }); } +fn comb_single_use(c: &mut Criterion) { + c.bench_function("comb single use", move |b| { + b.iter(|| { + let mut combination_bitmask = 0usize; + (0..N14).combinations(14).for_each(|combo| { + let compared_bitmask = 0b101010101010101011110000usize; + combo.into_iter().for_each(|bit_pos| { + combination_bitmask |= 1 << bit_pos; + }); + black_box((combination_bitmask & compared_bitmask).count_ones()); + }); + }) + }); +} + +fn comb_into_hash_set(c: &mut Criterion) { + c.bench_function("comb into hash set", move |b| { + b.iter(|| { + (0..N14).combinations(14).for_each(|combo| { + black_box({ + let mut out = HashSet::with_capacity(14); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_into_vec_deque(c: &mut Criterion) { + c.bench_function("comb into vec deque", move |b| { + b.iter(|| { + (0..N14).combinations(14).for_each(|combo| { + black_box(VecDeque::from(combo)); + }); + }) + }); +} + +fn comb_into_slice(c: &mut Criterion) { + c.bench_function("comb into slice", move |b| { + b.iter(|| { + (0..N14).combinations(14).for_each(|combo| { + black_box({ + let mut out = [0; 14]; + let mut combo_iter = combo.into_iter(); + out.fill_with(|| combo_iter.next().unwrap_or_default()); + out + }); + }); + }) + }); +} + +fn comb_into_slice_unchecked(c: &mut Criterion) { + c.bench_function("comb into slice unchecked", move |b| { + b.iter(|| { + (0..N14).combinations(14).for_each(|combo| { + black_box({ + let mut out = [0; 14]; + let mut combo_iter = combo.into_iter(); + out.fill_with(|| combo_iter.next().unwrap()); + out + }); + }); + }) + }); +} + +fn comb_into_slice_for_loop(c: &mut Criterion) { + c.bench_function("comb into slice for loop", move |b| { + b.iter(|| { + (0..N14).combinations(14).for_each(|combo| { + black_box({ + let mut out = [0; 14]; + for (i, elem) in combo.into_iter().enumerate() { + out[i] = elem; + } + out + }); + }); + }) + }); +} + criterion_group!( benches, comb_for1, @@ -121,5 +208,12 @@ criterion_group!( comb_c3, comb_c4, comb_c14, + comb_single_use, + comb_into_hash_set, + comb_into_vec_deque, + comb_into_slice, + comb_into_slice_unchecked, + comb_into_slice_for_loop, ); + criterion_main!(benches); diff --git a/benches/combinations_lending.rs b/benches/combinations_lending.rs new file mode 100644 index 000000000..df4085b20 --- /dev/null +++ b/benches/combinations_lending.rs @@ -0,0 +1,193 @@ +#![cfg(feature = "lending_iters")] + +use std::collections::{HashSet, VecDeque}; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use itertools::Itertools; +use itertools::LendingIterator; + +// approximate 100_000 iterations for each combination +const N1: usize = 100_000; +const N2: usize = 448; +const N3: usize = 86; +const N4: usize = 41; +const N14: usize = 21; + +fn comb_lending_c1(c: &mut Criterion) { + c.bench_function("comb lending c1", move |b| { + b.iter(|| { + (0..N1).combinations_lending(1).for_each(|combo| { + black_box({ + let mut out = Vec::with_capacity(1); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_c2(c: &mut Criterion) { + c.bench_function("comb lending c2", move |b| { + b.iter(|| { + (0..N2).combinations_lending(2).for_each(|combo| { + black_box({ + let mut out = Vec::with_capacity(2); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_c3(c: &mut Criterion) { + c.bench_function("comb lending c3", move |b| { + b.iter(|| { + (0..N3).combinations_lending(3).for_each(|combo| { + black_box({ + let mut out = Vec::with_capacity(3); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_c4(c: &mut Criterion) { + c.bench_function("comb lending c4", move |b| { + b.iter(|| { + (0..N4).combinations_lending(4).for_each(|combo| { + black_box({ + let mut out = Vec::with_capacity(4); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_c14(c: &mut Criterion) { + c.bench_function("comb lending c14", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|combo| { + black_box({ + let mut out = Vec::with_capacity(14); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_single_use(c: &mut Criterion) { + c.bench_function("comb lending single use", move |b| { + b.iter(|| { + let mut combination_bitmask = 0usize; + (0..N14).combinations_lending(14).for_each(|combo| { + let compared_bitmask = 0b101010101010101011110000usize; + combo.for_each(|bit_pos| { + combination_bitmask |= 1 << bit_pos; + }); + black_box((combination_bitmask & compared_bitmask).count_ones()); + }); + }) + }); +} + +fn comb_lending_into_hash_set_from_collect(c: &mut Criterion) { + c.bench_function("comb lending into hash set from collect", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|combo| { + black_box(combo.collect::>()); + }); + }) + }); +} + +fn comb_lending_into_hash_set_from_extend(c: &mut Criterion) { + c.bench_function("comb lending into hash set from extend", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|combo| { + black_box({ + let mut out = HashSet::with_capacity(14); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_into_vec_deque_from_collect(c: &mut Criterion) { + c.bench_function("comb lending into vec deque from collect", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|combo| { + black_box(combo.collect::>()); + }); + }) + }); +} + +fn comb_lending_into_vec_deque_from_extend(c: &mut Criterion) { + c.bench_function("comb lending into vec deque from extend", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|combo| { + black_box({ + let mut out = VecDeque::with_capacity(14); + out.extend(combo); + out + }); + }); + }) + }); +} + +fn comb_lending_into_slice(c: &mut Criterion) { + c.bench_function("comb lending into slice", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|mut combo| { + black_box({ + let mut out = [0; 14]; + out.fill_with(|| combo.next().unwrap_or_default()); + out + }); + }); + }) + }); +} + +fn comb_lending_into_slice_unchecked(c: &mut Criterion) { + c.bench_function("comb lending into slice unchecked", move |b| { + b.iter(|| { + (0..N14).combinations_lending(14).for_each(|mut combo| { + black_box({ + let mut out = [0; 14]; + out.fill_with(|| combo.next().unwrap()); + out + }); + }); + }) + }); +} + +criterion_group!( + benches, + comb_lending_c1, + comb_lending_c2, + comb_lending_c3, + comb_lending_c4, + comb_lending_c14, + comb_lending_single_use, + comb_lending_into_hash_set_from_collect, + comb_lending_into_hash_set_from_extend, + comb_lending_into_vec_deque_from_collect, + comb_lending_into_vec_deque_from_extend, + comb_lending_into_slice, + comb_lending_into_slice_unchecked, +); + +criterion_main!(benches); diff --git a/src/combinations.rs b/src/combinations.rs index 68a59c5e4..912f2f722 100644 --- a/src/combinations.rs +++ b/src/combinations.rs @@ -4,33 +4,44 @@ use std::iter::FusedIterator; use super::lazy_buffer::LazyBuffer; use alloc::vec::Vec; +/// Marker indicating the iterator is being used as `LendingIterator` type. +#[cfg(feature = "lending_iters")] +pub struct Lending; +/// Marker indicating the iterator is being used as `Iterator` type. +pub struct NonLending; + /// An iterator to iterate through all the `k`-length combinations in an iterator. /// -/// See [`.combinations()`](crate::Itertools::combinations) for more information. +/// See [`.combinations()`](crate::Itertools::combinations) and [`.combinations_lending()`](crate::Itertools::combinations_lending) for more information. #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct Combinations { +pub struct Combinations { indices: Vec, pool: LazyBuffer, first: bool, + // Disambiguate the purpose of the iterator and makes use not require fully qualified path to disambiguate. Instead chosen by constructor. + phantom: std::marker::PhantomData, } impl Clone for Combinations - where I: Clone + Iterator, - I::Item: Clone, +where + I: Clone + Iterator, + I::Item: Clone, { - clone_fields!(indices, pool, first); + clone_fields!(indices, pool, first, phantom); } -impl fmt::Debug for Combinations - where I: Iterator + fmt::Debug, - I::Item: fmt::Debug, +impl fmt::Debug for Combinations +where + I: Iterator + fmt::Debug, + I::Item: fmt::Debug, { debug_fmt_fields!(Combinations, indices, pool, first); } -/// Create a new `Combinations` from a clonable iterator. +/// Create a new `Combinations` `Iterator` from an iterator. pub fn combinations(iter: I, k: usize) -> Combinations - where I: Iterator +where + I: Iterator, { let mut pool = LazyBuffer::new(iter); pool.prefill(k); @@ -39,27 +50,75 @@ pub fn combinations(iter: I, k: usize) -> Combinations indices: (0..k).collect(), pool, first: true, + phantom: std::marker::PhantomData::, } } -impl Combinations { +impl Combinations { /// Returns the length of a combination produced by this iterator. #[inline] - pub fn k(&self) -> usize { self.indices.len() } + pub fn k(&self) -> usize { + self.indices.len() + } + + #[inline] + /// Does all calculation required to for next combination. Shared between lending and non-lending implementations. + /// Returns false if no next combination. Returns true if there is. + fn next_combination_logic(&mut self) -> bool { + if self.first { + if self.k() > self.n() { + return false; + } + self.first = false; + } else if self.indices.is_empty() { + return false; + } else { + // Scan from the end, looking for an index to increment + let mut i: usize = self.indices.len() - 1; + + // Check if we need to consume more from the iterator + if self.indices[i] == self.pool.len() - 1 { + self.pool.get_next(); // may change pool size + } + + while self.indices[i] == i + self.pool.len() - self.indices.len() { + if i > 0 { + i -= 1; + } else { + // Reached the last combination + return false; + } + } + + // Increment index, and reset the ones to its right + self.indices[i] += 1; + for j in i + 1..self.indices.len() { + self.indices[j] = self.indices[j - 1] + 1; + } + } + + true // next combination exists + } /// Returns the (current) length of the pool from which combination elements are /// selected. This value can change between invocations of [`next`](Combinations::next). #[inline] - pub fn n(&self) -> usize { self.pool.len() } + pub fn n(&self) -> usize { + self.pool.len() + } /// Returns a reference to the source iterator. #[inline] - pub(crate) fn src(&self) -> &I { &self.pool.it } + #[allow(dead_code)] // Not actually dead. Used in powerset. + pub(crate) fn src(&self) -> &I { + &self.pool.it + } /// Resets this `Combinations` back to an initial state for combinations of length /// `k` over the same pool data source. If `k` is larger than the current length /// of the data pool an attempt is made to prefill the pool so that it holds `k` /// elements. + #[allow(dead_code)] // Not actually dead. Used in powerset. pub(crate) fn reset(&mut self, k: usize) { self.first = true; @@ -68,7 +127,6 @@ impl Combinations { for i in 0..k { self.indices[i] = i; } - } else { for i in 0..self.indices.len() { self.indices[i] = i; @@ -79,50 +137,159 @@ impl Combinations { } } -impl Iterator for Combinations - where I: Iterator, - I::Item: Clone +impl Iterator for Combinations +where + I: Iterator, + I::Item: Clone, { type Item = Vec; fn next(&mut self) -> Option { - if self.first { - if self.k() > self.n() { - return None; - } - self.first = false; - } else if self.indices.is_empty() { + let next_comb_exists = self.next_combination_logic(); + if !next_comb_exists { return None; - } else { - // Scan from the end, looking for an index to increment - let mut i: usize = self.indices.len() - 1; + } - // Check if we need to consume more from the iterator - if self.indices[i] == self.pool.len() - 1 { - self.pool.get_next(); // may change pool size + // Create result vector based on the indices. If there is a combination it is always of length k. + let mut out = Vec::with_capacity(self.k()); + out.extend(self.indices.iter().map(|i| self.pool[*i].clone())); + Some(out) + } +} + +impl FusedIterator for Combinations +where + I: Iterator, + I::Item: Clone, +{ +} + +#[cfg(feature = "lending_iters")] +pub mod lending { + use super::*; + pub use lending_iterator::prelude::{gat, LendingIterator, LendingIteratorāļžItem}; + + /// Create a new `Combinations` `LendingIterator` from an iterator. + pub fn combinations_lending(iter: I, k: usize) -> Combinations + where + I: Iterator, + { + let mut pool = LazyBuffer::new(iter); + pool.prefill(k); + + Combinations { + indices: (0..k).collect(), + pool, + first: true, + phantom: std::marker::PhantomData::, + } + } + + #[gat] + impl LendingIterator for Combinations + where + I: Iterator, + I::Item: Clone, + { + type Item<'next> + where + Self: 'next, + = Combination<'next, I>; + fn next(&mut self) -> Option> { + let next_comb_exists = self.next_combination_logic(); + if !next_comb_exists { + return None; } - while self.indices[i] == i + self.pool.len() - self.indices.len() { - if i > 0 { - i -= 1; - } else { - // Reached the last combination - return None; - } + // Create result iterator based on the indices. + Some(Combination { + combinations: &*self, + index: 0, + }) + } + } + + impl Combinations + where + I: Iterator, + I::Item: Clone, + { + /// Applies `collect_vec()` on interior iterators and then again on the result. + #[cfg(feature = "use_alloc")] + pub fn collect_nested_vec(self) -> Vec> + where + Self: Sized, + { + use crate::Itertools; + + self.map_into_iter(|x| x.collect_vec()).collect_vec() + } + } + + // TODO Should take precedence over LendingIterator blanket impl for IntoIterator. How to do? + // Appears to works correctly given sufficient type hints/context such as a for loop. + impl IntoIterator for Combinations + where + I: Iterator, + I::Item: Clone, + { + type Item = Vec; + + type IntoIter = Combinations; + + /// The phantom marker changing is sufficient to change this into an iterator because it implements `Iterator` as well and `Lendingiterator` + #[inline] + fn into_iter(self) -> Self::IntoIter { + Combinations { + indices: self.indices, + pool: self.pool, + first: self.first, + phantom: core::marker::PhantomData::, } + } + } - // Increment index, and reset the ones to its right - self.indices[i] += 1; - for j in i+1..self.indices.len() { - self.indices[j] = self.indices[j - 1] + 1; + /// Iterator over the elements of a particular combination. This allows avoiding unnecessary heap allocations if the use of the combinations is not a `Vec`. + #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] + #[derive(Clone)] + pub struct Combination<'a, I> + where + I: Iterator, + I::Item: Clone, + { + combinations: &'a Combinations, + index: usize, // Index of the combinations indices + } + + impl<'a, I> fmt::Debug for Combination<'a, I> + where + I: Iterator + fmt::Debug, + I::Item: fmt::Debug, + I::Item: Clone, + { + // Allows implementing Debug for items that implement Debug without requiring Debug to use the iterator. + debug_fmt_fields!(Combination, combinations, index); + } + + impl<'a, I> Iterator for Combination<'a, I> + where + I: Iterator, + I::Item: Clone, + { + type Item = I::Item; + + // Simply increment through the indices that fetch values from the pool. + fn next(&mut self) -> Option { + if self.index >= self.combinations.indices.len() { + None + } else { + self.index += 1; + Some(self.combinations.pool[self.combinations.indices[self.index - 1]].clone()) } } - // Create result vector based on the indices - Some(self.indices.iter().map(|i| self.pool[*i].clone()).collect()) + fn size_hint(&self) -> (usize, Option) { + // If a combination is returned it is always of length k. + (self.combinations.k(), Some(self.combinations.k())) + } } } - -impl FusedIterator for Combinations - where I: Iterator, - I::Item: Clone -{} diff --git a/src/lib.rs b/src/lib.rs index f9cf0da7e..d3add6c00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,6 +114,8 @@ pub mod structs { pub use crate::adaptors::MultiProduct; #[cfg(feature = "use_alloc")] pub use crate::combinations::Combinations; + #[cfg(feature = "lending_iters")] + pub use crate::combinations::{Lending, NonLending, lending::{Combination, LendingIterator}}; #[cfg(feature = "use_alloc")] pub use crate::combinations_with_replacement::CombinationsWithReplacement; pub use crate::cons_tuples_impl::ConsTuples; @@ -1487,6 +1489,68 @@ pub trait Itertools : Iterator { combinations::combinations(self, k) } + /// Return an iterator adaptor that iterates over the `k`-length combinations of + /// the elements from an iterator. It is a `LendingIterator` of an `Iterator` of the elements. + /// + /// LendingIterator element type is another iterator of type [`Combination<'next, I>`](crate::Combination) where + /// `'next` is the lifetime passed to the `.next()` call. + /// + /// The element type of the interior iterator is the `Self::Item` this iterator is adapting. + /// + /// This means that no extra heap allocation for a vector is necessary and can be multiple times faster. + /// + /// Use `.into_iter()` (preferred) or `.map_into_iter(|x| x.collect_vec())` if you wish to convert it to a normal non lending `Iterator`. + /// This will lose the speed benefits of lending. + /// If used in a for loop implicit call to `.into_iter()` will allocate a vector on the heap like the non lending version. + /// Consider using `.for_each` or `while let Some(combination) = combinations_lending_iter.next()` instead. + /// + /// Must import `itertools::LendingIterator` type. If you do not you will encounter a + /// "no method named `next` found for struct `Combinations` in the current scope" error. + /// ``` + /// use itertools::{Itertools, LendingIterator}; + /// + /// let it = (1..5).combinations_lending(3); + /// itertools::assert_equal(it, vec![ + /// vec![1, 2, 3], + /// vec![1, 2, 4], + /// vec![1, 3, 4], + /// vec![2, 3, 4], + /// ]); + /// ``` + /// + /// Collection into a non-vec type is more efficient with this method: + /// ``` + /// use std::collections::HashSet; + /// use std::iter::FromIterator; + /// use itertools::{Itertools, LendingIterator}; + /// + /// let mut combinations_lending_iter = (0..20).combinations_lending(4); + /// let mut combinations_iter = (0..20).combinations(4); + /// while let Some(combination) = combinations_lending_iter.next() { + /// let combination_slow = combinations_iter.next().unwrap(); + /// assert_eq!(combination.collect::>(), HashSet::from_iter(combination_slow.into_iter())) + /// } + /// ``` + /// + /// Note: Combinations does not take into account the equality of the iterated values. + /// ``` + /// use itertools::{Itertools, LendingIterator}; + /// + /// let it = vec![1, 2, 2].into_iter().combinations_lending(2); + /// itertools::assert_equal(it, vec![ + /// vec![1, 2], // Note: these are the same + /// vec![1, 2], // Note: these are the same + /// vec![2, 2], + /// ]); + /// ``` + #[cfg(feature = "lending_iters")] + fn combinations_lending(self, k: usize) -> combinations::Combinations + where Self: Sized, + Self::Item: Clone + { + combinations::lending::combinations_lending(self, k) + } + /// Return an iterator that iterates over the `k`-length combinations of /// the elements from an iterator, with replacement. /// diff --git a/tests/quick.rs b/tests/quick.rs index 0adcf1ad7..6e11101ed 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -891,6 +891,16 @@ quickcheck! { } } +#[cfg(feature = "lending_iters")] +qc::quickcheck! { +fn combinations_lending_parity_to_non_lending(it: Iter, k: usize) -> () { + const NUM: usize = 18; + let it = it.take(NUM); // Worst case amount is NUM!/(NUM/2)!^2. at 20 is 1.8*10^5. At 19 is 9.2*10^4. 18 is 4862. + itertools::assert_equal(it.clone().combinations(k), it.clone().combinations_lending(k)); + assert_eq!(it.clone().combinations(k).collect_vec(), it.combinations_lending(k).collect_nested_vec()); + } +} + quickcheck! { fn size_pad_tail(it: Iter, pad: u8) -> bool { correct_size_hint(it.clone().pad_using(pad as usize, |_| 0)) && diff --git a/tests/test_std.rs b/tests/test_std.rs index 77207d87e..568cb20b1 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -909,6 +909,15 @@ fn combinations_zero() { it::assert_equal((0..0).combinations(0), vec![vec![]]); } +// Below shouldn't compile because of attempt to reference an already mut reference. +// #[test] +// fn combinations_lending_cant_double_mut() { +// let mut out = (1..4).combinations_lending(2); +// let mut combination = out.next().unwrap(); +// let combination2 = out.next().unwrap(); +// combination.next(); +// } + #[test] fn permutations_zero() { it::assert_equal((1..3).permutations(0), vec![vec![]]);