From 1c850ce53d1b7360eeac8f3e8dbc0e491e0bc07e Mon Sep 17 00:00:00 2001 From: Jack Wrenn Date: Sat, 29 Jun 2024 10:33:55 +0000 Subject: [PATCH 01/10] ci: Run most tests with miri This PR additionally reduces the size of some heavy tests to reduce their running time. We use nextest as our runner because it runs tests in parallel. However, since nextest does not currenlty run doc tests, we also run those separately. --- .github/workflows/ci.yml | 15 +++++++++++++++ tests/quick.rs | 4 ++++ tests/specializations.rs | 7 +++++++ tests/test_std.rs | 20 ++++++++++++-------- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b8dcf189..8e2a9d66c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,6 +54,21 @@ jobs: - uses: dtolnay/rust-toolchain@stable - run: cargo test --all-features + miri: + runs-on: ubuntu-latest + env: + CARGO_TERM_COLOR: always + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: miri + - uses: taiki-e/install-action@nextest + - run: | + cargo miri nextest run --all-features + cargo miri test --doc + check-format: name: check format runs-on: ubuntu-latest diff --git a/tests/quick.rs b/tests/quick.rs index 12b6b9d5a..672901e7c 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -2,7 +2,11 @@ //! and adaptors. //! //! In particular we test the tedious size_hint and exact size correctness. +//! +//! **NOTE:** Due to performance limitations, these tests are not run with miri! +//! They cannot be relied upon to discover soundness issues. +#![cfg(not(miri))] #![allow(deprecated, unstable_name_collisions)] use itertools::free::{ diff --git a/tests/specializations.rs b/tests/specializations.rs index ff3d5c19b..3e4831024 100644 --- a/tests/specializations.rs +++ b/tests/specializations.rs @@ -1,3 +1,10 @@ +//! Test specializations of methods with default impls match the behavior of the +//! default impls. +//! +//! **NOTE:** Due to performance limitations, these tests are not run with miri! +//! They cannot be relied upon to discover soundness issues. + +#![cfg(not(miri))] #![allow(unstable_name_collisions)] use itertools::Itertools; diff --git a/tests/test_std.rs b/tests/test_std.rs index 00246d506..f626a17d7 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -491,6 +491,7 @@ fn sorted_by() { it::assert_equal(v, vec![4, 3, 2, 1, 0]); } +#[cfg(not(miri))] qc::quickcheck! { fn k_smallest_range(n: i64, m: u16, k: u16) -> () { // u16 is used to constrain k and m to 0..2¹⁶, @@ -598,7 +599,9 @@ macro_rules! generic_test { }; } +#[cfg(not(miri))] generic_test!(k_smallest_sort, u8, u16, u32, u64, i8, i16, i32, i64); +#[cfg(not(miri))] generic_test!(k_smallest_by_sort, u8, u16, u32, u64, i8, i16, i32, i64); #[test] @@ -1055,8 +1058,8 @@ fn binomial(n: usize, k: usize) -> usize { #[test] fn combinations_range_count() { - for n in 0..=10 { - for k in 0..=10 { + for n in 0..=7 { + for k in 0..=7 { let len = binomial(n, k); let mut it = (0..n).combinations(k); assert_eq!(len, it.clone().count()); @@ -1077,7 +1080,7 @@ fn combinations_range_count() { #[test] fn combinations_inexact_size_hints() { - for k in 0..=10 { + for k in 0..=7 { let mut numbers = (0..18).filter(|i| i % 2 == 0); // 9 elements let mut it = numbers.clone().combinations(k); let real_n = numbers.clone().count(); @@ -1129,8 +1132,8 @@ fn permutations_zero() { #[test] fn permutations_range_count() { - for n in 0..=7 { - for k in 0..=7 { + for n in 0..=4 { + for k in 0..=4 { let len = if k <= n { (n - k + 1..=n).product() } else { 0 }; let mut it = (0..n).permutations(k); assert_eq!(len, it.clone().count()); @@ -1162,6 +1165,7 @@ fn permutations_overflowed_size_hints() { } #[test] +#[cfg(not(miri))] fn combinations_with_replacement() { // Pool smaller than n it::assert_equal((0..1).combinations_with_replacement(2), vec![vec![0, 0]]); @@ -1190,8 +1194,8 @@ fn combinations_with_replacement() { #[test] fn combinations_with_replacement_range_count() { - for n in 0..=7 { - for k in 0..=7 { + for n in 0..=4 { + for k in 0..=4 { let len = binomial(usize::saturating_sub(n + k, 1), k); let mut it = (0..n).combinations_with_replacement(k); assert_eq!(len, it.clone().count()); @@ -1236,7 +1240,7 @@ fn powerset() { assert_eq!((0..8).powerset().count(), 1 << 8); assert_eq!((0..16).powerset().count(), 1 << 16); - for n in 0..=10 { + for n in 0..=4 { let mut it = (0..n).powerset(); let len = 2_usize.pow(n); assert_eq!(len, it.clone().count()); From fe7ef10cf217bc5c38088831d35ce0342cd98363 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sat, 27 Apr 2024 13:22:27 +0200 Subject: [PATCH 02/10] Add k_smallest_relaxed and variants This implements the algorithm described in [1] which consumes twice the amount of memory as the existing `k_smallest` algorithm but achieves linear time in the number of elements in the input. [1] https://quickwit.io/blog/top-k-complexity --- src/k_smallest.rs | 39 ++++++++++ src/lib.rs | 187 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_std.rs | 46 +++++++++++- 3 files changed, 270 insertions(+), 2 deletions(-) diff --git a/src/k_smallest.rs b/src/k_smallest.rs index 7b2f62ea1..083c1b00d 100644 --- a/src/k_smallest.rs +++ b/src/k_smallest.rs @@ -88,6 +88,45 @@ where storage } +pub(crate) fn k_smallest_relaxed_general(iter: I, k: usize, mut comparator: F) -> Vec +where + I: Iterator, + F: FnMut(&I::Item, &I::Item) -> Ordering, +{ + if k == 0 { + iter.last(); + return Vec::new(); + } + + let mut iter = iter.fuse(); + let mut buf = iter.by_ref().take(2 * k).collect::>(); + + if buf.len() < k { + buf.sort_unstable_by(&mut comparator); + return buf; + } + + buf.select_nth_unstable_by(k - 1, &mut comparator); + buf.truncate(k); + + iter.for_each(|val| { + if comparator(&val, &buf[k - 1]) != Ordering::Less { + return; + } + + buf.push(val); + + if buf.len() == 2 * k { + buf.select_nth_unstable_by(k - 1, &mut comparator); + buf.truncate(k); + } + }); + + buf.sort_unstable_by(&mut comparator); + buf.truncate(k); + buf +} + #[inline] pub(crate) fn key_to_cmp(mut key: F) -> impl FnMut(&T, &T) -> Ordering where diff --git a/src/lib.rs b/src/lib.rs index b98e03f29..10abe964a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3153,6 +3153,105 @@ pub trait Itertools: Iterator { self.k_smallest_by(k, k_smallest::key_to_cmp(key)) } + /// Sort the k smallest elements into a new iterator, in ascending order, relaxing the amount of memory required. + /// + /// **Note:** This consumes the entire iterator, and returns the result + /// as a new iterator that owns its elements. If the input contains + /// less than k elements, the result is equivalent to `self.sorted()`. + /// + /// This is guaranteed to use `2 * k * sizeof(Self::Item) + O(1)` memory + /// and `O(n + k log k)` time, with `n` the number of elements in the input, + /// meaning it uses more memory than the minimum obtained by [`k_smallest`](Itertools::k_smallest) + /// but achieves linear time in the number of elements. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// **Note:** This is functionally-equivalent to `self.sorted().take(k)` + /// but much more efficient. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_smallest = numbers + /// .into_iter() + /// .k_smallest_relaxed(5); + /// + /// itertools::assert_equal(five_smallest, 0..5); + /// ``` + #[cfg(feature = "use_alloc")] + fn k_smallest_relaxed(self, k: usize) -> VecIntoIter + where + Self: Sized, + Self::Item: Ord, + { + self.k_smallest_relaxed_by(k, Ord::cmp) + } + + /// Sort the k smallest elements into a new iterator using the provided comparison, relaxing the amount of memory required. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that + /// [`k_smallest_relaxed`](Itertools::k_smallest_relaxed) corresponds to `self.sorted().take(k)`, + /// in both semantics and complexity. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_smallest = numbers + /// .into_iter() + /// .k_smallest_relaxed_by(5, |a, b| (a % 7).cmp(&(b % 7)).then(a.cmp(b))); + /// + /// itertools::assert_equal(five_smallest, vec![0, 7, 14, 1, 8]); + /// ``` + #[cfg(feature = "use_alloc")] + fn k_smallest_relaxed_by(self, k: usize, cmp: F) -> VecIntoIter + where + Self: Sized, + F: FnMut(&Self::Item, &Self::Item) -> Ordering, + { + k_smallest::k_smallest_relaxed_general(self, k, cmp).into_iter() + } + + /// Return the elements producing the k smallest outputs of the provided function, relaxing the amount of memory required. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// This corresponds to `self.sorted_by_key(key).take(k)` in the same way that + /// [`k_smallest_relaxed`](Itertools::k_smallest_relaxed) corresponds to `self.sorted().take(k)`, + /// in both semantics and complexity. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_smallest = numbers + /// .into_iter() + /// .k_smallest_relaxed_by_key(5, |n| (n % 7, *n)); + /// + /// itertools::assert_equal(five_smallest, vec![0, 7, 14, 1, 8]); + /// ``` + #[cfg(feature = "use_alloc")] + fn k_smallest_relaxed_by_key(self, k: usize, key: F) -> VecIntoIter + where + Self: Sized, + F: FnMut(&Self::Item) -> K, + K: Ord, + { + self.k_smallest_relaxed_by(k, k_smallest::key_to_cmp(key)) + } + /// Sort the k largest elements into a new iterator, in descending order. /// /// The sorted iterator, if directly collected to a `Vec`, is converted @@ -3243,6 +3342,94 @@ pub trait Itertools: Iterator { self.k_largest_by(k, k_smallest::key_to_cmp(key)) } + /// Sort the k largest elements into a new iterator, in descending order, relaxing the amount of memory required. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// It is semantically equivalent to [`k_smallest_relaxed`](Itertools::k_smallest_relaxed) + /// with a reversed `Ord`. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_largest = numbers + /// .into_iter() + /// .k_largest_relaxed(5); + /// + /// itertools::assert_equal(five_largest, vec![14, 13, 12, 11, 10]); + /// ``` + #[cfg(feature = "use_alloc")] + fn k_largest_relaxed(self, k: usize) -> VecIntoIter + where + Self: Sized, + Self::Item: Ord, + { + self.k_largest_relaxed_by(k, Self::Item::cmp) + } + + /// Sort the k largest elements into a new iterator using the provided comparison, relaxing the amount of memory required. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// Functionally equivalent to [`k_smallest_relaxed_by`](Itertools::k_smallest_relaxed_by) + /// with a reversed `Ord`. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_largest = numbers + /// .into_iter() + /// .k_largest_relaxed_by(5, |a, b| (a % 7).cmp(&(b % 7)).then(a.cmp(b))); + /// + /// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]); + /// ``` + #[cfg(feature = "use_alloc")] + fn k_largest_relaxed_by(self, k: usize, mut cmp: F) -> VecIntoIter + where + Self: Sized, + F: FnMut(&Self::Item, &Self::Item) -> Ordering, + { + self.k_smallest_relaxed_by(k, move |a, b| cmp(b, a)) + } + + /// Return the elements producing the k largest outputs of the provided function, relaxing the amount of memory required. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// Functionally equivalent to [`k_smallest_relaxed_by_key`](Itertools::k_smallest_relaxed_by_key) + /// with a reversed `Ord`. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_largest = numbers + /// .into_iter() + /// .k_largest_relaxed_by_key(5, |n| (n % 7, *n)); + /// + /// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]); + /// ``` + #[cfg(feature = "use_alloc")] + fn k_largest_relaxed_by_key(self, k: usize, key: F) -> VecIntoIter + where + Self: Sized, + F: FnMut(&Self::Item) -> K, + K: Ord, + { + self.k_largest_relaxed_by(k, k_smallest::key_to_cmp(key)) + } + /// Consumes the iterator and return an iterator of the last `n` elements. /// /// The iterator, if directly collected to a `VecDeque`, is converted diff --git a/tests/test_std.rs b/tests/test_std.rs index f626a17d7..ad391faad 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -528,6 +528,42 @@ qc::quickcheck! { it::assert_equal(largest_by, sorted_largest.clone()); it::assert_equal(largest_by_key, sorted_largest); } + + fn k_smallest_relaxed_range(n: i64, m: u16, k: u16) -> () { + // u16 is used to constrain k and m to 0..2¹⁶, + // otherwise the test could use too much memory. + let (k, m) = (k as usize, m as u64); + + let mut v: Vec<_> = (n..n.saturating_add(m as _)).collect(); + // Generate a random permutation of n..n+m + v.shuffle(&mut thread_rng()); + + // Construct the right answers for the top and bottom elements + let mut sorted = v.clone(); + sorted.sort(); + // how many elements are we checking + let num_elements = min(k, m as _); + + // Compute the top and bottom k in various combinations + let sorted_smallest = sorted[..num_elements].iter().cloned(); + let smallest = v.iter().cloned().k_smallest_relaxed(k); + let smallest_by = v.iter().cloned().k_smallest_relaxed_by(k, Ord::cmp); + let smallest_by_key = v.iter().cloned().k_smallest_relaxed_by_key(k, |&x| x); + + let sorted_largest = sorted[sorted.len() - num_elements..].iter().rev().cloned(); + let largest = v.iter().cloned().k_largest_relaxed(k); + let largest_by = v.iter().cloned().k_largest_relaxed_by(k, Ord::cmp); + let largest_by_key = v.iter().cloned().k_largest_relaxed_by_key(k, |&x| x); + + // Check the variations produce the same answers and that they're right + it::assert_equal(smallest, sorted_smallest.clone()); + it::assert_equal(smallest_by, sorted_smallest.clone()); + it::assert_equal(smallest_by_key, sorted_smallest); + + it::assert_equal(largest, sorted_largest.clone()); + it::assert_equal(largest_by, sorted_largest.clone()); + it::assert_equal(largest_by_key, sorted_largest); + } } #[derive(Clone, Debug)] @@ -572,8 +608,11 @@ where I::Item: Ord + Debug, { let j = i.clone(); + let i1 = i.clone(); + let j1 = i.clone(); let k = k as usize; - it::assert_equal(i.k_smallest(k), j.sorted().take(k)) + it::assert_equal(i.k_smallest(k), j.sorted().take(k)); + it::assert_equal(i1.k_smallest_relaxed(k), j1.sorted().take(k)); } // Similar to `k_smallest_sort` but for our custom heap implementation. @@ -583,8 +622,11 @@ where I::Item: Ord + Debug, { let j = i.clone(); + let i1 = i.clone(); + let j1 = i.clone(); let k = k as usize; - it::assert_equal(i.k_smallest_by(k, Ord::cmp), j.sorted().take(k)) + it::assert_equal(i.k_smallest_by(k, Ord::cmp), j.sorted().take(k)); + it::assert_equal(i1.k_smallest_relaxed_by(k, Ord::cmp), j1.sorted().take(k)); } macro_rules! generic_test { From e41a62bc3172cf1ea4afd8d580964691dcb50077 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sat, 27 Apr 2024 21:04:22 +0200 Subject: [PATCH 03/10] Add benchmarks for k_smallest implementation variants. --- Cargo.toml | 6 ++++- benches/k_smallest.rs | 61 +++++++++++++++++++++++++++++++++++++++++++ src/k_smallest.rs | 1 + 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 benches/k_smallest.rs diff --git a/Cargo.toml b/Cargo.toml index 800614657..6ec88a9e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ either = { version = "1.0", default-features = false } [dev-dependencies] rand = "0.7" -criterion = "0.4.0" +criterion = { version = "0.4.0", features = ["html_reports"] } paste = "1.0.0" # Used in test_std to instantiate generic tests permutohedron = "0.2" quickcheck = { version = "0.9", default_features = false } @@ -75,3 +75,7 @@ harness = false [[bench]] name = "specializations" harness = false + +[[bench]] +name = "k_smallest" +harness = false diff --git a/benches/k_smallest.rs b/benches/k_smallest.rs new file mode 100644 index 000000000..509ed7f8a --- /dev/null +++ b/benches/k_smallest.rs @@ -0,0 +1,61 @@ +use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion}; +use itertools::Itertools; +use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; + +fn strict(b: &mut Bencher, (k, vals): &(usize, &Vec)) { + b.iter(|| black_box(vals.iter()).k_smallest(*k)) +} + +fn relaxed(b: &mut Bencher, (k, vals): &(usize, &Vec)) { + b.iter(|| black_box(vals.iter()).k_smallest_relaxed(*k)) +} + +fn ascending(n: usize) -> Vec { + (0..n).collect() +} + +fn random(n: usize) -> Vec { + let mut vals = (0..n).collect_vec(); + vals.shuffle(&mut StdRng::seed_from_u64(42)); + vals +} + +fn descending(n: usize) -> Vec { + (0..n).rev().collect() +} + +fn k_smallest(c: &mut Criterion, order: &str, vals: fn(usize) -> Vec) { + let mut g = c.benchmark_group(format!("k-smallest/{order}")); + + for log_n in 20..23 { + let n = 1 << log_n; + + let vals = vals(n); + + for log_k in 7..10 { + let k = 1 << log_k; + + let params = format!("{log_n}/{log_k}"); + let input = (k, &vals); + g.bench_with_input(BenchmarkId::new("strict", ¶ms), &input, strict); + g.bench_with_input(BenchmarkId::new("relaxed", ¶ms), &input, relaxed); + } + } + + g.finish() +} + +fn k_smallest_asc(c: &mut Criterion) { + k_smallest(c, "asc", ascending); +} + +fn k_smallest_rand(c: &mut Criterion) { + k_smallest(c, "rand", random); +} + +fn k_smallest_desc(c: &mut Criterion) { + k_smallest(c, "desc", descending); +} + +criterion_group!(benches, k_smallest_asc, k_smallest_rand, k_smallest_desc); +criterion_main!(benches); diff --git a/src/k_smallest.rs b/src/k_smallest.rs index 083c1b00d..7e4ace262 100644 --- a/src/k_smallest.rs +++ b/src/k_smallest.rs @@ -114,6 +114,7 @@ where return; } + assert_ne!(buf.len(), buf.capacity()); buf.push(val); if buf.len() == 2 * k { From 30678931c1595eee7bb5106015146d1d114750ec Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Wed, 3 Jul 2024 09:09:13 +0200 Subject: [PATCH 04/10] Fix/ignore warnings within doctests --- src/free.rs | 6 ++++-- src/kmerge_impl.rs | 1 + src/lib.rs | 31 ++++++++++++++++++++----------- src/merge_join.rs | 1 + src/zip_eq_impl.rs | 1 + 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/free.rs b/src/free.rs index 8d0bcf3ea..4c6820543 100644 --- a/src/free.rs +++ b/src/free.rs @@ -36,7 +36,7 @@ pub use crate::zip_eq_impl::zip_eq; /// ``` /// use itertools::intersperse; /// -/// itertools::assert_equal(intersperse((0..3), 8), vec![0, 8, 1, 8, 2]); +/// itertools::assert_equal(intersperse(0..3, 8), vec![0, 8, 1, 8, 2]); /// ``` pub fn intersperse(iterable: I, element: I::Item) -> Intersperse where @@ -55,7 +55,7 @@ where /// use itertools::intersperse_with; /// /// let mut i = 10; -/// itertools::assert_equal(intersperse_with((0..3), || { i -= 1; i }), vec![0, 9, 1, 8, 2]); +/// itertools::assert_equal(intersperse_with(0..3, || { i -= 1; i }), vec![0, 9, 1, 8, 2]); /// assert_eq!(i, 8); /// ``` pub fn intersperse_with(iterable: I, element: F) -> IntersperseWith @@ -75,6 +75,7 @@ where /// /// for (i, elt) in enumerate(&[1, 2, 3]) { /// /* loop body */ +/// # let _ = (i, elt); /// } /// ``` pub fn enumerate(iterable: I) -> iter::Enumerate @@ -93,6 +94,7 @@ where /// /// for elt in rev(&[1, 2, 3]) { /// /* loop body */ +/// # let _ = elt; /// } /// ``` pub fn rev(iterable: I) -> iter::Rev diff --git a/src/kmerge_impl.rs b/src/kmerge_impl.rs index 13c935b28..9ea73f9e8 100644 --- a/src/kmerge_impl.rs +++ b/src/kmerge_impl.rs @@ -134,6 +134,7 @@ impl bool> KMergePredicate for F { /// /// for elt in kmerge(vec![vec![0, 2, 4], vec![1, 3, 5], vec![6, 7]]) { /// /* loop body */ +/// # let _ = elt; /// } /// ``` pub fn kmerge(iterable: I) -> KMerge<::IntoIter> diff --git a/src/lib.rs b/src/lib.rs index 10abe964a..0f5ea8ee2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![warn(missing_docs, clippy::default_numeric_fallback)] #![crate_name = "itertools"] #![cfg_attr(not(feature = "use_std"), no_std)] +#![doc(test(attr(deny(warnings), allow(deprecated, unstable_name_collisions))))] //! Extra iterator adaptors, functions and macros. //! @@ -8,6 +9,7 @@ //! the [`Itertools`] trait: //! //! ``` +//! # #[allow(unused_imports)] //! use itertools::Itertools; //! ``` //! @@ -29,6 +31,7 @@ //! //! for elt in interleave(&[1, 2, 3], &[2, 3, 4]) { //! /* loop body */ +//! # let _ = elt; //! } //! ``` //! @@ -248,6 +251,7 @@ mod ziptuple; /// // from (0, 0, 0), (0, 0, 1), .., (0, 1, 0), (0, 1, 1), .. etc until (3, 3, 3) /// for (i, j, k) in iproduct!(0..4, 0..4, 0..4) { /// // .. +/// # let _ = (i, j, k); /// } /// # } /// ``` @@ -375,16 +379,16 @@ macro_rules! izip { /// /// Invocations of `chain!` with one argument expand to [`arg.into_iter()`](IntoIterator): /// ``` -/// use std::{ops::Range, slice}; +/// use std::ops::Range; /// use itertools::chain; -/// let _: as IntoIterator>::IntoIter = chain!((2..6),); // trailing comma optional! +/// let _: as IntoIterator>::IntoIter = chain!(2..6,); // trailing comma optional! /// let _: <&[_] as IntoIterator>::IntoIter = chain!(&[2, 3, 4]); /// ``` /// /// Invocations of `chain!` with multiple arguments [`.into_iter()`](IntoIterator) each /// argument, and then [`chain`] them together: /// ``` -/// use std::{iter::*, ops::Range, slice}; +/// use std::{iter::*, slice}; /// use itertools::{assert_equal, chain}; /// /// // e.g., this: @@ -1902,7 +1906,7 @@ pub trait Itertools: Iterator { /// use itertools::Itertools; /// /// let input = vec![vec![1], vec![3, 2, 1]]; - /// let it = input.into_iter().update(|mut v| v.push(0)); + /// let it = input.into_iter().update(|v| v.push(0)); /// itertools::assert_equal(it, vec![vec![1, 0], vec![3, 2, 1, 0]]); /// ``` fn update(self, updater: F) -> Update @@ -2162,7 +2166,7 @@ pub trait Itertools: Iterator { /// ``` /// use itertools::Itertools; /// - /// let mut iter = "αβγ".chars().dropping(2); + /// let iter = "αβγ".chars().dropping(2); /// itertools::assert_equal(iter, "γ".chars()); /// ``` /// @@ -2246,14 +2250,17 @@ pub trait Itertools: Iterator { /// /// fn process_dir_entries(entries: &[fs::DirEntry]) { /// // ... + /// # let _ = entries; /// } /// - /// fn do_stuff() -> std::io::Result<()> { + /// fn do_stuff() -> io::Result<()> { /// let entries: Vec<_> = fs::read_dir(".")?.try_collect()?; /// process_dir_entries(&entries); /// /// Ok(()) /// } + /// + /// # let _ = do_stuff; /// ``` fn try_collect(self) -> Result where @@ -2404,6 +2411,7 @@ pub trait Itertools: Iterator { /// accum = f(accum, 1); /// accum = f(accum, 2); /// accum = f(accum, 3); + /// # let _ = accum; /// ``` /// /// With a `start` value of 0 and an addition as folding function, @@ -2554,16 +2562,16 @@ pub trait Itertools: Iterator { /// assert_eq!((1..8).map(|x| x.to_string()).tree_reduce(f), /// Some(String::from("f(f(f(1, 2), f(3, 4)), f(f(5, 6), 7))"))); /// - /// // Like fold1, an empty iterator produces None + /// // Like reduce, an empty iterator produces None /// assert_eq!((0..0).tree_reduce(|x, y| x * y), None); /// - /// // tree_reduce matches fold1 for associative operations... + /// // tree_reduce matches reduce for associative operations... /// assert_eq!((0..10).tree_reduce(|x, y| x + y), - /// (0..10).fold1(|x, y| x + y)); + /// (0..10).reduce(|x, y| x + y)); /// /// // ...but not for non-associative ones /// assert_ne!((0..10).tree_reduce(|x, y| x - y), - /// (0..10).fold1(|x, y| x - y)); + /// (0..10).reduce(|x, y| x - y)); /// /// let mut total_len_reduce = 0; /// let reduce_res = (1..100).map(|x| x.to_string()) @@ -4350,7 +4358,7 @@ pub trait Itertools: Iterator { /// # Examples /// ``` /// # use itertools::Itertools; - /// let counts = [1, 1, 1, 3, 3, 5].into_iter().counts(); + /// let counts = [1, 1, 1, 3, 3, 5].iter().counts(); /// assert_eq!(counts[&1], 3); /// assert_eq!(counts[&3], 2); /// assert_eq!(counts[&5], 1); @@ -4376,6 +4384,7 @@ pub trait Itertools: Iterator { /// # use itertools::Itertools; /// struct Character { /// first_name: &'static str, + /// # #[allow(dead_code)] /// last_name: &'static str, /// } /// diff --git a/src/merge_join.rs b/src/merge_join.rs index c0de35f90..5f4a605fa 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -31,6 +31,7 @@ pub type Merge = MergeBy; /// /// for elt in merge(&[1, 2, 3], &[2, 3, 4]) { /// /* loop body */ +/// # let _ = elt; /// } /// ``` pub fn merge( diff --git a/src/zip_eq_impl.rs b/src/zip_eq_impl.rs index 6d3b68296..3240a40eb 100644 --- a/src/zip_eq_impl.rs +++ b/src/zip_eq_impl.rs @@ -21,6 +21,7 @@ pub struct ZipEq { /// let data = [1, 2, 3, 4, 5]; /// for (a, b) in zip_eq(&data[..data.len() - 1], &data[1..]) { /// /* loop body */ +/// # let _ = (a, b); /// } /// ``` pub fn zip_eq(i: I, j: J) -> ZipEq From a20444fb87729b36f97db0137c78c4473b84044c Mon Sep 17 00:00:00 2001 From: Alex Macleod Date: Fri, 12 Jul 2024 17:26:14 +0000 Subject: [PATCH 05/10] Allow `Q: ?Sized` in `Itertools::contains` --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 0f5ea8ee2..a8de36749 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2068,7 +2068,7 @@ pub trait Itertools: Iterator { where Self: Sized, Self::Item: Borrow, - Q: PartialEq, + Q: PartialEq + ?Sized, { self.any(|x| x.borrow() == query) } From a4a82e4b97eb76687c2a57cdfd2e5343ff507827 Mon Sep 17 00:00:00 2001 From: Ciel <47144701+Ciel-MC@users.noreply.github.com> Date: Mon, 15 Jul 2024 16:52:26 +0800 Subject: [PATCH 06/10] Give `take_while_inclusive` a more intuitive doc alias --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index a8de36749..0082aa989 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1594,6 +1594,7 @@ pub trait Itertools: Iterator { /// .collect(); /// let expected: Vec<_> = vec![1, 2, 3].into_iter().map(NoCloneImpl).collect(); /// assert_eq!(filtered, expected); + #[doc(alias = "take_until")] fn take_while_inclusive(self, accept: F) -> TakeWhileInclusive where Self: Sized, From 4f49f2636aa95283cc4c921e6a3e04efe317fda5 Mon Sep 17 00:00:00 2001 From: philipp Date: Sun, 18 Aug 2024 10:47:02 +0200 Subject: [PATCH 07/10] Fix clippy 1.80 lints If documentation should be part of bullet point, we need to indent. --- src/diff.rs | 2 +- src/lib.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diff.rs b/src/diff.rs index c6d99657e..df88d8032 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -1,7 +1,7 @@ //! "Diff"ing iterators for caching elements to sequential collections without requiring the new //! elements' iterator to be `Clone`. //! -//! - [`Diff`] (produced by the [`diff_with`] function) +//! [`Diff`] (produced by the [`diff_with`] function) //! describes the difference between two non-`Clone` iterators `I` and `J` after breaking ASAP from //! a lock-step comparison. diff --git a/src/lib.rs b/src/lib.rs index 0082aa989..0465d0801 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -427,13 +427,13 @@ macro_rules! chain { /// This trait defines a number of methods. They are divided into two groups: /// /// * *Adaptors* take an iterator and parameter as input, and return -/// a new iterator value. These are listed first in the trait. An example -/// of an adaptor is [`.interleave()`](Itertools::interleave) +/// a new iterator value. These are listed first in the trait. An example +/// of an adaptor is [`.interleave()`](Itertools::interleave) /// /// * *Regular methods* are those that don't return iterators and instead -/// return a regular value of some other kind. -/// [`.next_tuple()`](Itertools::next_tuple) is an example and the first regular -/// method in the list. +/// return a regular value of some other kind. +/// [`.next_tuple()`](Itertools::next_tuple) is an example and the first regular +/// method in the list. pub trait Itertools: Iterator { // adaptors From e2a0a6e9759f9f8851aa8735ca79e7498dfdf8e8 Mon Sep 17 00:00:00 2001 From: philipp Date: Wed, 21 Aug 2024 20:14:43 +0200 Subject: [PATCH 08/10] ConsTuples is a MapSpecialCase --- src/cons_tuples_impl.rs | 45 ++++++++++++----------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/src/cons_tuples_impl.rs b/src/cons_tuples_impl.rs index 9ab309478..7e86260b4 100644 --- a/src/cons_tuples_impl.rs +++ b/src/cons_tuples_impl.rs @@ -1,24 +1,15 @@ +use crate::adaptors::map::{MapSpecialCase, MapSpecialCaseFn}; + macro_rules! impl_cons_iter( ($_A:ident, $_B:ident, ) => (); // stop ($A:ident, $($B:ident,)*) => ( impl_cons_iter!($($B,)*); #[allow(non_snake_case)] - impl Iterator for ConsTuples - where Iter: Iterator, - { - type Item = ($($B,)* X, ); - fn next(&mut self) -> Option { - self.iter.next().map(|(($($B,)*), x)| ($($B,)* x, )) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - fn fold(self, accum: Acc, mut f: Fold) -> Acc - where Fold: FnMut(Acc, Self::Item) -> Acc, - { - self.iter.fold(accum, move |acc, (($($B,)*), x)| f(acc, ($($B,)* x, ))) + impl<$($B),*, X> MapSpecialCaseFn<(($($B,)*), X)> for ConsTuplesFn { + type Out = ($($B,)* X, ); + fn call(&mut self, (($($B,)*), X): (($($B,)*), X)) -> Self::Out { + ($($B,)* X, ) } } ); @@ -26,33 +17,23 @@ macro_rules! impl_cons_iter( impl_cons_iter!(A, B, C, D, E, F, G, H, I, J, K, L,); +#[derive(Debug, Clone)] +pub struct ConsTuplesFn; + /// An iterator that maps an iterator of tuples like /// `((A, B), C)` to an iterator of `(A, B, C)`. /// /// Used by the `iproduct!()` macro. -#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -#[derive(Debug)] -pub struct ConsTuples -where - I: Iterator, -{ - iter: I, -} - -impl Clone for ConsTuples -where - I: Clone + Iterator, -{ - clone_fields!(iter); -} +pub type ConsTuples = MapSpecialCase; /// Create an iterator that maps for example iterators of /// `((A, B), C)` to `(A, B, C)`. -pub fn cons_tuples(iterable: I) -> ConsTuples +pub fn cons_tuples(iterable: I) -> ConsTuples where - I: IntoIterator, + I: IntoIterator, { ConsTuples { iter: iterable.into_iter(), + f: ConsTuplesFn, } } From 91f96182488788e01a40742c5269a0a3d02da49f Mon Sep 17 00:00:00 2001 From: Axel Karjalainen Date: Wed, 7 Aug 2024 22:24:51 +0300 Subject: [PATCH 09/10] Add note about Result to `find_or_last` and `find_or_first` See GH-983 --- src/lib.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 0465d0801..604529a59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1999,6 +1999,15 @@ pub trait Itertools: Iterator { /// assert_eq!(numbers.iter().find_or_last(|&&x| x > 5), Some(&4)); /// assert_eq!(numbers.iter().find_or_last(|&&x| x > 2), Some(&3)); /// assert_eq!(std::iter::empty::().find_or_last(|&x| x > 5), None); + /// + /// // An iterator of Results can return the first Ok or the last Err: + /// let input = vec![Err(()), Ok(11), Err(()), Ok(22)]; + /// assert_eq!(input.into_iter().find_or_last(Result::is_ok), Some(Ok(11))); + /// + /// let input: Vec> = vec![Err(11), Err(22)]; + /// assert_eq!(input.into_iter().find_or_last(Result::is_ok), Some(Err(22))); + /// + /// assert_eq!(std::iter::empty::>().find_or_last(Result::is_ok), None); /// ``` fn find_or_last

(mut self, mut predicate: P) -> Option where @@ -2027,6 +2036,15 @@ pub trait Itertools: Iterator { /// assert_eq!(numbers.iter().find_or_first(|&&x| x > 5), Some(&1)); /// assert_eq!(numbers.iter().find_or_first(|&&x| x > 2), Some(&3)); /// assert_eq!(std::iter::empty::().find_or_first(|&x| x > 5), None); + /// + /// // An iterator of Results can return the first Ok or the first Err: + /// let input = vec![Err(()), Ok(11), Err(()), Ok(22)]; + /// assert_eq!(input.into_iter().find_or_first(Result::is_ok), Some(Ok(11))); + /// + /// let input: Vec> = vec![Err(11), Err(22)]; + /// assert_eq!(input.into_iter().find_or_first(Result::is_ok), Some(Err(11))); + /// + /// assert_eq!(std::iter::empty::>().find_or_first(Result::is_ok), None); /// ``` fn find_or_first

(mut self, mut predicate: P) -> Option where From a31e14fc73398bb83482f1ec8fdf59dc584f7561 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Wed, 3 Jul 2024 22:04:19 +0200 Subject: [PATCH 10/10] Increase discoverability of `merge_join_by` (Unix-like `comm`) --- src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 604529a59..9670390a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1091,7 +1091,9 @@ pub trait Itertools: Iterator { /// let b = (0..10).step_by(3); /// /// itertools::assert_equal( - /// a.merge_join_by(b, |i, j| i.cmp(j)), + /// // This performs a diff in the style of the Unix command comm(1), + /// // generalized to arbitrary types rather than text. + /// a.merge_join_by(b, Ord::cmp), /// vec![Both(0, 0), Left(2), Right(3), Left(4), Both(6, 6), Left(1), Right(9)] /// ); /// ``` @@ -1123,6 +1125,7 @@ pub trait Itertools: Iterator { /// ); /// ``` #[inline] + #[doc(alias = "comm")] fn merge_join_by(self, other: J, cmp_fn: F) -> MergeJoinBy where J: IntoIterator,