diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4c9e1ff --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +text eol=crlf diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..22b1e8d --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "monthly" + open-pull-requests-limit: 10 + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..0bfc55b --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,29 @@ +name: Rust + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + toolchain: + - stable + - beta + - nightly + + steps: + - uses: actions/checkout@v3 + - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8eb581d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +/Cargo.lock +/.idea diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..fdcc052 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,40 @@ +[workspace] +resolver = "2" +members = [ + "frozen-collections", + "frozen-collections-core", + "frozen-collections-macros", + "benches", + "examples", +] + +[workspace.package] +version = "0.1.0" +edition = "2021" +categories = ["data-structures"] +keywords = ["map", "set", "collection"] +repository = "https://github.com/geeknoid/frozen-collections" +license = "MIT" +readme = "README.md" +authors = ["Martin Taillefer "] + +[workspace.lints.clippy] +pedantic = { level = "warn", priority = -1 } +correctness = { level = "warn", 
priority = -1 } +complexity = { level = "warn", priority = -1 } +perf = { level = "warn", priority = -1 } +cargo = { level = "warn", priority = -1 } +nursery = { level = "warn", priority = -1 } +single_call_fn = "allow" +min_ident_chars = "allow" +unused_self = "allow" +transmute_ptr_to_ptr = "allow" +wildcard_imports = "allow" +too_many_lines = "allow" +multiple_crate_versions = "allow" +missing_transmute_annotations = "allow" +from-iter-instead-of-collect = "allow" + +[profile.bench] +codegen-units = 1 +lto = "fat" diff --git a/Ideas_and_Todos.md b/Ideas_and_Todos.md new file mode 100644 index 0000000..c415746 --- /dev/null +++ b/Ideas_and_Todos.md @@ -0,0 +1,40 @@ +# Ideas + +* Create a benchmark suite to try and come up with better numbers for the various threshold and percentages + used in the analysis code. + +- In the hash code analyzer, beyond the number of collisions, the logic should factor in how many empty slots are in the + hash table. A lot of empty slots can slow things down due to cache misses, in addition to wasting memory. + +- Consider some hint supplied by the caller for how much time/effort to put into analysis. + +- Consider providing an offline tool that performs the analysis on the input data. Being offline, the + analysis could be more exhaustive. The analysis would produce a little blob of state which would be fed + into the code to configure things without running analysis code at runtime. + +- Consider the use of perfect hashing or minimal perfect hashing. + +- Consider introducing dynamic benchmarking as part of the analysis phase. We could build + several prototype collections, measure effective perf, and then use the benchmark results to + decide on the optimal collection configuration. + +- The facades need to support some notion of Borrow. This is particularly important to + allowing collections where K=String to be queried with &str instead. 
Unfortunately, given the + gymnastics the code is doing internally around hashing, it's not obvious how this feature + could be added. + +- Add a specialized set implementation for integer types which uses a bit vector for storage. + +- Evaluate hash functions to find the highest-performance one. + +- Bypass hashing for short left-slices or right-slices. When the slices are + short enough, we should just take the character values as the hash code. + +- For integer keys, consider expanding the model for ranges to include ranges with holes. + Basically, the array would hold `Option<V>` instead of just `V`. + +# TODOs + +- Tests +- Make it so the macros don't need a type indicator for strings and ints +- Perf analysis diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b5df8da --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2024 Martin Taillefer + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..231095c --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# Frozen Collections + +[![Test Status](https://github.com/geeknoid/frozen-collections/workflows/Rust/badge.svg?event=push)](https://github.com/geeknoid/frozen-collections/actions) +[![Crate](https://img.shields.io/crates/v/frozen-collections.svg)](https://crates.io/crates/frozen-collections) +[![API](https://docs.rs/frozen-collections/badge.svg)](https://docs.rs/frozen-collections) + +Frozen collections are designed to trade creation time for improved +read performance. They are ideal for use with long-lasting collections +which get initialized when an application starts and remain unchanged +permanently, or at least for extended periods of time. This is a common +pattern in service applications. + +During creation, the input data is analyzed to determine the best layout and algorithm for the specific case. +This analysis can take some time, but the value in spending this time up front +is that the collections provide blazingly fast read-time performance. 
diff --git a/benches/Cargo.toml b/benches/Cargo.toml new file mode 100644 index 0000000..ceb3b0d --- /dev/null +++ b/benches/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "benches" +version = "0.0.0" +publish = false +edition = "2021" + +[dev-dependencies] +frozen-collections = { path = "../frozen-collections" } +rand = "0.9.0-alpha.1" +criterion = "0.5.1" + +[[bench]] +name = "frozen_map" +path = "frozen_map.rs" +harness = false + +[lints] +workspace = true diff --git a/benches/frozen_map.rs b/benches/frozen_map.rs new file mode 100644 index 0000000..46d0cf5 --- /dev/null +++ b/benches/frozen_map.rs @@ -0,0 +1,204 @@ +use std::collections::HashMap; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use frozen_collections::{frozen_map, FrozenMap}; + +fn u32_keys(c: &mut Criterion) { + let mut group = c.benchmark_group("u32_keys"); + + let map = HashMap::from([(0u32, 1), (2, 3), (4, 5), (6, 7), (8, 9)]); + group.bench_function("HashMap", |b| { + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.bench_function("FrozenMap", |b| { + let map = FrozenMap::try_from([(0u32, 1), (2, 3), (4, 5), (6, 7), (8, 9)]).unwrap(); + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.bench_function("frozen_map!", |b| { + let map = frozen_map!(u32, 0: 1, 2: 3, 4: 5, 6: 7, 8: 9); + + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.finish(); +} + +fn u32_keys_range(c: &mut Criterion) { + let mut group = c.benchmark_group("u32_keys_range"); + + let map = HashMap::from([(0u32, 0), (1, 1), (2, 2), (3, 3), (4, 4)]); + group.bench_function("HashMap", |b| { + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.bench_function("FrozenMap", |b| { + let map = FrozenMap::try_from([(0u32, 0), (1, 1), (2, 2), (3, 3), (4, 4)]).unwrap(); + b.iter(|| { + _ = black_box(map.get(&4)); + _ = 
black_box(map.get(&10)); + }); + }); + + group.bench_function("frozen_map!", |b| { + let map = frozen_map!(u32, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4); + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.finish(); +} + +fn i32_keys(c: &mut Criterion) { + let mut group = c.benchmark_group("i32_keys"); + + let map = HashMap::from([(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]); + group.bench_function("HashMap", |b| { + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.bench_function("FrozenMap", |b| { + let map = FrozenMap::try_from([(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]).unwrap(); + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.bench_function("frozen_map!", |b| { + let map = frozen_map!(i32, 0: 1, 2: 3, 4: 5, 6: 7, 8: 9); + b.iter(|| { + _ = black_box(map.get(&4)); + _ = black_box(map.get(&10)); + }); + }); + + group.finish(); +} + +fn string_keys_length(c: &mut Criterion) { + let mut group = c.benchmark_group("string_keys_length"); + let kvs = [ + ("Red".to_string(), 1), + ("Green".to_string(), 2), + ("Blue".to_string(), 3), + ("Cyan".to_string(), 4), + ("Magenta".to_string(), 5), + ("Purple".to_string(), 6), + ]; + + let blue = "Blue".to_string(); + let black = "Black".to_string(); + + let map = HashMap::from(kvs.clone()); + group.bench_function("HashMap", |b| { + b.iter(|| { + _ = black_box(map.get(&blue)); + _ = black_box(map.get(&black)); + }); + }); + + group.bench_function("FrozenMap", |b| { + let map = FrozenMap::try_from(kvs.clone()).unwrap(); + b.iter(|| { + _ = black_box(map.get(&blue)); + _ = black_box(map.get(&black)); + }); + }); + + group.bench_function("frozen_map!", |b| { + let map = frozen_map!( + &str, + "Red": 1, + "Green": 2, + "Blue": 3, + "Cyan": 4, + "Magenta": 5, + "Purple": 6); + + b.iter(|| { + _ = black_box(map.get(&blue)); + _ = black_box(map.get(&black)); + }); + }); + + group.finish(); +} + +fn 
string_keys_subslice(c: &mut Criterion) { + let mut group = c.benchmark_group("string_keys_subslice"); + let kvs = [ + ("abcdefghi0".to_string(), 1), + ("abcdefghi1".to_string(), 2), + ("abcdefghi2".to_string(), 3), + ("abcdefghi3".to_string(), 4), + ("abcdefghi4".to_string(), 5), + ("abcdefghi5".to_string(), 6), + ]; + + let blue = "Blue".to_string(); + let black = "Black".to_string(); + + let map = HashMap::from(kvs.clone()); + group.bench_function("HashMap", |b| { + b.iter(|| { + _ = black_box(map.get(&blue)); + _ = black_box(map.get(&black)); + }); + }); + + group.bench_function("FrozenMap", |b| { + let map = FrozenMap::try_from(kvs.clone()).unwrap(); + b.iter(|| { + _ = black_box(map.get(&blue)); + _ = black_box(map.get(&black)); + }); + }); + + group.bench_function("frozen_map!", |b| { + let map = frozen_map!( + &str, + "abcdefghi0": 1, + "abcdefghi1": 2, + "abcdefghi2": 3, + "abcdefghi3": 4, + "abcdefghi4": 5, + "abcdefghi5": 6, + ); + b.iter(|| { + _ = black_box(map.get(&blue)); + _ = black_box(map.get(&black)); + }); + }); + + group.finish(); +} + +criterion_group!( + benches, + string_keys_length, + string_keys_subslice, + u32_keys, + u32_keys_range, + i32_keys +); +criterion_main!(benches); diff --git a/examples/Cargo.toml b/examples/Cargo.toml new file mode 100644 index 0000000..071efe0 --- /dev/null +++ b/examples/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "examples" +version = "0.0.0" +publish = false +edition = "2021" + +# If you copy one of the examples into a new project, you should be using +# [dependencies] instead, and delete the **path**. 
+[dev-dependencies] +frozen-collections = { path = "../frozen-collections" } + +[[example]] +name = "frozen_map" +path = "frozen_map.rs" + +[[example]] +name = "frozen_set" +path = "frozen_set.rs" + +[lints] +workspace = true diff --git a/examples/frozen_map.rs b/examples/frozen_map.rs new file mode 100644 index 0000000..7ce32a7 --- /dev/null +++ b/examples/frozen_map.rs @@ -0,0 +1,106 @@ +use frozen_collections::*; + +fn main() { + use_macro(); + use_function(); +} + +fn use_macro() { + // Create a frozen map using the frozen_map! macro. This results in + // the best performance, but it requires that all the keys be known + // at compile time + let fm = frozen_map!( + &str, + "first_key": (1, "first_value"), + "second_key": (2, "second_value"), + "third_key": (3, "third_value"), + "fourth_key": (4, "fourth_value"), + "fifth_key": (5, "fifth_value"), + "sixth_key": (6, "sixth_value"), + "seventh_key": (7, "seventh_value"), + ); + + assert_eq!(7, fm.len()); + assert!(!fm.is_empty()); + assert!(fm.contains_key("first_key")); + assert!(!fm.contains_key("eight_key")); + assert_eq!(Some(&(2, "second_value")), fm.get("second_key")); + assert_eq!( + Some((&"third_key".to_string(), &(3, "third_value"))), + fm.get_key_value("third_key") + ); + + // print out the map's contents + println!("MAP CONTENTS"); + println!(" {fm:?}"); + + // print out all the entries, in random order + println!("MAP ENTRIES"); + for entry in &fm { + println!(" {entry:?}"); + } + + // print out all the keys, in random order + println!("MAP KEYS"); + for key in fm.keys() { + println!(" {key:?}"); + } + + // print out all the values, in random order + println!("MAP VALUES"); + for value in fm.values() { + println!(" {value:?}"); + } +} + +fn use_function() { + // Create a frozen map using the FrozenMap type. This is slightly + // slower than using the frozen_map! macro, but is necessary when the + // keys are not known at compile time. + + // The key/value pairs we're loading into the frozen map. 
Imagine these + // are determined at runtime. + let v = vec![ + ("first_key", (1, "first_value")), + ("second_key", (2, "second_value")), + ("third_key", (3, "third_value")), + ("fourth_key", (4, "fourth_value")), + ("fifth_key", (5, "fifth_value")), + ("sixth_key", (6, "sixth_value")), + ("seventh_key", (7, "seventh_value")), + ]; + + let fm = FrozenMap::try_from(v).unwrap(); + + assert_eq!(7, fm.len()); + assert!(!fm.is_empty()); + assert!(fm.contains_key(&"first_key")); + assert!(!fm.contains_key(&"eight_key")); + assert_eq!(Some(&(2, "second_value")), fm.get(&"second_key")); + assert_eq!( + Some((&"third_key", &(3, "third_value"))), + fm.get_key_value(&"third_key") + ); + + // print out the map's contents + println!("MAP CONTENTS"); + println!(" {fm:?}"); + + // print out all the entries, in random order + println!("MAP ENTRIES"); + for entry in &fm { + println!(" {entry:?}"); + } + + // print out all the keys, in random order + println!("MAP KEYS"); + for key in fm.keys() { + println!(" {key:?}"); + } + + // print out all the values, in random order + println!("MAP VALUES"); + for value in fm.values() { + println!(" {value:?}"); + } +} diff --git a/examples/frozen_set.rs b/examples/frozen_set.rs new file mode 100644 index 0000000..453d698 --- /dev/null +++ b/examples/frozen_set.rs @@ -0,0 +1,72 @@ +use frozen_collections::*; + +fn main() { + use_macro(); + use_function(); +} + +fn use_macro() { + // Create a frozen set using the frozen_set! macro. 
This results in + // the best performance, but it requires that all the values be known + // at compile time + let fs = frozen_set!( + &str, + "first_value", + "second_value", + "third_value", + "fourth_value", + "fifth_value", + "sixth_value", + "seventh_value", + ); + + assert_eq!(7, fs.len()); + assert!(!fs.is_empty()); + assert!(fs.contains("first_value")); + assert!(!fs.contains("eight_value")); + + // print out the set's contents + println!("SET CONTENTS"); + println!(" {fs:?}"); + + // print out all the values, in random order + println!("SET VALUES"); + for value in &fs { + println!(" {value:?}"); + } +} + +fn use_function() { + // Create a frozen set using the FrozenSet type. This is slightly + // slower than using the frozen_set! macro, but is necessary when the + // values are not known at compile time. + + // The values we're loading into the frozen set. Imagine these + // are determined at runtime. + let v = vec![ + "first_value", + "second_value", + "third_value", + "fourth_value", + "fifth_value", + "sixth_value", + "seventh_value", + ]; + + let fs = FrozenSet::try_from(v).unwrap(); + + assert_eq!(7, fs.len()); + assert!(!fs.is_empty()); + assert!(fs.contains(&"first_value")); + assert!(!fs.contains(&"eight_value")); + + // print out the set's contents + println!("SET CONTENTS"); + println!(" {fs:?}"); + + // print out all the values, in random order + println!("SET VALUES"); + for value in &fs { + println!(" {value:?}"); + } +} diff --git a/frozen-collections-core/Cargo.toml b/frozen-collections-core/Cargo.toml new file mode 100644 index 0000000..2e78491 --- /dev/null +++ b/frozen-collections-core/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "frozen-collections-core" +description = "Implementation logic for frozen collections." 
+readme = "README.md" +authors.workspace = true +version.workspace = true +edition.workspace = true +categories.workspace = true +keywords.workspace = true +repository.workspace = true +license.workspace = true + +[dependencies] +num-traits = "0.2.19" +bitvec = "1.0.1" +syn = { version = "2.0.68", features = ["extra-traits", "full", "fold"] } +quote = "1.0.36" +proc-macro2 = "1.0.86" +proc-macro-error = "1.0.4" + +[dev-dependencies] +rand = "0.9.0-alpha.1" +criterion = "0.5.1" + +[lints] +workspace = true diff --git a/frozen-collections-core/README.md b/frozen-collections-core/README.md new file mode 100644 index 0000000..aa557f4 --- /dev/null +++ b/frozen-collections-core/README.md @@ -0,0 +1,6 @@ +# frozen-collections-core + +This crate contains the implementation logic for the +frozen-collections crate. Users of frozen collections +should generally depend on the frozen-collections crate +instead of this one. diff --git a/frozen-collections-core/src/analyzers/duplicate_key_detector.rs b/frozen-collections-core/src/analyzers/duplicate_key_detector.rs new file mode 100644 index 0000000..ab69c1c --- /dev/null +++ b/frozen-collections-core/src/analyzers/duplicate_key_detector.rs @@ -0,0 +1,56 @@ +use std::collections::HashSet; +use std::hash::Hash; + +/// Look for duplicate keys. +/// +/// # Errors +/// +/// This fails if any keys appear twice in the input. 
pub fn check_duplicate_keys<'a, K, I>(keys: I) -> Result<(), &'static str>
where
    K: Hash + Eq + 'a,
    I: Iterator<Item = &'a K>,
{
    // Every key encountered so far; `insert` returns `false` for a repeat.
    let mut seen = HashSet::new();
    let mut remaining = keys;

    // `all` stops consuming the iterator at the first key that was already
    // present, so nothing past the first duplicate is examined.
    if remaining.all(|key| seen.insert(key)) {
        Ok(())
    } else {
        Err("duplicate keys detected in input payload")
    }
}
+ const ACCEPTABLE_COLLISION_PERCENTAGE: usize = 5; + + // By how much do we shrink the acceptable # collisions per iteration? + const ACCEPTABLE_COLLISION_PERCENTAGE_OF_REDUCTION: usize = 20; + + // thresholds to categorize input sizes + const MEDIUM_INPUT_SIZE_THRESHOLD: usize = 128; + const LARGE_INPUT_SIZE_THRESHOLD: usize = 1000; + + // amount by which the table can be larger than the input + const MAX_SMALL_INPUT_MULTIPLIER: usize = 16; + const MAX_MEDIUM_INPUT_MULTIPLIER: usize = 10; + const MAX_LARGE_INPUT_MULTIPLIER: usize = 3; + + // Table of prime numbers to use as hash table sizes for medium-sized inputs + const PRIMES: [usize; 60] = [ + 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919, 1103, 1327, 1597, 1931, 2333, 2801, + 3371, 4049, 4861, 5839, 7013, 8419, 10_103, 12_143, 14_591, 17_519, 21_023, 25_229, 30_293, + 36_353, 43_627, 52_361, 62_851, 75_431, 90_523, 108_631, 130_363, 156_437, 187_751, + 225_307, 270_371, 324_449, 389_357, 467_237, 560_689, 672_827, 807_403, 968_897, 1_162_687, + 1_395_263, 1_674_319, 2_009_191, 2_411_033, 2_893_249, 3_471_899, 4_166_287, 4_999_559, + 5_999_471, 7_199_369, + ]; + + let hash_codes: Vec = hash_codes.collect(); + let mut acceptable_collisions = if hash_codes.len() < MEDIUM_INPUT_SIZE_THRESHOLD { + // for small enough inputs, we try for perfection + 0 + } else { + (hash_codes.len() / 100) * ACCEPTABLE_COLLISION_PERCENTAGE + }; + + // the minimum table size we can tolerate, given the acceptable collision rate + let min_size = hash_codes.len() - acceptable_collisions; + + // the maximum table size we consider, given a scaled growth factor for different input sizes + let max_size = if hash_codes.len() < MEDIUM_INPUT_SIZE_THRESHOLD { + hash_codes.len() * MAX_SMALL_INPUT_MULTIPLIER + } else if hash_codes.len() < LARGE_INPUT_SIZE_THRESHOLD { + hash_codes.len() * MAX_MEDIUM_INPUT_MULTIPLIER + } else { + hash_codes.len() * MAX_LARGE_INPUT_MULTIPLIER + }; + + let mut use_table: BitVec = 
BitVec::with_capacity(max_size); + use_table.resize(max_size, false); + + let mut best_size = 0; + let mut best_num_collisions = hash_codes.len(); + + let mut sizes = Vec::new(); + + // always try the exact size first to optimally handle cases where the keys are unique integers + sizes.push(hash_codes.len()); + + if max_size < MEDIUM_INPUT_SIZE_THRESHOLD { + sizes.extend(min_size..=max_size); + } else if min_size < PRIMES[PRIMES.len() - 1] { + // For medium input sizes, we only consider a predefined set of prime numbers rather than being exhaustive as in the + // case for smaller input sizes. This is to constrain the total amount of compute time that gets spent in this code. + sizes.extend(PRIMES); + } else { + // For very large input sizes, we try a few multiples of the input size + let mut size = min_size; + let increment = hash_codes.len() / 3; + while size <= max_size { + sizes.push(size); + + size += increment; + + // find next prime + size |= 1; + while !is_prime(size as u64) { + size += 2; + } + } + } + + for size in sizes { + if size < min_size { + continue; + } + + if size > max_size { + break; + } + + use_table.fill(false); + let mut num_collisions = 0; + + for code in &hash_codes { + let slot = (code % (size as u64)) as usize; + if use_table[slot] { + num_collisions += 1; + if num_collisions >= best_num_collisions { + break; + } + } else { + use_table.set(slot, true); + } + } + + if num_collisions < best_num_collisions { + if best_size == 0 || num_collisions <= acceptable_collisions { + best_num_collisions = num_collisions; + best_size = size; + } + + if num_collisions <= acceptable_collisions { + // we have a winner! + break; + } + } + + if acceptable_collisions > 0 { + // The larger the table, the fewer collisions we tolerate. The idea + // here is to reduce the risk of a table getting very big and still + // having a relatively high count of collisions. 
/// Reports whether `candidate` is a prime number.
///
/// Uses trial division: first by 2, 3 and 5, then by successive odd numbers
/// for as long as the divisor does not exceed the square root of `candidate`.
/// Values below 2 are never prime.
fn is_prime(candidate: u64) -> bool {
    if candidate < 2 {
        return false;
    }

    // A multiple of 2, 3 or 5 is prime only when it *is* that number.
    for small in [2_u64, 3, 5] {
        if candidate % small == 0 {
            return candidate == small;
        }
    }

    // `divisor <= candidate / divisor` is the integer form of
    // `divisor <= sqrt(candidate)`: it cannot overflow and avoids the rounding
    // of a floating-point square root for large inputs.
    let mut divisor = 7;
    while divisor <= candidate / divisor {
        if candidate % divisor == 0 {
            return false;
        }

        divisor += 2;
    }

    true
}
Vec::with_capacity(case.num_hash_codes); + + if case.randomize_hash_codes { + for _ in 0..case.num_hash_codes { + hash_codes.push(rng.gen()); + } + } else { + for count in 0..case.num_hash_codes { + hash_codes.push(count as u64); + } + } + + let result = analyze_hash_codes(hash_codes.iter().copied()); + + assert_eq!(case.expected_num_hash_slots, result.num_hash_slots); + assert_eq!( + case.expected_num_hash_collisions, + result.num_hash_collisions + ); + } + } +} diff --git a/frozen-collections-core/src/analyzers/int_key_analyzer.rs b/frozen-collections-core/src/analyzers/int_key_analyzer.rs new file mode 100644 index 0000000..5c5774e --- /dev/null +++ b/frozen-collections-core/src/analyzers/int_key_analyzer.rs @@ -0,0 +1,68 @@ +use num_traits::PrimInt; + +/// How to treat integer keys for best performance. +#[derive(PartialEq, Eq, Debug)] +pub enum IntKeyAnalysisResult { + /// Normal hashing + Normal, + + /// All keys are in a continuous range + Range, +} + +/// Look for well-known patterns we can optimize for with integer map keys. 
+pub fn analyze_int_keys(keys: I) -> IntKeyAnalysisResult +where + K: PrimInt, + I: Iterator, +{ + let mut min = K::max_value(); + let mut max = K::min_value(); + let mut count = K::zero(); + for key in keys { + min = min.min(key); + max = max.max(key); + count = count + K::one(); + } + + if count == K::zero() { + IntKeyAnalysisResult::Normal + } else if max.sub(min) == count - K::one() { + IntKeyAnalysisResult::Range + } else { + IntKeyAnalysisResult::Normal + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_analyze_int_keys_normal() { + let keys = vec![1, 3, 5, 7, 9]; + let result = analyze_int_keys(keys.into_iter()); + assert_eq!(result, IntKeyAnalysisResult::Normal); + } + + #[test] + fn test_analyze_int_keys_range() { + let keys = vec![1, 2, 3, 4, 5]; + let result = analyze_int_keys(keys.into_iter()); + assert_eq!(result, IntKeyAnalysisResult::Range); + } + + #[test] + fn test_analyze_int_keys_empty() { + let keys: Vec = vec![]; + let result = analyze_int_keys(keys.into_iter()); + assert_eq!(result, IntKeyAnalysisResult::Normal); + } + + #[test] + fn test_analyze_int_keys_single() { + let keys = vec![1]; + let result = analyze_int_keys(keys.into_iter()); + assert_eq!(result, IntKeyAnalysisResult::Range); + } +} diff --git a/frozen-collections-core/src/analyzers/mod.rs b/frozen-collections-core/src/analyzers/mod.rs new file mode 100644 index 0000000..3a83ab5 --- /dev/null +++ b/frozen-collections-core/src/analyzers/mod.rs @@ -0,0 +1,11 @@ +//! Logic to analyze collection input data to assess the best implementation choices. 
+ +pub use duplicate_key_detector::*; +pub use hash_code_analyzer::*; +pub use int_key_analyzer::*; +pub use slice_key_analyzer::*; + +mod duplicate_key_detector; +mod hash_code_analyzer; +mod int_key_analyzer; +mod slice_key_analyzer; diff --git a/frozen-collections-core/src/analyzers/slice_key_analyzer.rs b/frozen-collections-core/src/analyzers/slice_key_analyzer.rs new file mode 100644 index 0000000..674ab2d --- /dev/null +++ b/frozen-collections-core/src/analyzers/slice_key_analyzer.rs @@ -0,0 +1,295 @@ +use std::cmp::{max, min}; +use std::collections::{HashMap, HashSet}; +use std::hash::{BuildHasher, Hash}; + +/// How to treat keys which are slices for best performance. +#[derive(PartialEq, Eq, Debug)] +pub enum SliceKeyAnalysisResult { + /// Normal hashing + Normal, + + /// Hash left-justified subslices + LeftHandSubslice { + subslice_index: usize, + subslice_len: usize, + }, + + /// Hash right-justified subslices + RightHandSubslice { + subslice_index: usize, + subslice_len: usize, + }, + + /// Use the length of the slices as hash codes, instead of hashing the slices + Length, +} + +/// Look for well-known patterns we can optimize for map keys. +/// +/// The idea here is to find the shortest subslice across all the input slices which are maximally unique. A corresponding +/// subslice range is then applied to incoming slices being hashed to perform lookups. Keeping the subslices as +/// short as possible minimizes the number of bytes involved in hashing, speeding up the whole process. +/// +/// What we do here is pretty simple. We loop over the input slices, looking for the shortest subslice with a good +/// enough uniqueness factor. We look at all the slices both left-justified and right-justified as this maximizes +/// the opportunities to find unique subslices, especially in the case of many slices with the same prefix or suffix. +/// +/// We also analyze the length of the input slices. 
If the length of the slices are sufficiently unique, +/// we can totally skip hashing and just use their lengths as hash codes. +pub fn analyze_slice_keys<'a, K, I, BH>(keys: I, bh: &BH) -> SliceKeyAnalysisResult +where + K: Hash + 'a, + I: Iterator, + BH: BuildHasher, +{ + let keys = keys.collect(); + + // first, see if we can just use slice lengths as hash codes + let result = analyze_lengths(&keys); + + if result == SliceKeyAnalysisResult::Normal { + // if we can't use slice lengths, look for suitable subslices + analyze_subslices(&keys, bh) + } else { + result + } +} + +/// See if we can use slice lengths instead of hashing +fn analyze_lengths(keys: &Vec<&[T]>) -> SliceKeyAnalysisResult { + const MAX_IDENTICAL_LENGTHS: usize = 3; + const MAX_SLICES: usize = 255; + + if keys.len() > MAX_SLICES { + // if there are a lof of slices, assume we'll get too many length collisions + return SliceKeyAnalysisResult::Normal; + } + + let mut lengths = HashMap::new(); + for s in keys { + let v = lengths.get(&s.len()); + if let Some(count) = v { + if count == &MAX_IDENTICAL_LENGTHS { + return SliceKeyAnalysisResult::Normal; + } + + lengths.insert(s.len(), count + 1); + } else { + lengths.insert(s.len(), 1); + } + } + + SliceKeyAnalysisResult::Length +} + +/// See if we can use subslices to reduce the time spent hashing +#[allow(clippy::cast_possible_truncation)] +#[allow(clippy::cast_sign_loss)] +#[allow(clippy::cast_precision_loss)] +fn analyze_subslices(keys: &Vec<&[T]>, bh: &BH) -> SliceKeyAnalysisResult +where + T: Hash, + BH: BuildHasher, +{ + // constrain the amount of work we do in this code + const MAX_SUBSLICE_LENGTH_LIMIT: usize = 16; + const ACCEPTABLE_DUPLICATE_PERCENT: f64 = 0.05; + + let mut min_len = usize::MAX; + let mut max_len = 0; + for s in keys { + min_len = min(min_len, s.len()); + max_len = max(max_len, s.len()); + } + + // tolerate a certain amount of duplicate subslices + let acceptable_duplicates = ((keys.len() as f64) * ACCEPTABLE_DUPLICATE_PERCENT) 
as usize; + + // this set is reused for each call to is_sufficiently_unique + let mut set = HashSet::with_capacity(keys.len()); + + // for each subslice length, prefer the shortest length that provides enough uniqueness + let max_subslice_len = min(min_len, MAX_SUBSLICE_LENGTH_LIMIT); + + let mut subslice_len = 1; + while subslice_len <= max_subslice_len { + // For each index, get a uniqueness factor for the left-justified subslices. + // If any is above our threshold, we're done. + let mut subslice_index = 0; + while subslice_index <= min_len - subslice_len { + if is_sufficiently_unique( + keys, + subslice_index, + subslice_len, + true, + &mut set, + acceptable_duplicates, + bh, + ) { + return if subslice_len == max_len { + SliceKeyAnalysisResult::Normal + } else { + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } + }; + } + + subslice_index += 1; + } + + // There were no left-justified slices of this length available. + // If all the slices are of the same length, then just checking left-justification is sufficient. + // But if any slices are of different lengths, then we'll get different alignments for left- vs + // right-justified subslices, and so we also check right-justification. + if min_len != max_len { + // For each index, get a uniqueness factor for the right-justified subslices. + // If any is above our threshold, we're done. + subslice_index = 0; + while subslice_index <= min_len - subslice_len { + if is_sufficiently_unique( + keys, + subslice_index, + subslice_len, + false, + &mut set, + acceptable_duplicates, + bh, + ) { + return SliceKeyAnalysisResult::RightHandSubslice { + subslice_index, + subslice_len, + }; + } + + subslice_index += 1; + } + } + + subslice_len += 1; + } + + // could not find a subslice that was good enough. 
+ SliceKeyAnalysisResult::Normal +} + +/// Checks whether hashing one fixed-size subslice of every key yields few enough duplicates. +/// +/// When `left_justified` is true the subslice of each key starts `subslice_index` elements +/// from the front; otherwise it ends `subslice_index` elements before the back. `set` is +/// scratch storage that is cleared on entry. Returns `false` as soon as more than +/// `acceptable_duplicates` hash codes repeat, `true` otherwise. +/// +/// Every key must hold at least `subslice_index + subslice_len` elements. +fn is_sufficiently_unique( + keys: &Vec<&[T]>, + subslice_index: usize, + subslice_len: usize, + left_justified: bool, + set: &mut HashSet, + acceptable_duplicates: usize, + bh: &BH, +) -> bool +where + T: Hash, + BH: BuildHasher, +{ + set.clear(); + + let mut acceptable_duplicates = acceptable_duplicates; + for s in keys { + let sub = if left_justified { + &s[subslice_index..subslice_index + subslice_len] + } else { + // the subslice ends `subslice_index` elements before the end of the key, so it + // has to start a full `subslice_len` earlier; starting at `len - index - 1` + // would run past the end of the key for any subslice longer than one element + let start = s.len() - subslice_index - subslice_len; + &s[start..start + subslice_len] + }; + + if !set.insert(bh.hash_one(sub)) { + if acceptable_duplicates == 0 { + return false; + } + + acceptable_duplicates -= 1; + } + } + + true +} + +#[cfg(test)] +mod tests { + use std::hash::RandomState; + + use super::*; + + struct AnalysisTestCase<'a> { + slices: &'a [&'a str], + expected: SliceKeyAnalysisResult, + } + + #[test] + fn analyze_string_keys_test() { + const ANALYSIS_TEST_CASES: [AnalysisTestCase; 9] = [ + AnalysisTestCase { + slices: &[ + "AAA", "ABB", "ACC", "ADD", "AEE", "AFF", "AGG", "AHH", "AII", "AJJ", "AKK", + "ALL", "AMM", "ANN", "AOO", "APP", "AQQ", "ARR", "ASS", "ATT", "AUU", + ], + expected: SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: 1, + subslice_len: 1, + }, + }, + AnalysisTestCase { + slices: &["A00", "B00", "C00", "D00"], + expected: SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: 0, + subslice_len: 1, + }, + }, + AnalysisTestCase { + slices: &["A", "B", "C", "D", "E2"], + expected: SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: 0, + subslice_len: 1, + }, + }, + AnalysisTestCase { + slices: &["A", "B", "C", "D", "E2", ""], + expected: SliceKeyAnalysisResult::Normal, + }, + AnalysisTestCase { + slices: &["XA", "XB", "XC", "XD", "XE2"], + expected: SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: 1, + subslice_len: 1, + }, + }, + AnalysisTestCase { + slices: &["XXA", "XXB", "XXC", "XXD", "XXX", "XXXE"], + expected: 
SliceKeyAnalysisResult::RightHandSubslice { + subslice_index: 0, + subslice_len: 1, + }, + }, + AnalysisTestCase { + slices: &["ABC", "DEFG", "HIJKL", "MNOPQR", "STUVWX", "YZ"], + expected: SliceKeyAnalysisResult::Length, + }, + AnalysisTestCase { + slices: &[ + "ABC", "DEFG", "HIJKL", "MNOPQR", "STUVWX", "YZ", "D2", "D3", "D4", + ], + expected: SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: 1, + subslice_len: 1, + }, + }, + AnalysisTestCase { + slices: &["AAA", "1AA", "A1A", "AA1", "BBB", "1BB", "B1B", "BB1"], + expected: SliceKeyAnalysisResult::Normal, + }, + ]; + + for (count, case) in ANALYSIS_TEST_CASES.into_iter().enumerate() { + println!("Test case #{count}"); + + let keys = case.slices.iter().map(|x| x.as_bytes()); + assert_eq!(case.expected, analyze_slice_keys(keys, &RandomState::new())); + } + } +} diff --git a/frozen-collections-core/src/lib.rs b/frozen-collections-core/src/lib.rs new file mode 100644 index 0000000..8cde6b9 --- /dev/null +++ b/frozen-collections-core/src/lib.rs @@ -0,0 +1,11 @@ +//! Implementation crate for the frozen collections. +//! +//! Application code should generally not interact with +//! this crate directly. Please use +//! the `frozen-collections` crate instead. 
+ +pub mod analyzers; +pub mod macros; +pub mod specialized_maps; +pub mod specialized_sets; +pub mod traits; diff --git a/frozen-collections-core/src/macros/frozen_map.rs b/frozen-collections-core/src/macros/frozen_map.rs new file mode 100644 index 0000000..24b3a0d --- /dev/null +++ b/frozen-collections-core/src/macros/frozen_map.rs @@ -0,0 +1,260 @@ +use std::cmp::PartialEq; +use std::fmt::Display; +use std::hash::RandomState; +use std::str::FromStr; + +use bitvec::macros::internal::funty::Fundamental; +use num_traits::PrimInt; +use proc_macro2::TokenStream; +use quote::{format_ident, quote, ToTokens}; +use syn::parse::{Parse, ParseStream}; +use syn::{parse2, Error, Expr, LitInt, LitStr, Token, Type}; + +use crate::analyzers::{analyze_int_keys, IntKeyAnalysisResult}; +use crate::analyzers::{analyze_slice_keys, SliceKeyAnalysisResult}; + +struct Entry(Expr, Expr); + +struct Map { + ty: Type, + entries: Vec, +} + +impl ToTokens for Entry { + fn to_tokens(&self, tokens: &mut TokenStream) { + let key = self.0.clone(); + let value = self.1.clone(); + + tokens.extend(quote!(#key, #value)); + } +} + +impl Parse for Map { + fn parse(input: ParseStream) -> syn::Result { + let mut entries = vec![]; + + let ty = input.parse::()?; + input.parse::()?; + + while !input.is_empty() { + let key = input.parse::()?; + input.parse::()?; + let value = input.parse::()?; + + entries.push(Entry(key, value)); + + if input.peek(Token![,]) { + input.parse::()?; + } + } + + Ok(Self { ty, entries }) + } +} + +#[derive(PartialEq)] +enum KeyVariety { + Common, + Integer, + String, +} + +/// Implementation logic for the `frozen_map!` macro. 
+#[must_use] +#[allow(clippy::module_name_repetitions)] +pub fn frozen_map_macro(args: TokenStream) -> TokenStream { + frozen_map_macro_internal(args).unwrap_or_else(|error| error.to_compile_error()) +} + +fn frozen_map_macro_internal(args: TokenStream) -> Result { + // proc_marco2 version of "parse_macro_input!(input as ParsedMap)" + let input = parse2::(args)?; + let mut entries = input.entries; + + if entries.len() < 3 { + return Ok(quote!( + ::frozen_collections::specialized_maps::ScanningMap::try_from(vec![ + #( + (#entries), + )* + ]) + .unwrap() + )); + } + + let mut ty = input.ty; + let type_name = format!("{}", ty.to_token_stream()); + + let mut variety = KeyVariety::Integer; + let mut int_analysis = IntKeyAnalysisResult::Normal; + let mut slice_analysis = SliceKeyAnalysisResult::Normal; + let mut include_payload_size = true; + + match type_name.as_str() { + "u8" => int_analysis = process_int_keys::(&entries)?, + "i8" => int_analysis = process_int_keys::(&entries)?, + "u16" => int_analysis = process_int_keys::(&entries)?, + "i16" => int_analysis = process_int_keys::(&entries)?, + "u32" => int_analysis = process_int_keys::(&entries)?, + "i32" => int_analysis = process_int_keys::(&entries)?, + "u64" => int_analysis = process_int_keys::(&entries)?, + "i64" => int_analysis = process_int_keys::(&entries)?, + "u128" => int_analysis = process_int_keys::(&entries)?, + "i128" => int_analysis = process_int_keys::(&entries)?, + + "& str" => { + variety = KeyVariety::String; + slice_analysis = + process_string_keys(entries.iter().map(|x| x.0.to_token_stream())).unwrap(); + + let mut copy = Vec::with_capacity(entries.len()); + for kv in entries { + let original = kv.0.to_token_stream(); + let modified = quote!(String::from(#original)); + copy.push(Entry(parse2::(modified)?, kv.1)); + } + + entries = copy; + ty = parse2::(quote!(String))?; + } + + _ => variety = KeyVariety::Common, + } + + let map_type = match variety { + KeyVariety::Integer => { + if int_analysis == 
IntKeyAnalysisResult::Range { + include_payload_size = false; + format_ident!("{}", "IntegerRangeMap") + } else { + format_ident!("{}", "IntegerMap") + } + } + + KeyVariety::String => match slice_analysis { + SliceKeyAnalysisResult::Normal => format_ident!("{}", "CommonMap"), + SliceKeyAnalysisResult::Length => format_ident!("{}", "LengthMap"), + + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: _, + subslice_len: _, + } => format_ident!("{}", "LeftSliceMap"), + + SliceKeyAnalysisResult::RightHandSubslice { + subslice_index: _, + subslice_len: _, + } => format_ident!("{}", "RightSliceMap"), + }, + + KeyVariety::Common => format_ident!("{}", "CommonMap"), + }; + + let payload_size = format_ident!( + "{}", + if entries.len() <= u8::MAX.as_usize() { + "u8" + } else if entries.len() <= u16::MAX.as_usize() { + "u16" + } else { + "usize" + } + ); + + Ok(match slice_analysis { + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } => { + quote!(::frozen_collections::specialized_maps::#map_type::<#ty, _, #payload_size, ::std::hash::RandomState>::try_from(vec![ + #( + (#entries), + )* + ], #subslice_index..#subslice_index + #subslice_len).unwrap()) + } + + SliceKeyAnalysisResult::RightHandSubslice { + subslice_index, + subslice_len, + } => { + quote!(::frozen_collections::specialized_maps::#map_type::<#ty, _, #payload_size, ::std::hash::RandomState>::try_from(vec![ + #( + (#entries), + )* + ], #subslice_index..#subslice_index + #subslice_len).unwrap()) + } + + _ => { + if include_payload_size { + quote!(::frozen_collections::specialized_maps::#map_type::<#ty, _, #payload_size>::try_from(vec![ + #( + (#entries), + )* + ]).unwrap()) + } else { + quote!(::frozen_collections::specialized_maps::#map_type::<#ty, _>::try_from(vec![ + #( + (#entries), + )* + ]).unwrap()) + } + } + }) +} + +fn process_int_keys(entries: &[Entry]) -> syn::Result +where + K: PrimInt + FromStr, + K::Err: Display, +{ + let keys = entries.iter().map(|x| 
x.0.to_token_stream()); + let mut parsed = Vec::new(); + for key in keys { + let li = parse2::(key)?; + let v = li.base10_parse::()?; + parsed.push(v); + } + + Ok(analyze_int_keys(parsed.into_iter())) +} + +fn process_string_keys(keys: I) -> syn::Result +where + I: Iterator, +{ + let mut parsed = Vec::new(); + for key in keys { + let ls = parse2::(key)?; + parsed.push(ls.value()); + } + + let bh = RandomState::new(); + Ok(analyze_slice_keys(parsed.iter().map(String::as_bytes), &bh)) +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use proc_macro2::TokenStream; + + use crate::macros::frozen_map::frozen_map_macro; + + #[test] + fn basic() { + let ts = TokenStream::from_str( + " + &str, + \"first_key\": (1, \"first_value\"), + \"second_key\": (2, \"second_value\"), + \"third_key\": (3, \"third_value\"), + \"fourth_key\": (4, \"fourth_value\"), + \"fifth_key\": (5, \"fifth_value\"), + ", + ) + .unwrap(); + + let ts2 = frozen_map_macro(ts); + + println!("{ts2}"); + } +} diff --git a/frozen-collections-core/src/macros/frozen_set.rs b/frozen-collections-core/src/macros/frozen_set.rs new file mode 100644 index 0000000..d8f9051 --- /dev/null +++ b/frozen-collections-core/src/macros/frozen_set.rs @@ -0,0 +1,255 @@ +use std::cmp::PartialEq; +use std::fmt::Display; +use std::hash::RandomState; +use std::str::FromStr; + +use bitvec::macros::internal::funty::Fundamental; +use num_traits::PrimInt; +use proc_macro2::TokenStream; +use quote::{format_ident, quote, ToTokens}; +use syn::parse::{Parse, ParseStream}; +use syn::{parse2, Error, Expr, LitInt, LitStr, Token, Type}; + +use crate::analyzers::{analyze_int_keys, IntKeyAnalysisResult}; +use crate::analyzers::{analyze_slice_keys, SliceKeyAnalysisResult}; + +struct Value(Expr); + +struct Set { + ty: Type, + values: Vec, +} + +impl ToTokens for Value { + fn to_tokens(&self, tokens: &mut TokenStream) { + let value = self.0.clone(); + + tokens.extend(quote!(#value)); + } +} + +impl Parse for Set { + fn parse(input: 
ParseStream) -> syn::Result { + let mut values = vec![]; + + let ty = input.parse::()?; + input.parse::()?; + + while !input.is_empty() { + let value = input.parse::()?; + + values.push(Value(value)); + + if input.peek(Token![,]) { + input.parse::()?; + } + } + + Ok(Self { ty, values }) + } +} + +#[derive(PartialEq)] +enum ValueVariety { + Common, + Integer, + String, +} + +/// Implementation logic for the `frozen_set!` macro. +#[must_use] +#[allow(clippy::module_name_repetitions)] +pub fn frozen_set_macro(args: TokenStream) -> TokenStream { + frozen_set_macro_internal(args).unwrap_or_else(|error| error.to_compile_error()) +} + +/// Parses the macro input, analyzes the values, and emits the expression that builds +/// the set type selected by that analysis. +/// +/// Failures to parse the input or its literal values are returned as errors so that +/// the caller can report them as compile errors instead of panicking. +fn frozen_set_macro_internal(args: TokenStream) -> Result { + // proc_macro2 version of "parse_macro_input!(input as ParsedSet)" + let input = parse2::(args)?; + let mut values = input.values; + + // with fewer than three values, skip the analysis and use a scanning set; + // unwrap like every other branch so the macro always expands to a set, not a `Result` + if values.len() < 3 { + return Ok(quote!( + ::frozen_collections::specialized_sets::ScanningSet::try_from(vec![ + #( + (#values), + )* + ]) + .unwrap() + )); + } + + let mut ty = input.ty; + let type_name = format!("{}", ty.to_token_stream()); + + let mut variety = ValueVariety::Integer; + let mut int_analysis = IntKeyAnalysisResult::Normal; + let mut slice_analysis = SliceKeyAnalysisResult::Normal; + let mut include_payload_size = true; + + match type_name.as_str() { + "u8" => int_analysis = process_int_values::(&values)?, + "i8" => int_analysis = process_int_values::(&values)?, + "u16" => int_analysis = process_int_values::(&values)?, + "i16" => int_analysis = process_int_values::(&values)?, + "u32" => int_analysis = process_int_values::(&values)?, + "i32" => int_analysis = process_int_values::(&values)?, + "u64" => int_analysis = process_int_values::(&values)?, + "i64" => int_analysis = process_int_values::(&values)?, + "u128" => int_analysis = process_int_values::(&values)?, + "i128" => int_analysis = process_int_values::(&values)?, + + "& str" => { + variety = ValueVariety::String; + // propagate a value that is not a string literal as a compile error, + // the same way the integer arms above do + slice_analysis = + process_string_values(values.iter().map(|x| 
x.0.to_token_stream()))?; + + let mut copy = Vec::with_capacity(values.len()); + for value in values { + let original = value.0.to_token_stream(); + let modified = quote!(String::from(#original)); + copy.push(Value(parse2::(modified)?)); + } + + values = copy; + ty = parse2::(quote!(String))?; + } + + _ => variety = ValueVariety::Common, + } + + let set_type = match variety { + ValueVariety::Integer => { + if int_analysis == IntKeyAnalysisResult::Range { + include_payload_size = false; + format_ident!("{}", "IntegerRangeSet") + } else { + format_ident!("{}", "IntegerSet") + } + } + + ValueVariety::String => match slice_analysis { + SliceKeyAnalysisResult::Normal => format_ident!("{}", "CommonSet"), + SliceKeyAnalysisResult::Length => format_ident!("{}", "LengthSet"), + + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index: _, + subslice_len: _, + } => format_ident!("{}", "LeftSliceSet"), + + SliceKeyAnalysisResult::RightHandSubslice { + subslice_index: _, + subslice_len: _, + } => format_ident!("{}", "RightSliceSet"), + }, + + ValueVariety::Common => format_ident!("{}", "CommonSet"), + }; + + let payload_size = format_ident!( + "{}", + if values.len() <= u8::MAX.as_usize() { + "u8" + } else if values.len() <= u16::MAX.as_usize() { + "u16" + } else { + "usize" + } + ); + + Ok(match slice_analysis { + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } => { + quote!(::frozen_collections::specialized_sets::#set_type::<#ty, #payload_size, ::std::hash::RandomState>::try_from(vec![ + #( + (#values), + )* + ], #subslice_index..#subslice_index + #subslice_len).unwrap()) + } + + SliceKeyAnalysisResult::RightHandSubslice { + subslice_index, + subslice_len, + } => { + quote!(::frozen_collections::specialized_sets::#set_type::<#ty, #payload_size, ::std::hash::RandomState>::try_from(vec![ + #( + (#values), + )* + ], #subslice_index..#subslice_index + #subslice_len).unwrap()) + } + + _ => { + if include_payload_size { + 
quote!(::frozen_collections::specialized_sets::#set_type::<#ty, #payload_size>::try_from(vec![ + #( + (#values), + )* + ]).unwrap()) + } else { + quote!(::frozen_collections::specialized_sets::#set_type::<#ty>::try_from(vec![ + #( + (#values), + )* + ]).unwrap()) + } + } + }) +} + +fn process_int_values(values: &[Value]) -> syn::Result +where + T: PrimInt + FromStr, + T::Err: Display, +{ + let mut parsed = Vec::new(); + for v in values.iter().map(|x| x.0.to_token_stream()) { + let li = parse2::(v)?; + let v = li.base10_parse::()?; + parsed.push(v); + } + + Ok(analyze_int_keys(parsed.into_iter())) +} + +fn process_string_values(values: I) -> syn::Result +where + I: Iterator, +{ + let mut parsed = Vec::new(); + for v in values { + let ls = parse2::(v)?; + parsed.push(ls.value()); + } + + let bh = RandomState::new(); + Ok(analyze_slice_keys(parsed.iter().map(String::as_bytes), &bh)) +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use proc_macro2::TokenStream; + + use crate::macros::frozen_set::frozen_set_macro; + + #[test] + fn basic() { + let ts = TokenStream::from_str( + " + &str, + \"first_value\", + \"second_value\", + \"third_value\", + \"fourth_value\", + \"fifth_value\", + ", + ) + .unwrap(); + + let ts2 = frozen_set_macro(ts); + + println!("{ts2}"); + } +} diff --git a/frozen-collections-core/src/macros/mod.rs b/frozen-collections-core/src/macros/mod.rs new file mode 100644 index 0000000..39ebb9c --- /dev/null +++ b/frozen-collections-core/src/macros/mod.rs @@ -0,0 +1,7 @@ +//! Implementation logic for the frozen collection macros. 
+ +pub use frozen_map::frozen_map_macro; +pub use frozen_set::frozen_set_macro; + +mod frozen_map; +mod frozen_set; diff --git a/frozen-collections-core/src/specialized_maps/common_map.rs b/frozen-collections-core/src/specialized_maps/common_map.rs new file mode 100644 index 0000000..9012512 --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/common_map.rs @@ -0,0 +1,373 @@ +use std::borrow::Borrow; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::RandomState; +use std::hash::{BuildHasher, Hash}; +use std::intrinsics::transmute; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ops::{Index, IndexMut}; + +use num_traits::{PrimInt, Unsigned}; + +use crate::analyzers::{analyze_hash_codes, check_duplicate_keys}; +use crate::specialized_maps::hash_table::HashTable; +use crate::specialized_maps::utils::any_duplicate_keys; +use crate::specialized_maps::{ + IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +use crate::traits::Len; + +/// A general purpose map. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the map. A `u8` will allow up to 255 entries, `u16` +/// will allow up to 65,535 entries, and `usize` will allow +/// up to `usize::MAX` entries. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenMap` type or the `frozen_map!` macro. 
+#[derive(Clone)] +pub struct CommonMap { + pub(crate) table: HashTable, + bh: BH, +} + +impl CommonMap +where + K: Hash + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[allow(clippy::missing_errors_doc)] + pub fn with_hasher(payload: Vec<(K, V)>, bh: BH) -> std::result::Result { + check_duplicate_keys(payload.iter().map(|entry| &entry.0))?; + + let code_analysis = analyze_hash_codes(payload.iter().map(|entry| bh.hash_one(&entry.0))); + + Ok(Self { + table: HashTable::new(payload, code_analysis.num_hash_slots, |k| bh.hash_one(k))?, + bh, + }) + } +} + +impl CommonMap +where + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[inline] + #[must_use] + fn get_hash_info(&self, key: &Q) -> Range + where + Q: ?Sized + Hash + Eq, + { + let hash_code = self.bh.hash_one(key.borrow()); + self.table.get_hash_info(hash_code) + } + + #[inline] + #[must_use] + pub fn get(&self, key: &Q) -> Option<&V> + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some(&entry.1); + } + } + + None + } + + #[inline] + #[must_use] + pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)> + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some((&entry.0, &entry.1)); + } + } + + None + } + + #[inline] + #[must_use] + pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked_mut(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some(&mut entry.1); + } + } + + None + } + + #[allow(mutable_transmutes)] + pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]> + where + K: 
Borrow, + Q: ?Sized + Hash + Eq, + { + if any_duplicate_keys(keys) { + return None; + } + + unsafe { + let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit(); + let p = result.as_mut_ptr(); + + for (i, key) in keys.iter().enumerate() { + *(*p).get_unchecked_mut(i) = transmute(self.get(key)?); + } + + Some(result.assume_init()) + } + } + + #[inline] + #[must_use] + pub fn contains_key(&self, key: &Q) -> bool + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + self.get(key).is_some() + } +} + +impl CommonMap { + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.table.entries) + } + + #[must_use] + pub const fn keys(&self) -> Keys { + Keys::new(&self.table.entries) + } + + #[must_use] + pub const fn values(&self) -> Values { + Values::new(&self.table.entries) + } + + #[must_use] + pub fn into_keys(self) -> IntoKeys { + IntoKeys::new(self.table.entries) + } + + #[must_use] + pub fn into_values(self) -> IntoValues { + IntoValues::new(self.table.entries) + } + + #[must_use] + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut::new(self.table.entries.as_mut()) + } + + #[must_use] + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + ValuesMut::new(self.table.entries.as_mut()) + } + + #[must_use] + pub const fn hasher(&self) -> &BH { + &self.bh + } +} + +impl Len for CommonMap { + fn len(&self) -> usize { + self.table.len() + } +} + +impl Debug for CommonMap +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + self.table.fmt(f) + } +} + +impl Default for CommonMap +where + S: PrimInt + Unsigned, + BH: BuildHasher + Default, +{ + fn default() -> Self { + Self { + table: HashTable::default(), + bh: BH::default(), + } + } +} + +impl Index<&Q> for CommonMap +where + K: Borrow, + Q: ?Sized + Hash + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + type Output = V; + + fn index(&self, index: &Q) -> &Self::Output { + self.get(index).unwrap() + } +} + +impl IndexMut<&Q> for CommonMap +where + K: Borrow, + 
Q: ?Sized + Hash + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + fn index_mut(&mut self, index: &Q) -> &mut V { + self.get_mut(index).unwrap() + } +} + +impl<'a, K, V, S, BH> IntoIterator for &'a CommonMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V, S, BH> IntoIterator for &'a mut CommonMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl PartialEq for CommonMap +where + K: Hash + Eq, + V: PartialEq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +impl Eq for CommonMap +where + K: Hash + Eq, + V: Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ +} + +impl TryFrom> for CommonMap +where + K: Hash + Eq, + S: PrimInt + Unsigned, +{ + type Error = &'static str; + + fn try_from(payload: Vec<(K, V)>) -> std::result::Result { + Self::with_hasher(payload, RandomState::new()) + } +} + +impl TryFrom<[(K, V); N]> for CommonMap +where + K: Hash + Eq, + S: PrimInt + Unsigned, +{ + type Error = &'static str; + + fn try_from(payload: [(K, V); N]) -> std::result::Result { + Self::with_hasher(Vec::from_iter(payload), RandomState::new()) + } +} + +impl FromIterator<(K, V)> for CommonMap +where + K: Hash + Eq, + S: PrimInt + Unsigned, +{ + fn from_iter>(iter: T) -> Self { + Self::with_hasher(Vec::from_iter(iter), RandomState::new()).unwrap() + } +} + +impl IntoIterator for CommonMap { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.table.entries) + } +} + +#[cfg(test)] +mod tests { + use std::hash::RandomState; + + use super::*; + + #[test] + fn test_from_iter_empty() { + let pairs: Vec<(i32, i32)> = vec![]; + let map: 
CommonMap = pairs.into_iter().collect(); + assert!(map.is_empty()); + } + + #[test] + fn test_from_iter_single() { + let pairs = vec![(1, 2)]; + let map: CommonMap = pairs.into_iter().collect(); + assert_eq!(map.get(&1), Some(&2)); + } + + #[test] + fn test_from_iter_multiple() { + let pairs = vec![(1, 2), (3, 4), (5, 6)]; + let map: CommonMap = pairs.into_iter().collect(); + assert_eq!(map.get(&1), Some(&2)); + assert_eq!(map.get(&3), Some(&4)); + assert_eq!(map.get(&5), Some(&6)); + } +} diff --git a/frozen-collections-core/src/specialized_maps/hash_table.rs b/frozen-collections-core/src/specialized_maps/hash_table.rs new file mode 100644 index 0000000..e4ebf42 --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/hash_table.rs @@ -0,0 +1,142 @@ +use std::fmt::{Debug, Formatter, Result}; +use std::num::{NonZeroU64, NonZeroUsize}; +use std::ops::Range; + +use bitvec::macros::internal::funty::Fundamental; +use num_traits::{PrimInt, Unsigned}; + +#[derive(Clone)] +pub struct HashTable { + num_slots: NonZeroU64, + slots: Box<[HashTableSlot]>, + pub entries: Box<[(K, V)]>, +} + +#[derive(Clone)] +struct HashTableSlot { + min_index: S, + max_index: S, +} + +struct PrepItem { + hash_slot_index: usize, + entry: (K, V), +} + +impl HashTable +where + S: PrimInt + Unsigned, +{ + pub fn new( + mut payload: Vec<(K, V)>, + num_hash_slots: usize, + hash: F, + ) -> std::result::Result + where + F: Fn(&K) -> u64, + { + if payload.is_empty() { + return Ok(Self::default()); + } else if payload.len() > S::max_value().to_usize().unwrap() { + return Err("too many payload entries for the given collection size S"); + } + + let mut prep_items = Vec::with_capacity(payload.len()); + while let Some(entry) = payload.pop() { + let hash_code = hash(&entry.0); + let hash_slot_index = (hash_code % num_hash_slots as u64).as_usize(); + + prep_items.push(PrepItem { + hash_slot_index, + entry, + }); + } + + // sort items so hash collisions are contiguous + prep_items.sort_unstable_by(|x, 
y| x.hash_slot_index.cmp(&y.hash_slot_index)); + + let mut entry_index = 0; + let mut slots = Vec::with_capacity(num_hash_slots); + let mut entries = payload; + + slots.resize_with(num_hash_slots, || HashTableSlot { + min_index: S::zero(), + max_index: S::zero(), + }); + + while let Some(mut item) = prep_items.pop() { + let hash_slot_index = item.hash_slot_index; + let mut num_entries_in_hash_slot = 0; + + loop { + entries.push(item.entry); + num_entries_in_hash_slot += 1; + + if let Some(last) = prep_items.last() { + if last.hash_slot_index == hash_slot_index { + item = prep_items.pop().unwrap(); + continue; + } + } + + break; + } + + slots[hash_slot_index] = HashTableSlot { + min_index: S::from(entry_index).unwrap(), + max_index: S::from(entry_index).unwrap() + + S::from(num_entries_in_hash_slot).unwrap(), + }; + + entry_index += num_entries_in_hash_slot; + } + + Ok(Self { + num_slots: NonZeroU64::try_from(NonZeroUsize::try_from(slots.len()).unwrap()).unwrap(), + slots: slots.into_boxed_slice(), + entries: entries.into_boxed_slice(), + }) + } + + #[inline] + pub fn get_hash_info(&self, hash_code: u64) -> Range { + let hash_slot_index = (hash_code % self.num_slots).as_usize(); + let hash_slot = unsafe { self.slots.get_unchecked(hash_slot_index) }; + + hash_slot.min_index.to_usize().unwrap()..hash_slot.max_index.to_usize().unwrap() + } +} + +impl HashTable { + #[inline] + pub const fn len(&self) -> usize { + self.entries.len() + } +} + +impl Debug for HashTable +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + let pairs = self.entries.iter().map(|x| (&x.0, &x.1)); + f.debug_map().entries(pairs).finish() + } +} + +impl Default for HashTable +where + S: PrimInt + Unsigned, +{ + fn default() -> Self { + Self { + num_slots: NonZeroU64::try_from(1).unwrap(), + slots: Box::new([HashTableSlot { + min_index: S::zero(), + max_index: S::zero(), + }]), + entries: Box::new([]), + } + } +} diff --git 
a/frozen-collections-core/src/specialized_maps/integer_map.rs b/frozen-collections-core/src/specialized_maps/integer_map.rs new file mode 100644 index 0000000..4727722 --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/integer_map.rs @@ -0,0 +1,343 @@ +use std::borrow::Borrow; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::Hash; +use std::intrinsics::transmute; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ops::{Index, IndexMut}; + +use num_traits::{AsPrimitive, PrimInt, Unsigned}; + +use crate::analyzers::{analyze_hash_codes, check_duplicate_keys}; +use crate::specialized_maps::hash_table::HashTable; +use crate::specialized_maps::utils::any_duplicate_keys; +use crate::specialized_maps::{ + IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +use crate::traits::Len; + +/// A map whose keys are integers. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the map. A `u8` will allow up to 255 entries, `u16` +/// will allow up to 65,535 entries, and `usize` will allow +/// up to `usize::MAX` entries. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenMap` type or the `frozen_map!` macro. 
+#[derive(Clone)] +pub struct IntegerMap { + pub(crate) table: HashTable, +} + +impl IntegerMap +where + S: PrimInt + Unsigned, +{ + #[inline] + #[must_use] + fn get_hash_info(&self, key: &Q) -> Range + where + Q: ?Sized + PrimInt + AsPrimitive, + { + let hash_code = key.as_(); + self.table.get_hash_info(hash_code) + } + + #[inline] + #[must_use] + pub fn get(&self, key: &Q) -> Option<&V> + where + K: Borrow, + Q: ?Sized + PrimInt + AsPrimitive, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some(&entry.1); + } + } + + None + } + + #[inline] + #[must_use] + pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> + where + K: Borrow, + Q: ?Sized + PrimInt + AsPrimitive, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked_mut(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some(&mut entry.1); + } + } + + None + } + + /// Looks up several keys at once and returns a mutable reference to each value. + /// + /// Returns `None` when `any_duplicate_keys` reports a repeated key or when any + /// key is absent from the map. + pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]> + where + K: Borrow, + Q: ?Sized + PrimInt + AsPrimitive, + { + if any_duplicate_keys(keys) { + return None; + } + + // SAFETY: each reference comes from `get_mut`, so it is a genuine `&mut V` and the + // transmute only extends its lifetime to that of `&mut self`; a shared reference is + // never turned into a unique one. Non-aliasing relies on the keys being distinct + // (checked above, presumably by `any_duplicate_keys` - confirm) and assumes the map + // itself holds no duplicate keys - TODO confirm against the constructor. + unsafe { + let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit(); + let p = result.as_mut_ptr(); + + for (i, key) in keys.iter().enumerate() { + *(*p).get_unchecked_mut(i) = transmute(self.get_mut(key)?); + } + + Some(result.assume_init()) + } + } + + #[inline] + #[must_use] + pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)> + where + K: Borrow, + Q: ?Sized + PrimInt + AsPrimitive, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some((&entry.0, &entry.1)); + } + } + + None + } + + #[inline] + #[must_use] + pub fn contains_key(&self, key: &Q) -> bool + where + K: Borrow, + Q: ?Sized + PrimInt + 
AsPrimitive, + { + self.get(key).is_some() + } +} + +impl IntegerMap { + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.table.entries) + } + + #[must_use] + pub const fn keys(&self) -> Keys { + Keys::new(&self.table.entries) + } + + #[must_use] + pub const fn values(&self) -> Values { + Values::new(&self.table.entries) + } + + #[must_use] + pub fn into_keys(self) -> IntoKeys { + IntoKeys::new(self.table.entries) + } + + #[must_use] + pub fn into_values(self) -> IntoValues { + IntoValues::new(self.table.entries) + } + + #[must_use] + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut::new(self.table.entries.as_mut()) + } + + #[must_use] + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + ValuesMut::new(self.table.entries.as_mut()) + } +} + +impl Len for IntegerMap { + fn len(&self) -> usize { + self.table.len() + } +} + +impl Debug for IntegerMap +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + self.table.fmt(f) + } +} + +impl Default for IntegerMap +where + S: PrimInt + Unsigned, +{ + fn default() -> Self { + Self { + table: HashTable::default(), + } + } +} + +impl Index<&Q> for IntegerMap +where + K: Borrow, + Q: ?Sized + PrimInt + AsPrimitive, + S: PrimInt + Unsigned, +{ + type Output = V; + + fn index(&self, index: &Q) -> &Self::Output { + self.get(index).unwrap() + } +} + +impl IndexMut<&Q> for IntegerMap +where + K: Borrow, + Q: ?Sized + PrimInt + AsPrimitive, + S: PrimInt + Unsigned, +{ + fn index_mut(&mut self, index: &Q) -> &mut V { + self.get_mut(index).unwrap() + } +} + +impl<'a, K, V, S> IntoIterator for &'a IntegerMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut IntegerMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl PartialEq for IntegerMap +where + 
K: PrimInt + AsPrimitive, + V: PartialEq, + S: PrimInt + Unsigned, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +impl Eq for IntegerMap +where + K: PrimInt + AsPrimitive, + V: Eq, + S: PrimInt + Unsigned, +{ +} + +impl TryFrom> for IntegerMap +where + K: PrimInt + AsPrimitive + Hash + Eq, + S: PrimInt + Unsigned, +{ + type Error = &'static str; + + fn try_from(payload: Vec<(K, V)>) -> std::result::Result { + check_duplicate_keys(payload.iter().map(|entry| &entry.0))?; + + let code_analysis = analyze_hash_codes(payload.iter().map(|entry| entry.0.as_())); + Ok(Self { + table: HashTable::new(payload, code_analysis.num_hash_slots, |k| k.as_())?, + }) + } +} + +impl TryFrom<[(K, V); N]> for IntegerMap +where + K: PrimInt + AsPrimitive + Hash + Eq, + S: PrimInt + Unsigned, +{ + type Error = &'static str; + + fn try_from(payload: [(K, V); N]) -> std::result::Result { + Self::try_from(Vec::from_iter(payload)) + } +} + +impl FromIterator<(K, V)> for IntegerMap +where + K: PrimInt + AsPrimitive + Hash + Eq, + S: PrimInt + Unsigned, +{ + fn from_iter>(iter: T) -> Self { + Self::try_from(Vec::from_iter(iter)).unwrap() + } +} + +impl IntoIterator for IntegerMap { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.table.entries) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_iter_empty() { + let pairs: Vec<(u32, u32)> = vec![]; + let map: IntegerMap = pairs.into_iter().collect(); + assert!(map.is_empty()); + } + + #[test] + fn test_from_iter_single() { + let pairs = vec![(1, 2)]; + let map: IntegerMap = pairs.into_iter().collect(); + assert_eq!(map.get(&1), Some(&2)); + } + + #[test] + fn test_from_iter_multiple() { + let pairs = vec![(1, 2), (3, 4), (5, 6)]; + let map: IntegerMap = pairs.into_iter().collect(); + assert_eq!(map.get(&1), 
Some(&2)); + assert_eq!(map.get(&3), Some(&4)); + assert_eq!(map.get(&5), Some(&6)); + } +} diff --git a/frozen-collections-core/src/specialized_maps/integer_range_map.rs b/frozen-collections-core/src/specialized_maps/integer_range_map.rs new file mode 100644 index 0000000..8b6e820 --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/integer_range_map.rs @@ -0,0 +1,336 @@ +use std::borrow::Borrow; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::Hash; +use std::intrinsics::transmute; +use std::mem::MaybeUninit; +use std::ops::{Index, IndexMut}; + +use num_traits::PrimInt; + +use crate::analyzers::check_duplicate_keys; +use crate::specialized_maps::utils::any_duplicate_keys; +use crate::specialized_maps::{ + IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +use crate::traits::Len; + +/// A map whose keys are a continuous range of integers. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenMap` type or the `frozen_map!` macro. 
+#[derive(Clone, Default)] +pub struct IntegerRangeMap { + min: K, + max: K, + pub(crate) entries: Box<[(K, V)]>, +} + +impl IntegerRangeMap { + #[inline] + #[must_use] + pub fn get(&self, key: &Q) -> Option<&V> + where + K: Borrow, + Q: ?Sized + PrimInt, + { + if *key >= *self.min.borrow() && *key <= *self.max.borrow() { + let index = (*key - *self.min.borrow()).to_usize()?; + Some(&self.entries[index].1) + } else { + None + } + } + + #[inline] + #[must_use] + pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> + where + K: Borrow, + Q: ?Sized + PrimInt, + { + if *key >= *self.min.borrow() && *key <= *self.max.borrow() { + let index = (*key - *self.min.borrow()).to_usize()?; + Some(&mut self.entries[index].1) + } else { + None + } + } + + #[allow(mutable_transmutes)] + pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]> + where + K: Borrow, + Q: ?Sized + PrimInt, + { + if any_duplicate_keys(keys) { + return None; + } + + unsafe { + let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit(); + let p = result.as_mut_ptr(); + + for (i, key) in keys.iter().enumerate() { + *(*p).get_unchecked_mut(i) = transmute(self.get(key)?); + } + + Some(result.assume_init()) + } + } + + #[inline] + #[must_use] + pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)> + where + K: Borrow, + Q: ?Sized + PrimInt, + { + if *key >= *self.min.borrow() && *key <= *self.max.borrow() { + let index = (*key - *self.min.borrow()).to_usize()?; + Some((&self.entries[index].0, &self.entries[index].1)) + } else { + None + } + } + + #[inline] + #[must_use] + pub fn contains_key(&self, key: &Q) -> bool + where + K: Borrow, + Q: ?Sized + PrimInt, + { + self.get(key).is_some() + } + + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.entries) + } + + #[must_use] + pub const fn keys(&self) -> Keys { + Keys::new(&self.entries) + } + + #[must_use] + pub const fn values(&self) -> Values { + Values::new(&self.entries) + } + + #[must_use] + pub fn into_keys(self) -> 
IntoKeys { + IntoKeys::new(self.entries) + } + + #[must_use] + pub fn into_values(self) -> IntoValues { + IntoValues::new(self.entries) + } + + #[must_use] + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut::new(self.entries.as_mut()) + } + + #[must_use] + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + ValuesMut::new(self.entries.as_mut()) + } +} + +impl Len for IntegerRangeMap { + fn len(&self) -> usize { + self.entries.len() + } +} + +impl Debug for IntegerRangeMap +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + let pairs = self.entries.iter().map(|x| (&x.0, &x.1)); + f.debug_map().entries(pairs).finish() + } +} + +impl Index<&Q> for IntegerRangeMap +where + K: Borrow, + Q: ?Sized + PrimInt, +{ + type Output = V; + + fn index(&self, index: &Q) -> &Self::Output { + self.get(index).unwrap() + } +} + +impl IndexMut<&Q> for IntegerRangeMap +where + K: Borrow, + Q: ?Sized + PrimInt, +{ + fn index_mut(&mut self, index: &Q) -> &mut V { + self.get_mut(index).unwrap() + } +} + +impl<'a, K, V> IntoIterator for &'a IntegerRangeMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V> IntoIterator for &'a mut IntegerRangeMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl PartialEq for IntegerRangeMap +where + K: PrimInt, + V: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +impl Eq for IntegerRangeMap +where + K: PrimInt, + V: Eq, +{ +} + +impl TryFrom> for IntegerRangeMap +where + K: PrimInt + Hash + Eq, +{ + type Error = &'static str; + + fn try_from(mut payload: Vec<(K, V)>) -> std::result::Result { + if payload.is_empty() { + return Ok(Self { + min: K::zero(), + max: 
K::zero(), + entries: Box::default(), + }); + } + + check_duplicate_keys(payload.iter().map(|entry| &entry.0))?; + + payload.sort_by_key(|x| x.0); + + let min = payload[0].0; + let max = payload[payload.len() - 1].0; + + if max.sub(min).to_usize().unwrap() == payload.len() - 1 { + Ok(Self { + min, + max, + entries: payload.into_boxed_slice(), + }) + } else { + Err("IntegerRangeMap and IntegerRangeSet require that the map keys be in a continuous range") + } + } +} + +impl TryFrom<[(K, V); N]> for IntegerRangeMap +where + K: PrimInt + Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: [(K, V); N]) -> std::result::Result { + Self::try_from(Vec::from_iter(payload)) + } +} + +impl FromIterator<(K, V)> for IntegerRangeMap +where + K: PrimInt + Hash + Eq, +{ + /// # Panics + /// + /// This panics if the keys don't represent a contiguous range of integer values. + fn from_iter>(iter: T) -> Self { + Self::try_from(Vec::from_iter(iter)).unwrap() + } +} + +impl IntoIterator for IntegerRangeMap { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.entries) + } +} + +#[cfg(test)] +mod test { + use crate::traits::Len; + + use super::IntegerRangeMap; + + #[test] + fn range_map_test() { + const MIN: [i32; 5] = [-11, -10, -9, 0, 1]; + + for min in MIN { + let mut v = Vec::new(); + for i in 0..10 { + v.push((min + i, i)); + } + + let mut m = IntegerRangeMap::::try_from(v).unwrap(); + + assert_eq!(10, m.len()); + for i in 0..9 { + let index = min + i; + assert_eq!(i, *m.get(&index).unwrap()); + assert_eq!(i, *m.get_mut(&index).unwrap()); + + let (k, v) = m.get_key_value(&index).unwrap(); + assert_eq!((index, i), (*k, *v)); + } + + let below = min - 1; + assert_eq!(None, m.get(&below)); + assert_eq!(None, m.get_mut(&below)); + assert_eq!(None, m.get_key_value(&below)); + + let above = min + 10; + assert_eq!(None, m.get(&above)); + assert_eq!(None, m.get_mut(&above)); + assert_eq!(None, 
m.get_key_value(&above)); + + if min == -11 { + assert_eq!( + "{-11: 0, -10: 1, -9: 2, -8: 3, -7: 4, -6: 5, -5: 6, -4: 7, -3: 8, -2: 9}", + format!("{m:?}") + ); + } + } + } +} diff --git a/frozen-collections-core/src/specialized_maps/iterators.rs b/frozen-collections-core/src/specialized_maps/iterators.rs new file mode 100644 index 0000000..1f0cc9e --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/iterators.rs @@ -0,0 +1,468 @@ +use std::fmt::{Debug, Formatter, Result}; +use std::iter::FusedIterator; + +/// An iterator over the entries of a map. +pub struct Iter<'a, K, V> { + entries: &'a [(K, V)], + index: usize, +} + +impl<'a, K, V> Iter<'a, K, V> { + #[must_use] + pub const fn new(entries: &'a [(K, V)]) -> Self { + Self { entries, index: 0 } + } +} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + if self.index < self.entries.len() { + let entry = &self.entries[self.index]; + self.index += 1; + Some((&entry.0, &entry.1)) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + (self.len(), Some(self.len())) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.len() + } +} + +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + fn len(&self) -> usize { + self.entries.len() - self.index + } +} + +impl<'a, K, V> FusedIterator for Iter<'a, K, V> {} + +impl<'a, K, V> Clone for Iter<'a, K, V> { + fn clone(&self) -> Self { + Self { + entries: self.entries, + index: self.index, + } + } +} + +impl<'a, K, V> Debug for Iter<'a, K, V> +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// An iterator over the entries of a map providing mutable values. 
+pub struct IterMut<'a, K, V> { + inner: std::slice::IterMut<'a, (K, V)>, +} + +impl<'a, K, V> IterMut<'a, K, V> { + #[must_use] + pub fn new(entries: &'a mut [(K, V)]) -> Self { + Self { + inner: entries.iter_mut(), + } + } +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option { + if let Some(entry) = self.inner.next() { + Some((&entry.0, &mut entry.1)) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize + where + Self: Sized, + { + self.inner.len() + } +} + +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> FusedIterator for IterMut<'a, K, V> {} + +impl<'a, K, V> Debug for IterMut<'a, K, V> +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + self.inner.fmt(f) + } +} + +/// An iterator over the keys of a map. +pub struct Keys<'a, K, V> { + inner: Iter<'a, K, V>, +} + +impl<'a, K, V> Keys<'a, K, V> { + #[must_use] + pub const fn new(entries: &'a [(K, V)]) -> Self { + Self { + inner: Iter::new(entries), + } + } +} + +impl<'a, K, V> Iterator for Keys<'a, K, V> { + type Item = &'a K; + + fn next(&mut self) -> Option { + self.inner.next().map(|x| x.0) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize { + self.inner.count() + } + + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (k, _)| f(acc, k)) + } +} + +impl<'a, K, V> ExactSizeIterator for Keys<'a, K, V> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> FusedIterator for Keys<'a, K, V> {} + +impl<'a, K, V> Clone for Keys<'a, K, V> { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } +} + +impl<'a, K, V> Debug for Keys<'a, K, V> +where + K: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) 
-> Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// An iterator over the values of a map. +pub struct Values<'a, K, V> { + inner: Iter<'a, K, V>, +} + +impl<'a, K, V> Values<'a, K, V> { + #[must_use] + pub const fn new(entries: &'a [(K, V)]) -> Self { + Self { + inner: Iter::new(entries), + } + } +} + +impl<'a, K, V> Iterator for Values<'a, K, V> { + type Item = &'a V; + + fn next(&mut self) -> Option { + self.inner.next().map(|x| x.1) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize { + self.inner.count() + } + + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } +} + +impl<'a, K, V> ExactSizeIterator for Values<'a, K, V> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> FusedIterator for Values<'a, K, V> {} + +impl<'a, K, V> Clone for Values<'a, K, V> { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } +} + +impl<'a, K, V> Debug for Values<'a, K, V> +where + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// An iterator over the mutable values of a map. 
+pub struct ValuesMut<'a, K, V> { + inner: std::slice::IterMut<'a, (K, V)>, +} + +impl<'a, K, V> ValuesMut<'a, K, V> { + #[must_use] + pub fn new(entries: &'a mut [(K, V)]) -> Self { + Self { + inner: entries.iter_mut(), + } + } +} + +impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { + type Item = &'a mut V; + + fn next(&mut self) -> Option { + if let Some(entry) = self.inner.next() { + Some(&mut entry.1) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize + where + Self: Sized, + { + self.inner.len() + } +} + +impl<'a, K, V> ExactSizeIterator for ValuesMut<'a, K, V> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> FusedIterator for ValuesMut<'a, K, V> {} + +impl<'a, K, V> Debug for ValuesMut<'a, K, V> +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + self.inner.fmt(f) + } +} + +/// A consuming iterator over the entries in a map. +#[derive(Clone)] +pub struct IntoIter { + iter: std::vec::IntoIter<(K, V)>, +} + +impl IntoIter { + pub(crate) fn new(entries: Box<[(K, V)]>) -> Self { + Self { + iter: entries.into_vec().into_iter(), + } + } +} + +impl Iterator for IntoIter { + type Item = (K, V); + + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + fn count(self) -> usize + where + Self: Sized, + { + self.iter.count() + } +} + +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IntoIter {} + +impl Debug for IntoIter +where + K: Clone + Debug, + V: Clone + Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} + +/// A consuming iterator over the keys in a map. 
+#[derive(Clone)] +pub struct IntoKeys { + inner: IntoIter, +} + +impl IntoKeys { + #[must_use] + pub fn new(entries: Box<[(K, V)]>) -> Self { + Self { + inner: IntoIter::new(entries), + } + } +} + +impl Iterator for IntoKeys { + type Item = K; + + fn next(&mut self) -> Option { + self.inner.next().map(|x| x.0) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize { + self.inner.count() + } + + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (k, _)| f(acc, k)) + } +} + +impl ExactSizeIterator for IntoKeys { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl FusedIterator for IntoKeys {} + +impl Debug for IntoKeys +where + K: Debug + Clone, + V: Debug + Clone, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} + +/// A consuming iterator over the values in a map. +#[derive(Clone)] +pub struct IntoValues { + inner: IntoIter, +} + +impl IntoValues { + #[must_use] + pub fn new(entries: Box<[(K, V)]>) -> Self { + Self { + inner: IntoIter::new(entries), + } + } +} + +impl Iterator for IntoValues { + type Item = V; + + fn next(&mut self) -> Option { + self.inner.next().map(|x| x.1) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + fn count(self) -> usize { + self.inner.count() + } + + fn fold(self, init: B, mut f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.inner.fold(init, |acc, (_, v)| f(acc, v)) + } +} + +impl ExactSizeIterator for IntoValues { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl FusedIterator for IntoValues {} + +impl Debug for IntoValues +where + K: Debug + Clone, + V: Debug + Clone, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} diff --git a/frozen-collections-core/src/specialized_maps/left_slice_map.rs 
b/frozen-collections-core/src/specialized_maps/left_slice_map.rs new file mode 100644 index 0000000..5acf2b3 --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/left_slice_map.rs @@ -0,0 +1,346 @@ +use std::borrow::Borrow; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::RandomState; +use std::hash::{BuildHasher, Hash}; +use std::intrinsics::transmute; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ops::{Index, IndexMut}; + +use num_traits::{PrimInt, Unsigned}; + +use crate::analyzers::{analyze_hash_codes, check_duplicate_keys}; +use crate::specialized_maps::hash_table::HashTable; +use crate::specialized_maps::utils::any_duplicate_keys; +use crate::specialized_maps::{ + IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +use crate::traits::Len; +use crate::traits::RangeHash; + +/// A map that hashes left-aligned slices of its keys. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the map. A `u8` will allow up to 255 entries, `u16` +/// will allow up to 65,535 entries, and `usize` will allow +/// up to `usize::MAX` entries. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenMap` type or the `frozen_map!` macro. 
+#[derive(Clone)] +pub struct LeftSliceMap { + pub(crate) table: HashTable, + bh: BH, + range: Range, +} + +impl LeftSliceMap +where + K: RangeHash + Len + Hash + Eq, + S: PrimInt + Unsigned, +{ + #[allow(clippy::missing_errors_doc)] + pub fn try_from( + payload: Vec<(K, V)>, + range: Range, + ) -> std::result::Result { + Self::with_hasher(payload, range, RandomState::new()) + } +} + +impl LeftSliceMap +where + K: RangeHash + Len + Hash + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[allow(clippy::missing_errors_doc)] + pub fn with_hasher( + payload: Vec<(K, V)>, + range: Range, + bh: BH, + ) -> std::result::Result { + check_duplicate_keys(payload.iter().map(|entry| &entry.0))?; + + let codes = payload.iter().map(|entry| { + let key = &entry.0; + if key.len() >= range.end { + key.hash_range(&bh, range.clone()) + } else { + 0 + } + }); + let code_analysis = analyze_hash_codes(codes); + + Ok(Self { + table: HashTable::new(payload, code_analysis.num_hash_slots, |k| { + k.hash_range(&bh, range.clone()) + })?, + bh, + range, + }) + } +} + +impl LeftSliceMap +where + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[inline] + #[must_use] + fn get_hash_info(&self, key: &Q) -> Range + where + Q: ?Sized + RangeHash + Len, + { + let hash_code = if key.len() >= self.range.end { + key.hash_range(&self.bh, self.range.clone()) + } else { + 0 + }; + + self.table.get_hash_info(hash_code) + } + + #[inline] + #[must_use] + pub fn get(&self, key: &Q) -> Option<&V> + where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some(&entry.1); + } + } + + None + } + + #[inline] + #[must_use] + pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)> + where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + let range = self.get_hash_info(key); + let entries = unsafe { 
self.table.entries.get_unchecked(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some((&entry.0, &entry.1)); + } + } + + None + } + + #[inline] + #[must_use] + pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> + where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + let range = self.get_hash_info(key); + let entries = unsafe { self.table.entries.get_unchecked_mut(range) }; + for entry in entries { + if key.eq(entry.0.borrow()) { + return Some(&mut entry.1); + } + } + + None + } + + #[allow(mutable_transmutes)] + pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]> + where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + if any_duplicate_keys(keys) { + return None; + } + + unsafe { + let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit(); + let p = result.as_mut_ptr(); + + for (i, key) in keys.iter().enumerate() { + *(*p).get_unchecked_mut(i) = transmute(self.get(key)?); + } + + Some(result.assume_init()) + } + } + + #[inline] + #[must_use] + pub fn contains_key(&self, key: &Q) -> bool + where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + self.get(key).is_some() + } +} + +impl LeftSliceMap { + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.table.entries) + } + + #[must_use] + pub const fn keys(&self) -> Keys { + Keys::new(&self.table.entries) + } + + #[must_use] + pub const fn values(&self) -> Values { + Values::new(&self.table.entries) + } + + #[must_use] + pub fn into_keys(self) -> IntoKeys { + IntoKeys::new(self.table.entries) + } + + #[must_use] + pub fn into_values(self) -> IntoValues { + IntoValues::new(self.table.entries) + } + + #[must_use] + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut::new(self.table.entries.as_mut()) + } + + #[must_use] + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + ValuesMut::new(self.table.entries.as_mut()) + } + + #[must_use] + pub const fn hasher(&self) -> &BH { + &self.bh + } +} + +impl Len for LeftSliceMap { + 
fn len(&self) -> usize { + self.table.len() + } +} + +impl Debug for LeftSliceMap +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + self.table.fmt(f) + } +} + +impl Default for LeftSliceMap +where + S: PrimInt + Unsigned, + BH: BuildHasher + Default, +{ + fn default() -> Self { + Self { + table: HashTable::default(), + bh: BH::default(), + range: Range::default(), + } + } +} + +impl Index<&Q> for LeftSliceMap +where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + type Output = V; + + fn index(&self, index: &Q) -> &Self::Output { + self.get(index).unwrap() + } +} + +impl IndexMut<&Q> for LeftSliceMap +where + K: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + fn index_mut(&mut self, index: &Q) -> &mut V { + self.get_mut(index).unwrap() + } +} + +impl<'a, K, V, S, BH> IntoIterator for &'a LeftSliceMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V, S, BH> IntoIterator for &'a mut LeftSliceMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl PartialEq for LeftSliceMap +where + K: RangeHash + Len + Eq, + V: PartialEq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +impl Eq for LeftSliceMap +where + K: RangeHash + Len + Eq, + V: Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ +} + +impl IntoIterator for LeftSliceMap { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.table.entries) + } +} diff --git a/frozen-collections-core/src/specialized_maps/length_map.rs 
b/frozen-collections-core/src/specialized_maps/length_map.rs new file mode 100644 index 0000000..3543a07 --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/length_map.rs @@ -0,0 +1,376 @@ +use std::borrow::Borrow; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::Hash; +use std::intrinsics::transmute; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ops::{Index, IndexMut}; + +use bitvec::macros::internal::funty::Fundamental; +use num_traits::{PrimInt, Unsigned}; + +use crate::analyzers::{analyze_hash_codes, check_duplicate_keys}; +use crate::specialized_maps::hash_table::HashTable; +use crate::specialized_maps::utils::any_duplicate_keys; +use crate::specialized_maps::{ + IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +use crate::traits::Len; + +/// A map that uses key lengths as hash codes. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the map. A `u8` will allow up to 255 entries, `u16` +/// will allow up to 65,535 entries, and `usize` will allow +/// up to `usize::MAX` entries. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenMap` type or the `frozen_map!` macro. 
+#[derive(Clone)]
+pub struct LengthMap {
+    // Backing table; entries are bucketed by a hash code equal to the key's length.
+    pub(crate) table: HashTable,
+}
+
+// NOTE(review): generic parameter lists (`<K, V, S>`, `<Q>`, ...) are missing throughout this
+// patch text, apparently lost in extraction; the code is kept exactly as found in that respect.
+impl LengthMap
+where
+    S: PrimInt + Unsigned,
+{
+    /// Returns the range of `table.entries` that may contain `key`, using the key's length
+    /// as its hash code.
+    #[inline]
+    #[must_use]
+    fn get_hash_info(&self, key: &Q) -> Range
+    where
+        Q: ?Sized + Len,
+    {
+        let hash_code = key.len().as_u64();
+        self.table.get_hash_info(hash_code)
+    }
+
+    /// Returns a reference to the value stored under `key`, or `None` if no entry in the
+    /// key's slot compares equal to it.
+    #[inline]
+    #[must_use]
+    pub fn get(&self, key: &Q) -> Option<&V>
+    where
+        K: Borrow,
+        Q: ?Sized + Len + Eq,
+    {
+        let range = self.get_hash_info(key);
+        // SAFETY: assumes `HashTable::get_hash_info` only returns ranges that lie inside
+        // `entries` — TODO confirm against hash_table.rs.
+        let entries = unsafe { self.table.entries.get_unchecked(range) };
+        for entry in entries {
+            if key.eq(entry.0.borrow()) {
+                return Some(&entry.1);
+            }
+        }
+
+        None
+    }
+
+    /// Returns references to the stored key and value matching `key`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)>
+    where
+        K: Borrow,
+        Q: ?Sized + Len + Eq,
+    {
+        let range = self.get_hash_info(key);
+        // SAFETY: assumes `HashTable::get_hash_info` only returns ranges that lie inside
+        // `entries` — TODO confirm against hash_table.rs.
+        let entries = unsafe { self.table.entries.get_unchecked(range) };
+        for entry in entries {
+            if key.eq(entry.0.borrow()) {
+                return Some((&entry.0, &entry.1));
+            }
+        }
+
+        None
+    }
+
+    /// Returns a mutable reference to the value stored under `key`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get_mut(&mut self, key: &Q) -> Option<&mut V>
+    where
+        K: Borrow,
+        Q: ?Sized + Len + Eq,
+    {
+        let range = self.get_hash_info(key);
+        // SAFETY: assumes `HashTable::get_hash_info` only returns ranges that lie inside
+        // `entries` — TODO confirm against hash_table.rs.
+        let entries = unsafe { self.table.entries.get_unchecked_mut(range) };
+        for entry in entries {
+            if key.eq(entry.0.borrow()) {
+                return Some(&mut entry.1);
+            }
+        }
+
+        None
+    }
+
+    /// Looks up several keys at once and returns mutable references to all of their values.
+    ///
+    /// Returns `None` if any two of `keys` compare equal, or if any key is not in the map.
+    pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]>
+    where
+        K: Borrow,
+        Q: ?Sized + Len + Eq,
+    {
+        if any_duplicate_keys(keys) {
+            return None;
+        }
+
+        let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit();
+        let slots = result.as_mut_ptr().cast::<&mut V>();
+
+        for (i, key) in keys.iter().enumerate() {
+            // Go through `get_mut` so the reference is mutable from the start; turning the
+            // `&V` returned by `get` into `&mut V` is undefined behaviour.
+            let value = self.get_mut(key)?;
+
+            // SAFETY: `i < N`, so `slots.add(i)` stays inside `result`. The transmute only
+            // extends the lifetime of `value` to that of `&mut self`; the keys were checked to
+            // be pairwise distinct above, so (assuming the map holds no two equal keys) every
+            // lookup yields a different entry and the returned references do not alias.
+            unsafe { slots.add(i).write(transmute(value)) };
+        }
+
+        // SAFETY: the loop above wrote all `N` elements (it returns early otherwise).
+        Some(unsafe { result.assume_init() })
+    }
+
+    /// Returns `true` if the map has an entry for `key`.
+    #[inline]
+    #[must_use]
+    pub fn contains_key(&self, key: &Q) -> bool
+    where
+        K: Borrow,
+        Q: ?Sized + Len + Eq,
+    {
+        self.get(key).is_some()
+    }
+}
+
+impl LengthMap {
+    /// Returns an iterator over `(&K, &V)` pairs of the table's entries.
+    #[must_use]
+    pub const fn iter(&self) -> Iter {
+        Iter::new(&self.table.entries)
+    }
+
+    /// Returns an iterator over the keys of the table's entries.
+    #[must_use]
+    pub const fn keys(&self) -> Keys {
+        Keys::new(&self.table.entries)
+    }
+
+    /// Returns an iterator over the values of the table's entries.
+    #[must_use]
+    pub const fn values(&self) -> Values {
+        Values::new(&self.table.entries)
+    }
+
+    /// Consumes the map and returns an iterator over its keys.
+    #[must_use]
+    pub fn into_keys(self) -> IntoKeys {
+        IntoKeys::new(self.table.entries)
+    }
+
+    /// Consumes the map and returns an iterator over its values.
+    #[must_use]
+    pub fn into_values(self) -> IntoValues {
+        IntoValues::new(self.table.entries)
+    }
+
+    /// Returns an iterator over the entries that yields mutable references to the values.
+    #[must_use]
+    pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
+        IterMut::new(self.table.entries.as_mut())
+    }
+
+    /// Returns an iterator yielding mutable references to the values.
+    #[must_use]
+    pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> {
+        ValuesMut::new(self.table.entries.as_mut())
+    }
+}
+
+impl Len for LengthMap {
+    /// Returns the length reported by the backing table.
+    fn len(&self) -> usize {
+        self.table.len()
+    }
+}
+
+impl Debug for LengthMap
+where
+    K: Debug,
+    V: Debug,
+{
+    /// Delegates formatting to the backing table.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        self.table.fmt(f)
+    }
+}
+
+impl Default for LengthMap
+where
+    S: PrimInt + Unsigned,
+{
+    /// Creates a map around a default `HashTable`.
+    fn default() -> Self {
+        Self {
+            table: HashTable::default(),
+        }
+    }
+}
+
+impl Index<&Q> for LengthMap
+where
+    K: Borrow,
+    Q: ?Sized + Len + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Output = V;
+
+    /// Returns the value stored under `index`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the key is not present.
+    fn index(&self, index: &Q) -> &Self::Output {
+        self.get(index).unwrap()
+    }
+}
+
+impl IndexMut<&Q> for LengthMap
+where
+    K: Borrow,
+    Q: ?Sized + Len + Eq,
+    S: PrimInt + Unsigned,
+{
+    /// Returns the value stored under `index` mutably.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the key is not present.
+    fn index_mut(&mut self, index: &Q) -> &mut V {
+        self.get_mut(index).unwrap()
+    }
+}
+
+impl<'a, K, V, S> IntoIterator for &'a LengthMap {
+    type Item = (&'a K, &'a V);
+    type IntoIter = Iter<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a, K, V, S> IntoIterator for &'a mut LengthMap {
+    type Item = (&'a K, &'a mut V);
+    type IntoIter = IterMut<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter_mut()
+    }
+}
+
+impl PartialEq for LengthMap
+where
+    K: Len + Eq,
+    V: PartialEq,
+    S: PrimInt + Unsigned,
+{
+    /// Two maps are equal when they have the same length and every entry of `self` has an
+    /// equal value under the same key in `other`.
+    fn eq(&self, other: &Self) -> bool {
+        if self.len() != other.len() {
+            return false;
+        }
+
+        self.iter()
+            .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v))
+    }
+}
+
+impl Eq for LengthMap
+where
+    K: Len + Eq,
+    V: Eq,
+    S: PrimInt + Unsigned,
+{
+}
+
+impl TryFrom> for LengthMap
+where
+    K: Len + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Error = &'static str;
+
+    /// Builds a map from `payload`, sizing the table from an analysis of the key lengths.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error from `check_duplicate_keys` or `HashTable::new`.
+    fn try_from(payload: Vec<(K, V)>) -> std::result::Result {
+        check_duplicate_keys(payload.iter().map(|entry| &entry.0))?;
+
+        let code_analysis = analyze_hash_codes(payload.iter().map(|entry| entry.0.len().as_u64()));
+
+        Ok(Self {
+            table: HashTable::new(payload, code_analysis.num_hash_slots, |k| k.len() as u64)?,
+        })
+    }
+}
+
+impl TryFrom<[(K, V); N]> for LengthMap
+where
+    K: Len + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Error = &'static str;
+
+    /// Collects the array into a `Vec` and defers to the `Vec` conversion.
+    fn try_from(payload: [(K, V); N]) -> std::result::Result {
+        Self::try_from(Vec::from_iter(payload))
+    }
+}
+
+impl FromIterator<(K, V)> for LengthMap
+where
+    K: Len + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    /// Collects the iterator into a map.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `Vec` conversion returns an error.
+    fn from_iter>(iter: T) -> Self {
+        Self::try_from(Vec::from_iter(iter)).unwrap()
+    }
+}
+
+impl IntoIterator for LengthMap {
+    type Item = (K, V);
+    type IntoIter = IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter::new(self.table.entries)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_from_empty() {
+        let pairs: [(String, i32); 0] = [];
+        let map = LengthMap::::try_from(pairs).unwrap();
+        assert!(map.is_empty());
+    }
+
+    #[test]
+    fn test_from_single() {
+        let pairs = [("key1".to_string(), 1)];
+        let map = LengthMap::::try_from(pairs).unwrap();
+        assert_eq!(map.get(&"key1".to_string()), Some(&1));
+    }
+
+    #[test]
+    fn test_from_multiple() {
+        let pairs = [
+            ("key1".to_string(), 1),
+            ("key2".to_string(), 2),
+            ("key3".to_string(), 3),
+        ];
+        let map = LengthMap::::try_from(pairs).unwrap();
+        assert_eq!(map.get(&"key1".to_string()), Some(&1));
+        assert_eq!(map.get(&"key2".to_string()), Some(&2));
+        assert_eq!(map.get(&"key3".to_string()), Some(&3));
+    }
+
+    #[test]
+    fn test_from_iter_empty() {
+        let pairs: Vec<(String, i32)> = vec![];
+        let map: LengthMap = pairs.into_iter().collect();
+        assert!(map.is_empty());
+    }
+
+    #[test]
+    fn test_from_iter_single() {
+        let pairs = vec![("key1".to_string(), 1)];
+        let map: LengthMap = pairs.into_iter().collect();
+        assert_eq!(map.get(&"key1".to_string()), Some(&1));
+    }
+
+    #[test]
+    fn test_from_iter_multiple() {
+        let pairs = vec![
+            ("key1".to_string(), 1),
+            ("key2".to_string(), 2),
+            ("key3".to_string(), 3),
+        ];
+        let map: LengthMap = pairs.into_iter().collect();
+        assert_eq!(map.get(&"key1".to_string()), Some(&1));
+        assert_eq!(map.get(&"key2".to_string()), Some(&2));
+        assert_eq!(map.get(&"key3".to_string()), Some(&3));
+    }
+}
diff --git a/frozen-collections-core/src/specialized_maps/mod.rs b/frozen-collections-core/src/specialized_maps/mod.rs
new file mode 100644
index 0000000..299fb44
--- /dev/null
+++ b/frozen-collections-core/src/specialized_maps/mod.rs
@@ -0,0 +1,21 @@
+//! Specialized read-only maps used as implementation details of frozen maps.
+ +pub use common_map::CommonMap; +pub use integer_map::IntegerMap; +pub use integer_range_map::IntegerRangeMap; +pub use iterators::*; +pub use left_slice_map::LeftSliceMap; +pub use length_map::LengthMap; +pub use right_slice_map::RightSliceMap; +pub use scanning_map::ScanningMap; + +mod common_map; +mod hash_table; +mod integer_map; +mod integer_range_map; +mod iterators; +mod left_slice_map; +mod length_map; +mod right_slice_map; +mod scanning_map; +mod utils; diff --git a/frozen-collections-core/src/specialized_maps/right_slice_map.rs b/frozen-collections-core/src/specialized_maps/right_slice_map.rs new file mode 100644 index 0000000..b7bdc8e --- /dev/null +++ b/frozen-collections-core/src/specialized_maps/right_slice_map.rs @@ -0,0 +1,349 @@ +use std::borrow::Borrow; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::RandomState; +use std::hash::{BuildHasher, Hash}; +use std::intrinsics::transmute; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ops::{Index, IndexMut}; + +use num_traits::{PrimInt, Unsigned}; + +use crate::analyzers::{analyze_hash_codes, check_duplicate_keys}; +use crate::specialized_maps::hash_table::HashTable; +use crate::specialized_maps::utils::any_duplicate_keys; +use crate::specialized_maps::{ + IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut, +}; +use crate::traits::Len; +use crate::traits::RangeHash; + +/// A map that hashes right-aligned slices of its keys. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the map. A `u8` will allow up to 255 entries, `u16` +/// will allow up to 65,535 entries, and `usize` will allow +/// up to `usize::MAX` entries. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenMap` type or the `frozen_map!` macro. 
+#[derive(Clone)]
+pub struct RightSliceMap {
+    // Backing table holding the entries, bucketed by the hash of each key's right-aligned slice.
+    pub(crate) table: HashTable,
+    // Source of the hashers handed to `hash_range`.
+    bh: BH,
+    // Offsets subtracted from a key's length to select the hashed slice.
+    range: Range,
+}
+
+// NOTE(review): generic parameter lists (`<K, V, S, BH>`, `<Q>`, `Range<..>`, ...) are missing
+// throughout this patch text, apparently lost in extraction; the code is kept as found in that
+// respect.
+impl RightSliceMap
+where
+    K: RangeHash + Len + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    /// Builds a map from `payload` using a fresh `RandomState` as the hasher builder.
+    ///
+    /// # Errors
+    ///
+    /// Returns whatever error `with_hasher` returns.
+    #[allow(clippy::missing_errors_doc)]
+    pub fn try_from(
+        payload: Vec<(K, V)>,
+        range: Range,
+    ) -> std::result::Result {
+        Self::with_hasher(payload, range, RandomState::new())
+    }
+}
+
+impl RightSliceMap
+where
+    K: RangeHash + Len + Hash + Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    /// Builds a map from `payload`, hashing each key over
+    /// `key.len() - range.start..key.len() - range.end` with hashers from `bh`.
+    ///
+    /// Keys too short for both offsets to be subtracted from their length get the hash code
+    /// 0, which is the same rule `get_hash_info` applies at lookup time.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error from `check_duplicate_keys` or `HashTable::new`.
+    #[allow(clippy::missing_errors_doc)]
+    pub fn with_hasher(
+        payload: Vec<(K, V)>,
+        range: Range,
+        bh: BH,
+    ) -> std::result::Result {
+        check_duplicate_keys(payload.iter().map(|entry| &entry.0))?;
+
+        // A single hash function feeds both the slot analysis and the table build, so the two
+        // can never disagree with each other or with lookups. Guarding on the larger offset
+        // keeps both subtractions from underflowing on short keys.
+        // NOTE(review): the slice `len - start..len - end` is non-empty only when
+        // `range.start > range.end` — confirm the range convention used by the analyzer.
+        let min_len = range.start.max(range.end);
+        let hash_key = |key: &K| {
+            if key.len() >= min_len {
+                key.hash_range(&bh, key.len() - range.start..key.len() - range.end)
+            } else {
+                0
+            }
+        };
+
+        let code_analysis = analyze_hash_codes(payload.iter().map(|entry| hash_key(&entry.0)));
+        Ok(Self {
+            table: HashTable::new(payload, code_analysis.num_hash_slots, hash_key)?,
+            bh,
+            range,
+        })
+    }
+}
+
+impl RightSliceMap
+where
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    /// Returns the range of `table.entries` that may contain `key`.
+    ///
+    /// Uses the same hash rule as `with_hasher`: keys shorter than the larger range offset
+    /// hash to 0.
+    #[inline]
+    #[must_use]
+    fn get_hash_info(&self, key: &Q) -> Range
+    where
+        Q: ?Sized + RangeHash + Len,
+    {
+        let min_len = self.range.start.max(self.range.end);
+        let hash_code = if key.len() >= min_len {
+            key.hash_range(
+                &self.bh,
+                key.len() - self.range.start..key.len() - self.range.end,
+            )
+        } else {
+            0
+        };
+
+        self.table.get_hash_info(hash_code)
+    }
+
+    /// Returns a reference to the value stored under `key`, or `None` if no entry in the
+    /// key's slot compares equal to it.
+    #[inline]
+    #[must_use]
+    pub fn get(&self, key: &Q) -> Option<&V>
+    where
+        K: Borrow,
+        Q: ?Sized + RangeHash + Len + Eq,
+    {
+        let range = self.get_hash_info(key);
+        // SAFETY: assumes `HashTable::get_hash_info` only returns ranges that lie inside
+        // `entries` — TODO confirm against hash_table.rs.
+        let entries = unsafe { self.table.entries.get_unchecked(range) };
+        for entry in entries {
+            if key.eq(entry.0.borrow()) {
+                return Some(&entry.1);
+            }
+        }
+
+        None
+    }
+
+    /// Returns references to the stored key and value matching `key`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)>
+    where
+        K: Borrow,
+        Q: ?Sized + RangeHash + Len + Eq,
+    {
+        let range = self.get_hash_info(key);
+        // SAFETY: assumes `HashTable::get_hash_info` only returns ranges that lie inside
+        // `entries` — TODO confirm against hash_table.rs.
+        let entries = unsafe { self.table.entries.get_unchecked(range) };
+        for entry in entries {
+            if key.eq(entry.0.borrow()) {
+                return Some((&entry.0, &entry.1));
+            }
+        }
+
+        None
+    }
+
+    /// Returns a mutable reference to the value stored under `key`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get_mut(&mut self, key: &Q) -> Option<&mut V>
+    where
+        K: Borrow,
+        Q: ?Sized + RangeHash + Len + Eq,
+    {
+        let range = self.get_hash_info(key);
+        // SAFETY: assumes `HashTable::get_hash_info` only returns ranges that lie inside
+        // `entries` — TODO confirm against hash_table.rs.
+        let entries = unsafe { self.table.entries.get_unchecked_mut(range) };
+        for entry in entries {
+            if key.eq(entry.0.borrow()) {
+                return Some(&mut entry.1);
+            }
+        }
+
+        None
+    }
+
+    /// Looks up several keys at once and returns mutable references to all of their values.
+    ///
+    /// Returns `None` if any two of `keys` compare equal, or if any key is not in the map.
+    pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]>
+    where
+        K: Borrow,
+        Q: ?Sized + RangeHash + Len + Eq,
+    {
+        if any_duplicate_keys(keys) {
+            return None;
+        }
+
+        let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit();
+        let slots = result.as_mut_ptr().cast::<&mut V>();
+
+        for (i, key) in keys.iter().enumerate() {
+            // Go through `get_mut` so the reference is mutable from the start; turning the
+            // `&V` returned by `get` into `&mut V` is undefined behaviour.
+            let value = self.get_mut(key)?;
+
+            // SAFETY: `i < N`, so `slots.add(i)` stays inside `result`. The transmute only
+            // extends the lifetime of `value` to that of `&mut self`; the keys were checked to
+            // be pairwise distinct above, so (assuming the map holds no two equal keys) every
+            // lookup yields a different entry and the returned references do not alias.
+            unsafe { slots.add(i).write(transmute(value)) };
+        }
+
+        // SAFETY: the loop above wrote all `N` elements (it returns early otherwise).
+        Some(unsafe { result.assume_init() })
+    }
+
+    /// Returns `true` if the map has an entry for `key`.
+    #[inline]
+    #[must_use]
+    pub fn contains_key(&self, key: &Q) -> bool
+    where
+        K: Borrow,
+        Q: ?Sized + RangeHash + Len + Eq,
+    {
+        self.get(key).is_some()
+    }
+}
+
+impl RightSliceMap {
+    /// Returns an iterator over `(&K, &V)` pairs of the table's entries.
+    #[must_use]
+    pub const fn iter(&self) -> Iter {
+        Iter::new(&self.table.entries)
+    }
+
+    /// Returns an iterator over the keys of the table's entries.
+    #[must_use]
+    pub const fn keys(&self) -> Keys {
+        Keys::new(&self.table.entries)
+    }
+
+    /// Returns an iterator over the values of the table's entries.
+    #[must_use]
+    pub const fn values(&self) -> Values {
+        Values::new(&self.table.entries)
+    }
+
+    /// Consumes the map and returns an iterator over its keys.
+    #[must_use]
+    pub fn into_keys(self) -> IntoKeys {
+        IntoKeys::new(self.table.entries)
+    }
+
+    /// Consumes the map and returns an iterator over its values.
+    #[must_use]
+    pub fn into_values(self) -> IntoValues {
+        IntoValues::new(self.table.entries)
+    }
+
+    /// Returns an iterator over the entries that yields mutable references to the values.
+    #[must_use]
+    pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
+        IterMut::new(self.table.entries.as_mut())
+    }
+
+    /// Returns an iterator yielding mutable references to the values.
+    #[must_use]
+    pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> {
+        ValuesMut::new(self.table.entries.as_mut())
+    }
+
+    /// Returns the hasher builder this map was created with.
+    #[must_use]
+    pub const fn hasher(&self) -> &BH {
+        &self.bh
+    }
+}
+
+impl Len for RightSliceMap {
+    /// Returns the length reported by the backing table.
+    fn len(&self) -> usize {
+        self.table.len()
+    }
+}
+
+impl Debug for RightSliceMap
+where
+    K: Debug,
+    V: Debug,
+{
+    /// Delegates formatting to the backing table.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        self.table.fmt(f)
+    }
+}
+
+impl Default for RightSliceMap
+where
+    S: PrimInt + Unsigned,
+    BH: BuildHasher + Default,
+{
+    /// Creates a map from a default table, a default hasher builder and a default range.
+    fn default() -> Self {
+        Self {
+            table: HashTable::default(),
+            bh: BH::default(),
+            range: Range::default(),
+        }
+    }
+}
+
+impl Index<&Q> for RightSliceMap
+where
+    K: Borrow,
+    Q: ?Sized + RangeHash + Len + Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    type Output = V;
+
+    /// Returns the value stored under `index`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the key is not present.
+    fn index(&self, index: &Q) -> &Self::Output {
+        self.get(index).unwrap()
+    }
+}
+
+impl IndexMut<&Q> for RightSliceMap
+where
+    K: Borrow,
+    Q: ?Sized + RangeHash + Len + Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    /// Returns the value stored under `index` mutably.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the key is not present.
+    fn index_mut(&mut self, index: &Q) -> &mut V {
+        self.get_mut(index).unwrap()
+    }
+}
+
+impl<'a, K, V, S, BH> IntoIterator for &'a RightSliceMap {
+    type Item = (&'a K, &'a V);
+    type IntoIter = Iter<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a, K, V, S, BH> IntoIterator for &'a mut RightSliceMap {
+    type Item = (&'a K, &'a mut V);
+    type IntoIter = IterMut<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter_mut()
+    }
+}
+
+impl PartialEq for RightSliceMap
+where
+    K: RangeHash + Len + Eq,
+    V: PartialEq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    /// Two maps are equal when they have the same length and every entry of `self` has an
+    /// equal value under the same key in `other`.
+    fn eq(&self, other: &Self) -> bool {
+        if self.len() != other.len() {
+            return false;
+        }
+
+        self.iter()
+            .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v))
+    }
+}
+
+impl Eq for RightSliceMap
+where
+    K: RangeHash + Len + Eq,
+    V: Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+}
+
+impl IntoIterator for RightSliceMap {
+    type Item = (K, V);
+    type IntoIter = IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter::new(self.table.entries)
+    }
+}
diff --git
a/frozen-collections-core/src/specialized_maps/scanning_map.rs b/frozen-collections-core/src/specialized_maps/scanning_map.rs
new file mode 100644
index 0000000..a3873ee
--- /dev/null
+++ b/frozen-collections-core/src/specialized_maps/scanning_map.rs
@@ -0,0 +1,342 @@
+use std::borrow::Borrow;
+use std::fmt::{Debug, Formatter, Result};
+use std::hash::Hash;
+use std::intrinsics::transmute;
+use std::mem::MaybeUninit;
+use std::ops::{Index, IndexMut};
+
+use crate::analyzers::check_duplicate_keys;
+use crate::specialized_maps::utils::any_duplicate_keys;
+use crate::specialized_maps::{
+    IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Values, ValuesMut,
+};
+use crate::traits::Len;
+
+/// A general purpose map that uses linear scan of entries rather than a hash table.
+///
+/// # Important Note
+///
+/// This type is not intended to be used directly by
+/// application code. Instead, applications are expected
+/// to use the `FrozenMap` type or the `frozen_map!` macro.
+#[derive(Clone)]
+pub struct ScanningMap {
+    // All key/value pairs, searched front to back on every lookup.
+    pub(crate) entries: Box<[(K, V)]>,
+}
+
+// NOTE(review): generic parameter lists (`<K, V>`, `<Q>`, ...) are missing throughout this patch
+// text, apparently lost in extraction; the code is kept exactly as found in that respect.
+impl ScanningMap {
+    /// Returns a reference to the value of the first entry whose key equals `key`, or `None`.
+    #[inline]
+    #[must_use]
+    pub fn get(&self, key: &Q) -> Option<&V>
+    where
+        K: Borrow,
+        Q: ?Sized + Eq,
+    {
+        for entry in self.entries.iter() {
+            if key.eq(entry.0.borrow()) {
+                return Some(&entry.1);
+            }
+        }
+
+        None
+    }
+
+    /// Returns a mutable reference to the value of the first entry whose key equals `key`,
+    /// or `None`.
+    #[inline]
+    #[must_use]
+    pub fn get_mut(&mut self, key: &Q) -> Option<&mut V>
+    where
+        K: Borrow,
+        Q: ?Sized + Eq,
+    {
+        for entry in self.entries.iter_mut() {
+            if key.eq(entry.0.borrow()) {
+                return Some(&mut entry.1);
+            }
+        }
+
+        None
+    }
+
+    /// Returns references to the stored key and value of the first entry whose key equals
+    /// `key`, or `None`.
+    #[inline]
+    #[must_use]
+    pub fn get_key_value(&self, key: &Q) -> Option<(&K, &V)>
+    where
+        K: Borrow,
+        Q: ?Sized + Eq,
+    {
+        for entry in self.entries.iter() {
+            if key.eq(entry.0.borrow()) {
+                return Some((&entry.0, &entry.1));
+            }
+        }
+
+        None
+    }
+
+    /// Looks up several keys at once and returns mutable references to all of their values.
+    ///
+    /// Returns `None` if any two of `keys` compare equal, or if any key is not in the map.
+    pub fn get_many_mut(&mut self, keys: [&Q; N]) -> Option<[&mut V; N]>
+    where
+        K: Borrow,
+        Q: ?Sized + Eq,
+    {
+        if any_duplicate_keys(keys) {
+            return None;
+        }
+
+        let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit();
+        let slots = result.as_mut_ptr().cast::<&mut V>();
+
+        for (i, key) in keys.iter().enumerate() {
+            // Go through `get_mut` so the reference is mutable from the start; turning the
+            // `&V` returned by `get` into `&mut V` is undefined behaviour.
+            let value = self.get_mut(key)?;
+
+            // SAFETY: `i < N`, so `slots.add(i)` stays inside `result`. The transmute only
+            // extends the lifetime of `value` to that of `&mut self`; the keys were checked to
+            // be pairwise distinct above, so (assuming the map holds no two equal keys) every
+            // lookup yields a different entry and the returned references do not alias.
+            unsafe { slots.add(i).write(transmute(value)) };
+        }
+
+        // SAFETY: the loop above wrote all `N` elements (it returns early otherwise).
+        Some(unsafe { result.assume_init() })
+    }
+
+    /// Returns `true` if the map has an entry for `key`.
+    #[inline]
+    #[must_use]
+    pub fn contains_key(&self, key: &Q) -> bool
+    where
+        K: Borrow,
+        Q: ?Sized + Eq,
+    {
+        self.get(key).is_some()
+    }
+
+    /// Returns an iterator over `(&K, &V)` pairs of the entries.
+    #[must_use]
+    pub const fn iter(&self) -> Iter {
+        Iter::new(&self.entries)
+    }
+
+    /// Returns an iterator over the keys of the entries.
+    #[must_use]
+    pub const fn keys(&self) -> Keys {
+        Keys::new(&self.entries)
+    }
+
+    /// Returns an iterator over the values of the entries.
+    #[must_use]
+    pub const fn values(&self) -> Values {
+        Values::new(&self.entries)
+    }
+
+    /// Consumes the map and returns an iterator over its keys.
+    #[must_use]
+    pub fn into_keys(self) -> IntoKeys {
+        IntoKeys::new(self.entries)
+    }
+
+    /// Consumes the map and returns an iterator over its values.
+    #[must_use]
+    pub fn into_values(self) -> IntoValues {
+        IntoValues::new(self.entries)
+    }
+
+    /// Returns an iterator over the entries that yields mutable references to the values.
+    #[must_use]
+    pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
+        IterMut::new(self.entries.as_mut())
+    }
+
+    /// Returns an iterator yielding mutable references to the values.
+    #[must_use]
+    pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> {
+        ValuesMut::new(self.entries.as_mut())
+    }
+}
+
+impl Len for ScanningMap {
+    /// Returns the number of stored entries.
+    fn len(&self) -> usize {
+        self.entries.len()
+    }
+}
+
+impl Debug for ScanningMap
+where
+    K: Debug,
+    V: Debug,
+{
+    /// Formats the entries with `debug_map`, in storage order.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        let pairs = self.entries.iter().map(|x| (&x.0, &x.1));
+        f.debug_map().entries(pairs).finish()
+    }
+}
+
+impl Default for ScanningMap {
+    /// Creates a map with a default (empty) entry slice.
+    fn default() -> Self {
+        Self {
+            entries: Box::default(),
+        }
+    }
+}
+
+impl Index<&Q> for ScanningMap
+where
+    K: Borrow,
+    Q: ?Sized + Eq,
+{
+    type Output = V;
+
+    /// Returns the value stored under `index`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the key is not present.
+    fn index(&self, index: &Q) -> &Self::Output {
+        self.get(index).unwrap()
+    }
+}
+
+impl IndexMut<&Q> for ScanningMap
+where
+    K: Borrow,
+    Q: ?Sized + Eq,
+{
+    /// Returns the value stored under `index` mutably.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the key is not present.
+    fn index_mut(&mut self, index: &Q) -> &mut V {
+        self.get_mut(index).unwrap()
+    }
+}
+
+impl<'a, K, V> IntoIterator for &'a ScanningMap {
+    type Item = (&'a K, &'a V);
+    type IntoIter = Iter<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a, K, V> IntoIterator for &'a mut ScanningMap {
+    type Item = (&'a K, &'a mut V);
+    type IntoIter = IterMut<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter_mut()
+    }
+}
+
+impl PartialEq for ScanningMap
+where
+    K: Eq,
+    V: PartialEq,
+{
+    /// Two maps are equal when they have the same length and every entry of `self` has an
+    /// equal value under the same key in `other`.
+    fn eq(&self, other: &Self) -> bool {
+        if self.len() != other.len() {
+            return false;
+        }
+
+        self.iter()
+            .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v))
+    }
+}
+
+impl Eq for ScanningMap
+where
+    K: Eq,
+    V: Eq,
+{
+}
+
+impl TryFrom> for ScanningMap
+where
+    K: Hash + Eq,
+{
+    type Error = &'static str;
+
+    /// Takes ownership of `payload` as the map's entries, in the given order.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error from `check_duplicate_keys`.
+    fn try_from(payload: Vec<(K, V)>) -> std::result::Result {
+        check_duplicate_keys(payload.iter().map(|entry| &entry.0))?;
+
+        Ok(Self {
+            entries: payload.into_boxed_slice(),
+        })
+    }
+}
+
+impl TryFrom<[(K, V); N]> for ScanningMap
+where
+    K: Hash + Eq,
+{
+    type Error = &'static str;
+
+    /// Collects the array into a `Vec` and defers to the `Vec` conversion.
+    fn try_from(payload: [(K, V); N]) -> std::result::Result {
+        Self::try_from(Vec::from_iter(payload))
+    }
+}
+
+impl FromIterator<(K, V)> for ScanningMap
+where
+    K: Hash + Eq,
+{
+    /// Collects the iterator into a map.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `Vec` conversion returns an error.
+    fn from_iter>(iter: T) -> Self {
+        Self::try_from(Vec::from_iter(iter)).unwrap()
+    }
+}
+
+impl IntoIterator for ScanningMap {
+    type Item = (K, V);
+    type IntoIter = IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter::new(self.entries)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::traits::Len;
+
+    use super::ScanningMap;
+
+    #[test]
+    fn new_creates_scanning_map_with_given_payload() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let map = ScanningMap::::try_from(payload.clone()).unwrap();
+        assert_eq!(payload.len(), map.len());
+    }
+
+    #[test]
+    fn get_returns_some_for_existing_keys() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!(&20, map.get(&10).unwrap());
+        assert_eq!(&40, map.get(&30).unwrap());
+        assert_eq!(&60, map.get(&50).unwrap());
+    }
+
+    #[test]
+    fn get_returns_none_for_non_existing_keys() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!(None, map.get(&0));
+    }
+
+    #[test]
+    fn get_mut_returns_some_for_existing_keys() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let mut map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!(&20, map.get_mut(&10).unwrap());
+        assert_eq!(&40, map.get_mut(&30).unwrap());
+        assert_eq!(&60, map.get_mut(&50).unwrap());
+    }
+
+    #[test]
+    fn get_mut_returns_none_for_non_existing_keys() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let mut map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!(None, map.get_mut(&0));
+    }
+
+    #[test]
+    fn get_key_value_returns_some_for_existing_keys() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!((&10, &20), map.get_key_value(&10).unwrap());
+        assert_eq!((&30, &40), map.get_key_value(&30).unwrap());
+        assert_eq!((&50, &60), map.get_key_value(&50).unwrap());
+    }
+
+    #[test]
+    fn get_key_value_returns_none_for_non_existing_keys() {
+        let payload = vec![(10, 20), (30, 40), (50, 60)];
+        let map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!(None, map.get_key_value(&0));
+    }
+
+    #[test]
+    fn debug_format_is_correct() {
+        let payload = vec![(10, 20)];
+        let map = ScanningMap::::try_from(payload).unwrap();
+        assert_eq!("{10: 20}", format!("{map:?}"));
+    }
+}
diff --git a/frozen-collections-core/src/specialized_maps/utils.rs b/frozen-collections-core/src/specialized_maps/utils.rs
new file mode 100644
index 0000000..dda5beb
--- /dev/null
+++ b/frozen-collections-core/src/specialized_maps/utils.rs
@@ -0,0 +1,15 @@
+/// Ensure key uniqueness (assumes "keys" is a relatively small array)
+pub fn any_duplicate_keys(keys: [&K; N]) -> bool
+where
+    K: ?Sized + Eq,
+{
+    // Compare every key with each key before it; quadratic, hence the small-array assumption.
+    for i in 0..keys.len() {
+        for j in 0..i {
+            if keys[j].eq(keys[i]) {
+                return true;
+            }
+        }
+    }
+
+    false
+}
diff --git a/frozen-collections-core/src/specialized_sets/common_set.rs b/frozen-collections-core/src/specialized_sets/common_set.rs
new file mode 100644
index 0000000..ecc57c4
--- /dev/null
+++ b/frozen-collections-core/src/specialized_sets/common_set.rs
@@ -0,0 +1,269 @@
+use std::borrow::Borrow;
+use std::collections::HashSet;
+use std::fmt::{Debug, Formatter, Result};
+use std::hash::RandomState;
+use std::hash::{BuildHasher, Hash};
+use std::ops::{BitAnd, BitOr, BitXor, Sub};
+
+use num_traits::{PrimInt, Unsigned};
+
+use crate::specialized_maps::CommonMap;
+use crate::specialized_sets::{IntoIter, Iter};
+use crate::traits::Len;
+use crate::traits::Set;
+
+/// A general-purpose set.
+///
+/// # Capacity Constraints
+///
+/// The `S` generic argument controls the maximum capacity
+/// of the set. A `u8` will allow up to 255 elements, `u16`
+/// will allow up to 65,535 elements, and `usize` will allow
+/// up to `usize::MAX` elements.
+///
+/// # Important Note
+///
+/// This type is not intended to be used directly by
+/// application code. Instead, applications are expected
+/// to use the `FrozenSet` type or the `frozen_set!` macro.
+#[derive(Clone)]
+pub struct CommonSet {
+    // The set is a `CommonMap` whose values are all `()`.
+    map: CommonMap,
+}
+
+// NOTE(review): generic parameter lists (`<T, S, BH>`, `<Q>`, ...) are missing throughout this
+// patch text, apparently lost in extraction; the code is kept exactly as found in that respect.
+impl CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    /// Builds a set from `payload`, pairing every value with `()` and handing the pairs and
+    /// `bh` to `CommonMap::with_hasher`; its error is propagated.
+    #[allow(clippy::missing_errors_doc)]
+    pub fn with_hasher(payload: Vec, bh: BH) -> std::result::Result {
+        Ok(Self {
+            map: CommonMap::with_hasher(payload.into_iter().map(|x| (x, ())).collect(), bh)?,
+        })
+    }
+}
+
+impl CommonSet
+where
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    /// Returns a reference to the stored element equal to `value`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get(&self, value: &Q) -> Option<&T>
+    where
+        T: Borrow,
+        Q: ?Sized + Hash + Eq,
+    {
+        Some(self.map.get_key_value(value)?.0)
+    }
+
+    /// Returns `true` if the set holds an element equal to `value`.
+    #[inline]
+    #[must_use]
+    pub fn contains(&self, value: &Q) -> bool
+    where
+        T: Borrow,
+        Q: ?Sized + Hash + Eq,
+    {
+        self.get(value).is_some()
+    }
+}
+
+impl CommonSet {
+    /// Returns an iterator over the elements, reading the map's entries directly.
+    #[must_use]
+    pub const fn iter(&self) -> Iter {
+        Iter::new(&self.map.table.entries)
+    }
+
+    /// Returns the hasher for this set.
+    #[must_use]
+    pub const fn hasher(&self) -> &BH {
+        self.map.hasher()
+    }
+}
+
+impl Len for CommonSet {
+    fn len(&self) -> usize {
+        self.map.len()
+    }
+}
+
+impl Debug for CommonSet
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        f.debug_set().entries(self.iter()).finish()
+    }
+}
+
+impl Default for CommonSet
+where
+    S: PrimInt + Unsigned,
+    BH: BuildHasher + Default,
+{
+    fn default() -> Self {
+        Self {
+            map: CommonMap::default(),
+        }
+    }
+}
+
+impl IntoIterator for CommonSet {
+    type Item = T;
+    type IntoIter = IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter::new(self.map.table.entries)
+    }
+}
+
+impl<'a, T, S, BH> IntoIterator for &'a CommonSet {
+    type Item = &'a T;
+    type IntoIter = Iter<'a, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl TryFrom> for CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Error = &'static str;
+
+    // Uses a fresh `RandomState` as the hasher builder.
+    fn try_from(payload: Vec) -> std::result::Result {
+        Self::with_hasher(payload, RandomState::new())
+    }
+}
+
+impl TryFrom<[T; N]> for CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Error = &'static str;
+
+    // Goes through `CommonMap::try_from` rather than `with_hasher`.
+    fn try_from(payload: [T; N]) -> std::result::Result {
+        Ok(Self {
+            map: CommonMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?,
+        })
+    }
+}
+
+impl FromIterator for CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    // Panics (via `unwrap`) if `CommonMap::try_from` returns an error.
+    fn from_iter>(iter: I) -> Self {
+        Self {
+            map: CommonMap::try_from(Vec::from_iter(iter.into_iter().map(|x| (x, ())))).unwrap(),
+        }
+    }
+}
+
+impl Set for CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher,
+{
+    type Iterator<'a> = Iter<'a, T>
+    where
+        T: 'a,
+        S: 'a,
+        BH: 'a;
+
+    fn iter(&self) -> Iter<'_, T> {
+        Iter::new(&self.map.table.entries)
+    }
+
+    fn contains(&self, value: &T) -> bool {
+        // Resolves to the inherent `contains` above (inherent methods take precedence over
+        // trait methods), so this does not recurse.
+        self.contains(value)
+    }
+}
+
+// The operator impls below collect the result of the corresponding set operation
+// (`union`, `intersection`, `symmetric_difference`, `difference`) into a new `HashSet` of
+// cloned elements.
+impl BitOr<&ST> for &CommonSet
+where
+    T: Hash + Eq + Clone,
+    S: PrimInt + Unsigned,
+    ST: Set,
+    BH: BuildHasher + Default,
+{
+    type Output = HashSet;
+
+    fn bitor(self, rhs: &ST) -> Self::Output {
+        self.union(rhs).cloned().collect()
+    }
+}
+
+impl BitAnd<&ST> for &CommonSet
+where
+    T: Hash + Eq + Clone,
+    S: PrimInt + Unsigned,
+    ST: Set,
+    BH: BuildHasher + Default,
+{
+    type Output = HashSet;
+
+    fn bitand(self, rhs: &ST) -> Self::Output {
+        self.intersection(rhs).cloned().collect()
+    }
+}
+
+impl BitXor<&ST> for &CommonSet
+where
+    T: Hash + Eq + Clone,
+    S: PrimInt + Unsigned,
+    ST: Set,
+    BH: BuildHasher + Default,
+{
+    type Output = HashSet;
+
+    fn bitxor(self, rhs: &ST) -> Self::Output {
+        self.symmetric_difference(rhs).cloned().collect()
+    }
+}
+
+impl Sub<&ST> for &CommonSet
+where
+    T: Hash + Eq + Clone,
+    S: PrimInt + Unsigned,
+    ST: Set,
+    BH: BuildHasher + Default,
+{
+    type Output = HashSet;
+
+    fn sub(self, rhs: &ST) -> Self::Output {
+        self.difference(rhs).cloned().collect()
+    }
+}
+
+impl PartialEq for CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+    ST: Set,
+    BH: BuildHasher + Default,
+{
+    // Equal when both sides have the same length and `other` contains every element of `self`.
+    fn eq(&self, other: &ST) -> bool {
+        if self.len() != other.len() {
+            return false;
+        }
+
+        self.iter().all(|value| other.contains(value))
+    }
+}
+
+impl Eq for CommonSet
+where
+    T: Hash + Eq,
+    S: PrimInt + Unsigned,
+    BH: BuildHasher + Default,
+{
+}
diff --git a/frozen-collections-core/src/specialized_sets/integer_range_set.rs b/frozen-collections-core/src/specialized_sets/integer_range_set.rs
new file mode 100644
index 0000000..11e58e5
--- /dev/null
+++ b/frozen-collections-core/src/specialized_sets/integer_range_set.rs
@@ -0,0 +1,204 @@
+use std::borrow::Borrow;
+use std::collections::HashSet;
+use std::fmt::{Debug, Formatter, Result};
+use std::hash::{Hash, RandomState};
+use std::ops::{BitAnd, BitOr, BitXor, Sub};
+
+use num_traits::PrimInt;
+
+use crate::specialized_maps::IntegerRangeMap;
+use crate::specialized_sets::{IntoIter, Iter};
+use crate::traits::Len;
+use crate::traits::Set;
+
+/// A set whose values are a continuous range of integers.
+///
+/// # Important Note
+///
+/// This type is not intended to be used directly by
+/// application code. Instead, applications are expected
+/// to use the `FrozenSet` type or the `frozen_set!` macro.
+#[derive(Clone, Default)]
+pub struct IntegerRangeSet {
+    // The set is an `IntegerRangeMap` whose values are all `()`.
+    map: IntegerRangeMap,
+}
+
+// NOTE(review): generic parameter lists (`<T>`, `<Q>`, ...) are missing throughout this patch
+// text, apparently lost in extraction; the code is kept exactly as found in that respect.
+impl IntegerRangeSet {
+    /// Returns a reference to the stored element equal to `value`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get(&self, value: &Q) -> Option<&T>
+    where
+        T: Borrow,
+        Q: ?Sized + PrimInt,
+    {
+        Some(self.map.get_key_value(value)?.0)
+    }
+
+    /// Returns `true` if the set holds an element equal to `value`.
+    #[inline]
+    #[must_use]
+    pub fn contains(&self, value: &Q) -> bool
+    where
+        T: Borrow,
+        Q: ?Sized + PrimInt,
+    {
+        self.get(value).is_some()
+    }
+
+    /// Returns an iterator over the elements, reading the map's entries directly.
+    #[must_use]
+    pub const fn iter(&self) -> Iter {
+        Iter::new(&self.map.entries)
+    }
+}
+
+impl Len for IntegerRangeSet {
+    fn len(&self) -> usize {
+        self.map.len()
+    }
+}
+
+impl Debug for IntegerRangeSet
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        f.debug_set().entries(self.iter()).finish()
+    }
+}
+
+impl IntoIterator for IntegerRangeSet {
+    type Item = T;
+    type IntoIter = IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter::new(self.map.entries)
+    }
+}
+
+impl<'a, T> IntoIterator for &'a IntegerRangeSet {
+    type Item = &'a T;
+    type IntoIter = Iter<'a, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl TryFrom> for IntegerRangeSet
+where
+    T: PrimInt + Hash + Eq,
+{
+    type Error = &'static str;
+
+    // Pairs every value with `()` and defers to `IntegerRangeMap::try_from`, propagating its error.
+    #[allow(clippy::from_iter_instead_of_collect)]
+    fn try_from(payload: Vec) -> std::result::Result {
+        Ok(Self {
+            map: IntegerRangeMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?,
+        })
+    }
+}
+
+impl TryFrom<[T; N]> for IntegerRangeSet
+where
+    T: PrimInt + Hash + Eq,
+{
+    type Error = &'static str;
+
+    // Same conversion as the `Vec` impl, starting from an array.
+    #[allow(clippy::from_iter_instead_of_collect)]
+    fn try_from(payload: [T; N]) -> std::result::Result {
+        Ok(Self {
+            map: IntegerRangeMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?,
+        })
+    }
+}
+
+impl FromIterator for IntegerRangeSet
+where
+    T: PrimInt + Hash + Eq,
+{
+    // Relies on the map's own `FromIterator` impl (defined elsewhere) for validation.
+    fn from_iter>(iter: I) -> Self {
+        Self {
+            map: iter.into_iter().map(|x| (x, ())).collect(),
+        }
+    }
+}
+
+impl Set for IntegerRangeSet
+where
+    T: PrimInt,
+{
+    type Iterator<'a> = Iter<'a, T>
+    where
+        T: 'a;
+
+    // Both methods resolve to the inherent methods of the same name (inherent methods take
+    // precedence over trait methods), so they do not recurse.
+    fn iter(&self) -> Iter<'_, T> {
+        self.iter()
+    }
+
+    fn contains(&self, value: &T) -> bool {
+        self.contains(value)
+    }
+}
+
+// The operator impls below collect the result of the corresponding set operation
+// (`union`, `intersection`, `symmetric_difference`, `difference`) into a new `HashSet` of
+// copied elements.
+impl BitOr<&ST> for &IntegerRangeSet
+where
+    T: PrimInt + Hash,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn bitor(self, rhs: &ST) -> Self::Output {
+        self.union(rhs).copied().collect()
+    }
+}
+
+impl BitAnd<&ST> for &IntegerRangeSet
+where
+    T: PrimInt + Hash,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn bitand(self, rhs: &ST) -> Self::Output {
+        self.intersection(rhs).copied().collect()
+    }
+}
+
+impl BitXor<&ST> for &IntegerRangeSet
+where
+    T: PrimInt + Hash,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn bitxor(self, rhs: &ST) -> Self::Output {
+        self.symmetric_difference(rhs).copied().collect()
+    }
+}
+
+impl Sub<&ST> for &IntegerRangeSet
+where
+    T: PrimInt + Hash,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn sub(self, rhs: &ST) -> Self::Output {
+        self.difference(rhs).copied().collect()
+    }
+}
+
+impl PartialEq for IntegerRangeSet
+where
+    T: PrimInt,
+    ST: Set,
+{
+    // Equal when both sides have the same length and `other` contains every element of `self`.
+    fn eq(&self, other: &ST) -> bool {
+        if self.len() != other.len() {
+            return false;
+        }
+
+        self.iter().all(|value| other.contains(value))
+    }
+}
+
+impl Eq for IntegerRangeSet where T: PrimInt {}
diff --git a/frozen-collections-core/src/specialized_sets/integer_set.rs b/frozen-collections-core/src/specialized_sets/integer_set.rs
new file mode 100644
index 0000000..e5a4a3b
--- /dev/null
+++ b/frozen-collections-core/src/specialized_sets/integer_set.rs
@@ -0,0 +1,242 @@
+use std::borrow::Borrow;
+use std::collections::HashSet;
+use std::fmt::{Debug, Formatter, Result};
+use std::hash::Hash;
+use std::ops::{BitAnd, BitOr, BitXor, Sub};
+
+use num_traits::{AsPrimitive, PrimInt, Unsigned};
+
+use crate::specialized_maps::IntegerMap;
+use crate::specialized_sets::{IntoIter, Iter};
+use crate::traits::Len;
+use crate::traits::Set;
+
+/// A set whose values are integers.
+///
+/// # Capacity Constraints
+///
+/// The `S` generic argument controls the maximum capacity
+/// of the set. A `u8` will allow up to 255 elements, `u16`
+/// will allow up to 65,535 elements, and `usize` will allow
+/// up to `usize::MAX` elements.
+///
+/// # Important Note
+///
+/// This type is not intended to be used directly by
+/// application code. Instead, applications are expected
+/// to use the `FrozenSet` type or the `frozen_set!` macro.
+#[derive(Clone)]
+pub struct IntegerSet {
+    // The set is an `IntegerMap` whose values are all `()`.
+    map: IntegerMap,
+}
+
+// NOTE(review): generic parameter lists (`<T, S>`, `<Q>`, ...) are missing throughout this patch
+// text, apparently lost in extraction; the code is kept exactly as found in that respect.
+impl IntegerSet
+where
+    S: PrimInt + Unsigned,
+{
+    /// Returns a reference to the stored element equal to `value`, or `None` if absent.
+    #[inline]
+    #[must_use]
+    pub fn get(&self, value: &Q) -> Option<&T>
+    where
+        T: Borrow,
+        Q: ?Sized + PrimInt + AsPrimitive,
+    {
+        Some(self.map.get_key_value(value)?.0)
+    }
+
+    /// Returns `true` if the set holds an element equal to `value`.
+    #[inline]
+    #[must_use]
+    pub fn contains(&self, value: &Q) -> bool
+    where
+        T: Borrow,
+        Q: ?Sized + PrimInt + AsPrimitive,
+    {
+        self.get(value).is_some()
+    }
+}
+
+impl IntegerSet {
+    /// Returns an iterator over the elements, reading the map's entries directly.
+    #[must_use]
+    pub const fn iter(&self) -> Iter {
+        Iter::new(&self.map.table.entries)
+    }
+}
+
+impl Len for IntegerSet {
+    fn len(&self) -> usize {
+        self.map.len()
+    }
+}
+
+impl Debug for IntegerSet
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        f.debug_set().entries(self.iter()).finish()
+    }
+}
+
+impl Default for IntegerSet
+where
+    S: PrimInt + Unsigned,
+{
+    fn default() -> Self {
+        Self {
+            map: IntegerMap::default(),
+        }
+    }
+}
+
+impl IntoIterator for IntegerSet {
+    type Item = T;
+    type IntoIter = IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter::new(self.map.table.entries)
+    }
+}
+
+impl<'a, T, S> IntoIterator for &'a IntegerSet {
+    type Item = &'a T;
+    type IntoIter = Iter<'a, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl TryFrom> for IntegerSet
+where
+    T: PrimInt + AsPrimitive + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Error = &'static str;
+
+    // Pairs every value with `()` and defers to `IntegerMap::try_from`, propagating its error.
+    #[allow(clippy::from_iter_instead_of_collect)]
+    fn try_from(payload: Vec) -> std::result::Result {
+        Ok(Self {
+            map: IntegerMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?,
+        })
+    }
+}
+
+impl TryFrom<[T; N]> for IntegerSet
+where
+    T: PrimInt + AsPrimitive + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    type Error = &'static str;
+
+    // Same conversion as the `Vec` impl, starting from an array.
+    #[allow(clippy::from_iter_instead_of_collect)]
+    fn try_from(payload: [T; N]) -> std::result::Result {
+        Ok(Self {
+            map: IntegerMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?,
+        })
+    }
+}
+
+impl FromIterator for IntegerSet
+where
+    T: PrimInt + AsPrimitive + Hash + Eq,
+    S: PrimInt + Unsigned,
+{
+    // Relies on the map's own `FromIterator` impl (defined elsewhere) for validation.
+    fn from_iter>(iter: I) -> Self {
+        Self {
+            map: iter.into_iter().map(|x| (x, ())).collect(),
+        }
+    }
+}
+
+impl Set for IntegerSet
+where
+    T: PrimInt + AsPrimitive,
+    S: PrimInt + Unsigned,
+{
+    type Iterator<'a> = Iter<'a, T>
+    where
+        T: 'a,
+        S: 'a;
+
+    // Both methods resolve to the inherent methods of the same name (inherent methods take
+    // precedence over trait methods), so they do not recurse.
+    fn iter(&self) -> Iter<'_, T> {
+        self.iter()
+    }
+
+    fn contains(&self, value: &T) -> bool {
+        self.contains(value)
+    }
+}
+
+// The operator impls below collect the result of the corresponding set operation
+// (`union`, `intersection`, `symmetric_difference`, `difference`) into a new `HashSet` of
+// copied elements.
+impl BitOr<&ST> for &IntegerSet
+where
+    T: PrimInt + AsPrimitive + Clone + Hash,
+    S: PrimInt + Unsigned,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn bitor(self, rhs: &ST) -> Self::Output {
+        self.union(rhs).copied().collect()
+    }
+}
+
+impl BitAnd<&ST> for &IntegerSet
+where
+    T: PrimInt + AsPrimitive + Clone + Hash,
+    S: PrimInt + Unsigned,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn bitand(self, rhs: &ST) -> Self::Output {
+        self.intersection(rhs).copied().collect()
+    }
+}
+
+impl BitXor<&ST> for &IntegerSet
+where
+    T: PrimInt + AsPrimitive + Clone + Hash,
+    S: PrimInt + Unsigned,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn bitxor(self, rhs: &ST) -> Self::Output {
+        self.symmetric_difference(rhs).copied().collect()
+    }
+}
+
+impl Sub<&ST> for &IntegerSet
+where
+    T: PrimInt + AsPrimitive + Clone + Hash,
+    S: PrimInt + Unsigned,
+    ST: Set,
+{
+    type Output = HashSet;
+
+    fn sub(self, rhs: &ST) -> Self::Output {
+        self.difference(rhs).copied().collect()
+    }
+}
+
+impl PartialEq for IntegerSet
+where
+    T: PrimInt + AsPrimitive + Hash,
+    S: PrimInt + Unsigned,
+    ST: Set,
+{
+    // Equal when both sides have the same length and `other` contains every element of `self`.
+    fn eq(&self, other: &ST) -> bool {
+        if self.len() != other.len() {
+            return false;
+        }
+
+        self.iter().all(|value| other.contains(value))
+    }
+}
+
+impl Eq for IntegerSet
+where
+    T: PrimInt + AsPrimitive + Hash,
+    S: PrimInt + Unsigned,
+{
+}
diff --git a/frozen-collections-core/src/specialized_sets/iterators.rs b/frozen-collections-core/src/specialized_sets/iterators.rs
new file mode 100644
index 0000000..e57714a
--- /dev/null
+++ b/frozen-collections-core/src/specialized_sets/iterators.rs
@@ -0,0 +1,499 @@
+use std::cmp::min;
+use std::fmt::{Debug, Formatter, Result};
+use std::iter::FusedIterator;
+
+use crate::traits::Set;
+
+/// An iterator over the values of a set.
+#[derive(Clone)]
+pub struct Iter<'a, T> {
+    // Set elements stored as map entries with unit values.
+    entries: &'a [(T, ())],
+    // Position of the next entry to yield.
+    index: usize,
+}
+
+impl<'a, T> Iter<'a, T> {
+    // Starts iteration at the first entry.
+    pub(crate) const fn new(entries: &'a [(T, ())]) -> Self {
+        Self { entries, index: 0 }
+    }
+}
+
+impl<'a, T> Iterator for Iter<'a, T> {
+    type Item = &'a T;
+
+    // Yields the element at `index` and advances, or `None` once every entry was visited.
+    fn next(&mut self) -> Option {
+        if self.index < self.entries.len() {
+            let entry = &self.entries[self.index];
+            self.index += 1;
+            Some(&entry.0)
+        } else {
+            None
+        }
+    }
+
+    // Exact: the number of entries not yet yielded.
+    fn size_hint(&self) -> (usize, Option) {
+        (self.len(), Some(self.len()))
+    }
+
+    // Answered from the remaining length without walking the entries.
+    fn count(self) -> usize
+    where
+        Self: Sized,
+    {
+        self.len()
+    }
+}
+
+impl<'a, T> ExactSizeIterator for Iter<'a, T> {
+    fn len(&self) -> usize {
+        self.entries.len() - self.index
+    }
+}
+
+impl<'a, T> FusedIterator for Iter<'a, T> {}
+
+impl<'a, T> Debug for Iter<'a, T>
+where
+    T: Debug + Clone,
+{
+    // Formats the remaining elements as a list by iterating a clone of this iterator.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        f.debug_list().entries((*self).clone()).finish()
+    }
+}
+
+/// A consuming iterator over the values of a set.
+#[derive(Clone)] +pub struct IntoIter { + iter: std::vec::IntoIter<(T, ())>, +} + +impl IntoIter { + pub(crate) fn new(entries: Box<[(T, ())]>) -> Self { + Self { + iter: entries.into_vec().into_iter(), + } + } +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + let item = self.iter.next()?; + Some(item.0) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + fn count(self) -> usize + where + Self: Sized, + { + self.iter.count() + } +} + +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl FusedIterator for IntoIter {} + +impl Debug for IntoIter +where + T: Clone + Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} + +/// An iterator that returns the union between two sets. +pub struct Union<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + T: 'a, +{ + s1: &'a S1, + s1_iter: >::Iterator<'a>, + s2: &'a S2, + s2_iter: >::Iterator<'a>, +} + +impl<'a, S1, S2, T> Union<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + pub(crate) fn new(s1: &'a S1, s2: &'a S2) -> Self { + Self { + s1_iter: s1.iter(), + s1, + s2_iter: s2.iter(), + s2, + } + } +} + +impl<'a, S1, S2, T> Iterator for Union<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + type Item = &'a T; + + #[allow(clippy::needless_borrow)] + fn next(&mut self) -> Option { + if self.s1.len() > self.s2.len() { + let item = self.s1_iter.next(); + if item.is_some() { + return item; + } + + loop { + let item = self.s2_iter.next()?; + if !self.s1.contains(&item) { + return Some(item); + } + } + } else { + let item = self.s2_iter.next(); + if item.is_some() { + return item; + } + + loop { + let item = self.s1_iter.next()?; + if !self.s2.contains(&item) { + return Some(item); + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let h1 = self.s1_iter.size_hint().1; + let h2 = 
self.s2_iter.size_hint().1; + + if let Some(h1x) = h1 { + if let Some(h2x) = h2 { + return (0, h1x.checked_add(h2x)); + } + } + + (0, None) + } +} + +impl<'a, S1, S2, T> Clone for Union<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, +{ + fn clone(&self) -> Self { + Self { + s1: self.s1, + s1_iter: self.s1_iter.clone(), + s2: self.s2, + s2_iter: self.s2_iter.clone(), + } + } +} + +impl<'a, S1, S2, T> FusedIterator for Union<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ +} + +impl<'a, S1, S2, T> Debug for Union<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} + +/// An iterator that returns the symmetric difference between two sets. +pub struct SymmetricDifference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + T: 'a, +{ + s1: &'a S1, + s1_iter: >::Iterator<'a>, + s2: &'a S2, + s2_iter: >::Iterator<'a>, +} + +impl<'a, S1, S2, T> SymmetricDifference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + pub(crate) fn new(s1: &'a S1, s2: &'a S2) -> Self { + Self { + s1_iter: s1.iter(), + s1, + s2_iter: s2.iter(), + s2, + } + } +} + +impl<'a, S1, S2, T> Iterator for SymmetricDifference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + type Item = &'a T; + + #[allow(clippy::needless_borrow)] + fn next(&mut self) -> Option { + for item in self.s1_iter.by_ref() { + if !self.s2.contains(&item) { + return Some(item); + } + } + + self.s2_iter.by_ref().find(|&item| !self.s1.contains(&item)) + } + + fn size_hint(&self) -> (usize, Option) { + let h1 = self.s1_iter.size_hint().1; + let h2 = self.s2_iter.size_hint().1; + + if let Some(h1x) = h1 { + if let Some(h2x) = h2 { + return (0, h1x.checked_add(h2x)); + } + } + + (0, None) + } +} + +impl<'a, S1, S2, T> Clone 
for SymmetricDifference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, +{ + fn clone(&self) -> Self { + Self { + s1: self.s1, + s1_iter: self.s1_iter.clone(), + s2: self.s2, + s2_iter: self.s2_iter.clone(), + } + } +} + +impl<'a, S1, S2, T> FusedIterator for SymmetricDifference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ +} + +impl<'a, S1, S2, T> Debug for SymmetricDifference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} + +/// An iterator that returns the difference between two sets. +pub struct Difference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + T: 'a, +{ + s1: &'a S1, + s1_iter: >::Iterator<'a>, + s2: &'a S2, +} + +impl<'a, S1, S2, T> Difference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + pub(crate) fn new(s1: &'a S1, s2: &'a S2) -> Self { + Self { + s1_iter: s1.iter(), + s1, + s2, + } + } +} + +impl<'a, S1, S2, T> Iterator for Difference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + type Item = &'a T; + + #[allow(clippy::needless_borrow)] + fn next(&mut self) -> Option { + loop { + let item = self.s1_iter.next()?; + if !self.s2.contains(&item) { + return Some(item); + } + } + } +} + +impl<'a, S1, S2, T> Clone for Difference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, +{ + fn clone(&self) -> Self { + Self { + s1: self.s1, + s1_iter: self.s1_iter.clone(), + s2: self.s2, + } + } +} + +impl<'a, S1, S2, T> FusedIterator for Difference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ +} + +impl<'a, S1, S2, T> Debug for Difference<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, 
+ T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} + +/// An iterator that returns the intersection between two sets. +pub struct Intersection<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + T: 'a, +{ + s1: &'a S1, + s1_iter: >::Iterator<'a>, + s2: &'a S2, + s2_iter: >::Iterator<'a>, +} + +impl<'a, S1, S2, T> Intersection<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + pub(crate) fn new(s1: &'a S1, s2: &'a S2) -> Self { + Self { + s1_iter: s1.iter(), + s1, + s2_iter: s2.iter(), + s2, + } + } +} + +impl<'a, S1, S2, T> Iterator for Intersection<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ + type Item = &'a T; + + #[allow(clippy::needless_borrow)] + fn next(&mut self) -> Option { + if self.s1.len() < self.s2.len() { + loop { + let item = self.s1_iter.next()?; + if self.s2.contains(&item) { + return Some(item); + } + } + } else { + loop { + let item = self.s2_iter.next()?; + if self.s1.contains(&item) { + return Some(item); + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(min(self.s1.len(), self.s2.len()))) + } +} + +impl<'a, S1, S2, T> Clone for Intersection<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, +{ + fn clone(&self) -> Self { + Self { + s1: self.s1, + s1_iter: self.s1_iter.clone(), + s2: self.s2, + s2_iter: self.s2_iter.clone(), + } + } +} + +impl<'a, S1, S2, T> FusedIterator for Intersection<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, +{ +} + +impl<'a, S1, S2, T> Debug for Intersection<'a, S1, S2, T> +where + S1: Set + ?Sized, + S2: Set + ?Sized, + >::Iterator<'a>: Clone, + >::Iterator<'a>: Clone, + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_list().entries((*self).clone()).finish() + } +} diff --git a/frozen-collections-core/src/specialized_sets/left_slice_set.rs 
b/frozen-collections-core/src/specialized_sets/left_slice_set.rs new file mode 100644 index 0000000..9b66ba5 --- /dev/null +++ b/frozen-collections-core/src/specialized_sets/left_slice_set.rs @@ -0,0 +1,252 @@ +use std::borrow::Borrow; +use std::collections::HashSet; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::{BuildHasher, Hash, RandomState}; +use std::ops::{BitAnd, BitOr, BitXor, Range, Sub}; + +use num_traits::{PrimInt, Unsigned}; + +use crate::specialized_maps::LeftSliceMap; +use crate::specialized_sets::{IntoIter, Iter}; +use crate::traits::Len; +use crate::traits::RangeHash; +use crate::traits::Set; + +/// A set that hashes left-aligned slices of its values. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the set. A `u8` will allow up to 255 elements, `u16` +/// will allow up to 65,535 elements, and `usize` will allow +/// up to `usize::MAX` elements. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenSet` type or the `frozen_set!` macro. 
+#[derive(Clone)] +pub struct LeftSliceSet { + map: LeftSliceMap, +} + +impl LeftSliceSet +where + T: RangeHash + Len + Hash + Eq, + S: PrimInt + Unsigned, +{ + #[allow(clippy::missing_errors_doc)] + pub fn try_from( + payload: Vec, + range: Range, + ) -> std::result::Result { + Self::with_hasher(payload, range, RandomState::new()) + } +} + +impl LeftSliceSet +where + T: RangeHash + Len + Hash + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[allow(clippy::missing_errors_doc)] + pub fn with_hasher( + payload: Vec, + range: Range, + bh: BH, + ) -> std::result::Result { + Ok(Self { + map: LeftSliceMap::with_hasher( + payload.into_iter().map(|x| (x, ())).collect(), + range, + bh, + )?, + }) + } +} + +impl LeftSliceSet +where + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[inline] + #[must_use] + pub fn get(&self, value: &Q) -> Option<&T> + where + T: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + Some(self.map.get_key_value(value)?.0) + } + + #[inline] + #[must_use] + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + self.get(value).is_some() + } +} + +impl LeftSliceSet { + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.map.table.entries) + } + + #[must_use] + pub const fn hasher(&self) -> &BH { + self.map.hasher() + } +} + +impl Len for LeftSliceSet { + fn len(&self) -> usize { + self.map.len() + } +} + +impl Debug for LeftSliceSet +where + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl Default for LeftSliceSet +where + S: PrimInt + Unsigned, + BH: BuildHasher + Default, +{ + fn default() -> Self { + Self { + map: LeftSliceMap::default(), + } + } +} + +impl IntoIterator for LeftSliceSet { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.map.table.entries) + } +} + +impl<'a, T, S, BH> IntoIterator for &'a LeftSliceSet { + type Item = &'a T; + 
type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl Set for LeftSliceSet +where + T: RangeHash + Len + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + type Iterator<'a> = Iter<'a, T> + where + T: 'a, + S: 'a, + BH: 'a; + + fn iter(&self) -> Iter<'_, T> { + self.iter() + } + + fn contains(&self, value: &T) -> bool { + self.contains(value) + } +} + +impl BitOr<&ST> for &LeftSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitor(self, rhs: &ST) -> Self::Output { + self.union(rhs).cloned().collect() + } +} + +impl BitAnd<&ST> for &LeftSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitand(self, rhs: &ST) -> Self::Output { + self.intersection(rhs).cloned().collect() + } +} + +impl BitXor<&ST> for &LeftSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitxor(self, rhs: &ST) -> Self::Output { + self.symmetric_difference(rhs).cloned().collect() + } +} + +impl Sub<&ST> for &LeftSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn sub(self, rhs: &ST) -> Self::Output { + self.difference(rhs).cloned().collect() + } +} + +impl PartialEq for LeftSliceSet +where + T: RangeHash + Len + Eq, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + fn eq(&self, other: &ST) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|value| other.contains(value)) + } +} + +impl Eq for LeftSliceSet +where + T: RangeHash + Len + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher + Default, +{ +} diff --git a/frozen-collections-core/src/specialized_sets/length_set.rs 
b/frozen-collections-core/src/specialized_sets/length_set.rs new file mode 100644 index 0000000..cccc006 --- /dev/null +++ b/frozen-collections-core/src/specialized_sets/length_set.rs @@ -0,0 +1,242 @@ +use std::borrow::Borrow; +use std::collections::HashSet; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::{Hash, RandomState}; +use std::ops::{BitAnd, BitOr, BitXor, Sub}; + +use num_traits::{PrimInt, Unsigned}; + +use crate::specialized_maps::LengthMap; +use crate::specialized_sets::{IntoIter, Iter}; +use crate::traits::Len; +use crate::traits::Set; + +/// A set that uses value lengths as hash codes. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the set. A `u8` will allow up to 255 elements, `u16` +/// will allow up to 65,535 elements, and `usize` will allow +/// up to `usize::MAX` elements. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenSet` type or the `frozen_set!` macro. 
+#[derive(Clone)] +pub struct LengthSet { + map: LengthMap, +} + +impl LengthSet +where + S: PrimInt + Unsigned, +{ + #[inline] + #[must_use] + pub fn get(&self, value: &Q) -> Option<&T> + where + T: Borrow, + Q: ?Sized + Len + Eq, + { + Some(self.map.get_key_value(value)?.0) + } + + #[inline] + #[must_use] + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: ?Sized + Len + Eq, + { + self.get(value).is_some() + } +} + +impl LengthSet { + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.map.table.entries) + } +} + +impl Len for LengthSet { + fn len(&self) -> usize { + self.map.len() + } +} + +impl Debug for LengthSet +where + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl Default for LengthSet +where + S: PrimInt + Unsigned, +{ + fn default() -> Self { + Self { + map: LengthMap::default(), + } + } +} + +impl IntoIterator for LengthSet { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.map.table.entries) + } +} + +impl<'a, T, S> IntoIterator for &'a LengthSet { + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl TryFrom> for LengthSet +where + T: Len + Hash + Eq, + S: PrimInt + Unsigned, +{ + type Error = &'static str; + + #[allow(clippy::from_iter_instead_of_collect)] + fn try_from(payload: Vec) -> std::result::Result { + Ok(Self { + map: LengthMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?, + }) + } +} + +impl TryFrom<[T; N]> for LengthSet +where + T: Len + Hash + Eq, + S: PrimInt + Unsigned, +{ + type Error = &'static str; + + #[allow(clippy::from_iter_instead_of_collect)] + fn try_from(payload: [T; N]) -> std::result::Result { + Ok(Self { + map: LengthMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?, + }) + } +} + +impl FromIterator for LengthSet +where + T: Len + Hash + Eq, + S: 
PrimInt + Unsigned, +{ + fn from_iter>(iter: I) -> Self { + Self { + map: iter.into_iter().map(|x| (x, ())).collect(), + } + } +} + +impl Set for LengthSet +where + T: Len + Eq, + S: PrimInt + Unsigned, +{ + type Iterator<'a> = Iter<'a, T> + where + T: 'a, + S: 'a; + + fn iter(&self) -> Iter<'_, T> { + self.iter() + } + + fn contains(&self, value: &T) -> bool { + self.contains(value) + } +} + +impl BitOr<&ST> for &LengthSet +where + T: Hash + Eq + Len + Clone, + S: PrimInt + Unsigned, + ST: Set, +{ + type Output = HashSet; + + fn bitor(self, rhs: &ST) -> Self::Output { + self.union(rhs).cloned().collect() + } +} + +impl BitAnd<&ST> for &LengthSet +where + T: Hash + Eq + Len + Clone, + S: PrimInt + Unsigned, + ST: Set, +{ + type Output = HashSet; + + fn bitand(self, rhs: &ST) -> Self::Output { + self.intersection(rhs).cloned().collect() + } +} + +impl BitXor<&ST> for &LengthSet +where + T: Hash + Eq + Len + Clone, + S: PrimInt + Unsigned, + ST: Set, +{ + type Output = HashSet; + + fn bitxor(self, rhs: &ST) -> Self::Output { + self.symmetric_difference(rhs).cloned().collect() + } +} + +impl Sub<&ST> for &LengthSet +where + T: Hash + Eq + Len + Clone, + S: PrimInt + Unsigned, + ST: Set, +{ + type Output = HashSet; + + fn sub(self, rhs: &ST) -> Self::Output { + self.difference(rhs).cloned().collect() + } +} + +impl PartialEq for LengthSet +where + T: Hash + Eq + Len, + S: PrimInt + Unsigned, + ST: Set, +{ + fn eq(&self, other: &ST) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|value| other.contains(value)) + } +} + +impl Eq for LengthSet +where + T: Hash + Eq + Len, + S: PrimInt + Unsigned, +{ +} diff --git a/frozen-collections-core/src/specialized_sets/mod.rs b/frozen-collections-core/src/specialized_sets/mod.rs new file mode 100644 index 0000000..a986180 --- /dev/null +++ b/frozen-collections-core/src/specialized_sets/mod.rs @@ -0,0 +1,19 @@ +//! Specialized read-only sets used as implementation details of frozen sets. 
+ +pub use common_set::CommonSet; +pub use integer_range_set::IntegerRangeSet; +pub use integer_set::IntegerSet; +pub use iterators::*; +pub use left_slice_set::LeftSliceSet; +pub use length_set::LengthSet; +pub use right_slice_set::RightSliceSet; +pub use scanning_set::ScanningSet; + +mod common_set; +mod integer_range_set; +mod integer_set; +mod iterators; +mod left_slice_set; +mod length_set; +mod right_slice_set; +mod scanning_set; diff --git a/frozen-collections-core/src/specialized_sets/right_slice_set.rs b/frozen-collections-core/src/specialized_sets/right_slice_set.rs new file mode 100644 index 0000000..4b5774f --- /dev/null +++ b/frozen-collections-core/src/specialized_sets/right_slice_set.rs @@ -0,0 +1,252 @@ +use std::borrow::Borrow; +use std::collections::HashSet; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::{BuildHasher, Hash, RandomState}; +use std::ops::{BitAnd, BitOr, BitXor, Range, Sub}; + +use num_traits::{PrimInt, Unsigned}; + +use crate::specialized_maps::RightSliceMap; +use crate::specialized_sets::{IntoIter, Iter}; +use crate::traits::Len; +use crate::traits::RangeHash; +use crate::traits::Set; + +/// A set that hashes right-aligned slices of its values. +/// +/// # Capacity Constraints +/// +/// The `S` generic argument controls the maximum capacity +/// of the set. A `u8` will allow up to 255 elements, `u16` +/// will allow up to 65,535 elements, and `usize` will allow +/// up to `usize::MAX` elements. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenSet` type or the `frozen_set!` macro. 
+#[derive(Clone)] +pub struct RightSliceSet { + map: RightSliceMap, +} + +impl RightSliceSet +where + T: RangeHash + Len + Hash + Eq, + S: PrimInt + Unsigned, +{ + #[allow(clippy::missing_errors_doc)] + pub fn try_from( + payload: Vec, + range: Range, + ) -> std::result::Result { + Self::with_hasher(payload, range, RandomState::new()) + } +} + +impl RightSliceSet +where + T: RangeHash + Len + Hash + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[allow(clippy::missing_errors_doc)] + pub fn with_hasher( + payload: Vec, + range: Range, + bh: BH, + ) -> std::result::Result { + Ok(Self { + map: RightSliceMap::with_hasher( + payload.into_iter().map(|x| (x, ())).collect(), + range, + bh, + )?, + }) + } +} + +impl RightSliceSet +where + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + #[inline] + #[must_use] + pub fn get(&self, value: &Q) -> Option<&T> + where + T: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + Some(self.map.get_key_value(value)?.0) + } + + #[inline] + #[must_use] + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: ?Sized + RangeHash + Len + Eq, + { + self.get(value).is_some() + } +} + +impl RightSliceSet { + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.map.table.entries) + } + + #[must_use] + pub const fn hasher(&self) -> &BH { + self.map.hasher() + } +} + +impl Len for RightSliceSet { + fn len(&self) -> usize { + self.map.len() + } +} + +impl Debug for RightSliceSet +where + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl Default for RightSliceSet +where + S: PrimInt + Unsigned, + BH: BuildHasher + Default, +{ + fn default() -> Self { + Self { + map: RightSliceMap::default(), + } + } +} + +impl IntoIterator for RightSliceSet { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.map.table.entries) + } +} + +impl<'a, T, S, BH> IntoIterator for &'a RightSliceSet { + type Item = 
&'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl Set for RightSliceSet +where + T: RangeHash + Len + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher, +{ + type Iterator<'a> = Iter<'a, T> + where + T: 'a, + S: 'a, + BH: 'a; + + fn iter(&self) -> Iter<'_, T> { + self.iter() + } + + fn contains(&self, value: &T) -> bool { + self.contains(value) + } +} + +impl BitOr<&ST> for &RightSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitor(self, rhs: &ST) -> Self::Output { + self.union(rhs).cloned().collect() + } +} + +impl BitAnd<&ST> for &RightSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitand(self, rhs: &ST) -> Self::Output { + self.intersection(rhs).cloned().collect() + } +} + +impl BitXor<&ST> for &RightSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitxor(self, rhs: &ST) -> Self::Output { + self.symmetric_difference(rhs).cloned().collect() + } +} + +impl Sub<&ST> for &RightSliceSet +where + T: RangeHash + Hash + Len + Eq + Clone, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn sub(self, rhs: &ST) -> Self::Output { + self.difference(rhs).cloned().collect() + } +} + +impl PartialEq for RightSliceSet +where + T: RangeHash + Len + Eq, + S: PrimInt + Unsigned, + ST: Set, + BH: BuildHasher + Default, +{ + fn eq(&self, other: &ST) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|value| other.contains(value)) + } +} + +impl Eq for RightSliceSet +where + T: RangeHash + Len + Eq, + S: PrimInt + Unsigned, + BH: BuildHasher + Default, +{ +} diff --git 
a/frozen-collections-core/src/specialized_sets/scanning_set.rs b/frozen-collections-core/src/specialized_sets/scanning_set.rs new file mode 100644 index 0000000..ba72882 --- /dev/null +++ b/frozen-collections-core/src/specialized_sets/scanning_set.rs @@ -0,0 +1,215 @@ +use std::borrow::Borrow; +use std::collections::HashSet; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::{Hash, RandomState}; +use std::ops::{BitAnd, BitOr, BitXor, Sub}; + +use crate::specialized_maps::ScanningMap; +use crate::specialized_sets::{IntoIter, Iter}; +use crate::traits::Len; +use crate::traits::Set; + +/// A general purpose set that uses linear scan of values rather than a hash table. +/// +/// # Capacity Constraints +/// +/// Unlike the hash-based specialized sets, this type takes +/// no `S` generic argument. Values are stored in a plain +/// slice that is scanned linearly, so no index type bounds +/// the number of elements the set can hold. +/// +/// # Important Note +/// +/// This type is not intended to be used directly by +/// application code. Instead, applications are expected +/// to use the `FrozenSet` type or the `frozen_set!` macro.
+#[derive(Clone)] +pub struct ScanningSet { + map: ScanningMap, +} + +impl ScanningSet { + #[inline] + #[must_use] + pub fn get(&self, value: &Q) -> Option<&T> + where + T: Borrow, + Q: ?Sized + Eq, + { + Some(self.map.get_key_value(value)?.0) + } + + #[inline] + #[must_use] + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: ?Sized + Eq, + { + self.get(value).is_some() + } + + #[must_use] + pub const fn iter(&self) -> Iter { + Iter::new(&self.map.entries) + } +} + +impl Len for ScanningSet { + fn len(&self) -> usize { + self.map.len() + } +} + +impl Debug for ScanningSet +where + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl Default for ScanningSet { + fn default() -> Self { + Self { + map: ScanningMap::::default(), + } + } +} + +impl IntoIterator for ScanningSet { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self.map.entries) + } +} + +impl<'a, T> IntoIterator for &'a ScanningSet { + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl TryFrom> for ScanningSet +where + T: Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: Vec) -> std::result::Result { + Ok(Self { + map: ScanningMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?, + }) + } +} + +impl TryFrom<[T; N]> for ScanningSet +where + T: Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: [T; N]) -> std::result::Result { + Ok(Self { + map: ScanningMap::try_from(Vec::from_iter(payload.into_iter().map(|x| (x, ()))))?, + }) + } +} + +impl FromIterator for ScanningSet +where + T: Hash + Eq, +{ + fn from_iter>(iter: I) -> Self { + Self { + map: iter.into_iter().map(|x| (x, ())).collect(), + } + } +} + +impl Set for ScanningSet +where + T: Eq, +{ + type Iterator<'a> = Iter<'a, T> + where + T: 'a; + + fn iter(&self) -> Iter<'_, T> { + 
self.iter() + } + + fn contains(&self, value: &T) -> bool { + self.contains(value) + } +} + +impl BitOr<&ST> for &ScanningSet +where + T: Hash + Eq + Clone, + ST: Set, +{ + type Output = HashSet; + + fn bitor(self, rhs: &ST) -> Self::Output { + self.union(rhs).cloned().collect() + } +} + +impl BitAnd<&ST> for &ScanningSet +where + T: Hash + Eq + Clone, + ST: Set, +{ + type Output = HashSet; + + fn bitand(self, rhs: &ST) -> Self::Output { + self.intersection(rhs).cloned().collect() + } +} + +impl BitXor<&ST> for &ScanningSet +where + T: Hash + Eq + Clone, + ST: Set, +{ + type Output = HashSet; + + fn bitxor(self, rhs: &ST) -> Self::Output { + self.symmetric_difference(rhs).cloned().collect() + } +} + +impl Sub<&ST> for &ScanningSet +where + T: Hash + Eq + Clone, + ST: Set, +{ + type Output = HashSet; + + fn sub(self, rhs: &ST) -> Self::Output { + self.difference(rhs).cloned().collect() + } +} + +impl PartialEq for ScanningSet +where + T: Hash + Eq, + ST: Set, +{ + fn eq(&self, other: &ST) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|value| other.contains(value)) + } +} + +impl Eq for ScanningSet where T: Hash + Eq {} diff --git a/frozen-collections-core/src/traits/len.rs b/frozen-collections-core/src/traits/len.rs new file mode 100644 index 0000000..f21d7df --- /dev/null +++ b/frozen-collections-core/src/traits/len.rs @@ -0,0 +1,150 @@ +use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; +use std::ffi::{CStr, CString, OsStr, OsString}; +use std::rc::Rc; +use std::sync::Arc; + +/// Describes the length of a collection. +pub trait Len { + /// Returns the length of a collection. + fn len(&self) -> usize; + + /// Returns whether a collection is empty. 
+ fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl Len for HashSet { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for HashMap { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for String { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for str { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for CStr { + fn len(&self) -> usize { + self.to_bytes().len() + } +} + +impl Len for CString { + fn len(&self) -> usize { + self.as_bytes().len() + } +} + +impl Len for [T] { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for Box { + fn len(&self) -> usize { + T::len(self) + } +} + +impl Len for Rc { + fn len(&self) -> usize { + T::len(self) + } +} + +impl Len for Arc { + fn len(&self) -> usize { + T::len(self) + } +} + +impl Len for BTreeMap { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for BTreeSet { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for BinaryHeap { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for LinkedList { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for Vec { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for VecDeque { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for OsStr { + fn len(&self) -> usize { + self.len() + } +} + +impl Len for OsString { + fn len(&self) -> usize { + self.as_os_str().len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hashset_len_and_is_empty() { + let mut set = HashSet::new(); + assert_eq!(set.len(), 0); + assert!(set.is_empty()); + + set.insert(1); + assert_eq!(set.len(), 1); + assert!(!set.is_empty()); + } + + #[test] + fn hashmap_len_and_is_empty() { + let mut map = HashMap::new(); + assert_eq!(map.len(), 0); + assert!(map.is_empty()); + + map.insert("key", "value"); + assert_eq!(map.len(), 1); + assert!(!map.is_empty()); + } +} diff --git a/frozen-collections-core/src/traits/mod.rs b/frozen-collections-core/src/traits/mod.rs new file mode 100644 index 
0000000..32b5526 --- /dev/null +++ b/frozen-collections-core/src/traits/mod.rs @@ -0,0 +1,9 @@ +//! Traits to support frozen collections. + +pub use crate::traits::len::Len; +pub use crate::traits::range_hash::RangeHash; +pub use crate::traits::set::Set; + +mod len; +mod range_hash; +mod set; diff --git a/frozen-collections-core/src/traits/range_hash.rs b/frozen-collections-core/src/traits/range_hash.rs new file mode 100644 index 0000000..5e7e7f0 --- /dev/null +++ b/frozen-collections-core/src/traits/range_hash.rs @@ -0,0 +1,71 @@ +use std::hash::{BuildHasher, Hasher}; +use std::ops::Range; + +/// Enables hashing over a range of an input. +pub trait RangeHash { + /// Hash a range of a value. The provided implementations panic if `range` is out of bounds. + #[must_use] + fn hash_range(&self, bh: &BH, range: Range) -> u64 + where + BH: BuildHasher; +} + +impl RangeHash for String { + #[inline] + fn hash_range(&self, bh: &BH, range: Range) -> u64 + where + BH: BuildHasher, + { + let mut h = bh.build_hasher(); + let b = &self.as_bytes()[range]; + h.write(b); + h.finish() + } +} + +impl RangeHash for str { + #[inline] + fn hash_range(&self, bh: &BH, range: Range) -> u64 + where + BH: BuildHasher, + { + let mut h = bh.build_hasher(); + let b = &self.as_bytes()[range]; + h.write(b); + h.finish() + } +} + +impl RangeHash for [u8] { + #[inline] + fn hash_range(&self, bh: &BH, range: Range) -> u64 + where + BH: BuildHasher, + { + let mut h = bh.build_hasher(); + let b = &self[range]; + h.write(b); + h.finish() + } +} + +#[cfg(test)] +mod tests { + use std::hash::RandomState; + + use super::*; + + #[test] + fn test_hash_range_for_slice_u8() { + let data = [1, 2, 3, 4, 5]; + let hasher = RandomState::new(); + + let hash_full = data.hash_range(&hasher, 0..5); + let hash_partial = data.hash_range(&hasher, 1..4); + + assert_ne!( + hash_full, hash_partial, + "Hashes should differ for different ranges" + ); + } +} diff --git a/frozen-collections-core/src/traits/set.rs
b/frozen-collections-core/src/traits/set.rs new file mode 100644 index 0000000..5f2bcd4 --- /dev/null +++ b/frozen-collections-core/src/traits/set.rs @@ -0,0 +1,145 @@ +use std::collections::hash_set::Iter; +use std::collections::{BTreeSet, HashSet}; +use std::hash::{BuildHasher, Hash}; + +use crate::specialized_sets::{Difference, Intersection, SymmetricDifference, Union}; +use crate::traits::Len; + +/// Common abstractions for sets. +pub trait Set: Len { + type Iterator<'a>: Iterator + where + Self: 'a, + T: 'a; + + /// An iterator visiting all elements in arbitrary order. + /// The iterator element type is `&'a T`. + fn iter(&self) -> Self::Iterator<'_>; + + /// Checks whether a particular value is present in the set. + fn contains(&self, value: &T) -> bool; + + /// Visits the values representing the union, + /// i.e., all the values in `self` or `other`, without duplicates. + fn union<'a, ST>(&'a self, other: &'a ST) -> Union<'a, Self, ST, T> + where + ST: Set, + Self: Sized, + { + Union::new(self, other) + } + + /// Visits the values representing the symmetric difference, + /// i.e., the values that are in `self` or in `other` but not in both. + fn symmetric_difference<'a, ST>(&'a self, other: &'a ST) -> SymmetricDifference<'a, Self, ST, T> + where + ST: Set, + Self: Sized, + { + SymmetricDifference::new(self, other) + } + + /// Visits the values representing the difference, + /// i.e., the values that are in `self` but not in `other`. + fn difference<'a, ST>(&'a self, other: &'a ST) -> Difference<'a, Self, ST, T> + where + ST: Set, + Self: Sized, + { + Difference::new(self, other) + } + + /// Visits the values representing the intersection, + /// i.e., the values that are both in `self` and `other`. + /// + /// When an equal element is present in `self` and `other` + /// then the resulting `Intersection` may yield references to + /// one or the other. 
This can be relevant if `T` contains fields which + /// are not compared by its `Eq` implementation, and may hold different + /// value between the two equal copies of `T` in the two sets. + fn intersection<'a, ST>(&'a self, other: &'a ST) -> Intersection<'a, Self, ST, T> + where + ST: Set, + Self: Sized, + { + Intersection::new(self, other) + } + + /// Returns `true` if `self` has no elements in common with `other`. + /// This is equivalent to checking for an empty intersection. + fn is_disjoint<'a, ST>(&'a self, other: &'a ST) -> bool + where + ST: Set, + Self: Sized, + { + if self.len() <= other.len() { + self.iter().all(|v| !other.contains(v)) + } else { + other.iter().all(|v| !self.contains(v)) + } + } + + /// Returns `true` if the set is a subset of another, + /// i.e., `other` contains at least all the values in `self`. + fn is_subset<'a, ST>(&'a self, other: &'a ST) -> bool + where + ST: Set, + Self: Sized, + { + if self.len() <= other.len() { + self.iter().all(|v| other.contains(v)) + } else { + false + } + } + + /// Returns `true` if the set is a superset of another, + /// i.e., `self` contains at least all the values in `other`.
+ fn is_superset<'a, ST>(&'a self, other: &'a ST) -> bool + where + ST: Set, + Self: Sized, + { + if other.len() <= self.len() { + other.iter().all(|v| self.contains(v)) + } else { + false + } + } +} + +impl Set for HashSet +where + T: Hash + Eq, + BH: BuildHasher, +{ + type Iterator<'a> = Iter<'a, T> + where + T: 'a, + BH: 'a; + + fn iter(&self) -> Self::Iterator<'_> { + Self::iter(self) + } + + fn contains(&self, value: &T) -> bool { + Self::contains(self, value) + } +} + +impl Set for BTreeSet +where + T: Ord, +{ + type Iterator<'a> = std::collections::btree_set::Iter<'a, T> + where + T: 'a; + + fn iter(&self) -> Self::Iterator<'_> { + Self::iter(self) + } + + fn contains(&self, value: &T) -> bool { + Self::contains(self, value) + } +} diff --git a/frozen-collections-macros/Cargo.toml b/frozen-collections-macros/Cargo.toml new file mode 100644 index 0000000..6b92698 --- /dev/null +++ b/frozen-collections-macros/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "frozen-collections-macros" +description = "Macros to support frozen collections" +readme = "README.md" +authors.workspace = true +version.workspace = true +edition.workspace = true +categories.workspace = true +keywords.workspace = true +repository.workspace = true +license.workspace = true + +[lib] +name = "frozen_collections_macros" +path = "src/lib.rs" +proc-macro = true + +[dependencies] +syn = "2.0.67" +quote = "1.0.36" +proc-macro2 = "1.0.85" +bitvec = "1.0.1" +proc-macro-error = "1.0.4" + +[dependencies.frozen-collections-core] +path = "../frozen-collections-core" + +[lints] +workspace = true diff --git a/frozen-collections-macros/README.md b/frozen-collections-macros/README.md new file mode 100644 index 0000000..bfcd1e5 --- /dev/null +++ b/frozen-collections-macros/README.md @@ -0,0 +1,6 @@ +# frozen-collections-macros + +This crate contains the procedural macros for the +frozen-collections crate. 
Users of frozen collections +should generally depend on the frozen-collections crate +instead of this one. diff --git a/frozen-collections-macros/src/lib.rs b/frozen-collections-macros/src/lib.rs new file mode 100644 index 0000000..0c86949 --- /dev/null +++ b/frozen-collections-macros/src/lib.rs @@ -0,0 +1,23 @@ +//! Implementation crate for the frozen collections. +//! +//! Application code should generally not interact with +//! this crate. Please use +//! the `frozen-collections` crate instead. + +use proc_macro::TokenStream; + +use proc_macro_error::proc_macro_error; + +use frozen_collections_core::macros::{frozen_map_macro, frozen_set_macro}; + +#[proc_macro] +#[proc_macro_error] +pub fn frozen_map(item: TokenStream) -> TokenStream { + frozen_map_macro(item.into()).into() +} + +#[proc_macro] +#[proc_macro_error] +pub fn frozen_set(item: TokenStream) -> TokenStream { + frozen_set_macro(item.into()).into() +} diff --git a/frozen-collections/Cargo.toml b/frozen-collections/Cargo.toml new file mode 100644 index 0000000..db575a9 --- /dev/null +++ b/frozen-collections/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "frozen-collections" +description = "Fast read-only collections." +readme.workspace = true +authors.workspace = true +version.workspace = true +edition.workspace = true +categories.workspace = true +keywords.workspace = true +repository.workspace = true +license.workspace = true + +[dependencies] +num-traits = "0.2.19" +bitvec = "1.0.1" +nameof = "1.2.2" + +[dependencies.frozen-collections-macros] +path = "../frozen-collections-macros" + +[dependencies.frozen-collections-core] +path = "../frozen-collections-core" + +[lints] +workspace = true diff --git a/frozen-collections/README.md b/frozen-collections/README.md new file mode 100644 index 0000000..6c79adb --- /dev/null +++ b/frozen-collections/README.md @@ -0,0 +1,3 @@ +# frozen-collections + +This crate contains the API surface for frozen collections. 
diff --git a/frozen-collections/src/frozen_map.rs b/frozen-collections/src/frozen_map.rs new file mode 100644 index 0000000..0d66740 --- /dev/null +++ b/frozen-collections/src/frozen_map.rs @@ -0,0 +1,971 @@ +use std::any::type_name; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::RandomState; +use std::hash::{BuildHasher, Hash}; +use std::mem::transmute; +use std::mem::MaybeUninit; +use std::ops::Index; +use std::ops::IndexMut; + +use bitvec::macros::internal::funty::Fundamental; + +use frozen_collections_core::analyzers::{ + analyze_int_keys, analyze_slice_keys, IntKeyAnalysisResult, SliceKeyAnalysisResult, +}; + +use crate::specialized_maps::*; +use crate::Len; + +/// The different implementations available for use, depending on the type and content of the payload. +#[derive(Clone)] +enum MapTypes { + Scanning(ScanningMap), + + CommonSmall(CommonMap), + CommonLarge(CommonMap), + + U32Small(IntegerMap), + U32Large(IntegerMap), + + U32Range(IntegerRangeMap), + + LeftStringSliceSmall(LeftSliceMap), + LeftStringSliceLarge(LeftSliceMap), + + RightStringSliceSmall(RightSliceMap), + RightStringSliceLarge(RightSliceMap), + + StringLengthSmall(LengthMap), +} + +/// A map optimized for fast read access. +/// +/// A frozen map differs from the traditional [`HashMap`](std::collections::HashMap) type in three key ways. First, creating +/// a new frozen map can take a relatively long time, especially for very large maps. Second, +/// once created, the keys in frozen maps are immutable. And third, probing a frozen map is +/// typically considerably faster, which is the whole point. +/// +/// The reason creating a frozen map can take some time is due to the extensive analysis that is +/// performed on the map's keys in order to determine the best implementation strategy and data +/// layout to use. This analysis is what enables frozen maps to be faster later when +/// reading from the map.
+/// +/// Frozen maps are intended for long-lived maps, where the cost of creating the map is made up +/// over time by the faster read performance. +/// +/// A `FrozenMap` requires that the elements +/// implement the [`Eq`] and [`Hash`] traits. This can frequently be achieved by +/// using `#[derive(PartialEq, Eq, Hash)]`. If you implement these yourself, +/// it is important that the following property holds: +/// +/// ```text +/// k1 == k2 -> hash(k1) == hash(k2) +/// ``` +/// +/// In other words, if two keys are equal, their hashes must be equal. +/// Violating this property is a logic error. +/// +/// It is also a logic error for a key to be modified in such a way that the key's +/// hash, as determined by the [`Hash`] trait, or its equality, as determined by +/// the [`Eq`] trait, changes while it is in the map. This is normally only +/// possible through [`Cell`], [`RefCell`], global state, I/O, or unsafe code. +/// +/// The behavior resulting from either logic error is not specified, but will +/// be encapsulated to the `FrozenMap` that observed the logic error and not +/// result in undefined behavior. This could include panics, incorrect results, +/// aborts, memory leaks, and non-termination. +/// +/// # Macros are Faster +/// +/// If all your keys are known at compile time, you are much better off using the +/// [`frozen_map!`](crate::frozen_map!) macro rather than this type. This will result in considerably +/// better performance. +/// +/// # Implementation Limits +/// +/// Although frozen maps are always faster when reading than traditional hash maps, there are some +/// caveats to be aware of: +/// +/// - [`FrozenMap`] has optimized implementations for the case where the keys are +/// of type [`u32`], but not any other integer types. This limitation doesn't exist +/// for the [`frozen_map!`](crate::frozen_map!) macro. 
+/// +/// - [`FrozenMap`] has optimized implementations for the case where the keys are +/// of type [`String`], but not for the type `&str`. You will generally get considerably faster +/// performance using [`String`]. +/// +/// # Examples +/// +/// ``` +/// # use frozen_collections::FrozenMap; +/// # use frozen_collections::Len; +/// # +/// // Type inference lets us omit an explicit type signature (which +/// // would be `FrozenMap` in this example). +/// let book_reviews = FrozenMap::try_from([ +/// ("Adventures of Huckleberry Finn".to_string(), "My favorite book.".to_string()), +/// ("Grimms' Fairy Tales".to_string(), "Masterpiece.".to_string()), +/// ("Pride and Prejudice".to_string(), "Very enjoyable.".to_string()), +/// ("The Adventures of Sherlock Holmes".to_string(), "I liked it a lot.".to_string()), +/// ]).unwrap(); +/// +/// // Check for a specific one. +/// if !book_reviews.contains_key(&"Les Misérables".to_string()) { +/// println!("We've got {} reviews, but Les Misérables ain't one.", +/// book_reviews.len()); +/// } +/// +/// // Look up the values associated with some keys. +/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"]; +/// for &book in &to_find { +/// match book_reviews.get(&book.to_string()) { +/// Some(review) => println!("{book}: {review}"), +/// None => println!("{book} is unreviewed.") +/// } +/// } +/// +/// // Look up the value for a key (will panic if the key is not found). +/// println!("Review for Jane: {}", book_reviews[&"Pride and Prejudice".to_string()]); +/// +/// // Iterate over everything. +/// for (book, review) in &book_reviews { +/// println!("{book}: \"{review}\""); +/// } +/// ``` +/// +/// The easiest way to use `FrozenMap` with a custom key type is to derive [`Eq`] and [`Hash`]. +/// We must also derive [`PartialEq`]. 
+/// +/// [`RefCell`]: std::cell::RefCell +/// [`Cell`]: std::cell::Cell +/// [`default`]: Default::default +/// [`with_hasher`]: Self::with_hasher +/// +/// ``` +/// # use frozen_collections::FrozenMap; +/// # +/// #[derive(Hash, Eq, PartialEq, Debug)] +/// struct Viking { +/// name: String, +/// country: String, +/// } +/// +/// impl Viking { +/// /// Creates a new Viking. +/// fn new(name: &str, country: &str) -> Viking { +/// Viking { name: name.to_string(), country: country.to_string() } +/// } +/// } +/// +/// // Use a FrozenMap to store the vikings' health points. +/// let vikings = FrozenMap::try_from([ +/// (Viking::new("Einar", "Norway"), 25), +/// (Viking::new("Olaf", "Denmark"), 24), +/// (Viking::new("Harald", "Iceland"), 12), +/// ]).unwrap(); +/// +/// // Use derived implementation to print the status of the vikings. +/// for (viking, health) in &vikings { +/// println!("{viking:?} has {health} hp"); +/// } +/// ``` +#[derive(Clone)] +#[allow(clippy::module_name_repetitions)] +pub struct FrozenMap { + map_impl: MapTypes, +} + +impl FrozenMap +where + K: Hash + Eq, +{ + /// Creates a frozen map. + /// + /// # Errors + /// + /// This fails if there are duplicate keys within the vector. + /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # use std::hash::RandomState; + /// # + /// let map = FrozenMap::new(vec![(1, 2), (3, 4)]).unwrap(); + /// ``` + pub fn new(payload: Vec<(K, V)>) -> std::result::Result { + Self::with_hasher(payload, RandomState::new()) + } +} + +impl FrozenMap +where + K: Hash + Eq, + BH: BuildHasher, +{ + /// Creates a frozen map which will use the given hash builder to hash + /// keys. + /// + /// # Errors + /// + /// This fails if there are duplicate keys within the vector. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # use std::hash::RandomState; + /// # + /// let map = FrozenMap::with_hasher(vec![(1, 2), (3, 4)], RandomState::new()).unwrap(); + /// ``` + pub fn with_hasher(payload: Vec<(K, V)>, bh: BH) -> std::result::Result { + Ok(Self { + map_impl: if payload.len() < 4 { + MapTypes::Scanning(ScanningMap::try_from(payload)?) + } else if type_name::() == type_name::() { + Self::new_u32_map(payload)? + } else if type_name::() == type_name::() { + Self::new_string_map(payload, bh)? + } else { + Self::new_common_map(payload, bh)? + }, + }) + } + + #[allow(clippy::transmute_undefined_repr)] + fn new_u32_map(payload: Vec<(K, V)>) -> std::result::Result, &'static str> { + let payload: Vec<(u32, V)> = unsafe { transmute(payload) }; + + let key_analysis = analyze_int_keys(payload.iter().map(|x| x.0)); + + match key_analysis { + IntKeyAnalysisResult::Range => { + Ok(MapTypes::U32Range(IntegerRangeMap::try_from(payload)?)) + } + IntKeyAnalysisResult::Normal => { + if payload.len() <= u8::MAX.as_usize() { + Ok(MapTypes::U32Small(IntegerMap::try_from(payload)?)) + } else { + Ok(MapTypes::U32Large(IntegerMap::try_from(payload)?)) + } + } + } + } + + #[allow(clippy::transmute_undefined_repr)] + fn new_string_map( + payload: Vec<(K, V)>, + bh: BH, + ) -> std::result::Result, &'static str> { + let payload: Vec<(String, V)> = unsafe { transmute(payload) }; + + let key_analysis = analyze_slice_keys(payload.iter().map(|x| x.0.as_bytes()), &bh); + + if payload.len() <= u8::MAX.as_usize() { + match key_analysis { + SliceKeyAnalysisResult::Normal => Ok(MapTypes::CommonSmall( + CommonMap::with_hasher(unsafe { transmute(payload) }, bh)?, + )), + + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } => Ok(MapTypes::LeftStringSliceSmall(LeftSliceMap::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + + SliceKeyAnalysisResult::RightHandSubslice { + 
subslice_index, + subslice_len, + } => Ok(MapTypes::RightStringSliceSmall(RightSliceMap::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + + SliceKeyAnalysisResult::Length => { + Ok(MapTypes::StringLengthSmall(LengthMap::try_from(payload)?)) + } + } + } else { + match key_analysis { + SliceKeyAnalysisResult::Length | SliceKeyAnalysisResult::Normal => { + Ok(MapTypes::CommonLarge(CommonMap::with_hasher( + unsafe { transmute(payload) }, + bh, + )?)) + } + + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } => Ok(MapTypes::LeftStringSliceLarge(LeftSliceMap::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + + SliceKeyAnalysisResult::RightHandSubslice { + subslice_index, + subslice_len, + } => Ok(MapTypes::RightStringSliceLarge(RightSliceMap::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + } + } + } + + fn new_common_map( + payload: Vec<(K, V)>, + bh: BH, + ) -> std::result::Result, &'static str> { + if payload.len() <= u8::MAX.as_usize() { + Ok(MapTypes::CommonSmall(CommonMap::with_hasher(payload, bh)?)) + } else { + Ok(MapTypes::CommonLarge(CommonMap::with_hasher(payload, bh)?)) + } + } + + /// Returns a reference to the value corresponding to the key. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([(1, "a".to_string())]).unwrap(); + /// assert_eq!(map.get(&1), Some(&"a".to_string())); + /// assert_eq!(map.get(&2), None); + /// ``` + #[inline] + pub fn get(&self, key: &K) -> Option<&V> { + match &self.map_impl { + MapTypes::Scanning(m) => m.get(key), + MapTypes::CommonSmall(m) => m.get(key), + MapTypes::CommonLarge(m) => m.get(key), + MapTypes::U32Small(m) => m.get(unsafe { transmute(key) }), + MapTypes::U32Large(m) => m.get(unsafe { transmute(key) }), + MapTypes::U32Range(m) => m.get(unsafe { transmute(key) }), + MapTypes::LeftStringSliceSmall(m) => { + let k: &String = unsafe { transmute(key) }; + m.get(k) + } + MapTypes::LeftStringSliceLarge(m) => { + let k: &String = unsafe { transmute(key) }; + m.get(k) + } + MapTypes::RightStringSliceSmall(m) => { + let k: &String = unsafe { transmute(key) }; + m.get(k) + } + MapTypes::RightStringSliceLarge(m) => { + let k: &String = unsafe { transmute(key) }; + m.get(k) + } + MapTypes::StringLengthSmall(m) => { + let k: &String = unsafe { transmute(key) }; + m.get(k) + } + } + } + + /// Returns the key-value pair corresponding to the supplied key. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([(1, "a".to_string())]).unwrap(); + /// assert_eq!(map.get_key_value(&1), Some((&1, &"a".to_string()))); + /// assert_eq!(map.get_key_value(&2), None); + /// ``` + #[inline] + pub fn get_key_value(&self, key: &K) -> Option<(&K, &V)> { + match &self.map_impl { + MapTypes::Scanning(m) => m.get_key_value(key), + MapTypes::CommonSmall(m) => m.get_key_value(key), + MapTypes::CommonLarge(m) => m.get_key_value(key), + MapTypes::U32Small(m) => unsafe { transmute(m.get_key_value(transmute(key))) }, + MapTypes::U32Large(m) => unsafe { transmute(m.get_key_value(transmute(key))) }, + MapTypes::U32Range(m) => unsafe { transmute(m.get_key_value(transmute(key))) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { + let k: &String = transmute(key); + transmute(m.get_key_value(k)) + }, + MapTypes::LeftStringSliceLarge(m) => unsafe { + let k: &String = transmute(key); + transmute(m.get_key_value(k)) + }, + MapTypes::RightStringSliceSmall(m) => unsafe { + let k: &String = transmute(key); + transmute(m.get_key_value(k)) + }, + MapTypes::RightStringSliceLarge(m) => unsafe { + let k: &String = transmute(key); + transmute(m.get_key_value(k)) + }, + MapTypes::StringLengthSmall(m) => unsafe { + let k: &String = transmute(key); + transmute(m.get_key_value(k)) + }, + } + } + + /// Returns a mutable reference to the value corresponding to the key. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let mut map = FrozenMap::try_from([(1, "a".to_string())]).unwrap(); + /// assert_eq!(map.get_mut(&1), Some(&mut "a".to_string())); + /// assert_eq!(map.get_mut(&2), None); + /// ``` + #[inline] + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { + match &mut self.map_impl { + MapTypes::Scanning(m) => m.get_mut(key), + MapTypes::CommonSmall(m) => m.get_mut(key), + MapTypes::CommonLarge(m) => m.get_mut(key), + MapTypes::U32Small(m) => m.get_mut(unsafe { transmute(key) }), + MapTypes::U32Large(m) => m.get_mut(unsafe { transmute(key) }), + MapTypes::U32Range(m) => { + let k = unsafe { transmute(key) }; + m.get_mut(k) + } + MapTypes::LeftStringSliceSmall(m) => { + let k: &String = unsafe { transmute(key) }; + m.get_mut(k) + } + MapTypes::LeftStringSliceLarge(m) => { + let k: &String = unsafe { transmute(key) }; + m.get_mut(k) + } + MapTypes::RightStringSliceSmall(m) => { + let k: &String = unsafe { transmute(key) }; + m.get_mut(k) + } + MapTypes::RightStringSliceLarge(m) => { + let k: &String = unsafe { transmute(key) }; + m.get_mut(k) + } + MapTypes::StringLengthSmall(m) => { + let k: &String = unsafe { transmute(key) }; + m.get_mut(k) + } + } + } + + /// Attempts to get mutable references to `N` values in the map at once. + /// + /// Returns an array of length `N` with the results of each query. For soundness, at most one + /// mutable reference will be returned to any value. `None` will be returned if any of the + /// keys are duplicates or missing. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let mut libraries = FrozenMap::try_from([ + /// ("Bodleian Library".to_string(), 1602), + /// ("Athenæum".to_string(), 1807), + /// ("Herzogin-Anna-Amalia-Bibliothek".to_string(), 1691), + /// ("Library of Congress".to_string(), 1800) + /// ]).unwrap(); + /// + /// let got = libraries.get_many_mut([ + /// &"Athenæum".to_string(), + /// &"Library of Congress".to_string(), + /// ]); + /// assert_eq!( + /// got, + /// Some([ + /// &mut 1807, + /// &mut 1800, + /// ]), + /// ); + /// + /// // Missing keys result in None + /// let got = libraries.get_many_mut([ + /// &"Athenæum".to_string(), + /// &"New York Public Library".to_string(), + /// ]); + /// assert_eq!(got, None); + /// + /// // Duplicate keys result in None + /// let got = libraries.get_many_mut([ + /// &"Athenæum".to_string(), + /// &"Athenæum".to_string(), + /// ]); + /// assert_eq!(got, None); + /// ``` + pub fn get_many_mut(&mut self, keys: [&K; N]) -> Option<[&mut V; N]> { + // ensure key uniqueness (assumes "keys" is a relatively small array) + for i in 0..keys.len() { + for j in 0..i { + if keys[j].eq(keys[i]) { + return None; + } + } + } + + // SAFETY: `i < N`, every slot is written before `assume_init` (a missing key returns early), and the keys were checked to be distinct so the slots never alias. + unsafe { + let mut result: MaybeUninit<[&mut V; N]> = MaybeUninit::uninit(); + let p = result.as_mut_ptr(); + + for (i, key) in keys.iter().enumerate() { + *(*p).get_unchecked_mut(i) = &mut *std::ptr::from_mut(self.get_mut(key)?); + } + + Some(result.assume_init()) + } + } + + /// Returns `true` if the map contains a value for the specified key. + /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([(1, "a".to_string())]).unwrap(); + /// + /// assert_eq!(map.contains_key(&1), true); + /// assert_eq!(map.contains_key(&2), false); + /// ``` + pub fn contains_key(&self, key: &K) -> bool { + self.get(key).is_some() + } + + /// An iterator visiting all key-value pairs in arbitrary order.
+ /// The iterator element type is `(&'a K, &'a V)`. + /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([ + /// ("a".to_string(), 1), + /// ("b".to_string(), 2), + /// ("c".to_string(), 3), + /// ]).unwrap(); + /// + /// for (key, val) in map.iter() { + /// println!("key: {key} val: {val}"); + /// } + /// ``` + pub const fn iter(&self) -> Iter { + match &self.map_impl { + MapTypes::Scanning(m) => m.iter(), + MapTypes::CommonSmall(m) => m.iter(), + MapTypes::CommonLarge(m) => m.iter(), + MapTypes::U32Small(m) => unsafe { transmute(m.iter()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.iter()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.iter()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.iter()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.iter()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.iter()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.iter()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.iter()) }, + } + } + + /// An iterator visiting all keys in arbitrary order. + /// The iterator element type is `&'a K`. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([ + /// ("a".to_string(), 1), + /// ("b".to_string(), 2), + /// ("c".to_string(), 3), + /// ]).unwrap(); + /// + /// for key in map.keys() { + /// println!("{key}"); + /// } + /// ``` + pub const fn keys(&self) -> Keys { + match &self.map_impl { + MapTypes::Scanning(m) => m.keys(), + MapTypes::CommonSmall(m) => m.keys(), + MapTypes::CommonLarge(m) => m.keys(), + MapTypes::U32Small(m) => unsafe { transmute(m.keys()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.keys()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.keys()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.keys()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.keys()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.keys()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.keys()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.keys()) }, + } + } + + /// An iterator visiting all values in arbitrary order. + /// The iterator element type is `&'a V`. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([ + /// ("a".to_string(), 1), + /// ("b".to_string(), 2), + /// ("c".to_string(), 3), + /// ]).unwrap(); + /// + /// for val in map.values() { + /// println!("{val}"); + /// } + /// ``` + pub const fn values(&self) -> Values { + match &self.map_impl { + MapTypes::Scanning(m) => m.values(), + MapTypes::CommonSmall(m) => m.values(), + MapTypes::CommonLarge(m) => m.values(), + MapTypes::U32Small(m) => unsafe { transmute(m.values()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.values()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.values()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.values()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.values()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.values()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.values()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.values()) }, + } + } + + /// A consuming iterator visiting all keys in arbitrary order. + /// The iterator element type is `K`.
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([ + /// ("a".to_string(), 1), + /// ("b".to_string(), 2), + /// ("c".to_string(), 3), + /// ]).unwrap(); + /// + /// for key in map.into_keys() { + /// println!("{key}"); + /// } + /// ``` + #[allow(clippy::transmute_undefined_repr)] + pub fn into_keys(self) -> IntoKeys { + match self.map_impl { + MapTypes::Scanning(m) => m.into_keys(), + MapTypes::CommonSmall(m) => m.into_keys(), + MapTypes::CommonLarge(m) => m.into_keys(), + MapTypes::U32Small(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.into_keys()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.into_keys()) }, + } + } + + /// A consuming iterator visiting all values in arbitrary order. + /// The iterator element type is `V`.
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let map = FrozenMap::try_from([ + /// ("a".to_string(), 1), + /// ("b".to_string(), 2), + /// ("c".to_string(), 3), + /// ]).unwrap(); + /// + /// for val in map.into_values() { + /// println!("{val}"); + /// } + /// ``` + #[allow(clippy::transmute_undefined_repr)] + pub fn into_values(self) -> IntoValues { + match self.map_impl { + MapTypes::Scanning(m) => m.into_values(), + MapTypes::CommonSmall(m) => m.into_values(), + MapTypes::CommonLarge(m) => m.into_values(), + MapTypes::U32Small(m) => unsafe { transmute(m.into_values()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.into_values()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.into_values()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.into_values()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.into_values()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.into_values()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.into_values()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.into_values()) }, + } + } + + /// An iterator producing mutable references to all entries in arbitrary order. + /// The iterator element type is `(&'a K, &'a mut V)`.
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let mut map = FrozenMap::try_from([ + /// ("a".to_string(), 1), + /// ("b".to_string(), 2), + /// ("c".to_string(), 3), + /// ]).unwrap(); + /// + /// // update all values + /// for (_, val) in map.iter_mut() { + /// *val *= 2; + /// } + /// ``` + #[allow(clippy::transmute_undefined_repr)] + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + match &mut self.map_impl { + MapTypes::Scanning(m) => m.iter_mut(), + MapTypes::CommonSmall(m) => m.iter_mut(), + MapTypes::CommonLarge(m) => m.iter_mut(), + MapTypes::U32Small(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.iter_mut()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.iter_mut()) }, + } + } + + /// An iterator visiting all values mutably in arbitrary order. The iterator element type is `&'a mut V`. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenMap; + /// # + /// let mut map = FrozenMap::try_from([ + /// ("a", 1), + /// ("b", 2), + /// ("c", 3), + /// ]).unwrap(); + /// + /// for val in map.values_mut() { + /// *val = *val + 10; + /// } + /// ``` + #[allow(clippy::transmute_undefined_repr)] + pub fn values_mut(&mut self) -> ValuesMut<'_, K, V> { + match &mut self.map_impl { + MapTypes::Scanning(m) => m.values_mut(), + MapTypes::CommonSmall(m) => m.values_mut(), + MapTypes::CommonLarge(m) => m.values_mut(), + MapTypes::U32Small(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.values_mut()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.values_mut()) }, + } + } +} + +impl Len for FrozenMap { + fn len(&self) -> usize { + match &self.map_impl { + MapTypes::Scanning(m) => m.len(), + MapTypes::CommonSmall(m) => m.len(), + MapTypes::CommonLarge(m) => m.len(), + MapTypes::U32Small(m) => m.len(), + MapTypes::U32Large(m) => m.len(), + MapTypes::U32Range(m) => m.len(), + MapTypes::LeftStringSliceSmall(m) => m.len(), + MapTypes::LeftStringSliceLarge(m) => m.len(), + MapTypes::RightStringSliceSmall(m) => m.len(), + MapTypes::RightStringSliceLarge(m) => m.len(), + MapTypes::StringLengthSmall(m) => m.len(), + } + } +} + +impl TryFrom> for FrozenMap +where + K: Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: Vec<(K, V)>) -> std::result::Result { + Self::new(payload) + } +} + +impl TryFrom<[(K, V); N]> for FrozenMap +where + K: Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: [(K, V); 
N]) -> std::result::Result { + Self::new(Vec::from_iter(payload)) + } +} + +impl FromIterator<(K, V)> for FrozenMap +where + K: Hash + Eq, +{ + fn from_iter>(iter: T) -> Self { + Self::new(Vec::from_iter(iter)).unwrap() + } +} + +impl Index<&K> for FrozenMap +where + K: Hash + Eq, + BH: BuildHasher, +{ + type Output = V; + + fn index(&self, index: &K) -> &Self::Output { + self.get(index).unwrap() + } +} + +impl IndexMut<&K> for FrozenMap +where + K: Hash + Eq, + BH: BuildHasher, +{ + fn index_mut(&mut self, index: &K) -> &mut V { + self.get_mut(index).unwrap() + } +} + +impl Default for FrozenMap +where + K: Hash + Eq + Default, + V: Default, + BH: BuildHasher + Default, +{ + fn default() -> Self { + Self { + map_impl: MapTypes::Scanning(ScanningMap::::default()), + } + } +} + +impl Debug for FrozenMap +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match &self.map_impl { + MapTypes::Scanning(m) => m.fmt(f), + MapTypes::CommonSmall(m) => m.fmt(f), + MapTypes::CommonLarge(m) => m.fmt(f), + MapTypes::U32Small(m) => m.fmt(f), + MapTypes::U32Large(m) => m.fmt(f), + MapTypes::U32Range(m) => m.fmt(f), + MapTypes::LeftStringSliceSmall(m) => m.fmt(f), + MapTypes::LeftStringSliceLarge(m) => m.fmt(f), + MapTypes::RightStringSliceSmall(m) => m.fmt(f), + MapTypes::RightStringSliceLarge(m) => m.fmt(f), + MapTypes::StringLengthSmall(m) => m.fmt(f), + } + } +} + +impl PartialEq for FrozenMap +where + K: Hash + Eq, + V: PartialEq, + BH: BuildHasher, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +impl Eq for FrozenMap +where + K: Hash + Eq, + V: Eq, + BH: BuildHasher, +{ +} + +impl<'a, K, V, BH> IntoIterator for &'a FrozenMap +where + K: Hash + Eq, + BH: BuildHasher, +{ + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + 
+impl<'a, K, V, BH> IntoIterator for &'a mut FrozenMap +where + K: Hash + Eq, + BH: BuildHasher, +{ + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +impl IntoIterator for FrozenMap +where + K: Hash + Eq, + BH: BuildHasher, +{ + type Item = (K, V); + type IntoIter = IntoIter; + + #[allow(clippy::transmute_undefined_repr)] + fn into_iter(self) -> Self::IntoIter { + match self.map_impl { + MapTypes::Scanning(m) => m.into_iter(), + MapTypes::CommonSmall(m) => m.into_iter(), + MapTypes::CommonLarge(m) => m.into_iter(), + MapTypes::U32Small(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::U32Large(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::U32Range(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::LeftStringSliceSmall(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::LeftStringSliceLarge(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::RightStringSliceSmall(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::RightStringSliceLarge(m) => unsafe { transmute(m.into_iter()) }, + MapTypes::StringLengthSmall(m) => unsafe { transmute(m.into_iter()) }, + } + } +} diff --git a/frozen-collections/src/frozen_map_tests.rs b/frozen-collections/src/frozen_map_tests.rs new file mode 100644 index 0000000..10849b5 --- /dev/null +++ b/frozen-collections/src/frozen_map_tests.rs @@ -0,0 +1,83 @@ +use std::collections::HashMap; + +use crate::FrozenMap; +use crate::Len; + +#[test] +fn test_empty_map() { + type FM = FrozenMap; + + let m = FM::default(); + assert_eq!(m.len(), 0); +} + +#[test] +fn test_i32_map() { + let m = + FrozenMap::::try_from([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]).unwrap(); + assert_eq!(m.get(&6), Some(&6)); +} + +#[test] +fn basic_u32_map() { + let max_entries = [1, 2, 3, 4, 5, 6, 255, 256, 65535, 65536]; + + for max in max_entries { + let mut m = HashMap::::new(); + for i in 0..max { + m.insert(i, format!("V{i}")); + } + + let fm = m 
+ .iter() + .map(|x| (*x.0, x.1.clone())) + .collect::>(); + assert_eq!(m.len(), fm.len()); + assert_eq!(m.is_empty(), fm.is_empty()); + + for pair in &m { + assert!(fm.contains_key(pair.0)); + assert_eq!(m.get(pair.0).unwrap(), fm.get(pair.0).unwrap()); + assert_eq!( + m.get_key_value(pair.0).unwrap(), + fm.get_key_value(pair.0).unwrap() + ); + } + + let mut m = HashMap::::new(); + for i in (0..max).map(|x| x * 2) { + m.insert(i, "V{i}".to_string()); + } + + let fd = m + .iter() + .map(|x| (*x.0, x.1.clone())) + .collect::>(); + assert_eq!(m.len(), fd.len()); + assert_eq!(m.is_empty(), fd.is_empty()); + + for pair in &m { + assert!(fd.contains_key(pair.0)); + assert_eq!(m.get(pair.0).unwrap(), fd.get(pair.0).unwrap()); + assert_eq!( + m.get_key_value(pair.0).unwrap(), + fd.get_key_value(pair.0).unwrap() + ); + } + } +} + +#[test] +fn test_iter() { + let mut m = HashMap::new(); + m.insert(1, 10); + m.insert(2, 20); + m.insert(3, 30); + m.insert(4, 40); + let m = m.iter().collect::>(); + + let mut iter = m.iter(); + println!("{iter:?}"); + iter.next(); + println!("{iter:?}"); +} diff --git a/frozen-collections/src/frozen_set.rs b/frozen-collections/src/frozen_set.rs new file mode 100644 index 0000000..c62c448 --- /dev/null +++ b/frozen-collections/src/frozen_set.rs @@ -0,0 +1,666 @@ +use std::any::type_name; +use std::collections::HashSet; +use std::fmt::{Debug, Formatter, Result}; +use std::hash::RandomState; +use std::hash::{BuildHasher, Hash}; +use std::intrinsics::transmute; +use std::ops::{BitAnd, BitOr, BitXor, Sub}; + +use bitvec::macros::internal::funty::Fundamental; + +use frozen_collections_core::analyzers::{ + analyze_int_keys, analyze_slice_keys, IntKeyAnalysisResult, SliceKeyAnalysisResult, +}; + +use crate::specialized_sets::{ + CommonSet, IntegerRangeSet, IntegerSet, IntoIter, Iter, LeftSliceSet, LengthSet, RightSliceSet, + ScanningSet, +}; +use crate::Len; +use crate::Set; + +/// The different implementations available for use, depending on the type 
and content of the payload. +#[derive(Clone)] +enum SetTypes { + Scanning(ScanningSet), + + CommonSmall(CommonSet), + CommonLarge(CommonSet), + + U32Small(IntegerSet), + U32Large(IntegerSet), + + U32Range(IntegerRangeSet), + + LeftStringSliceSmall(LeftSliceSet), + LeftStringSliceLarge(LeftSliceSet), + + RightStringSliceSmall(RightSliceSet), + RightStringSliceLarge(RightSliceSet), + + StringLengthSmall(LengthSet), +} + +/// A set optimized for fast read access. +/// +/// A frozen set differs from the traditional [`HashSet`] type in three key ways. First, creating +/// a new frozen set can take a relatively long time, especially for very large sets. Second, +/// once created, instances of frozen sets are immutable. And third, probing a frozen set is +/// typically considerably faster, which is the whole point. +/// +/// The reason creating a frozen set can take some time is due to the extensive analysis that is +/// performed on the set's values in order to determine the best set implementation strategy and +/// data layout to use. This analysis is what enables frozen sets to be faster later when +/// probing the set. +/// +/// Frozen sets are intended for long-lived sets, where the cost of creating the set is made up +/// over time by the faster probing performance. +/// +/// A `FrozenSet` requires that the elements +/// implement the [`Eq`] and [`Hash`] traits. This can frequently be achieved by +/// using `#[derive(PartialEq, Eq, Hash)]`. If you implement these yourself, +/// it is important that the following property holds: +/// +/// ```text +/// k1 == k2 -> hash(k1) == hash(k2) +/// ``` +/// +/// In other words, if two values are equal, their hashes must be equal. +/// Violating this property is a logic error. +/// +/// It is also a logic error for a value to be modified in such a way that the value's +/// hash, as determined by the [`Hash`] trait, or its equality, as determined by +/// the [`Eq`] trait, changes while it is in the set. 
This is normally only +/// possible through [`Cell`], [`RefCell`], global state, I/O, or unsafe code. +/// +/// The behavior resulting from either logic error is not specified, but will +/// be encapsulated to the `FrozenSet` that observed the logic error and not +/// result in undefined behavior. This could include panics, incorrect results, +/// aborts, memory leaks, and non-termination. +/// +/// # Macros are Faster +/// +/// If all your values are known at compile time, you are much better off using the +/// [`frozen_set!`](crate::frozen_set!) macro rather than this type. This will result in considerably +/// better performance. +/// +/// # Implementation Limits +/// +/// Although frozen sets are always faster when reading than traditional hash sets, there are some +/// caveats to be aware of: +/// +/// - [`FrozenSet`] has optimized implementations for the case where the values are +/// of type [`u32`], but not any other integer types. This limitation doesn't exist +/// for the [`frozen_set!`](crate::frozen_set!) macro. +/// +/// - [`FrozenSet`] has optimized implementations for the case where the values are +/// of type [`String`], but not for the type `&str`. You will generally get considerably faster +/// performance using [`String`]. +/// +/// # Examples +/// +/// ``` +/// # use std::hash::RandomState; +/// # use frozen_collections::FrozenSet; +/// # use frozen_collections::Len; +/// # +/// let books = FrozenSet::try_from(vec![ +/// "A Dance With Dragons".to_string(), +/// "To Kill a Mockingbird".to_string(), +/// "The Odyssey".to_string(), +/// "The Great Gatsby".to_string()]).unwrap(); +/// +/// // Check for a specific one. +/// if !books.contains(&"The Winds of Winter".to_string()) { +/// println!("We have {} books, but The Winds of Winter ain't one.", +/// books.len()); +/// } +/// +/// // Iterate over everything. 
+/// for book in &books { +/// println!("{book}"); +/// } +/// ``` +/// +/// The easiest way to use `FrozenSet` with a custom type is to derive +/// [`Eq`] and [`Hash`]. We must also derive [`PartialEq`], +/// which is required if [`Eq`] is derived. +/// +/// ``` +/// # use frozen_collections::FrozenSet; +/// # +/// #[derive(Hash, Eq, PartialEq, Debug)] +/// struct Viking { +/// name: String, +/// power: usize, +/// } +/// +/// let vikings = FrozenSet::try_from([ +/// Viking {name: "Einar".to_string(), power: 9 }, +/// Viking { name: "Olaf".to_string(), power: 4 }, +/// Viking { name: "Harald".to_string(), power: 8 }]).unwrap(); +/// +/// // Use derived implementation to print the vikings. +/// for x in &vikings { +/// println!("{x:?}"); +/// } +/// ``` +/// +/// [`HashSet`]: HashSet +/// [`HashMap`]: std::collections::HashMap +/// [`RefCell`]: std::cell::RefCell +/// [`Cell`]: std::cell::Cell +#[derive(Clone)] +#[allow(clippy::module_name_repetitions)] +pub struct FrozenSet { + set_impl: SetTypes, +} + +impl FrozenSet +where + T: Hash + Eq, +{ + /// Creates a new frozen set. + /// + /// # Errors + /// + /// This fails if there are duplicate items within the vector. + /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenSet; + /// # use std::hash::RandomState; + /// # use frozen_collections::Len; + /// # + /// let set = FrozenSet::new(vec![1, 2, 3]).unwrap(); + /// + /// assert_eq!(set.len(), 3); + /// assert!(set.contains(&1)); + /// ``` + pub fn new(payload: Vec) -> std::result::Result { + Self::with_hasher(payload, RandomState::new()) + } +} + +impl FrozenSet +where + T: Hash + Eq, + BH: BuildHasher, +{ + /// Creates a new frozen set which will use the given hasher to hash values. + /// + /// # Errors + /// + /// This fails if there are duplicate items within the vector. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenSet; + /// # use std::hash::RandomState; + /// # use frozen_collections::Len; + /// # + /// let set = FrozenSet::with_hasher(vec![1, 2, 3], RandomState::new()).unwrap(); + /// + /// assert_eq!(set.len(), 3); + /// assert!(set.contains(&1)); + /// ``` + pub fn with_hasher(payload: Vec, bh: BH) -> std::result::Result { + Ok(Self { + set_impl: if payload.len() < 4 { + SetTypes::Scanning(ScanningSet::try_from(payload)?) + } else if type_name::() == type_name::() { + Self::new_u32_set(payload)? + } else if type_name::() == type_name::() { + Self::new_string_set(payload, bh)? + } else { + Self::new_common_set(payload, bh)? + }, + }) + } + + #[allow(clippy::transmute_undefined_repr)] + fn new_u32_set(payload: Vec) -> std::result::Result, &'static str> { + let payload: Vec = unsafe { transmute(payload) }; + + let key_analysis = analyze_int_keys(payload.iter().copied()); + + match key_analysis { + IntKeyAnalysisResult::Range => { + Ok(SetTypes::U32Range(IntegerRangeSet::try_from(payload)?)) + } + IntKeyAnalysisResult::Normal => { + if payload.len() <= u8::MAX.as_usize() { + Ok(SetTypes::U32Small(IntegerSet::try_from(payload)?)) + } else { + Ok(SetTypes::U32Large(IntegerSet::try_from(payload)?)) + } + } + } + } + + #[allow(clippy::transmute_undefined_repr)] + fn new_string_set( + payload: Vec, + bh: BH, + ) -> std::result::Result, &'static str> { + let payload: Vec = unsafe { transmute(payload) }; + + let key_analysis = analyze_slice_keys(payload.iter().map(String::as_bytes), &bh); + + if payload.len() <= u8::MAX.as_usize() { + match key_analysis { + SliceKeyAnalysisResult::Normal => Ok(SetTypes::CommonSmall( + CommonSet::with_hasher(unsafe { transmute(payload) }, bh)?, + )), + + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } => Ok(SetTypes::LeftStringSliceSmall(LeftSliceSet::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + + 
SliceKeyAnalysisResult::RightHandSubslice { + subslice_index, + subslice_len, + } => Ok(SetTypes::RightStringSliceSmall(RightSliceSet::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + + SliceKeyAnalysisResult::Length => { + Ok(SetTypes::StringLengthSmall(LengthSet::try_from(payload)?)) + } + } + } else { + match key_analysis { + SliceKeyAnalysisResult::Length | SliceKeyAnalysisResult::Normal => { + Ok(SetTypes::CommonLarge(CommonSet::with_hasher( + unsafe { transmute(payload) }, + bh, + )?)) + } + + SliceKeyAnalysisResult::LeftHandSubslice { + subslice_index, + subslice_len, + } => Ok(SetTypes::LeftStringSliceLarge(LeftSliceSet::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + + SliceKeyAnalysisResult::RightHandSubslice { + subslice_index, + subslice_len, + } => Ok(SetTypes::RightStringSliceLarge(RightSliceSet::with_hasher( + payload, + subslice_index..subslice_index + subslice_len, + bh, + )?)), + } + } + } + + fn new_common_set( + payload: Vec, + bh: BH, + ) -> std::result::Result, &'static str> { + if payload.len() <= u8::MAX.as_usize() { + Ok(SetTypes::CommonSmall(CommonSet::with_hasher(payload, bh)?)) + } else { + Ok(SetTypes::CommonLarge(CommonSet::with_hasher(payload, bh)?)) + } + } + + /// Returns `true` if the set contains a value. 
+ /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenSet; + /// # + /// let set = FrozenSet::try_from([1, 2, 3]).unwrap(); + /// + /// assert!(set.contains(&1)); + /// assert!(!set.contains(&4)); + /// ``` + pub fn contains(&self, value: &T) -> bool { + match &self.set_impl { + SetTypes::Scanning(s) => s.contains(value), + SetTypes::CommonSmall(s) => s.contains(value), + SetTypes::CommonLarge(s) => s.contains(value), + SetTypes::U32Small(s) => s.contains(unsafe { transmute(value) }), + SetTypes::U32Large(s) => s.contains(unsafe { transmute(value) }), + SetTypes::U32Range(s) => s.contains(unsafe { transmute(value) }), + SetTypes::LeftStringSliceSmall(s) => { + let v: &String = unsafe { transmute(value) }; + s.contains(v) + } + SetTypes::LeftStringSliceLarge(s) => { + let v: &String = unsafe { transmute(value) }; + s.contains(v) + } + SetTypes::RightStringSliceSmall(s) => { + let v: &String = unsafe { transmute(value) }; + s.contains(v) + } + SetTypes::RightStringSliceLarge(s) => { + let v: &String = unsafe { transmute(value) }; + s.contains(v) + } + SetTypes::StringLengthSmall(s) => { + let v: &String = unsafe { transmute(value) }; + s.contains(v) + } + } + } + + /// An iterator visiting all elements in arbitrary order. + /// The iterator element type is `&'a T`. + /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenSet; + /// # + /// let set = FrozenSet::try_from(["a".to_string(), "b".to_string()]).unwrap(); + /// + /// // Will print in an arbitrary order. 
+ /// for x in set.iter() { + /// println!("{x}"); + /// } + /// ``` + pub fn iter(&self) -> Iter { + match &self.set_impl { + SetTypes::Scanning(s) => s.iter(), + SetTypes::CommonSmall(s) => s.iter(), + SetTypes::CommonLarge(s) => s.iter(), + SetTypes::U32Small(s) => unsafe { transmute(s.iter()) }, + SetTypes::U32Large(s) => unsafe { transmute(s.iter()) }, + SetTypes::U32Range(s) => unsafe { transmute(s.iter()) }, + SetTypes::LeftStringSliceSmall(s) => unsafe { transmute(s.iter()) }, + SetTypes::LeftStringSliceLarge(s) => unsafe { transmute(s.iter()) }, + SetTypes::RightStringSliceSmall(s) => unsafe { transmute(s.iter()) }, + SetTypes::RightStringSliceLarge(s) => unsafe { transmute(s.iter()) }, + SetTypes::StringLengthSmall(s) => unsafe { transmute(s.iter()) }, + } + } + + /// Returns a reference to the value in the set, if any, that is equal to the given value. + /// + /// # Examples + /// + /// ``` + /// # use frozen_collections::FrozenSet; + /// # + /// let set = FrozenSet::try_from([1, 2, 3]).unwrap(); + /// + /// assert_eq!(set.get(&2), Some(&2)); + /// assert_eq!(set.get(&4), None); + /// ``` + pub fn get(&self, value: &T) -> Option<&T> { + match &self.set_impl { + SetTypes::Scanning(s) => s.get(value), + SetTypes::CommonSmall(s) => s.get(value), + SetTypes::CommonLarge(s) => s.get(value), + SetTypes::U32Small(s) => unsafe { transmute(s.get(transmute(value))) }, + SetTypes::U32Large(s) => unsafe { transmute(s.get(transmute(value))) }, + SetTypes::U32Range(s) => unsafe { transmute(s.get(transmute(value))) }, + SetTypes::LeftStringSliceSmall(s) => unsafe { + let v: &String = transmute(value); + transmute(s.get(v)) + }, + SetTypes::LeftStringSliceLarge(s) => unsafe { + let v: &String = transmute(value); + transmute(s.get(v)) + }, + SetTypes::RightStringSliceSmall(s) => unsafe { + let v: &String = transmute(value); + transmute(s.get(v)) + }, + SetTypes::RightStringSliceLarge(s) => unsafe { + let v: &String = transmute(value); + transmute(s.get(v)) + }, + 
SetTypes::StringLengthSmall(s) => unsafe { + let v: &String = transmute(value); + transmute(s.get(v)) + }, + } + } +} + +impl TryFrom> for FrozenSet +where + T: Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: Vec) -> std::result::Result { + Self::new(payload) + } +} + +impl TryFrom> for FrozenSet { + type Error = &'static str; + + fn try_from(payload: Vec<&str>) -> std::result::Result { + Self::new(payload.into_iter().map(&str::to_string).collect()) + } +} + +impl TryFrom<[T; N]> for FrozenSet +where + T: Hash + Eq, +{ + type Error = &'static str; + + fn try_from(payload: [T; N]) -> std::result::Result { + Self::new(Vec::from_iter(payload)) + } +} + +impl TryFrom<[&str; N]> for FrozenSet { + type Error = &'static str; + + fn try_from(payload: [&str; N]) -> std::result::Result { + Self::new(Vec::from_iter(payload.into_iter().map(&str::to_string))) + } +} + +impl FromIterator for FrozenSet +where + T: Hash + Eq, +{ + fn from_iter>(iter: U) -> Self { + Self::new(Vec::from_iter(iter)).unwrap() + } +} + +impl Default for FrozenSet +where + T: Hash + Eq + Default, + BH: BuildHasher + Default, +{ + fn default() -> Self { + Self { + set_impl: SetTypes::Scanning(ScanningSet::::default()), + } + } +} + +impl Debug for FrozenSet +where + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match &self.set_impl { + SetTypes::Scanning(s) => s.fmt(f), + SetTypes::CommonSmall(s) => s.fmt(f), + SetTypes::CommonLarge(s) => s.fmt(f), + SetTypes::U32Small(s) => s.fmt(f), + SetTypes::U32Large(s) => s.fmt(f), + SetTypes::U32Range(s) => s.fmt(f), + SetTypes::LeftStringSliceSmall(s) => s.fmt(f), + SetTypes::LeftStringSliceLarge(s) => s.fmt(f), + SetTypes::RightStringSliceSmall(s) => s.fmt(f), + SetTypes::RightStringSliceLarge(s) => s.fmt(f), + SetTypes::StringLengthSmall(s) => s.fmt(f), + } + } +} + +impl PartialEq for FrozenSet +where + T: Hash + Eq, + BH: BuildHasher, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return 
false; + } + + self.iter().all(|value| other.get(value).is_some()) + } +} + +impl Eq for FrozenSet +where + T: Hash + Eq, + BH: BuildHasher, +{ +} + +impl BitOr<&ST> for &FrozenSet +where + T: Hash + Eq + Clone, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitor(self, rhs: &ST) -> Self::Output { + self.union(rhs).cloned().collect() + } +} + +impl BitAnd<&ST> for &FrozenSet +where + T: Hash + Eq + Clone, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitand(self, rhs: &ST) -> Self::Output { + self.intersection(rhs).cloned().collect() + } +} + +impl BitXor<&ST> for &FrozenSet +where + T: Hash + Eq + Clone, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn bitxor(self, rhs: &ST) -> Self::Output { + self.symmetric_difference(rhs).cloned().collect() + } +} + +impl Sub<&ST> for &FrozenSet +where + T: Hash + Eq + Clone, + ST: Set, + BH: BuildHasher + Default, +{ + type Output = HashSet; + + fn sub(self, rhs: &ST) -> Self::Output { + self.difference(rhs).cloned().collect() + } +} + +impl IntoIterator for FrozenSet +where + T: Hash + Eq, + BH: BuildHasher, +{ + type Item = T; + type IntoIter = IntoIter; + + #[allow(clippy::transmute_undefined_repr)] + fn into_iter(self) -> Self::IntoIter { + match self.set_impl { + SetTypes::Scanning(s) => s.into_iter(), + SetTypes::CommonSmall(s) => s.into_iter(), + SetTypes::CommonLarge(s) => s.into_iter(), + SetTypes::U32Small(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::U32Large(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::U32Range(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::LeftStringSliceSmall(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::LeftStringSliceLarge(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::RightStringSliceSmall(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::RightStringSliceLarge(s) => unsafe { transmute(s.into_iter()) }, + SetTypes::StringLengthSmall(s) => unsafe { 
transmute(s.into_iter()) }, + } + } +} + +impl<'a, T, BH> IntoIterator for &'a FrozenSet +where + T: Hash + Eq, + BH: BuildHasher, +{ + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Iter<'a, T> { + self.iter() + } +} + +impl Len for FrozenSet { + fn len(&self) -> usize { + match &self.set_impl { + SetTypes::Scanning(s) => Len::len(s), + SetTypes::CommonSmall(s) => Len::len(s), + SetTypes::CommonLarge(s) => Len::len(s), + SetTypes::U32Small(s) => Len::len(s), + SetTypes::U32Large(s) => Len::len(s), + SetTypes::U32Range(s) => Len::len(s), + SetTypes::LeftStringSliceSmall(s) => Len::len(s), + SetTypes::LeftStringSliceLarge(s) => Len::len(s), + SetTypes::RightStringSliceSmall(s) => Len::len(s), + SetTypes::RightStringSliceLarge(s) => Len::len(s), + SetTypes::StringLengthSmall(s) => Len::len(s), + } + } +} + +impl Set for FrozenSet +where + T: Hash + Eq, + BH: BuildHasher, +{ + type Iterator<'a> = Iter<'a, T> + where + T: 'a, + BH: 'a; + + fn iter(&self) -> Iter<'_, T> { + self.iter() + } + + fn contains(&self, value: &T) -> bool { + self.contains(value) + } +} diff --git a/frozen-collections/src/frozen_set_tests.rs b/frozen-collections/src/frozen_set_tests.rs new file mode 100644 index 0000000..5e3bcd9 --- /dev/null +++ b/frozen-collections/src/frozen_set_tests.rs @@ -0,0 +1,34 @@ +use std::collections::HashSet; + +use crate::FrozenSet; +use crate::Len; + +#[test] +fn misc() { + const SIZES: [usize; 12] = [0, 1, 2, 3, 4, 5, 255, 256, 257, 65535, 65536, 65536]; + + for size in SIZES { + let mut v = Vec::with_capacity(size); + for i in 0..size { + v.push(i); + } + + let s = FrozenSet::try_from(v).unwrap(); + assert_eq!(size, s.len()); + assert_eq!(size == 0, s.is_empty()); + + for i in 0..size { + assert_eq!(&i, s.get(&i).unwrap()); + assert!(s.contains(&i)); + } + + let mut ms = HashSet::new(); + for item in &s { + ms.insert(item); + } + + for i in 0..size { + assert!(ms.contains(&i)); + } + } +} diff --git a/frozen-collections/src/lib.rs 
b/frozen-collections/src/lib.rs new file mode 100644 index 0000000..2c6bcf0 --- /dev/null +++ b/frozen-collections/src/lib.rs @@ -0,0 +1,176 @@ +//! Frozen collections: fast partially immutable collections +//! +//! Frozen collections are designed to trade creation time for improved +//! read performance. They are ideal for use with long-lasting collections +//! which get initialized when an application starts and remain unchanged +//! permanently, or at least for extended periods of time. This is a common +//! pattern in service applications. +//! +//! During creation, the frozen collections perform analysis over the data they +//! will hold to determine the best layout and algorithm for the specific case. +//! This analysis can take some time. But the value in spending this time up front +//! is that the collections provide blazingly fast read-time performance. +//! +//! # Analysis and Optimizations +//! +//! Unlike normal collections, the frozen collections require you to provide all the data for +//! the collection when you create the collection. The data you supply is analyzed which determines +//! what specific underlying implementation strategy to use and how to lay out data in the hash tables +//! (assuming the implementation uses hash tables at all). +//! +//! The available implementation strategies are: +//! +//! - **Integer as Hash**. When the keys are of an integer type, this uses the keys themselves +//! as hash codes, avoiding the overhead of hashing. +//! +//! - **Integer Range**. When the keys represent a contiguous range of integer values, this eliminates +//! hashing completely and uses direct indexing instead. +//! +//! - **Length as Hash**. When the keys are of a slice type, this uses the length of the slices +//! as hash codes, avoiding the overhead of hashing. +//! +//! - **Left Hand Hashing**. When the keys are of a slice type, this uses sub-slices of the keys +//! for hashing, reducing the overhead of hashing. +//! +//! - **Right Hand Hashing**. 
Similar to the Left Hand Hashing from above, but using right-aligned +//! sub-slices instead. +//! +//! - **Linear Scan**. For very small maps, this avoids hashing completely by scanning through the +//! keys in linear order. +//! +//! - **Classic Hashing**. This is the fallback when none of the previous strategies apply. This +//! benefits from a read-optimized data layout which delivers faster performance than normal +//! collections. +//! +//! # Macros and Structs +//! +//! Frozen collections can be created in one of two ways: +//! +//! - **via Macros**. When you know the data to load into the collection at build time, you can use the +//! [`frozen_set!`] or [`frozen_map!`] macros. Analysis of the input +//! data is done at build time, so there isn't any analysis cost spent when creating the +//! collections. +//! +//! - **via Facade Types**. When you don't know the data to load into the collection at build time, you must use +//! the [`FrozenSet`] and [`FrozenMap`] types. Analysis is performed at runtime when +//! the collections are created, which can take a while if the payload is made up of millions of +//! entries. +//! +//! Using the macros results in faster runtime performance, so they are the preferred choice if +//! possible. +//! +//! The [`FrozenSet`] and [`FrozenMap`] types are facades which dispatch at runtime to +//! different specialized implementation types. When you use the macros instead, the selection +//! of implementation type is done at build time, and thus the cost of the runtime dispatch is +//! completely eliminated. +//! +//! The specialized collection types returned by the macros are slightly more feature-rich +//! than the [`FrozenSet`] and [`FrozenMap`] types. Specifically, the specialized +//! types support the [`Borrow`](std::borrow::Borrow) trait for key specification, which makes +//! them more flexible. +//! +//! # Implementation Limits +//! +//! 
Although frozen collections are always faster when reading than traditional collections, there are some +//! caveats to be aware of: +//! +//! - [`FrozenMap`] and [`FrozenSet`] have optimized implementations for the case where the keys are +//! of type [`u32`], but not any other integer types. This limitation doesn't exist +//! for the [`frozen_map!`](crate::frozen_map!) and [`frozen_set!`](crate::frozen_set!) macros. +//! +//! - [`FrozenMap`] and [`FrozenSet`] have optimized implementations for the case where the keys are +//! of type [`String`], but not for the type `&str`. You will generally get considerably faster +//! performance using [`String`]. +//! +//! # Traits +//! +//! The frozen collections define three custom traits which you can use to +//! integrate with frozen collections: +//! +//! - [`Len`]. Used to represent keys that have lengths. This is used by the Length as Hash, +//! Left Hand Hashing, and Right Hand Hashing strategies. +//! +//! - [`RangeHash`]. Used to enable hashing of a sub-slice of a value. This is used by the +//! Left Hand Hashing and Right Hand Hashing strategies. +//! +//! - [`Set`]. Used to represent common features of a set. This makes it possible for +//! frozen collections to do logical operations, such as union or intersection, between various +//! concrete set types. + +pub use frozen_collections_core::traits::Len; +pub use frozen_collections_core::traits::RangeHash; +pub use frozen_collections_core::traits::Set; +/// Create an optimized read-only map. +/// +/// You give this macro the type of the map's keys, and then enumerate the key/value pairs that +/// should be added to the map. Analysis to select the implementation strategy and data layout +/// is done at build time. +/// +/// This macro works by returning different implementation types based on the specific details +/// of the input data. 
+/// +/// # Examples +/// +/// ``` +/// # use frozen_collections_macros::frozen_map; +/// # +/// let m = frozen_map!(&str, +/// "Red": 1, +/// "Green": 2, +/// "Blue": 3, +/// ); +/// +/// assert!(m.contains_key("Red")); +/// ``` +pub use frozen_collections_macros::frozen_map; +/// Create an optimized read-only set. +/// +/// You give this macro the type of the set's values, and then enumerate the values that +/// should be added to the set. Analysis to select the implementation strategy and data layout +/// is done at build time. +/// +/// This macro works by returning different implementation types based on the specific details +/// of the input data. +/// +/// # Examples +/// +/// ``` +/// # use frozen_collections_macros::frozen_set; +/// # +/// let s = frozen_set!(&str, +/// "Red", +/// "Green", +/// "Blue", +/// ); +/// +/// assert!(s.contains("Red")); +/// ``` +pub use frozen_collections_macros::frozen_set; +pub use frozen_map::FrozenMap; +pub use frozen_set::FrozenSet; + +mod frozen_map; +mod frozen_set; + +#[cfg(test)] +mod frozen_map_tests; + +#[cfg(test)] +mod frozen_set_tests; + +#[doc(hidden)] +pub mod specialized_sets { + pub use frozen_collections_core::specialized_sets::*; +} + +#[doc(hidden)] +pub mod specialized_maps { + pub use frozen_collections_core::specialized_maps::*; +} + +/* +#[doc(hidden)] +pub mod traits { + pub use frozen_collections_core::traits::*; +} + */