-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Martin Taillefer
committed
Jul 7, 2024
0 parents
commit 1e5772c
Showing
58 changed files
with
9,520 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
text eol=crlf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
version: 2 | ||
updates: | ||
- package-ecosystem: "cargo" | ||
directory: "/" | ||
schedule: | ||
interval: "monthly" | ||
open-pull-requests-limit: 10 | ||
- package-ecosystem: "github-actions" | ||
directory: "/" | ||
schedule: | ||
interval: "monthly" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# CI workflow: builds and tests the workspace on the stable, beta, and nightly
# toolchains for every push to, and pull request against, the main branch.
name: Rust

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        toolchain:
          - stable
          - beta
          - nightly

    steps:
      # v4 replaces v3, which runs on a deprecated Node.js runtime.
      - uses: actions/checkout@v4
      # Install/refresh the matrix toolchain and make it the default for the job.
      - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/target | ||
/Cargo.lock | ||
/.idea |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
[workspace] | ||
resolver = "2" | ||
members = [ | ||
"frozen-collections", | ||
"frozen-collections-core", | ||
"frozen-collections-macros", | ||
"benches", | ||
"examples", | ||
] | ||
|
||
[workspace.package] | ||
version = "0.1.0" | ||
edition = "2021" | ||
categories = ["data-structures"] | ||
keywords = ["map", "set", "collection"] | ||
repository = "https://github.com/geeknoid/frozen-collections" | ||
license = "MIT" | ||
readme = "README.md" | ||
authors = ["Martin Taillefer <martin@taillefer.org>"] | ||
|
||
[workspace.lints.clippy] | ||
pedantic = { level = "warn", priority = -1 } | ||
correctness = { level = "warn", priority = -1 } | ||
complexity = { level = "warn", priority = -1 } | ||
perf = { level = "warn", priority = -1 } | ||
cargo = { level = "warn", priority = -1 } | ||
nursery = { level = "warn", priority = -1 } | ||
single_call_fn = "allow" | ||
min_ident_chars = "allow" | ||
unused_self = "allow" | ||
transmute_ptr_to_ptr = "allow" | ||
wildcard_imports = "allow" | ||
too_many_lines = "allow" | ||
multiple_crate_versions = "allow" | ||
missing_transmute_annotations = "allow" | ||
from-iter-instead-of-collect = "allow" | ||
|
||
[profile.bench] | ||
codegen-units = 1 | ||
lto = "fat" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Ideas | ||
|
||
- Create a benchmark suite to come up with better numbers for the various thresholds and percentages | ||
used in the analysis code. | ||
|
||
- In the hash code analyzer, beyond the number of collisions, the logic should factor in how many empty slots are in the | ||
hash table. A lot of empty slots can slow things down due to cache misses, in addition to wasting memory. | ||
|
||
- Consider some hint supplied by the caller for how much time/effort to put into analysis. | ||
|
||
- Consider providing an offline tool that performs the analysis on the input data. Being offline, the | ||
analysis could be more exhaustive. The analysis would produce a little blob of state which would be fed | ||
into the code to configure things without running analysis code at runtime. | ||
|
||
- Consider the use of perfect hashing or minimal perfect hashing. | ||
|
||
- Consider introducing dynamic benchmarking as part of the analysis phase. We could build | ||
several prototype collections, measure effective perf, and then use the benchmark results to | ||
decide on the optimal collection configuration. | ||
|
||
- The facades need to support some notion of Borrow<T>. This is particularly important for | ||
allowing collections where K=String to be queried with &str instead. Unfortunately, given the | ||
gymnastics the code is doing internally around hashing, it's not obvious how this feature | ||
could be added. | ||
|
||
- Add a specialized set implementation for integer types which uses a bit vector for storage. | ||
|
||
- Evaluate hash functions to find the highest-performing one. | ||
|
||
- Bypass hashing for short left-slices or right-slices. When the slices are | ||
short enough, we should just take the character values as the hash code. | ||
|
||
- For integer keys, consider expanding the model for ranges to include ranges with holes. | ||
Basically, the array would hold Option<V> instead of just V. | ||
|
||
# TODOs | ||
|
||
- Tests | ||
- Make it so the macros don't need a type indicator for strings and ints | ||
- Perf analysis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
Copyright (c) 2024 Martin Taillefer | ||
|
||
Permission is hereby granted, free of charge, to any | ||
person obtaining a copy of this software and associated | ||
documentation files (the "Software"), to deal in the | ||
Software without restriction, including without | ||
limitation the rights to use, copy, modify, merge, | ||
publish, distribute, sublicense, and/or sell copies of | ||
the Software, and to permit persons to whom the Software | ||
is furnished to do so, subject to the following | ||
conditions: | ||
|
||
The above copyright notice and this permission notice | ||
shall be included in all copies or substantial portions | ||
of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF | ||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED | ||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | ||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | ||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | ||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR | ||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Frozen Collections | ||
|
||
[Build status](https://github.com/geeknoid/frozen-collections/actions) | ||
[crates.io](https://crates.io/crates/frozen-collections) | ||
[docs.rs](https://docs.rs/frozen-collections) | ||
|
||
Frozen collections are designed to trade creation time for improved | ||
read performance. They are ideal for use with long-lasting collections | ||
which get initialized when an application starts and remain unchanged | ||
permanently, or at least for extended periods of time. This is a common | ||
pattern in service applications. | ||
|
||
During creation, the input data is analyzed to determine the best layout and algorithm for the specific case. | ||
This analysis can take some time, but the value in spending this time up front | ||
is that the collections provide blazingly fast read-time performance. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
[package] | ||
name = "benches" | ||
version = "0.0.0" | ||
publish = false | ||
edition = "2021" | ||
|
||
[dev-dependencies] | ||
frozen-collections = { path = "../frozen-collections" } | ||
rand = "0.9.0-alpha.1" | ||
criterion = "0.5.1" | ||
|
||
[[bench]] | ||
name = "frozen_map" | ||
path = "frozen_map.rs" | ||
harness = false | ||
|
||
[lints] | ||
workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
use std::collections::HashMap; | ||
|
||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
|
||
use frozen_collections::{frozen_map, FrozenMap}; | ||
|
||
fn u32_keys(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("u32_keys"); | ||
|
||
let map = HashMap::from([(0u32, 1), (2, 3), (4, 5), (6, 7), (8, 9)]); | ||
group.bench_function("HashMap", |b| { | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.bench_function("FrozenMap", |b| { | ||
let map = FrozenMap::try_from([(0u32, 1), (2, 3), (4, 5), (6, 7), (8, 9)]).unwrap(); | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.bench_function("frozen_map!", |b| { | ||
let map = frozen_map!(u32, 0: 1, 2: 3, 4: 5, 6: 7, 8: 9); | ||
|
||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.finish(); | ||
} | ||
|
||
fn u32_keys_range(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("u32_keys_range"); | ||
|
||
let map = HashMap::from([(0u32, 0), (1, 1), (2, 2), (3, 3), (4, 4)]); | ||
group.bench_function("HashMap", |b| { | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.bench_function("FrozenMap", |b| { | ||
let map = FrozenMap::try_from([(0u32, 0), (1, 1), (2, 2), (3, 3), (4, 4)]).unwrap(); | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.bench_function("frozen_map!", |b| { | ||
let map = frozen_map!(u32, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4); | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.finish(); | ||
} | ||
|
||
fn i32_keys(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("i32_keys"); | ||
|
||
let map = HashMap::from([(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]); | ||
group.bench_function("HashMap", |b| { | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.bench_function("FrozenMap", |b| { | ||
let map = FrozenMap::try_from([(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]).unwrap(); | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.bench_function("frozen_map!", |b| { | ||
let map = frozen_map!(i32, 0: 1, 2: 3, 4: 5, 6: 7, 8: 9); | ||
b.iter(|| { | ||
_ = black_box(map.get(&4)); | ||
_ = black_box(map.get(&10)); | ||
}); | ||
}); | ||
|
||
group.finish(); | ||
} | ||
|
||
fn string_keys_length(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("string_keys_length"); | ||
let kvs = [ | ||
("Red".to_string(), 1), | ||
("Green".to_string(), 2), | ||
("Blue".to_string(), 3), | ||
("Cyan".to_string(), 4), | ||
("Magenta".to_string(), 5), | ||
("Purple".to_string(), 6), | ||
]; | ||
|
||
let blue = "Blue".to_string(); | ||
let black = "Black".to_string(); | ||
|
||
let map = HashMap::from(kvs.clone()); | ||
group.bench_function("HashMap", |b| { | ||
b.iter(|| { | ||
_ = black_box(map.get(&blue)); | ||
_ = black_box(map.get(&black)); | ||
}); | ||
}); | ||
|
||
group.bench_function("FrozenMap", |b| { | ||
let map = FrozenMap::try_from(kvs.clone()).unwrap(); | ||
b.iter(|| { | ||
_ = black_box(map.get(&blue)); | ||
_ = black_box(map.get(&black)); | ||
}); | ||
}); | ||
|
||
group.bench_function("frozen_map!", |b| { | ||
let map = frozen_map!( | ||
&str, | ||
"Red": 1, | ||
"Green": 2, | ||
"Blue": 3, | ||
"Cyan": 4, | ||
"Magenta": 5, | ||
"Purple": 6); | ||
|
||
b.iter(|| { | ||
_ = black_box(map.get(&blue)); | ||
_ = black_box(map.get(&black)); | ||
}); | ||
}); | ||
|
||
group.finish(); | ||
} | ||
|
||
fn string_keys_subslice(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("string_keys_subslice"); | ||
let kvs = [ | ||
("abcdefghi0".to_string(), 1), | ||
("abcdefghi1".to_string(), 2), | ||
("abcdefghi2".to_string(), 3), | ||
("abcdefghi3".to_string(), 4), | ||
("abcdefghi4".to_string(), 5), | ||
("abcdefghi5".to_string(), 6), | ||
]; | ||
|
||
let blue = "Blue".to_string(); | ||
let black = "Black".to_string(); | ||
|
||
let map = HashMap::from(kvs.clone()); | ||
group.bench_function("HashMap", |b| { | ||
b.iter(|| { | ||
_ = black_box(map.get(&blue)); | ||
_ = black_box(map.get(&black)); | ||
}); | ||
}); | ||
|
||
group.bench_function("FrozenMap", |b| { | ||
let map = FrozenMap::try_from(kvs.clone()).unwrap(); | ||
b.iter(|| { | ||
_ = black_box(map.get(&blue)); | ||
_ = black_box(map.get(&black)); | ||
}); | ||
}); | ||
|
||
group.bench_function("frozen_map!", |b| { | ||
let map = frozen_map!( | ||
&str, | ||
"abcdefghi0": 1, | ||
"abcdefghi1": 2, | ||
"abcdefghi2": 3, | ||
"abcdefghi3": 4, | ||
"abcdefghi4": 5, | ||
"abcdefghi5": 6, | ||
); | ||
b.iter(|| { | ||
_ = black_box(map.get(&blue)); | ||
_ = black_box(map.get(&black)); | ||
}); | ||
}); | ||
|
||
group.finish(); | ||
} | ||
|
||
criterion_group!( | ||
benches, | ||
string_keys_length, | ||
string_keys_subslice, | ||
u32_keys, | ||
u32_keys_range, | ||
i32_keys | ||
); | ||
criterion_main!(benches); |
Oops, something went wrong.