Skip to content

Commit

Permalink
Frozen collections
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Taillefer committed Jul 7, 2024
0 parents commit 1e5772c
Show file tree
Hide file tree
Showing 58 changed files with 9,520 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text eol=crlf
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
version: 2
updates:
- package-ecosystem: "cargo"
directory: "/"
schedule:
interval: "monthly"
open-pull-requests-limit: 10
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
29 changes: 29 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Rust

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

env:
CARGO_TERM_COLOR: always

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
toolchain:
- stable
- beta
- nightly

steps:
- uses: actions/checkout@v3
- run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/target
/Cargo.lock
/.idea
40 changes: 40 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
[workspace]
resolver = "2"
members = [
"frozen-collections",
"frozen-collections-core",
"frozen-collections-macros",
"benches",
"examples",
]

[workspace.package]
version = "0.1.0"
edition = "2021"
categories = ["data-structures"]
keywords = ["map", "set", "collection"]
repository = "https://github.com/geeknoid/frozen-collections"
license = "MIT"
readme = "README.md"
authors = ["Martin Taillefer <martin@taillefer.org>"]

[workspace.lints.clippy]
pedantic = { level = "warn", priority = -1 }
correctness = { level = "warn", priority = -1 }
complexity = { level = "warn", priority = -1 }
perf = { level = "warn", priority = -1 }
cargo = { level = "warn", priority = -1 }
nursery = { level = "warn", priority = -1 }
single_call_fn = "allow"
min_ident_chars = "allow"
unused_self = "allow"
transmute_ptr_to_ptr = "allow"
wildcard_imports = "allow"
too_many_lines = "allow"
multiple_crate_versions = "allow"
missing_transmute_annotations = "allow"
from-iter-instead-of-collect = "allow"

[profile.bench]
codegen-units = 1
lto = "fat"
40 changes: 40 additions & 0 deletions Ideas_and_Todos.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Ideas

- Create a benchmark suite to come up with better numbers for the various thresholds and percentages
used in the analysis code.

- In the hash code analyzer, beyond the number of collisions, the logic should factor in how many empty slots are in the
hash table. A lot of empty slots can slow things down due to cache misses, in addition to wasting memory.

- Consider some hint supplied by the caller for how much time/effort to put into analysis.

- Consider providing an offline tool that performs the analysis on the input data. Being offline, the
analysis could be more exhaustive. The analysis would produce a little blob of state which would be fed
into the code to configure things without running analysis code at runtime.

- Consider the use of perfect hashing or minimal perfect hashing.

- Consider introducing dynamic benchmarking as part of the analysis phase. We could build
several prototype collections, measure effective perf, and then use the benchmark results to
decide on the optimal collection configuration.

- The facades need to support some notion of `Borrow<T>`. This is particularly important for
allowing collections where `K = String` to be queried with `&str` instead. Unfortunately, given the
gymnastics the code is doing internally around hashing, it's not obvious how this feature
could be added.

- Add a specialized set implementation for integer types which uses a bit vector for storage.

- Evaluate hash functions to find the best-performing one.

- Bypass hashing for short left-slices or right-slices. When the slices are
short enough, we should just take the character values as the hash code.

- For integer keys, consider expanding the model for ranges to include ranges with holes.
Basically, the array would hold `Option<V>` instead of just `V`.

# TODOs

- Tests
- Make it so the macros don't need a type indicator for strings and ints
- Perf analysis
25 changes: 25 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Copyright (c) 2024 Martin Taillefer

Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Frozen Collections

[![Test Status](https://github.com/geeknoid/frozen-collections/workflows/Rust/badge.svg?event=push)](https://github.com/geeknoid/frozen-collections/actions)
[![Crate](https://img.shields.io/crates/v/frozen-collections.svg)](https://crates.io/crates/frozen-collections)
[![API](https://docs.rs/frozen-collections/badge.svg)](https://docs.rs/frozen-collections)

Frozen collections are designed to trade creation time for improved
read performance. They are ideal for use with long-lasting collections
which get initialized when an application starts and remain unchanged
permanently, or at least for extended periods of time. This is a common
pattern in service applications.

During creation, the input data is analyzed to determine the best layout and algorithm for the specific case.
This analysis can take some time, but the value in spending this time up front
is that the collections provide blazingly fast read-time performance.
18 changes: 18 additions & 0 deletions benches/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "benches"
version = "0.0.0"
publish = false
edition = "2021"

[dev-dependencies]
frozen-collections = { path = "../frozen-collections" }
rand = "0.9.0-alpha.1"
criterion = "0.5.1"

[[bench]]
name = "frozen_map"
path = "frozen_map.rs"
harness = false

[lints]
workspace = true
204 changes: 204 additions & 0 deletions benches/frozen_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
use std::collections::HashMap;

use criterion::{black_box, criterion_group, criterion_main, Criterion};

use frozen_collections::{frozen_map, FrozenMap};

/// Benchmarks lookups in maps keyed by the sparse `u32` values 0, 2, 4, 6 and 8.
///
/// Three variants are registered in the `u32_keys` group: a std `HashMap`,
/// a `FrozenMap` built through `try_from`, and a map produced by the
/// `frozen_map!` macro. Every timed iteration performs one lookup of a
/// present key (`4`) and one lookup of an absent key (`10`).
fn u32_keys(c: &mut Criterion) {
    // Shared input so the `HashMap` and `FrozenMap` variants hold identical data.
    let entries = [(0u32, 1), (2, 3), (4, 5), (6, 7), (8, 9)];
    let mut bench_group = c.benchmark_group("u32_keys");

    let std_map = HashMap::from(entries);
    bench_group.bench_function("HashMap", |bencher| {
        bencher.iter(|| {
            let _ = black_box(std_map.get(&4));
            let _ = black_box(std_map.get(&10));
        });
    });

    bench_group.bench_function("FrozenMap", |bencher| {
        // Built inside the closure so construction stays outside the timed loop
        // but is still local to this benchmark.
        let frozen = FrozenMap::try_from(entries).unwrap();
        bencher.iter(|| {
            let _ = black_box(frozen.get(&4));
            let _ = black_box(frozen.get(&10));
        });
    });

    bench_group.bench_function("frozen_map!", |bencher| {
        let macro_map = frozen_map!(u32, 0: 1, 2: 3, 4: 5, 6: 7, 8: 9);

        bencher.iter(|| {
            let _ = black_box(macro_map.get(&4));
            let _ = black_box(macro_map.get(&10));
        });
    });

    bench_group.finish();
}

/// Benchmarks lookups in maps keyed by the contiguous `u32` values 0 through 4.
///
/// Three variants are registered in the `u32_keys_range` group: a std
/// `HashMap`, a `FrozenMap` built through `try_from`, and a map produced by
/// the `frozen_map!` macro. Every timed iteration performs one lookup of a
/// present key (`4`) and one lookup of an absent key (`10`).
fn u32_keys_range(c: &mut Criterion) {
    // Keys form an unbroken run; each value equals its key.
    let entries = [(0u32, 0), (1, 1), (2, 2), (3, 3), (4, 4)];
    let mut bench_group = c.benchmark_group("u32_keys_range");

    let std_map = HashMap::from(entries);
    bench_group.bench_function("HashMap", |bencher| {
        bencher.iter(|| {
            let _ = black_box(std_map.get(&4));
            let _ = black_box(std_map.get(&10));
        });
    });

    bench_group.bench_function("FrozenMap", |bencher| {
        let frozen = FrozenMap::try_from(entries).unwrap();
        bencher.iter(|| {
            let _ = black_box(frozen.get(&4));
            let _ = black_box(frozen.get(&10));
        });
    });

    bench_group.bench_function("frozen_map!", |bencher| {
        let macro_map = frozen_map!(u32, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4);
        bencher.iter(|| {
            let _ = black_box(macro_map.get(&4));
            let _ = black_box(macro_map.get(&10));
        });
    });

    bench_group.finish();
}

/// Benchmarks lookups in maps keyed by the sparse integer values 0, 2, 4, 6 and 8.
///
/// The key literals carry no suffix; the `frozen_map!` variant names `i32`
/// explicitly. Three variants are registered in the `i32_keys` group: a std
/// `HashMap`, a `FrozenMap` built through `try_from`, and a map produced by
/// the `frozen_map!` macro. Every timed iteration performs one lookup of a
/// present key (`4`) and one lookup of an absent key (`10`).
fn i32_keys(c: &mut Criterion) {
    // Shared input so the `HashMap` and `FrozenMap` variants hold identical data.
    let entries = [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)];
    let mut bench_group = c.benchmark_group("i32_keys");

    let std_map = HashMap::from(entries);
    bench_group.bench_function("HashMap", |bencher| {
        bencher.iter(|| {
            let _ = black_box(std_map.get(&4));
            let _ = black_box(std_map.get(&10));
        });
    });

    bench_group.bench_function("FrozenMap", |bencher| {
        let frozen = FrozenMap::try_from(entries).unwrap();
        bencher.iter(|| {
            let _ = black_box(frozen.get(&4));
            let _ = black_box(frozen.get(&10));
        });
    });

    bench_group.bench_function("frozen_map!", |bencher| {
        let macro_map = frozen_map!(i32, 0: 1, 2: 3, 4: 5, 6: 7, 8: 9);
        bencher.iter(|| {
            let _ = black_box(macro_map.get(&4));
            let _ = black_box(macro_map.get(&10));
        });
    });

    bench_group.finish();
}

/// Benchmarks lookups in maps keyed by colour-name strings of varying lengths.
///
/// Three variants are registered in the `string_keys_length` group: a std
/// `HashMap`, a `FrozenMap` built through `try_from`, and a map produced by
/// the `frozen_map!` macro. Every timed iteration performs one lookup of a
/// present key (`"Blue"`) and one lookup of an absent key (`"Black"`).
fn string_keys_length(c: &mut Criterion) {
    let mut bench_group = c.benchmark_group("string_keys_length");
    let entries = [
        ("Red".to_string(), 1),
        ("Green".to_string(), 2),
        ("Blue".to_string(), 3),
        ("Cyan".to_string(), 4),
        ("Magenta".to_string(), 5),
        ("Purple".to_string(), 6),
    ];

    // Probe keys are allocated once, outside every timed loop.
    let present_key = "Blue".to_string();
    let absent_key = "Black".to_string();

    let std_map = HashMap::from(entries.clone());
    bench_group.bench_function("HashMap", |bencher| {
        bencher.iter(|| {
            let _ = black_box(std_map.get(&present_key));
            let _ = black_box(std_map.get(&absent_key));
        });
    });

    bench_group.bench_function("FrozenMap", |bencher| {
        let frozen = FrozenMap::try_from(entries.clone()).unwrap();
        bencher.iter(|| {
            let _ = black_box(frozen.get(&present_key));
            let _ = black_box(frozen.get(&absent_key));
        });
    });

    bench_group.bench_function("frozen_map!", |bencher| {
        let macro_map = frozen_map!(
            &str,
            "Red": 1,
            "Green": 2,
            "Blue": 3,
            "Cyan": 4,
            "Magenta": 5,
            "Purple": 6);

        bencher.iter(|| {
            let _ = black_box(macro_map.get(&present_key));
            let _ = black_box(macro_map.get(&absent_key));
        });
    });

    bench_group.finish();
}

/// Benchmarks lookups in maps whose string keys share a long common prefix
/// (`"abcdefghi"`) and differ only in their final character.
///
/// Three variants are registered in the `string_keys_subslice` group: a std
/// `HashMap`, a `FrozenMap` built through `try_from`, and a map produced by
/// the `frozen_map!` macro. Every timed iteration performs one lookup of a
/// present key and one lookup of an absent key.
///
/// The probe keys have the same length and prefix as the stored keys. Probing
/// with unrelated strings of a different length (as an earlier revision did
/// with `"Blue"`/`"Black"`) would make both lookups misses and would never
/// measure the successful-lookup path for this key shape.
fn string_keys_subslice(c: &mut Criterion) {
    let mut group = c.benchmark_group("string_keys_subslice");
    let kvs = [
        ("abcdefghi0".to_string(), 1),
        ("abcdefghi1".to_string(), 2),
        ("abcdefghi2".to_string(), 3),
        ("abcdefghi3".to_string(), 4),
        ("abcdefghi4".to_string(), 5),
        ("abcdefghi5".to_string(), 6),
    ];

    // `hit` is stored in every map below; `miss` differs only in the last
    // character, so it cannot be told apart from a stored key by length or prefix.
    let hit = "abcdefghi3".to_string();
    let miss = "abcdefghi9".to_string();

    let map = HashMap::from(kvs.clone());
    group.bench_function("HashMap", |b| {
        b.iter(|| {
            _ = black_box(map.get(&hit));
            _ = black_box(map.get(&miss));
        });
    });

    group.bench_function("FrozenMap", |b| {
        let map = FrozenMap::try_from(kvs.clone()).unwrap();
        b.iter(|| {
            _ = black_box(map.get(&hit));
            _ = black_box(map.get(&miss));
        });
    });

    group.bench_function("frozen_map!", |b| {
        let map = frozen_map!(
            &str,
            "abcdefghi0": 1,
            "abcdefghi1": 2,
            "abcdefghi2": 3,
            "abcdefghi3": 4,
            "abcdefghi4": 5,
            "abcdefghi5": 6,
        );
        b.iter(|| {
            _ = black_box(map.get(&hit));
            _ = black_box(map.get(&miss));
        });
    });

    group.finish();
}

// Registers every benchmark function in this file under the group name
// `benches`, in the order listed.
criterion_group!(
benches,
string_keys_length,
string_keys_subslice,
u32_keys,
u32_keys_range,
i32_keys
);
// Generates the benchmark entry point for the `benches` group; the bench
// target is declared with `harness = false`, so no default harness provides one.
criterion_main!(benches);
Loading

0 comments on commit 1e5772c

Please sign in to comment.