New chunkified latest-at APIs and caches (rerun-io#6992)
_(Don't be fooled by the diff size, it is 90% copy pasta)_

Introduces the new chunkified latest-at APIs and related caches.

This is implemented, and works, very similarly to the legacy one.
The one major difference is that we store `UnitChunk`s, and never ever
deserialize data! 🥳

In addition, the new cache maintains a simple "`Clear` tracker", which
greatly improves the performance of read-time clears.
I've backported this mechanism to the legacy re_query, because why not.
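For intuition, here is a minimal, self-contained sketch of the tracker idea. Everything below (`EntityPath` as a plain string, the `ClearTracker` type, the method names) is an illustrative stand-in, not the actual rerun API:

```rust
use std::collections::HashSet;

/// Illustrative stand-in for rerun's `EntityPath`.
type EntityPath = String;

/// Sketch of the "`Clear` tracker": remember which entities have *ever*
/// carried `Clear`-related data, so queries can skip the clear cascade
/// entirely for everything else.
#[derive(Default)]
struct ClearTracker {
    might_require_clearing: HashSet<EntityPath>,
}

impl ClearTracker {
    /// Write path: called whenever `Clear`-related data is ingested.
    fn on_clear_logged(&mut self, entity_path: EntityPath) {
        self.might_require_clearing.insert(entity_path);
    }

    /// Read path: `false` means the query can skip clear handling outright.
    fn needs_clear_check(&self, entity_path: &EntityPath) -> bool {
        self.might_require_clearing.contains(entity_path)
    }
}

fn main() {
    let mut tracker = ClearTracker::default();
    tracker.on_clear_logged("world/points".to_owned());
    assert!(tracker.needs_clear_check(&"world/points".to_owned()));
    // Never cleared, so read-time clear handling is skipped entirely:
    assert!(!tracker.needs_clear_check(&"world/camera".to_owned()));
}
```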
teh-cmc authored Jul 29, 2024
1 parent e88aaa2 commit e6e8c9a
Showing 24 changed files with 9,944 additions and 1 deletion.
1 change: 1 addition & 0 deletions ARCHITECTURE.md
@@ -161,6 +161,7 @@ Update instructions:
|----------------------|--------------------------------------------------------------------------|
| re_entity_db | In-memory storage of Rerun entities |
| re_query | Querying data in the re_chunk_store |
| re_query2 | Querying data in the re_chunk_store |
| re_types | The built-in Rerun data types, component types, and archetypes. |
| re_types_blueprint | The core traits and types that power Rerun's Blueprint sub-system. |
| re_log_encoding | Helpers for encoding and transporting Rerun log messages |
33 changes: 33 additions & 0 deletions Cargo.lock
@@ -4715,6 +4715,39 @@ dependencies = [
"thiserror",
]

[[package]]
name = "re_query2"
version = "0.18.0-alpha.1+dev"
dependencies = [
"ahash",
"anyhow",
"backtrace",
"criterion",
"indent",
"indexmap 2.1.0",
"itertools 0.13.0",
"mimalloc",
"nohash-hasher",
"parking_lot",
"paste",
"rand",
"re_arrow2",
"re_chunk",
"re_chunk_store",
"re_error",
"re_format",
"re_log",
"re_log_types",
"re_tracing",
"re_tuid",
"re_types",
"re_types_core",
"seq-macro",
"similar-asserts",
"static_assertions",
"thiserror",
]

[[package]]
name = "re_renderer"
version = "0.18.0-alpha.1+dev"
1 change: 1 addition & 0 deletions Cargo.toml
@@ -49,6 +49,7 @@ re_format_arrow = { path = "crates/store/re_format_arrow", version = "=0.18.0-alpha.1", default-features = false }
re_log_encoding = { path = "crates/store/re_log_encoding", version = "=0.18.0-alpha.1", default-features = false }
re_log_types = { path = "crates/store/re_log_types", version = "=0.18.0-alpha.1", default-features = false }
re_query = { path = "crates/store/re_query", version = "=0.18.0-alpha.1", default-features = false }
re_query2 = { path = "crates/store/re_query2", version = "=0.18.0-alpha.1", default-features = false }
re_sdk_comms = { path = "crates/store/re_sdk_comms", version = "=0.18.0-alpha.1", default-features = false }
re_types = { path = "crates/store/re_types", version = "=0.18.0-alpha.1", default-features = false }
re_types_blueprint = { path = "crates/store/re_types_blueprint", version = "=0.18.0-alpha.1", default-features = false }
2 changes: 2 additions & 0 deletions crates/store/re_query/Cargo.toml
@@ -69,11 +69,13 @@ bench = false
name = "clamped_zip"
required-features = ["codegen"]
bench = false
doc = false # we're already documenting the one in `re_query2`

[[bin]]
name = "range_zip"
required-features = ["codegen"]
bench = false
doc = false # we're already documenting the one in `re_query2`


[[bench]]
36 changes: 35 additions & 1 deletion crates/store/re_query/src/cache.rs
@@ -4,11 +4,12 @@ use std::{
};

use ahash::{HashMap, HashSet};
use nohash_hasher::IntSet;
use parking_lot::RwLock;

use re_chunk_store::{ChunkStore, ChunkStoreDiff, ChunkStoreEvent, ChunkStoreSubscriber};
use re_log_types::{EntityPath, ResolvedTimeRange, StoreId, TimeInt, Timeline};
use re_types_core::ComponentName;
use re_types_core::{components::ClearIsRecursive, ComponentName, Loggable as _};

use crate::{LatestAtCache, RangeCache};

@@ -70,6 +71,14 @@ pub struct Caches {
/// The [`StoreId`] of the associated [`ChunkStore`].
pub(crate) store_id: StoreId,

/// Keeps track of which entities have had any `Clear`-related data on any timeline at any
/// point in time.
///
/// This is used to optimize read-time clears, so that we don't unnecessarily pay for the fixed
/// overhead of all the query layers when we know for a fact that there won't be any data there.
/// This is a huge performance improvement in practice, especially in recordings with many entities.
pub(crate) might_require_clearing: RwLock<IntSet<EntityPath>>,

// NOTE: `Arc` so we can cheaply free the top-level lock early when needed.
pub(crate) latest_at_per_cache_key: RwLock<HashMap<CacheKey, Arc<RwLock<LatestAtCache>>>>,

@@ -81,12 +90,25 @@ impl std::fmt::Debug for Caches {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
store_id,
might_require_clearing,
latest_at_per_cache_key,
range_per_cache_key,
} = self;

let mut strings = Vec::new();

strings.push(format!(
"[Entities that must be checked for clears @ {store_id}]\n"
));
{
let sorted: BTreeSet<EntityPath> =
might_require_clearing.read().iter().cloned().collect();
for entity_path in sorted {
strings.push(format!(" * {entity_path}\n"));
}
strings.push("\n".to_owned());
}

strings.push(format!("[LatestAt @ {store_id}]"));
{
let latest_at_per_cache_key = latest_at_per_cache_key.read();
@@ -130,6 +152,7 @@ impl Caches {
pub fn new(store: &ChunkStore) -> Self {
Self {
store_id: store.id().clone(),
might_require_clearing: Default::default(),
latest_at_per_cache_key: Default::default(),
range_per_cache_key: Default::default(),
}
@@ -139,10 +162,12 @@
pub fn clear(&self) {
let Self {
store_id: _,
might_require_clearing,
latest_at_per_cache_key,
range_per_cache_key,
} = self;

might_require_clearing.write().clear();
latest_at_per_cache_key.write().clear();
range_per_cache_key.write().clear();
}
@@ -223,6 +248,7 @@ impl ChunkStoreSubscriber for Caches {
}
}

let mut might_require_clearing = self.might_require_clearing.write();
let caches_latest_at = self.latest_at_per_cache_key.write();
let caches_range = self.range_per_cache_key.write();
// NOTE: Don't release the top-level locks -- even though this cannot happen yet with
@@ -237,6 +263,10 @@
// But since this pretty much never happens in practice, let's not go there until we
// have metrics showing that we need to.
for (entity_path, component_name) in compacted.static_ {
if component_name == ClearIsRecursive::name() {
might_require_clearing.insert(entity_path.clone());
}

for (key, cache) in caches_latest_at.iter() {
if key.entity_path == entity_path && key.component_name == component_name {
cache.write().pending_invalidations.insert(TimeInt::STATIC);
@@ -255,6 +285,10 @@
re_tracing::profile_scope!("temporal");

for (key, times) in compacted.temporal {
if key.component_name == ClearIsRecursive::name() {
might_require_clearing.insert(key.entity_path.clone());
}

if let Some(cache) = caches_latest_at.get(&key) {
cache
.write()
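In other words, the subscriber marks an entity the moment any store event touches the clear component, and never unmarks it. A condensed sketch of that bookkeeping — the event type and the component-name constant are illustrative stand-ins, not the rerun types:

```rust
use std::collections::HashSet;

/// Illustrative store event: which component changed on which entity.
struct StoreEvent {
    entity_path: String,
    component_name: String,
}

/// Stand-in for `ClearIsRecursive::name()`.
const CLEAR_COMPONENT: &str = "rerun.components.ClearIsRecursive";

/// Any event touching the clear component marks its entity, permanently.
/// False positives only cost a redundant query, never a wrong result.
fn on_events(events: &[StoreEvent], might_require_clearing: &mut HashSet<String>) {
    for event in events {
        if event.component_name == CLEAR_COMPONENT {
            might_require_clearing.insert(event.entity_path.clone());
        }
    }
}

fn main() {
    let mut marked = HashSet::new();
    on_events(
        &[StoreEvent {
            entity_path: "world".to_owned(),
            component_name: CLEAR_COMPONENT.to_owned(),
        }],
        &mut marked,
    );
    assert!(marked.contains("world"));
}
```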
21 changes: 21 additions & 0 deletions crates/store/re_query/src/latest_at/query.rs
@@ -49,6 +49,13 @@ impl Caches {

let mut results = LatestAtResults::default();

// NOTE: This pre-filtering is extremely important: going through all these query layers
// has non-negligible overhead even if the final result ends up being nothing, and our
// number of queries for a frame grows linearly with the number of entity paths.
let component_names = component_names.into_iter().filter(|component_name| {
store.entity_has_component_on_timeline(&query.timeline(), entity_path, component_name)
});

// Query-time clears
// -----------------
//
@@ -70,8 +77,22 @@
{
re_tracing::profile_scope!("clears");

let potential_clears = self.might_require_clearing.read();

let mut clear_entity_path = entity_path.clone();
loop {
if !potential_clears.contains(&clear_entity_path) {
// This entity does not contain any `Clear`-related data at all, there's no
// point in running actual queries.

let Some(parent_entity_path) = clear_entity_path.parent() else {
break;
};
clear_entity_path = parent_entity_path;

continue;
}

let key = CacheKey::new(
clear_entity_path.clone(),
query.timeline(),
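The loop above walks from the queried entity up to the root, but only pays for an actual clear query on paths present in `might_require_clearing`. A condensed, self-contained sketch of that walk, using slash-separated strings as a stand-in for `EntityPath`:

```rust
use std::collections::HashSet;

/// Illustrative stand-in for `EntityPath::parent`: strip the last path segment.
fn parent(path: &str) -> Option<&str> {
    path.rsplit_once('/').map(|(parent, _)| parent)
}

/// Walk from the queried entity up to the root, keeping only paths that
/// were ever marked as carrying `Clear`-related data.
fn clear_candidates<'a>(
    entity_path: &'a str,
    might_require_clearing: &HashSet<String>,
) -> Vec<&'a str> {
    let mut candidates = Vec::new();
    let mut current = entity_path;
    loop {
        if might_require_clearing.contains(current) {
            candidates.push(current);
        }
        match parent(current) {
            Some(p) => current = p,
            None => break,
        }
    }
    candidates
}

fn main() {
    let marked: HashSet<String> = ["world".to_owned()].into_iter().collect();
    // Only `world` was ever cleared, so it is the only path worth querying:
    assert_eq!(clear_candidates("world/robot/camera", &marked), vec!["world"]);
}
```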
7 changes: 7 additions & 0 deletions crates/store/re_query/src/range/query.rs
@@ -30,6 +30,13 @@ impl Caches {

let mut results = RangeResults::new(query.clone());

// NOTE: This pre-filtering is extremely important: going through all these query layers
// has non-negligible overhead even if the final result ends up being nothing, and our
// number of queries for a frame grows linearly with the number of entity paths.
let component_names = component_names.into_iter().filter(|component_name| {
store.entity_has_component_on_timeline(&query.timeline(), entity_path, component_name)
});

for component_name in component_names {
let key = CacheKey::new(entity_path.clone(), query.timeline(), component_name);

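Both the latest-at and range paths share this pre-filter: component names the store has never seen for the entity on the queried timeline are dropped before any query machinery runs. A sketch of the pattern, with the predicate standing in for `entity_has_component_on_timeline`:

```rust
/// Keep only the components the store has actually seen for this entity on
/// this timeline, so queries that would return nothing cost almost nothing.
fn prefilter_components<'a>(
    component_names: impl IntoIterator<Item = &'a str>,
    has_component: impl Fn(&str) -> bool,
) -> Vec<&'a str> {
    component_names
        .into_iter()
        .filter(|name| has_component(name))
        .collect()
}

fn main() {
    let present = |name: &str| name == "rerun.components.Position3D";
    let filtered = prefilter_components(
        ["rerun.components.Position3D", "rerun.components.Color"],
        present,
    );
    // Only the component that actually exists survives the pre-filter:
    assert_eq!(filtered, vec!["rerun.components.Position3D"]);
}
```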
81 changes: 81 additions & 0 deletions crates/store/re_query2/Cargo.toml
@@ -0,0 +1,81 @@
[package]
name = "re_query2"
authors.workspace = true
description = "High-level query APIs"
edition.workspace = true
homepage.workspace = true
include.workspace = true
license.workspace = true
publish = true
readme = "README.md"
repository.workspace = true
rust-version.workspace = true
version.workspace = true

[lints]
workspace = true

[package.metadata.docs.rs]
all-features = true


[features]
default = []

## Enable codegen helper binaries (generates ClampedZip & RangeZip implementations).
codegen = []


[dependencies]
# Rerun dependencies:
re_chunk.workspace = true
re_chunk_store.workspace = true
re_error.workspace = true
re_format.workspace = true
re_log.workspace = true
re_log_types.workspace = true
re_tracing.workspace = true
re_tuid.workspace = true
re_types_core.workspace = true

# External dependencies:
ahash.workspace = true
anyhow.workspace = true
arrow2.workspace = true
backtrace.workspace = true
indent.workspace = true
indexmap.workspace = true
itertools.workspace = true
nohash-hasher.workspace = true
parking_lot.workspace = true
paste.workspace = true
seq-macro.workspace = true
static_assertions.workspace = true
thiserror.workspace = true


[dev-dependencies]
criterion.workspace = true
mimalloc.workspace = true
rand = { workspace = true, features = ["std", "std_rng"] }
re_types.workspace = true
similar-asserts.workspace = true

[lib]
bench = false


[[bin]]
name = "clamped_zip"
required-features = ["codegen"]
bench = false

[[bin]]
name = "range_zip"
required-features = ["codegen"]
bench = false


[[bench]]
name = "latest_at"
harness = false
10 changes: 10 additions & 0 deletions crates/store/re_query2/README.md
@@ -0,0 +1,10 @@
# re_query2

Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates.

[![Latest version](https://img.shields.io/crates/v/re_query2.svg)](https://crates.io/crates/re_query2)
[![Documentation](https://docs.rs/re_query2/badge.svg)](https://docs.rs/re_query2)
![MIT](https://img.shields.io/badge/license-MIT-blue.svg)
![Apache](https://img.shields.io/badge/license-Apache-blue.svg)

High-level query APIs.