Skip to content

Commit

Permalink
Use ArcStr for marker values (#10453)
Browse files Browse the repository at this point in the history
N.B. After fixing #10430, `ArcStr` became the fastest implementation
(and the gains were significantly reduced, down to 1-2%). See:
#10453 (comment).

## Summary

I tried out a variety of small string crates, but `Arc<str>`
outperformed them, giving a ~10% speed-up:

```console
❯ hyperfine "../arcstr lock" "../flexstr lock" "uv lock" "../arc lock" "../compact_str lock" --prepare "rm -f uv.lock" --min-runs 50 --warmup 20
Benchmark 1: ../arcstr lock
  Time (mean ± σ):     304.6 ms ±   2.3 ms    [User: 302.9 ms, System: 117.8 ms]
  Range (min … max):   299.0 ms … 311.3 ms    50 runs

Benchmark 2: ../flexstr lock
  Time (mean ± σ):     319.2 ms ±   1.7 ms    [User: 317.7 ms, System: 118.2 ms]
  Range (min … max):   316.8 ms … 323.3 ms    50 runs

Benchmark 3: uv lock
  Time (mean ± σ):     330.6 ms ±   1.5 ms    [User: 328.1 ms, System: 139.3 ms]
  Range (min … max):   326.6 ms … 334.2 ms    50 runs

Benchmark 4: ../arc lock
  Time (mean ± σ):     303.0 ms ±   1.2 ms    [User: 301.6 ms, System: 118.4 ms]
  Range (min … max):   300.3 ms … 305.3 ms    50 runs

Benchmark 5: ../compact_str lock
  Time (mean ± σ):     320.4 ms ±   2.0 ms    [User: 318.7 ms, System: 120.8 ms]
  Range (min … max):   317.3 ms … 326.7 ms    50 runs

Summary
  ../arc lock ran
    1.01 ± 0.01 times faster than ../arcstr lock
    1.05 ± 0.01 times faster than ../flexstr lock
    1.06 ± 0.01 times faster than ../compact_str lock
    1.09 ± 0.01 times faster than uv lock
```
  • Loading branch information
charliermarsh authored Jan 10, 2025
1 parent 7a21b71 commit 8420195
Show file tree
Hide file tree
Showing 12 changed files with 80 additions and 60 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-distribution-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ uv-platform-tags = { workspace = true }
uv-pypi-types = { workspace = true }

anyhow = { workspace = true }
arcstr = { workspace = true }
bitflags = { workspace = true }
fs-err = { workspace = true }
itertools = { workspace = true }
Expand Down
21 changes: 11 additions & 10 deletions crates/uv-distribution-types/src/prioritized_distribution.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::fmt::{Display, Formatter};

use arcstr::ArcStr;
use tracing::debug;

use uv_distribution_filename::{BuildTag, WheelFilename};
Expand Down Expand Up @@ -662,38 +663,38 @@ pub fn implied_markers(filename: &WheelFilename) -> MarkerTree {
let mut tag_marker = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "win32".to_string(),
value: arcstr::literal!("win32"),
});
tag_marker.and(MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::PlatformMachine,
operator: MarkerOperator::Equal,
value: "x86".to_string(),
value: arcstr::literal!("x86"),
}));
marker.or(tag_marker);
}
"win_amd64" => {
let mut tag_marker = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "win32".to_string(),
value: arcstr::literal!("win32"),
});
tag_marker.and(MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::PlatformMachine,
operator: MarkerOperator::Equal,
value: "x86_64".to_string(),
value: arcstr::literal!("x86_64"),
}));
marker.or(tag_marker);
}
"win_arm64" => {
let mut tag_marker = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "win32".to_string(),
value: arcstr::literal!("win32"),
});
tag_marker.and(MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::PlatformMachine,
operator: MarkerOperator::Equal,
value: "arm64".to_string(),
value: arcstr::literal!("arm64"),
}));
marker.or(tag_marker);
}
Expand All @@ -703,7 +704,7 @@ pub fn implied_markers(filename: &WheelFilename) -> MarkerTree {
let mut tag_marker = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "darwin".to_string(),
value: arcstr::literal!("darwin"),
});

// Parse the macOS version from the tag.
Expand Down Expand Up @@ -787,7 +788,7 @@ pub fn implied_markers(filename: &WheelFilename) -> MarkerTree {
arch_marker.or(MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::PlatformMachine,
operator: MarkerOperator::Equal,
value: (*arch).to_string(),
value: ArcStr::from(*arch),
}));
}
tag_marker.and(arch_marker);
Expand All @@ -800,7 +801,7 @@ pub fn implied_markers(filename: &WheelFilename) -> MarkerTree {
let mut tag_marker = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "linux".to_string(),
value: arcstr::literal!("linux"),
});

// Parse the architecture from the tag.
Expand Down Expand Up @@ -866,7 +867,7 @@ pub fn implied_markers(filename: &WheelFilename) -> MarkerTree {
tag_marker.and(MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::PlatformMachine,
operator: MarkerOperator::Equal,
value: arch.to_string(),
value: ArcStr::from(arch),
}));

marker.or(tag_marker);
Expand Down
1 change: 1 addition & 0 deletions crates/uv-pep508/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ uv-fs = { workspace = true }
uv-normalize = { workspace = true }
uv-pep440 = { workspace = true }

arcstr = { workspace = true}
boxcar = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
Expand Down
6 changes: 3 additions & 3 deletions crates/uv-pep508/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1334,17 +1334,17 @@ mod tests {
let mut b = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "win32".to_string(),
value: arcstr::literal!("win32"),
});
let mut c = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::OsName,
operator: MarkerOperator::Equal,
value: "linux".to_string(),
value: arcstr::literal!("linux"),
});
let d = MarkerTree::expression(MarkerExpression::String {
key: MarkerValueString::ImplementationName,
operator: MarkerOperator::Equal,
value: "cpython".to_string(),
value: arcstr::literal!("cpython"),
});

c.and(d);
Expand Down
60 changes: 32 additions & 28 deletions crates/uv-pep508/src/marker/algebra.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use std::ops::Bound;
use std::sync::Mutex;
use std::sync::MutexGuard;

use arcstr::ArcStr;
use itertools::{Either, Itertools};
use rustc_hash::FxHashMap;
use std::sync::LazyLock;
Expand Down Expand Up @@ -287,28 +288,31 @@ impl InternerGuard<'_> {
// values in `exclusions`.
//
// See: https://discuss.python.org/t/clarify-usage-of-platform-system/70900
let (key, value) = match (key, value.as_str()) {
(MarkerValueString::PlatformSystem, "Windows") => {
(CanonicalMarkerValueString::SysPlatform, "win32".to_string())
}
let (key, value) = match (key, value.as_ref()) {
(MarkerValueString::PlatformSystem, "Windows") => (
CanonicalMarkerValueString::SysPlatform,
arcstr::literal!("win32"),
),
(MarkerValueString::PlatformSystem, "Darwin") => (
CanonicalMarkerValueString::SysPlatform,
"darwin".to_string(),
arcstr::literal!("darwin"),
),
(MarkerValueString::PlatformSystem, "Linux") => (
CanonicalMarkerValueString::SysPlatform,
arcstr::literal!("linux"),
),
(MarkerValueString::PlatformSystem, "AIX") => (
CanonicalMarkerValueString::SysPlatform,
arcstr::literal!("aix"),
),
(MarkerValueString::PlatformSystem, "Linux") => {
(CanonicalMarkerValueString::SysPlatform, "linux".to_string())
}
(MarkerValueString::PlatformSystem, "AIX") => {
(CanonicalMarkerValueString::SysPlatform, "aix".to_string())
}
(MarkerValueString::PlatformSystem, "Emscripten") => (
CanonicalMarkerValueString::SysPlatform,
"emscripten".to_string(),
arcstr::literal!("emscripten"),
),
// See: https://peps.python.org/pep-0738/#sys
(MarkerValueString::PlatformSystem, "Android") => (
CanonicalMarkerValueString::SysPlatform,
"android".to_string(),
arcstr::literal!("android"),
),
_ => (key.into(), value),
};
Expand Down Expand Up @@ -869,48 +873,48 @@ impl InternerGuard<'_> {
MarkerExpression::String {
key: MarkerValueString::OsName,
operator: MarkerOperator::Equal,
value: "nt".to_string(),
value: arcstr::literal!("nt"),
},
MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "linux".to_string(),
value: arcstr::literal!("linux"),
},
),
(
MarkerExpression::String {
key: MarkerValueString::OsName,
operator: MarkerOperator::Equal,
value: "nt".to_string(),
value: arcstr::literal!("nt"),
},
MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "darwin".to_string(),
value: arcstr::literal!("darwin"),
},
),
(
MarkerExpression::String {
key: MarkerValueString::OsName,
operator: MarkerOperator::Equal,
value: "nt".to_string(),
value: arcstr::literal!("nt"),
},
MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "ios".to_string(),
value: arcstr::literal!("ios"),
},
),
(
MarkerExpression::String {
key: MarkerValueString::OsName,
operator: MarkerOperator::Equal,
value: "posix".to_string(),
value: arcstr::literal!("posix"),
},
MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: "win32".to_string(),
value: arcstr::literal!("win32"),
},
),
];
Expand Down Expand Up @@ -950,12 +954,12 @@ impl InternerGuard<'_> {
MarkerExpression::String {
key: MarkerValueString::PlatformSystem,
operator: MarkerOperator::Equal,
value: platform_system.to_string(),
value: ArcStr::from(platform_system),
},
MarkerExpression::String {
key: MarkerValueString::SysPlatform,
operator: MarkerOperator::Equal,
value: sys_platform.to_string(),
value: ArcStr::from(sys_platform),
},
));
}
Expand Down Expand Up @@ -996,13 +1000,13 @@ pub(crate) enum Variable {
/// string marker and value.
In {
key: CanonicalMarkerValueString,
value: String,
value: ArcStr,
},
/// A variable representing a `<value> in <key>` expression for a particular
/// string marker and value.
Contains {
key: CanonicalMarkerValueString,
value: String,
value: ArcStr,
},
/// A variable representing the existence or absence of a given extra.
///
Expand Down Expand Up @@ -1128,7 +1132,7 @@ pub(crate) enum Edges {
// Invariant: All ranges are simple, meaning they can be represented by a bounded
// interval without gaps. Additionally, there are at least two edges in the set.
String {
edges: SmallVec<(Ranges<String>, NodeId)>,
edges: SmallVec<(Ranges<ArcStr>, NodeId)>,
},
// The edges of a boolean variable, representing the values `true` (the `high` child)
// and `false` (the `low` child).
Expand Down Expand Up @@ -1158,8 +1162,8 @@ impl Edges {
///
/// This function will panic for the `In` and `Contains` marker operators, which
/// should be represented as separate boolean variables.
fn from_string(operator: MarkerOperator, value: String) -> Edges {
let range: Ranges<String> = match operator {
fn from_string(operator: MarkerOperator, value: ArcStr) -> Edges {
let range: Ranges<ArcStr> = match operator {
MarkerOperator::Equal => Ranges::singleton(value),
MarkerOperator::NotEqual => Ranges::singleton(value).complement(),
MarkerOperator::GreaterThan => Ranges::strictly_higher_than(value),
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-pep508/src/marker/parse.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arcstr::ArcStr;
use std::str::FromStr;

use uv_normalize::ExtraName;
use uv_pep440::{Version, VersionPattern, VersionSpecifier};

Expand Down Expand Up @@ -92,7 +92,7 @@ pub(crate) fn parse_marker_value<T: Pep508Url>(
Some((start_pos, quotation_mark @ ('"' | '\''))) => {
cursor.next();
let (start, len) = cursor.take_while(|c| c != quotation_mark);
let value = cursor.slice(start, len).to_string();
let value = ArcStr::from(cursor.slice(start, len));
cursor.next_expect_char(quotation_mark, start_pos)?;
Ok(MarkerValue::QuotedString(value))
}
Expand Down
8 changes: 5 additions & 3 deletions crates/uv-pep508/src/marker/simplify.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use std::fmt;
use std::ops::Bound;

use arcstr::ArcStr;
use indexmap::IndexMap;
use itertools::Itertools;
use rustc_hash::FxBuildHasher;
use uv_pep440::{Version, VersionSpecifier};
use version_ranges::Ranges;

use uv_pep440::{Version, VersionSpecifier};

use crate::{ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeKind};

/// Returns a simplified DNF expression for a given marker tree.
Expand Down Expand Up @@ -131,7 +133,7 @@ fn collect_dnf(

let expr = MarkerExpression::String {
key: marker.key().into(),
value: marker.value().to_owned(),
value: ArcStr::from(marker.value()),
operator,
};

Expand All @@ -150,7 +152,7 @@ fn collect_dnf(

let expr = MarkerExpression::String {
key: marker.key().into(),
value: marker.value().to_owned(),
value: ArcStr::from(marker.value()),
operator,
};

Expand Down
Loading

0 comments on commit 8420195

Please sign in to comment.