Skip to content

Commit

Permalink
Include environment variables in interpreter info caching (#11601)
Browse files Browse the repository at this point in the history
We want to use `sys.path` for package discovery (#2500, #9849). For
that, we need to know the correct value of `sys.path`. `sys.path` is a
runtime-changeable value that is influenced by many different
sources: environment variables, CLI arguments, `.pth` files with
scripting, `sys.path.append()` at runtime, a distributor patching
Python, etc. We cannot capture them all accurately, especially since
it's possible to change `sys.path` mid-execution. Instead, we make a
best-effort attempt at matching the user's expectations.

The assumption is that package installation generally happens in venv
site-packages, system/user site-packages (including PyPy shipping
packages with std), and `PYTHONPATH`. Specifically, we reuse
`PYTHONPATH` as a dedicated way for users to tell uv to include specific
directories in package discovery.

A common way to influence `sys.path` that is not using venvs is setting
`PYTHONPATH`. To support this we're capturing `PYTHONPATH` as part of
the cache invalidation, i.e. we refresh the interpreter metadata if it
changed. For completeness, we're also capturing other environment
variables documented as influencing `sys.path` or other fields in the
interpreter info.

This PR does not include reading registry values for `sys.path`
additions on Windows as documented in
https://docs.python.org/3.11/using/windows.html#finding-modules.
Notably, it also does not parse Python CLI arguments; we only
consider their environment variable equivalents for package installation
and listing. We could try parsing CLI flags in `uv run python`, but we'd
still miss them when Python is launched indirectly through a script, and
it's more consistent to only consider uv's own arguments and environment
variables, similar to uv's behavior in other places.
  • Loading branch information
konstin authored Feb 19, 2025
1 parent f394f72 commit da30cc4
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 43 deletions.
8 changes: 0 additions & 8 deletions crates/uv-cache/src/by_timestamp.rs

This file was deleted.

4 changes: 1 addition & 3 deletions crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use uv_fs::{cachedir, directories, LockedFile};
use uv_normalize::PackageName;
use uv_pypi_types::ResolutionMetadata;

pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
pub use crate::cli::CacheArgs;
use crate::removal::Remover;
Expand All @@ -27,7 +26,6 @@ pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;

mod archive;
mod by_timestamp;
#[cfg(feature = "clap")]
mod cli;
mod removal;
Expand Down Expand Up @@ -1034,7 +1032,7 @@ impl CacheBucket {
Self::SourceDistributions => "sdists-v8",
Self::FlatIndex => "flat-index-v2",
Self::Git => "git-v0",
Self::Interpreter => "interpreter-v4",
Self::Interpreter => "interpreter-v5",
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_clean.rs`.
Self::Simple => "simple-v15",
Expand Down
155 changes: 123 additions & 32 deletions crates/uv-python/src/interpreter.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::borrow::Cow;
use std::env::consts::ARCH;
use std::ffi::OsString;
use std::fmt::{Display, Formatter};
use std::io;
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus};
use std::sync::OnceLock;
use std::{env, io};

use configparser::ini::Ini;
use fs_err as fs;
Expand All @@ -14,7 +15,7 @@ use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::{debug, trace, warn};

use uv_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness};
use uv_cache::{Cache, CacheBucket, CacheEntry, Freshness};
use uv_cache_info::Timestamp;
use uv_cache_key::cache_digest;
use uv_fs::{write_atomic_sync, PythonExt, Simplified};
Expand All @@ -24,6 +25,7 @@ use uv_pep508::{MarkerEnvironment, StringVersion};
use uv_platform_tags::Platform;
use uv_platform_tags::{Tags, TagsError};
use uv_pypi_types::{ResolverMarkerEnvironment, Scheme};
use uv_static::EnvVars;

use crate::implementation::LenientImplementationName;
use crate::platform::{Arch, Libc, Os};
Expand Down Expand Up @@ -713,6 +715,42 @@ pub enum InterpreterInfoError {
},
}

/// Environment variables that can change the values of [`InterpreterInfo`].
///
/// A snapshot of these variables is stored alongside the cached interpreter info and compared
/// (via `PartialEq`) against the current environment when the cache is read; any difference
/// causes the cached entry to be ignored.
///
/// Each value is kept as the raw [`OsString`] returned by `env::var_os`; `None` means the
/// variable was not set.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
struct PythonEnvVars {
/// `PYTHONHOME` overrides `sys.prefix`.
pythonhome: Option<OsString>,
/// `PYTHONPATH` adds to `sys.path`.
pythonpath: Option<OsString>,
/// `PYTHONSAFEPATH` influences `sys.path`.
pythonsafepath: Option<OsString>,
/// `PYTHONPLATLIBDIR` influences `sys.path`.
pythonplatlibdir: Option<OsString>,
/// `PYTHONNOUSERSITE` influences `sys.path`.
pythonnousersite: Option<OsString>,
/// `PYTHONUSERBASE` influences `sys.path`.
pythonuserbase: Option<OsString>,
/// `APPDATA` influences `sys.path` through the user site packages (windows).
appdata: Option<OsString>,
/// `HOME` influences `sys.path` through the user site packages (unix).
home: Option<OsString>,
}

impl PythonEnvVars {
    /// Capture the current process environment's values for every tracked variable.
    ///
    /// Unset variables are recorded as `None`; set variables keep their raw OS string value.
    fn from_env() -> Self {
        let pythonhome = env::var_os(EnvVars::PYTHONHOME);
        let pythonpath = env::var_os(EnvVars::PYTHONPATH);
        let pythonsafepath = env::var_os(EnvVars::PYTHONSAFEPATH);
        let pythonplatlibdir = env::var_os(EnvVars::PYTHONPLATLIBDIR);
        let pythonnousersite = env::var_os(EnvVars::PYTHONNOUSERSITE);
        let pythonuserbase = env::var_os(EnvVars::PYTHONUSERBASE);
        let appdata = env::var_os(EnvVars::APPDATA);
        let home = env::var_os(EnvVars::HOME);

        Self {
            pythonhome,
            pythonpath,
            pythonsafepath,
            pythonplatlibdir,
            pythonnousersite,
            pythonuserbase,
            appdata,
            home,
        }
    }
}

#[derive(Debug, Deserialize, Serialize, Clone)]
struct InterpreterInfo {
platform: Platform,
Expand All @@ -732,6 +770,18 @@ struct InterpreterInfo {
gil_disabled: bool,
}

/// The record written to the interpreter cache: the probed [`InterpreterInfo`] together with
/// the values that are checked on read to decide whether the record can still be used.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct CachedInterpreterInfo {
/// Information about a Python interpreter at a path.
data: InterpreterInfo,
/// The last modified timestamp of the Python interpreter path.
///
/// It is ctime on unix.
timestamp: Timestamp,
/// The environment variables captured when this record was written.
///
/// They can influence the values in `data`, so the record is ignored when they differ from
/// the current environment.
env_vars: PythonEnvVars,
}

impl InterpreterInfo {
/// Return the resolved [`InterpreterInfo`] for the given Python executable.
pub(crate) fn query(interpreter: &Path, cache: &Cache) -> Result<Self, Error> {
Expand Down Expand Up @@ -869,36 +919,10 @@ impl InterpreterInfo {
})?;

// Read from the cache.
if cache
.freshness(&cache_entry, None)
.is_ok_and(Freshness::is_fresh)
if let Some(value) =
Self::read_and_validate_cache(executable, cache, &cache_entry, modified)
{
if let Ok(data) = fs::read(cache_entry.path()) {
match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
Ok(cached) => {
if cached.timestamp == modified {
trace!(
"Cached interpreter info for Python {}, skipping probing: {}",
cached.data.markers.python_full_version(),
executable.user_display()
);
return Ok(cached.data);
}

trace!(
"Ignoring stale interpreter markers for: {}",
executable.user_display()
);
}
Err(err) => {
warn!(
"Broken interpreter cache entry at {}, removing: {err}",
cache_entry.path().user_display()
);
let _ = fs_err::remove_file(cache_entry.path());
}
}
}
return Ok(value);
}

// Otherwise, run the Python script.
Expand All @@ -914,15 +938,82 @@ impl InterpreterInfo {
fs::create_dir_all(cache_entry.dir())?;
write_atomic_sync(
cache_entry.path(),
rmp_serde::to_vec(&CachedByTimestamp {
rmp_serde::to_vec(&CachedInterpreterInfo {
timestamp: modified,
data: info.clone(),
env_vars: PythonEnvVars::from_env(),
})?,
)?;
}

Ok(info)
}

/// If a cache entry for the Python interpreter exists and it's fresh, return it.
fn read_and_validate_cache(
executable: &Path,
cache: &Cache,
cache_entry: &CacheEntry,
modified: Timestamp,
) -> Option<InterpreterInfo> {
if !cache
.freshness(cache_entry, None)
.is_ok_and(Freshness::is_fresh)
{
return None;
}

let data = match fs::read(cache_entry.path()) {
Ok(data) => data,
Err(err) if err.kind() == io::ErrorKind::NotFound => {
return None;
}
Err(err) => {
warn!(
"Broken interpreter cache entry at {}, removing: {err}",
cache_entry.path().user_display()
);
let _ = fs_err::remove_file(cache_entry.path());
return None;
}
};

let cached = match rmp_serde::from_slice::<CachedInterpreterInfo>(&data) {
Ok(cached) => cached,
Err(err) => {
warn!(
"Broken interpreter cache entry at {}, removing: {err}",
cache_entry.path().user_display()
);
let _ = fs_err::remove_file(cache_entry.path());
return None;
}
};

if cached.timestamp != modified {
trace!(
"Ignoring stale cached interpreter info for: `{}`",
executable.user_display()
);
return None;
}

if cached.env_vars != PythonEnvVars::from_env() {
trace!(
"Ignoring cached interpreter info due to changed environment variables for: `{}`",
executable.user_display()
);
return None;
}

trace!(
"Cached interpreter info for Python {}, skipping probing: `{}`",
cached.data.markers.python_full_version(),
executable.user_display()
);

Some(cached.data)
}
}

/// Find the Python executable that should be considered the "base" for a virtual environment.
Expand Down
18 changes: 18 additions & 0 deletions crates/uv-static/src/env_vars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -649,4 +649,22 @@ impl EnvVars {
///
/// This is a quasi-standard variable, described e.g. in `ncurses(3x)`.
pub const COLUMNS: &'static str = "COLUMNS";

// The constants below are read by `PythonEnvVars::from_env` in `uv-python`, where their values
// take part in invalidating the cached interpreter info.

/// Overrides `sys.prefix`.
pub const PYTHONHOME: &'static str = "PYTHONHOME";

/// Don't prepend a potentially unsafe path to `sys.path`.
pub const PYTHONSAFEPATH: &'static str = "PYTHONSAFEPATH";

/// Overrides `sys.platlibdir`.
pub const PYTHONPLATLIBDIR: &'static str = "PYTHONPLATLIBDIR";

/// Don't add the user site packages to `sys.path`.
pub const PYTHONNOUSERSITE: &'static str = "PYTHONNOUSERSITE";

/// Overrides `site.USER_BASE`.
pub const PYTHONUSERBASE: &'static str = "PYTHONUSERBASE";

/// The base path for user site packages on Windows.
pub const APPDATA: &'static str = "APPDATA";
}

0 comments on commit da30cc4

Please sign in to comment.