diff --git a/Cargo.lock b/Cargo.lock index 43eea7c83a8b7..76100b932e605 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5833,15 +5833,18 @@ name = "uv-distribution" version = "0.0.1" dependencies = [ "anyhow", + "blake2", "either", "fs-err", "futures", + "hex", "indoc", "insta", "nanoid", "owo-colors", "reqwest", "reqwest-middleware", + "rkyv", "rmp-serde", "rustc-hash", "serde", @@ -5855,6 +5858,7 @@ dependencies = [ "uv-auth", "uv-cache", "uv-cache-info", + "uv-cache-key", "uv-client", "uv-configuration", "uv-distribution-filename", @@ -5870,6 +5874,7 @@ dependencies = [ "uv-platform-tags", "uv-pypi-types", "uv-redacted", + "uv-static", "uv-types", "uv-workspace", "walkdir", diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index 80859f18b7f0c..81134c1fce0c3 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -19,6 +19,7 @@ workspace = true uv-auth = { workspace = true } uv-cache = { workspace = true } uv-cache-info = { workspace = true } +uv-cache-key = { workspace = true } uv-client = { workspace = true } uv-configuration = { workspace = true } uv-distribution-filename = { workspace = true } @@ -34,17 +35,21 @@ uv-pep508 = { workspace = true } uv-platform-tags = { workspace = true } uv-pypi-types = { workspace = true } uv-redacted = { workspace = true } +uv-static = { workspace = true } uv-types = { workspace = true } uv-workspace = { workspace = true } anyhow = { workspace = true } +blake2 = { workspace = true } either = { workspace = true } fs-err = { workspace = true } futures = { workspace = true } +hex = { workspace = true } nanoid = { workspace = true } owo-colors = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } +rkyv = { workspace = true } rmp-serde = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } diff --git a/crates/uv-distribution/src/distribution_database.rs 
b/crates/uv-distribution/src/distribution_database.rs index ef2227df6152a..6e9d6d5292e9f 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -10,7 +10,7 @@ use tempfile::TempDir; use tokio::io::{AsyncRead, AsyncSeekExt, ReadBuf}; use tokio::sync::Semaphore; use tokio_util::compat::FuturesAsyncReadCompatExt; -use tracing::{Instrument, info_span, instrument, warn}; +use tracing::{Instrument, debug, info_span, instrument, warn}; use url::Url; use uv_cache::{ArchiveId, CacheBucket, CacheEntry, WheelCache}; @@ -20,8 +20,8 @@ use uv_client::{ }; use uv_distribution_filename::WheelFilename; use uv_distribution_types::{ - BuildInfo, BuildableSource, BuiltDist, Dist, File, HashPolicy, Hashed, IndexUrl, InstalledDist, - Name, SourceDist, ToUrlError, + BuildInfo, BuildableSource, BuiltDist, CompatibleDist, Dist, File, HashPolicy, Hashed, + IndexUrl, InstalledDist, Name, RegistryBuiltDist, SourceDist, ToUrlError, }; use uv_extract::hash::Hasher; use uv_fs::write_atomic; @@ -32,6 +32,7 @@ use uv_types::{BuildContext, BuildStack}; use crate::archive::Archive; use crate::metadata::{ArchiveMetadata, Metadata}; +use crate::remote::RemoteCacheResolver; use crate::source::SourceDistributionBuilder; use crate::{Error, LocalWheel, Reporter, RequiresDist}; @@ -50,6 +51,7 @@ use crate::{Error, LocalWheel, Reporter, RequiresDist}; pub struct DistributionDatabase<'a, Context: BuildContext> { build_context: &'a Context, builder: SourceDistributionBuilder<'a, Context>, + resolver: RemoteCacheResolver<'a, Context>, client: ManagedClient<'a>, reporter: Option>, } @@ -63,6 +65,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { Self { build_context, builder: SourceDistributionBuilder::new(build_context), + resolver: RemoteCacheResolver::new(build_context), client: ManagedClient::new(client, concurrent_downloads), reporter: None, } @@ -378,6 +381,23 @@ impl<'a, Context: BuildContext> 
DistributionDatabase<'a, Context> { dist: &SourceDist, tags: &Tags, hashes: HashPolicy<'_>, + ) -> Result { + // If the metadata is available in a remote cache, fetch it. + if let Ok(Some(wheel)) = self.get_remote_wheel(dist, tags, hashes).await { + return Ok(wheel); + } + + // Otherwise, build the wheel locally. + self.build_wheel_inner(dist, tags, hashes).await + } + + /// Convert a source distribution into a wheel, fetching it from the cache or building it if + /// necessary. + async fn build_wheel_inner( + &self, + dist: &SourceDist, + tags: &Tags, + hashes: HashPolicy<'_>, ) -> Result { let built_wheel = self .builder @@ -523,6 +543,9 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { source: &BuildableSource<'_>, hashes: HashPolicy<'_>, ) -> Result { + // Resolve the source distribution to a precise revision (i.e., a specific Git commit). + self.builder.resolve_revision(source, &self.client).await?; + // If the metadata was provided by the user directly, prefer it. if let Some(dist) = source.as_dist() { if let Some(metadata) = self @@ -530,14 +553,25 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { .dependency_metadata() .get(dist.name(), dist.version()) { - // If we skipped the build, we should still resolve any Git dependencies to precise - // commits. - self.builder.resolve_revision(source, &self.client).await?; - return Ok(ArchiveMetadata::from_metadata23(metadata.clone())); } } + // If the metadata is available in a remote cache, fetch it. + if let Ok(Some(metadata)) = self.get_remote_metadata(source, hashes).await { + return Ok(metadata); + } + + // Otherwise, retrieve the metadata from the source distribution. + self.build_wheel_metadata_inner(source, hashes).await + } + + /// Build the wheel metadata for a source distribution, or fetch it from the cache if possible. 
+ async fn build_wheel_metadata_inner( + &self, + source: &BuildableSource<'_>, + hashes: HashPolicy<'_>, + ) -> Result { let metadata = self .builder .download_and_build_metadata(source, hashes, &self.client) @@ -547,6 +581,99 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { Ok(metadata) } + /// Fetch a wheel from a remote cache, if available. + async fn get_remote_wheel( + &self, + source: &SourceDist, + tags: &Tags, + hashes: HashPolicy<'_>, + ) -> Result, Error> { + let Some(index) = self + .resolver + .get_cached_distribution(source, Some(tags), &self.client) + .await? + else { + return Ok(None); + }; + for prioritized_dist in index + .get(source.name()) + .iter() + .flat_map(|index| index.iter()) + { + let Some(compatible_dist) = prioritized_dist.get() else { + continue; + }; + match compatible_dist { + CompatibleDist::InstalledDist(..) => {} + CompatibleDist::SourceDist { sdist, .. } => { + debug!("Found cached remote source distribution for: {source}"); + let dist = SourceDist::Registry(sdist.clone()); + return self.build_wheel_inner(&dist, tags, hashes).await.map(Some); + } + CompatibleDist::CompatibleWheel { wheel, .. } + | CompatibleDist::IncompatibleWheel { wheel, .. } => { + debug!("Found cached remote built distribution for: {source}"); + let dist = BuiltDist::Registry(RegistryBuiltDist { + wheels: vec![wheel.clone()], + best_wheel_index: 0, + sdist: None, + }); + return self.get_wheel(&dist, hashes).await.map(Some); + } + } + } + Ok(None) + } + + /// Fetch the wheel metadata from a remote cache, if available. + async fn get_remote_metadata( + &self, + source: &BuildableSource<'_>, + hashes: HashPolicy<'_>, + ) -> Result, Error> { + let BuildableSource::Dist(source) = source else { + return Ok(None); + }; + let Some(index) = self + .resolver + .get_cached_distribution(source, None, &self.client) + .await? 
+ else { + return Ok(None); + }; + for prioritized_dist in index + .get(source.name()) + .iter() + .flat_map(|index| index.iter()) + { + let Some(compatible_dist) = prioritized_dist.get() else { + continue; + }; + match compatible_dist { + CompatibleDist::InstalledDist(..) => {} + CompatibleDist::SourceDist { sdist, .. } => { + debug!("Found cached remote source distribution for: {source}"); + let dist = SourceDist::Registry(sdist.clone()); + return self + .build_wheel_metadata_inner(&BuildableSource::Dist(&dist), hashes) + .await + .map(Some); + } + CompatibleDist::CompatibleWheel { wheel, .. } + | CompatibleDist::IncompatibleWheel { wheel, .. } => { + debug!("Found cached remote built distribution for: {source}"); + let dist = BuiltDist::Registry(RegistryBuiltDist { + wheels: vec![wheel.clone()], + best_wheel_index: 0, + sdist: None, + }); + return self.get_wheel_metadata(&dist, hashes).await.map(Some); + } + } + } + Ok(None) + } + /// Return the [`RequiresDist`] from a `pyproject.toml`, if it can be statically extracted. 
pub async fn requires_dist( &self, diff --git a/crates/uv-distribution/src/lib.rs b/crates/uv-distribution/src/lib.rs index 6ffb2d6d87682..68f66811799b5 100644 --- a/crates/uv-distribution/src/lib.rs +++ b/crates/uv-distribution/src/lib.rs @@ -16,5 +16,6 @@ mod download; mod error; mod index; mod metadata; +mod remote; mod reporter; mod source; diff --git a/crates/uv-distribution/src/remote.rs b/crates/uv-distribution/src/remote.rs new file mode 100644 index 0000000000000..5450a8d9a9692 --- /dev/null +++ b/crates/uv-distribution/src/remote.rs @@ -0,0 +1,413 @@ +use std::collections::BTreeMap; +use std::collections::btree_map::Entry; +use std::sync::Arc; + +use blake2::Digest; +use rustc_hash::FxHashMap; +use tokio::sync::Mutex; +use tracing::{debug, instrument, warn}; + +use uv_auth::PyxTokenStore; +use uv_cache_key::RepositoryUrl; +use uv_client::{MetadataFormat, VersionFiles}; +use uv_configuration::BuildOptions; +use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; +use uv_distribution_types::{ + File, HashComparison, HashPolicy, IncompatibleSource, IncompatibleWheel, IndexFormat, + IndexMetadata, IndexUrl, PrioritizedDist, RegistryBuiltWheel, RegistrySourceDist, SourceDist, + SourceDistCompatibility, WheelCompatibility, +}; +use uv_git_types::GitOid; +use uv_normalize::PackageName; +use uv_pep440::Version; +use uv_pep508::VerbatimUrl; +use uv_platform_tags::{TagCompatibility, Tags}; +use uv_pypi_types::HashDigest; +use uv_static::EnvVars; +use uv_types::{BuildContext, HashStrategy}; + +use crate::Error; +use crate::distribution_database::ManagedClient; + +/// A resolver for remote Git-based indexes. +pub(crate) struct RemoteCacheResolver<'a, Context: BuildContext> { + build_context: &'a Context, + cache: Arc>, + store: Option, + workspace: Option, +} + +impl<'a, T: BuildContext> RemoteCacheResolver<'a, T> { + /// Initialize a [`RemoteCacheResolver`] from a [`BuildContext`]. 
+ pub(crate) fn new(build_context: &'a T) -> Self { + Self { + build_context, + cache: Arc::default(), + store: PyxTokenStore::from_settings().ok(), + workspace: std::env::var(EnvVars::PYX_GIT_CACHE).ok(), + } + } + + /// Return the cached Git index for the given distribution, if any. + pub(crate) async fn get_cached_distribution( + &self, + dist: &SourceDist, + tags: Option<&Tags>, + client: &ManagedClient<'a>, + ) -> Result, Error> { + // Fetch the entries for the given distribution. + let entries = self.get_or_fetch_index(dist, client).await?; + if entries.is_empty() { + return Ok(None); + } + + // Create the index. + let index = GitIndex::from_entries( + entries, + tags, + &HashStrategy::default(), + self.build_context.build_options(), + ); + Ok(Some(index)) + } + + /// Fetch the remote Git index for the given distribution. + async fn get_or_fetch_index( + &self, + dist: &SourceDist, + client: &ManagedClient<'a>, + ) -> Result, Error> { + let Some(workspace) = &self.workspace else { + return Ok(Vec::default()); + }; + + let Some(store) = &self.store else { + return Ok(Vec::default()); + }; + + let SourceDist::Git(dist) = dist else { + return Ok(Vec::default()); + }; + + let Some(precise) = self.build_context.git().get_precise(&dist.git) else { + return Ok(Vec::default()); + }; + + // Determine the cache key for the Git source. + let cache_key = GitCacheKey { + repository: RepositoryUrl::new(dist.git.repository()), + precise, + }; + let digest = cache_key.digest(); + + // Add the cache key to the URL. + let url = { + let mut url = store.api().clone(); + url.set_path(&format!( + "v1/cache/{workspace}/{}/{}/{}", + &digest[..2], + &digest[2..4], + &digest[4..], + )); + url + }; + let index = IndexUrl::from(VerbatimUrl::from_url(url)); + debug!("Using remote Git index URL: {index}"); + + // Store the index entries in a cache, to avoid redundant fetches. 
+        {
+            let cache = self.cache.lock().await;
+            if let Some(entries) = cache.get(&index) {
+                return Ok(entries.to_vec());
+            }
+        }
+
+        // Perform a remote fetch via the Simple API.
+        let metadata = IndexMetadata {
+            url: index.clone(),
+            format: IndexFormat::Simple,
+        };
+        let archives = client
+            .manual(|client, semaphore| {
+                client.simple_detail(
+                    &dist.name,
+                    Some(metadata.as_ref()),
+                    self.build_context.capabilities(),
+                    semaphore,
+                )
+            })
+            .await?;
+
+        // Collect the files from the remote index.
+        let mut entries = Vec::new();
+        for (_, archive) in archives {
+            let MetadataFormat::Simple(archive) = archive else {
+                continue;
+            };
+            for datum in archive.iter().rev() {
+                let files = rkyv::deserialize::<VersionFiles, rkyv::rancor::Error>(&datum.files)
+                    .expect("archived version files always deserializes");
+                for (filename, file) in files.all() {
+                    if *filename.name() != dist.name {
+                        warn!(
+                            "Skipping file `{filename}` from remote Git index at `{index}` due to name mismatch (expected: `{}`)",
+                            dist.name
+                        );
+                        continue;
+                    }
+
+                    entries.push(GitIndexEntry {
+                        filename,
+                        file,
+                        index: index.clone(),
+                    });
+                }
+            }
+        }
+
+        // Write to the cache.
+        {
+            let mut cache = self.cache.lock().await;
+            cache.insert(index.clone(), entries.clone());
+        }
+
+        Ok(entries)
+    }
+}
+
+/// An entry in a remote Git index.
+#[derive(Debug, Clone)]
+struct GitIndexEntry {
+    filename: DistFilename,
+    file: File,
+    index: IndexUrl,
+}
+
+/// A set of [`PrioritizedDist`] from a Git index.
+///
+/// In practice, it's assumed that the [`GitIndex`] will only contain distributions for a single
+/// package.
+#[derive(Debug, Clone, Default)]
+pub(crate) struct GitIndex(FxHashMap<PackageName, GitIndexDistributions>);
+
+impl GitIndex {
+    /// Collect all files from a Git index.
+    #[instrument(skip_all)]
+    fn from_entries(
+        entries: Vec<GitIndexEntry>,
+        tags: Option<&Tags>,
+        hasher: &HashStrategy,
+        build_options: &BuildOptions,
+    ) -> Self {
+        let mut index = FxHashMap::<PackageName, GitIndexDistributions>::default();
+        for entry in entries {
+            let distributions = index.entry(entry.filename.name().clone()).or_default();
+            distributions.add_file(
+                entry.file,
+                entry.filename,
+                tags,
+                hasher,
+                build_options,
+                entry.index,
+            );
+        }
+        Self(index)
+    }
+
+    /// Return the [`GitIndexDistributions`] for the given package name, if any.
+    pub(crate) fn get(&self, name: &PackageName) -> Option<&GitIndexDistributions> {
+        self.0.get(name)
+    }
+}
+
+/// A set of [`PrioritizedDist`] from a Git index, indexed by [`Version`].
+#[derive(Debug, Clone, Default)]
+pub(crate) struct GitIndexDistributions(BTreeMap<Version, PrioritizedDist>);
+
+impl GitIndexDistributions {
+    /// Returns an [`Iterator`] over the distributions.
+    pub(crate) fn iter(&self) -> impl Iterator<Item = &PrioritizedDist> {
+        self.0.iter().map(|(.., dist)| dist)
+    }
+
+    /// Add the given [`File`] to the [`GitIndexDistributions`] for the given package.
+    fn add_file(
+        &mut self,
+        file: File,
+        filename: DistFilename,
+        tags: Option<&Tags>,
+        hasher: &HashStrategy,
+        build_options: &BuildOptions,
+        index: IndexUrl,
+    ) {
+        // TODO(charlie): Incorporate `Requires-Python`, yanked status, etc.
+ match filename { + DistFilename::WheelFilename(filename) => { + let version = filename.version.clone(); + + let compatibility = Self::wheel_compatibility( + &filename, + file.hashes.as_slice(), + tags, + hasher, + build_options, + ); + let dist = RegistryBuiltWheel { + filename, + file: Box::new(file), + index, + }; + match self.0.entry(version) { + Entry::Occupied(mut entry) => { + entry.get_mut().insert_built(dist, vec![], compatibility); + } + Entry::Vacant(entry) => { + entry.insert(PrioritizedDist::from_built(dist, vec![], compatibility)); + } + } + } + DistFilename::SourceDistFilename(filename) => { + let compatibility = Self::source_dist_compatibility( + &filename, + file.hashes.as_slice(), + hasher, + build_options, + ); + let dist = RegistrySourceDist { + name: filename.name.clone(), + version: filename.version.clone(), + ext: filename.extension, + file: Box::new(file), + index, + wheels: vec![], + }; + match self.0.entry(filename.version) { + Entry::Occupied(mut entry) => { + entry.get_mut().insert_source(dist, vec![], compatibility); + } + Entry::Vacant(entry) => { + entry.insert(PrioritizedDist::from_source(dist, vec![], compatibility)); + } + } + } + } + } + + fn source_dist_compatibility( + filename: &SourceDistFilename, + hashes: &[HashDigest], + hasher: &HashStrategy, + build_options: &BuildOptions, + ) -> SourceDistCompatibility { + // Check if source distributions are allowed for this package. + if build_options.no_build_package(&filename.name) { + return SourceDistCompatibility::Incompatible(IncompatibleSource::NoBuild); + } + + // Check if hashes line up. 
+ let hash = if let HashPolicy::Validate(required) = + hasher.get_package(&filename.name, &filename.version) + { + if hashes.is_empty() { + HashComparison::Missing + } else if required.iter().any(|hash| hashes.contains(hash)) { + HashComparison::Matched + } else { + HashComparison::Mismatched + } + } else { + HashComparison::Matched + }; + + SourceDistCompatibility::Compatible(hash) + } + + fn wheel_compatibility( + filename: &WheelFilename, + hashes: &[HashDigest], + tags: Option<&Tags>, + hasher: &HashStrategy, + build_options: &BuildOptions, + ) -> WheelCompatibility { + // Check if binaries are allowed for this package. + if build_options.no_binary_package(&filename.name) { + return WheelCompatibility::Incompatible(IncompatibleWheel::NoBinary); + } + + // Determine a compatibility for the wheel based on tags. + let priority = match tags { + Some(tags) => match filename.compatibility(tags) { + TagCompatibility::Incompatible(tag) => { + return WheelCompatibility::Incompatible(IncompatibleWheel::Tag(tag)); + } + TagCompatibility::Compatible(priority) => Some(priority), + }, + None => None, + }; + + // Check if hashes line up. + let hash = if let HashPolicy::Validate(required) = + hasher.get_package(&filename.name, &filename.version) + { + if hashes.is_empty() { + HashComparison::Missing + } else if required.iter().any(|hash| hashes.contains(hash)) { + HashComparison::Matched + } else { + HashComparison::Mismatched + } + } else { + HashComparison::Matched + }; + + // Break ties with the build tag. + let build_tag = filename.build_tag().cloned(); + + WheelCompatibility::Compatible(hash, priority, build_tag) + } +} + +/// A map from [`IndexUrl`] to [`GitIndex`] entries found at the given URL. +#[derive(Default, Debug, Clone)] +struct GitIndexCache(FxHashMap>); + +impl GitIndexCache { + /// Get the entries for a given index URL. 
+    fn get(&self, index: &IndexUrl) -> Option<&[GitIndexEntry]> {
+        self.0.get(index).map(Vec::as_slice)
+    }
+
+    /// Insert the entries for a given index URL.
+    fn insert(
+        &mut self,
+        index: IndexUrl,
+        entries: Vec<GitIndexEntry>,
+    ) -> Option<Vec<GitIndexEntry>> {
+        self.0.insert(index, entries)
+    }
+}
+
+/// A cache key for a Git repository at a precise commit.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct GitCacheKey {
+    repository: RepositoryUrl,
+    precise: GitOid,
+}
+
+impl GitCacheKey {
+    /// Compute the digest for the Git cache key.
+    fn digest(&self) -> String {
+        let mut hasher = blake2::Blake2b::<blake2::digest::consts::U32>::new();
+        hasher.update(self.repository.as_str().as_bytes());
+        hasher.update(b"/");
+        hasher.update(self.precise.as_str().as_bytes());
+        hex::encode(hasher.finalize())
+    }
+}
+
+impl std::fmt::Display for GitCacheKey {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}/{}", self.repository, self.precise.as_str())?;
+        Ok(())
+    }
+}
diff --git a/crates/uv-static/src/env_vars.rs b/crates/uv-static/src/env_vars.rs
index fa2640c22cf7b..505d74a7226f8 100644
--- a/crates/uv-static/src/env_vars.rs
+++ b/crates/uv-static/src/env_vars.rs
@@ -1139,6 +1139,10 @@ impl EnvVars {
     #[attr_added_in("0.8.15")]
     pub const PYX_API_KEY: &'static str = "PYX_API_KEY";
 
+    /// The pyx workspace in which to search for cached Git dependencies.
+    #[attr_added_in("0.9.9")]
+    pub const PYX_GIT_CACHE: &'static str = "PYX_GIT_CACHE";
+
     /// The pyx API key, for backwards compatibility.
#[attr_hidden] #[attr_added_in("0.8.15")] diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index accb2aca49699..cc41b7d1b2775 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -3678,9 +3678,7 @@ fn install_git_source_respects_offline_mode() { ----- stderr ----- × Failed to download and build `uv-public-pypackage @ git+https://github.com/astral-test/uv-public-pypackage` - ├─▶ Git operation failed - ├─▶ failed to clone into: [CACHE_DIR]/git-v0/db/8dab139913c4b566 - ╰─▶ Remote Git fetches are not allowed because network connectivity is disabled (i.e., with `--offline`) + ╰─▶ Network connectivity is disabled, but the requested data wasn't found in the cache for: `https://api.github.com/repos/astral-test/uv-public-pypackage/commits/HEAD` " ); } diff --git a/docs/reference/environment.md b/docs/reference/environment.md index a25c50266aa18..7d66a1f35cf81 100644 --- a/docs/reference/environment.md +++ b/docs/reference/environment.md @@ -954,6 +954,11 @@ The domain of the pyx CDN. Specifies the directory where uv stores pyx credentials. +### `PYX_GIT_CACHE` +added in `0.9.9` + +The pyx workspace in which to search for cached Git dependencies. + ### `RUST_BACKTRACE` added in `0.7.22`