Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(package): implement vcs status cache #14985

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/cargo/ops/cargo_package/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ fn do_package<'a>(

// Packages need to be created in dependency order, because dependencies must
// be added to our local overlay before we can create lockfiles that depend on them.
let mut vcs_info_builder = vcs::VcsInfoBuilder::new(ws, opts);
let sorted_pkgs = deps.sort();
let mut outputs: Vec<(Package, PackageOpts<'_>, FileLock)> = Vec::new();
for (pkg, cli_features) in sorted_pkgs {
Expand All @@ -234,7 +235,7 @@ fn do_package<'a>(
to_package: ops::Packages::Default,
..opts.clone()
};
let ar_files = prepare_archive(ws, &pkg, &opts)?;
let ar_files = prepare_archive(ws, &pkg, &opts, &mut vcs_info_builder)?;

if opts.list {
for ar_file in &ar_files {
Expand Down Expand Up @@ -369,6 +370,7 @@ fn prepare_archive(
ws: &Workspace<'_>,
pkg: &Package,
opts: &PackageOpts<'_>,
vcs_info_builder: &mut vcs::VcsInfoBuilder<'_, '_>,
) -> CargoResult<Vec<ArchiveFile>> {
let gctx = ws.gctx();
let mut src = PathSource::new(pkg.root(), pkg.package_id().source_id(), gctx);
Expand All @@ -387,7 +389,7 @@ fn prepare_archive(
let src_files = src.list_files(pkg)?;

// Check (git) repository state, getting the current commit hash.
let vcs_info = vcs::check_repo_state(pkg, &src_files, gctx, &opts)?;
let vcs_info = vcs_info_builder.build(pkg, &src_files)?;

build_ar_list(ws, pkg, src_files, vcs_info)
}
Expand Down
127 changes: 107 additions & 20 deletions src/cargo/ops/cargo_package/vcs.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Helpers to gather the VCS information for `cargo package`.

use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;

Expand All @@ -9,6 +10,7 @@ use serde::Serialize;
use tracing::debug;

use crate::core::Package;
use crate::core::Workspace;
use crate::sources::PathEntry;
use crate::CargoResult;
use crate::GlobalContext;
Expand All @@ -32,6 +34,84 @@ pub struct GitVcsInfo {
dirty: bool,
}

/// A shared builder for generating [`VcsInfo`] for packages inside the same workspace.
///
/// This is aimed to cache duplicate works like file system or VCS info lookups.
pub struct VcsInfoBuilder<'a, 'gctx> {
ws: &'a Workspace<'gctx>,
opts: &'a PackageOpts<'gctx>,
/// Map each git workdir path to the workdir's git status cache.
caches: HashMap<PathBuf, RepoStatusCache>,
}

impl<'a, 'gctx> VcsInfoBuilder<'a, 'gctx> {
pub fn new(
ws: &'a Workspace<'gctx>,
opts: &'a PackageOpts<'gctx>,
) -> VcsInfoBuilder<'a, 'gctx> {
VcsInfoBuilder {
ws,
opts,
caches: Default::default(),
}
}

/// Builds an [`VcsInfo`] for the given `pkg` and its associated `src_files`.
pub fn build(
&mut self,
pkg: &Package,
src_files: &[PathEntry],
) -> CargoResult<Option<VcsInfo>> {
check_repo_state(pkg, src_files, self.ws.gctx(), self.opts, &mut self.caches)
}
}

/// Cache of git status inquries for a Git workdir.
struct RepoStatusCache {
repo: git2::Repository,
/// Status of each file path relative to the git workdir path.
cache: HashMap<PathBuf, git2::Status>,
}

impl RepoStatusCache {
fn new(repo: git2::Repository) -> RepoStatusCache {
RepoStatusCache {
repo,
cache: Default::default(),
}
}

/// Like [`git2::Repository::status_file`] but with cache.
fn status_file(&mut self, path: &Path) -> CargoResult<git2::Status> {
use std::collections::hash_map::Entry;
match self.cache.entry(path.into()) {
Entry::Occupied(entry) => {
tracing::trace!(
target: "cargo_package_vcs_cache",
"git status cache hit for `{}` at workdir `{}`",
path.display(),
self.repo.workdir().unwrap().display()
);
Ok(*entry.get())
}
Entry::Vacant(entry) => {
tracing::trace!(
target: "cargo_package_vcs_cache",
"git status cache miss for `{}` at workdir `{}`",
path.display(),
self.repo.workdir().unwrap().display()
);
let status = self.repo.status_file(path)?;
Ok(*entry.insert(status))
}
}
}

fn workdir(&self) -> &Path {
self.repo.workdir().unwrap()
}
}

/// Checks if the package source is in a *git* DVCS repository.
///
/// If *git*, and the source is *dirty* (e.g., has uncommitted changes),
Expand All @@ -45,6 +125,7 @@ pub fn check_repo_state(
src_files: &[PathEntry],
gctx: &GlobalContext,
opts: &PackageOpts<'_>,
caches: &mut HashMap<PathBuf, RepoStatusCache>,
) -> CargoResult<Option<VcsInfo>> {
let Ok(repo) = git2::Repository::discover(p.root()) else {
gctx.shell().verbose(|shell| {
Expand All @@ -64,14 +145,20 @@ pub fn check_repo_state(
};

debug!("found a git repo at `{}`", workdir.display());

let cache = caches
.entry(workdir.to_path_buf())
.or_insert_with(|| RepoStatusCache::new(repo));

let path = p.manifest_path();
let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf());
let Ok(status) = repo.status_file(&path) else {
let path =
paths::strip_prefix_canonical(path, cache.workdir()).unwrap_or_else(|_| path.to_path_buf());
let Ok(status) = cache.status_file(&path) else {
gctx.shell().verbose(|shell| {
shell.warn(format!(
"no (git) Cargo.toml found at `{}` in workdir `{}`",
path.display(),
workdir.display()
cache.workdir().display()
))
})?;
// No checked-in `Cargo.toml` found. This package may be irrelevant.
Expand All @@ -84,27 +171,27 @@ pub fn check_repo_state(
shell.warn(format!(
"found (git) Cargo.toml ignored at `{}` in workdir `{}`",
path.display(),
workdir.display()
cache.workdir().display()
))
})?;
// An ignored `Cargo.toml` found. This package may be irrelevant.
// Have to assume it is clean.
return Ok(None);
}

warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?;
warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &cache)?;

debug!(
"found (git) Cargo.toml at `{}` in workdir `{}`",
path.display(),
workdir.display(),
cache.workdir().display(),
);
let path_in_vcs = path
.parent()
.and_then(|p| p.to_str())
.unwrap_or("")
.replace("\\", "/");
let Some(git) = git(p, gctx, src_files, &repo, &opts)? else {
let Some(git) = git(p, gctx, src_files, cache, &opts)? else {
// If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning,
// then don't generate the corresponding file in order to maintain consistency with past behavior.
return Ok(None);
Expand All @@ -129,9 +216,10 @@ pub fn check_repo_state(
fn warn_symlink_checked_out_as_plain_text_file(
gctx: &GlobalContext,
src_files: &[PathEntry],
repo: &git2::Repository,
cache: &RepoStatusCache,
) -> CargoResult<()> {
if repo
if cache
.repo
.config()
.and_then(|c| c.get_bool("core.symlinks"))
.unwrap_or(true)
Expand All @@ -144,7 +232,7 @@ fn warn_symlink_checked_out_as_plain_text_file(
shell.warn(format_args!(
"found symbolic links that may be checked out as regular files for git repo at `{}`\n\
This might cause the `.crate` file to include incorrect or incomplete files",
repo.workdir().unwrap().display(),
cache.workdir().display(),
))?;
let extra_note = if cfg!(windows) {
"\nAnd on Windows, enable the Developer Mode to support symlinks"
Expand All @@ -164,7 +252,7 @@ fn git(
pkg: &Package,
gctx: &GlobalContext,
src_files: &[PathEntry],
repo: &git2::Repository,
cache: &mut RepoStatusCache,
opts: &PackageOpts<'_>,
) -> CargoResult<Option<GitVcsInfo>> {
// This is a collection of any dirty or untracked files. This covers:
Expand All @@ -173,10 +261,10 @@ fn git(
// - ignored (in case the user has an `include` directive that
// conflicts with .gitignore).
let mut dirty_files = Vec::new();
collect_statuses(repo, &mut dirty_files)?;
collect_statuses(&cache.repo, &mut dirty_files)?;
// Include each submodule so that the error message can provide
// specifically *which* files in a submodule are modified.
status_submodules(repo, &mut dirty_files)?;
status_submodules(&cache.repo, &mut dirty_files)?;

// Find the intersection of dirty in git, and the src_files that would
// be packaged. This is a lazy n^2 check, but seems fine with
Expand All @@ -186,7 +274,7 @@ fn git(
.iter()
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
.map(|p| p.as_ref())
.chain(dirty_metadata_paths(pkg, repo)?.iter())
.chain(dirty_metadata_paths(pkg, cache)?.iter())
.map(|path| {
pathdiff::diff_paths(path, cwd)
.as_ref()
Expand All @@ -199,10 +287,10 @@ fn git(
if !dirty || opts.allow_dirty {
// Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo.
// Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency.
if repo.is_empty()? {
if cache.repo.is_empty()? {
return Ok(None);
}
let rev_obj = repo.revparse_single("HEAD")?;
let rev_obj = cache.repo.revparse_single("HEAD")?;
Ok(Some(GitVcsInfo {
sha1: rev_obj.id().to_string(),
dirty,
Expand All @@ -225,9 +313,8 @@ fn git(
/// This is required because those paths may link to a file outside the
/// current package root, but still under the git workdir, affecting the
/// final packaged `.crate` file.
fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<Vec<PathBuf>> {
fn dirty_metadata_paths(pkg: &Package, repo: &mut RepoStatusCache) -> CargoResult<Vec<PathBuf>> {
let mut dirty_files = Vec::new();
let workdir = repo.workdir().unwrap();
let root = pkg.root();
let meta = pkg.manifest().metadata();
for path in [&meta.license_file, &meta.readme] {
Expand All @@ -239,12 +326,12 @@ fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<V
// Inside package root. Don't bother checking git status.
continue;
}
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), workdir) {
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), repo.workdir()) {
// Outside package root but under git workdir,
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_files.push(if abs_path.is_symlink() {
// For symlinks, shows paths to symlink sources
workdir.join(rel_path)
repo.workdir().join(rel_path)
} else {
abs_path
});
Expand Down
16 changes: 13 additions & 3 deletions tests/testsuite/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,7 @@ fn vcs_status_check_for_each_workspace_member() {
"#,
)
.file("hobbit", "...")
.file("README.md", "")
.file(
"isengard/Cargo.toml",
r#"
Expand All @@ -1194,6 +1195,7 @@ fn vcs_status_check_for_each_workspace_member() {
homepage = "saruman"
description = "saruman"
license = "MIT"
readme = "../README.md"
"#,
)
.file("isengard/src/lib.rs", "")
Expand All @@ -1206,6 +1208,7 @@ fn vcs_status_check_for_each_workspace_member() {
homepage = "sauron"
description = "sauron"
license = "MIT"
readme = "../README.md"
"#,
)
.file("mordor/src/lib.rs", "")
Expand All @@ -1228,10 +1231,15 @@ fn vcs_status_check_for_each_workspace_member() {

// Ensure dirty files be reported only for one affected package.
p.cargo("package --workspace --no-verify")
.env("CARGO_LOG", "cargo_package_vcs_cache=trace")
.with_status(101)
.with_stderr_data(str![[r#"
[..] TRACE cargo_package_vcs_cache: git status cache miss for `isengard/Cargo.toml` at workdir `[ROOT]/foo/`
[..] TRACE cargo_package_vcs_cache: git status cache miss for `README.md` at workdir `[ROOT]/foo/`
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[PACKAGED] 6 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[..] TRACE cargo_package_vcs_cache: git status cache miss for `mordor/Cargo.toml` at workdir `[ROOT]/foo/`
[..] TRACE cargo_package_vcs_cache: git status cache hit for `README.md` at workdir `[ROOT]/foo/`
[ERROR] 2 files in the working directory contain changes that were not yet committed into git:

mordor/src/lib.rs
Expand All @@ -1246,9 +1254,9 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
p.cargo("package --workspace --no-verify --allow-dirty")
.with_stderr_data(str![[r#"
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[PACKAGING] mordor v0.0.0 ([ROOT]/foo/mordor)
[PACKAGED] 6 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[PACKAGING] mordor v0.0.0 ([ROOT]/foo/mordor)
[PACKAGED] 7 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)

"#]])
.run();
Expand All @@ -1263,6 +1271,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
"Cargo.toml.orig",
"src/lib.rs",
"Cargo.lock",
"README.md",
],
[(
".cargo_vcs_info.json",
Expand Down Expand Up @@ -1290,6 +1299,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
"src/lib.rs",
"src/main.rs",
"Cargo.lock",
"README.md",
],
[(
".cargo_vcs_info.json",
Expand Down
Loading