blob: a75c1ec6d91ce57585cb89d2bafbccf6b35c0325 [file] [log] [blame]
//! See [GitSource].
use crate::core::GitReference;
use crate::core::SourceId;
use crate::core::{Dependency, Package, PackageId, Summary};
use crate::sources::git::utils::GitRemote;
use crate::sources::source::MaybePackage;
use crate::sources::source::QueryKind;
use crate::sources::source::Source;
use crate::sources::PathSource;
use crate::util::cache_lock::CacheLockMode;
use crate::util::errors::CargoResult;
use crate::util::hex::short_hash;
use crate::util::Config;
use anyhow::Context;
use cargo_util::paths::exclude_from_backups_and_indexing;
use std::fmt::{self, Debug, Formatter};
use std::task::Poll;
use tracing::trace;
use url::Url;
/// `GitSource` contains one or more packages gathered from a Git repository.
/// Under the hood it uses [`PathSource`] to discover packages inside the
/// repository.
///
/// ## Filesystem layout
///
/// During a successful `GitSource` download, at least two Git repositories are
/// created: one is the shared Git database of this remote, and the other is the
/// Git checkout to a specific revision, which contains the actual files to be
/// compiled. Multiple checkouts can be cloned from a single Git database.
///
/// Those repositories are located at Cargo's Git cache directory
/// `$CARGO_HOME/git`. The file tree of the cache directory roughly looks like:
///
/// ```text
/// $CARGO_HOME/git/
/// ├── checkouts/
/// │  ├── gimli-a0d193bd15a5ed96/
/// │  │  ├── 8e73ef0/     # Git short ID for a certain revision
/// │  │  ├── a2a4b78/
/// │  │  └── e33d1ac/
/// │  ├── log-c58e1db3de7c154d-shallow/
/// │  │  └── 11eda98/
/// └── db/
///    ├── gimli-a0d193bd15a5ed96/
///    └── log-c58e1db3de7c154d-shallow/
/// ```
///
/// For more on Git cache directory, see ["Cargo Home"] in The Cargo Book.
///
/// For more on the directory format `<pkg>-<hash>[-shallow]`, see [`ident`]
/// and [`ident_shallow`].
///
/// ## Locked to a revision
///
/// Once a `GitSource` is fetched, it will resolve to a specific commit revision.
/// This is often mentioned as "locked revision" (`locked_rev`) throughout the
/// codebase. The revision is written into `Cargo.lock`. This is essential since
/// we want to ensure a package compiles with the same set of files when
/// a `Cargo.lock` is present. With the `locked_rev` provided, `GitSource` can
/// precisely fetch the same revision from the Git repository.
///
/// ["Cargo Home"]: https://doc.rust-lang.org/nightly/cargo/guide/cargo-home.html#directories
pub struct GitSource<'cfg> {
/// The git remote which we're going to fetch from.
remote: GitRemote,
/// The Git reference from the manifest file.
/// It is resolved against the Git database when no locked revision is
/// available.
manifest_reference: GitReference,
/// The revision which a git source is locked to.
/// It is initialized from the precise revision of the source ID, and is
/// set to the revision actually checked out once the Git repository has
/// been fetched.
locked_rev: Option<git2::Oid>,
/// The unique identifier of this source.
source_id: SourceId,
/// The underlying path source to discover packages inside the Git repository.
/// This stays `None` until the repository has been checked out.
path_source: Option<PathSource<'cfg>>,
/// The identifier of this source for Cargo's Git cache directory.
/// It names both the `db/<ident>` and `checkouts/<ident>` directories.
/// See [`ident`] for more.
ident: String,
/// Cargo's configuration, consulted for the Git cache path, offline mode,
/// unstable CLI flags and the shell used for status output.
config: &'cfg Config,
/// Disables status messages.
/// When set, the "Updating git repository" status line is not printed.
quiet: bool,
}
impl<'cfg> GitSource<'cfg> {
    /// Builds a `GitSource` for the given Git [`SourceId`].
    ///
    /// The source starts out without a checkout; nothing is fetched until
    /// the source is asked to become ready.
    ///
    /// # Panics
    ///
    /// Panics if `source_id` is not a Git source.
    ///
    /// # Errors
    ///
    /// Returns an error if the precise revision recorded in `source_id`
    /// cannot be turned into a Git object ID.
    pub fn new(source_id: SourceId, config: &'cfg Config) -> CargoResult<GitSource<'cfg>> {
        assert!(source_id.is_git(), "id is not git, id={}", source_id);

        let remote = GitRemote::new(source_id.url());
        let manifest_reference = source_id.git_reference().unwrap().clone();
        let locked_rev = source_id.precise_git_oid()?;

        // The `-shallow` cache directories are only selected when gitoxide
        // is enabled for both fetching and shallow dependency clones.
        let shallow = match config.cli_unstable().gitoxide {
            Some(gix) => gix.fetch && gix.shallow_deps,
            None => false,
        };
        let ident = ident_shallow(&source_id, shallow);

        Ok(GitSource {
            remote,
            manifest_reference,
            locked_rev,
            source_id,
            path_source: None,
            ident,
            config,
            quiet: false,
        })
    }

    /// Returns the URL of the remote repository backing this source.
    pub fn url(&self) -> &Url {
        self.remote.url()
    }

    /// Returns the packages discovered by this source.
    ///
    /// If the checkout has not been prepared yet, the source is first
    /// brought to a ready state, which may fetch the Git repository and
    /// walk the filesystem.
    pub fn read_packages(&mut self) -> CargoResult<Vec<Package>> {
        let needs_checkout = self.path_source.is_none();
        if needs_checkout {
            self.invalidate_cache();
            self.block_until_ready()?;
        }
        let packages_source = self.path_source.as_mut().unwrap();
        packages_source.read_packages()
    }
}
/// Derives the cache directory name of a source from its URL,
/// essentially turning `proto://host/path/repo` into `repo-<hash-of-url>`.
///
/// When the canonical URL has no last path segment, or that segment is
/// empty, the placeholder `_empty` stands in for the repository name.
fn ident(id: &SourceId) -> String {
    let canonical = id.canonical_url();
    let repo_name = canonical
        .raw_canonicalized_url()
        .path_segments()
        .and_then(|mut segments| segments.next_back())
        .filter(|name| !name.is_empty())
        .unwrap_or("_empty");
    format!("{}-{}", repo_name, short_hash(canonical))
}
/// Like [`ident()`], but appends `-shallow` when `is_shallow` is set, turning
/// `proto://host/path/repo` into `repo-<hash-of-url>-shallow`.
///
/// Shallow and non-shallow clones are kept in separate directories for
/// backwards compatibility: older versions of Cargo don't necessarily
/// handle shallow clones correctly.
fn ident_shallow(id: &SourceId, is_shallow: bool) -> String {
    let base = ident(id);
    if is_shallow {
        base + "-shallow"
    } else {
        base
    }
}
impl<'cfg> Debug for GitSource<'cfg> {
    /// Writes `git repo at <url>`, followed by the manifest reference in
    /// parentheses when it has a pretty representation.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "git repo at {}", self.remote.url())?;

        // TODO(-Znext-lockfile-bump): set it to true when stabilizing
        // lockfile v4, because we want Source ID serialization to be
        // consistent with lockfile.
        if let Some(pretty) = self.manifest_reference.pretty_ref(false) {
            write!(f, " ({})", pretty)?;
        }
        Ok(())
    }
}
impl<'cfg> Source for GitSource<'cfg> {
/// Forwards the query to the underlying [`PathSource`].
///
/// Returns [`Poll::Pending`] while no checkout is available, i.e. before
/// `block_until_ready` has populated `path_source`.
fn query(
&mut self,
dep: &Dependency,
kind: QueryKind,
f: &mut dyn FnMut(Summary),
) -> Poll<CargoResult<()>> {
if let Some(src) = self.path_source.as_mut() {
src.query(dep, kind, f)
} else {
Poll::Pending
}
}
/// Git sources never report checksum support.
fn supports_checksums(&self) -> bool {
false
}
/// Git sources always report that they require a precise revision.
fn requires_precise(&self) -> bool {
true
}
/// Returns the [`SourceId`] this source was created with.
fn source_id(&self) -> SourceId {
self.source_id
}
/// Makes the packages of this source available on disk.
///
/// Does nothing when a checkout has already been prepared. Otherwise the
/// revision to use is determined (updating the Git database unless an
/// existing one already satisfies the request), that revision is copied
/// into a checkout directory, and the checkout is loaded through a
/// recursive [`PathSource`].
///
/// # Errors
///
/// Fails when the Git database would have to be updated in offline mode,
/// when the manifest reference cannot be resolved in an existing database
/// while offline, or when one of the Git, shell or filesystem operations
/// performed along the way returns an error.
fn block_until_ready(&mut self) -> CargoResult<()> {
// A checkout was already prepared by an earlier call.
if self.path_source.is_some() {
return Ok(());
}
let git_fs = self.config.git_path();
// Ignore errors creating it, in case this is a read-only filesystem:
// perhaps the later operations can succeed anyhow.
let _ = git_fs.create_dir();
let git_path = self
.config
.assert_package_cache_locked(CacheLockMode::DownloadExclusive, &git_fs);
// Before getting a checkout, make sure that `<cargo_home>/git` is
// marked as excluded from indexing and backups. Older versions of Cargo
// didn't do this, so we do it here regardless of whether `<cargo_home>`
// exists.
//
// This does not use `create_dir_all_excluded_from_backups_atomic` for
// the same reason: we want to exclude it even if the directory already
// exists.
exclude_from_backups_and_indexing(&git_path);
let db_path = git_path.join("db").join(&self.ident);
// Any error while opening the database is discarded here and handled the
// same way as having no database at all.
let db = self.remote.db_at(&db_path).ok();
// The arms below are tried in order; the first two reuse the existing
// database, the last one updates it.
let (db, actual_rev) = match (self.locked_rev, db) {
// If we have a locked revision, and we have a preexisting database
// which has that revision, then no update needs to happen.
(Some(rev), Some(db)) if db.contains(rev) => (db, rev),
// If we're in offline mode, we're not locked, and we have a
// database, then try to resolve our reference with the preexisting
// repository.
(None, Some(db)) if self.config.offline() => {
let rev = db.resolve(&self.manifest_reference).with_context(|| {
"failed to lookup reference in preexisting repository, and \
can't check for updates in offline mode (--offline)"
})?;
(db, rev)
}
// ... otherwise we use this state to update the git database. Note
// that we still check for being offline here, for example in the
// situation that we have a locked revision but the database
// doesn't have it.
(locked_rev, db) => {
if self.config.offline() {
anyhow::bail!(
"can't checkout from '{}': you are in the offline mode (--offline)",
self.remote.url()
);
}
// The status line is suppressed when the source was set to quiet.
if !self.quiet {
self.config.shell().status(
"Updating",
format!("git repository `{}`", self.remote.url()),
)?;
}
trace!("updating git source `{:?}`", self.remote);
self.remote.checkout(
&db_path,
db,
&self.manifest_reference,
locked_rev,
self.config,
)?
}
};
// Don't use the full hash, in order to contribute less to reaching the
// path length limit on Windows. See
// <https://github.com/servo/servo/pull/14397>.
let short_id = db.to_short_id(actual_rev)?;
// Check out `actual_rev` from the database to a scoped location on the
// filesystem. This will use hard links and such to ideally make the
// checkout operation here pretty fast.
let checkout_path = git_path
.join("checkouts")
.join(&self.ident)
.join(short_id.as_str());
db.copy_to(actual_rev, &checkout_path, self.config)?;
// The path source is given a source ID that carries the revision which
// was actually checked out.
let source_id = self
.source_id
.with_git_precise(Some(actual_rev.to_string()));
let path_source = PathSource::new_recursive(&checkout_path, source_id, self.config);
self.path_source = Some(path_source);
// Remember the resolved revision; `fingerprint` reports it.
self.locked_rev = Some(actual_rev);
self.path_source.as_mut().unwrap().update()
}
/// Delegates the download of `id` to the underlying [`PathSource`].
///
/// # Panics
///
/// Panics if no checkout has been prepared yet (`path_source` is `None`).
fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
trace!(
"getting packages for package ID `{}` from `{:?}`",
id,
self.remote
);
self.path_source
.as_mut()
.expect("BUG: `update()` must be called before `get()`")
.download(id)
}
/// Not expected to be called for Git sources.
///
/// # Panics
///
/// Always panics.
fn finish_download(&mut self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
panic!("no download should have started")
}
/// Returns the locked revision, rendered as a string, as the fingerprint.
/// The package argument is ignored.
///
/// # Panics
///
/// Panics if no locked revision is set.
fn fingerprint(&self, _pkg: &Package) -> CargoResult<String> {
Ok(self.locked_rev.as_ref().unwrap().to_string())
}
/// Returns a human-readable description built from the source ID.
fn describe(&self) -> String {
format!("Git repository {}", self.source_id)
}
/// Does nothing for Git sources.
fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}
/// Always reports that the package is not yanked.
fn is_yanked(&mut self, _pkg: PackageId) -> Poll<CargoResult<bool>> {
Poll::Ready(Ok(false))
}
/// Does nothing for Git sources.
fn invalidate_cache(&mut self) {}
/// Enables or disables the status messages printed by this source.
fn set_quiet(&mut self, quiet: bool) {
self.quiet = quiet;
}
}
#[cfg(test)]
mod test {
    use super::ident;
    use crate::core::{GitReference, SourceId};
    use crate::util::IntoUrl;

    /// Builds a Git `SourceId` on the default branch for the given URL.
    fn src(s: &str) -> SourceId {
        let url = s.into_url().unwrap();
        SourceId::for_git(&url, GitReference::DefaultBranch).unwrap()
    }

    /// Asserts that both URLs map to the same cache identifier.
    fn assert_same_ident(first_url: &str, second_url: &str) {
        let first = ident(&src(first_url));
        let second = ident(&src(second_url));
        assert_eq!(first, second);
    }

    #[test]
    pub fn test_url_to_path_ident_with_path() {
        let name = ident(&src("https://github.com/carlhuda/cargo"));
        assert!(name.starts_with("cargo-"));
    }

    #[test]
    pub fn test_url_to_path_ident_without_path() {
        let name = ident(&src("https://github.com"));
        assert!(name.starts_with("_empty-"));
    }

    #[test]
    fn test_canonicalize_idents_by_stripping_trailing_url_slash() {
        assert_same_ident(
            "https://github.com/PistonDevelopers/piston/",
            "https://github.com/PistonDevelopers/piston",
        );
    }

    #[test]
    fn test_canonicalize_idents_by_lowercasing_github_urls() {
        assert_same_ident(
            "https://github.com/PistonDevelopers/piston",
            "https://github.com/pistondevelopers/piston",
        );
    }

    #[test]
    fn test_canonicalize_idents_by_stripping_dot_git() {
        assert_same_ident(
            "https://github.com/PistonDevelopers/piston",
            "https://github.com/PistonDevelopers/piston.git",
        );
    }

    #[test]
    fn test_canonicalize_idents_different_protocols() {
        assert_same_ident(
            "https://github.com/PistonDevelopers/piston",
            "git://github.com/PistonDevelopers/piston",
        );
    }
}