// blob: e53b1704db0928a07b60b6ff97b9d8e05ba574aa [file] [log] [blame]
use crate::core::PackageId;
use crate::sources::registry::CRATES_IO_HTTP_INDEX;
use crate::sources::source::Source;
use crate::sources::{DirectorySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY};
use crate::sources::{GitSource, PathSource, RegistrySource};
use crate::util::interning::InternedString;
use crate::util::{config, CanonicalUrl, CargoResult, Config, IntoUrl};
use anyhow::Context;
use serde::de;
use serde::ser;
use std::cmp::{self, Ordering};
use std::collections::HashSet;
use std::fmt::{self, Formatter};
use std::hash::{self, Hash};
use std::path::{Path, PathBuf};
use std::ptr;
use std::sync::Mutex;
use std::sync::OnceLock;
use tracing::trace;
use url::Url;
/// Global interning table for `SourceIdInner` values, created lazily on first
/// use. `SourceId::wrap` leaks each newly inserted entry so that the
/// references stored here (and handed out inside `SourceId`) are `'static`.
static SOURCE_ID_CACHE: OnceLock<Mutex<HashSet<&'static SourceIdInner>>> = OnceLock::new();
/// Unique identifier for a source of packages.
///
/// Cargo uniquely identifies packages using [`PackageId`], a combination of the
/// package name, version, and the code source. `SourceId` exactly represents
/// the "code source" in `PackageId`. See [`SourceId::hash`] to learn what is
/// taken into account for the uniqueness of a source.
///
/// `SourceId` is usually associated with an instance of [`Source`], which is
/// supposed to provide a `SourceId` via the [`Source::source_id`] method.
///
/// [`Source`]: crate::sources::source::Source
/// [`Source::source_id`]: crate::sources::source::Source::source_id
/// [`PackageId`]: super::PackageId
#[derive(Clone, Copy, Eq, Debug)]
pub struct SourceId {
/// Reference to the interned data. Being a plain `'static` reference is what
/// lets `SourceId` derive `Copy`.
inner: &'static SourceIdInner,
}
/// The interned version of [`SourceId`] to avoid excessive clones and borrows.
/// Values are cached in `SOURCE_ID_CACHE` once created.
///
/// Equality and hashing are hand-written and only look at `kind`, `precise`
/// and `canonical_url`; `url` and `registry_key` do not take part.
#[derive(Eq, Clone, Debug)]
struct SourceIdInner {
/// The source URL.
url: Url,
/// The canonical version of the above url. See [`CanonicalUrl`] to learn
/// why it is needed and how it normalizes a URL.
canonical_url: CanonicalUrl,
/// The source kind.
kind: SourceKind,
/// For example, the exact Git revision of the specified branch for a Git Source.
precise: Option<Precise>,
/// Name of the remote registry.
///
/// WARNING: this is not always set when the name is not known,
/// e.g. registry coming from `--index` or Cargo.lock
registry_key: Option<KeyOf>,
}
/// The "precise" refinement that can be attached to a [`SourceId`].
#[derive(Eq, PartialEq, Clone, Debug, Hash)]
enum Precise {
/// Set by `SourceId::with_locked_precise`; displayed as `locked`.
Locked,
/// Set by `SourceId::with_precise_registry_version`: package `name` should
/// move from version `from` to version `to`.
Updated {
name: InternedString,
from: semver::Version,
to: semver::Version,
},
/// Set by `SourceId::with_git_precise`: the fragment part of a Git source URL.
GitUrlFragment(String),
}
impl fmt::Display for Precise {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Precise::Locked => "locked".fmt(f),
Precise::Updated { name, from, to } => {
write!(f, "{name}={from}->{to}")
}
Precise::GitUrlFragment(s) => s.fmt(f),
}
}
}
/// The possible kinds of code source.
/// Along with [`SourceIdInner`], this fully defines the source.
///
/// `Hash` is derived here while `Ord`/`PartialOrd` are written by hand
/// further down in this file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum SourceKind {
/// A git repository.
Git(GitReference),
/// A local path.
Path,
/// A remote registry.
Registry,
/// A sparse registry. Its URL keeps the `sparse+` prefix.
SparseRegistry,
/// A local filesystem-based registry.
LocalRegistry,
/// A directory-based registry.
Directory,
}
/// Information to find a specific commit in a Git repository.
///
/// In a source URL these correspond to the `tag=`, `branch=` (or the older
/// `ref=`) and `rev=` query keys; see [`GitReference::from_query`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum GitReference {
/// From a tag.
Tag(String),
/// From a branch.
Branch(String),
/// From a specific revision. Can be a commit hash (either short or full),
/// or a named reference like `refs/pull/493/head`.
Rev(String),
/// The default branch of the repository, the reference named `HEAD`.
DefaultBranch,
}
/// Where the remote source key is defined.
///
/// The purpose of this is to provide better diagnostics for different sources of keys.
#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyOf {
/// Defined in the `[registries]` table or the built-in `crates-io` key.
/// This is the only variant reported by `KeyOf::alternative_registry`.
Registry(String),
/// Defined in the `[source]` replacement table.
Source(String),
}
impl SourceId {
/// Builds the interned `SourceId` for a source of the given `kind` at `url`.
///
/// The canonical URL is computed from `url` and the `precise` field starts
/// out unset. `key` records where the registry name came from, if known.
///
/// # Panics
///
/// Panics if `kind` is `SourceKind::SparseRegistry` and `url` does not start
/// with `sparse+`.
fn new(kind: SourceKind, url: Url, key: Option<KeyOf>) -> CargoResult<SourceId> {
    if matches!(kind, SourceKind::SparseRegistry) {
        // Sparse registry URLs carry their kind prefix (`sparse+`) inside the
        // URL itself, because that prefix is what tells them apart from
        // regular (git-based) registries. The prefix shows up everywhere,
        // including user-facing places such as the `config.toml` entry that
        // defines the registry and any URL Cargo prints.
        assert!(url.as_str().starts_with("sparse+"));
    }
    let canonical_url = CanonicalUrl::new(&url)?;
    Ok(SourceId::wrap(SourceIdInner {
        kind,
        canonical_url,
        url,
        precise: None,
        registry_key: key,
    }))
}
/// Interns `inner` and returns the `SourceId` pointing at the shared copy.
///
/// If an equal value is already cached its reference is reused; otherwise
/// `inner` is leaked onto the heap and remembered in `SOURCE_ID_CACHE`.
fn wrap(inner: SourceIdInner) -> SourceId {
    let mut interned = SOURCE_ID_CACHE
        .get_or_init(Default::default)
        .lock()
        .unwrap();
    let inner = match interned.get(&inner) {
        Some(&existing) => existing,
        None => {
            let leaked: &'static SourceIdInner = Box::leak(Box::new(inner));
            interned.insert(leaked);
            leaked
        }
    };
    SourceId { inner }
}
/// Picks the registry kind for a remote index URL: sparse when the URL text
/// starts with `sparse+`, a regular registry otherwise.
fn remote_source_kind(url: &Url) -> SourceKind {
    match url.as_str().strip_prefix("sparse+") {
        Some(_) => SourceKind::SparseRegistry,
        None => SourceKind::Registry,
    }
}
/// Parses a source URL and returns the corresponding ID.
///
/// ## Example
///
/// ```
/// use cargo::core::SourceId;
/// SourceId::from_url("git+https://github.com/alexcrichton/\
/// libssh2-static-sys#80e71a3021618eb05\
/// 656c58fb7c5ef5f12bc747f");
/// ```
pub fn from_url(string: &str) -> CargoResult<SourceId> {
let (kind, url) = string
.split_once('+')
.ok_or_else(|| anyhow::format_err!("invalid source `{}`", string))?;
match kind {
"git" => {
let mut url = url.into_url()?;
let reference = GitReference::from_query(url.query_pairs());
let precise = url.fragment().map(|s| s.to_owned());
url.set_fragment(None);
url.set_query(None);
Ok(SourceId::for_git(&url, reference)?.with_git_precise(precise))
}
"registry" => {
let url = url.into_url()?;
Ok(SourceId::new(SourceKind::Registry, url, None)?.with_locked_precise())
}
"sparse" => {
let url = string.into_url()?;
Ok(SourceId::new(SourceKind::SparseRegistry, url, None)?.with_locked_precise())
}
"path" => {
let url = url.into_url()?;
SourceId::new(SourceKind::Path, url, None)
}
kind => Err(anyhow::format_err!("unsupported source protocol: {}", kind)),
}
}
/// Returns a `Display`able view of this ID written as a URL, with the Git
/// reference left unencoded.
pub fn as_url(&self) -> SourceIdAsUrl<'_> {
    let inner = self.inner;
    SourceIdAsUrl {
        inner,
        encoded: false,
    }
}
/// Same view as [`Self::as_url`], except that URL parameters are encoded.
pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
    let inner = self.inner;
    SourceIdAsUrl {
        inner,
        encoded: true,
    }
}
/// Builds the `SourceId` of a path source.
///
/// `path`: an absolute path.
pub fn for_path(path: &Path) -> CargoResult<SourceId> {
    path.into_url()
        .and_then(|location| SourceId::new(SourceKind::Path, location, None))
}
/// Builds the `SourceId` of the Git repository at `url`, pinned to `reference`.
pub fn for_git(url: &Url, reference: GitReference) -> CargoResult<SourceId> {
    let kind = SourceKind::Git(reference);
    SourceId::new(kind, url.clone(), None)
}
/// Builds a `SourceId` for a remote registry URL whose registry name cannot
/// be determined, e.g. when a user passes `--index` directly on the CLI.
///
/// Prefer [`SourceId::for_alt_registry`] if a name can be provided, since
/// that lets cargo produce better messages.
pub fn for_registry(url: &Url) -> CargoResult<SourceId> {
    SourceId::new(Self::remote_source_kind(url), url.clone(), None)
}
/// Builds the `SourceId` of a remote registry named `key`, as found in the
/// `[registries]` table (or crates.io).
pub fn for_alt_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
    let named = Some(KeyOf::Registry(key.to_owned()));
    SourceId::new(Self::remote_source_kind(url), url.clone(), named)
}
/// Builds the `SourceId` of a remote registry named `key`, as found in the
/// `[source]` replacement table.
pub fn for_source_replacement_registry(url: &Url, key: &str) -> CargoResult<SourceId> {
    let named = Some(KeyOf::Source(key.to_owned()));
    SourceId::new(Self::remote_source_kind(url), url.clone(), named)
}
/// Creates a `SourceId` from a local registry path.
pub fn for_local_registry(path: &Path) -> CargoResult<SourceId> {
let url = path.into_url()?;
SourceId::new(SourceKind::LocalRegistry, url, None)
}
/// Creates a `SourceId` from a directory path.
pub fn for_directory(path: &Path) -> CargoResult<SourceId> {
let url = path.into_url()?;
SourceId::new(SourceKind::Directory, url, None)
}
/// Returns the `SourceId` corresponding to the main repository.
///
/// This is the main cargo registry by default, but it can be overridden in
/// a `.cargo/config.toml`.
///
/// Simply forwards to `Config::crates_io_source_id`.
pub fn crates_io(config: &Config) -> CargoResult<SourceId> {
config.crates_io_source_id()
}
/// Returns the `SourceId` of the main repository, switching to the sparse
/// HTTP index when [`Self::crates_io_is_sparse`] allows it.
///
/// # Errors
///
/// Propagates configuration errors, including the one raised by
/// `check_registry_index_not_set` when the sparse index is selected.
pub fn crates_io_maybe_sparse_http(config: &Config) -> CargoResult<SourceId> {
    if !Self::crates_io_is_sparse(config)? {
        return Self::crates_io(config);
    }
    config.check_registry_index_not_set()?;
    let index = CRATES_IO_HTTP_INDEX.into_url().unwrap();
    let named = Some(KeyOf::Registry(CRATES_IO_REGISTRY.into()));
    SourceId::new(SourceKind::SparseRegistry, index, named)
}
/// Reports whether crates.io should be accessed over the sparse protocol.
///
/// Reads `registries.crates-io.protocol`: `sparse` means yes, `git` means
/// no, and an unset value defaults to yes.
///
/// # Errors
///
/// Fails on any other protocol value, naming where it was defined.
pub fn crates_io_is_sparse(config: &Config) -> CargoResult<bool> {
    let proto: Option<config::Value<String>> = config.get("registries.crates-io.protocol")?;
    let Some(setting) = proto.as_ref() else {
        return Ok(true);
    };
    match setting.val.as_str() {
        "sparse" => Ok(true),
        "git" => Ok(false),
        unknown => anyhow::bail!(
            "unsupported registry protocol `{unknown}` (defined in {})",
            setting.definition
        ),
    }
}
/// Looks up the `SourceId` for the remote registry named `key`.
///
/// The built-in crates.io key is answered by [`Self::crates_io`]; any other
/// name is resolved through the registry index configured for it.
pub fn alt_registry(config: &Config, key: &str) -> CargoResult<SourceId> {
    if key != CRATES_IO_REGISTRY {
        let index = config.get_registry_index(key)?;
        Self::for_alt_registry(&index, key)
    } else {
        Self::crates_io(config)
    }
}
/// Gets this source URL.
///
/// This is the URL as stored, not the normalized form; see
/// [`Self::canonical_url`] for the latter.
pub fn url(&self) -> &Url {
&self.inner.url
}
/// Gets the canonical URL of this source, used for internal comparison
/// purposes (it is what Git sources are compared and hashed by).
pub fn canonical_url(&self) -> &CanonicalUrl {
&self.inner.canonical_url
}
/// Text naming this source's index for Cargo shell status output: the
/// crates.io domain for crates.io itself, the backquoted registry name
/// otherwise.
pub fn display_index(self) -> String {
    if !self.is_crates_io() {
        return format!("`{}` index", self.display_registry_name());
    }
    format!("{} index", CRATES_IO_DOMAIN)
}
/// Displays the name of a registry if it has one, otherwise just the URL.
pub fn display_registry_name(self) -> String {
    match self.inner.registry_key.as_ref() {
        Some(key) => String::from(key.key()),
        // Dropping `precise` yields a more permissive `SourceIdInner`
        // lookup, and that interned value may carry the registry name.
        None if self.has_precise() => self.without_precise().display_registry_name(),
        None => url_display(self.url()),
    }
}
/// Gets the registry name when it was defined in the `[registries]` table
/// or is the built-in `crates-io` key; `None` in every other case.
pub fn alt_registry_key(&self) -> Option<&str> {
    self.inner
        .registry_key
        .as_ref()
        .and_then(|key| key.alternative_registry())
}
/// Returns `true` when this is a path source.
pub fn is_path(self) -> bool {
    matches!(self.inner.kind, SourceKind::Path)
}
/// Returns the filesystem path of a path source, or `None` for any other kind.
///
/// # Panics
///
/// Panics if the URL of a path source cannot be converted to a file path.
pub fn local_path(self) -> Option<PathBuf> {
    match self.inner.kind {
        SourceKind::Path => Some(self.inner.url.to_file_path().unwrap()),
        _ => None,
    }
}
/// Returns `true` for every registry kind, remote or local.
pub fn is_registry(self) -> bool {
    match self.inner.kind {
        SourceKind::Registry | SourceKind::SparseRegistry | SourceKind::LocalRegistry => true,
        SourceKind::Git(_) | SourceKind::Path | SourceKind::Directory => false,
    }
}
/// Returns `true` when this is a sparse registry source.
pub fn is_sparse(self) -> bool {
    self.inner.kind == SourceKind::SparseRegistry
}
/// Returns `true` if this source is a "remote" registry.
///
/// "Remote" here only means "not a `local-registry`": a file URL pointing
/// at a git index also counts.
pub fn is_remote_registry(self) -> bool {
    match self.inner.kind {
        SourceKind::Registry | SourceKind::SparseRegistry => true,
        SourceKind::Git(_)
        | SourceKind::Path
        | SourceKind::LocalRegistry
        | SourceKind::Directory => false,
    }
}
/// Returns `true` if this source from a Git repository.
pub fn is_git(self) -> bool {
matches!(self.inner.kind, SourceKind::Git(_))
}
/// Creates the `Source` implementation that corresponds to this ID.
///
/// * `yanked_whitelist` --- Packages allowed to be used, even if they are yanked.
///
/// # Panics
///
/// Panics if a path, local-registry or directory source has a URL that
/// cannot be converted to a file path.
pub fn load<'a>(
    self,
    config: &'a Config,
    yanked_whitelist: &HashSet<PackageId>,
) -> CargoResult<Box<dyn Source + 'a>> {
    trace!("loading SourceId; {}", self);
    // Shared by every filesystem-backed kind below.
    let local_dir = || {
        self.inner
            .url
            .to_file_path()
            .expect("path sources cannot be remote")
    };
    let source: Box<dyn Source + 'a> = match self.inner.kind {
        SourceKind::Git(..) => Box::new(GitSource::new(self, config)?),
        SourceKind::Path => Box::new(PathSource::new(&local_dir(), self, config)),
        SourceKind::Registry | SourceKind::SparseRegistry => {
            Box::new(RegistrySource::remote(self, yanked_whitelist, config)?)
        }
        SourceKind::LocalRegistry => Box::new(RegistrySource::local(
            self,
            &local_dir(),
            yanked_whitelist,
            config,
        )),
        SourceKind::Directory => Box::new(DirectorySource::new(&local_dir(), self, config)),
    };
    Ok(source)
}
/// Returns the Git reference of a git source, or `None` for any other kind.
pub fn git_reference(self) -> Option<&'static GitReference> {
    if let SourceKind::Git(reference) = &self.inner.kind {
        Some(reference)
    } else {
        None
    }
}
/// Checks whether the precise data field has been set (to any value).
pub fn has_precise(self) -> bool {
self.inner.precise.is_some()
}
/// Checks whether the precise data field has been set to "locked".
pub fn has_locked_precise(self) -> bool {
    matches!(self.inner.precise, Some(Precise::Locked))
}
/// Checks whether `self` and `other` carry the same precise data (including
/// both having none).
pub fn has_same_precise_as(self, other: Self) -> bool {
    let (mine, theirs) = (&self.inner.precise, &other.inner.precise);
    mine == theirs
}
/// Checks whether the precise data field stores update information for the
/// package `pkg`, as recorded by [SourceId::with_precise_registry_version].
///
/// If so, returns the version currently in the lock file and the version to
/// be updated to, in that order.
pub fn precise_registry_version(
    self,
    pkg: &str,
) -> Option<(&semver::Version, &semver::Version)> {
    let Some(Precise::Updated { name, from, to }) = &self.inner.precise else {
        return None;
    };
    (name == pkg).then_some((from, to))
}
pub fn precise_git_fragment(self) -> Option<&'static str> {
match &self.inner.precise {
Some(Precise::GitUrlFragment(s)) => Some(&s[..8]),
_ => None,
}
}
/// Parses the Git URL fragment stored in `precise` as a Git object ID.
///
/// Returns `Ok(None)` when `precise` is unset or is not a Git fragment.
///
/// # Errors
///
/// Fails when the fragment is not a valid Git revision.
pub fn precise_git_oid(self) -> CargoResult<Option<git2::Oid>> {
    let Some(Precise::GitUrlFragment(rev)) = self.inner.precise.as_ref() else {
        return Ok(None);
    };
    let oid = git2::Oid::from_str(rev)
        .with_context(|| format!("precise value for git is not a git revision: {}", rev))?;
    Ok(Some(oid))
}
/// Creates a new `SourceId` from this source with the given `precise`.
pub fn with_git_precise(self, fragment: Option<String>) -> SourceId {
SourceId::wrap(SourceIdInner {
precise: fragment.map(|f| Precise::GitUrlFragment(f)),
..(*self.inner).clone()
})
}
/// Creates a new `SourceId` from this source without a `precise`.
pub fn without_precise(self) -> SourceId {
SourceId::wrap(SourceIdInner {
precise: None,
..(*self.inner).clone()
})
}
/// Creates a new `SourceId` from this source without a `precise`.
pub fn with_locked_precise(self) -> SourceId {
SourceId::wrap(SourceIdInner {
precise: Some(Precise::Locked),
..(*self.inner).clone()
})
}
/// Creates a new `SourceId` from this source with the `precise` from some other `SourceId`.
pub fn with_precise_from(self, v: Self) -> SourceId {
SourceId::wrap(SourceIdInner {
precise: v.inner.precise.clone(),
..(*self.inner).clone()
})
}
/// When updating a lock file on a version using `cargo update --precise`
/// the requested version is stored in the precise field.
/// On a registry dependency we also need to keep track of the package that
/// should be updated and even which of the versions should be updated.
/// All of this gets encoded in the precise field using this method.
/// The data can be read with [SourceId::precise_registry_version]
pub fn with_precise_registry_version(
self,
name: InternedString,
version: semver::Version,
precise: &str,
) -> CargoResult<SourceId> {
let precise = semver::Version::parse(precise)
.with_context(|| format!("invalid version format for precise version `{precise}`"))?;
Ok(SourceId::wrap(SourceIdInner {
precise: Some(Precise::Updated {
name,
from: version,
to: precise,
}),
..(*self.inner).clone()
}))
}
/// Returns `true` if the remote registry is the standard <https://crates.io>.
///
/// Only remote registry kinds qualify, and the URL must be one of the two
/// crates.io index URLs or the test-only override.
pub fn is_crates_io(self) -> bool {
    if !matches!(
        self.inner.kind,
        SourceKind::Registry | SourceKind::SparseRegistry
    ) {
        return false;
    }
    let url = self.inner.url.as_str();
    url == CRATES_IO_INDEX || url == CRATES_IO_HTTP_INDEX || is_overridden_crates_io_url(url)
}
/// Hashes `self` in a way that is stable across workspace locations.
///
/// For path sources located under `workspace`, only the kind and the path
/// relative to the workspace are hashed, so the same source gives the same
/// hash wherever the workspace lives (helping reproducible builds). Anything
/// else uses the regular `Hash` implementation.
///
/// # Panics
///
/// Panics if a path source's URL is not a file path, or if the relative
/// path is not valid UTF-8.
pub fn stable_hash<S: hash::Hasher>(self, workspace: &Path, into: &mut S) {
    if self.is_path() {
        let absolute = self.inner.url.to_file_path().unwrap();
        if let Ok(relative) = absolute.strip_prefix(workspace) {
            self.inner.kind.hash(into);
            relative.to_str().unwrap().hash(into);
            return;
        }
    }
    self.hash(into)
}
/// Compares two IDs by the identity of their interned data (pointer
/// equality), unlike `==`, which goes through the custom `Ord` impl.
pub fn full_eq(self, other: SourceId) -> bool {
ptr::eq(self.inner, other.inner)
}
/// Hashes the address of the interned data, the hashing counterpart of
/// [`Self::full_eq`].
pub fn full_hash<S: hash::Hasher>(self, into: &mut S) {
ptr::NonNull::from(self.inner).hash(into)
}
}
/// Equality follows the custom ordering: two IDs are equal exactly when
/// `cmp` says so.
impl PartialEq for SourceId {
    fn eq(&self, other: &SourceId) -> bool {
        Ord::cmp(self, other).is_eq()
    }
}
/// Forwards to `Ord`.
impl PartialOrd for SourceId {
    fn partial_cmp(&self, other: &SourceId) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
// Custom comparison defined as canonical URL equality for git sources and URL
// equality for other sources, ignoring the `precise` and `name` fields.
impl Ord for SourceId {
fn cmp(&self, other: &SourceId) -> Ordering {
// If our interior pointers are to the exact same `SourceIdInner` then
// we're guaranteed to be equal.
if ptr::eq(self.inner, other.inner) {
return Ordering::Equal;
}
// Sort first based on `kind`, deferring to the URL comparison below if
// the kinds are equal.
match self.inner.kind.cmp(&other.inner.kind) {
Ordering::Equal => {}
other => return other,
}
// If the `kind` and the `url` are equal, then for git sources we also
// ensure that the canonical urls are equal.
match (&self.inner.kind, &other.inner.kind) {
(SourceKind::Git(_), SourceKind::Git(_)) => {
self.inner.canonical_url.cmp(&other.inner.canonical_url)
}
_ => self.inner.url.cmp(&other.inner.url),
}
}
}
/// Serializes as the URL form of the ID, except for path sources, which are
/// serialized as an absent (`None`) string.
impl ser::Serialize for SourceId {
    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
    where
        S: ser::Serializer,
    {
        if !self.is_path() {
            return s.collect_str(&self.as_url());
        }
        None::<String>.serialize(s)
    }
}
impl<'de> de::Deserialize<'de> for SourceId {
fn deserialize<D>(d: D) -> Result<SourceId, D::Error>
where
D: de::Deserializer<'de>,
{
let string = String::deserialize(d)?;
SourceId::from_url(&string).map_err(de::Error::custom)
}
}
/// Renders `url` for display: `file` URLs that convert to a UTF-8 filesystem
/// path are shown as that path, everything else as the URL text.
fn url_display(url: &Url) -> String {
    if url.scheme() == "file" {
        let local = url.to_file_path().ok();
        if let Some(text) = local.as_deref().and_then(Path::to_str) {
            return text.to_string();
        }
    }
    url.as_str().to_string()
}
/// Human-readable rendering of a source, as used in Cargo's messages.
impl fmt::Display for SourceId {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let inner = self.inner;
        match &inner.kind {
            SourceKind::Path => write!(f, "{}", url_display(&inner.url)),
            SourceKind::Registry | SourceKind::SparseRegistry => {
                write!(f, "registry `{}`", self.display_registry_name())
            }
            SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&inner.url)),
            SourceKind::Directory => write!(f, "dir {}", url_display(&inner.url)),
            SourceKind::Git(reference) => {
                // Git sources keep the raw URL: readers expect a URL there.
                write!(f, "{}", inner.url)?;
                // TODO(-Znext-lockfile-bump): set it to true when stabilizing
                // lockfile v4, because we want Source ID serialization to be
                // consistent with lockfile.
                if let Some(pretty) = reference.pretty_ref(false) {
                    write!(f, "?{}", pretty)?;
                }
                if let Some(precise) = &inner.precise {
                    // Only the first 8 bytes of the precise value are shown.
                    let text = precise.to_string();
                    let shown = cmp::min(text.len(), 8);
                    write!(f, "#{}", &text[..shown])?;
                }
                Ok(())
            }
        }
    }
}
/// The hash of SourceId is used in the name of some Cargo folders, so shouldn't
/// vary. `as_str` gives the serialisation of a url (which has a spec) and so
/// insulates against possible changes in how the url crate does hashing.
impl Hash for SourceId {
fn hash<S: hash::Hasher>(&self, into: &mut S) {
self.inner.kind.hash(into);
match self.inner.kind {
SourceKind::Git(_) => self.inner.canonical_url.hash(into),
_ => self.inner.url.as_str().hash(into),
}
}
}
/// The hash of `SourceIdInner` is what finds its interned value in
/// `SOURCE_ID_CACHE`, so it covers only the fields that make a
/// `SourceIdInner` unique. Optional fields that do not affect uniqueness,
/// such as [`registry_key`], are left out, which is why this is not derived.
///
/// [`registry_key`]: SourceIdInner::registry_key
impl Hash for SourceIdInner {
    fn hash<S: hash::Hasher>(&self, into: &mut S) {
        let Self {
            kind,
            precise,
            canonical_url,
            ..
        } = self;
        kind.hash(into);
        precise.hash(into);
        canonical_url.hash(into);
    }
}
/// This implementation must be kept in sync with [`SourceIdInner::hash`]: it
/// compares exactly the fields that are hashed there.
impl PartialEq for SourceIdInner {
    fn eq(&self, other: &Self) -> bool {
        let mine = (&self.kind, &self.precise, &self.canonical_url);
        let theirs = (&other.kind, &other.precise, &other.canonical_url);
        mine == theirs
    }
}
impl SourceKind {
    /// Returns the protocol prefix written before the URL (as in
    /// `<protocol>+<url>`), or `None` when no prefix has to be added.
    pub(crate) fn protocol(&self) -> Option<&str> {
        let name = match self {
            // Sparse registry URL already includes the `sparse+` prefix
            SourceKind::SparseRegistry => return None,
            SourceKind::Git(_) => "git",
            SourceKind::Path => "path",
            SourceKind::Registry => "registry",
            SourceKind::LocalRegistry => "local-registry",
            SourceKind::Directory => "directory",
        };
        Some(name)
    }
}
/// Defers entirely to the hand-written `Ord` implementation.
impl PartialOrd for SourceKind {
    fn partial_cmp(&self, other: &SourceKind) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Note that this is specifically not derived on `SourceKind` although the
/// implementation here is very similar to what it might look like if it were
/// otherwise derived.
///
/// The reason for this is somewhat obtuse. First of all the hash value of
/// `SourceKind` makes its way into `~/.cargo/registry/index/github.com-XXXX`
/// which means that changes to the hash means that all Rust users need to
/// redownload the crates.io index and all their crates. If possible we strive
/// to not change this to make this redownloading behavior happen as little as
/// possible. How is this connected to `Ord` you might ask? That's a good
/// question!
///
/// Since the beginning of time `SourceKind` has had `#[derive(Hash)]`. It for
/// the longest time *also* derived the `Ord` and `PartialOrd` traits. In #8522,
/// however, the implementation of `Ord` changed. This handwritten implementation
/// forgot to sync itself with the originally derived implementation, namely
/// placing git dependencies as sorted after all other dependencies instead of
/// first as before.
///
/// This regression in #8522 (Rust 1.47) went unnoticed. When we switched back
/// to a derived implementation in #9133 (Rust 1.52 beta) we only then ironically
/// saw an issue (#9334). In #9334 it was observed that stable Rust at the time
/// (1.51) was sorting git dependencies last, whereas Rust 1.52 beta would sort
/// git dependencies first. This is because the `PartialOrd` implementation in
/// 1.51 used #8522, the buggy implementation, which put git deps last. In 1.52
/// it was (unknowingly) restored to the pre-1.47 behavior with git dependencies
/// first.
///
/// Because the breakage was only witnessed after the original breakage, this
/// trait implementation is preserving the "broken" behavior. Put a different way:
///
/// * Rust pre-1.47 sorted git deps first.
/// * Rust 1.47 to Rust 1.51 sorted git deps last, a breaking change (#8522) that
///   was never noticed.
/// * Rust 1.52 restored the pre-1.47 behavior (#9133, without knowing it did
///   so), and breakage was witnessed by actual users due to difference with
///   1.51.
/// * Rust 1.52 (the source as it lives now) was fixed to match the 1.47-1.51
///   behavior (#9383), which is now considered intentionally breaking from the
///   pre-1.47 behavior.
///
/// Note that this was all discovered when Rust 1.53 was in nightly and 1.52 was
/// in beta. #9133 was in both beta and nightly at the time of discovery. For
/// 1.52 #9383 reverted #9133, meaning 1.52 is the same as 1.51. On nightly
/// (1.53) #9397 was created to fix the regression introduced by #9133 relative
/// to the current stable (1.51).
///
/// That's all a long winded way of saying "it's weird that git deps hash first
/// and are sorted last, but it's the way it is right now". The author of this
/// comment chose to handwrite the `Ord` implementation instead of the `Hash`
/// implementation, but it's only required that at most one of them is
/// hand-written because the other can be derived. Perhaps one day in
/// the future someone can figure out how to remove this behavior.
impl Ord for SourceKind {
    fn cmp(&self, other: &SourceKind) -> Ordering {
        /// Position of each kind in the sort order; git sources come last.
        fn rank(kind: &SourceKind) -> u8 {
            match kind {
                SourceKind::Path => 0,
                SourceKind::Registry => 1,
                SourceKind::SparseRegistry => 2,
                SourceKind::LocalRegistry => 3,
                SourceKind::Directory => 4,
                SourceKind::Git(_) => 5,
            }
        }

        match (self, other) {
            // Two git sources are ordered by their references.
            (SourceKind::Git(a), SourceKind::Git(b)) => a.cmp(b),
            _ => rank(self).cmp(&rank(other)),
        }
    }
}
/// A `Display`able view into a `SourceId` that will write it as a url
pub struct SourceIdAsUrl<'a> {
/// The interned source data being displayed.
inner: &'a SourceIdInner,
/// Whether the Git reference in the query string is written URL-encoded.
encoded: bool,
}
/// Writes `<protocol>+<url>`, followed for git sources by `?<reference>` and
/// `#<precise>` when those are present.
impl<'a> fmt::Display for SourceIdAsUrl<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner = self.inner;
        if let Some(protocol) = inner.kind.protocol() {
            write!(f, "{protocol}+")?;
        }
        write!(f, "{}", inner.url)?;
        // Only git sources carry a reference and a precise suffix.
        let SourceKind::Git(reference) = &inner.kind else {
            return Ok(());
        };
        if let Some(pretty) = reference.pretty_ref(self.encoded) {
            write!(f, "?{}", pretty)?;
        }
        if let Some(precise) = &inner.precise {
            write!(f, "#{}", precise)?;
        }
        Ok(())
    }
}
impl GitReference {
    /// Derives the Git reference from URL query pairs.
    ///
    /// Recognized keys are `branch` (and the older `ref`, treated the same),
    /// `rev` and `tag`; other keys are ignored. When several recognized keys
    /// appear the last one wins, and with none the default branch is used.
    pub fn from_query(
        query_pairs: impl Iterator<Item = (impl AsRef<str>, impl AsRef<str>)>,
    ) -> Self {
        query_pairs.fold(GitReference::DefaultBranch, |current, (key, value)| {
            let value = value.as_ref();
            match key.as_ref() {
                // Map older 'ref' to branch.
                "branch" | "ref" => GitReference::Branch(value.to_owned()),
                "rev" => GitReference::Rev(value.to_owned()),
                "tag" => GitReference::Tag(value.to_owned()),
                _ => current,
            }
        })
    }

    /// Returns a `Display`able view of this git reference, or `None` if using
    /// the head of the default branch.
    pub fn pretty_ref(&self, url_encoded: bool) -> Option<PrettyRef<'_>> {
        if matches!(self, GitReference::DefaultBranch) {
            return None;
        }
        Some(PrettyRef {
            inner: self,
            url_encoded,
        })
    }
}
/// A git reference that can be `Display`ed
pub struct PrettyRef<'a> {
/// The reference being displayed. `GitReference::pretty_ref` never builds a
/// `PrettyRef` for `GitReference::DefaultBranch`.
inner: &'a GitReference,
/// Whether the reference value is written URL-encoded.
url_encoded: bool,
}
/// Writes the reference as `branch=<v>`, `tag=<v>` or `rev=<v>`, with the
/// value form-urlencoded when `url_encoded` is set.
impl<'a> fmt::Display for PrettyRef<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (prefix, value) = match self.inner {
            GitReference::Branch(name) => ("branch=", name.as_str()),
            GitReference::Tag(name) => ("tag=", name.as_str()),
            GitReference::Rev(name) => ("rev=", name.as_str()),
            GitReference::DefaultBranch => unreachable!(),
        };
        f.write_str(prefix)?;
        if !self.url_encoded {
            return write!(f, "{value}");
        }
        url::form_urlencoded::byte_serialize(value.as_bytes())
            .try_for_each(|piece| write!(f, "{piece}"))
    }
}
impl KeyOf {
    /// Gets the underlying key, wherever it was defined.
    fn key(&self) -> &str {
        let (KeyOf::Registry(name) | KeyOf::Source(name)) = self;
        name.as_str()
    }

    /// Gets the key only if it names an alternative registry, i.e. it is a
    /// `KeyOf::Registry`.
    fn alternative_registry(&self) -> Option<&str> {
        if let KeyOf::Registry(name) = self {
            Some(name.as_str())
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
use super::{GitReference, SourceId, SourceKind};
use crate::util::{Config, IntoUrl};
// Git sources compare by canonical URL, so the `https` and `git` URLs of the
// same repository are equal, while a different reference makes them differ.
#[test]
fn github_sources_equal() {
let loc = "https://github.com/foo/bar".into_url().unwrap();
let default = SourceKind::Git(GitReference::DefaultBranch);
let s1 = SourceId::new(default.clone(), loc, None).unwrap();
let loc = "git://github.com/foo/bar".into_url().unwrap();
let s2 = SourceId::new(default, loc.clone(), None).unwrap();
assert_eq!(s1, s2);
let foo = SourceKind::Git(GitReference::Branch("foo".to_string()));
let s3 = SourceId::new(foo, loc, None).unwrap();
assert_ne!(s1, s3);
}
// This is a test that the hash of the `SourceId` for crates.io is a well-known
// value.
//
// Note that the hash value matches what the crates.io source id has hashed
// since long before Rust 1.30. We strive to keep this value the same across
// versions of Cargo because changing it means that users will need to
// redownload the index and all crates they use when using a new Cargo version.
//
// This isn't to say that this hash can *never* change, only that when changing
// this it should be explicitly done. If this hash changes accidentally and
// you're able to restore the hash to its original value, please do so!
// Otherwise please just leave a comment in your PR as to why the hash value is
// changing and why the old value can't be easily preserved.
//
// The hash value depends on endianness and bit-width, so we only run this test on
// little-endian 64-bit CPUs (such as x86-64 and ARM64) where it matches the
// well-known value.
#[test]
#[cfg(all(target_endian = "little", target_pointer_width = "64"))]
fn test_cratesio_hash() {
let config = Config::default().unwrap();
let crates_io = SourceId::crates_io(&config).unwrap();
assert_eq!(crate::util::hex::short_hash(&crates_io), "1ecc6299db9ec823");
}
// See the comment in `test_cratesio_hash`.
//
// Only test on non-Windows as paths on Windows will get different hashes.
#[test]
#[cfg(all(target_endian = "little", target_pointer_width = "64", not(windows)))]
fn test_stable_hash() {
use std::hash::Hasher;
use std::path::Path;
// Hashes a source relative to the fixed workspace root `/tmp/ws`.
let gen_hash = |source_id: SourceId| {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
source_id.stable_hash(Path::new("/tmp/ws"), &mut hasher);
hasher.finish()
};
let url = "https://my-crates.io".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
assert_eq!(gen_hash(source_id), 18108075011063494626);
assert_eq!(crate::util::hex::short_hash(&source_id), "fb60813d6cb8df79");
let url = "https://your-crates.io".into_url().unwrap();
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
assert_eq!(gen_hash(source_id), 12862859764592646184);
assert_eq!(crate::util::hex::short_hash(&source_id), "09c10fd0cbd74bce");
let url = "sparse+https://my-crates.io".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
assert_eq!(gen_hash(source_id), 8763561830438022424);
assert_eq!(crate::util::hex::short_hash(&source_id), "d1ea0d96f6f759b5");
let url = "sparse+https://your-crates.io".into_url().unwrap();
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
assert_eq!(gen_hash(source_id), 5159702466575482972);
assert_eq!(crate::util::hex::short_hash(&source_id), "135d23074253cb78");
let url = "file:///tmp/ws/crate".into_url().unwrap();
let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
assert_eq!(gen_hash(source_id), 15332537265078583985);
assert_eq!(crate::util::hex::short_hash(&source_id), "73a808694abda756");
let path = Path::new("/tmp/ws/crate");
let source_id = SourceId::for_local_registry(path).unwrap();
assert_eq!(gen_hash(source_id), 18446533307730842837);
assert_eq!(crate::util::hex::short_hash(&source_id), "52a84cc73f6fd48b");
let source_id = SourceId::for_path(path).unwrap();
assert_eq!(gen_hash(source_id), 8764714075439899829);
assert_eq!(crate::util::hex::short_hash(&source_id), "e1ddd48578620fc1");
let source_id = SourceId::for_directory(path).unwrap();
assert_eq!(gen_hash(source_id), 17459999773908528552);
assert_eq!(crate::util::hex::short_hash(&source_id), "6568fe2c2fab5bfe");
}
// A sparse registry ID written with `as_url` parses back to an equal ID.
#[test]
fn serde_roundtrip() {
let url = "sparse+https://my-crates.io/".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
let formatted = format!("{}", source_id.as_url());
let deserialized = SourceId::from_url(&formatted).unwrap();
assert_eq!(formatted, "sparse+https://my-crates.io/");
assert_eq!(source_id, deserialized);
}
// A branch name full of URL-significant characters survives a round trip
// through the encoded URL form.
#[test]
fn gitrefs_roundtrip() {
let base = "https://host/path".into_url().unwrap();
let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
let s1 = SourceId::for_git(&base, branch).unwrap();
let ser1 = format!("{}", s1.as_encoded_url());
let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
let ser2 = format!("{}", s2.as_encoded_url());
// Serializing twice should yield the same result
assert_eq!(ser1, ser2, "Serialized forms don't match");
// SourceId serializing the same should have the same semantics
// This used to not be the case (# was ambiguous)
assert_eq!(s1, s2, "SourceId doesn't round-trip");
// Freeze the format to match an x-www-form-urlencoded query string
// https://url.spec.whatwg.org/#application/x-www-form-urlencoded
assert_eq!(
ser1,
"git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
);
}
}
/// Checks whether `url` equals the overridden crates.io URL, which is read
/// from the `__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS` environment variable.
/// Returns `false` when the variable is unset or not valid Unicode.
// ALLOWED: For testing Cargo itself only.
#[allow(clippy::disallowed_methods)]
fn is_overridden_crates_io_url(url: &str) -> bool {
    match std::env::var("__CARGO_TEST_CRATES_IO_URL_DO_NOT_USE_THIS") {
        Ok(overridden) => overridden == url,
        Err(_) => false,
    }
}