blob: 382666368b41cd522cb07ee69165ff6b0c315102 [file] [log] [blame]
use std::convert::Infallible;
use crate::Scheme;
use bstr::{BStr, BString, ByteSlice};
/// The error returned by [parse()](crate::parse()).
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("{} \"{url}\" is not valid UTF-8", kind.as_str())]
Utf8 {
url: BString,
kind: UrlKind,
source: std::str::Utf8Error,
},
#[error("{} {url:?} can not be parsed as valid URL", kind.as_str())]
Url {
url: String,
kind: UrlKind,
source: url::ParseError,
},
#[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())]
TooLong { truncated_url: BString, len: usize },
#[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())]
MissingRepositoryPath { url: BString, kind: UrlKind },
#[error("URL {url:?} is relative which is not allowed in this context")]
RelativeUrl { url: String },
}
impl From<Infallible> for Error {
fn from(_: Infallible) -> Self {
unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
}
}
///
#[derive(Debug, Clone, Copy)]
pub enum UrlKind {
///
Url,
///
Scp,
///
Local,
}
impl UrlKind {
fn as_str(&self) -> &'static str {
match self {
UrlKind::Url => "URL",
UrlKind::Scp => "SCP-like target",
UrlKind::Local => "local path",
}
}
}
pub(crate) enum InputScheme {
Url { protocol_end: usize },
Scp { colon: usize },
Local,
}
pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
// TODO: url's may only contain `:/`, we should additionally check if the characters used for
// protocol are all valid
if let Some(protocol_end) = input.find("://") {
return InputScheme::Url { protocol_end };
}
if let Some(colon) = input.find_byte(b':') {
// allow user to select files containing a `:` by passing them as absolute or relative path
// this is behavior explicitly mentioned by the scp and git manuals
let explicitly_local = &input[..colon].contains(&b'/');
let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1;
if !explicitly_local && !dos_driver_letter {
return InputScheme::Scp { colon };
}
}
InputScheme::Local
}
pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> {
const MAX_LEN: usize = 1024;
let bytes_to_path = input[protocol_end + "://".len()..]
.iter()
.filter(|b| !b.is_ascii_whitespace())
.skip_while(|b| **b == b'/' || **b == b'\\')
.position(|b| *b == b'/')
.unwrap_or(input.len() - protocol_end);
if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN {
return Err(Error::TooLong {
truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(),
len: input.len(),
});
}
let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?;
let scheme = url.scheme().into();
if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path().is_empty() {
return Err(Error::MissingRepositoryPath {
url: input.into(),
kind: UrlKind::Url,
});
}
if url.cannot_be_a_base() {
return Err(Error::RelativeUrl { url: input.to_owned() });
}
Ok(crate::Url {
serialize_alternative_form: false,
scheme,
user: url_user(&url),
password: url.password().map(Into::into),
host: url.host_str().map(Into::into),
port: url.port(),
path: url.path().into(),
})
}
pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
let input = input_to_utf8(input, UrlKind::Scp)?;
// TODO: this incorrectly splits at IPv6 addresses, check for `[]` before splitting
let (host, path) = input.split_at(colon);
debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :");
let path = &path[1..];
if path.is_empty() {
return Err(Error::MissingRepositoryPath {
url: input.to_owned().into(),
kind: UrlKind::Scp,
});
}
// The path returned by the parsed url often has the wrong number of leading `/` characters but
// should never differ in any other way (ssh URLs should not contain a query or fragment part).
// To avoid the various off-by-one errors caused by the `/` characters, we keep using the path
// determined above and can therefore skip parsing it here as well.
let url = url::Url::parse(&format!("ssh://{host}")).map_err(|source| Error::Url {
url: input.to_owned(),
kind: UrlKind::Scp,
source,
})?;
Ok(crate::Url {
serialize_alternative_form: true,
scheme: url.scheme().into(),
user: url_user(&url),
password: url.password().map(Into::into),
host: url.host_str().map(Into::into),
port: url.port(),
path: path.into(),
})
}
fn url_user(url: &url::Url) -> Option<String> {
if url.username().is_empty() && url.password().is_none() {
None
} else {
Some(url.username().into())
}
}
pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> {
let input = input_to_utf8(input, UrlKind::Url)?;
let input_after_protocol = &input[protocol_colon + "://".len()..];
let Some(first_slash) = input_after_protocol
.find('/')
.or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten())
else {
return Err(Error::MissingRepositoryPath {
url: input.to_owned().into(),
kind: UrlKind::Url,
});
};
// We cannot use the url crate to parse host and path because it special cases Windows
// driver letters. With the url crate an input of `file://x:/path/to/git` is parsed as empty
// host and with `x:/path/to/git` as path. This behavior is wrong for Git which only follows
// that rule on Windows and parses `x:` as host on Unix platforms. Additionally the url crate
// does not account for Windows special UNC path support.
// TODO: implement UNC path special case
let windows_special_path = if cfg!(windows) {
// Inputs created via url::Url::from_file_path contain an additional `/` between the
// protocol and the absolute path. Make sure we ignore that first slash character to avoid
// producing invalid paths.
let input_after_protocol = if first_slash == 0 {
&input_after_protocol[1..]
} else {
input_after_protocol
};
// parse `file://x:/path/to/git` as explained above
if input_after_protocol.chars().nth(1) == Some(':') {
Some(input_after_protocol)
} else {
None
}
} else {
None
};
let host = if windows_special_path.is_some() || first_slash == 0 {
// `file:///path/to/git` or a windows special case was triggered
None
} else {
// `file://host/path/to/git`
Some(&input_after_protocol[..first_slash])
};
// default behavior on Unix platforms and if no Windows special case was triggered
let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]);
Ok(crate::Url {
serialize_alternative_form: false,
host: host.map(Into::into),
..local(path.into())?
})
}
pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> {
if input.is_empty() {
return Err(Error::MissingRepositoryPath {
url: input.to_owned(),
kind: UrlKind::Local,
});
}
Ok(crate::Url {
serialize_alternative_form: true,
scheme: Scheme::File,
password: None,
user: None,
host: None,
port: None,
path: input.to_owned(),
})
}
fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> {
std::str::from_utf8(input).map_err(|source| Error::Utf8 {
url: input.to_owned(),
kind,
source,
})
}
fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, url::Url), Error> {
let input = input_to_utf8(input, kind)?;
url::Url::parse(input)
.map(|url| (input, url))
.map_err(|source| Error::Url {
url: input.to_owned(),
kind,
source,
})
}