// blob: 9a21511caa0c8c9b06f61f351039122a8712e5e9 [file] [log] [blame]
use std::borrow::Cow;
use bstr::{BStr, BString, ByteSlice, ByteVec};
use crate::{Defaults, MagicSignature, Pattern, SearchMode};
/// The error returned by [parse()][crate::parse()].
///
/// In this file, every variant is produced by `Pattern::from_bytes()` or by one of the private
/// parsing helpers it calls.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
/// The input to parse was empty.
#[error("An empty string is not a valid pathspec")]
EmptyString,
/// A keyword of the long form `:(keyword,...)` was not recognized; `keyword` holds the offending bytes.
#[error("Found {keyword:?} in signature, which is not a valid keyword")]
InvalidKeyword { keyword: BString },
/// A character of the short form `:<chars>` belongs to the set of characters that is rejected as
/// unimplemented; `short_keyword` is that character.
#[error("Unimplemented short keyword: {short_keyword:?}")]
Unimplemented { short_keyword: char },
/// The long form was opened with `(`, but no `)` could be found.
#[error("Missing ')' at the end of pathspec signature")]
MissingClosingParenthesis,
/// The attribute parser of `gix_attributes` rejected an attribute; `attribute` is the text it reported.
#[error("Attribute has non-ascii characters or starts with '-': {attribute:?}")]
InvalidAttribute { attribute: BString },
/// An attribute value contained a byte that is neither ASCII-alphanumeric nor one of `,`, `-` or `_`;
/// `character` is that byte converted to a `char`.
#[error("Invalid character in attribute value: {character:?}")]
InvalidAttributeValue { character: char },
/// An attribute value ended with a backslash that has nothing left to escape.
#[error("Escape character '\\' is not allowed as the last character in an attribute value")]
TrailingEscapeCharacter,
/// The `attr:` keyword was not followed by any attribute.
#[error("Attribute specification cannot be empty")]
EmptyAttribute,
/// A second `attr:` keyword was found after a previous one already yielded attributes.
#[error("Only one attribute specification is allowed in the same pathspec")]
MultipleAttributeSpecifications,
/// Both the `literal` and the `glob` keyword were used in the same pathspec.
#[error("'literal' and 'glob' keywords cannot be used together in the same pathspec")]
IncompatibleSearchModes,
}
impl Pattern {
    /// Try to parse a path-spec pattern from the given `input` bytes.
    ///
    /// The `Defaults` provide the initial magic `signature`, the `search_mode` to fall back to if the
    /// pathspec does not choose one itself, and the `literal` flag which skips parsing altogether in
    /// favor of [`Pattern::from_literal()`].
    ///
    /// An `input` consisting of just `:` yields a pattern with `nil` set and everything else defaulted.
    /// A trailing `/` is removed from the path and recorded as `MagicSignature::MUST_BE_DIR`.
    ///
    /// # Errors
    ///
    /// [`Error::EmptyString`] if `input` is empty, otherwise whatever parsing the short or long
    /// magic signature fails with.
    pub fn from_bytes(
        input: &[u8],
        Defaults {
            signature,
            search_mode,
            literal,
        }: Defaults,
    ) -> Result<Self, Error> {
        if input.is_empty() {
            return Err(Error::EmptyString);
        }
        if literal {
            return Ok(Self::from_literal(input, signature));
        }
        if input == b":" {
            return Ok(Self {
                nil: true,
                ..Self::default()
            });
        }

        let mut pattern = Self {
            signature,
            search_mode: SearchMode::default(),
            ..Self::default()
        };

        // Magic is only present if the pathspec starts with a colon: first the short form,
        // then optionally the parenthesized long form.
        let mut offset = 0;
        if let Some(b':') = input.first() {
            offset += 1;
            pattern.signature |= parse_short_keywords(input, &mut offset)?;
            if input.get(offset) == Some(&b'(') {
                offset += 1;
                parse_long_keywords(input, &mut pattern, &mut offset)?;
            }
        }

        // The caller-provided search mode only applies if the pathspec did not pick one itself.
        if pattern.search_mode == SearchMode::default() && search_mode != SearchMode::default() {
            pattern.search_mode = search_mode;
        }

        let remainder = &input[offset..];
        let path = match remainder.split_last() {
            Some((b'/', without_slash)) => {
                pattern.signature |= MagicSignature::MUST_BE_DIR;
                without_slash
            }
            _ => remainder,
        };
        pattern.path = path.into();
        Ok(pattern)
    }

    /// Take `input` literally without parsing anything. The search mode is set to `Literal` so that
    /// this pathspec matches `input` verbatim, and `default_signature` becomes the magic signature.
    pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self {
        Self {
            search_mode: SearchMode::Literal,
            signature: default_signature,
            path: input.into(),
            ..Self::default()
        }
    }
}
/// Parse the short-form magic characters of `input`, starting at `cursor`, and return the
/// signature flags they stand for.
///
/// `/` maps to `TOP`, `^` and `!` map to `EXCLUDE`. A `:` ends the short form and is consumed,
/// while any other byte ends it without being consumed, leaving `cursor` pointing at that byte.
///
/// # Errors
///
/// [`Error::Unimplemented`] if a byte from the rejected set of punctuation characters is found.
fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> {
    let unimplemented_chars = b"\"#%&'-',;<=>@_`~";
    let mut flags = MagicSignature::empty();

    while let Some(&byte) = input.get(*cursor) {
        let flag = match byte {
            b'/' => MagicSignature::TOP,
            b'^' | b'!' => MagicSignature::EXCLUDE,
            b':' => {
                // The terminating colon belongs to the signature, so step over it.
                *cursor += 1;
                break;
            }
            rejected if unimplemented_chars.contains(&rejected) => {
                *cursor += 1;
                return Err(Error::Unimplemented {
                    short_keyword: char::from(rejected),
                });
            }
            // Anything else is the start of the long form or of the path: leave it for the caller.
            _ => break,
        };
        *cursor += 1;
        flags |= flag;
    }

    Ok(flags)
}
fn parse_long_keywords(input: &[u8], p: &mut Pattern, cursor: &mut usize) -> Result<(), Error> {
let end = input.find(")").ok_or(Error::MissingClosingParenthesis)?;
let input = &input[*cursor..end];
*cursor = end + 1;
if input.is_empty() {
return Ok(());
}
split_on_non_escaped_char(input, b',', |keyword| {
let attr_prefix = b"attr:";
match keyword {
b"attr" => {}
b"top" => p.signature |= MagicSignature::TOP,
b"icase" => p.signature |= MagicSignature::ICASE,
b"exclude" => p.signature |= MagicSignature::EXCLUDE,
b"literal" => match p.search_mode {
SearchMode::PathAwareGlob => return Err(Error::IncompatibleSearchModes),
_ => p.search_mode = SearchMode::Literal,
},
b"glob" => match p.search_mode {
SearchMode::Literal => return Err(Error::IncompatibleSearchModes),
_ => p.search_mode = SearchMode::PathAwareGlob,
},
_ if keyword.starts_with(attr_prefix) => {
if p.attributes.is_empty() {
p.attributes = parse_attributes(&keyword[attr_prefix.len()..])?;
} else {
return Err(Error::MultipleAttributeSpecifications);
}
}
_ => {
return Err(Error::InvalidKeyword {
keyword: BString::from(keyword),
});
}
};
Ok(())
})
}
/// Call `f` with each piece of `input` delimited by `split_char`, skipping separators that are
/// directly preceded by a backslash. The first error returned by `f` stops the iteration.
///
/// Separators are detected as the second byte of each adjacent byte pair, so a separator at the
/// very start of `input` is not split on. `f` is always called at least once, with whatever
/// follows the last separator (possibly all of `input`, possibly nothing).
fn split_on_non_escaped_char(
    input: &[u8],
    split_char: u8,
    mut f: impl FnMut(&[u8]) -> Result<(), Error>,
) -> Result<(), Error> {
    let mut piece_start = 0;
    for (first_idx, pair) in input.windows(2).enumerate() {
        let separator_idx = first_idx + 1;
        let is_unescaped_separator = pair[1] == split_char && pair[0] != b'\\';
        if is_unescaped_separator {
            f(&input[piece_start..separator_idx])?;
            piece_start = separator_idx + 1;
        }
    }
    f(&input[piece_start..])
}
fn parse_attributes(input: &[u8]) -> Result<Vec<gix_attributes::Assignment>, Error> {
if input.is_empty() {
return Err(Error::EmptyAttribute);
}
let unescaped = unescape_attribute_values(input.into())?;
gix_attributes::parse::Iter::new(unescaped.as_bstr())
.map(|res| res.map(gix_attributes::AssignmentRef::to_owned))
.collect::<Result<Vec<_>, _>>()
.map_err(|e| Error::InvalidAttribute { attribute: e.attribute })
}
fn unescape_attribute_values(input: &BStr) -> Result<Cow<'_, BStr>, Error> {
if !input.contains(&b'=') {
return Ok(Cow::Borrowed(input));
}
let mut out: Cow<'_, BStr> = Cow::Borrowed("".into());
for attr in input.split(|&c| c == b' ') {
let split_point = attr.find_byte(b'=').map_or_else(|| attr.len(), |i| i + 1);
let (name, value) = attr.split_at(split_point);
if value.contains(&b'\\') {
let out = out.to_mut();
out.push_str(name);
out.push_str(unescape_and_check_attr_value(value.into())?);
out.push(b' ');
} else {
check_attribute_value(value.as_bstr())?;
match out {
Cow::Borrowed(_) => {
let end = out.len() + attr.len() + 1;
out = Cow::Borrowed(&input[0..end.min(input.len())]);
}
Cow::Owned(_) => {
let out = out.to_mut();
out.push_str(name);
out.push_str(value);
out.push(b' ');
}
}
}
}
Ok(out)
}
fn unescape_and_check_attr_value(value: &BStr) -> Result<BString, Error> {
let mut out = BString::from(Vec::with_capacity(value.len()));
let mut bytes = value.iter();
while let Some(mut b) = bytes.next().copied() {
if b == b'\\' {
b = *bytes.next().ok_or(Error::TrailingEscapeCharacter)?;
}
out.push(validated_attr_value_byte(b)?);
}
Ok(out)
}
fn check_attribute_value(input: &BStr) -> Result<(), Error> {
match input.iter().copied().find(|b| !is_valid_attr_value(*b)) {
Some(b) => Err(Error::InvalidAttributeValue { character: b as char }),
None => Ok(()),
}
}
/// Return `true` if `byte` may appear in an attribute value, which is the case for
/// ASCII letters, ASCII digits, and the characters `,`, `-` and `_`.
fn is_valid_attr_value(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b',' | b'-' | b'_'
    )
}
fn validated_attr_value_byte(byte: u8) -> Result<u8, Error> {
if is_valid_attr_value(byte) {
Ok(byte)
} else {
Err(Error::InvalidAttributeValue {
character: byte as char,
})
}
}