blob: 6977cc9cda83d259a61448f33f68d51cfba9f609 [file] [log] [blame]
//! AST for parsing format descriptions.
use alloc::string::String;
use alloc::vec::Vec;
use core::iter;
use core::iter::Peekable;
use super::{lexer, Error, Location, Span};
/// One part of a complete format description.
#[allow(variant_size_differences)]
pub(super) enum Item<'a> {
/// A literal string, formatted and parsed as-is.
Literal {
/// The string itself.
value: &'a [u8],
/// Where the string originates from in the format string.
_span: Span,
},
/// A sequence of brackets. The first acts as the escape character.
EscapedBracket {
/// The first bracket.
_first: Location,
/// The second bracket.
_second: Location,
},
/// Part of a type, along with its modifiers.
Component {
/// Where the opening bracket was in the format string.
_opening_bracket: Location,
/// Whitespace between the opening bracket and name.
_leading_whitespace: Option<Whitespace<'a>>,
/// The name of the component.
name: Name<'a>,
/// The modifiers for the component.
modifiers: Vec<Modifier<'a>>,
/// Whitespace between the modifiers and closing bracket.
_trailing_whitespace: Option<Whitespace<'a>>,
/// Where the closing bracket was in the format string.
_closing_bracket: Location,
},
}
/// Whitespace within a component.
pub(super) struct Whitespace<'a> {
/// The whitespace itself.
pub(super) _value: &'a [u8],
/// Where the whitespace was in the format string.
pub(super) span: Span,
}
/// The name of a component.
pub(super) struct Name<'a> {
/// The name itself.
pub(super) value: &'a [u8],
/// Where the name was in the format string.
pub(super) span: Span,
}
/// A modifier for a component.
pub(super) struct Modifier<'a> {
/// Whitespace preceding the modifier.
pub(super) _leading_whitespace: Whitespace<'a>,
/// The key of the modifier.
pub(super) key: Key<'a>,
/// Where the colon of the modifier was in the format string.
pub(super) _colon: Location,
/// The value of the modifier.
pub(super) value: Value<'a>,
}
/// The key of a modifier.
pub(super) struct Key<'a> {
/// The key itself.
pub(super) value: &'a [u8],
/// Where the key was in the format string.
pub(super) span: Span,
}
/// The value of a modifier.
pub(super) struct Value<'a> {
/// The value itself.
pub(super) value: &'a [u8],
/// Where the value was in the format string.
pub(super) span: Span,
}
/// Parse the provided tokens into an AST.
pub(super) fn parse<'a>(
tokens: impl Iterator<Item = lexer::Token<'a>>,
) -> impl Iterator<Item = Result<Item<'a>, Error>> {
let mut tokens = tokens.peekable();
iter::from_fn(move || {
Some(match tokens.next()? {
lexer::Token::Literal { value, span } => Ok(Item::Literal { value, _span: span }),
lexer::Token::Bracket {
kind: lexer::BracketKind::Opening,
location,
} => {
// escaped bracket
if let Some(&lexer::Token::Bracket {
kind: lexer::BracketKind::Opening,
location: second_location,
}) = tokens.peek()
{
tokens.next(); // consume
Ok(Item::EscapedBracket {
_first: location,
_second: second_location,
})
}
// component
else {
parse_component(location, &mut tokens)
}
}
lexer::Token::Bracket {
kind: lexer::BracketKind::Closing,
location: _,
} => unreachable!(
"internal error: closing bracket should have been consumed by `parse_component`",
),
lexer::Token::ComponentPart {
kind: _,
value: _,
span: _,
} => unreachable!(
"internal error: component part should have been consumed by `parse_component`",
),
})
})
}
/// Parse a component. This assumes that the opening bracket has already been consumed.
fn parse_component<'a>(
opening_bracket: Location,
tokens: &mut Peekable<impl Iterator<Item = lexer::Token<'a>>>,
) -> Result<Item<'a>, Error> {
let leading_whitespace = if let Some(&lexer::Token::ComponentPart {
kind: lexer::ComponentKind::Whitespace,
value,
span,
}) = tokens.peek()
{
tokens.next(); // consume
Some(Whitespace {
_value: value,
span,
})
} else {
None
};
let name = if let Some(&lexer::Token::ComponentPart {
kind: lexer::ComponentKind::NotWhitespace,
value,
span,
}) = tokens.peek()
{
tokens.next(); // consume
Name { value, span }
} else {
let span = leading_whitespace.map_or_else(
|| Span {
start: opening_bracket,
end: opening_bracket,
},
|whitespace| whitespace.span.shrink_to_end(),
);
return Err(Error {
_inner: span.error("expected component name"),
public: crate::error::InvalidFormatDescription::MissingComponentName {
index: span.start_byte(),
},
});
};
let mut modifiers = Vec::new();
let trailing_whitespace = loop {
let whitespace = if let Some(&lexer::Token::ComponentPart {
kind: lexer::ComponentKind::Whitespace,
value,
span,
}) = tokens.peek()
{
tokens.next(); // consume
Whitespace {
_value: value,
span,
}
} else {
break None;
};
if let Some(&lexer::Token::ComponentPart {
kind: lexer::ComponentKind::NotWhitespace,
value,
span,
}) = tokens.peek()
{
tokens.next(); // consume
let colon_index = match value.iter().position(|&b| b == b':') {
Some(index) => index,
None => {
return Err(Error {
_inner: span.error("modifier must be of the form `key:value`"),
public: crate::error::InvalidFormatDescription::InvalidModifier {
value: String::from_utf8_lossy(value).into_owned(),
index: span.start_byte(),
},
});
}
};
let key = &value[..colon_index];
let value = &value[colon_index + 1..];
if key.is_empty() {
return Err(Error {
_inner: span.shrink_to_start().error("expected modifier key"),
public: crate::error::InvalidFormatDescription::InvalidModifier {
value: String::new(),
index: span.start_byte(),
},
});
}
if value.is_empty() {
return Err(Error {
_inner: span.shrink_to_end().error("expected modifier value"),
public: crate::error::InvalidFormatDescription::InvalidModifier {
value: String::new(),
index: span.shrink_to_end().start_byte(),
},
});
}
modifiers.push(Modifier {
_leading_whitespace: whitespace,
key: Key {
value: key,
span: span.subspan(..colon_index),
},
_colon: span.start.offset(colon_index),
value: Value {
value,
span: span.subspan(colon_index + 1..),
},
});
} else {
break Some(whitespace);
}
};
let closing_bracket = if let Some(&lexer::Token::Bracket {
kind: lexer::BracketKind::Closing,
location,
}) = tokens.peek()
{
tokens.next(); // consume
location
} else {
return Err(Error {
_inner: opening_bracket.error("unclosed bracket"),
public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
index: opening_bracket.byte,
},
});
};
Ok(Item::Component {
_opening_bracket: opening_bracket,
_leading_whitespace: leading_whitespace,
name,
modifiers,
_trailing_whitespace: trailing_whitespace,
_closing_bracket: closing_bracket,
})
}