blob: 3b4bc7a81bda1a76b07db72e199345e512c487d8 [file] [log] [blame]
//! Implementations of the low-level parser combinators.
pub(crate) mod rfc;
use crate::format_description::modifier::Padding;
use crate::parsing::shim::{Integer, IntegerParseBytes};
use crate::parsing::ParsedItem;
/// Parse a leading `+` or `-`.
///
/// On success the returned item holds the input following the sign together with the ASCII byte
/// of the sign that was found. `None` is returned when the input is empty or begins with any
/// other byte.
pub(crate) const fn sign(input: &[u8]) -> Option<ParsedItem<'_, u8>> {
    match input {
        [b'-', rest @ ..] => Some(ParsedItem(rest, b'-')),
        [b'+', rest @ ..] => Some(ParsedItem(rest, b'+')),
        _ => None,
    }
}
/// Build a parser that consumes the first option whose bytes prefix the input, yielding the
/// value paired with that option.
///
/// When `case_sensitive` is `false`, the prefix comparison ignores ASCII case.
///
/// The returned closure draws from a single iterator over `options`. Options examined by one
/// call (whether they matched or not) are therefore not revisited by later calls.
pub(crate) fn first_match<'a, T>(
    options: impl IntoIterator<Item = (&'a [u8], T)>,
    case_sensitive: bool,
) -> impl FnMut(&'a [u8]) -> Option<ParsedItem<'a, T>> {
    let mut candidates = options.into_iter();
    move |input| {
        candidates.find_map(|(expected, value)| {
            let remaining = if case_sensitive {
                input.strip_prefix(expected)?
            } else {
                // `get` fails when the input is shorter than the candidate, which simply moves
                // the search on to the next option.
                let head = input.get(..expected.len())?;
                if !head.eq_ignore_ascii_case(expected) {
                    return None;
                }
                &input[expected.len()..]
            };
            Some(ParsedItem(remaining, value))
        })
    }
}
/// Apply `parser` repeatedly until it stops matching, discarding everything it consumes.
///
/// The parser must produce the unit value. The returned closure never fails: if the parser does
/// not match at all, the input is handed back untouched.
pub(crate) fn zero_or_more<'a, P: Fn(&'a [u8]) -> Option<ParsedItem<'a, ()>>>(
    parser: P,
) -> impl FnMut(&'a [u8]) -> ParsedItem<'a, ()> {
    move |input| {
        let mut rest = input;
        loop {
            match parser(rest) {
                Some(item) => rest = item.into_inner(),
                None => break ParsedItem(rest, ()),
            }
        }
    }
}
/// Consume one or more instances of the provided parser.
///
/// The parser must produce the unit value. The first application is mandatory: if it fails, the
/// returned closure yields `None`. After that the parser is applied until it stops matching.
pub(crate) fn one_or_more<'a, P: Fn(&'a [u8]) -> Option<ParsedItem<'a, ()>>>(
    parser: P,
) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, ()>> {
    move |input| {
        // Mandatory first match.
        let mut rest = parser(input)?.into_inner();
        // Any number of further matches.
        loop {
            match parser(rest) {
                Some(item) => rest = item.into_inner(),
                None => return Some(ParsedItem(rest, ())),
            }
        }
    }
}
/// Consume between `N` and `M` instances of the provided parser.
///
/// The values produced by the individual applications are discarded. The value of the returned
/// item is instead the slice of the original input that was consumed in total. If fewer than `N`
/// applications succeed, the returned closure yields `None`.
pub(crate) fn n_to_m<
    'a,
    const N: u8,
    const M: u8,
    T,
    P: Fn(&'a [u8]) -> Option<ParsedItem<'a, T>>,
>(
    parser: P,
) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, &'a [u8]>> {
    debug_assert!(M >= N);
    move |input| {
        // The first `N` applications are mandatory; any failure aborts the whole parse.
        let mut rest = (0..N).try_fold(input, |rest, _| parser(rest).map(|item| item.0))?;

        // Up to `M - N` further applications are optional; stop at the first failure.
        let mut applied = N;
        while applied < M {
            match parser(rest) {
                Some(item) => rest = item.0,
                None => break,
            }
            applied += 1;
        }

        // The difference in length between the original and remaining input is what was
        // consumed.
        let consumed = input.len() - rest.len();
        Some(ParsedItem(rest, &input[..consumed]))
    }
}
/// Consume between `N` and `M` ASCII digits, returning the numerical value.
///
/// The digits are gathered with [`n_to_m`] applied to [`any_digit`]; the consumed bytes are then
/// converted to `T` with `parse_bytes`. `None` is returned if fewer than `N` digits are present
/// or if the conversion does not produce a value.
pub(crate) fn n_to_m_digits<const N: u8, const M: u8, T: Integer>(
    input: &[u8],
) -> Option<ParsedItem<'_, T>> {
    debug_assert!(M >= N);
    n_to_m::<N, M, _, _>(any_digit)(input)
        .and_then(|digits| digits.flat_map(|bytes| bytes.parse_bytes()))
}
/// Consume exactly `N` ASCII digits, returning the numerical value.
///
/// This is [`n_to_m_digits`] with the lower and upper bound both set to `N`.
pub(crate) fn exactly_n_digits<const N: u8, T: Integer>(input: &[u8]) -> Option<ParsedItem<'_, T>> {
    n_to_m_digits::<N, N, T>(input)
}
/// Build a parser for a number with a width of `N`, honoring the given padding, that returns the
/// numerical value.
///
/// This is [`n_to_m_digits_padded`] with the lower and upper bound both set to `N`. Note that the
/// padding influences how many digits are actually required: with `Padding::None`, for instance,
/// the delegate accepts anywhere between one and `N` digits.
pub(crate) fn exactly_n_digits_padded<'a, const N: u8, T: Integer>(
    padding: Padding,
) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, T>> {
    n_to_m_digits_padded::<N, N, T>(padding)
}
/// Consume between `n` and `m` digits, returning the numerical value.
pub(crate) fn n_to_m_digits_padded<'a, const N: u8, const M: u8, T: Integer>(
padding: Padding,
) -> impl Fn(&'a [u8]) -> Option<ParsedItem<'a, T>> {
debug_assert!(M >= N);
move |mut input| match padding {
Padding::None => n_to_m_digits::<1, M, _>(input),
Padding::Space => {
debug_assert!(N > 0);
let mut orig_input = input;
for _ in 0..(N - 1) {
match ascii_char::<b' '>(input) {
Some(parsed) => input = parsed.0,
None => break,
}
}
let pad_width = (orig_input.len() - input.len()) as u8;
orig_input = input;
for _ in 0..(N - pad_width) {
input = any_digit(input)?.0;
}
for _ in N..M {
match any_digit(input) {
Some(parsed) => input = parsed.0,
None => break,
}
}
ParsedItem(input, &orig_input[..(orig_input.len() - input.len())])
.flat_map(|value| value.parse_bytes())
}
Padding::Zero => n_to_m_digits::<N, M, _>(input),
}
}
/// Consume a single ASCII digit (`0` through `9`).
///
/// The value of the returned item is the digit's ASCII byte, not its numerical value. `None` is
/// returned when the input is empty or does not start with a digit.
pub(crate) const fn any_digit(input: &[u8]) -> Option<ParsedItem<'_, u8>> {
    match input {
        [digit @ b'0'..=b'9', rest @ ..] => Some(ParsedItem(rest, *digit)),
        _ => None,
    }
}
/// Consume a single instance of the ASCII character `CHAR`.
///
/// `None` is returned when the input is empty or starts with a different byte. In debug builds,
/// `CHAR` is asserted to be either a graphic or a whitespace ASCII character.
pub(crate) fn ascii_char<const CHAR: u8>(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
    debug_assert!(CHAR.is_ascii_graphic() || CHAR.is_ascii_whitespace());
    input
        .strip_prefix(&[CHAR])
        .map(|rest| ParsedItem(rest, ()))
}
/// Consume a single instance of the ASCII character `CHAR`, ignoring ASCII case.
///
/// `None` is returned when the input is empty or its first byte differs from `CHAR` in more than
/// ASCII case. In debug builds, `CHAR` is asserted to be either a graphic or a whitespace ASCII
/// character.
pub(crate) fn ascii_char_ignore_case<const CHAR: u8>(input: &[u8]) -> Option<ParsedItem<'_, ()>> {
    debug_assert!(CHAR.is_ascii_graphic() || CHAR.is_ascii_whitespace());
    let (first, rest) = input.split_first()?;
    if first.eq_ignore_ascii_case(&CHAR) {
        Some(ParsedItem(rest, ()))
    } else {
        None
    }
}
/// Make a parser optional.
///
/// The returned closure never fails. When `parser` matches, its value is wrapped in `Some`; when
/// it does not, the value is `None` and the item carries the input that was passed in.
pub(crate) fn opt<'a, T>(
    parser: impl Fn(&'a [u8]) -> Option<ParsedItem<'a, T>>,
) -> impl Fn(&'a [u8]) -> ParsedItem<'a, Option<T>> {
    move |input| parser(input).map_or_else(|| ParsedItem(input, None), |item| item.map(Some))
}