blob: f1b311a403e8b24900b07d17ec836f4252a6c3bf [file] [log] [blame]
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use alloc::format;
use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
use core::borrow::Borrow;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
use core::str;
#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;
use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
use crate::RuleType;
/// A matching pair of [`Token`]s and everything between them.
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem]( in
/// editors.
/// [`Token`]: ../enum.Token.html
pub struct Pair<'i, R> {
/// # Safety
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
/// Token index into `queue`.
start: usize,
line_index: Rc<LineIndex>,
/// # Safety
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<'i, R: RuleType>(
queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
line_index: Rc<LineIndex>,
start: usize,
) -> Pair<'i, R> {
Pair {
impl<'i, R: RuleType> Pair<'i, R> {
/// Returns the `Rule` of the `Pair`.
/// # Examples
/// ```
/// # use std::rc::Rc;
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
/// let input = "";
/// let pair = pest::state(input, |state| {
/// // generating Token pair with Rule::a ...
/// # state.rule(Rule::a, |s| Ok(s))
/// }).unwrap().next().unwrap();
/// assert_eq!(pair.as_rule(), Rule::a);
/// ```
pub fn as_rule(&self) -> R {
match self.queue[self.pair()] {
QueueableToken::End { rule, .. } => rule,
_ => unreachable!(),
/// Captures a slice from the `&str` defined by the token `Pair`.
/// # Examples
/// ```
/// # use std::rc::Rc;
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
/// let input = "ab";
/// let pair = pest::state(input, |state| {
/// // generating Token pair with Rule::ab ...
/// # state.rule(Rule::ab, |s| s.match_string("ab"))
/// }).unwrap().next().unwrap();
/// assert_eq!(pair.as_str(), "ab");
/// ```
pub fn as_str(&self) -> &'i str {
let start = self.pos(self.start);
let end = self.pos(self.pair());
// Generated positions always come from Positions and are UTF-8 borders.
/// Returns the `Span` defined by the `Pair`, consuming it.
/// # Examples
/// ```
/// # use std::rc::Rc;
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
/// let input = "ab";
/// let pair = pest::state(input, |state| {
/// // generating Token pair with Rule::ab ...
/// # state.rule(Rule::ab, |s| s.match_string("ab"))
/// }).unwrap().next().unwrap();
/// assert_eq!(pair.into_span().as_str(), "ab");
/// ```
#[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
pub fn into_span(self) -> Span<'i> {
/// Returns the `Span` defined by the `Pair`, **without** consuming it.
/// # Examples
/// ```
/// # use std::rc::Rc;
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
/// let input = "ab";
/// let pair = pest::state(input, |state| {
/// // generating Token pair with Rule::ab ...
/// # state.rule(Rule::ab, |s| s.match_string("ab"))
/// }).unwrap().next().unwrap();
/// assert_eq!(pair.as_span().as_str(), "ab");
/// ```
pub fn as_span(&self) -> Span<'i> {
let start = self.pos(self.start);
let end = self.pos(self.pair());
// Generated positions always come from Positions and are UTF-8 borders.
unsafe { span::Span::new_unchecked(self.input, start, end) }
/// Get current node tag
pub fn as_node_tag(&self) -> Option<&str> {
match &self.queue[self.pair()] {
QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
_ => None,
/// Returns the inner `Pairs` between the `Pair`, consuming it.
/// # Examples
/// ```
/// # use std::rc::Rc;
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
/// let input = "";
/// let pair = pest::state(input, |state| {
/// // generating Token pair with Rule::a ...
/// # state.rule(Rule::a, |s| Ok(s))
/// }).unwrap().next().unwrap();
/// assert!(pair.into_inner().next().is_none());
/// ```
pub fn into_inner(self) -> Pairs<'i, R> {
// Index returned by `self.pair()` for this pair's `Start` token.
let pair = self.pair();
// NOTE(review): the constructor call that builds the returned `Pairs` is truncated in this
// copy of the file. Only the argument below survives; it names the token right after this
// pair's `Start` token. Restore the full call from upstream before building.
self.start + 1,
/// Returns the `Tokens` for the `Pair`.
/// # Examples
/// ```
/// # use std::rc::Rc;
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
/// let input = "";
/// let pair = pest::state(input, |state| {
/// // generating Token pair with Rule::a ...
/// # state.rule(Rule::a, |s| Ok(s))
/// }).unwrap().next().unwrap();
/// let tokens: Vec<_> = pair.tokens().collect();
/// assert_eq!(tokens.len(), 2);
/// ```
pub fn tokens(self) -> Tokens<'i, R> {
let end = self.pair();
tokens::new(self.queue, self.input, self.start, end + 1)
/// Generates a string that stores the lexical information of `self` in
/// a pretty-printed JSON format.
#[cfg(feature = "pretty-print")]
pub fn to_json(&self) -> String {
    // Goes through this type's `serde::Serialize` impl.
    ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
}
/// Returns the `line`, `col` of this pair start.
pub fn line_col(&self) -> (usize, usize) {
let pos = self.pos(self.start);
self.line_index.line_col(self.input, pos)
fn pair(&self) -> usize {
match self.queue[self.start] {
QueueableToken::Start {
end_token_index, ..
} => end_token_index,
_ => unreachable!(),
fn pos(&self, index: usize) -> usize {
match self.queue[index] {
QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
// Extra constructor for `Pairs`, defined in this file so it can call `Pair`'s private
// `pair()` helper.
impl<'i, R: RuleType> Pairs<'i, R> {
/// Create a new `Pairs` iterator containing just the single `Pair`.
pub fn single(pair: Pair<'i, R>) -> Self {
// Index returned by `pair.pair()` for the given pair's `Start` token.
// NOTE(review): the expression that builds the returned `Pairs`, and the closing braces,
// are missing from this copy of the file. Restore them from upstream before building.
let end = pair.pair();
impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let pair = &mut f.debug_struct("Pair");
pair.field("rule", &self.as_rule());
// In order not to break compatibility
if let Some(s) = self.as_node_tag() {
pair.field("node_tag", &s);
pair.field("span", &self.as_span())
.field("inner", &self.clone().into_inner().collect::<Vec<_>>())
impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let rule = self.as_rule();
let start = self.pos(self.start);
let end = self.pos(self.pair());
let mut pairs = self.clone().into_inner().peekable();
if pairs.peek().is_none() {
write!(f, "{:?}({}, {})", rule, start, end)
} else {
"{:?}({}, {}, [{}])",
.map(|pair| format!("{}", pair))
.join(", ")
impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
fn eq(&self, other: &Pair<'i, R>) -> bool {
Rc::ptr_eq(&self.queue, &other.queue)
&& ptr::eq(self.input, other.input)
&& self.start == other.start
// Marker impl only: the `PartialEq` impl compares queue pointer, input pointer and start
// index, so equality is a full equivalence relation.
impl<'i, R: Eq> Eq for Pair<'i, R> {}
impl<'i, R: Hash> Hash for Pair<'i, R> {
fn hash<H: Hasher>(&self, state: &mut H) {
(&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
(self.input as *const str).hash(state);
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    /// Serializes the pair as a struct with three fields: `pos` (start and end positions),
    /// `rule` (the rule's `Debug` text) and `inner` (the matched string when there are no
    /// inner pairs, otherwise the inner pairs).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        // `into_inner` consumes its receiver, so it runs on a clone.
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        // `SerializeStruct` must be finished explicitly to produce the serializer's result.
        ser.end()
    }
}
// Tests for `Pair`: the JSON output of `to_json` and the token count after `into_inner`.
// NOTE(review): this module looks truncated in this copy of the file. No `#[test]`
// attributes are visible, the expected JSON has lost its numeric entries and brackets, and
// no closing delimiters remain. Restore it from upstream before relying on these tests.
mod tests {
use crate::macros::tests::*;
use crate::parser::Parser;
#[cfg(feature = "pretty-print")]
fn test_pretty_print() {
let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();
let expected = r#"{
"pos": [
"rule": "a",
"inner": {
"pos": [
"pairs": [
"pos": [
"rule": "b",
"inner": "b"
assert_eq!(expected, pair.to_json());
fn pair_into_inner() {
let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())
let pairs = pair.into_inner(); // the tokens b()
assert_eq!(2, pairs.tokens().count());