pest/parser_state.rs
1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
//! The core functionality for parsing grammars.
//! Tracks the state of the parser while rules are being handled.
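//!
//! A typical entry point is the `state` function below, which runs a closure of
//! parser transformations over the input. A minimal sketch (the rule type `()` is
//! only a placeholder):
//!
//! ```
//! let input = "ab";
//! let _pairs = pest::state::<(), _>(input, |s| s.match_string("ab")).unwrap();
//! ```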
12
13use alloc::borrow::ToOwned;
14use alloc::boxed::Box;
15use alloc::collections::BTreeSet;
16use alloc::rc::Rc;
17use alloc::string::String;
18use alloc::vec;
19use alloc::vec::Vec;
20use core::fmt::{Debug, Display, Formatter};
21use core::num::NonZeroUsize;
22use core::ops::Range;
23use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24
25use crate::error::{Error, ErrorVariant};
26use crate::iterators::pairs::new;
27use crate::iterators::{pairs, QueueableToken};
28use crate::position::Position;
29use crate::span::Span;
30use crate::stack::Stack;
31use crate::RuleType;
32
33/// The current lookahead status of a [`ParserState`].
34///
35/// [`ParserState`]: struct.ParserState.html
36#[derive(Clone, Copy, Debug, Eq, PartialEq)]
37pub enum Lookahead {
38 /// The positive predicate, written as an ampersand &,
39 /// attempts to match its inner expression.
40 /// If the inner expression succeeds, parsing continues,
41 /// but at the same position as the predicate —
42 /// &foo ~ bar is thus a kind of "AND" statement:
43 /// "the input string must match foo AND bar".
44 /// If the inner expression fails,
45 /// the whole expression fails too.
46 Positive,
47 /// The negative predicate, written as an exclamation mark !,
48 /// attempts to match its inner expression.
49 /// If the inner expression fails, the predicate succeeds
50 /// and parsing continues at the same position as the predicate.
51 /// If the inner expression succeeds, the predicate fails —
52 /// !foo ~ bar is thus a kind of "NOT" statement:
53 /// "the input string must match bar but NOT foo".
54 Negative,
55 /// No lookahead (i.e. it will consume input).
56 None,
57}
58
59/// The current atomicity of a [`ParserState`].
60///
61/// [`ParserState`]: struct.ParserState.html
62#[derive(Clone, Copy, Debug, Eq, PartialEq)]
63pub enum Atomicity {
64 /// prevents implicit whitespace: inside an atomic rule,
65 /// the tilde ~ means "immediately followed by",
66 /// and repetition operators (asterisk * and plus sign +)
67 /// have no implicit separation. In addition, all other rules
68 /// called from an atomic rule are also treated as atomic.
69 /// (interior matching rules are silent)
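    ///
    /// For example, with these grammar rules (a sketch; the rule names are
    /// illustrative):
    ///
    /// ```text
    /// atomic  = @{ "a" ~ "b"+ }   // matches "abb" but never "a bb": no implicit whitespace
    /// regular =  { "a" ~ "b"+ }   // may also match "a bb" if a WHITESPACE rule is defined
    /// ```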
70 Atomic,
71 /// The same as atomic, but inner tokens are produced as normal.
72 CompoundAtomic,
73 /// implicit whitespace is enabled
74 NonAtomic,
75}
76
77/// Type alias to simplify specifying the return value of chained closures.
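///
/// For example, a parser-transforming helper typically has this shape (a minimal
/// sketch; `Rule` and `match_a` are illustrative):
///
/// ```
/// # use pest::{ParseResult, ParserState};
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// # enum Rule {}
/// fn match_a(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
///     state.match_string("a")
/// }
///
/// let state: Box<ParserState<'_, Rule>> = ParserState::new("ab");
/// assert!(match_a(state).is_ok());
/// ```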
78pub type ParseResult<S> = Result<S, S>;
79
80/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
81#[derive(Clone, Copy, Debug, Eq, PartialEq)]
82pub enum MatchDir {
83 /// from the bottom to the top of the stack
84 BottomToTop,
85 /// from the top to the bottom of the stack
86 TopToBottom,
87}
88
89static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
90
91/// Sets the maximum call limit for the parser state
92/// to prevent stack overflows or excessive execution times
93/// in some grammars.
94/// If set, the calls are tracked as a running total
95/// over all non-terminal rules that can nest closures
96/// (which are passed to transform the parser state).
97///
98/// # Arguments
99///
100/// * `limit` - The maximum number of calls. If None,
101/// the number of calls is unlimited.
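///
/// # Examples
///
/// A minimal sketch (this assumes the function is re-exported at the crate root,
/// like the other public items in this module):
///
/// ```
/// # use core::num::NonZeroUsize;
/// // Track at most 5000 parser-state calls before failing with "call limit reached".
/// pest::set_call_limit(NonZeroUsize::new(5000));
/// // Remove the limit again.
/// pest::set_call_limit(None);
/// ```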
102pub fn set_call_limit(limit: Option<NonZeroUsize>) {
103 CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
104}
105
106static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
107
/// Sets whether additional information for error details
/// should be collected. This is useful for debugging
/// parser errors (as it leads to more comprehensive
/// error messages), but it has a higher performance cost,
/// which is why it is off by default.
113///
114/// # Arguments
115///
116/// * `enabled` - Whether to enable the collection for
117/// more error details.
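///
/// # Examples
///
/// A minimal sketch (this assumes the function is re-exported at the crate root,
/// like the other public items in this module):
///
/// ```
/// // Collect detailed parse-attempt information for richer error messages.
/// pest::set_error_detail(true);
/// // Switch the extra bookkeeping off again once debugging is done.
/// pest::set_error_detail(false);
/// ```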
118pub fn set_error_detail(enabled: bool) {
119 ERROR_DETAIL.store(enabled, Ordering::Relaxed);
120}
121
122#[derive(Debug)]
123struct CallLimitTracker {
124 current_call_limit: Option<(usize, usize)>,
125}
126
127impl Default for CallLimitTracker {
128 fn default() -> Self {
129 let limit = CALL_LIMIT.load(Ordering::Relaxed);
130 let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
131 Self { current_call_limit }
132 }
133}
134
135impl CallLimitTracker {
136 fn limit_reached(&self) -> bool {
137 self.current_call_limit
138 .map_or(false, |(current, limit)| current >= limit)
139 }
140
141 fn increment_depth(&mut self) {
142 if let Some((current, _)) = &mut self.current_call_limit {
143 *current += 1;
144 }
145 }
146}
147
/// Number of call stacks that may result from parsing a sequence of rules.
const CALL_STACK_INITIAL_CAPACITY: usize = 20;
/// Max number of (un)expected tokens that we may see at the parsing error position.
const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
/// Max number of rule children for which we'll extend call stacks.
///
/// If the rule we're working with has too many child rules that failed to parse,
/// we don't want to store long stacks for all of them. If a rule has more than this
/// number of failed children, they are all collapsed into the parent rule.
const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
158
/// Structure tracking an errored parsing call (associated with a specific `ParserState` function).
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub enum ParseAttempt<R> {
    /// Call of `rule` errored.
    Rule(R),
    /// Call of a token element (e.g., `match_string` or `match_insensitive`) errored.
    /// Works as an indicator that the leaf node is not a rule. To get the token value,
    /// consult `ParseAttempts`' `(un)expected_tokens`.
    Token,
168}
169
170impl<R> ParseAttempt<R> {
    /// Returns the rule of this parse attempt, if the attempt was a rule call.
    pub fn get_rule(&self) -> Option<&R> {
172 match self {
173 ParseAttempt::Rule(r) => Some(r),
174 ParseAttempt::Token => None,
175 }
176 }
177}
178
/// Rules call stack.
/// Contains the sequence of rule calls that resulted in a new parsing attempt.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RulesCallStack<R> {
    /// The deepest rule that caused a parsing error (a `ParseAttempt::Token` is
    /// transformed into a rule).
    pub deepest: ParseAttempt<R>,
    /// The topmost rule covering `deepest`.
    pub parent: Option<R>,
187}
188
189impl<R> RulesCallStack<R> {
190 fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
191 RulesCallStack {
192 deepest,
193 parent: None,
194 }
195 }
196}
197
/// A token that the parser tried to match at some position during a parse attempt.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ParsingToken {
    /// A case-sensitive string.
    Sensitive { token: String },
    /// A case-insensitive string.
    Insensitive { token: String },
    /// An inclusive range of characters.
    Range { start: char, end: char },
    /// A built-in rule.
    BuiltInRule,
}
205
206impl Display for ParsingToken {
207 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
208 match self {
209 ParsingToken::Sensitive { token } => write!(f, "{token}"),
210 ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
211 ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
212 ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
213 }
214 }
215}
216
217/// Structure that tracks all the parsing attempts made on the max position.
218/// We want to give an error hint about parsing rules that succeeded
219/// at the farthest input position.
/// The intuition is that such rules will most likely be what the user initially wanted to write.
221#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
222pub struct ParseAttempts<R> {
223 /// Indicates whether the parsing attempts are tracked.
224 enabled: bool,
    /// Vec of rule call sequences awaiting tokens at the same `max_position`.
    /// If there are several stacks in the vec, all those rule stacks are "equal"
    /// because their attempts occurred at the same position.
    pub call_stacks: Vec<RulesCallStack<R>>,
    /// Tokens that could be placed at `max_position`
    /// in order to get a query valid for the grammar.
    expected_tokens: Vec<ParsingToken>,
    /// Tokens that are prohibited from being placed at `max_position`
    /// in order to get a query valid for the grammar.
    unexpected_tokens: Vec<ParsingToken>,
235 /// Max position at which we were expecting to see one of `expected_tokens`.
236 pub max_position: usize,
237}
238
239impl<R: RuleType> ParseAttempts<R> {
240 /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
241 /// initialized with capacity.
242 pub fn new() -> Self {
243 Self {
244 call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
245 expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
246 unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
247 max_position: 0,
248 enabled: ERROR_DETAIL.load(Ordering::Relaxed),
249 }
250 }
251
252 /// Get number of currently present call stacks.
253 fn call_stacks_number(&self) -> usize {
254 self.call_stacks.len()
255 }
256
    /// Retrieve the tokens that were expected at `max_position`, deduplicated and sorted.
    pub fn expected_tokens(&self) -> Vec<ParsingToken> {
258 self.expected_tokens
259 .iter()
260 .cloned()
261 .collect::<BTreeSet<_>>()
262 .into_iter()
263 .collect()
264 }
265
    /// Retrieve the tokens that were not expected at `max_position`, deduplicated and sorted.
    pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
267 self.unexpected_tokens
268 .iter()
269 .cloned()
270 .collect::<BTreeSet<_>>()
271 .into_iter()
272 .collect()
273 }
274
275 /// Retrieve call stacks.
276 pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
277 self.call_stacks
278 .iter()
279 .cloned()
280 .collect::<BTreeSet<_>>()
281 .into_iter()
282 .collect()
283 }
284
    /// If we've tried to parse a rule whose start position is bigger than the previous
    /// `max_position`, it means we've advanced in our parsing and found a better candidate.
287 ///
288 /// `start_index` is:
289 /// * Number of call stacks present in state at the moment current `rule` was called. The idea
290 /// is that we'd like to update only those stacks that originated from the current `rule` and
291 /// not from those that were called previously.
292 /// * 0 in case we've successfully parsed some token since the moment `rule` was called.
293 fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
294 let mut non_token_call_stacks = Vec::new();
295 let mut token_call_stack_met = false;
296 for call_stack in self.call_stacks.iter().skip(start_index) {
297 if matches!(call_stack.deepest, ParseAttempt::Token) {
298 token_call_stack_met = true;
299 } else {
300 non_token_call_stacks.push(call_stack.clone())
301 }
302 }
303 if token_call_stack_met && non_token_call_stacks.is_empty() {
            // If `non_token_call_stacks` is not empty, we don't want to add a new standalone
            // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
            // rule), since it doesn't give us any useful additional info.
307 non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
308 }
309 self.call_stacks
310 .splice(start_index.., non_token_call_stacks);
311
312 let children_number_over_threshold =
313 self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
314 if children_number_over_threshold {
315 self.call_stacks.truncate(start_index);
316 self.call_stacks
317 .push(RulesCallStack::new(ParseAttempt::Rule(rule)));
318 } else {
319 for call_stack in self.call_stacks.iter_mut().skip(start_index) {
320 if matches!(call_stack.deepest, ParseAttempt::Token) {
321 call_stack.deepest = ParseAttempt::Rule(rule);
322 } else {
323 call_stack.parent = Some(rule);
324 }
325 }
326 }
327 }
328
    /// If the `negative_lookahead` flag is set, it means we've successfully parsed a token while
    /// in the state of negative lookahead and want to track `token` in the `unexpected_tokens`.
    /// Otherwise, we want to track it in the `expected_tokens`. Let's call the chosen vec the
    /// `target_vec`.
332 ///
333 /// In case `position` is:
334 /// * Equal to `max_position`, add `token` to `target_vec`,
335 /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`.
336 #[allow(clippy::comparison_chain)]
337 fn try_add_new_token(
338 &mut self,
339 token: ParsingToken,
340 start_position: usize,
341 position: usize,
342 negative_lookahead: bool,
343 ) {
344 let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
345 let target_vec = if negative_lookahead {
346 &mut attempts.unexpected_tokens
347 } else {
348 &mut attempts.expected_tokens
349 };
350 target_vec.push(token);
351 };
352
353 if position > self.max_position {
354 if negative_lookahead && start_position > self.max_position {
                // We encountered a sequence under negative lookahead.
                // We would like to track only the first failed token in this sequence (whose
                // `start_position` should be equal to `self.max_position`).
358 return;
359 }
360 target_vec_push_token(self);
361
362 if negative_lookahead {
363 // In case of successful parsing of token under negative lookahead the only
364 // thing we'd like to do is to track the token in the `unexpected_tokens`.
365 return;
366 }
367 self.max_position = position;
368 self.expected_tokens.clear();
369 self.unexpected_tokens.clear();
370 self.call_stacks.clear();
371 self.call_stacks
372 .push(RulesCallStack::new(ParseAttempt::Token));
373 } else if position == self.max_position {
374 target_vec_push_token(self);
375 self.call_stacks
376 .push(RulesCallStack::new(ParseAttempt::Token));
377 }
378 }
379
    /// Reset state in case we've successfully parsed some token in
    /// `match_string` or `match_insensitive`.
382 fn nullify_expected_tokens(&mut self, new_max_position: usize) {
383 self.call_stacks.clear();
384 self.expected_tokens.clear();
385 self.unexpected_tokens.clear();
386 self.max_position = new_max_position;
387 }
388}
389
390impl<R: RuleType> Default for ParseAttempts<R> {
391 fn default() -> Self {
392 Self::new()
393 }
394}
395
396/// The complete state of a [`Parser`].
397///
398/// [`Parser`]: trait.Parser.html
399#[derive(Debug)]
400pub struct ParserState<'i, R: RuleType> {
    /// Current position from which we try to apply some parser function.
    /// Initially 0.
    /// E.g., when parsing the query `create user 'Bobby'`, once we've parsed "create" via
    /// `match_insensitive`, `position` moves from 0 to the length of "create".
    ///
    /// E.g., see `match_string` -> `self.position.match_string(string)`, which updates `self.position`.
407 position: Position<'i>,
    /// Queue of rules that are partially (`QueueableToken::Start`) or
    /// completely (`QueueableToken::End`) parsed. When entering a rule, we put it in the queue
    /// in the `Start` state; after all of its sub-logic (subrules or strings) is parsed,
    /// we change it to the `End` state.
412 queue: Vec<QueueableToken<'i, R>>,
413 /// Status set in case specific lookahead logic is used in grammar.
414 /// See `Lookahead` for more information.
415 lookahead: Lookahead,
416 /// Rules that we HAVE expected, tried to parse, but failed.
417 pos_attempts: Vec<R>,
418 /// Rules that we have NOT expected, tried to parse, but failed.
419 neg_attempts: Vec<R>,
420 /// Max position in the query from which we've tried to parse some rule but failed.
421 attempt_pos: usize,
422 /// Current atomicity status. For more information see `Atomicity`.
423 atomicity: Atomicity,
424 /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
425 /// invocations).
426 stack: Stack<Span<'i>>,
427 /// Used for setting max parser calls limit.
428 call_tracker: CallLimitTracker,
    /// In addition to tracking `pos_attempts` and `attempt_pos`
    /// as a pair of (list of rules that we've tried to parse but failed, max parsed position),
    /// this helper struct tracks the rules that we've tried to parse at that same max position.
    ///
    /// Note that we may try to parse several rules at different positions. We want to track only
    /// those rules whose attempt position is bigger, because we consider them nearer to the
    /// query the user really wanted to pass.
    ///
    /// E.g. we have a query `create user "Bobby"`, two root rules, and a shared `Name` rule:
    /// * CreateUser = { "create" ~ "user" ~ Name }
    /// * CreateTable = { "create" ~ "table" ~ Name }
    /// * Name = { SOME_DEFINITION }
    ///
    /// While parsing the query we'll update the tracker position to the start of "Bobby", because
    /// we'd successfully parse "create" + "user" (and not "table").
444 parse_attempts: ParseAttempts<R>,
445}
446
447/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
448///
449/// # Examples
450///
451/// ```
452/// # use pest;
453/// let input = "";
454/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
455/// ```
456#[allow(clippy::perf)]
457pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
458where
459 F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
460{
461 let state = ParserState::new(input);
462
463 match f(state) {
464 Ok(state) => {
465 let len = state.queue.len();
466 Ok(new(Rc::new(state.queue), input, None, 0, len))
467 }
468 Err(mut state) => {
469 let variant = if state.reached_call_limit() {
470 ErrorVariant::CustomError {
471 message: "call limit reached".to_owned(),
472 }
473 } else {
474 state.pos_attempts.sort();
475 state.pos_attempts.dedup();
476 state.neg_attempts.sort();
477 state.neg_attempts.dedup();
478 ErrorVariant::ParsingError {
479 positives: state.pos_attempts.clone(),
480 negatives: state.neg_attempts.clone(),
481 }
482 };
483
484 if state.parse_attempts.enabled {
485 Err(Error::new_from_pos_with_parsing_attempts(
486 variant,
487 Position::new_internal(input, state.attempt_pos),
488 state.parse_attempts.clone(),
489 ))
490 } else {
491 Err(Error::new_from_pos(
492 variant,
493 Position::new_internal(input, state.attempt_pos),
494 ))
495 }
496 }
497 }
498}
499
500impl<'i, R: RuleType> ParserState<'i, R> {
501 /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
502 /// will be passed from closure to closure based on the needs of the specified `Parser`.
503 ///
504 /// # Examples
505 ///
506 /// ```
507 /// # use pest;
508 /// let input = "";
509 /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
510 /// ```
511 pub fn new(input: &'i str) -> Box<Self> {
512 Box::new(ParserState {
513 position: Position::from_start(input),
514 queue: vec![],
515 lookahead: Lookahead::None,
516 pos_attempts: vec![],
517 neg_attempts: vec![],
518 attempt_pos: 0,
519 atomicity: Atomicity::NonAtomic,
520 stack: Stack::new(),
521 call_tracker: Default::default(),
522 parse_attempts: ParseAttempts::new(),
523 })
524 }
525
    /// Get all parse attempts after the parsing process has finished.
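    ///
    /// A minimal sketch of inspecting the attempts directly on a `ParserState`
    /// (the `Rule` enum is only a placeholder; detailed tracking additionally
    /// requires `set_error_detail(true)` to have been called before the state
    /// was created):
    ///
    /// ```
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// # enum Rule {}
    /// let state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new("ab");
    /// let state = state.match_string("ab").unwrap();
    /// let attempts = state.get_parse_attempts();
    /// assert!(attempts.expected_tokens().is_empty());
    /// ```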
527 pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
528 &self.parse_attempts
529 }
530
531 /// Returns a reference to the current `Position` of the `ParserState`.
532 ///
533 /// # Examples
534 ///
535 /// ```
536 /// # use pest;
537 /// # #[allow(non_camel_case_types)]
538 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
539 /// enum Rule {
540 /// ab
541 /// }
542 ///
543 /// let input = "ab";
544 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
545 /// let position = state.position();
546 /// assert_eq!(position.pos(), 0);
547 /// ```
548 pub fn position(&self) -> &Position<'i> {
549 &self.position
550 }
551
552 /// Returns the current atomicity of the `ParserState`.
553 ///
554 /// # Examples
555 ///
556 /// ```
557 /// # use pest;
558 /// # use pest::Atomicity;
559 /// # #[allow(non_camel_case_types)]
560 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
561 /// enum Rule {
562 /// ab
563 /// }
564 ///
565 /// let input = "ab";
566 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
567 /// let atomicity = state.atomicity();
568 /// assert_eq!(atomicity, Atomicity::NonAtomic);
569 /// ```
570 pub fn atomicity(&self) -> Atomicity {
571 self.atomicity
572 }
573
574 #[inline]
575 fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
576 if self.call_tracker.limit_reached() {
577 return Err(self);
578 }
579 self.call_tracker.increment_depth();
580 Ok(self)
581 }
582
583 #[inline]
584 fn reached_call_limit(&self) -> bool {
585 self.call_tracker.limit_reached()
586 }
587
588 /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
589 /// meant to match the rule.
590 ///
591 /// # Examples
592 ///
593 /// ```
594 /// # use pest;
595 /// # #[allow(non_camel_case_types)]
596 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
597 /// enum Rule {
598 /// a
599 /// }
600 ///
601 /// let input = "a";
602 /// let pairs: Vec<_> = pest::state(input, |state| {
603 /// state.rule(Rule::a, |s| Ok(s))
604 /// }).unwrap().collect();
605 ///
606 /// assert_eq!(pairs.len(), 1);
607 /// ```
608 #[inline]
609 pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
610 where
611 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
612 {
613 self = self.inc_call_check_limit()?;
614 // Position from which this `rule` starts parsing.
615 let actual_pos = self.position.pos();
616 // Remember index of the `self.queue` element that will be associated with this `rule`.
617 let index = self.queue.len();
618
619 let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
620 (self.pos_attempts.len(), self.neg_attempts.len())
621 } else {
622 // Attempts have not been cleared yet since the attempt_pos is older.
623 (0, 0)
624 };
625
626 if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
627 // Pair's position will only be known after running the closure.
628 self.queue.push(QueueableToken::Start {
629 end_token_index: 0,
630 input_pos: actual_pos,
631 });
632 }
633
        // Remember the number of attempts before the `f` call.
        // Using this variable in `track`, we can tell how many attempts were added
        // while traversing children rules.
        let attempts = self.attempts_at(actual_pos);
        // Number of call stacks present in `self.parse_attempts` before the `f` call.
        // We need to remember this number only in case no farther attempt was found.
        // E.g. we are handling a rule at whose start position two children rules may be
        // tested. By the time we return from the `f` call below, there will be two more
        // children rules in `self.parse_attempts` that we'll consider to be the children
        // of the current `rule`.
        let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
        // Max parsing attempt position at the moment of `rule` handling.
        // In case it's raised during children rules handling, it means
        // we've made parsing progress.
        let remember_max_position = self.parse_attempts.max_position;
649
650 let result = f(self);
651
652 let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
653 if new_state.parse_attempts.max_position > remember_max_position {
                // It means that one of the `match_string` or e.g. `match_insensitive` function calls
                // has already erased `self.parse_attempts.call_stacks` and that the previously
                // remembered values are not valid anymore.
657 remember_call_stacks_number = 0;
658 }
659 if !matches!(new_state.atomicity, Atomicity::Atomic) {
660 new_state
661 .parse_attempts
662 .try_add_new_stack_rule(rule, remember_call_stacks_number);
663 }
664 };
665
666 match result {
667 Ok(mut new_state) => {
668 if new_state.lookahead == Lookahead::Negative {
669 new_state.track(
670 rule,
671 actual_pos,
672 pos_attempts_index,
673 neg_attempts_index,
674 attempts,
675 );
676 }
677
678 if new_state.lookahead == Lookahead::None
679 && new_state.atomicity != Atomicity::Atomic
680 {
681 // Index of `QueueableToken::End` token added below
682 // that corresponds to previously added `QueueableToken::Start` token.
683 let new_index = new_state.queue.len();
684 match new_state.queue[index] {
685 QueueableToken::Start {
686 ref mut end_token_index,
687 ..
688 } => *end_token_index = new_index,
689 _ => unreachable!(),
690 };
691
692 let new_pos = new_state.position.pos();
693
694 new_state.queue.push(QueueableToken::End {
695 start_token_index: index,
696 rule,
697 tag: None,
698 input_pos: new_pos,
699 });
700 }
701
                // Note that we need to count positive parsing results too, because we can fail
                // in an optional rule call inside which the farthest parsed token may lie.
705 if new_state.parse_attempts.enabled {
706 try_add_rule_to_stack(&mut new_state);
707 }
708 Ok(new_state)
709 }
710 Err(mut new_state) => {
711 if new_state.lookahead != Lookahead::Negative {
712 new_state.track(
713 rule,
714 actual_pos,
715 pos_attempts_index,
716 neg_attempts_index,
717 attempts,
718 );
719 if new_state.parse_attempts.enabled {
720 try_add_rule_to_stack(&mut new_state);
721 }
722 }
723
724 if new_state.lookahead == Lookahead::None
725 && new_state.atomicity != Atomicity::Atomic
726 {
727 new_state.queue.truncate(index);
728 }
729
730 Err(new_state)
731 }
732 }
733 }
734
    /// Tag the current node.
    ///
    /// # Examples
    ///
    /// Recognize a sequence of characters and tag one of them:
740 ///
741 /// ```
742 /// use pest::{state, ParseResult, ParserState, iterators::Pair};
743 /// #[allow(non_camel_case_types)]
744 /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
745 /// enum Rule {
746 /// character,
747 /// }
748 /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
749 /// state.sequence(|state| {
750 /// character(state)
751 /// .and_then(|state| character(state))
752 /// .and_then(|state| character(state))
753 /// .and_then(|state| state.tag_node("c"))
754 /// .and_then(|state| character(state))
755 /// })
756 /// }
757 /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
758 /// state.rule(Rule::character, |state| state.match_range('a'..'z'))
759 /// }
760 ///
761 /// let input = "abcd";
762 /// let pairs = state(input, mark_c).unwrap();
763 /// // find all node tag as `c`
764 /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
765 /// assert_eq!(find[0].as_str(), "c")
766 /// ```
767 #[inline]
768 pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
769 if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
770 *old = Some(tag)
771 }
772 Ok(self)
773 }
774
    /// Get the number of allowed rule attempts plus prohibited rule attempts.
776 fn attempts_at(&self, pos: usize) -> usize {
777 if self.attempt_pos == pos {
778 self.pos_attempts.len() + self.neg_attempts.len()
779 } else {
780 0
781 }
782 }
783
784 fn track(
785 &mut self,
786 rule: R,
787 pos: usize,
788 pos_attempts_index: usize,
789 neg_attempts_index: usize,
790 prev_attempts: usize,
791 ) {
792 if self.atomicity == Atomicity::Atomic {
793 return;
794 }
795
796 // If nested rules made no progress, there is no use to report them; it's only useful to
797 // track the current rule, the exception being when only one attempt has been made during
798 // the children rules.
799 let curr_attempts = self.attempts_at(pos);
800 if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
801 return;
802 }
803
804 if pos == self.attempt_pos {
805 self.pos_attempts.truncate(pos_attempts_index);
806 self.neg_attempts.truncate(neg_attempts_index);
807 }
808
809 if pos > self.attempt_pos {
810 self.pos_attempts.clear();
811 self.neg_attempts.clear();
812 self.attempt_pos = pos;
813 }
814
815 let attempts = if self.lookahead != Lookahead::Negative {
816 &mut self.pos_attempts
817 } else {
818 &mut self.neg_attempts
819 };
820
821 if pos == self.attempt_pos {
822 attempts.push(rule);
823 }
824 }
825
826 /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
827 /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
828 /// `Box<ParserState>` otherwise.
829 ///
    /// This method is useful for parsing sequences that only match as a whole, which usually come
    /// in the form of chained `Result`s with
    /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
    ///
835 /// # Examples
836 ///
837 /// ```
838 /// # use pest;
839 /// # #[allow(non_camel_case_types)]
840 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
841 /// enum Rule {
842 /// a
843 /// }
844 ///
845 /// let input = "a";
846 /// let pairs: Vec<_> = pest::state(input, |state| {
847 /// state.sequence(|s| {
848 /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
849 /// s.match_string("b")
850 /// })
851 /// }).or_else(|s| {
852 /// Ok(s)
853 /// })
854 /// }).unwrap().collect();
855 ///
856 /// assert_eq!(pairs.len(), 0);
857 /// ```
858 #[inline]
859 pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
860 where
861 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
862 {
863 self = self.inc_call_check_limit()?;
864 let token_index = self.queue.len();
865 let initial_pos = self.position;
866
867 let result = f(self);
868
869 match result {
870 Ok(new_state) => Ok(new_state),
871 Err(mut new_state) => {
872 // Restore the initial position and truncate the token queue.
873 new_state.position = initial_pos;
874 new_state.queue.truncate(token_index);
875 Err(new_state)
876 }
877 }
878 }
879
    /// Repeatedly applies the transformation provided by `f` to the `Box<ParserState>` until `f`
    /// returns an `Err`. Returns `Ok` with the updated `Box<ParserState>` carried by that `Err`,
    /// so the repetition itself never fails.
882 ///
883 /// # Examples
884 ///
885 /// ```
886 /// # use pest;
887 /// # #[allow(non_camel_case_types)]
888 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
889 /// enum Rule {
890 /// ab
891 /// }
892 ///
893 /// let input = "aab";
894 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
895 /// let mut result = state.repeat(|s| {
896 /// s.match_string("a")
897 /// });
898 /// assert!(result.is_ok());
899 /// assert_eq!(result.unwrap().position().pos(), 2);
900 ///
901 /// state = pest::ParserState::new(input);
902 /// result = state.repeat(|s| {
903 /// s.match_string("b")
904 /// });
905 /// assert!(result.is_ok());
906 /// assert_eq!(result.unwrap().position().pos(), 0);
907 /// ```
908 #[inline]
909 pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
910 where
911 F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
912 {
913 self = self.inc_call_check_limit()?;
914 let mut result = f(self);
915
916 loop {
917 match result {
918 Ok(state) => result = f(state),
919 Err(state) => return Ok(state),
920 };
921 }
922 }
923
924 /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
925 /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
926 ///
927 /// # Examples
928 ///
929 /// ```
930 /// # use pest;
931 /// # #[allow(non_camel_case_types)]
932 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
933 /// enum Rule {
934 /// ab
935 /// }
936 ///
937 /// let input = "ab";
938 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
939 /// let result = state.optional(|s| {
940 /// s.match_string("ab")
941 /// });
942 /// assert!(result.is_ok());
943 ///
944 /// state = pest::ParserState::new(input);
945 /// let result = state.optional(|s| {
946 /// s.match_string("ac")
947 /// });
948 /// assert!(result.is_ok());
949 /// ```
950 #[inline]
951 pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
952 where
953 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
954 {
955 self = self.inc_call_check_limit()?;
956 match f(self) {
957 Ok(state) | Err(state) => Ok(state),
958 }
959 }
960
961 /// Generic function to handle result of char/string/range parsing
962 /// in order to track (un)expected tokens.
963 fn handle_token_parse_result(
964 &mut self,
965 start_position: usize,
966 token: ParsingToken,
967 parse_succeeded: bool,
968 ) {
        // New position after the tracked parsed element in case `parse_succeeded` is true.
        // Position of the parsing failure otherwise.
971 let current_pos = self.position.pos();
972
973 if parse_succeeded {
974 if self.lookahead == Lookahead::Negative {
975 self.parse_attempts
976 .try_add_new_token(token, start_position, current_pos, true);
977 } else if current_pos > self.parse_attempts.max_position {
978 self.parse_attempts.nullify_expected_tokens(current_pos);
979 }
980 } else if self.lookahead != Lookahead::Negative {
981 self.parse_attempts
982 .try_add_new_token(token, start_position, current_pos, false);
983 }
984 }
985
986 /// Attempts to match a single character based on a filter function. Returns `Ok` with the
987 /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
988 /// otherwise.
989 ///
990 /// # Examples
991 ///
992 /// ```
993 /// # use pest;
994 /// # #[allow(non_camel_case_types)]
995 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
996 /// enum Rule {}
997 ///
998 /// let input = "ab";
999 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1000 /// let result = state.match_char_by(|c| c.is_ascii());
1001 /// assert!(result.is_ok());
1002 /// assert_eq!(result.unwrap().position().pos(), 1);
1003 ///
1004 /// let input = "❤";
1005 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1006 /// let result = state.match_char_by(|c| c.is_ascii());
1007 /// assert!(result.is_err());
1008 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1009 /// ```
1010 #[inline]
1011 pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1012 where
1013 F: FnOnce(char) -> bool,
1014 {
1015 let start_position = self.position.pos();
1016 let succeeded = self.position.match_char_by(f);
1017 if self.parse_attempts.enabled {
1018 let token = ParsingToken::BuiltInRule;
1019 self.handle_token_parse_result(start_position, token, succeeded);
1020 }
1021 if succeeded {
1022 Ok(self)
1023 } else {
1024 Err(self)
1025 }
1026 }
1027
1028 /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
1029 /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
1030 ///
1031 /// # Examples
1032 ///
1033 /// ```
1034 /// # use pest;
1035 /// # #[allow(non_camel_case_types)]
1036 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1037 /// enum Rule {}
1038 ///
1039 /// let input = "ab";
1040 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1041 /// let mut result = state.match_string("ab");
1042 /// assert!(result.is_ok());
1043 /// assert_eq!(result.unwrap().position().pos(), 2);
1044 ///
1045 /// state = pest::ParserState::new(input);
1046 /// result = state.match_string("ac");
1047 /// assert!(result.is_err());
1048 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1049 /// ```
1050 #[inline]
1051 pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1052 let start_position = self.position.pos();
1053 let succeeded = self.position.match_string(string);
1054 if self.parse_attempts.enabled {
1055 let token = ParsingToken::Sensitive {
1056 token: String::from(string),
1057 };
1058 self.handle_token_parse_result(start_position, token, succeeded);
1059 }
1060 if succeeded {
1061 Ok(self)
1062 } else {
1063 Err(self)
1064 }
1065 }
1066
1067 /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
1068 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1069 ///
1070 /// # Examples
1071 ///
1072 /// ```
1073 /// # use pest;
1074 /// # #[allow(non_camel_case_types)]
1075 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1076 /// enum Rule {}
1077 ///
1078 /// let input = "ab";
1079 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1080 /// let mut result = state.match_insensitive("AB");
1081 /// assert!(result.is_ok());
1082 /// assert_eq!(result.unwrap().position().pos(), 2);
1083 ///
1084 /// state = pest::ParserState::new(input);
1085 /// result = state.match_insensitive("AC");
1086 /// assert!(result.is_err());
1087 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1088 /// ```
1089 #[inline]
1090 pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1091 let start_position: usize = self.position().pos();
1092 let succeeded = self.position.match_insensitive(string);
1093 if self.parse_attempts.enabled {
1094 let token = ParsingToken::Insensitive {
1095 token: String::from(string),
1096 };
1097 self.handle_token_parse_result(start_position, token, succeeded);
1098 }
1099 if succeeded {
1100 Ok(self)
1101 } else {
1102 Err(self)
1103 }
1104 }
1105
1106 /// Attempts to match a single character from the given range. Returns `Ok` with the updated
1107 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1108 ///
1109 /// # Caution
1110 /// The provided `range` is interpreted as inclusive.
1111 ///
1112 /// # Examples
1113 ///
1114 /// ```
1115 /// # use pest;
1116 /// # #[allow(non_camel_case_types)]
1117 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1118 /// enum Rule {}
1119 ///
1120 /// let input = "ab";
1121 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1122 /// let mut result = state.match_range('a'..'z');
1123 /// assert!(result.is_ok());
1124 /// assert_eq!(result.unwrap().position().pos(), 1);
1125 ///
1126 /// state = pest::ParserState::new(input);
1127 /// result = state.match_range('A'..'Z');
1128 /// assert!(result.is_err());
1129 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1130 /// ```
1131 #[inline]
1132 pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
1133 let start_position = self.position().pos();
1134 let token = ParsingToken::Range {
1135 start: range.start,
1136 end: range.end,
1137 };
1138 let succeeded = self.position.match_range(range);
1139 if self.parse_attempts.enabled {
1140 self.handle_token_parse_result(start_position, token, succeeded);
1141 }
1142 if succeeded {
1143 Ok(self)
1144 } else {
1145 Err(self)
1146 }
1147 }
1148
1149 /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
1150 /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1151 ///
1152 /// # Examples
1153 ///
1154 /// ```
1155 /// # use pest;
1156 /// # #[allow(non_camel_case_types)]
1157 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1158 /// enum Rule {}
1159 ///
1160 /// let input = "ab";
1161 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1162 /// let mut result = state.skip(1);
1163 /// assert!(result.is_ok());
1164 /// assert_eq!(result.unwrap().position().pos(), 1);
1165 ///
1166 /// state = pest::ParserState::new(input);
1167 /// result = state.skip(3);
1168 /// assert!(result.is_err());
1169 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1170 /// ```
1171 #[inline]
1172 pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
1173 if self.position.skip(n) {
1174 Ok(self)
1175 } else {
1176 Err(self)
1177 }
1178 }
1179
1180 /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
1181 /// updated `Box<ParserState>` whether or not one of the strings is found.
1182 ///
1183 /// # Examples
1184 ///
1185 /// ```
1186 /// # use pest;
1187 /// # #[allow(non_camel_case_types)]
1188 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1189 /// enum Rule {}
1190 ///
1191 /// let input = "abcd";
1192 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1193 /// let mut result = state.skip_until(&["c", "d"]);
1194 /// assert!(result.is_ok());
1195 /// assert_eq!(result.unwrap().position().pos(), 2);
1196 /// ```
1197 #[inline]
1198 pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
1199 self.position.skip_until(strings);
1200 Ok(self)
1201 }
1202
1203 /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
1204 /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
1205 ///
1206 /// # Examples
1207 ///
1208 /// ```
1209 /// # use pest;
1210 /// # #[allow(non_camel_case_types)]
1211 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1212 /// enum Rule {}
1213 ///
1214 /// let input = "ab";
1215 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1216 /// let mut result = state.start_of_input();
1217 /// assert!(result.is_ok());
1218 ///
1219 /// state = pest::ParserState::new(input);
1220 /// state = state.match_string("ab").unwrap();
1221 /// result = state.start_of_input();
1222 /// assert!(result.is_err());
1223 /// ```
1224 #[inline]
1225 pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1226 if self.position.at_start() {
1227 Ok(self)
1228 } else {
1229 Err(self)
1230 }
1231 }
1232
1233 /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
1234 /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
1235 ///
1236 /// # Examples
1237 ///
1238 /// ```
1239 /// # use pest;
1240 /// # #[allow(non_camel_case_types)]
1241 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1242 /// enum Rule {}
1243 ///
1244 /// let input = "ab";
1245 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1246 /// let mut result = state.end_of_input();
1247 /// assert!(result.is_err());
1248 ///
1249 /// state = pest::ParserState::new(input);
1250 /// state = state.match_string("ab").unwrap();
1251 /// result = state.end_of_input();
1252 /// assert!(result.is_ok());
1253 /// ```
1254 #[inline]
1255 pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1256 if self.position.at_end() {
1257 Ok(self)
1258 } else {
1259 Err(self)
1260 }
1261 }
1262
1263 /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
1264 /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
1265 /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
1266 /// together, negating the `Result`.
1267 ///
1268 /// # Examples
1269 ///
1270 /// ```
1271 /// # use pest;
1272 /// # #[allow(non_camel_case_types)]
1273 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1274 /// enum Rule {
1275 /// a
1276 /// }
1277 ///
1278 /// let input = "a";
1279 /// let pairs: Vec<_> = pest::state(input, |state| {
1280 /// state.lookahead(true, |state| {
1281 /// state.rule(Rule::a, |s| Ok(s))
1282 /// })
1283 /// }).unwrap().collect();
1284 ///
1285 /// assert_eq!(pairs.len(), 0);
1286 /// ```
1287 #[inline]
1288 pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
1289 where
1290 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1291 {
1292 self = self.inc_call_check_limit()?;
1293 let initial_lookahead = self.lookahead;
1294
1295 self.lookahead = if is_positive {
1296 match initial_lookahead {
1297 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
1298 Lookahead::Negative => Lookahead::Negative,
1299 }
1300 } else {
1301 match initial_lookahead {
1302 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
1303 Lookahead::Negative => Lookahead::Positive,
1304 }
1305 };
1306
1307 let initial_pos = self.position;
1308
1309 let result = f(self.checkpoint());
1310
1311 let result_state = match result {
1312 Ok(mut new_state) => {
1313 new_state.position = initial_pos;
1314 new_state.lookahead = initial_lookahead;
1315 Ok(new_state.restore())
1316 }
1317 Err(mut new_state) => {
1318 new_state.position = initial_pos;
1319 new_state.lookahead = initial_lookahead;
1320 Err(new_state.restore())
1321 }
1322 };
1323
1324 if is_positive {
1325 result_state
1326 } else {
1327 match result_state {
1328 Ok(state) => Err(state),
1329 Err(state) => Ok(state),
1330 }
1331 }
1332 }
1333
    /// Transformation which stops `Token`s from being generated according to `atomicity`.
    /// Used as a wrapper over a `rule` (or even another `atomic`) call.
1336 ///
1337 /// # Examples
1338 ///
1339 /// ```
1340 /// # use pest::{self, Atomicity};
1341 /// # #[allow(non_camel_case_types)]
1342 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1343 /// enum Rule {
1344 /// a
1345 /// }
1346 ///
1347 /// let input = "a";
1348 /// let pairs: Vec<_> = pest::state(input, |state| {
1349 /// state.atomic(Atomicity::Atomic, |s| {
1350 /// s.rule(Rule::a, |s| Ok(s))
1351 /// })
1352 /// }).unwrap().collect();
1353 ///
1354 /// assert_eq!(pairs.len(), 0);
1355 /// ```
1356 #[inline]
1357 pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
1358 where
1359 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1360 {
1361 self = self.inc_call_check_limit()?;
        // In case the child parsing call is another `atomic`, it will have its own atomicity status.
        let initial_atomicity = self.atomicity;
        // In case the child atomicity is the same as the one we've demanded, we needn't do anything.
        // E.g. we have the following rules:
        // * RootRule = @{ InnerRule }
        // * InnerRule = @{ ... }
        let should_toggle = self.atomicity != atomicity;
1369
1370 // Note that we take atomicity of the top rule and not of the leaf (inner).
1371 if should_toggle {
1372 self.atomicity = atomicity;
1373 }
1374
1375 let result = f(self);
1376
1377 match result {
1378 Ok(mut new_state) => {
1379 if should_toggle {
1380 new_state.atomicity = initial_atomicity;
1381 }
1382 Ok(new_state)
1383 }
1384 Err(mut new_state) => {
1385 if should_toggle {
1386 new_state.atomicity = initial_atomicity;
1387 }
1388 Err(new_state)
1389 }
1390 }
1391 }
1392
1393 /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
1394 /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
1395 /// called successfully, or `Err(Box<ParserState>)` otherwise.
1396 ///
1397 /// # Examples
1398 ///
1399 /// ```
1400 /// # use pest;
1401 /// # #[allow(non_camel_case_types)]
1402 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1403 /// enum Rule {}
1404 ///
1405 /// let input = "ab";
1406 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1407 /// let mut result = state.stack_push(|state| state.match_string("a"));
1408 /// assert!(result.is_ok());
1409 /// assert_eq!(result.unwrap().position().pos(), 1);
1410 /// ```
1411 #[inline]
1412 pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1413 where
1414 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1415 {
1416 self = self.inc_call_check_limit()?;
1417 let start = self.position;
1418
1419 let result = f(self);
1420
1421 match result {
1422 Ok(mut state) => {
1423 let end = state.position;
1424 state.stack.push(start.span(&end));
1425 Ok(state)
1426 }
1427 Err(state) => Err(state),
1428 }
1429 }
1430
1431 /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1432 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1433 ///
1434 /// # Examples
1435 ///
1436 /// ```
1437 /// # use pest;
1438 /// # #[allow(non_camel_case_types)]
1439 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1440 /// enum Rule {}
1441 ///
1442 /// let input = "aa";
1443 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1444 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1445 /// |state| state.stack_peek()
1446 /// );
1447 /// assert!(result.is_ok());
1448 /// assert_eq!(result.unwrap().position().pos(), 2);
1449 /// ```
1450 #[inline]
1451 pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1452 let string = self
1453 .stack
1454 .peek()
1455 .expect("peek was called on empty stack")
1456 .as_str();
1457 self.match_string(string)
1458 }
1459
1460 /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1461 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1462 ///
1463 /// # Examples
1464 ///
1465 /// ```
1466 /// # use pest;
1467 /// # #[allow(non_camel_case_types)]
1468 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1469 /// enum Rule {}
1470 ///
1471 /// let input = "aa";
1472 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1473 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1474 /// |state| state.stack_pop()
1475 /// );
1476 /// assert!(result.is_ok());
1477 /// assert_eq!(result.unwrap().position().pos(), 2);
1478 /// ```
1479 #[inline]
1480 pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1481 let string = self
1482 .stack
1483 .pop()
1484 .expect("pop was called on empty stack")
1485 .as_str();
1486 self.match_string(string)
1487 }
1488
1489 /// Matches part of the state of the stack.
1490 ///
1491 /// # Examples
1492 ///
1493 /// ```
1494 /// # use pest::{self, MatchDir};
1495 /// # #[allow(non_camel_case_types)]
1496 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1497 /// enum Rule {}
1498 ///
1499 /// let input = "abcd cd cb";
1500 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1501 /// let mut result = state
1502 /// .stack_push(|state| state.match_string("a"))
1503 /// .and_then(|state| state.stack_push(|state| state.match_string("b")))
1504 /// .and_then(|state| state.stack_push(|state| state.match_string("c")))
1505 /// .and_then(|state| state.stack_push(|state| state.match_string("d")))
1506 /// .and_then(|state| state.match_string(" "))
1507 /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1508 /// .and_then(|state| state.match_string(" "))
1509 /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1510 /// assert!(result.is_ok());
1511 /// assert_eq!(result.unwrap().position().pos(), 10);
1512 /// ```
1513 #[inline]
1514 pub fn stack_match_peek_slice(
1515 mut self: Box<Self>,
1516 start: i32,
1517 end: Option<i32>,
1518 match_dir: MatchDir,
1519 ) -> ParseResult<Box<Self>> {
1520 let range = match constrain_idxs(start, end, self.stack.len()) {
1521 Some(r) => r,
1522 None => return Err(self),
1523 };
        // Succeed if an empty sequence is requested.
1525 if range.end <= range.start {
1526 return Ok(self);
1527 }
1528
1529 let mut position = self.position;
1530 let result = {
1531 let mut iter_b2t = self.stack[range].iter();
1532 let matcher = |span: &Span<'_>| position.match_string(span.as_str());
1533 match match_dir {
1534 MatchDir::BottomToTop => iter_b2t.all(matcher),
1535 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1536 }
1537 };
1538 if result {
1539 self.position = position;
1540 Ok(self)
1541 } else {
1542 Err(self)
1543 }
1544 }
1545
1546 /// Matches the full state of the stack.
1547 ///
1548 /// # Examples
1549 ///
1550 /// ```
1551 /// # use pest;
1552 /// # #[allow(non_camel_case_types)]
1553 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1554 /// enum Rule {}
1555 ///
1556 /// let input = "abba";
1557 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1558 /// let mut result = state
1559 /// .stack_push(|state| state.match_string("a"))
1560 /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1561 /// .and_then(|state| state.stack_match_peek());
1562 /// assert!(result.is_ok());
1563 /// assert_eq!(result.unwrap().position().pos(), 4);
1564 /// ```
1565 #[inline]
1566 pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1567 self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1568 }
1569
    /// Matches the full state of the stack. This method will clear the stack as it evaluates.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {}
    ///
    /// let input = "aaaa";
    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
    ///     state.stack_push(|state| state.match_string("a"))
    /// }).and_then(|state| state.stack_match_pop());
    /// assert!(result.is_ok());
    /// assert_eq!(result.unwrap().position().pos(), 4);
    /// ```
1588 #[inline]
1589 pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1590 let mut position = self.position;
1591 let mut result = true;
1592 while let Some(span) = self.stack.pop() {
1593 result = position.match_string(span.as_str());
1594 if !result {
1595 break;
1596 }
1597 }
1598
1599 if result {
1600 self.position = position;
1601 Ok(self)
1602 } else {
1603 Err(self)
1604 }
1605 }
1606
1607 /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1608 /// `Err(Box<ParserState>)` otherwise.
1609 ///
1610 /// # Examples
1611 ///
1612 /// ```
1613 /// # use pest;
1614 /// # #[allow(non_camel_case_types)]
1615 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1616 /// enum Rule {}
1617 ///
1618 /// let input = "aa";
1619 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1620 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1621 /// |state| state.stack_drop()
1622 /// );
1623 /// assert!(result.is_ok());
1624 /// assert_eq!(result.unwrap().position().pos(), 1);
1625 /// ```
1626 #[inline]
1627 pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1628 match self.stack.pop() {
1629 Some(_) => Ok(self),
1630 None => Err(self),
1631 }
1632 }
1633
1634 /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1635 /// this method only restores the stack.
1636 ///
1637 /// # Examples
1638 ///
1639 /// ```
1640 /// # use pest;
1641 /// # #[allow(non_camel_case_types)]
1642 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1643 /// enum Rule {}
1644 ///
1645 /// let input = "ab";
1646 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1647 /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1648 /// state.match_string("a")).and_then(|state| state.match_string("a"))
1649 /// );
1650 ///
1651 /// assert!(result.is_err());
1652 ///
    /// // Since the rule doesn't match, the "a" pushed to the stack will be removed.
1654 /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1655 /// assert!(catch_panic.is_err());
1656 /// ```
1657 #[inline]
1658 pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1659 where
1660 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1661 {
1662 match f(self.checkpoint()) {
1663 Ok(state) => Ok(state.checkpoint_ok()),
1664 Err(state) => Err(state.restore()),
1665 }
1666 }
1667
1668 // Mark the current state as a checkpoint and return the `Box`.
1669 #[inline]
1670 pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1671 self.stack.snapshot();
1672 self
1673 }
1674
1675 // The checkpoint was cleared successfully
1676 // so remove it without touching other stack state.
1677 #[inline]
1678 pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1679 self.stack.clear_snapshot();
1680 self
1681 }
1682
1683 // Restore the current state to the most recent checkpoint.
1684 #[inline]
1685 pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1686 self.stack.restore();
1687 self
1688 }
1689}
1690
/// Helper function used only when stack operations (PUSH/POP) are used in the grammar.
1692fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1693 let start_norm = normalize_index(start, len)?;
1694 let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1695 Some(start_norm..end_norm)
1696}
1697
/// `constrain_idxs` helper function.
/// Normalizes the index using its sequence's length.
/// Returns `None` if the normalized index is out of bounds.
1701fn normalize_index(i: i32, len: usize) -> Option<usize> {
1702 if i > len as i32 {
1703 None
1704 } else if i >= 0 {
1705 Some(i as usize)
1706 } else {
1707 let real_i = len as i32 + i;
1708 if real_i >= 0 {
1709 Some(real_i as usize)
1710 } else {
1711 None
1712 }
1713 }
1714}
1715
1716#[cfg(test)]
1717mod test {
1718 use super::*;
1719
1720 #[test]
1721 fn normalize_index_pos() {
1722 assert_eq!(normalize_index(4, 6), Some(4));
1723 assert_eq!(normalize_index(5, 5), Some(5));
1724 assert_eq!(normalize_index(6, 3), None);
1725 }
1726
1727 #[test]
1728 fn normalize_index_neg() {
1729 assert_eq!(normalize_index(-4, 6), Some(2));
1730 assert_eq!(normalize_index(-5, 5), Some(0));
1731 assert_eq!(normalize_index(-6, 3), None);
1732 }
1733}