pest/parser_state.rs
1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
//! The core functionality for parsing grammars.
//! Tracks the state of the parser while rules are being handled.
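//!
//! A typical entry point is the `state` function below, which runs a closure of
//! parser transformations over the input. A minimal sketch (the rule type `()` is
//! only a placeholder):
//!
//! ```
//! let input = "ab";
//! let _pairs = pest::state::<(), _>(input, |s| s.match_string("ab")).unwrap();
//! ```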
12
13use alloc::borrow::ToOwned;
14use alloc::boxed::Box;
15use alloc::collections::BTreeSet;
16use alloc::rc::Rc;
17use alloc::string::String;
18use alloc::vec;
19use alloc::vec::Vec;
20use core::fmt::{Debug, Display, Formatter};
21use core::num::NonZeroUsize;
22use core::ops::Range;
23use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24
25use crate::error::{Error, ErrorVariant};
26use crate::iterators::pairs::new;
27use crate::iterators::{pairs, QueueableToken};
28use crate::position::Position;
29use crate::span::Span;
30use crate::stack::Stack;
31use crate::RuleType;
32
33/// The current lookahead status of a [`ParserState`].
34///
35/// [`ParserState`]: struct.ParserState.html
36#[derive(Clone, Copy, Debug, Eq, PartialEq)]
37pub enum Lookahead {
38 /// The positive predicate, written as an ampersand &,
39 /// attempts to match its inner expression.
40 /// If the inner expression succeeds, parsing continues,
41 /// but at the same position as the predicate —
42 /// &foo ~ bar is thus a kind of "AND" statement:
43 /// "the input string must match foo AND bar".
44 /// If the inner expression fails,
45 /// the whole expression fails too.
46 Positive,
47 /// The negative predicate, written as an exclamation mark !,
48 /// attempts to match its inner expression.
49 /// If the inner expression fails, the predicate succeeds
50 /// and parsing continues at the same position as the predicate.
51 /// If the inner expression succeeds, the predicate fails —
52 /// !foo ~ bar is thus a kind of "NOT" statement:
53 /// "the input string must match bar but NOT foo".
54 Negative,
55 /// No lookahead (i.e. it will consume input).
56 None,
57}
58
59/// The current atomicity of a [`ParserState`].
60///
61/// [`ParserState`]: struct.ParserState.html
62#[derive(Clone, Copy, Debug, Eq, PartialEq)]
63pub enum Atomicity {
64 /// prevents implicit whitespace: inside an atomic rule,
65 /// the tilde ~ means "immediately followed by",
66 /// and repetition operators (asterisk * and plus sign +)
67 /// have no implicit separation. In addition, all other rules
68 /// called from an atomic rule are also treated as atomic.
69 /// (interior matching rules are silent)
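    ///
    /// For example, with these grammar rules (a sketch; the rule names are
    /// illustrative):
    ///
    /// ```text
    /// atomic  = @{ "a" ~ "b"+ }   // matches "abb" but never "a bb": no implicit whitespace
    /// regular =  { "a" ~ "b"+ }   // may also match "a bb" if a WHITESPACE rule is defined
    /// ```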
70 Atomic,
71 /// The same as atomic, but inner tokens are produced as normal.
72 CompoundAtomic,
73 /// implicit whitespace is enabled
74 NonAtomic,
75}
76
77/// Type alias to simplify specifying the return value of chained closures.
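///
/// For example, a parser-transforming helper typically has this shape (a minimal
/// sketch; `Rule` and `match_a` are illustrative):
///
/// ```
/// # use pest::{ParseResult, ParserState};
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// # enum Rule {}
/// fn match_a(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
///     state.match_string("a")
/// }
///
/// let state: Box<ParserState<'_, Rule>> = ParserState::new("ab");
/// assert!(match_a(state).is_ok());
/// ```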
78pub type ParseResult<S> = Result<S, S>;
79
80/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
81#[derive(Clone, Copy, Debug, Eq, PartialEq)]
82pub enum MatchDir {
83 /// from the bottom to the top of the stack
84 BottomToTop,
85 /// from the top to the bottom of the stack
86 TopToBottom,
87}
88
89static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
90
91/// Sets the maximum call limit for the parser state
92/// to prevent stack overflows or excessive execution times
93/// in some grammars.
94/// If set, the calls are tracked as a running total
95/// over all non-terminal rules that can nest closures
96/// (which are passed to transform the parser state).
97///
98/// # Arguments
99///
100/// * `limit` - The maximum number of calls. If None,
101/// the number of calls is unlimited.
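///
/// # Examples
///
/// A minimal sketch (this assumes the function is re-exported at the crate root,
/// like the other public items in this module):
///
/// ```
/// # use core::num::NonZeroUsize;
/// // Track at most 5000 parser-state calls before failing with "call limit reached".
/// pest::set_call_limit(NonZeroUsize::new(5000));
/// // Remove the limit again.
/// pest::set_call_limit(None);
/// ```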
102pub fn set_call_limit(limit: Option<NonZeroUsize>) {
103 CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
104}
105
106static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
107
/// Sets whether additional information for error details
/// should be collected. This is useful for debugging
/// parser errors (as it leads to more comprehensive
/// error messages), but it has a higher performance cost,
/// which is why it is off by default.
113///
114/// # Arguments
115///
116/// * `enabled` - Whether to enable the collection for
117/// more error details.
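///
/// # Examples
///
/// A minimal sketch (this assumes the function is re-exported at the crate root,
/// like the other public items in this module):
///
/// ```
/// // Collect detailed parse-attempt information for richer error messages.
/// pest::set_error_detail(true);
/// // Switch the extra bookkeeping off again once debugging is done.
/// pest::set_error_detail(false);
/// ```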
118pub fn set_error_detail(enabled: bool) {
119 ERROR_DETAIL.store(enabled, Ordering::Relaxed);
120}
121
122#[derive(Debug)]
123struct CallLimitTracker {
124 current_call_limit: Option<(usize, usize)>,
125}
126
127impl Default for CallLimitTracker {
128 fn default() -> Self {
129 let limit = CALL_LIMIT.load(Ordering::Relaxed);
130 let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
131 Self { current_call_limit }
132 }
133}
134
135impl CallLimitTracker {
136 fn limit_reached(&self) -> bool {
137 self.current_call_limit
138 .map_or(false, |(current, limit)| current >= limit)
139 }
140
141 fn increment_depth(&mut self) {
142 if let Some((current, _)) = &mut self.current_call_limit {
143 *current += 1;
144 }
145 }
146}
147
/// Number of call stacks that may result from parsing a sequence of rules.
const CALL_STACK_INITIAL_CAPACITY: usize = 20;
/// Max number of (un)expected tokens that we may see at the parsing error position.
const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
/// Max number of rule children for which we'll extend call stacks.
///
/// If the rule we're working with has too many child rules that failed to parse,
/// we don't want to store long stacks for all of them. If a rule has more than this
/// number of failed children, they are all collapsed into the parent rule.
const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
158
/// Structure tracking an errored parsing call (associated with a specific `ParserState` function).
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub enum ParseAttempt<R> {
    /// Call of `rule` errored.
    Rule(R),
    /// Call of a token element (e.g., `match_string` or `match_insensitive`) errored.
    /// Works as an indicator that the leaf node is not a rule. To get the token value,
    /// consult `ParseAttempts`' `(un)expected_tokens`.
    Token,
168}
169
170impl<R> ParseAttempt<R> {
    /// Returns the rule of this parse attempt, if the attempt was a rule call.
    pub fn get_rule(&self) -> Option<&R> {
172 match self {
173 ParseAttempt::Rule(r) => Some(r),
174 ParseAttempt::Token => None,
175 }
176 }
177}
178
/// Rules call stack.
/// Contains the sequence of rule calls that resulted in a new parsing attempt.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RulesCallStack<R> {
    /// The deepest rule that caused a parsing error (a `ParseAttempt::Token` is
    /// transformed into a rule).
    pub deepest: ParseAttempt<R>,
    /// The topmost rule covering `deepest`.
    pub parent: Option<R>,
187}
188
189impl<R> RulesCallStack<R> {
190 fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
191 RulesCallStack {
192 deepest,
193 parent: None,
194 }
195 }
196}
197
/// A token that the parser tried to match at some position during a parse attempt.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ParsingToken {
    /// A case-sensitive string.
    Sensitive { token: String },
    /// A case-insensitive string.
    Insensitive { token: String },
    /// An inclusive range of characters.
    Range { start: char, end: char },
    /// A built-in rule.
    BuiltInRule,
}
205
206impl Display for ParsingToken {
207 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
208 match self {
209 ParsingToken::Sensitive { token } => write!(f, "{token}"),
210 ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
211 ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
212 ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
213 }
214 }
215}
216
217/// Structure that tracks all the parsing attempts made on the max position.
218/// We want to give an error hint about parsing rules that succeeded
219/// at the farthest input position.
/// The intuition is that such rules will most likely be what the user initially wanted to write.
221#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
222pub struct ParseAttempts<R> {
223 /// Indicates whether the parsing attempts are tracked.
224 enabled: bool,
    /// Vec of rule call sequences awaiting tokens at the same `max_position`.
    /// If there are several stacks in the vec, all those rule stacks are "equal"
    /// because their attempts occurred at the same position.
    pub call_stacks: Vec<RulesCallStack<R>>,
    /// Tokens that could be placed at `max_position`
    /// in order to get a query valid for the grammar.
    expected_tokens: Vec<ParsingToken>,
    /// Tokens that are prohibited from being placed at `max_position`
    /// in order to get a query valid for the grammar.
    unexpected_tokens: Vec<ParsingToken>,
235 /// Max position at which we were expecting to see one of `expected_tokens`.
236 pub max_position: usize,
237}
238
239impl<R: RuleType> ParseAttempts<R> {
240 /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
241 /// initialized with capacity.
242 pub fn new() -> Self {
243 Self {
244 call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
245 expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
246 unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
247 max_position: 0,
248 enabled: ERROR_DETAIL.load(Ordering::Relaxed),
249 }
250 }
251
252 /// Get number of currently present call stacks.
253 fn call_stacks_number(&self) -> usize {
254 self.call_stacks.len()
255 }
256
    /// Retrieve the tokens that were expected at `max_position`, deduplicated and sorted.
    pub fn expected_tokens(&self) -> Vec<ParsingToken> {
258 self.expected_tokens
259 .iter()
260 .cloned()
261 .collect::<BTreeSet<_>>()
262 .into_iter()
263 .collect()
264 }
265
    /// Retrieve the tokens that were not expected at `max_position`, deduplicated and sorted.
    pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
267 self.unexpected_tokens
268 .iter()
269 .cloned()
270 .collect::<BTreeSet<_>>()
271 .into_iter()
272 .collect()
273 }
274
275 /// Retrieve call stacks.
276 pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
277 self.call_stacks
278 .iter()
279 .cloned()
280 .collect::<BTreeSet<_>>()
281 .into_iter()
282 .collect()
283 }
284
    /// If we've tried to parse a rule whose start position is bigger than the previous
    /// `max_position`, it means we've advanced in our parsing and found a better candidate.
287 ///
288 /// `start_index` is:
289 /// * Number of call stacks present in state at the moment current `rule` was called. The idea
290 /// is that we'd like to update only those stacks that originated from the current `rule` and
291 /// not from those that were called previously.
292 /// * 0 in case we've successfully parsed some token since the moment `rule` was called.
293 fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
294 let mut non_token_call_stacks = Vec::new();
295 let mut token_call_stack_met = false;
296 for call_stack in self.call_stacks.iter().skip(start_index) {
297 if matches!(call_stack.deepest, ParseAttempt::Token) {
298 token_call_stack_met = true;
299 } else {
300 non_token_call_stacks.push(call_stack.clone())
301 }
302 }
303 if token_call_stack_met && non_token_call_stacks.is_empty() {
            // If `non_token_call_stacks` is not empty, we don't want to add a new standalone
            // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
            // rule), since it doesn't give us any useful additional info.
307 non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
308 }
309 self.call_stacks
310 .splice(start_index.., non_token_call_stacks);
311
312 let children_number_over_threshold =
313 self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
314 if children_number_over_threshold {
315 self.call_stacks.truncate(start_index);
316 self.call_stacks
317 .push(RulesCallStack::new(ParseAttempt::Rule(rule)));
318 } else {
319 for call_stack in self.call_stacks.iter_mut().skip(start_index) {
320 if matches!(call_stack.deepest, ParseAttempt::Token) {
321 call_stack.deepest = ParseAttempt::Rule(rule);
322 } else {
323 call_stack.parent = Some(rule);
324 }
325 }
326 }
327 }
328
    /// If the `negative_lookahead` flag is set, it means we've successfully parsed a token while
    /// in the state of negative lookahead and want to track `token` in the `unexpected_tokens`.
    /// Otherwise, we want to track it in the `expected_tokens`. Let's call the chosen vec the
    /// `target_vec`.
332 ///
333 /// In case `position` is:
334 /// * Equal to `max_position`, add `token` to `target_vec`,
335 /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`.
336 #[allow(clippy::comparison_chain)]
337 fn try_add_new_token(
338 &mut self,
339 token: ParsingToken,
340 start_position: usize,
341 position: usize,
342 negative_lookahead: bool,
343 ) {
344 let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
345 let target_vec = if negative_lookahead {
346 &mut attempts.unexpected_tokens
347 } else {
348 &mut attempts.expected_tokens
349 };
350 target_vec.push(token);
351 };
352
353 if position > self.max_position {
354 if negative_lookahead && start_position > self.max_position {
                // We encountered a sequence under negative lookahead.
                // We would like to track only the first failed token in this sequence (whose
                // `start_position` should be equal to `self.max_position`).
358 return;
359 }
360 target_vec_push_token(self);
361
362 if negative_lookahead {
363 // In case of successful parsing of token under negative lookahead the only
364 // thing we'd like to do is to track the token in the `unexpected_tokens`.
365 return;
366 }
367 self.max_position = position;
368 self.expected_tokens.clear();
369 self.unexpected_tokens.clear();
370 self.call_stacks.clear();
371 self.call_stacks
372 .push(RulesCallStack::new(ParseAttempt::Token));
373 } else if position == self.max_position {
374 target_vec_push_token(self);
375 self.call_stacks
376 .push(RulesCallStack::new(ParseAttempt::Token));
377 }
378 }
379
    /// Reset state in case we've successfully parsed some token in
    /// `match_string` or `match_insensitive`.
382 fn nullify_expected_tokens(&mut self, new_max_position: usize) {
383 self.call_stacks.clear();
384 self.expected_tokens.clear();
385 self.unexpected_tokens.clear();
386 self.max_position = new_max_position;
387 }
388}
389
390impl<R: RuleType> Default for ParseAttempts<R> {
391 fn default() -> Self {
392 Self::new()
393 }
394}
395
396/// The complete state of a [`Parser`].
397///
398/// [`Parser`]: trait.Parser.html
399#[derive(Debug)]
400pub struct ParserState<'i, R: RuleType> {
    /// Current position from which we try to apply some parser function.
    /// Initially 0.
    /// E.g., when parsing the query `create user 'Bobby'`, once we've parsed "create" via
    /// `match_insensitive`, `position` moves from 0 to the length of "create".
    ///
    /// E.g., see `match_string` -> `self.position.match_string(string)`, which updates `self.position`.
407 position: Position<'i>,
    /// Queue of rules that are partially (`QueueableToken::Start`) or
    /// completely (`QueueableToken::End`) parsed. When entering a rule, we put it in the queue
    /// in the `Start` state; after all of its sub-logic (subrules or strings) is parsed,
    /// we change it to the `End` state.
412 queue: Vec<QueueableToken<'i, R>>,
413 /// Status set in case specific lookahead logic is used in grammar.
414 /// See `Lookahead` for more information.
415 lookahead: Lookahead,
416 /// Rules that we HAVE expected, tried to parse, but failed.
417 pos_attempts: Vec<R>,
418 /// Rules that we have NOT expected, tried to parse, but failed.
419 neg_attempts: Vec<R>,
420 /// Max position in the query from which we've tried to parse some rule but failed.
421 attempt_pos: usize,
422 /// Current atomicity status. For more information see `Atomicity`.
423 atomicity: Atomicity,
424 /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
425 /// invocations).
426 stack: Stack<Span<'i>>,
427 /// Used for setting max parser calls limit.
428 call_tracker: CallLimitTracker,
    /// In addition to tracking `pos_attempts` and `attempt_pos`
    /// as a pair of (list of rules that we've tried to parse but failed, max parsed position),
    /// this helper struct tracks the rules that we've tried to parse at that same max position.
    ///
    /// Note that we may try to parse several rules at different positions. We want to track only
    /// those rules whose attempt position is bigger, because we consider them nearer to the
    /// query the user really wanted to pass.
    ///
    /// E.g. we have a query `create user "Bobby"`, two root rules, and a shared `Name` rule:
    /// * CreateUser = { "create" ~ "user" ~ Name }
    /// * CreateTable = { "create" ~ "table" ~ Name }
    /// * Name = { SOME_DEFINITION }
    ///
    /// While parsing the query we'll update the tracker position to the start of "Bobby", because
    /// we'd successfully parse "create" + "user" (and not "table").
444 parse_attempts: ParseAttempts<R>,
445}
446
447/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
448///
449/// # Examples
450///
451/// ```
452/// # use pest;
453/// let input = "";
454/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
455/// ```
456#[allow(clippy::perf)]
457pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
458where
459 F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
460{
461 let state = ParserState::new(input);
462
463 match f(state) {
464 Ok(state) => {
465 let len = state.queue.len();
466 Ok(new(Rc::new(state.queue), input, None, 0, len))
467 }
468 Err(mut state) => {
469 let variant = if state.reached_call_limit() {
470 ErrorVariant::CustomError {
471 message: "call limit reached".to_owned(),
472 }
473 } else {
474 state.pos_attempts.sort();
475 state.pos_attempts.dedup();
476 state.neg_attempts.sort();
477 state.neg_attempts.dedup();
478 ErrorVariant::ParsingError {
479 positives: state.pos_attempts.clone(),
480 negatives: state.neg_attempts.clone(),
481 }
482 };
483
484 if state.parse_attempts.enabled {
485 Err(Error::new_from_pos_with_parsing_attempts(
486 variant,
487 Position::new_internal(input, state.attempt_pos),
488 state.parse_attempts.clone(),
489 ))
490 } else {
491 Err(Error::new_from_pos(
492 variant,
493 Position::new_internal(input, state.attempt_pos),
494 ))
495 }
496 }
497 }
498}
499
500impl<'i, R: RuleType> ParserState<'i, R> {
501 /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
502 /// will be passed from closure to closure based on the needs of the specified `Parser`.
503 ///
504 /// # Examples
505 ///
506 /// ```
507 /// # use pest;
508 /// let input = "";
509 /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
510 /// ```
511 pub fn new(input: &'i str) -> Box<Self> {
512 Box::new(ParserState {
513 position: Position::from_start(input),
514 queue: vec![],
515 lookahead: Lookahead::None,
516 pos_attempts: vec![],
517 neg_attempts: vec![],
518 attempt_pos: 0,
519 atomicity: Atomicity::NonAtomic,
520 stack: Stack::new(),
521 call_tracker: Default::default(),
522 parse_attempts: ParseAttempts::new(),
523 })
524 }
525
    /// Get all parse attempts after the parsing process has finished.
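    ///
    /// A minimal sketch of inspecting the attempts directly on a `ParserState`
    /// (the `Rule` enum is only a placeholder; detailed tracking additionally
    /// requires `set_error_detail(true)` to have been called before the state
    /// was created):
    ///
    /// ```
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// # enum Rule {}
    /// let state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new("ab");
    /// let state = state.match_string("ab").unwrap();
    /// let attempts = state.get_parse_attempts();
    /// assert!(attempts.expected_tokens().is_empty());
    /// ```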
527 pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
528 &self.parse_attempts
529 }
530
531 /// Returns a reference to the current `Position` of the `ParserState`.
532 ///
533 /// # Examples
534 ///
535 /// ```
536 /// # use pest;
537 /// # #[allow(non_camel_case_types)]
538 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
539 /// enum Rule {
540 /// ab
541 /// }
542 ///
543 /// let input = "ab";
544 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
545 /// let position = state.position();
546 /// assert_eq!(position.pos(), 0);
547 /// ```
548 pub fn position(&self) -> &Position<'i> {
549 &self.position
550 }
551
552 /// Returns the current atomicity of the `ParserState`.
553 ///
554 /// # Examples
555 ///
556 /// ```
557 /// # use pest;
558 /// # use pest::Atomicity;
559 /// # #[allow(non_camel_case_types)]
560 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
561 /// enum Rule {
562 /// ab
563 /// }
564 ///
565 /// let input = "ab";
566 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
567 /// let atomicity = state.atomicity();
568 /// assert_eq!(atomicity, Atomicity::NonAtomic);
569 /// ```
570 pub fn atomicity(&self) -> Atomicity {
571 self.atomicity
572 }
573
574 #[inline]
575 fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
576 if self.call_tracker.limit_reached() {
577 return Err(self);
578 }
579 self.call_tracker.increment_depth();
580 Ok(self)
581 }
582
583 #[inline]
584 fn reached_call_limit(&self) -> bool {
585 self.call_tracker.limit_reached()
586 }
587
588 /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
589 /// meant to match the rule.
590 ///
591 /// # Examples
592 ///
593 /// ```
594 /// # use pest;
595 /// # #[allow(non_camel_case_types)]
596 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
597 /// enum Rule {
598 /// a
599 /// }
600 ///
601 /// let input = "a";
602 /// let pairs: Vec<_> = pest::state(input, |state| {
603 /// state.rule(Rule::a, |s| Ok(s))
604 /// }).unwrap().collect();
605 ///
606 /// assert_eq!(pairs.len(), 1);
607 /// ```
608 #[inline]
609 pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
610 where
611 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
612 {
613 self = self.inc_call_check_limit()?;
614 // Position from which this `rule` starts parsing.
615 let actual_pos = self.position.pos();
616 // Remember index of the `self.queue` element that will be associated with this `rule`.
617 let index = self.queue.len();
618
619 let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
620 (self.pos_attempts.len(), self.neg_attempts.len())
621 } else {
622 // Attempts have not been cleared yet since the attempt_pos is older.
623 (0, 0)
624 };
625
626 if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
627 // Pair's position will only be known after running the closure.
628 self.queue.push(QueueableToken::Start {
629 end_token_index: 0,
630 input_pos: actual_pos,
631 });
632 }
633
        // Remember the number of attempts before the `f` call.
        // Using this variable in `track`, we can tell how many attempts were added
        // while traversing children rules.
        let attempts = self.attempts_at(actual_pos);
        // Number of call stacks present in `self.parse_attempts` before the `f` call.
        // We need to remember this number only in case no farther attempt was found.
        // E.g. we are handling a rule at whose start position two children rules may be
        // tested. By the time we return from the `f` call below, there will be two more
        // children rules in `self.parse_attempts` that we'll consider to be the children
        // of the current `rule`.
        let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
        // Max parsing attempt position at the moment of `rule` handling.
        // In case it's raised during children rules handling, it means
        // we've made parsing progress.
        let remember_max_position = self.parse_attempts.max_position;
649
650 let result = f(self);
651
652 let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
653 if new_state.parse_attempts.max_position > remember_max_position {
                // It means that one of the `match_string` or e.g. `match_insensitive` function calls
                // has already erased `self.parse_attempts.call_stacks` and that the previously
                // remembered values are not valid anymore.
657 remember_call_stacks_number = 0;
658 }
659 if !matches!(new_state.atomicity, Atomicity::Atomic) {
660 new_state
661 .parse_attempts
662 .try_add_new_stack_rule(rule, remember_call_stacks_number);
663 }
664 };
665
666 match result {
667 Ok(mut new_state) => {
668 if new_state.lookahead == Lookahead::Negative {
669 new_state.track(
670 rule,
671 actual_pos,
672 pos_attempts_index,
673 neg_attempts_index,
674 attempts,
675 );
676 }
677
678 if new_state.lookahead == Lookahead::None
679 && new_state.atomicity != Atomicity::Atomic
680 {
681 // Index of `QueueableToken::End` token added below
682 // that corresponds to previously added `QueueableToken::Start` token.
683 let new_index = new_state.queue.len();
684 match new_state.queue[index] {
685 QueueableToken::Start {
686 ref mut end_token_index,
687 ..
688 } => *end_token_index = new_index,
689 _ => unreachable!(),
690 };
691
692 let new_pos = new_state.position.pos();
693
694 new_state.queue.push(QueueableToken::End {
695 start_token_index: index,
696 rule,
697 tag: None,
698 input_pos: new_pos,
699 });
700 }
701
                // Note that we need to count positive parsing results too, because we can fail
                // in an optional rule call inside which the farthest parsed token may lie.
705 if new_state.parse_attempts.enabled {
706 try_add_rule_to_stack(&mut new_state);
707 }
708 Ok(new_state)
709 }
710 Err(mut new_state) => {
711 if new_state.lookahead != Lookahead::Negative {
712 new_state.track(
713 rule,
714 actual_pos,
715 pos_attempts_index,
716 neg_attempts_index,
717 attempts,
718 );
719 if new_state.parse_attempts.enabled {
720 try_add_rule_to_stack(&mut new_state);
721 }
722 }
723
724 if new_state.lookahead == Lookahead::None
725 && new_state.atomicity != Atomicity::Atomic
726 {
727 new_state.queue.truncate(index);
728 }
729
730 Err(new_state)
731 }
732 }
733 }
734
    /// Tag the current node.
    ///
    /// # Examples
    ///
    /// Recognize a sequence of characters and tag one of them:
740 ///
741 /// ```
742 /// use pest::{state, ParseResult, ParserState, iterators::Pair};
743 /// #[allow(non_camel_case_types)]
744 /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
745 /// enum Rule {
746 /// character,
747 /// }
748 /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
749 /// state.sequence(|state| {
750 /// character(state)
751 /// .and_then(|state| character(state))
752 /// .and_then(|state| character(state))
753 /// .and_then(|state| state.tag_node("c"))
754 /// .and_then(|state| character(state))
755 /// })
756 /// }
757 /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
758 /// state.rule(Rule::character, |state| state.match_range('a'..'z'))
759 /// }
760 ///
761 /// let input = "abcd";
762 /// let pairs = state(input, mark_c).unwrap();
763 /// // find all node tag as `c`
764 /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
765 /// assert_eq!(find[0].as_str(), "c")
766 /// ```
767 #[inline]
768 pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
769 if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
770 *old = Some(tag)
771 }
772 Ok(self)
773 }
774
    /// Get the number of allowed rule attempts plus prohibited rule attempts.
776 fn attempts_at(&self, pos: usize) -> usize {
777 if self.attempt_pos == pos {
778 self.pos_attempts.len() + self.neg_attempts.len()
779 } else {
780 0
781 }
782 }
783
784 fn track(
785 &mut self,
786 rule: R,
787 pos: usize,
788 pos_attempts_index: usize,
789 neg_attempts_index: usize,
790 prev_attempts: usize,
791 ) {
792 if self.atomicity == Atomicity::Atomic {
793 return;
794 }
795
796 // If nested rules made no progress, there is no use to report them; it's only useful to
797 // track the current rule, the exception being when only one attempt has been made during
798 // the children rules.
799 let curr_attempts = self.attempts_at(pos);
800 if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
801 return;
802 }
803
804 if pos == self.attempt_pos {
805 self.pos_attempts.truncate(pos_attempts_index);
806 self.neg_attempts.truncate(neg_attempts_index);
807 }
808
809 if pos > self.attempt_pos {
810 self.pos_attempts.clear();
811 self.neg_attempts.clear();
812 self.attempt_pos = pos;
813 }
814
815 let attempts = if self.lookahead != Lookahead::Negative {
816 &mut self.pos_attempts
817 } else {
818 &mut self.neg_attempts
819 };
820
821 if pos == self.attempt_pos {
822 attempts.push(rule);
823 }
824 }
825
826 /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
827 /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
828 /// `Box<ParserState>` otherwise.
829 ///
    /// This method is useful for parsing sequences that only match as a whole, which usually come
    /// in the form of chained `Result`s with
    /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
    ///
835 /// # Examples
836 ///
837 /// ```
838 /// # use pest;
839 /// # #[allow(non_camel_case_types)]
840 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
841 /// enum Rule {
842 /// a
843 /// }
844 ///
845 /// let input = "a";
846 /// let pairs: Vec<_> = pest::state(input, |state| {
847 /// state.sequence(|s| {
848 /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
849 /// s.match_string("b")
850 /// })
851 /// }).or_else(|s| {
852 /// Ok(s)
853 /// })
854 /// }).unwrap().collect();
855 ///
856 /// assert_eq!(pairs.len(), 0);
857 /// ```
858 #[inline]
859 pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
860 where
861 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
862 {
863 self = self.inc_call_check_limit()?;
864 let token_index = self.queue.len();
865 let initial_pos = self.position;
866
867 let result = f(self);
868
869 match result {
870 Ok(new_state) => Ok(new_state),
871 Err(mut new_state) => {
872 // Restore the initial position and truncate the token queue.
873 new_state.position = initial_pos;
874 new_state.queue.truncate(token_index);
875 Err(new_state)
876 }
877 }
878 }
879
    /// Repeatedly applies the transformation provided by `f` to the `Box<ParserState>` until `f`
    /// returns an `Err`. Returns `Ok` with the updated `Box<ParserState>` carried by that `Err`,
    /// so the repetition itself never fails.
882 ///
883 /// # Examples
884 ///
885 /// ```
886 /// # use pest;
887 /// # #[allow(non_camel_case_types)]
888 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
889 /// enum Rule {
890 /// ab
891 /// }
892 ///
893 /// let input = "aab";
894 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
895 /// let mut result = state.repeat(|s| {
896 /// s.match_string("a")
897 /// });
898 /// assert!(result.is_ok());
899 /// assert_eq!(result.unwrap().position().pos(), 2);
900 ///
901 /// state = pest::ParserState::new(input);
902 /// result = state.repeat(|s| {
903 /// s.match_string("b")
904 /// });
905 /// assert!(result.is_ok());
906 /// assert_eq!(result.unwrap().position().pos(), 0);
907 /// ```
908 #[inline]
909 pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
910 where
911 F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
912 {
913 self = self.inc_call_check_limit()?;
914 let mut result = f(self);
915
916 loop {
917 match result {
918 Ok(state) => result = f(state),
919 Err(state) => return Ok(state),
920 };
921 }
922 }
923
924 /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
925 /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
926 ///
927 /// # Examples
928 ///
929 /// ```
930 /// # use pest;
931 /// # #[allow(non_camel_case_types)]
932 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
933 /// enum Rule {
934 /// ab
935 /// }
936 ///
937 /// let input = "ab";
938 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
939 /// let result = state.optional(|s| {
940 /// s.match_string("ab")
941 /// });
942 /// assert!(result.is_ok());
943 ///
944 /// state = pest::ParserState::new(input);
945 /// let result = state.optional(|s| {
946 /// s.match_string("ac")
947 /// });
948 /// assert!(result.is_ok());
949 /// ```
950 #[inline]
951 pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
952 where
953 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
954 {
955 self = self.inc_call_check_limit()?;
956 match f(self) {
957 Ok(state) | Err(state) => Ok(state),
958 }
959 }
960
961 /// Generic function to handle result of char/string/range parsing
962 /// in order to track (un)expected tokens.
963 fn handle_token_parse_result(
964 &mut self,
965 start_position: usize,
966 token: ParsingToken,
967 parse_succeeded: bool,
968 ) {
        // New position after the tracked parsed element in case `parse_succeeded` is true.
        // Position of the parsing failure otherwise.
971 let current_pos = self.position.pos();
972
973 if parse_succeeded {
974 if self.lookahead == Lookahead::Negative {
975 self.parse_attempts
976 .try_add_new_token(token, start_position, current_pos, true);
977 } else if current_pos > self.parse_attempts.max_position {
978 self.parse_attempts.nullify_expected_tokens(current_pos);
979 }
980 } else if self.lookahead != Lookahead::Negative {
981 self.parse_attempts
982 .try_add_new_token(token, start_position, current_pos, false);
983 }
984 }
985
986 /// Attempts to match a single character based on a filter function. Returns `Ok` with the
987 /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
988 /// otherwise.
989 ///
990 /// # Examples
991 ///
992 /// ```
993 /// # use pest;
994 /// # #[allow(non_camel_case_types)]
995 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
996 /// enum Rule {}
997 ///
998 /// let input = "ab";
999 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1000 /// let result = state.match_char_by(|c| c.is_ascii());
1001 /// assert!(result.is_ok());
1002 /// assert_eq!(result.unwrap().position().pos(), 1);
1003 ///
1004 /// let input = "❤";
1005 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1006 /// let result = state.match_char_by(|c| c.is_ascii());
1007 /// assert!(result.is_err());
1008 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1009 /// ```
1010 #[inline]
1011 pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1012 where
1013 F: FnOnce(char) -> bool,
1014 {
1015 let start_position = self.position.pos();
1016 let succeeded = self.position.match_char_by(f);
1017 if self.parse_attempts.enabled {
1018 let token = ParsingToken::BuiltInRule;
1019 self.handle_token_parse_result(start_position, token, succeeded);
1020 }
1021 if succeeded {
1022 Ok(self)
1023 } else {
1024 Err(self)
1025 }
1026 }
1027
1028 /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
1029 /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
1030 ///
1031 /// # Examples
1032 ///
1033 /// ```
1034 /// # use pest;
1035 /// # #[allow(non_camel_case_types)]
1036 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1037 /// enum Rule {}
1038 ///
1039 /// let input = "ab";
1040 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1041 /// let mut result = state.match_string("ab");
1042 /// assert!(result.is_ok());
1043 /// assert_eq!(result.unwrap().position().pos(), 2);
1044 ///
1045 /// state = pest::ParserState::new(input);
1046 /// result = state.match_string("ac");
1047 /// assert!(result.is_err());
1048 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1049 /// ```
1050 #[inline]
1051 pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1052 let start_position = self.position.pos();
1053 let succeeded = self.position.match_string(string);
1054 if self.parse_attempts.enabled {
1055 let token = ParsingToken::Sensitive {
1056 token: String::from(string),
1057 };
1058 self.handle_token_parse_result(start_position, token, succeeded);
1059 }
1060 if succeeded {
1061 Ok(self)
1062 } else {
1063 Err(self)
1064 }
1065 }
1066
1067 /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
1068 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1069 ///
1070 /// # Examples
1071 ///
1072 /// ```
1073 /// # use pest;
1074 /// # #[allow(non_camel_case_types)]
1075 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1076 /// enum Rule {}
1077 ///
1078 /// let input = "ab";
1079 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1080 /// let mut result = state.match_insensitive("AB");
1081 /// assert!(result.is_ok());
1082 /// assert_eq!(result.unwrap().position().pos(), 2);
1083 ///
1084 /// state = pest::ParserState::new(input);
1085 /// result = state.match_insensitive("AC");
1086 /// assert!(result.is_err());
1087 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1088 /// ```
1089 #[inline]
1090 pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1091 let start_position: usize = self.position().pos();
1092 let succeeded = self.position.match_insensitive(string);
1093 if self.parse_attempts.enabled {
1094 let token = ParsingToken::Insensitive {
1095 token: String::from(string),
1096 };
1097 self.handle_token_parse_result(start_position, token, succeeded);
1098 }
1099 if succeeded {
1100 Ok(self)
1101 } else {
1102 Err(self)
1103 }
1104 }
1105
1106 /// Attempts to match a single character from the given range. Returns `Ok` with the updated
1107 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1108 ///
1109 /// # Caution
1110 /// The provided `range` is interpreted as inclusive.
1111 ///
1112 /// # Examples
1113 ///
1114 /// ```
1115 /// # use pest;
1116 /// # #[allow(non_camel_case_types)]
1117 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1118 /// enum Rule {}
1119 ///
1120 /// let input = "ab";
1121 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1122 /// let mut result = state.match_range('a'..'z');
1123 /// assert!(result.is_ok());
1124 /// assert_eq!(result.unwrap().position().pos(), 1);
1125 ///
1126 /// state = pest::ParserState::new(input);
1127 /// result = state.match_range('A'..'Z');
1128 /// assert!(result.is_err());
1129 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1130 /// ```
1131 #[inline]
1132 pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
1133 let start_position = self.position().pos();
1134 let token = ParsingToken::Range {
1135 start: range.start,
1136 end: range.end,
1137 };
1138 let succeeded = self.position.match_range(range);
1139 if self.parse_attempts.enabled {
1140 self.handle_token_parse_result(start_position, token, succeeded);
1141 }
1142 if succeeded {
1143 Ok(self)
1144 } else {
1145 Err(self)
1146 }
1147 }
1148
1149 /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
1150 /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1151 ///
1152 /// # Examples
1153 ///
1154 /// ```
1155 /// # use pest;
1156 /// # #[allow(non_camel_case_types)]
1157 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1158 /// enum Rule {}
1159 ///
1160 /// let input = "ab";
1161 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1162 /// let mut result = state.skip(1);
1163 /// assert!(result.is_ok());
1164 /// assert_eq!(result.unwrap().position().pos(), 1);
1165 ///
1166 /// state = pest::ParserState::new(input);
1167 /// result = state.skip(3);
1168 /// assert!(result.is_err());
1169 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1170 /// ```
1171 #[inline]
1172 pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
1173 if self.position.skip(n) {
1174 Ok(self)
1175 } else {
1176 Err(self)
1177 }
1178 }
1179
1180 /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
1181 /// updated `Box<ParserState>` whether or not one of the strings is found.
1182 ///
1183 /// # Examples
1184 ///
1185 /// ```
1186 /// # use pest;
1187 /// # #[allow(non_camel_case_types)]
1188 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1189 /// enum Rule {}
1190 ///
1191 /// let input = "abcd";
1192 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1193 /// let mut result = state.skip_until(&["c", "d"]);
1194 /// assert!(result.is_ok());
1195 /// assert_eq!(result.unwrap().position().pos(), 2);
1196 /// ```
1197 #[inline]
1198 pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
1199 self.position.skip_until(strings);
1200 Ok(self)
1201 }
1202
1203 /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
1204 /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
1205 ///
1206 /// # Examples
1207 ///
1208 /// ```
1209 /// # use pest;
1210 /// # #[allow(non_camel_case_types)]
1211 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1212 /// enum Rule {}
1213 ///
1214 /// let input = "ab";
1215 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1216 /// let mut result = state.start_of_input();
1217 /// assert!(result.is_ok());
1218 ///
1219 /// state = pest::ParserState::new(input);
1220 /// state = state.match_string("ab").unwrap();
1221 /// result = state.start_of_input();
1222 /// assert!(result.is_err());
1223 /// ```
1224 #[inline]
1225 pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1226 if self.position.at_start() {
1227 Ok(self)
1228 } else {
1229 Err(self)
1230 }
1231 }
1232
1233 /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
1234 /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
1235 ///
1236 /// # Examples
1237 ///
1238 /// ```
1239 /// # use pest;
1240 /// # #[allow(non_camel_case_types)]
1241 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1242 /// enum Rule {}
1243 ///
1244 /// let input = "ab";
1245 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1246 /// let mut result = state.end_of_input();
1247 /// assert!(result.is_err());
1248 ///
1249 /// state = pest::ParserState::new(input);
1250 /// state = state.match_string("ab").unwrap();
1251 /// result = state.end_of_input();
1252 /// assert!(result.is_ok());
1253 /// ```
1254 #[inline]
1255 pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1256 if self.position.at_end() {
1257 Ok(self)
1258 } else {
1259 Err(self)
1260 }
1261 }
1262
1263 /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
1264 /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
1265 /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
1266 /// together, negating the `Result`.
1267 ///
1268 /// # Examples
1269 ///
1270 /// ```
1271 /// # use pest;
1272 /// # #[allow(non_camel_case_types)]
1273 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1274 /// enum Rule {
1275 /// a
1276 /// }
1277 ///
1278 /// let input = "a";
1279 /// let pairs: Vec<_> = pest::state(input, |state| {
1280 /// state.lookahead(true, |state| {
1281 /// state.rule(Rule::a, |s| Ok(s))
1282 /// })
1283 /// }).unwrap().collect();
1284 ///
1285 /// assert_eq!(pairs.len(), 0);
1286 /// ```
1287 #[inline]
1288 pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
1289 where
1290 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1291 {
1292 self = self.inc_call_check_limit()?;
1293 let initial_lookahead = self.lookahead;
1294
1295 self.lookahead = if is_positive {
1296 match initial_lookahead {
1297 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
1298 Lookahead::Negative => Lookahead::Negative,
1299 }
1300 } else {
1301 match initial_lookahead {
1302 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
1303 Lookahead::Negative => Lookahead::Positive,
1304 }
1305 };
1306
1307 let initial_pos = self.position;
1308
1309 let result = f(self.checkpoint());
1310
1311 let result_state = match result {
1312 Ok(mut new_state) => {
1313 new_state.position = initial_pos;
1314 new_state.lookahead = initial_lookahead;
1315 Ok(new_state.restore())
1316 }
1317 Err(mut new_state) => {
1318 new_state.position = initial_pos;
1319 new_state.lookahead = initial_lookahead;
1320 Err(new_state.restore())
1321 }
1322 };
1323
1324 if is_positive {
1325 result_state
1326 } else {
1327 match result_state {
1328 Ok(state) => Err(state),
1329 Err(state) => Ok(state),
1330 }
1331 }
1332 }
1333
    /// Transformation which stops `Token`s from being generated according to `atomicity`.
    /// Used as a wrapper over a `rule` (or even another `atomic`) call.
1336 ///
1337 /// # Examples
1338 ///
1339 /// ```
1340 /// # use pest::{self, Atomicity};
1341 /// # #[allow(non_camel_case_types)]
1342 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1343 /// enum Rule {
1344 /// a
1345 /// }
1346 ///
1347 /// let input = "a";
1348 /// let pairs: Vec<_> = pest::state(input, |state| {
1349 /// state.atomic(Atomicity::Atomic, |s| {
1350 /// s.rule(Rule::a, |s| Ok(s))
1351 /// })
1352 /// }).unwrap().collect();
1353 ///
1354 /// assert_eq!(pairs.len(), 0);
1355 /// ```
1356 #[inline]
1357 pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
1358 where
1359 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1360 {
1361 self = self.inc_call_check_limit()?;
        // In case the child parsing call is another `atomic`, it will have its own atomicity status.
        let initial_atomicity = self.atomicity;
        // In case the child atomicity is the same as the one we've demanded, we needn't do anything.
        // E.g. we have the following rules:
        // * RootRule = @{ InnerRule }
        // * InnerRule = @{ ... }
        let should_toggle = self.atomicity != atomicity;
1369
1370 // Note that we take atomicity of the top rule and not of the leaf (inner).
1371 if should_toggle {
1372 self.atomicity = atomicity;
1373 }
1374
1375 let result = f(self);
1376
1377 match result {
1378 Ok(mut new_state) => {
1379 if should_toggle {
1380 new_state.atomicity = initial_atomicity;
1381 }
1382 Ok(new_state)
1383 }
1384 Err(mut new_state) => {
1385 if should_toggle {
1386 new_state.atomicity = initial_atomicity;
1387 }
1388 Err(new_state)
1389 }
1390 }
1391 }
1392
1393 /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
1394 /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
1395 /// called successfully, or `Err(Box<ParserState>)` otherwise.
1396 ///
1397 /// # Examples
1398 ///
1399 /// ```
1400 /// # use pest;
1401 /// # #[allow(non_camel_case_types)]
1402 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1403 /// enum Rule {}
1404 ///
1405 /// let input = "ab";
1406 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1407 /// let mut result = state.stack_push(|state| state.match_string("a"));
1408 /// assert!(result.is_ok());
1409 /// assert_eq!(result.unwrap().position().pos(), 1);
1410 /// ```
1411 #[inline]
1412 pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1413 where
1414 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1415 {
1416 self = self.inc_call_check_limit()?;
1417 let start = self.position;
1418
1419 let result = f(self);
1420
1421 match result {
1422 Ok(mut state) => {
1423 let end = state.position;
1424 state.stack.push(start.span(&end));
1425 Ok(state)
1426 }
1427 Err(state) => Err(state),
1428 }
1429 }
1430
1431 /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1432 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1433 ///
1434 /// # Examples
1435 ///
1436 /// ```
1437 /// # use pest;
1438 /// # #[allow(non_camel_case_types)]
1439 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1440 /// enum Rule {}
1441 ///
1442 /// let input = "aa";
1443 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1444 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1445 /// |state| state.stack_peek()
1446 /// );
1447 /// assert!(result.is_ok());
1448 /// assert_eq!(result.unwrap().position().pos(), 2);
1449 /// ```
1450 #[inline]
1451 pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1452 let string = self
1453 .stack
1454 .peek()
1455 .expect("peek was called on empty stack")
1456 .as_str();
1457 self.match_string(string)
1458 }
1459
1460 /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1461 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1462 ///
1463 /// # Examples
1464 ///
1465 /// ```
1466 /// # use pest;
1467 /// # #[allow(non_camel_case_types)]
1468 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1469 /// enum Rule {}
1470 ///
1471 /// let input = "aa";
1472 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1473 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1474 /// |state| state.stack_pop()
1475 /// );
1476 /// assert!(result.is_ok());
1477 /// assert_eq!(result.unwrap().position().pos(), 2);
1478 /// ```
1479 #[inline]
1480 pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1481 let string = self
1482 .stack
1483 .pop()
1484 .expect("pop was called on empty stack")
1485 .as_str();
1486 self.match_string(string)
1487 }
1488
1489 /// Matches part of the state of the stack.
1490 ///
1491 /// # Examples
1492 ///
1493 /// ```
1494 /// # use pest::{self, MatchDir};
1495 /// # #[allow(non_camel_case_types)]
1496 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1497 /// enum Rule {}
1498 ///
1499 /// let input = "abcd cd cb";
1500 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1501 /// let mut result = state
1502 /// .stack_push(|state| state.match_string("a"))
1503 /// .and_then(|state| state.stack_push(|state| state.match_string("b")))
1504 /// .and_then(|state| state.stack_push(|state| state.match_string("c")))
1505 /// .and_then(|state| state.stack_push(|state| state.match_string("d")))
1506 /// .and_then(|state| state.match_string(" "))
1507 /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1508 /// .and_then(|state| state.match_string(" "))
1509 /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1510 /// assert!(result.is_ok());
1511 /// assert_eq!(result.unwrap().position().pos(), 10);
1512 /// ```
1513 #[inline]
1514 pub fn stack_match_peek_slice(
1515 mut self: Box<Self>,
1516 start: i32,
1517 end: Option<i32>,
1518 match_dir: MatchDir,
1519 ) -> ParseResult<Box<Self>> {
1520 let range = match constrain_idxs(start, end, self.stack.len()) {
1521 Some(r) => r,
1522 None => return Err(self),
1523 };
        // Succeed if an empty sequence is requested.
1525 if range.end <= range.start {
1526 return Ok(self);
1527 }
1528
1529 let mut position = self.position;
1530 let result = {
1531 let mut iter_b2t = self.stack[range].iter();
1532 let matcher = |span: &Span<'_>| position.match_string(span.as_str());
1533 match match_dir {
1534 MatchDir::BottomToTop => iter_b2t.all(matcher),
1535 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1536 }
1537 };
1538 if result {
1539 self.position = position;
1540 Ok(self)
1541 } else {
1542 Err(self)
1543 }
1544 }
1545
1546 /// Matches the full state of the stack.
1547 ///
1548 /// # Examples
1549 ///
1550 /// ```
1551 /// # use pest;
1552 /// # #[allow(non_camel_case_types)]
1553 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1554 /// enum Rule {}
1555 ///
1556 /// let input = "abba";
1557 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1558 /// let mut result = state
1559 /// .stack_push(|state| state.match_string("a"))
1560 /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1561 /// .and_then(|state| state.stack_match_peek());
1562 /// assert!(result.is_ok());
1563 /// assert_eq!(result.unwrap().position().pos(), 4);
1564 /// ```
1565 #[inline]
1566 pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1567 self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1568 }
1569
    /// Matches the full state of the stack. This method will clear the stack as it evaluates.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {}
    ///
    /// let input = "aaaa";
    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
    ///     state.stack_push(|state| state.match_string("a"))
    /// }).and_then(|state| state.stack_match_pop());
    /// assert!(result.is_ok());
    /// assert_eq!(result.unwrap().position().pos(), 4);
    /// ```
1588 #[inline]
1589 pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1590 let mut position = self.position;
1591 let mut result = true;
1592 while let Some(span) = self.stack.pop() {
1593 result = position.match_string(span.as_str());
1594 if !result {
1595 break;
1596 }
1597 }
1598
1599 if result {
1600 self.position = position;
1601 Ok(self)
1602 } else {
1603 Err(self)
1604 }
1605 }
1606
1607 /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1608 /// `Err(Box<ParserState>)` otherwise.
1609 ///
1610 /// # Examples
1611 ///
1612 /// ```
1613 /// # use pest;
1614 /// # #[allow(non_camel_case_types)]
1615 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1616 /// enum Rule {}
1617 ///
1618 /// let input = "aa";
1619 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1620 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1621 /// |state| state.stack_drop()
1622 /// );
1623 /// assert!(result.is_ok());
1624 /// assert_eq!(result.unwrap().position().pos(), 1);
1625 /// ```
1626 #[inline]
1627 pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1628 match self.stack.pop() {
1629 Some(_) => Ok(self),
1630 None => Err(self),
1631 }
1632 }
1633
1634 /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1635 /// this method only restores the stack.
1636 ///
1637 /// # Examples
1638 ///
1639 /// ```
1640 /// # use pest;
1641 /// # #[allow(non_camel_case_types)]
1642 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1643 /// enum Rule {}
1644 ///
1645 /// let input = "ab";
1646 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1647 /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1648 /// state.match_string("a")).and_then(|state| state.match_string("a"))
1649 /// );
1650 ///
1651 /// assert!(result.is_err());
1652 ///
    /// // Since the rule doesn't match, the "a" pushed to the stack will be removed.
1654 /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1655 /// assert!(catch_panic.is_err());
1656 /// ```
1657 #[inline]
1658 pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1659 where
1660 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1661 {
1662 match f(self.checkpoint()) {
1663 Ok(state) => Ok(state.checkpoint_ok()),
1664 Err(state) => Err(state.restore()),
1665 }
1666 }
1667
1668 // Mark the current state as a checkpoint and return the `Box`.
1669 #[inline]
1670 pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1671 self.stack.snapshot();
1672 self
1673 }
1674
1675 // The checkpoint was cleared successfully
1676 // so remove it without touching other stack state.
1677 #[inline]
1678 pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1679 self.stack.clear_snapshot();
1680 self
1681 }
1682
1683 // Restore the current state to the most recent checkpoint.
1684 #[inline]
1685 pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1686 self.stack.restore();
1687 self
1688 }
1689}
1690
/// Helper function used only when stack operations (PUSH/POP) are used in the grammar.
1692fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1693 let start_norm = normalize_index(start, len)?;
1694 let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1695 Some(start_norm..end_norm)
1696}
1697
/// `constrain_idxs` helper function.
/// Normalizes the index using its sequence's length.
/// Returns `None` if the normalized index is out of bounds.
1701fn normalize_index(i: i32, len: usize) -> Option<usize> {
1702 if i > len as i32 {
1703 None
1704 } else if i >= 0 {
1705 Some(i as usize)
1706 } else {
1707 let real_i = len as i32 + i;
1708 if real_i >= 0 {
1709 Some(real_i as usize)
1710 } else {
1711 None
1712 }
1713 }
1714}
1715
1716#[cfg(test)]
1717mod test {
1718 use super::*;
1719
1720 #[test]
1721 fn normalize_index_pos() {
1722 assert_eq!(normalize_index(4, 6), Some(4));
1723 assert_eq!(normalize_index(5, 5), Some(5));
1724 assert_eq!(normalize_index(6, 3), None);
1725 }
1726
1727 #[test]
1728 fn normalize_index_neg() {
1729 assert_eq!(normalize_index(-4, 6), Some(2));
1730 assert_eq!(normalize_index(-5, 5), Some(0));
1731 assert_eq!(normalize_index(-6, 3), None);
1732 }
1733}