pest/
parser_state.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! The core functionality of parsing grammar.
11//! State of parser during the process of rules handling.
12
13use alloc::borrow::ToOwned;
14use alloc::boxed::Box;
15use alloc::collections::BTreeSet;
16use alloc::rc::Rc;
17use alloc::string::String;
18use alloc::vec;
19use alloc::vec::Vec;
20use core::fmt::{Debug, Display, Formatter};
21use core::num::NonZeroUsize;
22use core::ops::Range;
23use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24
25use crate::error::{Error, ErrorVariant};
26use crate::iterators::pairs::new;
27use crate::iterators::{pairs, QueueableToken};
28use crate::position::Position;
29use crate::span::Span;
30use crate::stack::Stack;
31use crate::RuleType;
32
33/// The current lookahead status of a [`ParserState`].
34///
35/// [`ParserState`]: struct.ParserState.html
36#[derive(Clone, Copy, Debug, Eq, PartialEq)]
37pub enum Lookahead {
38    /// The positive predicate, written as an ampersand &,
39    /// attempts to match its inner expression.
40    /// If the inner expression succeeds, parsing continues,
41    /// but at the same position as the predicate —
42    /// &foo ~ bar is thus a kind of "AND" statement:
43    /// "the input string must match foo AND bar".
44    /// If the inner expression fails,
45    /// the whole expression fails too.
46    Positive,
47    /// The negative predicate, written as an exclamation mark !,
48    /// attempts to match its inner expression.
49    /// If the inner expression fails, the predicate succeeds
50    /// and parsing continues at the same position as the predicate.
51    /// If the inner expression succeeds, the predicate fails —
52    /// !foo ~ bar is thus a kind of "NOT" statement:
53    /// "the input string must match bar but NOT foo".
54    Negative,
55    /// No lookahead (i.e. it will consume input).
56    None,
57}
58
59/// The current atomicity of a [`ParserState`].
60///
61/// [`ParserState`]: struct.ParserState.html
62#[derive(Clone, Copy, Debug, Eq, PartialEq)]
63pub enum Atomicity {
64    /// prevents implicit whitespace: inside an atomic rule,
65    /// the tilde ~ means "immediately followed by",
66    /// and repetition operators (asterisk * and plus sign +)
67    /// have no implicit separation. In addition, all other rules
68    /// called from an atomic rule are also treated as atomic.
69    /// (interior matching rules are silent)
70    Atomic,
71    /// The same as atomic, but inner tokens are produced as normal.
72    CompoundAtomic,
73    /// implicit whitespace is enabled
74    NonAtomic,
75}
76
77/// Type alias to simplify specifying the return value of chained closures.
78pub type ParseResult<S> = Result<S, S>;
79
80/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
81#[derive(Clone, Copy, Debug, Eq, PartialEq)]
82pub enum MatchDir {
83    /// from the bottom to the top of the stack
84    BottomToTop,
85    /// from the top to the bottom of the stack
86    TopToBottom,
87}
88
89static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
90
91/// Sets the maximum call limit for the parser state
92/// to prevent stack overflows or excessive execution times
93/// in some grammars.
94/// If set, the calls are tracked as a running total
95/// over all non-terminal rules that can nest closures
96/// (which are passed to transform the parser state).
97///
98/// # Arguments
99///
100/// * `limit` - The maximum number of calls. If None,
101///             the number of calls is unlimited.
102pub fn set_call_limit(limit: Option<NonZeroUsize>) {
103    CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
104}
105
106static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
107
108/// Sets whether information for more error details
109/// should be collected. This is useful for debugging
110/// parser errors (as it leads to more comprehensive
111/// error messages), but it has a higher performance cost.
112/// (hence, it's off by default)
113///
114/// # Arguments
115///
116/// * `enabled` - Whether to enable the collection for
117///               more error details.
118pub fn set_error_detail(enabled: bool) {
119    ERROR_DETAIL.store(enabled, Ordering::Relaxed);
120}
121
122#[derive(Debug)]
123struct CallLimitTracker {
124    current_call_limit: Option<(usize, usize)>,
125}
126
127impl Default for CallLimitTracker {
128    fn default() -> Self {
129        let limit = CALL_LIMIT.load(Ordering::Relaxed);
130        let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
131        Self { current_call_limit }
132    }
133}
134
135impl CallLimitTracker {
136    fn limit_reached(&self) -> bool {
137        self.current_call_limit
138            .map_or(false, |(current, limit)| current >= limit)
139    }
140
141    fn increment_depth(&mut self) {
142        if let Some((current, _)) = &mut self.current_call_limit {
143            *current += 1;
144        }
145    }
146}
147
148/// Number of call stacks that may result from a sequence of rules parsing.
149const CALL_STACK_INITIAL_CAPACITY: usize = 20;
150/// Max (un)expected number of tokens that we may see on the parsing error position.
151const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
152/// Max rule children number for which we'll extend calls stacks.
153///
154/// In case rule we're working with has too many children rules that failed in parsing,
155/// we don't want to store long stacks for all of them. If rule has more than this number
156/// of failed children, they all will be collapsed in a parent rule.
157const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
158
159/// Structure tracking errored parsing call (associated with specific `ParserState` function).
160#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
161pub enum ParseAttempt<R> {
162    /// Call of `rule` errored.
163    Rule(R),
164    /// Call of token element (e.g., `match_string` or `match_insensitive`) errored.
165    /// Works as indicator of that leaf node is not a rule. In order to get the token value we
166    /// can address `ParseAttempts` `(un)expected_tokens`.
167    Token,
168}
169
170impl<R> ParseAttempt<R> {
171    pub fn get_rule(&self) -> Option<&R> {
172        match self {
173            ParseAttempt::Rule(r) => Some(r),
174            ParseAttempt::Token => None,
175        }
176    }
177}
178
179/// Rules call stack.
180/// Contains sequence of rule calls that resulted in new parsing attempt.
181#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
182pub struct RulesCallStack<R> {
183    /// Deepest rule caused a parsing error (ParseAttempt::Token transformed into a rule).
184    pub deepest: ParseAttempt<R>,
185    /// Most top rule covering `deepest`.
186    pub parent: Option<R>,
187}
188
189impl<R> RulesCallStack<R> {
190    fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
191        RulesCallStack {
192            deepest,
193            parent: None,
194        }
195    }
196}
197
198#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
199pub enum ParsingToken {
200    Sensitive { token: String },
201    Insensitive { token: String },
202    Range { start: char, end: char },
203    BuiltInRule,
204}
205
206impl Display for ParsingToken {
207    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
208        match self {
209            ParsingToken::Sensitive { token } => write!(f, "{token}"),
210            ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
211            ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
212            ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
213        }
214    }
215}
216
217/// Structure that tracks all the parsing attempts made on the max position.
218/// We want to give an error hint about parsing rules that succeeded
219/// at the farthest input position.
220/// The intuition is such rules will be most likely the query user initially wanted to write.
221#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
222pub struct ParseAttempts<R> {
223    /// Indicates whether the parsing attempts are tracked.
224    enabled: bool,
225    /// Vec of rule calls sequences awaiting tokens at the same `max_position`.
226    /// If there are several stacks in vec, it means all those rule stacks are "equal"
227    /// because their attempts occurred on the same position.
228    pub call_stacks: Vec<RulesCallStack<R>>,
229    /// Tokens that could be putted at `max_position`
230    /// in order to get a valid grammar query.
231    expected_tokens: Vec<ParsingToken>,
232    /// Tokens that we've prohibited to be putted at `max_position`
233    /// in order to get a valid grammar query.
234    unexpected_tokens: Vec<ParsingToken>,
235    /// Max position at which we were expecting to see one of `expected_tokens`.
236    pub max_position: usize,
237}
238
239impl<R: RuleType> ParseAttempts<R> {
240    /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
241    /// initialized with capacity.
242    pub fn new() -> Self {
243        Self {
244            call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
245            expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
246            unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
247            max_position: 0,
248            enabled: ERROR_DETAIL.load(Ordering::Relaxed),
249        }
250    }
251
252    /// Get number of currently present call stacks.
253    fn call_stacks_number(&self) -> usize {
254        self.call_stacks.len()
255    }
256
257    pub fn expected_tokens(&self) -> Vec<ParsingToken> {
258        self.expected_tokens
259            .iter()
260            .cloned()
261            .collect::<BTreeSet<_>>()
262            .into_iter()
263            .collect()
264    }
265
266    pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
267        self.unexpected_tokens
268            .iter()
269            .cloned()
270            .collect::<BTreeSet<_>>()
271            .into_iter()
272            .collect()
273    }
274
275    /// Retrieve call stacks.
276    pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
277        self.call_stacks
278            .iter()
279            .cloned()
280            .collect::<BTreeSet<_>>()
281            .into_iter()
282            .collect()
283    }
284
285    /// In case we've tried to parse a rule, which start position is bigger than previous
286    /// `max_position` it means that we've advanced in our parsing and found better candidate.
287    ///
288    /// `start_index` is:
289    /// * Number of call stacks present in state at the moment current `rule` was called. The idea
290    ///   is that we'd like to update only those stacks that originated from the current `rule` and
291    ///   not from those that were called previously.
292    /// * 0 in case we've successfully parsed some token since the moment `rule` was called.
293    fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
294        let mut non_token_call_stacks = Vec::new();
295        let mut token_call_stack_met = false;
296        for call_stack in self.call_stacks.iter().skip(start_index) {
297            if matches!(call_stack.deepest, ParseAttempt::Token) {
298                token_call_stack_met = true;
299            } else {
300                non_token_call_stacks.push(call_stack.clone())
301            }
302        }
303        if token_call_stack_met && non_token_call_stacks.is_empty() {
304            // If `non_token_call_stacks` is not empty we wouldn't like to add a new standalone
305            // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
306            // rule) as soon as it doesn't give us any useful additional info.
307            non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
308        }
309        self.call_stacks
310            .splice(start_index.., non_token_call_stacks);
311
312        let children_number_over_threshold =
313            self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
314        if children_number_over_threshold {
315            self.call_stacks.truncate(start_index);
316            self.call_stacks
317                .push(RulesCallStack::new(ParseAttempt::Rule(rule)));
318        } else {
319            for call_stack in self.call_stacks.iter_mut().skip(start_index) {
320                if matches!(call_stack.deepest, ParseAttempt::Token) {
321                    call_stack.deepest = ParseAttempt::Rule(rule);
322                } else {
323                    call_stack.parent = Some(rule);
324                }
325            }
326        }
327    }
328
329    /// If `expected` flag is set to false, it means we've successfully parsed token being in the
330    /// state of negative lookahead and want to track `token` in the `unexpected_tokens`. Otherwise,
331    /// we want to track it the `expected_tokens`. Let's call chosen vec a `target_vec`.
332    ///
333    /// In case `position` is:
334    /// * Equal to `max_position`, add `token` to `target_vec`,
335    /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`.
336    #[allow(clippy::comparison_chain)]
337    fn try_add_new_token(
338        &mut self,
339        token: ParsingToken,
340        start_position: usize,
341        position: usize,
342        negative_lookahead: bool,
343    ) {
344        let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
345            let target_vec = if negative_lookahead {
346                &mut attempts.unexpected_tokens
347            } else {
348                &mut attempts.expected_tokens
349            };
350            target_vec.push(token);
351        };
352
353        if position > self.max_position {
354            if negative_lookahead && start_position > self.max_position {
355                // We encountered a sequence under negative lookahead.
356                // We would like to track only first failed token in this sequence (which
357                // `start_position` should be equal to `self.max_position`).
358                return;
359            }
360            target_vec_push_token(self);
361
362            if negative_lookahead {
363                // In case of successful parsing of token under negative lookahead the only
364                // thing we'd like to do is to track the token in the `unexpected_tokens`.
365                return;
366            }
367            self.max_position = position;
368            self.expected_tokens.clear();
369            self.unexpected_tokens.clear();
370            self.call_stacks.clear();
371            self.call_stacks
372                .push(RulesCallStack::new(ParseAttempt::Token));
373        } else if position == self.max_position {
374            target_vec_push_token(self);
375            self.call_stacks
376                .push(RulesCallStack::new(ParseAttempt::Token));
377        }
378    }
379
380    /// Reset state in case we've successfully parsed some token in
381    /// `match_string` or `match_insensetive`.
382    fn nullify_expected_tokens(&mut self, new_max_position: usize) {
383        self.call_stacks.clear();
384        self.expected_tokens.clear();
385        self.unexpected_tokens.clear();
386        self.max_position = new_max_position;
387    }
388}
389
390impl<R: RuleType> Default for ParseAttempts<R> {
391    fn default() -> Self {
392        Self::new()
393    }
394}
395
396/// The complete state of a [`Parser`].
397///
398/// [`Parser`]: trait.Parser.html
399#[derive(Debug)]
400pub struct ParserState<'i, R: RuleType> {
401    /// Current position from which we try to apply some parser function.
402    /// Initially is 0.
403    /// E.g., we are parsing `create user 'Bobby'` query, we parsed "create" via `match_insensitive`
404    /// and switched our `position` from 0 to the length of "create".
405    ///
406    /// E.g., see `match_string` -> `self.position.match_string(string)` which updates `self.pos`.
407    position: Position<'i>,
408    /// Queue representing rules partially (`QueueableToken::Start`) and
409    /// totally (`QueueableToken::End`) parsed. When entering rule we put it in the queue in a state
410    /// of `Start` and after all it's sublogic (subrules or strings) are parsed, we change it to
411    /// `End` state.
412    queue: Vec<QueueableToken<'i, R>>,
413    /// Status set in case specific lookahead logic is used in grammar.
414    /// See `Lookahead` for more information.
415    lookahead: Lookahead,
416    /// Rules that we HAVE expected, tried to parse, but failed.
417    pos_attempts: Vec<R>,
418    /// Rules that we have NOT expected, tried to parse, but failed.
419    neg_attempts: Vec<R>,
420    /// Max position in the query from which we've tried to parse some rule but failed.
421    attempt_pos: usize,
422    /// Current atomicity status. For more information see `Atomicity`.
423    atomicity: Atomicity,
424    /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
425    /// invocations).
426    stack: Stack<Span<'i>>,
427    /// Used for setting max parser calls limit.
428    call_tracker: CallLimitTracker,
429    /// Together with tracking of `pos_attempts` and `attempt_pos`
430    /// as a pair of (list of rules that we've tried to parse but failed, max parsed position)
431    /// we track those rules (which we've tried to parse at the same max pos) at this helper struct.
432    ///
433    /// Note, that we may try to parse several rules on different positions. We want to track only
434    /// those rules, which attempt position is bigger, because we consider that it's nearer to the
435    /// query that user really wanted to pass.
436    ///
437    /// E.g. we have a query `create user "Bobby"` and two root rules:
438    /// * CreateUser  = { "create" ~ "user"  ~ Name }
439    /// * CreateTable = { "create" ~ "table" ~ Name }
440    /// * Name = { SOME_DEFINITION }
441    /// While parsing the query we'll update tracker position to the start of "Bobby", because we'd
442    /// successfully parse "create" + "user" (and not "table").
443    parse_attempts: ParseAttempts<R>,
444}
445
446/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
447///
448/// # Examples
449///
450/// ```
451/// # use pest;
452/// let input = "";
453/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
454/// ```
455#[allow(clippy::perf)]
456pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
457where
458    F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
459{
460    let state = ParserState::new(input);
461
462    match f(state) {
463        Ok(state) => {
464            let len = state.queue.len();
465            Ok(new(Rc::new(state.queue), input, None, 0, len))
466        }
467        Err(mut state) => {
468            let variant = if state.reached_call_limit() {
469                ErrorVariant::CustomError {
470                    message: "call limit reached".to_owned(),
471                }
472            } else {
473                state.pos_attempts.sort();
474                state.pos_attempts.dedup();
475                state.neg_attempts.sort();
476                state.neg_attempts.dedup();
477                ErrorVariant::ParsingError {
478                    positives: state.pos_attempts.clone(),
479                    negatives: state.neg_attempts.clone(),
480                }
481            };
482
483            if state.parse_attempts.enabled {
484                Err(Error::new_from_pos_with_parsing_attempts(
485                    variant,
486                    Position::new_internal(input, state.attempt_pos),
487                    state.parse_attempts.clone(),
488                ))
489            } else {
490                Err(Error::new_from_pos(
491                    variant,
492                    Position::new_internal(input, state.attempt_pos),
493                ))
494            }
495        }
496    }
497}
498
499impl<'i, R: RuleType> ParserState<'i, R> {
500    /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
501    /// will be passed from closure to closure based on the needs of the specified `Parser`.
502    ///
503    /// # Examples
504    ///
505    /// ```
506    /// # use pest;
507    /// let input = "";
508    /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
509    /// ```
510    pub fn new(input: &'i str) -> Box<Self> {
511        Box::new(ParserState {
512            position: Position::from_start(input),
513            queue: vec![],
514            lookahead: Lookahead::None,
515            pos_attempts: vec![],
516            neg_attempts: vec![],
517            attempt_pos: 0,
518            atomicity: Atomicity::NonAtomic,
519            stack: Stack::new(),
520            call_tracker: Default::default(),
521            parse_attempts: ParseAttempts::new(),
522        })
523    }
524
525    /// Get all parse attempts after process of parsing is finished.
526    pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
527        &self.parse_attempts
528    }
529
530    /// Returns a reference to the current `Position` of the `ParserState`.
531    ///
532    /// # Examples
533    ///
534    /// ```
535    /// # use pest;
536    /// # #[allow(non_camel_case_types)]
537    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
538    /// enum Rule {
539    ///     ab
540    /// }
541    ///
542    /// let input = "ab";
543    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
544    /// let position = state.position();
545    /// assert_eq!(position.pos(), 0);
546    /// ```
547    pub fn position(&self) -> &Position<'i> {
548        &self.position
549    }
550
551    /// Returns the current atomicity of the `ParserState`.
552    ///
553    /// # Examples
554    ///
555    /// ```
556    /// # use pest;
557    /// # use pest::Atomicity;
558    /// # #[allow(non_camel_case_types)]
559    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
560    /// enum Rule {
561    ///     ab
562    /// }
563    ///
564    /// let input = "ab";
565    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
566    /// let atomicity = state.atomicity();
567    /// assert_eq!(atomicity, Atomicity::NonAtomic);
568    /// ```
569    pub fn atomicity(&self) -> Atomicity {
570        self.atomicity
571    }
572
573    #[inline]
574    fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
575        if self.call_tracker.limit_reached() {
576            return Err(self);
577        }
578        self.call_tracker.increment_depth();
579        Ok(self)
580    }
581
582    #[inline]
583    fn reached_call_limit(&self) -> bool {
584        self.call_tracker.limit_reached()
585    }
586
587    /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
588    /// meant to match the rule.
589    ///
590    /// # Examples
591    ///
592    /// ```
593    /// # use pest;
594    /// # #[allow(non_camel_case_types)]
595    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
596    /// enum Rule {
597    ///     a
598    /// }
599    ///
600    /// let input = "a";
601    /// let pairs: Vec<_> = pest::state(input, |state| {
602    ///     state.rule(Rule::a, |s| Ok(s))
603    /// }).unwrap().collect();
604    ///
605    /// assert_eq!(pairs.len(), 1);
606    /// ```
607    #[inline]
608    pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
609    where
610        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
611    {
612        self = self.inc_call_check_limit()?;
613        // Position from which this `rule` starts parsing.
614        let actual_pos = self.position.pos();
615        // Remember index of the `self.queue` element that will be associated with this `rule`.
616        let index = self.queue.len();
617
618        let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
619            (self.pos_attempts.len(), self.neg_attempts.len())
620        } else {
621            // Attempts have not been cleared yet since the attempt_pos is older.
622            (0, 0)
623        };
624
625        if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
626            // Pair's position will only be known after running the closure.
627            self.queue.push(QueueableToken::Start {
628                end_token_index: 0,
629                input_pos: actual_pos,
630            });
631        }
632
633        // Remember attempts number before `f` call.
634        // In `track` using this variable we can say, how many attempts were added
635        // during children rules traversal.
636        let attempts = self.attempts_at(actual_pos);
637        // Number of call stacks present in `self.parse_attempts` before `f` call.
638        // We need to remember this number only in case there wasn't found any farther attempt.
639        // E.g. we are handling rule, on start position of which may be tested two
640        // children rules. At the moment we'll return from `f` call below,
641        // there will be two more children rules in `self.parse_attempts` that we'll
642        // consider to be the children of current `rule`.
643        let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
644        // Max parsing attempt position at the moment of `rule` handling.
645        // It case it's raised during children rules handling, it means
646        // we've made a parsing progress.
647        let remember_max_position = self.parse_attempts.max_position;
648
649        let result = f(self);
650
651        let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
652            if new_state.parse_attempts.max_position > remember_max_position {
653                // It means that one of `match_string` or e.g. `match_insensetive` function calls
654                // have already erased `self.parse_attempts.call_stacks` and that previously
655                // remembered values are not valid anymore.
656                remember_call_stacks_number = 0;
657            }
658            if !matches!(new_state.atomicity, Atomicity::Atomic) {
659                new_state
660                    .parse_attempts
661                    .try_add_new_stack_rule(rule, remember_call_stacks_number);
662            }
663        };
664
665        match result {
666            Ok(mut new_state) => {
667                if new_state.lookahead == Lookahead::Negative {
668                    new_state.track(
669                        rule,
670                        actual_pos,
671                        pos_attempts_index,
672                        neg_attempts_index,
673                        attempts,
674                    );
675                }
676
677                if new_state.lookahead == Lookahead::None
678                    && new_state.atomicity != Atomicity::Atomic
679                {
680                    // Index of `QueueableToken::End` token added below
681                    // that corresponds to previously added `QueueableToken::Start` token.
682                    let new_index = new_state.queue.len();
683                    match new_state.queue[index] {
684                        QueueableToken::Start {
685                            ref mut end_token_index,
686                            ..
687                        } => *end_token_index = new_index,
688                        _ => unreachable!(),
689                    };
690
691                    let new_pos = new_state.position.pos();
692
693                    new_state.queue.push(QueueableToken::End {
694                        start_token_index: index,
695                        rule,
696                        tag: None,
697                        input_pos: new_pos,
698                    });
699                }
700
701                // Note, that we need to count positive parsing results too, because we can fail in
702                // optional rule call inside which may lie the farthest
703                // parsed token.
704                if new_state.parse_attempts.enabled {
705                    try_add_rule_to_stack(&mut new_state);
706                }
707                Ok(new_state)
708            }
709            Err(mut new_state) => {
710                if new_state.lookahead != Lookahead::Negative {
711                    new_state.track(
712                        rule,
713                        actual_pos,
714                        pos_attempts_index,
715                        neg_attempts_index,
716                        attempts,
717                    );
718                    if new_state.parse_attempts.enabled {
719                        try_add_rule_to_stack(&mut new_state);
720                    }
721                }
722
723                if new_state.lookahead == Lookahead::None
724                    && new_state.atomicity != Atomicity::Atomic
725                {
726                    new_state.queue.truncate(index);
727                }
728
729                Err(new_state)
730            }
731        }
732    }
733
734    /// Tag current node
735    ///
736    /// # Examples
737    ///
738    /// Try to recognize the one specified in a set of characters
739    ///
740    /// ```
741    /// use pest::{state, ParseResult, ParserState, iterators::Pair};
742    /// #[allow(non_camel_case_types)]
743    /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
744    /// enum Rule {
745    ///     character,
746    /// }
747    /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
748    ///     state.sequence(|state| {
749    ///         character(state)
750    ///             .and_then(|state| character(state))
751    ///             .and_then(|state| character(state))
752    ///             .and_then(|state| state.tag_node("c"))
753    ///             .and_then(|state| character(state))
754    ///     })
755    /// }
756    /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
757    ///     state.rule(Rule::character, |state| state.match_range('a'..'z'))
758    /// }
759    ///
760    /// let input = "abcd";
761    /// let pairs = state(input, mark_c).unwrap();
762    /// // find all node tag as `c`
763    /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
764    /// assert_eq!(find[0].as_str(), "c")
765    /// ```
766    #[inline]
767    pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
768        if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
769            *old = Some(tag)
770        }
771        Ok(self)
772    }
773
774    /// Get number of allowed rules attempts + prohibited rules attempts.
775    fn attempts_at(&self, pos: usize) -> usize {
776        if self.attempt_pos == pos {
777            self.pos_attempts.len() + self.neg_attempts.len()
778        } else {
779            0
780        }
781    }
782
783    fn track(
784        &mut self,
785        rule: R,
786        pos: usize,
787        pos_attempts_index: usize,
788        neg_attempts_index: usize,
789        prev_attempts: usize,
790    ) {
791        if self.atomicity == Atomicity::Atomic {
792            return;
793        }
794
795        // If nested rules made no progress, there is no use to report them; it's only useful to
796        // track the current rule, the exception being when only one attempt has been made during
797        // the children rules.
798        let curr_attempts = self.attempts_at(pos);
799        if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
800            return;
801        }
802
803        if pos == self.attempt_pos {
804            self.pos_attempts.truncate(pos_attempts_index);
805            self.neg_attempts.truncate(neg_attempts_index);
806        }
807
808        if pos > self.attempt_pos {
809            self.pos_attempts.clear();
810            self.neg_attempts.clear();
811            self.attempt_pos = pos;
812        }
813
814        let attempts = if self.lookahead != Lookahead::Negative {
815            &mut self.pos_attempts
816        } else {
817            &mut self.neg_attempts
818        };
819
820        if pos == self.attempt_pos {
821            attempts.push(rule);
822        }
823    }
824
825    /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
826    /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
827    /// `Box<ParserState>` otherwise.
828    ///
829    /// This method is useful to parse sequences that only match together which usually come in the
830    /// form of chained `Result`s with
831    /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
832    ///
833    ///
834    /// # Examples
835    ///
836    /// ```
837    /// # use pest;
838    /// # #[allow(non_camel_case_types)]
839    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
840    /// enum Rule {
841    ///     a
842    /// }
843    ///
844    /// let input = "a";
845    /// let pairs: Vec<_> = pest::state(input, |state| {
846    ///     state.sequence(|s| {
847    ///         s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
848    ///             s.match_string("b")
849    ///         })
850    ///     }).or_else(|s| {
851    ///         Ok(s)
852    ///     })
853    /// }).unwrap().collect();
854    ///
855    /// assert_eq!(pairs.len(), 0);
856    /// ```
857    #[inline]
858    pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
859    where
860        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
861    {
862        self = self.inc_call_check_limit()?;
863        let token_index = self.queue.len();
864        let initial_pos = self.position;
865
866        let result = f(self);
867
868        match result {
869            Ok(new_state) => Ok(new_state),
870            Err(mut new_state) => {
871                // Restore the initial position and truncate the token queue.
872                new_state.position = initial_pos;
873                new_state.queue.truncate(token_index);
874                Err(new_state)
875            }
876        }
877    }
878
879    /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
880    /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
881    ///
882    /// # Examples
883    ///
884    /// ```
885    /// # use pest;
886    /// # #[allow(non_camel_case_types)]
887    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
888    /// enum Rule {
889    ///     ab
890    /// }
891    ///
892    /// let input = "aab";
893    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
894    /// let mut result = state.repeat(|s| {
895    ///     s.match_string("a")
896    /// });
897    /// assert!(result.is_ok());
898    /// assert_eq!(result.unwrap().position().pos(), 2);
899    ///
900    /// state = pest::ParserState::new(input);
901    /// result = state.repeat(|s| {
902    ///     s.match_string("b")
903    /// });
904    /// assert!(result.is_ok());
905    /// assert_eq!(result.unwrap().position().pos(), 0);
906    /// ```
907    #[inline]
908    pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
909    where
910        F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
911    {
912        self = self.inc_call_check_limit()?;
913        let mut result = f(self);
914
915        loop {
916            match result {
917                Ok(state) => result = f(state),
918                Err(state) => return Ok(state),
919            };
920        }
921    }
922
923    /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
924    /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
925    ///
926    /// # Examples
927    ///
928    /// ```
929    /// # use pest;
930    /// # #[allow(non_camel_case_types)]
931    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
932    /// enum Rule {
933    ///     ab
934    /// }
935    ///
936    /// let input = "ab";
937    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
938    /// let result = state.optional(|s| {
939    ///     s.match_string("ab")
940    /// });
941    /// assert!(result.is_ok());
942    ///
943    /// state = pest::ParserState::new(input);
944    /// let result = state.optional(|s| {
945    ///     s.match_string("ac")
946    /// });
947    /// assert!(result.is_ok());
948    /// ```
949    #[inline]
950    pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
951    where
952        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
953    {
954        self = self.inc_call_check_limit()?;
955        match f(self) {
956            Ok(state) | Err(state) => Ok(state),
957        }
958    }
959
960    /// Generic function to handle result of char/string/range parsing
961    /// in order to track (un)expected tokens.
962    fn handle_token_parse_result(
963        &mut self,
964        start_position: usize,
965        token: ParsingToken,
966        parse_succeeded: bool,
967    ) {
968        // New position after tracked parsed element for case of `parse_succeded` is true.
969        // Position of parsing failure otherwise.
970        let current_pos = self.position.pos();
971
972        if parse_succeeded {
973            if self.lookahead == Lookahead::Negative {
974                self.parse_attempts
975                    .try_add_new_token(token, start_position, current_pos, true);
976            } else if current_pos > self.parse_attempts.max_position {
977                self.parse_attempts.nullify_expected_tokens(current_pos);
978            }
979        } else if self.lookahead != Lookahead::Negative {
980            self.parse_attempts
981                .try_add_new_token(token, start_position, current_pos, false);
982        }
983    }
984
985    /// Attempts to match a single character based on a filter function. Returns `Ok` with the
986    /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
987    /// otherwise.
988    ///
989    /// # Examples
990    ///
991    /// ```
992    /// # use pest;
993    /// # #[allow(non_camel_case_types)]
994    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
995    /// enum Rule {}
996    ///
997    /// let input = "ab";
998    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
999    /// let result = state.match_char_by(|c| c.is_ascii());
1000    /// assert!(result.is_ok());
1001    /// assert_eq!(result.unwrap().position().pos(), 1);
1002    ///
1003    /// let input = "❤";
1004    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1005    /// let result = state.match_char_by(|c| c.is_ascii());
1006    /// assert!(result.is_err());
1007    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1008    /// ```
1009    #[inline]
1010    pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1011    where
1012        F: FnOnce(char) -> bool,
1013    {
1014        let start_position = self.position.pos();
1015        let succeeded = self.position.match_char_by(f);
1016        if self.parse_attempts.enabled {
1017            let token = ParsingToken::BuiltInRule;
1018            self.handle_token_parse_result(start_position, token, succeeded);
1019        }
1020        if succeeded {
1021            Ok(self)
1022        } else {
1023            Err(self)
1024        }
1025    }
1026
1027    /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
1028    /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
1029    ///
1030    /// # Examples
1031    ///
1032    /// ```
1033    /// # use pest;
1034    /// # #[allow(non_camel_case_types)]
1035    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1036    /// enum Rule {}
1037    ///
1038    /// let input = "ab";
1039    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1040    /// let mut result = state.match_string("ab");
1041    /// assert!(result.is_ok());
1042    /// assert_eq!(result.unwrap().position().pos(), 2);
1043    ///
1044    /// state = pest::ParserState::new(input);
1045    /// result = state.match_string("ac");
1046    /// assert!(result.is_err());
1047    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1048    /// ```
1049    #[inline]
1050    pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1051        let start_position = self.position.pos();
1052        let succeeded = self.position.match_string(string);
1053        if self.parse_attempts.enabled {
1054            let token = ParsingToken::Sensitive {
1055                token: String::from(string),
1056            };
1057            self.handle_token_parse_result(start_position, token, succeeded);
1058        }
1059        if succeeded {
1060            Ok(self)
1061        } else {
1062            Err(self)
1063        }
1064    }
1065
1066    /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
1067    /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1068    ///
1069    /// # Examples
1070    ///
1071    /// ```
1072    /// # use pest;
1073    /// # #[allow(non_camel_case_types)]
1074    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1075    /// enum Rule {}
1076    ///
1077    /// let input = "ab";
1078    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1079    /// let mut result = state.match_insensitive("AB");
1080    /// assert!(result.is_ok());
1081    /// assert_eq!(result.unwrap().position().pos(), 2);
1082    ///
1083    /// state = pest::ParserState::new(input);
1084    /// result = state.match_insensitive("AC");
1085    /// assert!(result.is_err());
1086    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1087    /// ```
1088    #[inline]
1089    pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1090        let start_position: usize = self.position().pos();
1091        let succeeded = self.position.match_insensitive(string);
1092        if self.parse_attempts.enabled {
1093            let token = ParsingToken::Insensitive {
1094                token: String::from(string),
1095            };
1096            self.handle_token_parse_result(start_position, token, succeeded);
1097        }
1098        if succeeded {
1099            Ok(self)
1100        } else {
1101            Err(self)
1102        }
1103    }
1104
1105    /// Attempts to match a single character from the given range. Returns `Ok` with the updated
1106    /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1107    ///
1108    /// # Caution
1109    /// The provided `range` is interpreted as inclusive.
1110    ///
1111    /// # Examples
1112    ///
1113    /// ```
1114    /// # use pest;
1115    /// # #[allow(non_camel_case_types)]
1116    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1117    /// enum Rule {}
1118    ///
1119    /// let input = "ab";
1120    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1121    /// let mut result = state.match_range('a'..'z');
1122    /// assert!(result.is_ok());
1123    /// assert_eq!(result.unwrap().position().pos(), 1);
1124    ///
1125    /// state = pest::ParserState::new(input);
1126    /// result = state.match_range('A'..'Z');
1127    /// assert!(result.is_err());
1128    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1129    /// ```
1130    #[inline]
1131    pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
1132        let start_position = self.position().pos();
1133        let token = ParsingToken::Range {
1134            start: range.start,
1135            end: range.end,
1136        };
1137        let succeeded = self.position.match_range(range);
1138        if self.parse_attempts.enabled {
1139            self.handle_token_parse_result(start_position, token, succeeded);
1140        }
1141        if succeeded {
1142            Ok(self)
1143        } else {
1144            Err(self)
1145        }
1146    }
1147
1148    /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
1149    /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1150    ///
1151    /// # Examples
1152    ///
1153    /// ```
1154    /// # use pest;
1155    /// # #[allow(non_camel_case_types)]
1156    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1157    /// enum Rule {}
1158    ///
1159    /// let input = "ab";
1160    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1161    /// let mut result = state.skip(1);
1162    /// assert!(result.is_ok());
1163    /// assert_eq!(result.unwrap().position().pos(), 1);
1164    ///
1165    /// state = pest::ParserState::new(input);
1166    /// result = state.skip(3);
1167    /// assert!(result.is_err());
1168    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1169    /// ```
1170    #[inline]
1171    pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
1172        if self.position.skip(n) {
1173            Ok(self)
1174        } else {
1175            Err(self)
1176        }
1177    }
1178
1179    /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
1180    /// updated `Box<ParserState>` whether or not one of the strings is found.
1181    ///
1182    /// # Examples
1183    ///
1184    /// ```
1185    /// # use pest;
1186    /// # #[allow(non_camel_case_types)]
1187    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1188    /// enum Rule {}
1189    ///
1190    /// let input = "abcd";
1191    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1192    /// let mut result = state.skip_until(&["c", "d"]);
1193    /// assert!(result.is_ok());
1194    /// assert_eq!(result.unwrap().position().pos(), 2);
1195    /// ```
1196    #[inline]
1197    pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
1198        self.position.skip_until(strings);
1199        Ok(self)
1200    }
1201
1202    /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
1203    /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
1204    ///
1205    /// # Examples
1206    ///
1207    /// ```
1208    /// # use pest;
1209    /// # #[allow(non_camel_case_types)]
1210    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1211    /// enum Rule {}
1212    ///
1213    /// let input = "ab";
1214    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1215    /// let mut result = state.start_of_input();
1216    /// assert!(result.is_ok());
1217    ///
1218    /// state = pest::ParserState::new(input);
1219    /// state = state.match_string("ab").unwrap();
1220    /// result = state.start_of_input();
1221    /// assert!(result.is_err());
1222    /// ```
1223    #[inline]
1224    pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1225        if self.position.at_start() {
1226            Ok(self)
1227        } else {
1228            Err(self)
1229        }
1230    }
1231
1232    /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
1233    /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
1234    ///
1235    /// # Examples
1236    ///
1237    /// ```
1238    /// # use pest;
1239    /// # #[allow(non_camel_case_types)]
1240    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1241    /// enum Rule {}
1242    ///
1243    /// let input = "ab";
1244    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1245    /// let mut result = state.end_of_input();
1246    /// assert!(result.is_err());
1247    ///
1248    /// state = pest::ParserState::new(input);
1249    /// state = state.match_string("ab").unwrap();
1250    /// result = state.end_of_input();
1251    /// assert!(result.is_ok());
1252    /// ```
1253    #[inline]
1254    pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1255        if self.position.at_end() {
1256            Ok(self)
1257        } else {
1258            Err(self)
1259        }
1260    }
1261
1262    /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
1263    /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
1264    /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
1265    /// together, negating the `Result`.
1266    ///
1267    /// # Examples
1268    ///
1269    /// ```
1270    /// # use pest;
1271    /// # #[allow(non_camel_case_types)]
1272    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1273    /// enum Rule {
1274    ///     a
1275    /// }
1276    ///
1277    /// let input = "a";
1278    /// let pairs: Vec<_> = pest::state(input, |state| {
1279    ///     state.lookahead(true, |state| {
1280    ///         state.rule(Rule::a, |s| Ok(s))
1281    ///     })
1282    /// }).unwrap().collect();
1283    ///
1284    /// assert_eq!(pairs.len(), 0);
1285    /// ```
1286    #[inline]
1287    pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
1288    where
1289        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1290    {
1291        self = self.inc_call_check_limit()?;
1292        let initial_lookahead = self.lookahead;
1293
1294        self.lookahead = if is_positive {
1295            match initial_lookahead {
1296                Lookahead::None | Lookahead::Positive => Lookahead::Positive,
1297                Lookahead::Negative => Lookahead::Negative,
1298            }
1299        } else {
1300            match initial_lookahead {
1301                Lookahead::None | Lookahead::Positive => Lookahead::Negative,
1302                Lookahead::Negative => Lookahead::Positive,
1303            }
1304        };
1305
1306        let initial_pos = self.position;
1307
1308        let result = f(self.checkpoint());
1309
1310        let result_state = match result {
1311            Ok(mut new_state) => {
1312                new_state.position = initial_pos;
1313                new_state.lookahead = initial_lookahead;
1314                Ok(new_state.restore())
1315            }
1316            Err(mut new_state) => {
1317                new_state.position = initial_pos;
1318                new_state.lookahead = initial_lookahead;
1319                Err(new_state.restore())
1320            }
1321        };
1322
1323        if is_positive {
1324            result_state
1325        } else {
1326            match result_state {
1327                Ok(state) => Err(state),
1328                Err(state) => Ok(state),
1329            }
1330        }
1331    }
1332
1333    /// Transformation which stops `Token`s from being generated according to `is_atomic`.
1334    /// Used as wrapper over `rule` (or even another `atomic`) call.
1335    ///
1336    /// # Examples
1337    ///
1338    /// ```
1339    /// # use pest::{self, Atomicity};
1340    /// # #[allow(non_camel_case_types)]
1341    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1342    /// enum Rule {
1343    ///     a
1344    /// }
1345    ///
1346    /// let input = "a";
1347    /// let pairs: Vec<_> = pest::state(input, |state| {
1348    ///     state.atomic(Atomicity::Atomic, |s| {
1349    ///         s.rule(Rule::a, |s| Ok(s))
1350    ///     })
1351    /// }).unwrap().collect();
1352    ///
1353    /// assert_eq!(pairs.len(), 0);
1354    /// ```
1355    #[inline]
1356    pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
1357    where
1358        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1359    {
1360        self = self.inc_call_check_limit()?;
1361        // In case child parsing call is another `atomic` it will have it's own atomicity status.
1362        let initial_atomicity = self.atomicity;
1363        // In case child atomicity is the same as we've demanded, we shouldn't do nothing.
1364        // E.g. we have the following rules:
1365        // * RootRule = @{ InnerRule }
1366        // * InnerRule = @{ ... }
1367        let should_toggle = self.atomicity != atomicity;
1368
1369        // Note that we take atomicity of the top rule and not of the leaf (inner).
1370        if should_toggle {
1371            self.atomicity = atomicity;
1372        }
1373
1374        let result = f(self);
1375
1376        match result {
1377            Ok(mut new_state) => {
1378                if should_toggle {
1379                    new_state.atomicity = initial_atomicity;
1380                }
1381                Ok(new_state)
1382            }
1383            Err(mut new_state) => {
1384                if should_toggle {
1385                    new_state.atomicity = initial_atomicity;
1386                }
1387                Err(new_state)
1388            }
1389        }
1390    }
1391
1392    /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
1393    /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
1394    /// called successfully, or `Err(Box<ParserState>)` otherwise.
1395    ///
1396    /// # Examples
1397    ///
1398    /// ```
1399    /// # use pest;
1400    /// # #[allow(non_camel_case_types)]
1401    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1402    /// enum Rule {}
1403    ///
1404    /// let input = "ab";
1405    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1406    /// let mut result = state.stack_push(|state| state.match_string("a"));
1407    /// assert!(result.is_ok());
1408    /// assert_eq!(result.unwrap().position().pos(), 1);
1409    /// ```
1410    #[inline]
1411    pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1412    where
1413        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1414    {
1415        self = self.inc_call_check_limit()?;
1416        let start = self.position;
1417
1418        let result = f(self);
1419
1420        match result {
1421            Ok(mut state) => {
1422                let end = state.position;
1423                state.stack.push(start.span(&end));
1424                Ok(state)
1425            }
1426            Err(state) => Err(state),
1427        }
1428    }
1429
1430    /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1431    /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1432    ///
1433    /// # Examples
1434    ///
1435    /// ```
1436    /// # use pest;
1437    /// # #[allow(non_camel_case_types)]
1438    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1439    /// enum Rule {}
1440    ///
1441    /// let input = "aa";
1442    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1443    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1444    ///     |state| state.stack_peek()
1445    /// );
1446    /// assert!(result.is_ok());
1447    /// assert_eq!(result.unwrap().position().pos(), 2);
1448    /// ```
1449    #[inline]
1450    pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1451        let string = self
1452            .stack
1453            .peek()
1454            .expect("peek was called on empty stack")
1455            .as_str();
1456        self.match_string(string)
1457    }
1458
1459    /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1460    /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1461    ///
1462    /// # Examples
1463    ///
1464    /// ```
1465    /// # use pest;
1466    /// # #[allow(non_camel_case_types)]
1467    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1468    /// enum Rule {}
1469    ///
1470    /// let input = "aa";
1471    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1472    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1473    ///     |state| state.stack_pop()
1474    /// );
1475    /// assert!(result.is_ok());
1476    /// assert_eq!(result.unwrap().position().pos(), 2);
1477    /// ```
1478    #[inline]
1479    pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1480        let string = self
1481            .stack
1482            .pop()
1483            .expect("pop was called on empty stack")
1484            .as_str();
1485        self.match_string(string)
1486    }
1487
1488    /// Matches part of the state of the stack.
1489    ///
1490    /// # Examples
1491    ///
1492    /// ```
1493    /// # use pest::{self, MatchDir};
1494    /// # #[allow(non_camel_case_types)]
1495    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1496    /// enum Rule {}
1497    ///
1498    /// let input = "abcd cd cb";
1499    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1500    /// let mut result = state
1501    ///     .stack_push(|state| state.match_string("a"))
1502    ///     .and_then(|state| state.stack_push(|state| state.match_string("b")))
1503    ///     .and_then(|state| state.stack_push(|state| state.match_string("c")))
1504    ///     .and_then(|state| state.stack_push(|state| state.match_string("d")))
1505    ///     .and_then(|state| state.match_string(" "))
1506    ///     .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1507    ///     .and_then(|state| state.match_string(" "))
1508    ///     .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1509    /// assert!(result.is_ok());
1510    /// assert_eq!(result.unwrap().position().pos(), 10);
1511    /// ```
1512    #[inline]
1513    pub fn stack_match_peek_slice(
1514        mut self: Box<Self>,
1515        start: i32,
1516        end: Option<i32>,
1517        match_dir: MatchDir,
1518    ) -> ParseResult<Box<Self>> {
1519        let range = match constrain_idxs(start, end, self.stack.len()) {
1520            Some(r) => r,
1521            None => return Err(self),
1522        };
1523        // return true if an empty sequence is requested
1524        if range.end <= range.start {
1525            return Ok(self);
1526        }
1527
1528        let mut position = self.position;
1529        let result = {
1530            let mut iter_b2t = self.stack[range].iter();
1531            let matcher = |span: &Span<'_>| position.match_string(span.as_str());
1532            match match_dir {
1533                MatchDir::BottomToTop => iter_b2t.all(matcher),
1534                MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1535            }
1536        };
1537        if result {
1538            self.position = position;
1539            Ok(self)
1540        } else {
1541            Err(self)
1542        }
1543    }
1544
1545    /// Matches the full state of the stack.
1546    ///
1547    /// # Examples
1548    ///
1549    /// ```
1550    /// # use pest;
1551    /// # #[allow(non_camel_case_types)]
1552    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1553    /// enum Rule {}
1554    ///
1555    /// let input = "abba";
1556    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1557    /// let mut result = state
1558    ///     .stack_push(|state| state.match_string("a"))
1559    ///     .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1560    ///     .and_then(|state| state.stack_match_peek());
1561    /// assert!(result.is_ok());
1562    /// assert_eq!(result.unwrap().position().pos(), 4);
1563    /// ```
1564    #[inline]
1565    pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1566        self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1567    }
1568
1569    /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1570    ///
1571    /// # Examples
1572    ///
1573    /// ```
1574    /// /// # use pest;
1575    /// # #[allow(non_camel_case_types)]
1576    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1577    /// enum Rule {}
1578    ///
1579    /// let input = "aaaa";
1580    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1581    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1582    ///     state.stack_push(|state| state.match_string("a"))
1583    /// }).and_then(|state| state.stack_match_peek());
1584    /// assert!(result.is_ok());
1585    /// assert_eq!(result.unwrap().position().pos(), 4);
1586    /// ```
1587    #[inline]
1588    pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1589        let mut position = self.position;
1590        let mut result = true;
1591        while let Some(span) = self.stack.pop() {
1592            result = position.match_string(span.as_str());
1593            if !result {
1594                break;
1595            }
1596        }
1597
1598        if result {
1599            self.position = position;
1600            Ok(self)
1601        } else {
1602            Err(self)
1603        }
1604    }
1605
1606    /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1607    /// `Err(Box<ParserState>)` otherwise.
1608    ///
1609    /// # Examples
1610    ///
1611    /// ```
1612    /// # use pest;
1613    /// # #[allow(non_camel_case_types)]
1614    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1615    /// enum Rule {}
1616    ///
1617    /// let input = "aa";
1618    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1619    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1620    ///     |state| state.stack_drop()
1621    /// );
1622    /// assert!(result.is_ok());
1623    /// assert_eq!(result.unwrap().position().pos(), 1);
1624    /// ```
1625    #[inline]
1626    pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1627        match self.stack.pop() {
1628            Some(_) => Ok(self),
1629            None => Err(self),
1630        }
1631    }
1632
1633    /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1634    /// this method only restores the stack.
1635    ///
1636    /// # Examples
1637    ///
1638    /// ```
1639    /// # use pest;
1640    /// # #[allow(non_camel_case_types)]
1641    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1642    /// enum Rule {}
1643    ///
1644    /// let input = "ab";
1645    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1646    /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1647    ///     state.match_string("a")).and_then(|state| state.match_string("a"))
1648    /// );
1649    ///
1650    /// assert!(result.is_err());
1651    ///
1652    /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1653    /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1654    /// assert!(catch_panic.is_err());
1655    /// ```
1656    #[inline]
1657    pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1658    where
1659        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1660    {
1661        match f(self.checkpoint()) {
1662            Ok(state) => Ok(state.checkpoint_ok()),
1663            Err(state) => Err(state.restore()),
1664        }
1665    }
1666
1667    // Mark the current state as a checkpoint and return the `Box`.
1668    #[inline]
1669    pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1670        self.stack.snapshot();
1671        self
1672    }
1673
1674    // The checkpoint was cleared successfully
1675    // so remove it without touching other stack state.
1676    #[inline]
1677    pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1678        self.stack.clear_snapshot();
1679        self
1680    }
1681
1682    // Restore the current state to the most recent checkpoint.
1683    #[inline]
1684    pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1685        self.stack.restore();
1686        self
1687    }
1688}
1689
1690/// Helper function used only in case stack operations (PUSH/POP) are used in grammar.
1691fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1692    let start_norm = normalize_index(start, len)?;
1693    let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1694    Some(start_norm..end_norm)
1695}
1696
1697/// `constrain_idxs` helper function.
1698/// Normalizes the index using its sequence’s length.
1699/// Returns `None` if the normalized index is OOB.
1700fn normalize_index(i: i32, len: usize) -> Option<usize> {
1701    if i > len as i32 {
1702        None
1703    } else if i >= 0 {
1704        Some(i as usize)
1705    } else {
1706        let real_i = len as i32 + i;
1707        if real_i >= 0 {
1708            Some(real_i as usize)
1709        } else {
1710            None
1711        }
1712    }
1713}
1714
1715#[cfg(test)]
1716mod test {
1717    use super::*;
1718
1719    #[test]
1720    fn normalize_index_pos() {
1721        assert_eq!(normalize_index(4, 6), Some(4));
1722        assert_eq!(normalize_index(5, 5), Some(5));
1723        assert_eq!(normalize_index(6, 3), None);
1724    }
1725
1726    #[test]
1727    fn normalize_index_neg() {
1728        assert_eq!(normalize_index(-4, 6), Some(2));
1729        assert_eq!(normalize_index(-5, 5), Some(0));
1730        assert_eq!(normalize_index(-6, 3), None);
1731    }
1732}