fancy_regex/
vm.rs

Help
1// Copyright 2016 The Fancy Regex Authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Backtracking VM for implementing fancy regexes.
22//!
23//! Read <https://swtch.com/~rsc/regexp/regexp2.html> for a good introduction for how this works.
24//!
25//! The VM executes a sequence of instructions (a program) against an input string. It keeps track
26//! of a program counter (PC) and an index into the string (IX). Execution can have one or more
27//! threads.
28//!
29//! One of the basic instructions is `Lit`, which matches a string against the input. If it matches,
30//! the PC advances to the next instruction and the IX to the position after the matched string.
31//! If not, the current thread is stopped because it failed.
32//!
33//! If execution reaches an `End` instruction, the program is successful because a match was found.
34//! If there are no more threads to execute, the program has failed to match.
35//!
36//! A very simple program for the regex `a`:
37//!
38//! ```text
39//! 0: Lit("a")
40//! 1: End
41//! ```
42//!
43//! The `Split` instruction causes execution to split into two threads. The first thread is executed
44//! with the current string index. If it fails, we reset the string index and resume execution with
45//! the second thread. That is what "backtracking" refers to. In order to do that, we keep a stack
46//! of threads (PC and IX) to try.
47//!
48//! Example program for the regex `ab|ac`:
49//!
50//! ```text
51//! 0: Split(1, 4)
52//! 1: Lit("a")
53//! 2: Lit("b")
54//! 3: Jmp(6)
55//! 4: Lit("a")
56//! 5: Lit("c")
57//! 6: End
58//! ```
59//!
60//! The `Jmp` instruction causes execution to jump to the specified instruction. In the example it
61//! is needed to separate the two threads.
62//!
63//! Let's step through execution with that program for the input `ac`:
64//!
65//! 1. We're at PC 0 and IX 0
66//! 2. `Split(1, 4)` means we save a thread with PC 4 and IX 0 for trying later
67//! 3. Continue at `Lit("a")` which matches, so we advance IX to 1
68//! 4. `Lit("b")` doesn't match at IX 1 (`"b" != "c"`), so the thread fails
69//! 5. We continue with the previously saved thread at PC 4 and IX 0 (backtracking)
70//! 6. Both `Lit("a")` and `Lit("c")` match and we reach `End` -> successful match (index 0 to 2)
71
72use alloc::collections::BTreeSet;
73use alloc::string::String;
74use alloc::vec;
75use alloc::vec::Vec;
76use core::usize;
77use regex_automata::meta::Regex;
78use regex_automata::util::look::LookMatcher;
79use regex_automata::util::primitives::NonMaxUsize;
80use regex_automata::Anchored;
81use regex_automata::Input;
82
83use crate::error::RuntimeError;
84use crate::prev_codepoint_ix;
85use crate::Assertion;
86use crate::Error;
87use crate::Result;
88use crate::{codepoint_len, RegexOptions};
89
90/// Enable tracing of VM execution. Only for debugging/investigating.
91const OPTION_TRACE: u32 = 1 << 0;
92/// When iterating over all matches within a text (e.g. with `find_iter`), empty matches need to be
93/// handled specially. If we kept matching at the same position, we'd never stop. So what we do
94/// after we've had an empty match, is to advance the position where matching is attempted.
95/// If `\G` is used in the pattern, that means it no longer matches. If we didn't tell the VM about
96/// the fact that we skipped because of an empty match, it would still treat `\G` as matching. So
97/// this option is for communicating that to the VM. Phew.
98pub(crate) const OPTION_SKIPPED_EMPTY_MATCH: u32 = 1 << 1;
99
100// TODO: make configurable
101const MAX_STACK: usize = 1_000_000;
102
103/// Instruction of the VM.
104#[derive(Debug, Clone)]
105pub enum Insn {
106    /// Successful end of program
107    End,
108    /// Match any character (including newline)
109    Any,
110    /// Match any character (not including newline)
111    AnyNoNL,
112    /// Assertions
113    Assertion(Assertion),
114    /// Match the literal string at the current index
115    Lit(String), // should be cow?
116    /// Split execution into two threads. The two fields are positions of instructions. Execution
117    /// first tries the first thread. If that fails, the second position is tried.
118    Split(usize, usize),
119    /// Jump to instruction at position
120    Jmp(usize),
121    /// Save the current string index into the specified slot
122    Save(usize),
123    /// Save `0` into the specified slot
124    Save0(usize),
125    /// Set the string index to the value that was saved in the specified slot
126    Restore(usize),
127    /// Repeat greedily (match as much as possible)
128    RepeatGr {
129        /// Minimum number of matches
130        lo: usize,
131        /// Maximum number of matches
132        hi: usize,
133        /// The instruction after the repeat
134        next: usize,
135        /// The slot for keeping track of the number of repetitions
136        repeat: usize,
137    },
138    /// Repeat non-greedily (prefer matching as little as possible)
139    RepeatNg {
140        /// Minimum number of matches
141        lo: usize,
142        /// Maximum number of matches
143        hi: usize,
144        /// The instruction after the repeat
145        next: usize,
146        /// The slot for keeping track of the number of repetitions
147        repeat: usize,
148    },
149    /// Repeat greedily and prevent infinite loops from empty matches
150    RepeatEpsilonGr {
151        /// Minimum number of matches
152        lo: usize,
153        /// The instruction after the repeat
154        next: usize,
155        /// The slot for keeping track of the number of repetitions
156        repeat: usize,
157        /// The slot for saving the previous IX to check if we had an empty match
158        check: usize,
159    },
160    /// Repeat non-greedily and prevent infinite loops from empty matches
161    RepeatEpsilonNg {
162        /// Minimum number of matches
163        lo: usize,
164        /// The instruction after the repeat
165        next: usize,
166        /// The slot for keeping track of the number of repetitions
167        repeat: usize,
168        /// The slot for saving the previous IX to check if we had an empty match
169        check: usize,
170    },
171    /// Negative look-around failed
172    FailNegativeLookAround,
173    /// Set IX back by the specified number of characters
174    GoBack(usize),
175    /// Back reference to a group number to check
176    Backref(usize),
177    /// Begin of atomic group
178    BeginAtomic,
179    /// End of atomic group
180    EndAtomic,
181    /// Delegate matching to the regex crate
182    Delegate {
183        /// The regex
184        inner: Regex,
185        /// The first group number that this regex captures (if it contains groups)
186        start_group: usize,
187        /// The last group number
188        end_group: usize,
189    },
190    /// Anchor to match at the position where the previous match ended
191    ContinueFromPreviousMatchEnd,
192    /// Continue only if the specified capture group has already been populated as part of the match
193    BackrefExistsCondition(usize),
194}
195
196/// Sequence of instructions for the VM to execute.
197#[derive(Debug, Clone)]
198pub struct Prog {
199    /// Instructions of the program
200    pub body: Vec<Insn>,
201    n_saves: usize,
202}
203
204impl Prog {
205    pub(crate) fn new(body: Vec<Insn>, n_saves: usize) -> Prog {
206        Prog { body, n_saves }
207    }
208
209    #[doc(hidden)]
210    pub(crate) fn debug_print(&self) {
211        #[cfg(feature = "std")]
212        for (i, insn) in self.body.iter().enumerate() {
213            println!("{:3}: {:?}", i, insn);
214        }
215    }
216}
217
218#[derive(Debug)]
219struct Branch {
220    pc: usize,
221    ix: usize,
222    nsave: usize,
223}
224
225#[derive(Debug)]
226struct Save {
227    slot: usize,
228    value: usize,
229}
230
231struct State {
232    /// Saved values indexed by slot. Mostly indices to s, but can be repeat values etc.
233    /// Always contains the saves of the current state.
234    saves: Vec<usize>,
235    /// Stack of backtrack branches.
236    stack: Vec<Branch>,
237    /// Old saves (slot, value)
238    oldsave: Vec<Save>,
239    /// Number of saves at the end of `oldsave` that need to be restored to `saves` on pop
240    nsave: usize,
241    explicit_sp: usize,
242    /// Maximum size of the stack. If the size would be exceeded during execution, a `StackOverflow`
243    /// error is raised.
244    max_stack: usize,
245    #[allow(dead_code)]
246    options: u32,
247}
248
249// Each element in the stack conceptually represents the entire state
250// of the machine: the pc (index into prog), the index into the
251// string, and the entire vector of saves. However, copying the save
252// vector on every push/pop would be inefficient, so instead we use a
253// copy-on-write approach for each slot within the save vector. The
254// top `nsave` elements in `oldsave` represent the delta from the
255// current machine state to the top of stack.
256
257impl State {
258    fn new(n_saves: usize, max_stack: usize, options: u32) -> State {
259        State {
260            saves: vec![usize::MAX; n_saves],
261            stack: Vec::new(),
262            oldsave: Vec::new(),
263            nsave: 0,
264            explicit_sp: n_saves,
265            max_stack,
266            options,
267        }
268    }
269
270    // push a backtrack branch
271    fn push(&mut self, pc: usize, ix: usize) -> Result<()> {
272        if self.stack.len() < self.max_stack {
273            let nsave = self.nsave;
274            self.stack.push(Branch { pc, ix, nsave });
275            self.nsave = 0;
276            self.trace_stack("push");
277            Ok(())
278        } else {
279            Err(Error::RuntimeError(RuntimeError::StackOverflow))
280        }
281    }
282
283    // pop a backtrack branch
284    fn pop(&mut self) -> (usize, usize) {
285        for _ in 0..self.nsave {
286            let Save { slot, value } = self.oldsave.pop().unwrap();
287            self.saves[slot] = value;
288        }
289        let Branch { pc, ix, nsave } = self.stack.pop().unwrap();
290        self.nsave = nsave;
291        self.trace_stack("pop");
292        (pc, ix)
293    }
294
295    fn save(&mut self, slot: usize, val: usize) {
296        for i in 0..self.nsave {
297            // could avoid this iteration with some overhead; worth it?
298            if self.oldsave[self.oldsave.len() - i - 1].slot == slot {
299                // already saved, just update
300                self.saves[slot] = val;
301                return;
302            }
303        }
304        self.oldsave.push(Save {
305            slot,
306            value: self.saves[slot],
307        });
308        self.nsave += 1;
309        self.saves[slot] = val;
310
311        #[cfg(feature = "std")]
312        if self.options & OPTION_TRACE != 0 {
313            println!("saves: {:?}", self.saves);
314        }
315    }
316
317    fn get(&self, slot: usize) -> usize {
318        self.saves[slot]
319    }
320
321    // push a value onto the explicit stack; note: the entire contents of
322    // the explicit stack is saved and restored on backtrack.
323    fn stack_push(&mut self, val: usize) {
324        if self.saves.len() == self.explicit_sp {
325            self.saves.push(self.explicit_sp + 1);
326        }
327        let explicit_sp = self.explicit_sp;
328        let sp = self.get(explicit_sp);
329        if self.saves.len() == sp {
330            self.saves.push(val);
331        } else {
332            self.save(sp, val);
333        }
334        self.save(explicit_sp, sp + 1);
335    }
336
337    // pop a value from the explicit stack
338    fn stack_pop(&mut self) -> usize {
339        let explicit_sp = self.explicit_sp;
340        let sp = self.get(explicit_sp) - 1;
341        let result = self.get(sp);
342        self.save(explicit_sp, sp);
343        result
344    }
345
346    /// Get the current number of backtrack branches
347    fn backtrack_count(&self) -> usize {
348        self.stack.len()
349    }
350
351    /// Discard backtrack branches that were pushed since the call to `backtrack_count`.
352    ///
353    /// What we want:
354    /// * Keep the current `saves` as they are
355    /// * Only keep `count` backtrack branches on `stack`, discard the rest
356    /// * Keep the first `oldsave` for each slot, discard the rest (multiple pushes might have
357    ///   happened with saves to the same slot)
358    fn backtrack_cut(&mut self, count: usize) {
359        if self.stack.len() == count {
360            // no backtrack branches to discard, all good
361            return;
362        }
363        // start and end indexes of old saves for the branch we're cutting to
364        let (oldsave_start, oldsave_end) = {
365            let mut end = self.oldsave.len() - self.nsave;
366            for &Branch { nsave, .. } in &self.stack[count + 1..] {
367                end -= nsave;
368            }
369            let start = end - self.stack[count].nsave;
370            (start, end)
371        };
372        let mut saved = BTreeSet::new();
373        // keep all the old saves of our branch (they're all for different slots)
374        for &Save { slot, .. } in &self.oldsave[oldsave_start..oldsave_end] {
375            saved.insert(slot);
376        }
377        let mut oldsave_ix = oldsave_end;
378        // for other old saves, keep them only if they're for a slot that we haven't saved yet
379        for ix in oldsave_end..self.oldsave.len() {
380            let Save { slot, .. } = self.oldsave[ix];
381            let new_slot = saved.insert(slot);
382            if new_slot {
383                // put the save we want to keep (ix) after the ones we already have (oldsave_ix)
384                // note that it's fine if the indexes are the same (then swapping is a no-op)
385                self.oldsave.swap(oldsave_ix, ix);
386                oldsave_ix += 1;
387            }
388        }
389        self.stack.truncate(count);
390        self.oldsave.truncate(oldsave_ix);
391        self.nsave = oldsave_ix - oldsave_start;
392    }
393
394    #[inline]
395    #[allow(unused_variables)]
396    fn trace_stack(&self, operation: &str) {
397        #[cfg(feature = "std")]
398        if self.options & OPTION_TRACE != 0 {
399            println!("stack after {}: {:?}", operation, self.stack);
400        }
401    }
402}
403
404fn codepoint_len_at(s: &str, ix: usize) -> usize {
405    codepoint_len(s.as_bytes()[ix])
406}
407
408#[inline]
409fn matches_literal(s: &str, ix: usize, end: usize, literal: &str) -> bool {
410    // Compare as bytes because the literal might be a single byte char whereas ix
411    // points to a multibyte char. Comparing with str would result in an error like
412    // "byte index N is not a char boundary".
413    end <= s.len() && &s.as_bytes()[ix..end] == literal.as_bytes()
414}
415
416/// Run the program with trace printing for debugging.
417pub fn run_trace(prog: &Prog, s: &str, pos: usize) -> Result<Option<Vec<usize>>> {
418    run(prog, s, pos, OPTION_TRACE, &RegexOptions::default())
419}
420
421/// Run the program with default options.
422pub fn run_default(prog: &Prog, s: &str, pos: usize) -> Result<Option<Vec<usize>>> {
423    run(prog, s, pos, 0, &RegexOptions::default())
424}
425
426/// Run the program with options.
427#[allow(clippy::cognitive_complexity)]
428pub(crate) fn run(
429    prog: &Prog,
430    s: &str,
431    pos: usize,
432    option_flags: u32,
433    options: &RegexOptions,
434) -> Result<Option<Vec<usize>>> {
435    let mut state = State::new(prog.n_saves, MAX_STACK, option_flags);
436    let mut inner_slots: Vec<Option<NonMaxUsize>> = Vec::new();
437    let look_matcher = LookMatcher::new();
438    #[cfg(feature = "std")]
439    if option_flags & OPTION_TRACE != 0 {
440        println!("pos\tinstruction");
441    }
442    let mut backtrack_count = 0;
443    let mut pc = 0;
444    let mut ix = pos;
445    loop {
446        // break from this loop to fail, causes stack to pop
447        'fail: loop {
448            #[cfg(feature = "std")]
449            if option_flags & OPTION_TRACE != 0 {
450                println!("{}\t{} {:?}", ix, pc, prog.body[pc]);
451            }
452            match prog.body[pc] {
453                Insn::End => {
454                    // save of end position into slot 1 is now done
455                    // with an explicit group; we might want to
456                    // optimize that.
457                    //state.saves[1] = ix;
458                    #[cfg(feature = "std")]
459                    if option_flags & OPTION_TRACE != 0 {
460                        println!("saves: {:?}", state.saves);
461                    }
462                    if let Some(&slot1) = state.saves.get(1) {
463                        // With some features like keep out (\K), the match start can be after
464                        // the match end. Cap the start to <= end.
465                        if state.get(0) > slot1 {
466                            state.save(0, slot1);
467                        }
468                    }
469                    return Ok(Some(state.saves));
470                }
471                Insn::Any => {
472                    if ix < s.len() {
473                        ix += codepoint_len_at(s, ix);
474                    } else {
475                        break 'fail;
476                    }
477                }
478                Insn::AnyNoNL => {
479                    if ix < s.len() && s.as_bytes()[ix] != b'\n' {
480                        ix += codepoint_len_at(s, ix);
481                    } else {
482                        break 'fail;
483                    }
484                }
485                Insn::Lit(ref val) => {
486                    let ix_end = ix + val.len();
487                    if !matches_literal(s, ix, ix_end, val) {
488                        break 'fail;
489                    }
490                    ix = ix_end
491                }
492                Insn::Assertion(assertion) => {
493                    if !match assertion {
494                        Assertion::StartText => look_matcher.is_start(s.as_bytes(), ix),
495                        Assertion::EndText => look_matcher.is_end(s.as_bytes(), ix),
496                        Assertion::StartLine { crlf: false } => {
497                            look_matcher.is_start_lf(s.as_bytes(), ix)
498                        }
499                        Assertion::StartLine { crlf: true } => {
500                            look_matcher.is_start_crlf(s.as_bytes(), ix)
501                        }
502                        Assertion::EndLine { crlf: false } => {
503                            look_matcher.is_end_lf(s.as_bytes(), ix)
504                        }
505                        Assertion::EndLine { crlf: true } => {
506                            look_matcher.is_end_crlf(s.as_bytes(), ix)
507                        }
508                        Assertion::LeftWordBoundary => look_matcher
509                            .is_word_start_unicode(s.as_bytes(), ix)
510                            .unwrap(),
511                        Assertion::RightWordBoundary => {
512                            look_matcher.is_word_end_unicode(s.as_bytes(), ix).unwrap()
513                        }
514                        Assertion::WordBoundary => {
515                            look_matcher.is_word_unicode(s.as_bytes(), ix).unwrap()
516                        }
517                        Assertion::NotWordBoundary => look_matcher
518                            .is_word_unicode_negate(s.as_bytes(), ix)
519                            .unwrap(),
520                    } {
521                        break 'fail;
522                    }
523                }
524                Insn::Split(x, y) => {
525                    state.push(y, ix)?;
526                    pc = x;
527                    continue;
528                }
529                Insn::Jmp(target) => {
530                    pc = target;
531                    continue;
532                }
533                Insn::Save(slot) => state.save(slot, ix),
534                Insn::Save0(slot) => state.save(slot, 0),
535                Insn::Restore(slot) => ix = state.get(slot),
536                Insn::RepeatGr {
537                    lo,
538                    hi,
539                    next,
540                    repeat,
541                } => {
542                    let repcount = state.get(repeat);
543                    if repcount == hi {
544                        pc = next;
545                        continue;
546                    }
547                    state.save(repeat, repcount + 1);
548                    if repcount >= lo {
549                        state.push(next, ix)?;
550                    }
551                }
552                Insn::RepeatNg {
553                    lo,
554                    hi,
555                    next,
556                    repeat,
557                } => {
558                    let repcount = state.get(repeat);
559                    if repcount == hi {
560                        pc = next;
561                        continue;
562                    }
563                    state.save(repeat, repcount + 1);
564                    if repcount >= lo {
565                        state.push(pc + 1, ix)?;
566                        pc = next;
567                        continue;
568                    }
569                }
570                Insn::RepeatEpsilonGr {
571                    lo,
572                    next,
573                    repeat,
574                    check,
575                } => {
576                    let repcount = state.get(repeat);
577                    if repcount > lo && state.get(check) == ix {
578                        // prevent zero-length match on repeat
579                        break 'fail;
580                    }
581                    state.save(repeat, repcount + 1);
582                    if repcount >= lo {
583                        state.save(check, ix);
584                        state.push(next, ix)?;
585                    }
586                }
587                Insn::RepeatEpsilonNg {
588                    lo,
589                    next,
590                    repeat,
591                    check,
592                } => {
593                    let repcount = state.get(repeat);
594                    if repcount > lo && state.get(check) == ix {
595                        // prevent zero-length match on repeat
596                        break 'fail;
597                    }
598                    state.save(repeat, repcount + 1);
599                    if repcount >= lo {
600                        state.save(check, ix);
601                        state.push(pc + 1, ix)?;
602                        pc = next;
603                        continue;
604                    }
605                }
606                Insn::GoBack(count) => {
607                    for _ in 0..count {
608                        if ix == 0 {
609                            break 'fail;
610                        }
611                        ix = prev_codepoint_ix(s, ix);
612                    }
613                }
614                Insn::FailNegativeLookAround => {
615                    // Reaching this instruction means that the body of the
616                    // look-around matched. Because it's a *negative* look-around,
617                    // that means the look-around itself should fail (not match).
618                    // But before, we need to discard all the states that have
619                    // been pushed with the look-around, because we don't want to
620                    // explore them.
621                    loop {
622                        let (popped_pc, _) = state.pop();
623                        if popped_pc == pc + 1 {
624                            // We've reached the state that would jump us to
625                            // after the look-around (in case the look-around
626                            // succeeded). That means we popped enough states.
627                            break;
628                        }
629                    }
630                    break 'fail;
631                }
632                Insn::Backref(slot) => {
633                    let lo = state.get(slot);
634                    if lo == usize::MAX {
635                        // Referenced group hasn't matched, so the backref doesn't match either
636                        break 'fail;
637                    }
638                    let hi = state.get(slot + 1);
639                    if hi == usize::MAX {
640                        // Referenced group hasn't matched, so the backref doesn't match either
641                        break 'fail;
642                    }
643                    let ref_text = &s[lo..hi];
644                    let ix_end = ix + ref_text.len();
645                    if !matches_literal(s, ix, ix_end, ref_text) {
646                        break 'fail;
647                    }
648                    ix = ix_end;
649                }
650                Insn::BackrefExistsCondition(group) => {
651                    let lo = state.get(group * 2);
652                    if lo == usize::MAX {
653                        // Referenced group hasn't matched, so the backref doesn't match either
654                        break 'fail;
655                    }
656                }
657                Insn::BeginAtomic => {
658                    let count = state.backtrack_count();
659                    state.stack_push(count);
660                }
661                Insn::EndAtomic => {
662                    let count = state.stack_pop();
663                    state.backtrack_cut(count);
664                }
665                Insn::Delegate {
666                    ref inner,
667                    start_group,
668                    end_group,
669                } => {
670                    let input = Input::new(s).span(ix..s.len()).anchored(Anchored::Yes);
671                    if start_group == end_group {
672                        // No groups, so we can use faster methods
673                        match inner.search_half(&input) {
674                            Some(m) => ix = m.offset(),
675                            _ => break 'fail,
676                        }
677                    } else {
678                        inner_slots.resize((end_group - start_group + 1) * 2, None);
679                        if inner.search_slots(&input, &mut inner_slots).is_some() {
680                            for i in 0..(end_group - start_group) {
681                                let slot = (start_group + i) * 2;
682                                if let Some(start) = inner_slots[(i + 1) * 2] {
683                                    let end = inner_slots[(i + 1) * 2 + 1].unwrap();
684                                    state.save(slot, start.get());
685                                    state.save(slot + 1, end.get());
686                                } else {
687                                    state.save(slot, usize::MAX);
688                                    state.save(slot + 1, usize::MAX);
689                                }
690                            }
691                            ix = inner_slots[1].unwrap().get();
692                        } else {
693                            break 'fail;
694                        }
695                    }
696                }
697                Insn::ContinueFromPreviousMatchEnd => {
698                    if ix > pos || option_flags & OPTION_SKIPPED_EMPTY_MATCH != 0 {
699                        break 'fail;
700                    }
701                }
702            }
703            pc += 1;
704        }
705        #[cfg(feature = "std")]
706        if option_flags & OPTION_TRACE != 0 {
707            println!("fail");
708        }
709        // "break 'fail" goes here
710        if state.stack.is_empty() {
711            return Ok(None);
712        }
713
714        backtrack_count += 1;
715        if backtrack_count > options.backtrack_limit {
716            return Err(Error::RuntimeError(RuntimeError::BacktrackLimitExceeded));
717        }
718
719        let (newpc, newix) = state.pop();
720        pc = newpc;
721        ix = newix;
722    }
723}
724
725#[cfg(test)]
726mod tests {
727    use super::*;
728    use quickcheck::{quickcheck, Arbitrary, Gen};
729
730    #[test]
731    fn state_push_pop() {
732        let mut state = State::new(1, MAX_STACK, 0);
733
734        state.push(0, 0).unwrap();
735        state.push(1, 1).unwrap();
736        assert_eq!(state.pop(), (1, 1));
737        assert_eq!(state.pop(), (0, 0));
738        assert!(state.stack.is_empty());
739
740        state.push(2, 2).unwrap();
741        assert_eq!(state.pop(), (2, 2));
742        assert!(state.stack.is_empty());
743    }
744
745    #[test]
746    fn state_save_override() {
747        let mut state = State::new(1, MAX_STACK, 0);
748        state.save(0, 10);
749        state.push(0, 0).unwrap();
750        state.save(0, 20);
751        assert_eq!(state.pop(), (0, 0));
752        assert_eq!(state.get(0), 10);
753    }
754
755    #[test]
756    fn state_save_override_twice() {
757        let mut state = State::new(1, MAX_STACK, 0);
758        state.save(0, 10);
759        state.push(0, 0).unwrap();
760        state.save(0, 20);
761        state.push(1, 1).unwrap();
762        state.save(0, 30);
763
764        assert_eq!(state.get(0), 30);
765        assert_eq!(state.pop(), (1, 1));
766        assert_eq!(state.get(0), 20);
767        assert_eq!(state.pop(), (0, 0));
768        assert_eq!(state.get(0), 10);
769    }
770
771    #[test]
772    fn state_explicit_stack() {
773        let mut state = State::new(1, MAX_STACK, 0);
774        state.stack_push(11);
775        state.stack_push(12);
776
777        state.push(100, 101).unwrap();
778        state.stack_push(13);
779        assert_eq!(state.stack_pop(), 13);
780        state.stack_push(14);
781        assert_eq!(state.pop(), (100, 101));
782
783        // Note: 14 is not there because it was pushed as part of the backtrack branch
784        assert_eq!(state.stack_pop(), 12);
785        assert_eq!(state.stack_pop(), 11);
786    }
787
788    #[test]
789    fn state_backtrack_cut_simple() {
790        let mut state = State::new(2, MAX_STACK, 0);
791        state.save(0, 1);
792        state.save(1, 2);
793
794        let count = state.backtrack_count();
795
796        state.push(0, 0).unwrap();
797        state.save(0, 3);
798        assert_eq!(state.backtrack_count(), 1);
799
800        state.backtrack_cut(count);
801        assert_eq!(state.backtrack_count(), 0);
802        assert_eq!(state.get(0), 3);
803        assert_eq!(state.get(1), 2);
804    }
805
806    #[test]
807    fn state_backtrack_cut_complex() {
808        let mut state = State::new(2, MAX_STACK, 0);
809        state.save(0, 1);
810        state.save(1, 2);
811
812        state.push(0, 0).unwrap();
813        state.save(0, 3);
814
815        let count = state.backtrack_count();
816
817        state.push(1, 1).unwrap();
818        state.save(0, 4);
819        state.push(2, 2).unwrap();
820        state.save(1, 5);
821        assert_eq!(state.backtrack_count(), 3);
822
823        state.backtrack_cut(count);
824        assert_eq!(state.backtrack_count(), 1);
825        assert_eq!(state.get(0), 4);
826        assert_eq!(state.get(1), 5);
827
828        state.pop();
829        assert_eq!(state.backtrack_count(), 0);
830        // Check that oldsave were set correctly
831        assert_eq!(state.get(0), 1);
832        assert_eq!(state.get(1), 2);
833    }
834
835    #[derive(Clone, Debug)]
836    enum Operation {
837        Push,
838        Pop,
839        Save(usize, usize),
840    }
841
842    impl Arbitrary for Operation {
843        fn arbitrary(g: &mut Gen) -> Self {
844            match g.choose(&[0, 1, 2]) {
845                Some(0) => Operation::Push,
846                Some(1) => Operation::Pop,
847                _ => Operation::Save(
848                    *g.choose(&[0usize, 1, 2, 3, 4]).unwrap(),
849                    usize::arbitrary(g),
850                ),
851            }
852        }
853    }
854
855    fn check_saves_for_operations(operations: Vec<Operation>) -> bool {
856        let slots = operations
857            .iter()
858            .map(|o| match o {
859                &Operation::Save(slot, _) => slot + 1,
860                _ => 0,
861            })
862            .max()
863            .unwrap_or(0);
864        if slots == 0 {
865            // No point checking if there's no save instructions
866            return true;
867        }
868
869        // Stack with the complete VM state (including saves)
870        let mut stack = Vec::new();
871        let mut saves = vec![usize::MAX; slots];
872
873        let mut state = State::new(slots, MAX_STACK, 0);
874
875        let mut expected = Vec::new();
876        let mut actual = Vec::new();
877
878        for operation in operations {
879            match operation {
880                Operation::Push => {
881                    // We're not checking pc and ix later, so don't bother
882                    // putting in random values.
883                    stack.push((0, 0, saves.clone()));
884                    state.push(0, 0).unwrap();
885                }
886                Operation::Pop => {
887                    // Note that because we generate the operations randomly
888                    // there might be more pops than pushes. So ignore a pop
889                    // if the stack was empty.
890                    if let Some((_, _, previous_saves)) = stack.pop() {
891                        saves = previous_saves;
892                        state.pop();
893                    }
894                }
895                Operation::Save(slot, value) => {
896                    saves[slot] = value;
897                    state.save(slot, value);
898                }
899            }
900
901            // Remember state of saves for checking later
902            expected.push(saves.clone());
903            let mut actual_saves = vec![usize::MAX; slots];
904            for i in 0..slots {
905                actual_saves[i] = state.get(i);
906            }
907            actual.push(actual_saves);
908        }
909
910        expected == actual
911    }
912
913    quickcheck! {
914        fn state_save_quickcheck(operations: Vec<Operation>) -> bool {
915            check_saves_for_operations(operations)
916        }
917    }
918}
fancy_regex/vm.rs

fancy_regex/
vm.rs