fancy_regex/vm.rs
1// Copyright 2016 The Fancy Regex Authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Backtracking VM for implementing fancy regexes.
22//!
23//! Read <https://swtch.com/~rsc/regexp/regexp2.html> for a good introduction for how this works.
24//!
25//! The VM executes a sequence of instructions (a program) against an input string. It keeps track
26//! of a program counter (PC) and an index into the string (IX). Execution can have one or more
27//! threads.
28//!
29//! One of the basic instructions is `Lit`, which matches a string against the input. If it matches,
30//! the PC advances to the next instruction and the IX to the position after the matched string.
31//! If not, the current thread is stopped because it failed.
32//!
33//! If execution reaches an `End` instruction, the program is successful because a match was found.
34//! If there are no more threads to execute, the program has failed to match.
35//!
36//! A very simple program for the regex `a`:
37//!
38//! ```text
39//! 0: Lit("a")
40//! 1: End
41//! ```
42//!
43//! The `Split` instruction causes execution to split into two threads. The first thread is executed
44//! with the current string index. If it fails, we reset the string index and resume execution with
45//! the second thread. That is what "backtracking" refers to. In order to do that, we keep a stack
46//! of threads (PC and IX) to try.
47//!
48//! Example program for the regex `ab|ac`:
49//!
50//! ```text
51//! 0: Split(1, 4)
52//! 1: Lit("a")
53//! 2: Lit("b")
54//! 3: Jmp(6)
55//! 4: Lit("a")
56//! 5: Lit("c")
57//! 6: End
58//! ```
59//!
60//! The `Jmp` instruction causes execution to jump to the specified instruction. In the example it
61//! is needed to separate the two threads.
62//!
63//! Let's step through execution with that program for the input `ac`:
64//!
65//! 1. We're at PC 0 and IX 0
66//! 2. `Split(1, 4)` means we save a thread with PC 4 and IX 0 for trying later
67//! 3. Continue at `Lit("a")` which matches, so we advance IX to 1
68//! 4. `Lit("b")` doesn't match at IX 1 (`"b" != "c"`), so the thread fails
69//! 5. We continue with the previously saved thread at PC 4 and IX 0 (backtracking)
70//! 6. Both `Lit("a")` and `Lit("c")` match and we reach `End` -> successful match (index 0 to 2)
71
72use alloc::collections::BTreeSet;
73use alloc::string::String;
74use alloc::vec;
75use alloc::vec::Vec;
76use core::usize;
77use regex_automata::meta::Regex;
78use regex_automata::util::look::LookMatcher;
79use regex_automata::util::primitives::NonMaxUsize;
80use regex_automata::Anchored;
81use regex_automata::Input;
82
83use crate::error::RuntimeError;
84use crate::prev_codepoint_ix;
85use crate::Assertion;
86use crate::Error;
87use crate::Result;
88use crate::{codepoint_len, RegexOptions};
89
90/// Enable tracing of VM execution. Only for debugging/investigating.
91const OPTION_TRACE: u32 = 1 << 0;
92/// When iterating over all matches within a text (e.g. with `find_iter`), empty matches need to be
93/// handled specially. If we kept matching at the same position, we'd never stop. So what we do
94/// after we've had an empty match, is to advance the position where matching is attempted.
95/// If `\G` is used in the pattern, that means it no longer matches. If we didn't tell the VM about
96/// the fact that we skipped because of an empty match, it would still treat `\G` as matching. So
97/// this option is for communicating that to the VM. Phew.
98pub(crate) const OPTION_SKIPPED_EMPTY_MATCH: u32 = 1 << 1;
99
100// TODO: make configurable
101const MAX_STACK: usize = 1_000_000;
102
103/// Instruction of the VM.
104#[derive(Debug, Clone)]
105pub enum Insn {
106 /// Successful end of program
107 End,
108 /// Match any character (including newline)
109 Any,
110 /// Match any character (not including newline)
111 AnyNoNL,
112 /// Assertions
113 Assertion(Assertion),
114 /// Match the literal string at the current index
115 Lit(String), // should be cow?
116 /// Split execution into two threads. The two fields are positions of instructions. Execution
117 /// first tries the first thread. If that fails, the second position is tried.
118 Split(usize, usize),
119 /// Jump to instruction at position
120 Jmp(usize),
121 /// Save the current string index into the specified slot
122 Save(usize),
123 /// Save `0` into the specified slot
124 Save0(usize),
125 /// Set the string index to the value that was saved in the specified slot
126 Restore(usize),
127 /// Repeat greedily (match as much as possible)
128 RepeatGr {
129 /// Minimum number of matches
130 lo: usize,
131 /// Maximum number of matches
132 hi: usize,
133 /// The instruction after the repeat
134 next: usize,
135 /// The slot for keeping track of the number of repetitions
136 repeat: usize,
137 },
138 /// Repeat non-greedily (prefer matching as little as possible)
139 RepeatNg {
140 /// Minimum number of matches
141 lo: usize,
142 /// Maximum number of matches
143 hi: usize,
144 /// The instruction after the repeat
145 next: usize,
146 /// The slot for keeping track of the number of repetitions
147 repeat: usize,
148 },
149 /// Repeat greedily and prevent infinite loops from empty matches
150 RepeatEpsilonGr {
151 /// Minimum number of matches
152 lo: usize,
153 /// The instruction after the repeat
154 next: usize,
155 /// The slot for keeping track of the number of repetitions
156 repeat: usize,
157 /// The slot for saving the previous IX to check if we had an empty match
158 check: usize,
159 },
160 /// Repeat non-greedily and prevent infinite loops from empty matches
161 RepeatEpsilonNg {
162 /// Minimum number of matches
163 lo: usize,
164 /// The instruction after the repeat
165 next: usize,
166 /// The slot for keeping track of the number of repetitions
167 repeat: usize,
168 /// The slot for saving the previous IX to check if we had an empty match
169 check: usize,
170 },
171 /// Negative look-around failed
172 FailNegativeLookAround,
173 /// Set IX back by the specified number of characters
174 GoBack(usize),
175 /// Back reference to a group number to check
176 Backref(usize),
177 /// Begin of atomic group
178 BeginAtomic,
179 /// End of atomic group
180 EndAtomic,
181 /// Delegate matching to the regex crate
182 Delegate {
183 /// The regex
184 inner: Regex,
185 /// The first group number that this regex captures (if it contains groups)
186 start_group: usize,
187 /// The last group number
188 end_group: usize,
189 },
190 /// Anchor to match at the position where the previous match ended
191 ContinueFromPreviousMatchEnd,
192 /// Continue only if the specified capture group has already been populated as part of the match
193 BackrefExistsCondition(usize),
194}
195
196/// Sequence of instructions for the VM to execute.
197#[derive(Debug, Clone)]
198pub struct Prog {
199 /// Instructions of the program
200 pub body: Vec<Insn>,
201 n_saves: usize,
202}
203
204impl Prog {
205 pub(crate) fn new(body: Vec<Insn>, n_saves: usize) -> Prog {
206 Prog { body, n_saves }
207 }
208
209 #[doc(hidden)]
210 pub(crate) fn debug_print(&self) {
211 #[cfg(feature = "std")]
212 for (i, insn) in self.body.iter().enumerate() {
213 println!("{:3}: {:?}", i, insn);
214 }
215 }
216}
217
218#[derive(Debug)]
219struct Branch {
220 pc: usize,
221 ix: usize,
222 nsave: usize,
223}
224
225#[derive(Debug)]
226struct Save {
227 slot: usize,
228 value: usize,
229}
230
231struct State {
232 /// Saved values indexed by slot. Mostly indices to s, but can be repeat values etc.
233 /// Always contains the saves of the current state.
234 saves: Vec<usize>,
235 /// Stack of backtrack branches.
236 stack: Vec<Branch>,
237 /// Old saves (slot, value)
238 oldsave: Vec<Save>,
239 /// Number of saves at the end of `oldsave` that need to be restored to `saves` on pop
240 nsave: usize,
241 explicit_sp: usize,
242 /// Maximum size of the stack. If the size would be exceeded during execution, a `StackOverflow`
243 /// error is raised.
244 max_stack: usize,
245 #[allow(dead_code)]
246 options: u32,
247}
248
249// Each element in the stack conceptually represents the entire state
250// of the machine: the pc (index into prog), the index into the
251// string, and the entire vector of saves. However, copying the save
252// vector on every push/pop would be inefficient, so instead we use a
253// copy-on-write approach for each slot within the save vector. The
254// top `nsave` elements in `oldsave` represent the delta from the
255// current machine state to the top of stack.
256
257impl State {
258 fn new(n_saves: usize, max_stack: usize, options: u32) -> State {
259 State {
260 saves: vec![usize::MAX; n_saves],
261 stack: Vec::new(),
262 oldsave: Vec::new(),
263 nsave: 0,
264 explicit_sp: n_saves,
265 max_stack,
266 options,
267 }
268 }
269
270 // push a backtrack branch
271 fn push(&mut self, pc: usize, ix: usize) -> Result<()> {
272 if self.stack.len() < self.max_stack {
273 let nsave = self.nsave;
274 self.stack.push(Branch { pc, ix, nsave });
275 self.nsave = 0;
276 self.trace_stack("push");
277 Ok(())
278 } else {
279 Err(Error::RuntimeError(RuntimeError::StackOverflow))
280 }
281 }
282
283 // pop a backtrack branch
284 fn pop(&mut self) -> (usize, usize) {
285 for _ in 0..self.nsave {
286 let Save { slot, value } = self.oldsave.pop().unwrap();
287 self.saves[slot] = value;
288 }
289 let Branch { pc, ix, nsave } = self.stack.pop().unwrap();
290 self.nsave = nsave;
291 self.trace_stack("pop");
292 (pc, ix)
293 }
294
295 fn save(&mut self, slot: usize, val: usize) {
296 for i in 0..self.nsave {
297 // could avoid this iteration with some overhead; worth it?
298 if self.oldsave[self.oldsave.len() - i - 1].slot == slot {
299 // already saved, just update
300 self.saves[slot] = val;
301 return;
302 }
303 }
304 self.oldsave.push(Save {
305 slot,
306 value: self.saves[slot],
307 });
308 self.nsave += 1;
309 self.saves[slot] = val;
310
311 #[cfg(feature = "std")]
312 if self.options & OPTION_TRACE != 0 {
313 println!("saves: {:?}", self.saves);
314 }
315 }
316
317 fn get(&self, slot: usize) -> usize {
318 self.saves[slot]
319 }
320
321 // push a value onto the explicit stack; note: the entire contents of
322 // the explicit stack is saved and restored on backtrack.
323 fn stack_push(&mut self, val: usize) {
324 if self.saves.len() == self.explicit_sp {
325 self.saves.push(self.explicit_sp + 1);
326 }
327 let explicit_sp = self.explicit_sp;
328 let sp = self.get(explicit_sp);
329 if self.saves.len() == sp {
330 self.saves.push(val);
331 } else {
332 self.save(sp, val);
333 }
334 self.save(explicit_sp, sp + 1);
335 }
336
337 // pop a value from the explicit stack
338 fn stack_pop(&mut self) -> usize {
339 let explicit_sp = self.explicit_sp;
340 let sp = self.get(explicit_sp) - 1;
341 let result = self.get(sp);
342 self.save(explicit_sp, sp);
343 result
344 }
345
346 /// Get the current number of backtrack branches
347 fn backtrack_count(&self) -> usize {
348 self.stack.len()
349 }
350
351 /// Discard backtrack branches that were pushed since the call to `backtrack_count`.
352 ///
353 /// What we want:
354 /// * Keep the current `saves` as they are
355 /// * Only keep `count` backtrack branches on `stack`, discard the rest
356 /// * Keep the first `oldsave` for each slot, discard the rest (multiple pushes might have
357 /// happened with saves to the same slot)
358 fn backtrack_cut(&mut self, count: usize) {
359 if self.stack.len() == count {
360 // no backtrack branches to discard, all good
361 return;
362 }
363 // start and end indexes of old saves for the branch we're cutting to
364 let (oldsave_start, oldsave_end) = {
365 let mut end = self.oldsave.len() - self.nsave;
366 for &Branch { nsave, .. } in &self.stack[count + 1..] {
367 end -= nsave;
368 }
369 let start = end - self.stack[count].nsave;
370 (start, end)
371 };
372 let mut saved = BTreeSet::new();
373 // keep all the old saves of our branch (they're all for different slots)
374 for &Save { slot, .. } in &self.oldsave[oldsave_start..oldsave_end] {
375 saved.insert(slot);
376 }
377 let mut oldsave_ix = oldsave_end;
378 // for other old saves, keep them only if they're for a slot that we haven't saved yet
379 for ix in oldsave_end..self.oldsave.len() {
380 let Save { slot, .. } = self.oldsave[ix];
381 let new_slot = saved.insert(slot);
382 if new_slot {
383 // put the save we want to keep (ix) after the ones we already have (oldsave_ix)
384 // note that it's fine if the indexes are the same (then swapping is a no-op)
385 self.oldsave.swap(oldsave_ix, ix);
386 oldsave_ix += 1;
387 }
388 }
389 self.stack.truncate(count);
390 self.oldsave.truncate(oldsave_ix);
391 self.nsave = oldsave_ix - oldsave_start;
392 }
393
394 #[inline]
395 #[allow(unused_variables)]
396 fn trace_stack(&self, operation: &str) {
397 #[cfg(feature = "std")]
398 if self.options & OPTION_TRACE != 0 {
399 println!("stack after {}: {:?}", operation, self.stack);
400 }
401 }
402}
403
404fn codepoint_len_at(s: &str, ix: usize) -> usize {
405 codepoint_len(s.as_bytes()[ix])
406}
407
408#[inline]
409fn matches_literal(s: &str, ix: usize, end: usize, literal: &str) -> bool {
410 // Compare as bytes because the literal might be a single byte char whereas ix
411 // points to a multibyte char. Comparing with str would result in an error like
412 // "byte index N is not a char boundary".
413 end <= s.len() && &s.as_bytes()[ix..end] == literal.as_bytes()
414}
415
416/// Run the program with trace printing for debugging.
417pub fn run_trace(prog: &Prog, s: &str, pos: usize) -> Result<Option<Vec<usize>>> {
418 run(prog, s, pos, OPTION_TRACE, &RegexOptions::default())
419}
420
421/// Run the program with default options.
422pub fn run_default(prog: &Prog, s: &str, pos: usize) -> Result<Option<Vec<usize>>> {
423 run(prog, s, pos, 0, &RegexOptions::default())
424}
425
426/// Run the program with options.
427#[allow(clippy::cognitive_complexity)]
428pub(crate) fn run(
429 prog: &Prog,
430 s: &str,
431 pos: usize,
432 option_flags: u32,
433 options: &RegexOptions,
434) -> Result<Option<Vec<usize>>> {
435 let mut state = State::new(prog.n_saves, MAX_STACK, option_flags);
436 let mut inner_slots: Vec<Option<NonMaxUsize>> = Vec::new();
437 let look_matcher = LookMatcher::new();
438 #[cfg(feature = "std")]
439 if option_flags & OPTION_TRACE != 0 {
440 println!("pos\tinstruction");
441 }
442 let mut backtrack_count = 0;
443 let mut pc = 0;
444 let mut ix = pos;
445 loop {
446 // break from this loop to fail, causes stack to pop
447 'fail: loop {
448 #[cfg(feature = "std")]
449 if option_flags & OPTION_TRACE != 0 {
450 println!("{}\t{} {:?}", ix, pc, prog.body[pc]);
451 }
452 match prog.body[pc] {
453 Insn::End => {
454 // save of end position into slot 1 is now done
455 // with an explicit group; we might want to
456 // optimize that.
457 //state.saves[1] = ix;
458 #[cfg(feature = "std")]
459 if option_flags & OPTION_TRACE != 0 {
460 println!("saves: {:?}", state.saves);
461 }
462 if let Some(&slot1) = state.saves.get(1) {
463 // With some features like keep out (\K), the match start can be after
464 // the match end. Cap the start to <= end.
465 if state.get(0) > slot1 {
466 state.save(0, slot1);
467 }
468 }
469 return Ok(Some(state.saves));
470 }
471 Insn::Any => {
472 if ix < s.len() {
473 ix += codepoint_len_at(s, ix);
474 } else {
475 break 'fail;
476 }
477 }
478 Insn::AnyNoNL => {
479 if ix < s.len() && s.as_bytes()[ix] != b'\n' {
480 ix += codepoint_len_at(s, ix);
481 } else {
482 break 'fail;
483 }
484 }
485 Insn::Lit(ref val) => {
486 let ix_end = ix + val.len();
487 if !matches_literal(s, ix, ix_end, val) {
488 break 'fail;
489 }
490 ix = ix_end
491 }
492 Insn::Assertion(assertion) => {
493 if !match assertion {
494 Assertion::StartText => look_matcher.is_start(s.as_bytes(), ix),
495 Assertion::EndText => look_matcher.is_end(s.as_bytes(), ix),
496 Assertion::StartLine { crlf: false } => {
497 look_matcher.is_start_lf(s.as_bytes(), ix)
498 }
499 Assertion::StartLine { crlf: true } => {
500 look_matcher.is_start_crlf(s.as_bytes(), ix)
501 }
502 Assertion::EndLine { crlf: false } => {
503 look_matcher.is_end_lf(s.as_bytes(), ix)
504 }
505 Assertion::EndLine { crlf: true } => {
506 look_matcher.is_end_crlf(s.as_bytes(), ix)
507 }
508 Assertion::LeftWordBoundary => look_matcher
509 .is_word_start_unicode(s.as_bytes(), ix)
510 .unwrap(),
511 Assertion::RightWordBoundary => {
512 look_matcher.is_word_end_unicode(s.as_bytes(), ix).unwrap()
513 }
514 Assertion::WordBoundary => {
515 look_matcher.is_word_unicode(s.as_bytes(), ix).unwrap()
516 }
517 Assertion::NotWordBoundary => look_matcher
518 .is_word_unicode_negate(s.as_bytes(), ix)
519 .unwrap(),
520 } {
521 break 'fail;
522 }
523 }
524 Insn::Split(x, y) => {
525 state.push(y, ix)?;
526 pc = x;
527 continue;
528 }
529 Insn::Jmp(target) => {
530 pc = target;
531 continue;
532 }
533 Insn::Save(slot) => state.save(slot, ix),
534 Insn::Save0(slot) => state.save(slot, 0),
535 Insn::Restore(slot) => ix = state.get(slot),
536 Insn::RepeatGr {
537 lo,
538 hi,
539 next,
540 repeat,
541 } => {
542 let repcount = state.get(repeat);
543 if repcount == hi {
544 pc = next;
545 continue;
546 }
547 state.save(repeat, repcount + 1);
548 if repcount >= lo {
549 state.push(next, ix)?;
550 }
551 }
552 Insn::RepeatNg {
553 lo,
554 hi,
555 next,
556 repeat,
557 } => {
558 let repcount = state.get(repeat);
559 if repcount == hi {
560 pc = next;
561 continue;
562 }
563 state.save(repeat, repcount + 1);
564 if repcount >= lo {
565 state.push(pc + 1, ix)?;
566 pc = next;
567 continue;
568 }
569 }
570 Insn::RepeatEpsilonGr {
571 lo,
572 next,
573 repeat,
574 check,
575 } => {
576 let repcount = state.get(repeat);
577 if repcount > lo && state.get(check) == ix {
578 // prevent zero-length match on repeat
579 break 'fail;
580 }
581 state.save(repeat, repcount + 1);
582 if repcount >= lo {
583 state.save(check, ix);
584 state.push(next, ix)?;
585 }
586 }
587 Insn::RepeatEpsilonNg {
588 lo,
589 next,
590 repeat,
591 check,
592 } => {
593 let repcount = state.get(repeat);
594 if repcount > lo && state.get(check) == ix {
595 // prevent zero-length match on repeat
596 break 'fail;
597 }
598 state.save(repeat, repcount + 1);
599 if repcount >= lo {
600 state.save(check, ix);
601 state.push(pc + 1, ix)?;
602 pc = next;
603 continue;
604 }
605 }
606 Insn::GoBack(count) => {
607 for _ in 0..count {
608 if ix == 0 {
609 break 'fail;
610 }
611 ix = prev_codepoint_ix(s, ix);
612 }
613 }
614 Insn::FailNegativeLookAround => {
615 // Reaching this instruction means that the body of the
616 // look-around matched. Because it's a *negative* look-around,
617 // that means the look-around itself should fail (not match).
618 // But before, we need to discard all the states that have
619 // been pushed with the look-around, because we don't want to
620 // explore them.
621 loop {
622 let (popped_pc, _) = state.pop();
623 if popped_pc == pc + 1 {
624 // We've reached the state that would jump us to
625 // after the look-around (in case the look-around
626 // succeeded). That means we popped enough states.
627 break;
628 }
629 }
630 break 'fail;
631 }
632 Insn::Backref(slot) => {
633 let lo = state.get(slot);
634 if lo == usize::MAX {
635 // Referenced group hasn't matched, so the backref doesn't match either
636 break 'fail;
637 }
638 let hi = state.get(slot + 1);
639 if hi == usize::MAX {
640 // Referenced group hasn't matched, so the backref doesn't match either
641 break 'fail;
642 }
643 let ref_text = &s[lo..hi];
644 let ix_end = ix + ref_text.len();
645 if !matches_literal(s, ix, ix_end, ref_text) {
646 break 'fail;
647 }
648 ix = ix_end;
649 }
650 Insn::BackrefExistsCondition(group) => {
651 let lo = state.get(group * 2);
652 if lo == usize::MAX {
653 // Referenced group hasn't matched, so the backref doesn't match either
654 break 'fail;
655 }
656 }
657 Insn::BeginAtomic => {
658 let count = state.backtrack_count();
659 state.stack_push(count);
660 }
661 Insn::EndAtomic => {
662 let count = state.stack_pop();
663 state.backtrack_cut(count);
664 }
665 Insn::Delegate {
666 ref inner,
667 start_group,
668 end_group,
669 } => {
670 let input = Input::new(s).span(ix..s.len()).anchored(Anchored::Yes);
671 if start_group == end_group {
672 // No groups, so we can use faster methods
673 match inner.search_half(&input) {
674 Some(m) => ix = m.offset(),
675 _ => break 'fail,
676 }
677 } else {
678 inner_slots.resize((end_group - start_group + 1) * 2, None);
679 if inner.search_slots(&input, &mut inner_slots).is_some() {
680 for i in 0..(end_group - start_group) {
681 let slot = (start_group + i) * 2;
682 if let Some(start) = inner_slots[(i + 1) * 2] {
683 let end = inner_slots[(i + 1) * 2 + 1].unwrap();
684 state.save(slot, start.get());
685 state.save(slot + 1, end.get());
686 } else {
687 state.save(slot, usize::MAX);
688 state.save(slot + 1, usize::MAX);
689 }
690 }
691 ix = inner_slots[1].unwrap().get();
692 } else {
693 break 'fail;
694 }
695 }
696 }
697 Insn::ContinueFromPreviousMatchEnd => {
698 if ix > pos || option_flags & OPTION_SKIPPED_EMPTY_MATCH != 0 {
699 break 'fail;
700 }
701 }
702 }
703 pc += 1;
704 }
705 #[cfg(feature = "std")]
706 if option_flags & OPTION_TRACE != 0 {
707 println!("fail");
708 }
709 // "break 'fail" goes here
710 if state.stack.is_empty() {
711 return Ok(None);
712 }
713
714 backtrack_count += 1;
715 if backtrack_count > options.backtrack_limit {
716 return Err(Error::RuntimeError(RuntimeError::BacktrackLimitExceeded));
717 }
718
719 let (newpc, newix) = state.pop();
720 pc = newpc;
721 ix = newix;
722 }
723}
724
725#[cfg(test)]
726mod tests {
727 use super::*;
728 use quickcheck::{quickcheck, Arbitrary, Gen};
729
730 #[test]
731 fn state_push_pop() {
732 let mut state = State::new(1, MAX_STACK, 0);
733
734 state.push(0, 0).unwrap();
735 state.push(1, 1).unwrap();
736 assert_eq!(state.pop(), (1, 1));
737 assert_eq!(state.pop(), (0, 0));
738 assert!(state.stack.is_empty());
739
740 state.push(2, 2).unwrap();
741 assert_eq!(state.pop(), (2, 2));
742 assert!(state.stack.is_empty());
743 }
744
745 #[test]
746 fn state_save_override() {
747 let mut state = State::new(1, MAX_STACK, 0);
748 state.save(0, 10);
749 state.push(0, 0).unwrap();
750 state.save(0, 20);
751 assert_eq!(state.pop(), (0, 0));
752 assert_eq!(state.get(0), 10);
753 }
754
755 #[test]
756 fn state_save_override_twice() {
757 let mut state = State::new(1, MAX_STACK, 0);
758 state.save(0, 10);
759 state.push(0, 0).unwrap();
760 state.save(0, 20);
761 state.push(1, 1).unwrap();
762 state.save(0, 30);
763
764 assert_eq!(state.get(0), 30);
765 assert_eq!(state.pop(), (1, 1));
766 assert_eq!(state.get(0), 20);
767 assert_eq!(state.pop(), (0, 0));
768 assert_eq!(state.get(0), 10);
769 }
770
771 #[test]
772 fn state_explicit_stack() {
773 let mut state = State::new(1, MAX_STACK, 0);
774 state.stack_push(11);
775 state.stack_push(12);
776
777 state.push(100, 101).unwrap();
778 state.stack_push(13);
779 assert_eq!(state.stack_pop(), 13);
780 state.stack_push(14);
781 assert_eq!(state.pop(), (100, 101));
782
783 // Note: 14 is not there because it was pushed as part of the backtrack branch
784 assert_eq!(state.stack_pop(), 12);
785 assert_eq!(state.stack_pop(), 11);
786 }
787
788 #[test]
789 fn state_backtrack_cut_simple() {
790 let mut state = State::new(2, MAX_STACK, 0);
791 state.save(0, 1);
792 state.save(1, 2);
793
794 let count = state.backtrack_count();
795
796 state.push(0, 0).unwrap();
797 state.save(0, 3);
798 assert_eq!(state.backtrack_count(), 1);
799
800 state.backtrack_cut(count);
801 assert_eq!(state.backtrack_count(), 0);
802 assert_eq!(state.get(0), 3);
803 assert_eq!(state.get(1), 2);
804 }
805
806 #[test]
807 fn state_backtrack_cut_complex() {
808 let mut state = State::new(2, MAX_STACK, 0);
809 state.save(0, 1);
810 state.save(1, 2);
811
812 state.push(0, 0).unwrap();
813 state.save(0, 3);
814
815 let count = state.backtrack_count();
816
817 state.push(1, 1).unwrap();
818 state.save(0, 4);
819 state.push(2, 2).unwrap();
820 state.save(1, 5);
821 assert_eq!(state.backtrack_count(), 3);
822
823 state.backtrack_cut(count);
824 assert_eq!(state.backtrack_count(), 1);
825 assert_eq!(state.get(0), 4);
826 assert_eq!(state.get(1), 5);
827
828 state.pop();
829 assert_eq!(state.backtrack_count(), 0);
830 // Check that oldsave were set correctly
831 assert_eq!(state.get(0), 1);
832 assert_eq!(state.get(1), 2);
833 }
834
835 #[derive(Clone, Debug)]
836 enum Operation {
837 Push,
838 Pop,
839 Save(usize, usize),
840 }
841
842 impl Arbitrary for Operation {
843 fn arbitrary(g: &mut Gen) -> Self {
844 match g.choose(&[0, 1, 2]) {
845 Some(0) => Operation::Push,
846 Some(1) => Operation::Pop,
847 _ => Operation::Save(
848 *g.choose(&[0usize, 1, 2, 3, 4]).unwrap(),
849 usize::arbitrary(g),
850 ),
851 }
852 }
853 }
854
855 fn check_saves_for_operations(operations: Vec<Operation>) -> bool {
856 let slots = operations
857 .iter()
858 .map(|o| match o {
859 &Operation::Save(slot, _) => slot + 1,
860 _ => 0,
861 })
862 .max()
863 .unwrap_or(0);
864 if slots == 0 {
865 // No point checking if there's no save instructions
866 return true;
867 }
868
869 // Stack with the complete VM state (including saves)
870 let mut stack = Vec::new();
871 let mut saves = vec![usize::MAX; slots];
872
873 let mut state = State::new(slots, MAX_STACK, 0);
874
875 let mut expected = Vec::new();
876 let mut actual = Vec::new();
877
878 for operation in operations {
879 match operation {
880 Operation::Push => {
881 // We're not checking pc and ix later, so don't bother
882 // putting in random values.
883 stack.push((0, 0, saves.clone()));
884 state.push(0, 0).unwrap();
885 }
886 Operation::Pop => {
887 // Note that because we generate the operations randomly
888 // there might be more pops than pushes. So ignore a pop
889 // if the stack was empty.
890 if let Some((_, _, previous_saves)) = stack.pop() {
891 saves = previous_saves;
892 state.pop();
893 }
894 }
895 Operation::Save(slot, value) => {
896 saves[slot] = value;
897 state.save(slot, value);
898 }
899 }
900
901 // Remember state of saves for checking later
902 expected.push(saves.clone());
903 let mut actual_saves = vec![usize::MAX; slots];
904 for i in 0..slots {
905 actual_saves[i] = state.get(i);
906 }
907 actual.push(actual_saves);
908 }
909
910 expected == actual
911 }
912
913 quickcheck! {
914 fn state_save_quickcheck(operations: Vec<Operation>) -> bool {
915 check_saves_for_operations(operations)
916 }
917 }
918}