regex_lite/
nfa.rs

1use core::{cell::RefCell, mem::size_of};
2
3use alloc::{string::String, sync::Arc, vec, vec::Vec};
4
5use crate::{
6    error::Error,
7    hir::{self, Hir, HirKind},
8    int::U32,
9};
10
11pub(crate) type StateID = u32;
12
13#[derive(Clone, Copy, Debug)]
14pub(crate) struct Config {
15    pub(crate) size_limit: Option<usize>,
16}
17
18impl Default for Config {
19    fn default() -> Config {
20        Config { size_limit: Some(10 * (1 << 20)) }
21    }
22}
23
24#[derive(Clone)]
25pub(crate) struct NFA {
26    /// The pattern string this NFA was generated from.
27    ///
28    /// We put it here for lack of a better place to put it. ¯\_(ツ)_/¯
29    pattern: String,
30    /// The states that make up this NFA.
31    states: Vec<State>,
32    /// The ID of the start state.
33    start: StateID,
34    /// Whether this NFA can only match at the beginning of a haystack.
35    is_start_anchored: bool,
36    /// Whether this NFA can match the empty string.
37    is_match_empty: bool,
38    /// If every match has the same number of matching capture groups, then
39    /// this corresponds to the number of groups.
40    static_explicit_captures_len: Option<usize>,
41    /// A map from capture group name to its corresponding index.
42    cap_name_to_index: CaptureNameMap,
43    /// A map from capture group index to the corresponding name, if one
44    /// exists.
45    cap_index_to_name: Vec<Option<Arc<str>>>,
46    /// Heap memory used indirectly by NFA states and other things (like the
47    /// various capturing group representations above). Since each state
48    /// might use a different amount of heap, we need to keep track of this
49    /// incrementally.
50    memory_extra: usize,
51}
52
53impl NFA {
54    /// Creates a new NFA from the given configuration and HIR.
55    pub(crate) fn new(
56        config: Config,
57        pattern: String,
58        hir: &Hir,
59    ) -> Result<NFA, Error> {
60        Compiler::new(config, pattern).compile(hir)
61    }
62
63    /// Returns the pattern string used to construct this NFA.
64    pub(crate) fn pattern(&self) -> &str {
65        &self.pattern
66    }
67
68    /// Returns the state corresponding to the given ID.
69    ///
70    /// # Panics
71    ///
72    /// If the ID does not refer to a valid state, then this panics.
73    pub(crate) fn state(&self, id: StateID) -> &State {
74        &self.states[id.as_usize()]
75    }
76
77    /// Returns the total number of states in this NFA.
78    pub(crate) fn len(&self) -> usize {
79        self.states.len()
80    }
81
82    /// Returns the ID of the starting state for this NFA.
83    pub(crate) fn start(&self) -> StateID {
84        self.start
85    }
86
87    /// Returns the capture group index for the corresponding named group.
88    /// If no such group with the given name exists, then `None` is returned.
89    pub(crate) fn to_index(&self, name: &str) -> Option<usize> {
90        self.cap_name_to_index.get(name).cloned().map(|i| i.as_usize())
91    }
92
93    /*
94    /// Returns the capture group name for the corresponding index.
95    /// If no such group with the given index, then `None` is returned.
96    pub(crate) fn to_name(&self, index: usize) -> Option<&str> {
97        self.cap_index_to_name.get(index)?.as_deref()
98    }
99    */
100
101    /// Returns an iterator over all of the capture groups, along with their
102    /// names if they exist, in this NFA.
103    pub(crate) fn capture_names(&self) -> CaptureNames<'_> {
104        CaptureNames { it: self.cap_index_to_name.iter() }
105    }
106
107    /// Returns the total number of capture groups, including the first and
108    /// implicit group, in this NFA.
109    pub(crate) fn group_len(&self) -> usize {
110        self.cap_index_to_name.len()
111    }
112
113    /// Returns true if and only if this NFA can only match at the beginning of
114    /// a haystack.
115    pub(crate) fn is_start_anchored(&self) -> bool {
116        self.is_start_anchored
117    }
118
119    /// If the pattern always reports the same number of matching capture groups
120    /// for every match, then this returns the number of those groups. This
121    /// doesn't include the implicit group found in every pattern.
122    pub(crate) fn static_explicit_captures_len(&self) -> Option<usize> {
123        self.static_explicit_captures_len
124    }
125
126    /// Returns the heap memory usage, in bytes, used by this NFA.
127    fn memory_usage(&self) -> usize {
128        (self.states.len() * size_of::<State>())
129            + (self.cap_index_to_name.len() * size_of::<Option<Arc<str>>>())
130            + self.memory_extra
131    }
132}
133
134impl core::fmt::Debug for NFA {
135    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
136        writeln!(f, "NFA(")?;
137        writeln!(f, "pattern: {}", self.pattern)?;
138        for (sid, state) in self.states.iter().enumerate() {
139            writeln!(f, "{:07?}: {:?}", sid, state)?;
140        }
141        writeln!(f, ")")?;
142        Ok(())
143    }
144}
145
146/// An iterator over all capture groups in an NFA.
147///
148/// If a particular group has a name, then it is yielded. Otherwise, `None`
149/// is yielded.
150#[derive(Clone, Debug)]
151pub(crate) struct CaptureNames<'a> {
152    it: core::slice::Iter<'a, Option<Arc<str>>>,
153}
154
155impl<'a> Iterator for CaptureNames<'a> {
156    type Item = Option<&'a str>;
157
158    fn next(&mut self) -> Option<Option<&'a str>> {
159        self.it.next().map(|n| n.as_deref())
160    }
161}
162
163#[derive(Clone, Eq, PartialEq)]
164pub(crate) enum State {
165    Char { target: StateID, ch: char },
166    Ranges { target: StateID, ranges: Vec<(char, char)> },
167    Splits { targets: Vec<StateID>, reverse: bool },
168    Goto { target: StateID, look: Option<hir::Look> },
169    Capture { target: StateID, slot: u32 },
170    Fail,
171    Match,
172}
173
174impl State {
175    /// Returns the heap memory usage of this NFA state in bytes.
176    fn memory_usage(&self) -> usize {
177        match *self {
178            State::Char { .. }
179            | State::Goto { .. }
180            | State::Capture { .. }
181            | State::Fail { .. }
182            | State::Match => 0,
183            State::Splits { ref targets, .. } => {
184                targets.len() * size_of::<StateID>()
185            }
186            State::Ranges { ref ranges, .. } => {
187                ranges.len() * size_of::<(char, char)>()
188            }
189        }
190    }
191
192    /// Returns an iterator over the given split targets. The order of the
193    /// iterator yields elements in reverse when `reverse` is true.
194    pub(crate) fn iter_splits<'a>(
195        splits: &'a [StateID],
196        reverse: bool,
197    ) -> impl Iterator<Item = StateID> + 'a {
198        let mut it = splits.iter();
199        core::iter::from_fn(move || {
200            if reverse { it.next_back() } else { it.next() }.copied()
201        })
202    }
203}
204
205impl core::fmt::Debug for State {
206    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
207        match *self {
208            State::Char { target, ch } => {
209                write!(f, "{:?} => {:?}", ch, target)
210            }
211            State::Ranges { target, ref ranges } => {
212                for (i, &(start, end)) in ranges.iter().enumerate() {
213                    if i > 0 {
214                        write!(f, ", ")?;
215                    }
216                    write!(f, "{:?}-{:?} => {:?}", start, end, target)?;
217                }
218                Ok(())
219            }
220            State::Splits { ref targets, reverse } => {
221                write!(f, "splits(")?;
222                for (i, sid) in
223                    State::iter_splits(targets, reverse).enumerate()
224                {
225                    if i > 0 {
226                        write!(f, ", ")?;
227                    }
228                    write!(f, "{:?}", sid)?;
229                }
230                write!(f, ")")
231            }
232            State::Goto { target, look: None } => {
233                write!(f, "goto({:?})", target)
234            }
235            State::Goto { target, look: Some(look) } => {
236                write!(f, "{:?} => {:?}", look, target)
237            }
238            State::Capture { target, slot } => {
239                write!(f, "capture(slot={:?}) => {:?}", slot, target,)
240            }
241            State::Fail => write!(f, "FAIL"),
242            State::Match => {
243                write!(f, "MATCH")
244            }
245        }
246    }
247}
248
249/// A map from capture group name to its corresponding capture group index.
250///
251/// We define a type alias here so that we can transparently use a `HashMap`
252/// whenever it's available. We do so presumably because it's faster, although
253/// there are no benchmarks verifying this.
254#[cfg(feature = "std")]
255type CaptureNameMap = std::collections::HashMap<Arc<str>, u32>;
256#[cfg(not(feature = "std"))]
257type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, u32>;
258
259#[derive(Debug)]
260struct Compiler {
261    config: Config,
262    nfa: RefCell<NFA>,
263}
264
265impl Compiler {
266    fn new(config: Config, pattern: String) -> Compiler {
267        let nfa = RefCell::new(NFA {
268            pattern,
269            states: vec![],
270            start: 0,
271            is_start_anchored: false,
272            is_match_empty: false,
273            static_explicit_captures_len: None,
274            cap_name_to_index: CaptureNameMap::default(),
275            cap_index_to_name: vec![],
276            memory_extra: 0,
277        });
278        Compiler { config, nfa }
279    }
280
281    fn compile(self, hir: &Hir) -> Result<NFA, Error> {
282        self.nfa.borrow_mut().is_start_anchored = hir.is_start_anchored();
283        self.nfa.borrow_mut().is_match_empty = hir.is_match_empty();
284        self.nfa.borrow_mut().static_explicit_captures_len =
285            hir.static_explicit_captures_len();
286        let compiled = self.c_capture(0, None, hir)?;
287        let mat = self.add(State::Match)?;
288        self.patch(compiled.end, mat)?;
289        self.nfa.borrow_mut().start = compiled.start;
290        Ok(self.nfa.into_inner())
291    }
292
293    fn c(&self, hir: &Hir) -> Result<ThompsonRef, Error> {
294        match *hir.kind() {
295            HirKind::Empty => self.c_empty(),
296            HirKind::Char(ch) => self.c_char(ch),
297            HirKind::Class(ref class) => self.c_class(class),
298            HirKind::Look(ref look) => self.c_look(look),
299            HirKind::Repetition(ref rep) => self.c_repetition(rep),
300            HirKind::Capture(ref cap) => {
301                self.c_capture(cap.index, cap.name.as_deref(), &cap.sub)
302            }
303            HirKind::Concat(ref subs) => {
304                self.c_concat(subs.iter().map(|s| self.c(s)))
305            }
306            HirKind::Alternation(ref subs) => {
307                self.c_alternation(subs.iter().map(|s| self.c(s)))
308            }
309        }
310    }
311
312    /// Compile a "fail" state that can never be transitioned out of.
313    fn c_fail(&self) -> Result<ThompsonRef, Error> {
314        let id = self.add(State::Fail)?;
315        Ok(ThompsonRef { start: id, end: id })
316    }
317
318    /// Compile an "empty" state with one unconditional epsilon transition.
319    ///
320    /// Both the `start` and `end` locations point to the state created.
321    /// Callers will likely want to keep the `start`, but patch the `end` to
322    /// point to some other state.
323    fn c_empty(&self) -> Result<ThompsonRef, Error> {
324        let id = self.add_empty()?;
325        Ok(ThompsonRef { start: id, end: id })
326    }
327
328    /// Compile the given literal char to an NFA.
329    fn c_char(&self, ch: char) -> Result<ThompsonRef, Error> {
330        let id = self.add(State::Char { target: 0, ch })?;
331        Ok(ThompsonRef { start: id, end: id })
332    }
333
334    /// Compile the given character class into an NFA.
335    ///
336    /// If the class is empty, then this compiles to a `Fail` state.
337    fn c_class(&self, class: &hir::Class) -> Result<ThompsonRef, Error> {
338        let id = if class.ranges.is_empty() {
339            // Technically using an explicit fail state probably isn't
340            // necessary. Because if you try to match against an empty Ranges,
341            // then it should turn up with nothing regardless of input, and
342            // thus "acts" like a Fail state. But it's better to be more
343            // explicit, and there's no real cost to doing so.
344            self.add(State::Fail)
345        } else {
346            let ranges =
347                class.ranges.iter().map(|r| (r.start, r.end)).collect();
348            self.add(State::Ranges { target: 0, ranges })
349        }?;
350        Ok(ThompsonRef { start: id, end: id })
351    }
352
353    /// Compile the given HIR look-around assertion to an NFA look-around
354    /// assertion.
355    fn c_look(&self, look: &hir::Look) -> Result<ThompsonRef, Error> {
356        let id = self.add(State::Goto { target: 0, look: Some(*look) })?;
357        Ok(ThompsonRef { start: id, end: id })
358    }
359
360    /// Compile the given repetition expression. This handles all types of
361    /// repetitions and greediness.
362    fn c_repetition(
363        &self,
364        rep: &hir::Repetition,
365    ) -> Result<ThompsonRef, Error> {
366        match (rep.min, rep.max) {
367            (0, Some(1)) => self.c_zero_or_one(&rep.sub, rep.greedy),
368            (min, None) => self.c_at_least(&rep.sub, rep.greedy, min),
369            (min, Some(max)) if min == max => self.c_exactly(&rep.sub, min),
370            (min, Some(max)) => self.c_bounded(&rep.sub, rep.greedy, min, max),
371        }
372    }
373
374    /// Compile the given expression such that it matches at least `min` times,
375    /// but no more than `max` times.
376    ///
377    /// When `greedy` is true, then the preference is for the expression to
378    /// match as much as possible. Otherwise, it will match as little as
379    /// possible.
380    fn c_bounded(
381        &self,
382        hir: &Hir,
383        greedy: bool,
384        min: u32,
385        max: u32,
386    ) -> Result<ThompsonRef, Error> {
387        let prefix = self.c_exactly(hir, min)?;
388        if min == max {
389            return Ok(prefix);
390        }
391
392        // It is tempting here to compile the rest here as a concatenation
393        // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it
394        // were `aaa?a?a?`. The problem here is that it leads to this program:
395        //
396        //     >000000: 61 => 01
397        //      000001: 61 => 02
398        //      000002: union(03, 04)
399        //      000003: 61 => 04
400        //      000004: union(05, 06)
401        //      000005: 61 => 06
402        //      000006: union(07, 08)
403        //      000007: 61 => 08
404        //      000008: MATCH
405        //
406        // And effectively, once you hit state 2, the epsilon closure will
407        // include states 3, 5, 6, 7 and 8, which is quite a bit. It is better
408        // to instead compile it like so:
409        //
410        //     >000000: 61 => 01
411        //      000001: 61 => 02
412        //      000002: union(03, 08)
413        //      000003: 61 => 04
414        //      000004: union(05, 08)
415        //      000005: 61 => 06
416        //      000006: union(07, 08)
417        //      000007: 61 => 08
418        //      000008: MATCH
419        //
420        // So that the epsilon closure of state 2 is now just 3 and 8.
421        let empty = self.add_empty()?;
422        let mut prev_end = prefix.end;
423        for _ in min..max {
424            let splits =
425                self.add(State::Splits { targets: vec![], reverse: !greedy })?;
426            let compiled = self.c(hir)?;
427            self.patch(prev_end, splits)?;
428            self.patch(splits, compiled.start)?;
429            self.patch(splits, empty)?;
430            prev_end = compiled.end;
431        }
432        self.patch(prev_end, empty)?;
433        Ok(ThompsonRef { start: prefix.start, end: empty })
434    }
435
436    /// Compile the given expression such that it may be matched `n` or more
437    /// times, where `n` can be any integer. (Although a particularly large
438    /// integer is likely to run afoul of any configured size limits.)
439    ///
440    /// When `greedy` is true, then the preference is for the expression to
441    /// match as much as possible. Otherwise, it will match as little as
442    /// possible.
443    fn c_at_least(
444        &self,
445        hir: &Hir,
446        greedy: bool,
447        n: u32,
448    ) -> Result<ThompsonRef, Error> {
449        if n == 0 {
450            // When the expression cannot match the empty string, then we
451            // can get away with something much simpler: just one 'alt'
452            // instruction that optionally repeats itself. But if the expr
453            // can match the empty string... see below.
454            if !hir.is_match_empty() {
455                let splits = self.add(State::Splits {
456                    targets: vec![],
457                    reverse: !greedy,
458                })?;
459                let compiled = self.c(hir)?;
460                self.patch(splits, compiled.start)?;
461                self.patch(compiled.end, splits)?;
462                return Ok(ThompsonRef { start: splits, end: splits });
463            }
464
465            // What's going on here? Shouldn't x* be simpler than this? It
466            // turns out that when implementing leftmost-first (Perl-like)
467            // match semantics, x* results in an incorrect preference order
468            // when computing the transitive closure of states if and only if
469            // 'x' can match the empty string. So instead, we compile x* as
470            // (x+)?, which preserves the correct preference order.
471            //
472            // See: https://github.com/rust-lang/regex/issues/779
473            let compiled = self.c(hir)?;
474            let plus =
475                self.add(State::Splits { targets: vec![], reverse: !greedy })?;
476            self.patch(compiled.end, plus)?;
477            self.patch(plus, compiled.start)?;
478
479            let question =
480                self.add(State::Splits { targets: vec![], reverse: !greedy })?;
481            let empty = self.add_empty()?;
482            self.patch(question, compiled.start)?;
483            self.patch(question, empty)?;
484            self.patch(plus, empty)?;
485            Ok(ThompsonRef { start: question, end: empty })
486        } else if n == 1 {
487            let compiled = self.c(hir)?;
488            let splits =
489                self.add(State::Splits { targets: vec![], reverse: !greedy })?;
490            self.patch(compiled.end, splits)?;
491            self.patch(splits, compiled.start)?;
492            Ok(ThompsonRef { start: compiled.start, end: splits })
493        } else {
494            let prefix = self.c_exactly(hir, n - 1)?;
495            let last = self.c(hir)?;
496            let splits =
497                self.add(State::Splits { targets: vec![], reverse: !greedy })?;
498            self.patch(prefix.end, last.start)?;
499            self.patch(last.end, splits)?;
500            self.patch(splits, last.start)?;
501            Ok(ThompsonRef { start: prefix.start, end: splits })
502        }
503    }
504
505    /// Compile the given expression such that it may be matched zero or one
506    /// times.
507    ///
508    /// When `greedy` is true, then the preference is for the expression to
509    /// match as much as possible. Otherwise, it will match as little as
510    /// possible.
511    fn c_zero_or_one(
512        &self,
513        hir: &Hir,
514        greedy: bool,
515    ) -> Result<ThompsonRef, Error> {
516        let splits =
517            self.add(State::Splits { targets: vec![], reverse: !greedy })?;
518        let compiled = self.c(hir)?;
519        let empty = self.add_empty()?;
520        self.patch(splits, compiled.start)?;
521        self.patch(splits, empty)?;
522        self.patch(compiled.end, empty)?;
523        Ok(ThompsonRef { start: splits, end: empty })
524    }
525
526    /// Compile the given HIR expression exactly `n` times.
527    fn c_exactly(&self, hir: &Hir, n: u32) -> Result<ThompsonRef, Error> {
528        self.c_concat((0..n).map(|_| self.c(hir)))
529    }
530
531    /// Compile the given expression and insert capturing states at the
532    /// beginning and end of it. The slot for the capture states is computed
533    /// from the index.
534    fn c_capture(
535        &self,
536        index: u32,
537        name: Option<&str>,
538        hir: &Hir,
539    ) -> Result<ThompsonRef, Error> {
540        // For discontiguous indices, push placeholders for earlier capture
541        // groups that weren't explicitly added. This can happen, for example,
542        // with patterns like '(a){0}(a)' where '(a){0}' is completely removed
543        // from the pattern.
544        let existing_groups_len = self.nfa.borrow().cap_index_to_name.len();
545        for _ in 0..(index.as_usize().saturating_sub(existing_groups_len)) {
546            self.nfa.borrow_mut().cap_index_to_name.push(None);
547        }
548        if index.as_usize() >= existing_groups_len {
549            if let Some(name) = name {
550                let name = Arc::from(name);
551                let mut nfa = self.nfa.borrow_mut();
552                nfa.cap_name_to_index.insert(Arc::clone(&name), index);
553                nfa.cap_index_to_name.push(Some(Arc::clone(&name)));
554                // This is an approximation.
555                nfa.memory_extra += name.len() + size_of::<u32>();
556            } else {
557                self.nfa.borrow_mut().cap_index_to_name.push(None);
558            }
559        }
560
561        let Some(slot) = index.checked_mul(2) else {
562            return Err(Error::new("capture group slots exhausted"));
563        };
564        let start = self.add(State::Capture { target: 0, slot })?;
565        let inner = self.c(hir)?;
566        let Some(slot) = slot.checked_add(1) else {
567            return Err(Error::new("capture group slots exhausted"));
568        };
569        let end = self.add(State::Capture { target: 0, slot })?;
570        self.patch(start, inner.start)?;
571        self.patch(inner.end, end)?;
572
573        Ok(ThompsonRef { start, end })
574    }
575
576    /// Compile a concatenation of the sub-expressions yielded by the given
577    /// iterator. If the iterator yields no elements, then this compiles down
578    /// to an "empty" state that always matches.
579    fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
580    where
581        I: Iterator<Item = Result<ThompsonRef, Error>>,
582    {
583        let ThompsonRef { start, mut end } = match it.next() {
584            Some(result) => result?,
585            None => return self.c_empty(),
586        };
587        for result in it {
588            let compiled = result?;
589            self.patch(end, compiled.start)?;
590            end = compiled.end;
591        }
592        Ok(ThompsonRef { start, end })
593    }
594
595    /// Compile an alternation, where each element yielded by the given
596    /// iterator represents an item in the alternation. If the iterator yields
597    /// no elements, then this compiles down to a "fail" state.
598    ///
599    /// In an alternation, expressions appearing earlier are "preferred" at
600    /// match time over expressions appearing later. (This is currently always
601    /// true, as this crate only supports leftmost-first semantics.)
602    fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef, Error>
603    where
604        I: Iterator<Item = Result<ThompsonRef, Error>>,
605    {
606        let first = match it.next() {
607            None => return self.c_fail(),
608            Some(result) => result?,
609        };
610        let second = match it.next() {
611            None => return Ok(first),
612            Some(result) => result?,
613        };
614
615        let splits =
616            self.add(State::Splits { targets: vec![], reverse: false })?;
617        let end = self.add_empty()?;
618        self.patch(splits, first.start)?;
619        self.patch(first.end, end)?;
620        self.patch(splits, second.start)?;
621        self.patch(second.end, end)?;
622        for result in it {
623            let compiled = result?;
624            self.patch(splits, compiled.start)?;
625            self.patch(compiled.end, end)?;
626        }
627        Ok(ThompsonRef { start: splits, end })
628    }
629
630    /// A convenience routine for adding an empty state, also known as an
631    /// unconditional epsilon transition. These are quite useful for making
632    /// NFA construction simpler.
633    ///
634    /// (In the regex crate, we do a second pass to remove these, but don't
635    /// bother with that here.)
636    fn add_empty(&self) -> Result<StateID, Error> {
637        self.add(State::Goto { target: 0, look: None })
638    }
639
640    /// The common implementation of "add a state." It handles the common
641    /// error cases of state ID exhausting (by owning state ID allocation) and
642    /// whether the size limit has been exceeded.
643    fn add(&self, state: State) -> Result<StateID, Error> {
644        let id = u32::try_from(self.nfa.borrow().states.len())
645            .map_err(|_| Error::new("exhausted state IDs, too many states"))?;
646        self.nfa.borrow_mut().memory_extra += state.memory_usage();
647        self.nfa.borrow_mut().states.push(state);
648        self.check_size_limit()?;
649        Ok(id)
650    }
651
652    /// Add a transition from one state to another.
653    ///
654    /// This routine is called "patch" since it is very common to add the
655    /// states you want, typically with "dummy" state ID transitions, and then
656    /// "patch" in the real state IDs later. This is because you don't always
657    /// know all of the necessary state IDs to add because they might not
658    /// exist yet.
659    ///
660    /// # Errors
661    ///
662    /// This may error if patching leads to an increase in heap usage beyond
663    /// the configured size limit. Heap usage only grows when patching adds a
664    /// new transition (as in the case of a "splits" state).
665    fn patch(&self, from: StateID, to: StateID) -> Result<(), Error> {
666        let mut new_memory_extra = self.nfa.borrow().memory_extra;
667        match self.nfa.borrow_mut().states[from.as_usize()] {
668            State::Char { ref mut target, .. } => {
669                *target = to;
670            }
671            State::Ranges { ref mut target, .. } => {
672                *target = to;
673            }
674            State::Splits { ref mut targets, .. } => {
675                targets.push(to);
676                new_memory_extra += size_of::<StateID>();
677            }
678            State::Goto { ref mut target, .. } => {
679                *target = to;
680            }
681            State::Capture { ref mut target, .. } => {
682                *target = to;
683            }
684            State::Fail | State::Match => {}
685        }
686        if new_memory_extra != self.nfa.borrow().memory_extra {
687            self.nfa.borrow_mut().memory_extra = new_memory_extra;
688            self.check_size_limit()?;
689        }
690        Ok(())
691    }
692
693    /// Checks that the current heap memory usage of the NFA being compiled
694    /// doesn't exceed the configured size limit. If it does, an error is
695    /// returned.
696    fn check_size_limit(&self) -> Result<(), Error> {
697        if let Some(limit) = self.config.size_limit {
698            if self.nfa.borrow().memory_usage() > limit {
699                return Err(Error::new("compiled regex exceeded size limit"));
700            }
701        }
702        Ok(())
703    }
704}
705
706/// A value that represents the result of compiling a sub-expression of a
707/// regex's HIR. Specifically, this represents a sub-graph of the NFA that
708/// has an initial state at `start` and a final state at `end`.
709#[derive(Clone, Copy, Debug)]
710struct ThompsonRef {
711    start: StateID,
712    end: StateID,
713}