globset/
glob.rs

1use std::fmt::Write;
2use std::path::{Path, is_separator};
3
4use regex_automata::meta::Regex;
5
6use crate::{Candidate, Error, ErrorKind, new_regex};
7
8/// Describes a matching strategy for a particular pattern.
9///
10/// This provides a way to more quickly determine whether a pattern matches
11/// a particular file path in a way that scales with a large number of
12/// patterns. For example, if many patterns are of the form `*.ext`, then it's
13/// possible to test whether any of those patterns matches by looking up a
14/// file path's extension in a hash table.
15#[derive(Clone, Debug, Eq, PartialEq)]
16pub(crate) enum MatchStrategy {
17    /// A pattern matches if and only if the entire file path matches this
18    /// literal string.
19    Literal(String),
20    /// A pattern matches if and only if the file path's basename matches this
21    /// literal string.
22    BasenameLiteral(String),
23    /// A pattern matches if and only if the file path's extension matches this
24    /// literal string.
25    Extension(String),
26    /// A pattern matches if and only if this prefix literal is a prefix of the
27    /// candidate file path.
28    Prefix(String),
29    /// A pattern matches if and only if this prefix literal is a prefix of the
30    /// candidate file path.
31    ///
32    /// An exception: if `component` is true, then `suffix` must appear at the
33    /// beginning of a file path or immediately following a `/`.
34    Suffix {
35        /// The actual suffix.
36        suffix: String,
37        /// Whether this must start at the beginning of a path component.
38        component: bool,
39    },
40    /// A pattern matches only if the given extension matches the file path's
41    /// extension. Note that this is a necessary but NOT sufficient criterion.
42    /// Namely, if the extension matches, then a full regex search is still
43    /// required.
44    RequiredExtension(String),
45    /// A regex needs to be used for matching.
46    Regex,
47}
48
49impl MatchStrategy {
50    /// Returns a matching strategy for the given pattern.
51    pub(crate) fn new(pat: &Glob) -> MatchStrategy {
52        if let Some(lit) = pat.basename_literal() {
53            MatchStrategy::BasenameLiteral(lit)
54        } else if let Some(lit) = pat.literal() {
55            MatchStrategy::Literal(lit)
56        } else if let Some(ext) = pat.ext() {
57            MatchStrategy::Extension(ext)
58        } else if let Some(prefix) = pat.prefix() {
59            MatchStrategy::Prefix(prefix)
60        } else if let Some((suffix, component)) = pat.suffix() {
61            MatchStrategy::Suffix { suffix, component }
62        } else if let Some(ext) = pat.required_ext() {
63            MatchStrategy::RequiredExtension(ext)
64        } else {
65            MatchStrategy::Regex
66        }
67    }
68}
69
70/// Glob represents a successfully parsed shell glob pattern.
71///
72/// It cannot be used directly to match file paths, but it can be converted
73/// to a regular expression string or a matcher.
74#[derive(Clone, Eq)]
75#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
76pub struct Glob {
77    glob: String,
78    re: String,
79    opts: GlobOptions,
80    tokens: Tokens,
81}
82
83impl AsRef<Glob> for Glob {
84    fn as_ref(&self) -> &Glob {
85        self
86    }
87}
88
89impl PartialEq for Glob {
90    fn eq(&self, other: &Glob) -> bool {
91        self.glob == other.glob && self.opts == other.opts
92    }
93}
94
95impl std::hash::Hash for Glob {
96    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
97        self.glob.hash(state);
98        self.opts.hash(state);
99    }
100}
101
102impl std::fmt::Debug for Glob {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        if f.alternate() {
105            f.debug_struct("Glob")
106                .field("glob", &self.glob)
107                .field("re", &self.re)
108                .field("opts", &self.opts)
109                .field("tokens", &self.tokens)
110                .finish()
111        } else {
112            f.debug_tuple("Glob").field(&self.glob).finish()
113        }
114    }
115}
116
117impl std::fmt::Display for Glob {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        self.glob.fmt(f)
120    }
121}
122
123impl std::str::FromStr for Glob {
124    type Err = Error;
125
126    fn from_str(glob: &str) -> Result<Self, Self::Err> {
127        Self::new(glob)
128    }
129}
130
131/// A matcher for a single pattern.
132#[derive(Clone, Debug)]
133pub struct GlobMatcher {
134    /// The underlying pattern.
135    pat: Glob,
136    /// The pattern, as a compiled regex.
137    re: Regex,
138}
139
140impl GlobMatcher {
141    /// Tests whether the given path matches this pattern or not.
142    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
143        self.is_match_candidate(&Candidate::new(path.as_ref()))
144    }
145
146    /// Tests whether the given path matches this pattern or not.
147    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
148        self.re.is_match(&path.path)
149    }
150
151    /// Returns the `Glob` used to compile this matcher.
152    pub fn glob(&self) -> &Glob {
153        &self.pat
154    }
155}
156
157/// A strategic matcher for a single pattern.
158#[cfg(test)]
159#[derive(Clone, Debug)]
160struct GlobStrategic {
161    /// The match strategy to use.
162    strategy: MatchStrategy,
163    /// The pattern, as a compiled regex.
164    re: Regex,
165}
166
167#[cfg(test)]
168impl GlobStrategic {
169    /// Tests whether the given path matches this pattern or not.
170    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
171        self.is_match_candidate(&Candidate::new(path.as_ref()))
172    }
173
174    /// Tests whether the given path matches this pattern or not.
175    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
176        let byte_path = &*candidate.path;
177
178        match self.strategy {
179            MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
180            MatchStrategy::BasenameLiteral(ref lit) => {
181                lit.as_bytes() == &*candidate.basename
182            }
183            MatchStrategy::Extension(ref ext) => {
184                ext.as_bytes() == &*candidate.ext
185            }
186            MatchStrategy::Prefix(ref pre) => {
187                starts_with(pre.as_bytes(), byte_path)
188            }
189            MatchStrategy::Suffix { ref suffix, component } => {
190                if component && byte_path == &suffix.as_bytes()[1..] {
191                    return true;
192                }
193                ends_with(suffix.as_bytes(), byte_path)
194            }
195            MatchStrategy::RequiredExtension(ref ext) => {
196                let ext = ext.as_bytes();
197                &*candidate.ext == ext && self.re.is_match(byte_path)
198            }
199            MatchStrategy::Regex => self.re.is_match(byte_path),
200        }
201    }
202}
203
204/// A builder for a pattern.
205///
206/// This builder enables configuring the match semantics of a pattern. For
207/// example, one can make matching case insensitive.
208///
209/// The lifetime `'a` refers to the lifetime of the pattern string.
210#[derive(Clone, Debug)]
211pub struct GlobBuilder<'a> {
212    /// The glob pattern to compile.
213    glob: &'a str,
214    /// Options for the pattern.
215    opts: GlobOptions,
216}
217
218#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
219#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
220struct GlobOptions {
221    /// Whether to match case insensitively.
222    case_insensitive: bool,
223    /// Whether to require a literal separator to match a separator in a file
224    /// path. e.g., when enabled, `*` won't match `/`.
225    literal_separator: bool,
226    /// Whether or not to use `\` to escape special characters.
227    /// e.g., when enabled, `\*` will match a literal `*`.
228    backslash_escape: bool,
229    /// Whether or not an empty case in an alternate will be removed.
230    /// e.g., when enabled, `{,a}` will match "" and "a".
231    empty_alternates: bool,
232    /// Whether or not an unclosed character class is allowed. When an unclosed
233    /// character class is found, the opening `[` is treated as a literal `[`.
234    /// When this isn't enabled, an opening `[` without a corresponding `]` is
235    /// treated as an error.
236    allow_unclosed_class: bool,
237}
238
239impl GlobOptions {
240    fn default() -> GlobOptions {
241        GlobOptions {
242            case_insensitive: false,
243            literal_separator: false,
244            backslash_escape: !is_separator('\\'),
245            empty_alternates: false,
246            allow_unclosed_class: false,
247        }
248    }
249}
250
251#[derive(Clone, Debug, Default, Eq, PartialEq)]
252#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
253struct Tokens(Vec<Token>);
254
255impl std::ops::Deref for Tokens {
256    type Target = Vec<Token>;
257    fn deref(&self) -> &Vec<Token> {
258        &self.0
259    }
260}
261
262impl std::ops::DerefMut for Tokens {
263    fn deref_mut(&mut self) -> &mut Vec<Token> {
264        &mut self.0
265    }
266}
267
268#[derive(Clone, Debug, Eq, PartialEq)]
269#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
270enum Token {
271    Literal(char),
272    Any,
273    ZeroOrMore,
274    RecursivePrefix,
275    RecursiveSuffix,
276    RecursiveZeroOrMore,
277    Class { negated: bool, ranges: Vec<(char, char)> },
278    Alternates(Vec<Tokens>),
279}
280
281impl Glob {
282    /// Builds a new pattern with default options.
283    pub fn new(glob: &str) -> Result<Glob, Error> {
284        GlobBuilder::new(glob).build()
285    }
286
287    /// Returns a matcher for this pattern.
288    pub fn compile_matcher(&self) -> GlobMatcher {
289        let re =
290            new_regex(&self.re).expect("regex compilation shouldn't fail");
291        GlobMatcher { pat: self.clone(), re }
292    }
293
294    /// Returns a strategic matcher.
295    ///
296    /// This isn't exposed because it's not clear whether it's actually
297    /// faster than just running a regex for a *single* pattern. If it
298    /// is faster, then GlobMatcher should do it automatically.
299    #[cfg(test)]
300    fn compile_strategic_matcher(&self) -> GlobStrategic {
301        let strategy = MatchStrategy::new(self);
302        let re =
303            new_regex(&self.re).expect("regex compilation shouldn't fail");
304        GlobStrategic { strategy, re }
305    }
306
307    /// Returns the original glob pattern used to build this pattern.
308    pub fn glob(&self) -> &str {
309        &self.glob
310    }
311
312    /// Returns the regular expression string for this glob.
313    ///
314    /// Note that regular expressions for globs are intended to be matched on
315    /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
316    /// particular, globs are frequently used on file paths, where there is no
317    /// general guarantee that file paths are themselves valid UTF-8. As a
318    /// result, callers will need to ensure that they are using a regex API
319    /// that can match on arbitrary bytes. For example, the
320    /// [`regex`](https://crates.io/regex)
321    /// crate's
322    /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
323    /// API is not suitable for this since it matches on `&str`, but its
324    /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
325    /// API is suitable for this.
326    pub fn regex(&self) -> &str {
327        &self.re
328    }
329
330    /// Returns the pattern as a literal if and only if the pattern must match
331    /// an entire path exactly.
332    ///
333    /// The basic format of these patterns is `{literal}`.
334    fn literal(&self) -> Option<String> {
335        if self.opts.case_insensitive {
336            return None;
337        }
338        let mut lit = String::new();
339        for t in &*self.tokens {
340            let Token::Literal(c) = *t else { return None };
341            lit.push(c);
342        }
343        if lit.is_empty() { None } else { Some(lit) }
344    }
345
346    /// Returns an extension if this pattern matches a file path if and only
347    /// if the file path has the extension returned.
348    ///
349    /// Note that this extension returned differs from the extension that
350    /// std::path::Path::extension returns. Namely, this extension includes
351    /// the '.'. Also, paths like `.rs` are considered to have an extension
352    /// of `.rs`.
353    fn ext(&self) -> Option<String> {
354        if self.opts.case_insensitive {
355            return None;
356        }
357        let start = match *self.tokens.get(0)? {
358            Token::RecursivePrefix => 1,
359            _ => 0,
360        };
361        match *self.tokens.get(start)? {
362            Token::ZeroOrMore => {
363                // If there was no recursive prefix, then we only permit
364                // `*` if `*` can match a `/`. For example, if `*` can't
365                // match `/`, then `*.c` doesn't match `foo/bar.c`.
366                if start == 0 && self.opts.literal_separator {
367                    return None;
368                }
369            }
370            _ => return None,
371        }
372        match *self.tokens.get(start + 1)? {
373            Token::Literal('.') => {}
374            _ => return None,
375        }
376        let mut lit = ".".to_string();
377        for t in self.tokens[start + 2..].iter() {
378            match *t {
379                Token::Literal('.') | Token::Literal('/') => return None,
380                Token::Literal(c) => lit.push(c),
381                _ => return None,
382            }
383        }
384        if lit.is_empty() { None } else { Some(lit) }
385    }
386
387    /// This is like `ext`, but returns an extension even if it isn't sufficient
388    /// to imply a match. Namely, if an extension is returned, then it is
389    /// necessary but not sufficient for a match.
390    fn required_ext(&self) -> Option<String> {
391        if self.opts.case_insensitive {
392            return None;
393        }
394        // We don't care at all about the beginning of this pattern. All we
395        // need to check for is if it ends with a literal of the form `.ext`.
396        let mut ext: Vec<char> = vec![]; // built in reverse
397        for t in self.tokens.iter().rev() {
398            match *t {
399                Token::Literal('/') => return None,
400                Token::Literal(c) => {
401                    ext.push(c);
402                    if c == '.' {
403                        break;
404                    }
405                }
406                _ => return None,
407            }
408        }
409        if ext.last() != Some(&'.') {
410            None
411        } else {
412            ext.reverse();
413            Some(ext.into_iter().collect())
414        }
415    }
416
417    /// Returns a literal prefix of this pattern if the entire pattern matches
418    /// if the literal prefix matches.
419    fn prefix(&self) -> Option<String> {
420        if self.opts.case_insensitive {
421            return None;
422        }
423        let (end, need_sep) = match *self.tokens.last()? {
424            Token::ZeroOrMore => {
425                if self.opts.literal_separator {
426                    // If a trailing `*` can't match a `/`, then we can't
427                    // assume a match of the prefix corresponds to a match
428                    // of the overall pattern. e.g., `foo/*` with
429                    // `literal_separator` enabled matches `foo/bar` but not
430                    // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
431                    // literal prefix.
432                    return None;
433                }
434                (self.tokens.len() - 1, false)
435            }
436            Token::RecursiveSuffix => (self.tokens.len() - 1, true),
437            _ => (self.tokens.len(), false),
438        };
439        let mut lit = String::new();
440        for t in &self.tokens[0..end] {
441            let Token::Literal(c) = *t else { return None };
442            lit.push(c);
443        }
444        if need_sep {
445            lit.push('/');
446        }
447        if lit.is_empty() { None } else { Some(lit) }
448    }
449
450    /// Returns a literal suffix of this pattern if the entire pattern matches
451    /// if the literal suffix matches.
452    ///
453    /// If a literal suffix is returned and it must match either the entire
454    /// file path or be preceded by a `/`, then also return true. This happens
455    /// with a pattern like `**/foo/bar`. Namely, this pattern matches
456    /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
457    /// suffix returned is `/foo/bar` (but should match the entire path
458    /// `foo/bar`).
459    ///
460    /// When this returns true, the suffix literal is guaranteed to start with
461    /// a `/`.
462    fn suffix(&self) -> Option<(String, bool)> {
463        if self.opts.case_insensitive {
464            return None;
465        }
466        let mut lit = String::new();
467        let (start, entire) = match *self.tokens.get(0)? {
468            Token::RecursivePrefix => {
469                // We only care if this follows a path component if the next
470                // token is a literal.
471                if let Some(&Token::Literal(_)) = self.tokens.get(1) {
472                    lit.push('/');
473                    (1, true)
474                } else {
475                    (1, false)
476                }
477            }
478            _ => (0, false),
479        };
480        let start = match *self.tokens.get(start)? {
481            Token::ZeroOrMore => {
482                // If literal_separator is enabled, then a `*` can't
483                // necessarily match everything, so reporting a suffix match
484                // as a match of the pattern would be a false positive.
485                if self.opts.literal_separator {
486                    return None;
487                }
488                start + 1
489            }
490            _ => start,
491        };
492        for t in &self.tokens[start..] {
493            let Token::Literal(c) = *t else { return None };
494            lit.push(c);
495        }
496        if lit.is_empty() || lit == "/" { None } else { Some((lit, entire)) }
497    }
498
499    /// If this pattern only needs to inspect the basename of a file path,
500    /// then the tokens corresponding to only the basename match are returned.
501    ///
502    /// For example, given a pattern of `**/*.foo`, only the tokens
503    /// corresponding to `*.foo` are returned.
504    ///
505    /// Note that this will return None if any match of the basename tokens
506    /// doesn't correspond to a match of the entire pattern. For example, the
507    /// glob `foo` only matches when a file path has a basename of `foo`, but
508    /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
509    /// `foo` doesn't match `abc/foo`.
510    fn basename_tokens(&self) -> Option<&[Token]> {
511        if self.opts.case_insensitive {
512            return None;
513        }
514        let start = match *self.tokens.get(0)? {
515            Token::RecursivePrefix => 1,
516            _ => {
517                // With nothing to gobble up the parent portion of a path,
518                // we can't assume that matching on only the basename is
519                // correct.
520                return None;
521            }
522        };
523        if self.tokens[start..].is_empty() {
524            return None;
525        }
526        for t in self.tokens[start..].iter() {
527            match *t {
528                Token::Literal('/') => return None,
529                Token::Literal(_) => {} // OK
530                Token::Any | Token::ZeroOrMore => {
531                    if !self.opts.literal_separator {
532                        // In this case, `*` and `?` can match a path
533                        // separator, which means this could reach outside
534                        // the basename.
535                        return None;
536                    }
537                }
538                Token::RecursivePrefix
539                | Token::RecursiveSuffix
540                | Token::RecursiveZeroOrMore => {
541                    return None;
542                }
543                Token::Class { .. } | Token::Alternates(..) => {
544                    // We *could* be a little smarter here, but either one
545                    // of these is going to prevent our literal optimizations
546                    // anyway, so give up.
547                    return None;
548                }
549            }
550        }
551        Some(&self.tokens[start..])
552    }
553
554    /// Returns the pattern as a literal if and only if the pattern exclusively
555    /// matches the basename of a file path *and* is a literal.
556    ///
557    /// The basic format of these patterns is `**/{literal}`, where `{literal}`
558    /// does not contain a path separator.
559    fn basename_literal(&self) -> Option<String> {
560        let tokens = self.basename_tokens()?;
561        let mut lit = String::new();
562        for t in tokens {
563            let Token::Literal(c) = *t else { return None };
564            lit.push(c);
565        }
566        Some(lit)
567    }
568}
569
570impl<'a> GlobBuilder<'a> {
571    /// Create a new builder for the pattern given.
572    ///
573    /// The pattern is not compiled until `build` is called.
574    pub fn new(glob: &'a str) -> GlobBuilder<'a> {
575        GlobBuilder { glob, opts: GlobOptions::default() }
576    }
577
578    /// Parses and builds the pattern.
579    pub fn build(&self) -> Result<Glob, Error> {
580        let mut p = Parser {
581            glob: &self.glob,
582            alternates_stack: Vec::new(),
583            branches: vec![Tokens::default()],
584            chars: self.glob.chars().peekable(),
585            prev: None,
586            cur: None,
587            found_unclosed_class: false,
588            opts: &self.opts,
589        };
590        p.parse()?;
591        if p.branches.is_empty() {
592            // OK because of how the the branches/alternate_stack are managed.
593            // If we end up here, then there *must* be a bug in the parser
594            // somewhere.
595            unreachable!()
596        } else if p.branches.len() > 1 {
597            Err(Error {
598                glob: Some(self.glob.to_string()),
599                kind: ErrorKind::UnclosedAlternates,
600            })
601        } else {
602            let tokens = p.branches.pop().unwrap();
603            Ok(Glob {
604                glob: self.glob.to_string(),
605                re: tokens.to_regex_with(&self.opts),
606                opts: self.opts,
607                tokens,
608            })
609        }
610    }
611
612    /// Toggle whether the pattern matches case insensitively or not.
613    ///
614    /// This is disabled by default.
615    pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
616        self.opts.case_insensitive = yes;
617        self
618    }
619
620    /// Toggle whether a literal `/` is required to match a path separator.
621    ///
622    /// By default this is false: `*` and `?` will match `/`.
623    pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
624        self.opts.literal_separator = yes;
625        self
626    }
627
628    /// When enabled, a back slash (`\`) may be used to escape
629    /// special characters in a glob pattern. Additionally, this will
630    /// prevent `\` from being interpreted as a path separator on all
631    /// platforms.
632    ///
633    /// This is enabled by default on platforms where `\` is not a
634    /// path separator and disabled by default on platforms where `\`
635    /// is a path separator.
636    pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
637        self.opts.backslash_escape = yes;
638        self
639    }
640
641    /// Toggle whether an empty pattern in a list of alternates is accepted.
642    ///
643    /// For example, if this is set then the glob `foo{,.txt}` will match both
644    /// `foo` and `foo.txt`.
645    ///
646    /// By default this is false.
647    pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
648        self.opts.empty_alternates = yes;
649        self
650    }
651
652    /// Toggle whether unclosed character classes are allowed. When allowed,
653    /// a `[` without a matching `]` is treated literally instead of resulting
654    /// in a parse error.
655    ///
656    /// For example, if this is set then the glob `[abc` will be treated as the
657    /// literal string `[abc` instead of returning an error.
658    ///
659    /// By default, this is false. Generally speaking, enabling this leads to
660    /// worse failure modes since the glob parser becomes more permissive. You
661    /// might want to enable this when compatibility (e.g., with POSIX glob
662    /// implementations) is more important than good error messages.
663    pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
664        self.opts.allow_unclosed_class = yes;
665        self
666    }
667}
668
669impl Tokens {
670    /// Convert this pattern to a string that is guaranteed to be a valid
671    /// regular expression and will represent the matching semantics of this
672    /// glob pattern and the options given.
673    fn to_regex_with(&self, options: &GlobOptions) -> String {
674        let mut re = String::new();
675        re.push_str("(?-u)");
676        if options.case_insensitive {
677            re.push_str("(?i)");
678        }
679        re.push('^');
680        // Special case. If the entire glob is just `**`, then it should match
681        // everything.
682        if self.len() == 1 && self[0] == Token::RecursivePrefix {
683            re.push_str(".*");
684            re.push('$');
685            return re;
686        }
687        self.tokens_to_regex(options, &self, &mut re);
688        re.push('$');
689        re
690    }
691
692    fn tokens_to_regex(
693        &self,
694        options: &GlobOptions,
695        tokens: &[Token],
696        re: &mut String,
697    ) {
698        for tok in tokens.iter() {
699            match *tok {
700                Token::Literal(c) => {
701                    re.push_str(&char_to_escaped_literal(c));
702                }
703                Token::Any => {
704                    if options.literal_separator {
705                        re.push_str("[^/]");
706                    } else {
707                        re.push_str(".");
708                    }
709                }
710                Token::ZeroOrMore => {
711                    if options.literal_separator {
712                        re.push_str("[^/]*");
713                    } else {
714                        re.push_str(".*");
715                    }
716                }
717                Token::RecursivePrefix => {
718                    re.push_str("(?:/?|.*/)");
719                }
720                Token::RecursiveSuffix => {
721                    re.push_str("/.*");
722                }
723                Token::RecursiveZeroOrMore => {
724                    re.push_str("(?:/|/.*/)");
725                }
726                Token::Class { negated, ref ranges } => {
727                    re.push('[');
728                    if negated {
729                        re.push('^');
730                    }
731                    for r in ranges {
732                        if r.0 == r.1 {
733                            // Not strictly necessary, but nicer to look at.
734                            re.push_str(&char_to_escaped_literal(r.0));
735                        } else {
736                            re.push_str(&char_to_escaped_literal(r.0));
737                            re.push('-');
738                            re.push_str(&char_to_escaped_literal(r.1));
739                        }
740                    }
741                    re.push(']');
742                }
743                Token::Alternates(ref patterns) => {
744                    let mut parts = vec![];
745                    for pat in patterns {
746                        let mut altre = String::new();
747                        self.tokens_to_regex(options, &pat, &mut altre);
748                        if !altre.is_empty() || options.empty_alternates {
749                            parts.push(altre);
750                        }
751                    }
752
753                    // It is possible to have an empty set in which case the
754                    // resulting alternation '()' would be an error.
755                    if !parts.is_empty() {
756                        re.push_str("(?:");
757                        re.push_str(&parts.join("|"));
758                        re.push(')');
759                    }
760                }
761            }
762        }
763    }
764}
765
766/// Convert a Unicode scalar value to an escaped string suitable for use as
767/// a literal in a non-Unicode regex.
768fn char_to_escaped_literal(c: char) -> String {
769    let mut buf = [0; 4];
770    let bytes = c.encode_utf8(&mut buf).as_bytes();
771    bytes_to_escaped_literal(bytes)
772}
773
774/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
775/// code units are converted to their escaped form.
776fn bytes_to_escaped_literal(bs: &[u8]) -> String {
777    let mut s = String::with_capacity(bs.len());
778    for &b in bs {
779        if b <= 0x7F {
780            regex_syntax::escape_into(
781                char::from(b).encode_utf8(&mut [0; 4]),
782                &mut s,
783            );
784        } else {
785            write!(&mut s, "\\x{:02x}", b).unwrap();
786        }
787    }
788    s
789}
790
791struct Parser<'a> {
792    /// The glob to parse.
793    glob: &'a str,
794    /// Marks the index in `stack` where the alternation started.
795    alternates_stack: Vec<usize>,
796    /// The set of active alternation branches being parsed.
797    /// Tokens are added to the end of the last one.
798    branches: Vec<Tokens>,
799    /// A character iterator over the glob pattern to parse.
800    chars: std::iter::Peekable<std::str::Chars<'a>>,
801    /// The previous character seen.
802    prev: Option<char>,
803    /// The current character.
804    cur: Option<char>,
805    /// Whether we failed to find a closing `]` for a character
806    /// class. This can only be true when `GlobOptions::allow_unclosed_class`
807    /// is enabled. When enabled, it is impossible to ever parse another
808    /// character class with this glob. That's because classes cannot be
809    /// nested *and* the only way this happens is when there is never a `]`.
810    ///
811    /// We track this state so that we don't end up spending quadratic time
812    /// trying to parse something like `[[[[[[[[[[[[[[[[[[[[[[[...`.
813    found_unclosed_class: bool,
814    /// Glob options, which may influence parsing.
815    opts: &'a GlobOptions,
816}
817
818impl<'a> Parser<'a> {
819    fn error(&self, kind: ErrorKind) -> Error {
820        Error { glob: Some(self.glob.to_string()), kind }
821    }
822
823    fn parse(&mut self) -> Result<(), Error> {
824        while let Some(c) = self.bump() {
825            match c {
826                '?' => self.push_token(Token::Any)?,
827                '*' => self.parse_star()?,
828                '[' if !self.found_unclosed_class => self.parse_class()?,
829                '{' => self.push_alternate()?,
830                '}' => self.pop_alternate()?,
831                ',' => self.parse_comma()?,
832                '\\' => self.parse_backslash()?,
833                c => self.push_token(Token::Literal(c))?,
834            }
835        }
836        Ok(())
837    }
838
839    fn push_alternate(&mut self) -> Result<(), Error> {
840        self.alternates_stack.push(self.branches.len());
841        self.branches.push(Tokens::default());
842        Ok(())
843    }
844
845    fn pop_alternate(&mut self) -> Result<(), Error> {
846        let Some(start) = self.alternates_stack.pop() else {
847            return Err(self.error(ErrorKind::UnopenedAlternates));
848        };
849        assert!(start <= self.branches.len());
850        let alts = Token::Alternates(self.branches.drain(start..).collect());
851        self.push_token(alts)?;
852        Ok(())
853    }
854
855    fn push_token(&mut self, tok: Token) -> Result<(), Error> {
856        if let Some(ref mut pat) = self.branches.last_mut() {
857            return Ok(pat.push(tok));
858        }
859        Err(self.error(ErrorKind::UnopenedAlternates))
860    }
861
862    fn pop_token(&mut self) -> Result<Token, Error> {
863        if let Some(ref mut pat) = self.branches.last_mut() {
864            return Ok(pat.pop().unwrap());
865        }
866        Err(self.error(ErrorKind::UnopenedAlternates))
867    }
868
869    fn have_tokens(&self) -> Result<bool, Error> {
870        match self.branches.last() {
871            None => Err(self.error(ErrorKind::UnopenedAlternates)),
872            Some(ref pat) => Ok(!pat.is_empty()),
873        }
874    }
875
876    fn parse_comma(&mut self) -> Result<(), Error> {
877        // If we aren't inside a group alternation, then don't
878        // treat commas specially. Otherwise, we need to start
879        // a new alternate branch.
880        if self.alternates_stack.is_empty() {
881            self.push_token(Token::Literal(','))
882        } else {
883            Ok(self.branches.push(Tokens::default()))
884        }
885    }
886
887    fn parse_backslash(&mut self) -> Result<(), Error> {
888        if self.opts.backslash_escape {
889            match self.bump() {
890                None => Err(self.error(ErrorKind::DanglingEscape)),
891                Some(c) => self.push_token(Token::Literal(c)),
892            }
893        } else if is_separator('\\') {
894            // Normalize all patterns to use / as a separator.
895            self.push_token(Token::Literal('/'))
896        } else {
897            self.push_token(Token::Literal('\\'))
898        }
899    }
900
901    fn parse_star(&mut self) -> Result<(), Error> {
902        let prev = self.prev;
903        if self.peek() != Some('*') {
904            self.push_token(Token::ZeroOrMore)?;
905            return Ok(());
906        }
907        assert!(self.bump() == Some('*'));
908        if !self.have_tokens()? {
909            if !self.peek().map_or(true, is_separator) {
910                self.push_token(Token::ZeroOrMore)?;
911                self.push_token(Token::ZeroOrMore)?;
912            } else {
913                self.push_token(Token::RecursivePrefix)?;
914                assert!(self.bump().map_or(true, is_separator));
915            }
916            return Ok(());
917        }
918
919        if !prev.map(is_separator).unwrap_or(false) {
920            if self.branches.len() <= 1
921                || (prev != Some(',') && prev != Some('{'))
922            {
923                self.push_token(Token::ZeroOrMore)?;
924                self.push_token(Token::ZeroOrMore)?;
925                return Ok(());
926            }
927        }
928        let is_suffix = match self.peek() {
929            None => {
930                assert!(self.bump().is_none());
931                true
932            }
933            Some(',') | Some('}') if self.branches.len() >= 2 => true,
934            Some(c) if is_separator(c) => {
935                assert!(self.bump().map(is_separator).unwrap_or(false));
936                false
937            }
938            _ => {
939                self.push_token(Token::ZeroOrMore)?;
940                self.push_token(Token::ZeroOrMore)?;
941                return Ok(());
942            }
943        };
944        match self.pop_token()? {
945            Token::RecursivePrefix => {
946                self.push_token(Token::RecursivePrefix)?;
947            }
948            Token::RecursiveSuffix => {
949                self.push_token(Token::RecursiveSuffix)?;
950            }
951            _ => {
952                if is_suffix {
953                    self.push_token(Token::RecursiveSuffix)?;
954                } else {
955                    self.push_token(Token::RecursiveZeroOrMore)?;
956                }
957            }
958        }
959        Ok(())
960    }
961
962    fn parse_class(&mut self) -> Result<(), Error> {
963        // Save parser state for potential rollback to literal '[' parsing.
964        let saved_chars = self.chars.clone();
965        let saved_prev = self.prev;
966        let saved_cur = self.cur;
967
968        fn add_to_last_range(
969            glob: &str,
970            r: &mut (char, char),
971            add: char,
972        ) -> Result<(), Error> {
973            r.1 = add;
974            if r.1 < r.0 {
975                Err(Error {
976                    glob: Some(glob.to_string()),
977                    kind: ErrorKind::InvalidRange(r.0, r.1),
978                })
979            } else {
980                Ok(())
981            }
982        }
983        let mut ranges = vec![];
984        let negated = match self.chars.peek() {
985            Some(&'!') | Some(&'^') => {
986                let bump = self.bump();
987                assert!(bump == Some('!') || bump == Some('^'));
988                true
989            }
990            _ => false,
991        };
992        let mut first = true;
993        let mut in_range = false;
994        loop {
995            let Some(c) = self.bump() else {
996                return if self.opts.allow_unclosed_class == true {
997                    self.chars = saved_chars;
998                    self.cur = saved_cur;
999                    self.prev = saved_prev;
1000                    self.found_unclosed_class = true;
1001
1002                    self.push_token(Token::Literal('['))
1003                } else {
1004                    Err(self.error(ErrorKind::UnclosedClass))
1005                };
1006            };
1007            match c {
1008                ']' => {
1009                    if first {
1010                        ranges.push((']', ']'));
1011                    } else {
1012                        break;
1013                    }
1014                }
1015                '-' => {
1016                    if first {
1017                        ranges.push(('-', '-'));
1018                    } else if in_range {
1019                        // invariant: in_range is only set when there is
1020                        // already at least one character seen.
1021                        let r = ranges.last_mut().unwrap();
1022                        add_to_last_range(&self.glob, r, '-')?;
1023                        in_range = false;
1024                    } else {
1025                        assert!(!ranges.is_empty());
1026                        in_range = true;
1027                    }
1028                }
1029                c => {
1030                    if in_range {
1031                        // invariant: in_range is only set when there is
1032                        // already at least one character seen.
1033                        add_to_last_range(
1034                            &self.glob,
1035                            ranges.last_mut().unwrap(),
1036                            c,
1037                        )?;
1038                    } else {
1039                        ranges.push((c, c));
1040                    }
1041                    in_range = false;
1042                }
1043            }
1044            first = false;
1045        }
1046        if in_range {
1047            // Means that the last character in the class was a '-', so add
1048            // it as a literal.
1049            ranges.push(('-', '-'));
1050        }
1051        self.push_token(Token::Class { negated, ranges })
1052    }
1053
1054    fn bump(&mut self) -> Option<char> {
1055        self.prev = self.cur;
1056        self.cur = self.chars.next();
1057        self.cur
1058    }
1059
1060    fn peek(&mut self) -> Option<char> {
1061        self.chars.peek().map(|&ch| ch)
1062    }
1063}
1064
1065#[cfg(test)]
1066fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
1067    needle.len() <= haystack.len() && needle == &haystack[..needle.len()]
1068}
1069
1070#[cfg(test)]
1071fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
1072    if needle.len() > haystack.len() {
1073        return false;
1074    }
1075    needle == &haystack[haystack.len() - needle.len()..]
1076}
1077
1078#[cfg(test)]
1079mod tests {
1080    use super::Token::*;
1081    use super::{Glob, GlobBuilder, Token};
1082    use crate::{ErrorKind, GlobSetBuilder};
1083
1084    #[derive(Clone, Copy, Debug, Default)]
1085    struct Options {
1086        casei: Option<bool>,
1087        litsep: Option<bool>,
1088        bsesc: Option<bool>,
1089        ealtre: Option<bool>,
1090        unccls: Option<bool>,
1091    }
1092
1093    macro_rules! syntax {
1094        ($name:ident, $pat:expr, $tokens:expr) => {
1095            #[test]
1096            fn $name() {
1097                let pat = Glob::new($pat).unwrap();
1098                assert_eq!($tokens, pat.tokens.0);
1099            }
1100        };
1101    }
1102
1103    macro_rules! syntaxerr {
1104        ($name:ident, $pat:expr, $err:expr) => {
1105            #[test]
1106            fn $name() {
1107                let err = Glob::new($pat).unwrap_err();
1108                assert_eq!(&$err, err.kind());
1109            }
1110        };
1111    }
1112
1113    macro_rules! toregex {
1114        ($name:ident, $pat:expr, $re:expr) => {
1115            toregex!($name, $pat, $re, Options::default());
1116        };
1117        ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1118            #[test]
1119            fn $name() {
1120                let mut builder = GlobBuilder::new($pat);
1121                if let Some(casei) = $options.casei {
1122                    builder.case_insensitive(casei);
1123                }
1124                if let Some(litsep) = $options.litsep {
1125                    builder.literal_separator(litsep);
1126                }
1127                if let Some(bsesc) = $options.bsesc {
1128                    builder.backslash_escape(bsesc);
1129                }
1130                if let Some(ealtre) = $options.ealtre {
1131                    builder.empty_alternates(ealtre);
1132                }
1133                if let Some(unccls) = $options.unccls {
1134                    builder.allow_unclosed_class(unccls);
1135                }
1136
1137                let pat = builder.build().unwrap();
1138                assert_eq!(format!("(?-u){}", $re), pat.regex());
1139            }
1140        };
1141    }
1142
1143    macro_rules! matches {
1144        ($name:ident, $pat:expr, $path:expr) => {
1145            matches!($name, $pat, $path, Options::default());
1146        };
1147        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1148            #[test]
1149            fn $name() {
1150                let mut builder = GlobBuilder::new($pat);
1151                if let Some(casei) = $options.casei {
1152                    builder.case_insensitive(casei);
1153                }
1154                if let Some(litsep) = $options.litsep {
1155                    builder.literal_separator(litsep);
1156                }
1157                if let Some(bsesc) = $options.bsesc {
1158                    builder.backslash_escape(bsesc);
1159                }
1160                if let Some(ealtre) = $options.ealtre {
1161                    builder.empty_alternates(ealtre);
1162                }
1163                let pat = builder.build().unwrap();
1164                let matcher = pat.compile_matcher();
1165                let strategic = pat.compile_strategic_matcher();
1166                let set = GlobSetBuilder::new().add(pat).build().unwrap();
1167                assert!(matcher.is_match($path));
1168                assert!(strategic.is_match($path));
1169                assert!(set.is_match($path));
1170            }
1171        };
1172    }
1173
1174    macro_rules! nmatches {
1175        ($name:ident, $pat:expr, $path:expr) => {
1176            nmatches!($name, $pat, $path, Options::default());
1177        };
1178        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1179            #[test]
1180            fn $name() {
1181                let mut builder = GlobBuilder::new($pat);
1182                if let Some(casei) = $options.casei {
1183                    builder.case_insensitive(casei);
1184                }
1185                if let Some(litsep) = $options.litsep {
1186                    builder.literal_separator(litsep);
1187                }
1188                if let Some(bsesc) = $options.bsesc {
1189                    builder.backslash_escape(bsesc);
1190                }
1191                if let Some(ealtre) = $options.ealtre {
1192                    builder.empty_alternates(ealtre);
1193                }
1194                let pat = builder.build().unwrap();
1195                let matcher = pat.compile_matcher();
1196                let strategic = pat.compile_strategic_matcher();
1197                let set = GlobSetBuilder::new().add(pat).build().unwrap();
1198                assert!(!matcher.is_match($path));
1199                assert!(!strategic.is_match($path));
1200                assert!(!set.is_match($path));
1201            }
1202        };
1203    }
1204
1205    fn s(string: &str) -> String {
1206        string.to_string()
1207    }
1208
1209    fn class(s: char, e: char) -> Token {
1210        Class { negated: false, ranges: vec![(s, e)] }
1211    }
1212
1213    fn classn(s: char, e: char) -> Token {
1214        Class { negated: true, ranges: vec![(s, e)] }
1215    }
1216
1217    fn rclass(ranges: &[(char, char)]) -> Token {
1218        Class { negated: false, ranges: ranges.to_vec() }
1219    }
1220
1221    fn rclassn(ranges: &[(char, char)]) -> Token {
1222        Class { negated: true, ranges: ranges.to_vec() }
1223    }
1224
1225    syntax!(literal1, "a", vec![Literal('a')]);
1226    syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1227    syntax!(any1, "?", vec![Any]);
1228    syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1229    syntax!(seq1, "*", vec![ZeroOrMore]);
1230    syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1231    syntax!(
1232        seq3,
1233        "*a*b*",
1234        vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1235    );
1236    syntax!(rseq1, "**", vec![RecursivePrefix]);
1237    syntax!(rseq2, "**/", vec![RecursivePrefix]);
1238    syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1239    syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1240    syntax!(
1241        rseq5,
1242        "a/**/b",
1243        vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1244    );
1245    syntax!(cls1, "[a]", vec![class('a', 'a')]);
1246    syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1247    syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1248    syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1249    syntax!(cls5, "[-]", vec![class('-', '-')]);
1250    syntax!(cls6, "[]]", vec![class(']', ']')]);
1251    syntax!(cls7, "[*]", vec![class('*', '*')]);
1252    syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1253    syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1254    syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1255    syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1256    syntax!(
1257        cls12,
1258        "[-a-z-]",
1259        vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1260    );
1261    syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1262    syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1263    syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1264    syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1265    syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1266    syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1267    syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1268    syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1269    syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1270
1271    syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1272    syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1273    syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1274    syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1275    syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1276    syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1277    syntaxerr!(err_alt1, "{a,b", ErrorKind::UnclosedAlternates);
1278    syntaxerr!(err_alt2, "{a,{b,c}", ErrorKind::UnclosedAlternates);
1279    syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
1280    syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
1281
1282    const CASEI: Options = Options {
1283        casei: Some(true),
1284        litsep: None,
1285        bsesc: None,
1286        ealtre: None,
1287        unccls: None,
1288    };
1289    const SLASHLIT: Options = Options {
1290        casei: None,
1291        litsep: Some(true),
1292        bsesc: None,
1293        ealtre: None,
1294        unccls: None,
1295    };
1296    const NOBSESC: Options = Options {
1297        casei: None,
1298        litsep: None,
1299        bsesc: Some(false),
1300        ealtre: None,
1301        unccls: None,
1302    };
1303    const BSESC: Options = Options {
1304        casei: None,
1305        litsep: None,
1306        bsesc: Some(true),
1307        ealtre: None,
1308        unccls: None,
1309    };
1310    const EALTRE: Options = Options {
1311        casei: None,
1312        litsep: None,
1313        bsesc: Some(true),
1314        ealtre: Some(true),
1315        unccls: None,
1316    };
1317    const UNCCLS: Options = Options {
1318        casei: None,
1319        litsep: None,
1320        bsesc: None,
1321        ealtre: None,
1322        unccls: Some(true),
1323    };
1324
1325    toregex!(allow_unclosed_class_single, r"[", r"^\[$", &UNCCLS);
1326    toregex!(allow_unclosed_class_many, r"[abc", r"^\[abc$", &UNCCLS);
1327    toregex!(allow_unclosed_class_empty1, r"[]", r"^\[\]$", &UNCCLS);
1328    toregex!(allow_unclosed_class_empty2, r"[][", r"^\[\]\[$", &UNCCLS);
1329    toregex!(allow_unclosed_class_negated_unclosed, r"[!", r"^\[!$", &UNCCLS);
1330    toregex!(allow_unclosed_class_negated_empty, r"[!]", r"^\[!\]$", &UNCCLS);
1331    toregex!(
1332        allow_unclosed_class_brace1,
1333        r"{[abc,xyz}",
1334        r"^(?:\[abc|xyz)$",
1335        &UNCCLS
1336    );
1337    toregex!(
1338        allow_unclosed_class_brace2,
1339        r"{[abc,[xyz}",
1340        r"^(?:\[abc|\[xyz)$",
1341        &UNCCLS
1342    );
1343    toregex!(
1344        allow_unclosed_class_brace3,
1345        r"{[abc],[xyz}",
1346        r"^(?:[abc]|\[xyz)$",
1347        &UNCCLS
1348    );
1349
1350    toregex!(re_empty, "", "^$");
1351
1352    toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1353
1354    toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1355    toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
1356
1357    toregex!(re1, "a", "^a$");
1358    toregex!(re2, "?", "^.$");
1359    toregex!(re3, "*", "^.*$");
1360    toregex!(re4, "a?", "^a.$");
1361    toregex!(re5, "?a", "^.a$");
1362    toregex!(re6, "a*", "^a.*$");
1363    toregex!(re7, "*a", "^.*a$");
1364    toregex!(re8, "[*]", r"^[\*]$");
1365    toregex!(re9, "[+]", r"^[\+]$");
1366    toregex!(re10, "+", r"^\+$");
1367    toregex!(re11, "☃", r"^\xe2\x98\x83$");
1368    toregex!(re12, "**", r"^.*$");
1369    toregex!(re13, "**/", r"^.*$");
1370    toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
1371    toregex!(re15, "**/**", r"^.*$");
1372    toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
1373    toregex!(re17, "**/**/**", r"^.*$");
1374    toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
1375    toregex!(re19, "a/**", r"^a/.*$");
1376    toregex!(re20, "a/**/**", r"^a/.*$");
1377    toregex!(re21, "a/**/**/**", r"^a/.*$");
1378    toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
1379    toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
1380    toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
1381    toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
1382    toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
1383    toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
1384    toregex!(re28, "a**", r"^a.*.*$");
1385    toregex!(re29, "**a", r"^.*.*a$");
1386    toregex!(re30, "a**b", r"^a.*.*b$");
1387    toregex!(re31, "***", r"^.*.*.*$");
1388    toregex!(re32, "/a**", r"^/a.*.*$");
1389    toregex!(re33, "/**a", r"^/.*.*a$");
1390    toregex!(re34, "/a**b", r"^/a.*.*b$");
1391    toregex!(re35, "{a,b}", r"^(?:a|b)$");
1392    toregex!(re36, "{a,{b,c}}", r"^(?:a|(?:b|c))$");
1393    toregex!(re37, "{{a,b},{c,d}}", r"^(?:(?:a|b)|(?:c|d))$");
1394
1395    matches!(match1, "a", "a");
1396    matches!(match2, "a*b", "a_b");
1397    matches!(match3, "a*b*c", "abc");
1398    matches!(match4, "a*b*c", "a_b_c");
1399    matches!(match5, "a*b*c", "a___b___c");
1400    matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
1401    matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1402    matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
1403    matches!(match9, "*.rs", ".rs");
1404    matches!(match10, "☃", "☃");
1405
1406    matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1407    matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1408    matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1409    matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1410    matches!(matchrec5, "**", "abcde");
1411    matches!(matchrec6, "**", "");
1412    matches!(matchrec7, "**", ".asdf");
1413    matches!(matchrec8, "**", "/x/.asdf");
1414    matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
1415    matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
1416    matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
1417    matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
1418    matches!(matchrec13, "**/test", "one/two/test");
1419    matches!(matchrec14, "**/test", "one/test");
1420    matches!(matchrec15, "**/test", "test");
1421    matches!(matchrec16, "/**/test", "/one/two/test");
1422    matches!(matchrec17, "/**/test", "/one/test");
1423    matches!(matchrec18, "/**/test", "/test");
1424    matches!(matchrec19, "**/.*", ".abc");
1425    matches!(matchrec20, "**/.*", "abc/.abc");
1426    matches!(matchrec21, "**/foo/bar", "foo/bar");
1427    matches!(matchrec22, ".*/**", ".abc/abc");
1428    matches!(matchrec23, "test/**", "test/");
1429    matches!(matchrec24, "test/**", "test/one");
1430    matches!(matchrec25, "test/**", "test/one/two");
1431    matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1432
1433    matches!(matchrange1, "a[0-9]b", "a0b");
1434    matches!(matchrange2, "a[0-9]b", "a9b");
1435    matches!(matchrange3, "a[!0-9]b", "a_b");
1436    matches!(matchrange4, "[a-z123]", "1");
1437    matches!(matchrange5, "[1a-z23]", "1");
1438    matches!(matchrange6, "[123a-z]", "1");
1439    matches!(matchrange7, "[abc-]", "-");
1440    matches!(matchrange8, "[-abc]", "-");
1441    matches!(matchrange9, "[-a-c]", "b");
1442    matches!(matchrange10, "[a-c-]", "b");
1443    matches!(matchrange11, "[-]", "-");
1444    matches!(matchrange12, "a[^0-9]b", "a_b");
1445
1446    matches!(matchpat1, "*hello.txt", "hello.txt");
1447    matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1448    matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1449    matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
1450    matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1451    matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1452    matches!(
1453        matchpat7,
1454        "*some/path/to/hello.txt",
1455        "a/bigger/some/path/to/hello.txt"
1456    );
1457
1458    matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
1459
1460    matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1461    matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1462    matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1463    matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1464
1465    matches!(matchalt1, "a,b", "a,b");
1466    matches!(matchalt2, ",", ",");
1467    matches!(matchalt3, "{a,b}", "a");
1468    matches!(matchalt4, "{a,b}", "b");
1469    matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
1470    matches!(matchalt6, "{**/src/**,foo}", "foo");
1471    matches!(matchalt7, "{[}],foo}", "}");
1472    matches!(matchalt8, "{foo}", "foo");
1473    matches!(matchalt9, "{}", "");
1474    matches!(matchalt10, "{,}", "");
1475    matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
1476    matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
1477    matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1478    matches!(matchalt14, "foo{,.txt}", "foo.txt");
1479    nmatches!(matchalt15, "foo{,.txt}", "foo");
1480    matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
1481    matches!(matchalt17, "{a,b{c,d}}", "bc");
1482    matches!(matchalt18, "{a,b{c,d}}", "bd");
1483    matches!(matchalt19, "{a,b{c,d}}", "a");
1484
1485    matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1486    #[cfg(unix)]
1487    nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1488    #[cfg(not(unix))]
1489    nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
1490    nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1491    matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1492    #[cfg(unix)]
1493    nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1494    #[cfg(not(unix))]
1495    matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1496
1497    matches!(matchbackslash1, "\\[", "[", BSESC);
1498    matches!(matchbackslash2, "\\?", "?", BSESC);
1499    matches!(matchbackslash3, "\\*", "*", BSESC);
1500    matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
1501    matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
1502    matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
1503    #[cfg(unix)]
1504    matches!(matchbackslash7, "\\a", "a");
1505    #[cfg(not(unix))]
1506    matches!(matchbackslash8, "\\a", "/a");
1507
1508    nmatches!(matchnot1, "a*b*c", "abcd");
1509    nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
1510    nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1511    nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
1512    nmatches!(matchnot5, "/**/test", "test");
1513    nmatches!(matchnot6, "/**/test", "/one/notthis");
1514    nmatches!(matchnot7, "/**/test", "/notthis");
1515    nmatches!(matchnot8, "**/.*", "ab.c");
1516    nmatches!(matchnot9, "**/.*", "abc/ab.c");
1517    nmatches!(matchnot10, ".*/**", "a.bc");
1518    nmatches!(matchnot11, ".*/**", "abc/a.bc");
1519    nmatches!(matchnot12, "a[0-9]b", "a_b");
1520    nmatches!(matchnot13, "a[!0-9]b", "a0b");
1521    nmatches!(matchnot14, "a[!0-9]b", "a9b");
1522    nmatches!(matchnot15, "[!-]", "-");
1523    nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1524    nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1525    nmatches!(
1526        matchnot18,
1527        "*some/path/to/hello.txt",
1528        "some/path/to/hello.txt-and-then-some"
1529    );
1530    nmatches!(
1531        matchnot19,
1532        "*some/path/to/hello.txt",
1533        "some/other/path/to/hello.txt"
1534    );
1535    nmatches!(matchnot20, "a", "foo/a");
1536    nmatches!(matchnot21, "./foo", "foo");
1537    nmatches!(matchnot22, "**/foo", "foofoo");
1538    nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1539    nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1540    nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1541    nmatches!(
1542        matchnot26,
1543        "**/m4/ltoptions.m4",
1544        "csharp/src/packages/repositories.config",
1545        SLASHLIT
1546    );
1547    nmatches!(matchnot27, "a[^0-9]b", "a0b");
1548    nmatches!(matchnot28, "a[^0-9]b", "a9b");
1549    nmatches!(matchnot29, "[^-]", "-");
1550    nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1551    nmatches!(
1552        matchrec31,
1553        "some/*/needle.txt",
1554        "some/one/two/needle.txt",
1555        SLASHLIT
1556    );
1557    nmatches!(
1558        matchrec32,
1559        "some/*/needle.txt",
1560        "some/one/two/three/needle.txt",
1561        SLASHLIT
1562    );
1563    nmatches!(matchrec33, ".*/**", ".abc");
1564    nmatches!(matchrec34, "foo/**", "foo");
1565
1566    macro_rules! extract {
1567        ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
1568            extract!($which, $name, $pat, $expect, Options::default());
1569        };
1570        ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
1571            #[test]
1572            fn $name() {
1573                let mut builder = GlobBuilder::new($pat);
1574                if let Some(casei) = $options.casei {
1575                    builder.case_insensitive(casei);
1576                }
1577                if let Some(litsep) = $options.litsep {
1578                    builder.literal_separator(litsep);
1579                }
1580                if let Some(bsesc) = $options.bsesc {
1581                    builder.backslash_escape(bsesc);
1582                }
1583                if let Some(ealtre) = $options.ealtre {
1584                    builder.empty_alternates(ealtre);
1585                }
1586                let pat = builder.build().unwrap();
1587                assert_eq!($expect, pat.$which());
1588            }
1589        };
1590    }
1591
1592    macro_rules! literal {
1593        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
1594    }
1595
1596    macro_rules! basetokens {
1597        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
1598    }
1599
1600    macro_rules! ext {
1601        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
1602    }
1603
1604    macro_rules! required_ext {
1605        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
1606    }
1607
1608    macro_rules! prefix {
1609        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
1610    }
1611
1612    macro_rules! suffix {
1613        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
1614    }
1615
1616    macro_rules! baseliteral {
1617        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
1618    }
1619
1620    literal!(extract_lit1, "foo", Some(s("foo")));
1621    literal!(extract_lit2, "foo", None, CASEI);
1622    literal!(extract_lit3, "/foo", Some(s("/foo")));
1623    literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1624    literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1625    literal!(extract_lit6, "*.foo", None);
1626    literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1627    literal!(extract_lit8, "**/foo/bar", None);
1628
1629    basetokens!(
1630        extract_basetoks1,
1631        "**/foo",
1632        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1633    );
1634    basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1635    basetokens!(
1636        extract_basetoks3,
1637        "**/foo",
1638        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1639        SLASHLIT
1640    );
1641    basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1642    basetokens!(extract_basetoks5, "*foo", None);
1643    basetokens!(extract_basetoks6, "**/fo*o", None);
1644    basetokens!(
1645        extract_basetoks7,
1646        "**/fo*o",
1647        Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1648        SLASHLIT
1649    );
1650
1651    ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
1652    ext!(extract_ext2, "**/*.rs.bak", None);
1653    ext!(extract_ext3, "*.rs", Some(s(".rs")));
1654    ext!(extract_ext4, "a*.rs", None);
1655    ext!(extract_ext5, "/*.c", None);
1656    ext!(extract_ext6, "*.c", None, SLASHLIT);
1657    ext!(extract_ext7, "*.c", Some(s(".c")));
1658
1659    required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1660    required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1661    required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1662    required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1663    required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1664    required_ext!(extract_req_ext6, "./rs", None);
1665    required_ext!(extract_req_ext7, "foo", None);
1666    required_ext!(extract_req_ext8, ".foo/", None);
1667    required_ext!(extract_req_ext9, "foo/", None);
1668
1669    prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1670    prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1671    prefix!(extract_prefix3, "**/foo", None);
1672    prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
1673
1674    suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
1675    suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
1676    suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1677    suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
1678    suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
1679    suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1680    suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
1681
1682    baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1683    baseliteral!(extract_baselit2, "foo", None);
1684    baseliteral!(extract_baselit3, "*foo", None);
1685    baseliteral!(extract_baselit4, "*/foo", None);
1686}
globset/glob.rs

globset/
glob.rs