globset/
lib.rs

1/*!
2The globset crate provides cross platform single glob and glob set matching.
3
4Glob set matching is the process of matching one or more glob patterns against
5a single candidate path simultaneously, and returning all of the globs that
6matched. For example, given this set of globs:
7
8* `*.rs`
9* `src/lib.rs`
10* `src/**/foo.rs`
11
12and a path `src/bar/baz/foo.rs`, then the set would report the first and third
13globs as matching.
14
15# Example: one glob
16
17This example shows how to match a single glob against a single file path.
18
19```
20use globset::Glob;
21
22let glob = Glob::new("*.rs")?.compile_matcher();
23
24assert!(glob.is_match("foo.rs"));
25assert!(glob.is_match("foo/bar.rs"));
26assert!(!glob.is_match("Cargo.toml"));
27# Ok::<(), Box<dyn std::error::Error>>(())
28```
29
30# Example: configuring a glob matcher
31
32This example shows how to use a `GlobBuilder` to configure aspects of match
33semantics. In this example, we prevent wildcards from matching path separators.
34
35```
36use globset::GlobBuilder;
37
38let glob = GlobBuilder::new("*.rs")
39    .literal_separator(true).build()?.compile_matcher();
40
41assert!(glob.is_match("foo.rs"));
42assert!(!glob.is_match("foo/bar.rs")); // no longer matches
43assert!(!glob.is_match("Cargo.toml"));
44# Ok::<(), Box<dyn std::error::Error>>(())
45```
46
47# Example: match multiple globs at once
48
49This example shows how to match multiple glob patterns at once.
50
51```
52use globset::{Glob, GlobSetBuilder};
53
54let mut builder = GlobSetBuilder::new();
55// A GlobBuilder can be used to configure each glob's match semantics
56// independently.
57builder.add(Glob::new("*.rs")?);
58builder.add(Glob::new("src/lib.rs")?);
59builder.add(Glob::new("src/**/foo.rs")?);
60let set = builder.build()?;
61
62assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
63# Ok::<(), Box<dyn std::error::Error>>(())
64```
65
66# Syntax
67
68Standard Unix-style glob syntax is supported:
69
70* `?` matches any single character. (If the `literal_separator` option is
71  enabled, then `?` can never match a path separator.)
72* `*` matches zero or more characters. (If the `literal_separator` option is
73  enabled, then `*` can never match a path separator.)
* `**` recursively matches directories in three situations. First, if the
  glob starts with <code>\*\*&#x2F;</code>, then it matches
  all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
  <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
  <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
  Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
  the pattern, then it matches zero or more directories. Using `**` anywhere
  else is treated as two consecutive `*` patterns (N.B. the glob `**` is
  allowed and means "match everything").
* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
  (N.B. Nesting `{...}` is supported, e.g., `{{a,b},{c,d}}`.)
85* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
86  `[!ab]` to match any character except for `a` and `b`.
87* Metacharacters such as `*` and `?` can be escaped with character class
88  notation. e.g., `[*]` matches `*`.
89* When backslash escapes are enabled, a backslash (`\`) will escape all meta
90  characters in a glob. If it precedes a non-meta character, then the slash is
91  ignored. A `\\` will match a literal `\\`. Note that this mode is only
92  enabled on Unix platforms by default, but can be enabled on any platform
93  via the `backslash_escape` setting on `Glob`.
94
95A `GlobBuilder` can be used to prevent wildcards from matching path separators,
96or to enable case insensitive matching.
97
98# Crate Features
99
100This crate includes optional features that can be enabled if necessary.
101These features are not required but may be useful depending on the use case.
102
103The following features are available:
104
105* **arbitrary** -
106  Enabling this feature introduces a public dependency on the
107  [`arbitrary`](https://crates.io/crates/arbitrary)
108  crate. Namely, it implements the `Arbitrary` trait from that crate for the
109  [`Glob`] type. This feature is disabled by default.
110*/
111
112#![deny(missing_docs)]
113
114use std::{
115    borrow::Cow,
116    panic::{RefUnwindSafe, UnwindSafe},
117    path::Path,
118    sync::Arc,
119};
120
121use {
122    aho_corasick::AhoCorasick,
123    bstr::{B, ByteSlice, ByteVec},
124    regex_automata::{
125        PatternSet,
126        meta::Regex,
127        util::pool::{Pool, PoolGuard},
128    },
129};
130
131use crate::{
132    glob::MatchStrategy,
133    pathutil::{file_name, file_name_ext, normalize_path},
134};
135
136pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
137
138mod fnv;
139mod glob;
140mod pathutil;
141
142#[cfg(feature = "serde1")]
143mod serde_impl;
144
// Crate-internal logging shim: when the `log` feature is enabled, `debug!`
// forwards all of its tokens to `log::debug!`.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($args:tt)*) => {
        ::log::debug!($($args)*);
    };
}

// Without the `log` feature, `debug!` accepts the same tokens but expands to
// nothing, so its arguments are never evaluated.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($args:tt)*) => {};
}
154
155/// Represents an error that can occur when parsing a glob pattern.
156#[derive(Clone, Debug, Eq, PartialEq)]
157pub struct Error {
158    /// The original glob provided by the caller.
159    glob: Option<String>,
160    /// The kind of error.
161    kind: ErrorKind,
162}
163
/// The kind of error that can occur when parsing a glob pattern.
///
/// This enum is non-exhaustive: new variants may be added over time.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error used to occur for consistency with git's glob specification,
    /// but the specification now accepts all uses of `**`. When `**` does not
    /// appear adjacent to a path separator or at the beginning/end of a glob,
    /// it is now treated as two consecutive `*` patterns. As such, this error
    /// is no longer used.
    InvalidRecursive,
    /// A character class (e.g., `[abc]`) was opened but never closed.
    UnclosedClass,
    /// A range inside a character class (e.g., `[a-z]`) is invalid. For
    /// example, the range starts with a lexicographically larger character
    /// than it ends with. The two fields are the start and end of the range.
    InvalidRange(char, char),
    /// A `}` was found without a matching `{`.
    UnopenedAlternates,
    /// A `{` was found without a matching `}`.
    UnclosedAlternates,
    /// **DEPRECATED**.
    ///
    /// This error used to occur when an alternating group was nested inside
    /// another alternating group, e.g., `{{a,b},{c,d}}`. However, this is now
    /// supported and as such this error cannot occur.
    NestedAlternates,
    /// An unescaped '\' was found at the end of a glob.
    DanglingEscape,
    /// An error associated with parsing or compiling a regex. The payload is
    /// the rendered error message.
    Regex(String),
}
197
198impl std::error::Error for Error {
199    fn description(&self) -> &str {
200        self.kind.description()
201    }
202}
203
204impl Error {
205    /// Return the glob that caused this error, if one exists.
206    pub fn glob(&self) -> Option<&str> {
207        self.glob.as_ref().map(|s| &**s)
208    }
209
210    /// Return the kind of this error.
211    pub fn kind(&self) -> &ErrorKind {
212        &self.kind
213    }
214}
215
216impl ErrorKind {
217    fn description(&self) -> &str {
218        match *self {
219            ErrorKind::InvalidRecursive => {
220                "invalid use of **; must be one path component"
221            }
222            ErrorKind::UnclosedClass => {
223                "unclosed character class; missing ']'"
224            }
225            ErrorKind::InvalidRange(_, _) => "invalid character range",
226            ErrorKind::UnopenedAlternates => {
227                "unopened alternate group; missing '{' \
228                (maybe escape '}' with '[}]'?)"
229            }
230            ErrorKind::UnclosedAlternates => {
231                "unclosed alternate group; missing '}' \
232                (maybe escape '{' with '[{]'?)"
233            }
234            ErrorKind::NestedAlternates => {
235                "nested alternate groups are not allowed"
236            }
237            ErrorKind::DanglingEscape => "dangling '\\'",
238            ErrorKind::Regex(ref err) => err,
239        }
240    }
241}
242
243impl std::fmt::Display for Error {
244    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
245        match self.glob {
246            None => self.kind.fmt(f),
247            Some(ref glob) => {
248                write!(f, "error parsing glob '{}': {}", glob, self.kind)
249            }
250        }
251    }
252}
253
254impl std::fmt::Display for ErrorKind {
255    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
256        match *self {
257            ErrorKind::InvalidRecursive
258            | ErrorKind::UnclosedClass
259            | ErrorKind::UnopenedAlternates
260            | ErrorKind::UnclosedAlternates
261            | ErrorKind::NestedAlternates
262            | ErrorKind::DanglingEscape
263            | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
264            ErrorKind::InvalidRange(s, e) => {
265                write!(f, "invalid range; '{}' > '{}'", s, e)
266            }
267        }
268    }
269}
270
271fn new_regex(pat: &str) -> Result<Regex, Error> {
272    let syntax = regex_automata::util::syntax::Config::new()
273        .utf8(false)
274        .dot_matches_new_line(true);
275    let config = Regex::config()
276        .utf8_empty(false)
277        .nfa_size_limit(Some(10 * (1 << 20)))
278        .hybrid_cache_capacity(10 * (1 << 20));
279    Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
280        |err| Error {
281            glob: Some(pat.to_string()),
282            kind: ErrorKind::Regex(err.to_string()),
283        },
284    )
285}
286
287fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
288    let syntax = regex_automata::util::syntax::Config::new()
289        .utf8(false)
290        .dot_matches_new_line(true);
291    let config = Regex::config()
292        .match_kind(regex_automata::MatchKind::All)
293        .utf8_empty(false)
294        .nfa_size_limit(Some(10 * (1 << 20)))
295        .hybrid_cache_capacity(10 * (1 << 20));
296    Regex::builder()
297        .syntax(syntax)
298        .configure(config)
299        .build_many(&pats)
300        .map_err(|err| Error {
301            glob: None,
302            kind: ErrorKind::Regex(err.to_string()),
303        })
304}
305
306/// GlobSet represents a group of globs that can be matched together in a
307/// single pass.
308#[derive(Clone, Debug)]
309pub struct GlobSet {
310    len: usize,
311    strats: Vec<GlobSetMatchStrategy>,
312}
313
314impl GlobSet {
315    /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add
316    /// new patterns. Once all patterns have been added, `build` should be
317    /// called to produce a `GlobSet`, which can then be used for matching.
318    #[inline]
319    pub fn builder() -> GlobSetBuilder {
320        GlobSetBuilder::new()
321    }
322
323    /// Create an empty `GlobSet`. An empty set matches nothing.
324    #[inline]
325    pub const fn empty() -> GlobSet {
326        GlobSet { len: 0, strats: vec![] }
327    }
328
329    /// Returns true if this set is empty, and therefore matches nothing.
330    #[inline]
331    pub fn is_empty(&self) -> bool {
332        self.len == 0
333    }
334
335    /// Returns the number of globs in this set.
336    #[inline]
337    pub fn len(&self) -> usize {
338        self.len
339    }
340
341    /// Returns true if any glob in this set matches the path given.
342    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
343        self.is_match_candidate(&Candidate::new(path.as_ref()))
344    }
345
346    /// Returns true if any glob in this set matches the path given.
347    ///
348    /// This takes a Candidate as input, which can be used to amortize the
349    /// cost of preparing a path for matching.
350    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
351        if self.is_empty() {
352            return false;
353        }
354        for strat in &self.strats {
355            if strat.is_match(path) {
356                return true;
357            }
358        }
359        false
360    }
361
362    /// Returns true if all globs in this set match the path given.
363    ///
364    /// This will return true if the set of globs is empty, as in that case all
365    /// `0` of the globs will match.
366    ///
367    /// ```
368    /// use globset::{Glob, GlobSetBuilder};
369    ///
370    /// let mut builder = GlobSetBuilder::new();
371    /// builder.add(Glob::new("src/*").unwrap());
372    /// builder.add(Glob::new("**/*.rs").unwrap());
373    /// let set = builder.build().unwrap();
374    ///
375    /// assert!(set.matches_all("src/foo.rs"));
376    /// assert!(!set.matches_all("src/bar.c"));
377    /// assert!(!set.matches_all("test.rs"));
378    /// ```
379    pub fn matches_all<P: AsRef<Path>>(&self, path: P) -> bool {
380        self.matches_all_candidate(&Candidate::new(path.as_ref()))
381    }
382
383    /// Returns ture if all globs in this set match the path given.
384    ///
385    /// This takes a Candidate as input, which can be used to amortize the cost
386    /// of peparing a path for matching.
387    ///
388    /// This will return true if the set of globs is empty, as in that case all
389    /// `0` of the globs will match.
390    pub fn matches_all_candidate(&self, path: &Candidate<'_>) -> bool {
391        for strat in &self.strats {
392            if !strat.is_match(path) {
393                return false;
394            }
395        }
396        true
397    }
398
399    /// Returns the sequence number of every glob pattern that matches the
400    /// given path.
401    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
402        self.matches_candidate(&Candidate::new(path.as_ref()))
403    }
404
405    /// Returns the sequence number of every glob pattern that matches the
406    /// given path.
407    ///
408    /// This takes a Candidate as input, which can be used to amortize the
409    /// cost of preparing a path for matching.
410    pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
411        let mut into = vec![];
412        if self.is_empty() {
413            return into;
414        }
415        self.matches_candidate_into(path, &mut into);
416        into
417    }
418
419    /// Adds the sequence number of every glob pattern that matches the given
420    /// path to the vec given.
421    ///
422    /// `into` is cleared before matching begins, and contains the set of
423    /// sequence numbers (in ascending order) after matching ends. If no globs
424    /// were matched, then `into` will be empty.
425    pub fn matches_into<P: AsRef<Path>>(
426        &self,
427        path: P,
428        into: &mut Vec<usize>,
429    ) {
430        self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
431    }
432
433    /// Adds the sequence number of every glob pattern that matches the given
434    /// path to the vec given.
435    ///
436    /// `into` is cleared before matching begins, and contains the set of
437    /// sequence numbers (in ascending order) after matching ends. If no globs
438    /// were matched, then `into` will be empty.
439    ///
440    /// This takes a Candidate as input, which can be used to amortize the
441    /// cost of preparing a path for matching.
442    pub fn matches_candidate_into(
443        &self,
444        path: &Candidate<'_>,
445        into: &mut Vec<usize>,
446    ) {
447        into.clear();
448        if self.is_empty() {
449            return;
450        }
451        for strat in &self.strats {
452            strat.matches_into(path, into);
453        }
454        into.sort();
455        into.dedup();
456    }
457
458    /// Builds a new matcher from a collection of Glob patterns.
459    ///
460    /// Once a matcher is built, no new patterns can be added to it.
461    pub fn new<I, G>(globs: I) -> Result<GlobSet, Error>
462    where
463        I: IntoIterator<Item = G>,
464        G: AsRef<Glob>,
465    {
466        let mut it = globs.into_iter().peekable();
467        if it.peek().is_none() {
468            return Ok(GlobSet::empty());
469        }
470
471        let mut len = 0;
472        let mut lits = LiteralStrategy::new();
473        let mut base_lits = BasenameLiteralStrategy::new();
474        let mut exts = ExtensionStrategy::new();
475        let mut prefixes = MultiStrategyBuilder::new();
476        let mut suffixes = MultiStrategyBuilder::new();
477        let mut required_exts = RequiredExtensionStrategyBuilder::new();
478        let mut regexes = MultiStrategyBuilder::new();
479        for (i, p) in it.enumerate() {
480            len += 1;
481
482            let p = p.as_ref();
483            match MatchStrategy::new(p) {
484                MatchStrategy::Literal(lit) => {
485                    lits.add(i, lit);
486                }
487                MatchStrategy::BasenameLiteral(lit) => {
488                    base_lits.add(i, lit);
489                }
490                MatchStrategy::Extension(ext) => {
491                    exts.add(i, ext);
492                }
493                MatchStrategy::Prefix(prefix) => {
494                    prefixes.add(i, prefix);
495                }
496                MatchStrategy::Suffix { suffix, component } => {
497                    if component {
498                        lits.add(i, suffix[1..].to_string());
499                    }
500                    suffixes.add(i, suffix);
501                }
502                MatchStrategy::RequiredExtension(ext) => {
503                    required_exts.add(i, ext, p.regex().to_owned());
504                }
505                MatchStrategy::Regex => {
506                    debug!(
507                        "glob `{:?}` converted to regex: `{:?}`",
508                        p,
509                        p.regex()
510                    );
511                    regexes.add(i, p.regex().to_owned());
512                }
513            }
514        }
515        debug!(
516            "built glob set; {} literals, {} basenames, {} extensions, \
517                {} prefixes, {} suffixes, {} required extensions, {} regexes",
518            lits.0.len(),
519            base_lits.0.len(),
520            exts.0.len(),
521            prefixes.literals.len(),
522            suffixes.literals.len(),
523            required_exts.0.len(),
524            regexes.literals.len()
525        );
526        let mut strats = Vec::with_capacity(7);
527        // Only add strategies that are populated
528        if !exts.0.is_empty() {
529            strats.push(GlobSetMatchStrategy::Extension(exts));
530        }
531        if !base_lits.0.is_empty() {
532            strats.push(GlobSetMatchStrategy::BasenameLiteral(base_lits));
533        }
534        if !lits.0.is_empty() {
535            strats.push(GlobSetMatchStrategy::Literal(lits));
536        }
537        if !suffixes.is_empty() {
538            strats.push(GlobSetMatchStrategy::Suffix(suffixes.suffix()));
539        }
540        if !prefixes.is_empty() {
541            strats.push(GlobSetMatchStrategy::Prefix(prefixes.prefix()));
542        }
543        if !required_exts.0.is_empty() {
544            strats.push(GlobSetMatchStrategy::RequiredExtension(
545                required_exts.build()?,
546            ));
547        }
548        if !regexes.is_empty() {
549            strats.push(GlobSetMatchStrategy::Regex(regexes.regex_set()?));
550        }
551
552        Ok(GlobSet { len, strats })
553    }
554}
555
556impl Default for GlobSet {
557    /// Create a default empty GlobSet.
558    fn default() -> Self {
559        GlobSet::empty()
560    }
561}
562
563/// GlobSetBuilder builds a group of patterns that can be used to
564/// simultaneously match a file path.
565#[derive(Clone, Debug)]
566pub struct GlobSetBuilder {
567    pats: Vec<Glob>,
568}
569
570impl GlobSetBuilder {
571    /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new
572    /// patterns. Once all patterns have been added, `build` should be called
573    /// to produce a [`GlobSet`], which can then be used for matching.
574    pub fn new() -> GlobSetBuilder {
575        GlobSetBuilder { pats: vec![] }
576    }
577
578    /// Builds a new matcher from all of the glob patterns added so far.
579    ///
580    /// Once a matcher is built, no new patterns can be added to it.
581    pub fn build(&self) -> Result<GlobSet, Error> {
582        GlobSet::new(self.pats.iter())
583    }
584
585    /// Add a new pattern to this set.
586    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
587        self.pats.push(pat);
588        self
589    }
590}
591
592/// A candidate path for matching.
593///
594/// All glob matching in this crate operates on `Candidate` values.
595/// Constructing candidates has a very small cost associated with it, so
596/// callers may find it beneficial to amortize that cost when matching a single
597/// path against multiple globs or sets of globs.
598#[derive(Clone)]
599pub struct Candidate<'a> {
600    path: Cow<'a, [u8]>,
601    basename: Cow<'a, [u8]>,
602    ext: Cow<'a, [u8]>,
603}
604
605impl<'a> std::fmt::Debug for Candidate<'a> {
606    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
607        f.debug_struct("Candidate")
608            .field("path", &self.path.as_bstr())
609            .field("basename", &self.basename.as_bstr())
610            .field("ext", &self.ext.as_bstr())
611            .finish()
612    }
613}
614
615impl<'a> Candidate<'a> {
616    /// Create a new candidate for matching from the given path.
617    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
618        Self::from_cow(Vec::from_path_lossy(path.as_ref()))
619    }
620
621    /// Create a new candidate for matching from the given path as a sequence
622    /// of bytes.
623    ///
624    /// Generally speaking, this routine expects the bytes to be
625    /// _conventionally_ UTF-8. It is legal for the byte sequence to contain
626    /// invalid UTF-8. However, if the bytes are in some other encoding that
627    /// isn't ASCII compatible (for example, UTF-16), then the results of
628    /// matching are unspecified.
629    pub fn from_bytes<P: AsRef<[u8]> + ?Sized>(path: &'a P) -> Candidate<'a> {
630        Self::from_cow(Cow::Borrowed(path.as_ref()))
631    }
632
633    fn from_cow(path: Cow<'a, [u8]>) -> Candidate<'a> {
634        let path = normalize_path(path);
635        let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
636        let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
637        Candidate { path, basename, ext }
638    }
639
640    fn path_prefix(&self, max: usize) -> &[u8] {
641        if self.path.len() <= max { &*self.path } else { &self.path[..max] }
642    }
643
644    fn path_suffix(&self, max: usize) -> &[u8] {
645        if self.path.len() <= max {
646            &*self.path
647        } else {
648            &self.path[self.path.len() - max..]
649        }
650    }
651}
652
653#[derive(Clone, Debug)]
654enum GlobSetMatchStrategy {
655    Literal(LiteralStrategy),
656    BasenameLiteral(BasenameLiteralStrategy),
657    Extension(ExtensionStrategy),
658    Prefix(PrefixStrategy),
659    Suffix(SuffixStrategy),
660    RequiredExtension(RequiredExtensionStrategy),
661    Regex(RegexSetStrategy),
662}
663
664impl GlobSetMatchStrategy {
665    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
666        use self::GlobSetMatchStrategy::*;
667        match *self {
668            Literal(ref s) => s.is_match(candidate),
669            BasenameLiteral(ref s) => s.is_match(candidate),
670            Extension(ref s) => s.is_match(candidate),
671            Prefix(ref s) => s.is_match(candidate),
672            Suffix(ref s) => s.is_match(candidate),
673            RequiredExtension(ref s) => s.is_match(candidate),
674            Regex(ref s) => s.is_match(candidate),
675        }
676    }
677
678    fn matches_into(
679        &self,
680        candidate: &Candidate<'_>,
681        matches: &mut Vec<usize>,
682    ) {
683        use self::GlobSetMatchStrategy::*;
684        match *self {
685            Literal(ref s) => s.matches_into(candidate, matches),
686            BasenameLiteral(ref s) => s.matches_into(candidate, matches),
687            Extension(ref s) => s.matches_into(candidate, matches),
688            Prefix(ref s) => s.matches_into(candidate, matches),
689            Suffix(ref s) => s.matches_into(candidate, matches),
690            RequiredExtension(ref s) => s.matches_into(candidate, matches),
691            Regex(ref s) => s.matches_into(candidate, matches),
692        }
693    }
694}
695
696#[derive(Clone, Debug)]
697struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
698
699impl LiteralStrategy {
700    fn new() -> LiteralStrategy {
701        LiteralStrategy(fnv::HashMap::default())
702    }
703
704    fn add(&mut self, global_index: usize, lit: String) {
705        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
706    }
707
708    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
709        self.0.contains_key(candidate.path.as_bytes())
710    }
711
712    #[inline(never)]
713    fn matches_into(
714        &self,
715        candidate: &Candidate<'_>,
716        matches: &mut Vec<usize>,
717    ) {
718        if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
719            matches.extend(hits);
720        }
721    }
722}
723
724#[derive(Clone, Debug)]
725struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
726
727impl BasenameLiteralStrategy {
728    fn new() -> BasenameLiteralStrategy {
729        BasenameLiteralStrategy(fnv::HashMap::default())
730    }
731
732    fn add(&mut self, global_index: usize, lit: String) {
733        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
734    }
735
736    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
737        if candidate.basename.is_empty() {
738            return false;
739        }
740        self.0.contains_key(candidate.basename.as_bytes())
741    }
742
743    #[inline(never)]
744    fn matches_into(
745        &self,
746        candidate: &Candidate<'_>,
747        matches: &mut Vec<usize>,
748    ) {
749        if candidate.basename.is_empty() {
750            return;
751        }
752        if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
753            matches.extend(hits);
754        }
755    }
756}
757
758#[derive(Clone, Debug)]
759struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
760
761impl ExtensionStrategy {
762    fn new() -> ExtensionStrategy {
763        ExtensionStrategy(fnv::HashMap::default())
764    }
765
766    fn add(&mut self, global_index: usize, ext: String) {
767        self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
768    }
769
770    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
771        if candidate.ext.is_empty() {
772            return false;
773        }
774        self.0.contains_key(candidate.ext.as_bytes())
775    }
776
777    #[inline(never)]
778    fn matches_into(
779        &self,
780        candidate: &Candidate<'_>,
781        matches: &mut Vec<usize>,
782    ) {
783        if candidate.ext.is_empty() {
784            return;
785        }
786        if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
787            matches.extend(hits);
788        }
789    }
790}
791
792#[derive(Clone, Debug)]
793struct PrefixStrategy {
794    matcher: AhoCorasick,
795    map: Vec<usize>,
796    longest: usize,
797}
798
799impl PrefixStrategy {
800    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
801        let path = candidate.path_prefix(self.longest);
802        for m in self.matcher.find_overlapping_iter(path) {
803            if m.start() == 0 {
804                return true;
805            }
806        }
807        false
808    }
809
810    fn matches_into(
811        &self,
812        candidate: &Candidate<'_>,
813        matches: &mut Vec<usize>,
814    ) {
815        let path = candidate.path_prefix(self.longest);
816        for m in self.matcher.find_overlapping_iter(path) {
817            if m.start() == 0 {
818                matches.push(self.map[m.pattern()]);
819            }
820        }
821    }
822}
823
824#[derive(Clone, Debug)]
825struct SuffixStrategy {
826    matcher: AhoCorasick,
827    map: Vec<usize>,
828    longest: usize,
829}
830
831impl SuffixStrategy {
832    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
833        let path = candidate.path_suffix(self.longest);
834        for m in self.matcher.find_overlapping_iter(path) {
835            if m.end() == path.len() {
836                return true;
837            }
838        }
839        false
840    }
841
842    fn matches_into(
843        &self,
844        candidate: &Candidate<'_>,
845        matches: &mut Vec<usize>,
846    ) {
847        let path = candidate.path_suffix(self.longest);
848        for m in self.matcher.find_overlapping_iter(path) {
849            if m.end() == path.len() {
850                matches.push(self.map[m.pattern()]);
851            }
852        }
853    }
854}
855
856#[derive(Clone, Debug)]
857struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
858
859impl RequiredExtensionStrategy {
860    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
861        if candidate.ext.is_empty() {
862            return false;
863        }
864        match self.0.get(candidate.ext.as_bytes()) {
865            None => false,
866            Some(regexes) => {
867                for &(_, ref re) in regexes {
868                    if re.is_match(candidate.path.as_bytes()) {
869                        return true;
870                    }
871                }
872                false
873            }
874        }
875    }
876
877    #[inline(never)]
878    fn matches_into(
879        &self,
880        candidate: &Candidate<'_>,
881        matches: &mut Vec<usize>,
882    ) {
883        if candidate.ext.is_empty() {
884            return;
885        }
886        if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
887            for &(global_index, ref re) in regexes {
888                if re.is_match(candidate.path.as_bytes()) {
889                    matches.push(global_index);
890                }
891            }
892        }
893    }
894}
895
896#[derive(Clone, Debug)]
897struct RegexSetStrategy {
898    matcher: Regex,
899    map: Vec<usize>,
900    // We use a pool of PatternSets to hopefully allocating a fresh one on each
901    // call.
902    //
903    // TODO: In the next semver breaking release, we should drop this pool and
904    // expose an opaque type that wraps PatternSet. Then callers can provide
905    // it to `matches_into` directly. Callers might still want to use a pool
906    // or similar to amortize allocation, but that matches the status quo and
907    // absolves us of needing to do it here.
908    patset: Arc<Pool<PatternSet, PatternSetPoolFn>>,
909}
910
911type PatternSetPoolFn =
912    Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>;
913
914impl RegexSetStrategy {
915    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
916        self.matcher.is_match(candidate.path.as_bytes())
917    }
918
919    fn matches_into(
920        &self,
921        candidate: &Candidate<'_>,
922        matches: &mut Vec<usize>,
923    ) {
924        let input = regex_automata::Input::new(candidate.path.as_bytes());
925        let mut patset = self.patset.get();
926        patset.clear();
927        self.matcher.which_overlapping_matches(&input, &mut patset);
928        for i in patset.iter() {
929            matches.push(self.map[i]);
930        }
931        PoolGuard::put(patset);
932    }
933}
934
/// Accumulates strings together with their global indices so they can be
/// compiled into a prefix, suffix or regex-set strategy.
#[derive(Clone, Debug)]
struct MultiStrategyBuilder {
    /// The strings added so far, in insertion order. They are handed to
    /// `AhoCorasick::new` by `prefix`/`suffix` and to `new_regex_set` by
    /// `regex_set`.
    literals: Vec<String>,
    /// The global index supplied with each string; `map[i]` belongs to
    /// `literals[i]`.
    map: Vec<usize>,
    /// The largest `len()` (in bytes) of any string added so far.
    longest: usize,
}
941
942impl MultiStrategyBuilder {
943    fn new() -> MultiStrategyBuilder {
944        MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
945    }
946
947    fn add(&mut self, global_index: usize, literal: String) {
948        if literal.len() > self.longest {
949            self.longest = literal.len();
950        }
951        self.map.push(global_index);
952        self.literals.push(literal);
953    }
954
955    fn prefix(self) -> PrefixStrategy {
956        PrefixStrategy {
957            matcher: AhoCorasick::new(&self.literals).unwrap(),
958            map: self.map,
959            longest: self.longest,
960        }
961    }
962
963    fn suffix(self) -> SuffixStrategy {
964        SuffixStrategy {
965            matcher: AhoCorasick::new(&self.literals).unwrap(),
966            map: self.map,
967            longest: self.longest,
968        }
969    }
970
971    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
972        let matcher = new_regex_set(self.literals)?;
973        let pattern_len = matcher.pattern_len();
974        let create: PatternSetPoolFn =
975            Box::new(move || PatternSet::new(pattern_len));
976        Ok(RegexSetStrategy {
977            matcher,
978            map: self.map,
979            patset: Arc::new(Pool::new(create)),
980        })
981    }
982
983    fn is_empty(&self) -> bool {
984        self.literals.is_empty()
985    }
986}
987
/// Accumulates, per extension (stored as bytes), the `(global index, regex
/// pattern)` pairs that `build` later compiles into a
/// `RequiredExtensionStrategy`.
#[derive(Clone, Debug)]
struct RequiredExtensionStrategyBuilder(
    fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
);
992
993impl RequiredExtensionStrategyBuilder {
994    fn new() -> RequiredExtensionStrategyBuilder {
995        RequiredExtensionStrategyBuilder(fnv::HashMap::default())
996    }
997
998    fn add(&mut self, global_index: usize, ext: String, regex: String) {
999        self.0
1000            .entry(ext.into_bytes())
1001            .or_insert(vec![])
1002            .push((global_index, regex));
1003    }
1004
1005    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
1006        let mut exts = fnv::HashMap::default();
1007        for (ext, regexes) in self.0.into_iter() {
1008            exts.insert(ext.clone(), vec![]);
1009            for (global_index, regex) in regexes {
1010                let compiled = new_regex(&regex)?;
1011                exts.get_mut(&ext).unwrap().push((global_index, compiled));
1012            }
1013        }
1014        Ok(RequiredExtensionStrategy(exts))
1015    }
1016}
1017
/// Escape meta-characters within the given glob pattern.
///
/// Each of the characters `?`, `*`, `[`, `]`, `{` and `}` is wrapped in its
/// own pair of brackets, so that, for example, `*` becomes `[*]`. Every other
/// character is copied to the output unchanged.
///
/// # Example
///
/// ```
/// use globset::escape;
///
/// assert_eq!(escape("foo*bar"), "foo[*]bar");
/// assert_eq!(escape("foo?bar"), "foo[?]bar");
/// assert_eq!(escape("foo[bar"), "foo[[]bar");
/// assert_eq!(escape("foo]bar"), "foo[]]bar");
/// assert_eq!(escape("foo{bar"), "foo[{]bar");
/// assert_eq!(escape("foo}bar"), "foo[}]bar");
/// ```
pub fn escape(s: &str) -> String {
    // The output is never shorter than the input, so reserve at least that.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        // `!` is intentionally not listed: it is only special inside
        // brackets, so it needs no escaping on its own.
        if matches!(ch, '?' | '*' | '[' | ']' | '{' | '}') {
            out.extend(&['[', ch, ']']);
        } else {
            out.push(ch);
        }
    }
    out
}
1053
#[cfg(test)]
mod tests {
    use crate::glob::Glob;

    use super::{GlobSet, GlobSetBuilder};

    /// Returns a builder holding one glob per entry of `patterns`, added in
    /// the given order. Panics if any pattern fails to parse.
    fn builder_with(patterns: &[&str]) -> GlobSetBuilder {
        let mut builder = GlobSetBuilder::new();
        for &pattern in patterns {
            builder.add(Glob::new(pattern).unwrap());
        }
        builder
    }

    #[test]
    fn set_works() {
        let set = builder_with(&["src/**/*.rs", "*.c", "src/lib.rs"])
            .build()
            .unwrap();

        // Paths that at least one glob should accept.
        assert!(set.is_match("foo.c"));
        assert!(set.is_match("src/foo.c"));
        assert!(set.is_match("src/foo.rs"));
        assert!(set.is_match("src/grep/src/main.rs"));

        // Paths that no glob should accept.
        assert!(!set.is_match("foo.rs"));
        assert!(!set.is_match("tests/foo.rs"));

        // The first and third globs both match, reported in that order.
        assert_eq!(set.matches("src/lib.rs"), vec![0, 2]);
    }

    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
        assert!(set.matches_all("a"));
    }

    #[test]
    fn default_set_is_empty_works() {
        let set = GlobSet::default();
        assert!(!set.is_match(""));
        assert!(!set.is_match("a"));
    }

    #[test]
    fn escape() {
        use super::escape;

        // (input, expected escaped output)
        let cases = [
            ("foo", "foo"),
            ("foo*", "foo[*]"),
            ("[]", "[[][]]"),
            ("*?", "[*][?]"),
            ("src/**/*.rs", "src/[*][*]/[*].rs"),
            ("bar[ab]baz", "bar[[]ab[]]baz"),
            ("bar[!!]!baz", "bar[[]!![]]!baz"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(expected, escape(input));
        }
    }

    // This tests that regex matching doesn't "remember" the results of
    // previous searches. That is, if any memory is reused from a previous
    // search, then it should be cleared first.
    #[test]
    fn set_does_not_remember() {
        let set = builder_with(&["*foo*", "*bar*", "*quux*"]).build().unwrap();

        assert_eq!(set.matches("ZfooZquuxZ"), vec![0, 2]);
        assert!(set.matches("nada").is_empty());
    }

    #[test]
    fn debug() {
        let builder = builder_with(&["*foo*", "*bar*", "*quux*"]);
        assert_eq!(
            format!("{builder:?}"),
            "GlobSetBuilder { pats: [Glob(\"*foo*\"), Glob(\"*bar*\"), Glob(\"*quux*\")] }",
        );
    }
}
globset/lib.rs

globset/
lib.rs