infer/
lib.rs

1/*!
2Small crate to infer file and MIME type by checking the
3[magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)) signature.
4
5# Examples
6
7### Get the type of a buffer
8
9```rust
10let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
11let info = infer::Infer::new();
12assert_eq!("image/jpeg", info.get(&v).unwrap().mime);
13assert_eq!("jpg", info.get(&v).unwrap().ext);
14```
15
16### Check path
17
18```rust
19let info = infer::Infer::new();
20let res = info.get_from_path("testdata/sample.jpg");
21assert!(res.is_ok());
22let o = res.unwrap();
23assert!(o.is_some());
24let typ = o.unwrap();
25assert_eq!("image/jpeg", typ.mime);
26assert_eq!("jpg", typ.ext);
27```
28
29### Check for specific type
30
31Note individual matcher functions do not require an Infer struct instance.
32
33```rust
34let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
35assert!(infer::image::is_jpeg(&v));
36```
37
38### Check for specific type class
39
40```rust
41let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
42let info = infer::Infer::new();
43assert!(info.is_image(&v));
44```
45
### Add a custom file type matcher
47
48```rust
49fn custom_matcher(buf: &[u8]) -> bool {
50    return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12;
51}
52
53let mut info = infer::Infer::new();
54info.add("custom/foo", "foo", custom_matcher);
55
56let v = vec![0x10, 0x11, 0x12, 0x13];
57let res =  info.get(&v).unwrap();
58
59assert_eq!("custom/foo", res.mime);
60assert_eq!("foo", res.ext);
61```
62*/
63#![crate_name = "infer"]
64
65mod map;
66mod matchers;
67
68use std::fs::File;
69use std::io::Read;
70use std::path::Path;
71
72use map::{MatcherType, MATCHER_MAP};
73
74/// All the supported matchers categorized and exposed as functions
75pub use matchers::*;
76
/// Matcher function.
///
/// A matcher receives the bytes to inspect and returns `true` when it
/// recognizes them as its file type.
pub type Matcher = fn(buf: &[u8]) -> bool;
79
/// Generic information for a type
///
/// Plain value type: it can be compared, cloned and hashed, so results can be
/// stored in collections or handed out without moving the original.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Type {
    /// The MIME type, e.g. `image/jpeg`
    pub mime: String,

    /// The file extension, e.g. `jpg`
    pub ext: String,
}
89
/// Infer is the main struct of the module
///
/// It stores the custom matchers registered through `add`; the built-in
/// matchers come from the crate's static matcher map.
pub struct Infer {
    // Custom matchers, in insertion order. Each entry is
    // (matcher class, MIME type, file extension, matcher function).
    mmap: Vec<(map::MatcherType, String, String, Matcher)>,
}
94
95impl Infer {
96    /// Initialize a new instance of the infer struct.
97    pub fn new() -> Infer {
98        Infer { mmap: Vec::new() }
99    }
100
101    fn iter_matchers(&self) -> impl Iterator<Item = (&MatcherType, &str, &str, &Matcher)> {
102        let custom = self
103            .mmap
104            .iter()
105            .map(|(mt, mime, ext, matcher)| (mt, mime.as_str(), ext.as_str(), matcher));
106        MATCHER_MAP
107            .iter()
108            .map(|(mt, mime, ext, matcher)| (mt, *mime, *ext, matcher))
109            .chain(custom)
110    }
111
112    /// Returns the file type of the buffer.
113    ///
114    /// # Examples
115    ///
116    /// ```rust
117    /// let info = infer::Infer::new();
118    /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
119    /// assert_eq!("image/jpeg", info.get(&v).unwrap().mime);
120    /// assert_eq!("jpg", info.get(&v).unwrap().ext);
121    /// ```
122    pub fn get(&self, buf: &[u8]) -> Option<Type> {
123        for (_, mime, ext, matcher) in self.iter_matchers() {
124            if matcher(buf) {
125                return Some(Type {
126                    mime: mime.to_string(),
127                    ext: ext.to_string(),
128                });
129            }
130        }
131
132        None
133    }
134
135    /// Returns the file type of the file given a path.
136    ///
137    /// # Errors
138    ///
139    /// Returns an error if we fail to read the path.
140    ///
141    /// # Examples
142    ///
143    /// ```rust
144    /// let info = infer::Infer::new();
145    /// let res = info.get_from_path("testdata/sample.jpg");
146    /// assert!(res.is_ok());
147    /// let o = res.unwrap();
148    /// assert!(o.is_some());
149    /// let typ = o.unwrap();
150    /// assert_eq!("image/jpeg", typ.mime);
151    /// assert_eq!("jpg", typ.ext);
152    /// ```
153    pub fn get_from_path<P: AsRef<Path>>(&self, path: P) -> Result<Option<Type>, std::io::Error> {
154        let file = File::open(path)?;
155
156        let limit = file
157            .metadata()
158            .map(|m| std::cmp::min(m.len(), 8192) as usize + 1)
159            .unwrap_or(0);
160        let mut bytes = Vec::with_capacity(limit);
161        file.take(8192).read_to_end(&mut bytes)?;
162
163        Ok(self.get(&bytes))
164    }
165
166    /// Determines whether a buffer is of given extension.
167    ///
168    /// # Examples
169    ///
170    /// ```rust
171    /// let info = infer::Infer::new();
172    /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
173    /// assert!(info.is(&v, "jpg"));
174    /// ```
175    pub fn is(&self, buf: &[u8], ext: &str) -> bool {
176        if let Some((_mt, _mi, _e, matcher)) = self
177            .iter_matchers()
178            .find(|(_mt, _mime, ex, _matcher)| *ex == ext)
179        {
180            if matcher(buf) {
181                return true;
182            }
183        }
184
185        false
186    }
187
188    /// Determines whether a buffer is of given mime type.
189    ///
190    /// # Examples
191    ///
192    /// ```rust
193    /// let info = infer::Infer::new();
194    /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
195    /// assert!(info.is_mime(&v, "image/jpeg"));
196    /// ```
197    pub fn is_mime(&self, buf: &[u8], mime: &str) -> bool {
198        if let Some((_mt, _mi, _e, matcher)) = self
199            .iter_matchers()
200            .find(|(_mt, mi, _ext, _matcher)| *mi == mime)
201        {
202            if matcher(buf) {
203                return true;
204            }
205        }
206
207        false
208    }
209
210    /// Returns whether an extension is supported.
211    ///
212    /// # Examples
213    ///
214    /// ```rust
215    /// let info = infer::Infer::new();
216    /// assert!(info.is_supported("jpg"));
217    /// ```
218    pub fn is_supported(&self, ext: &str) -> bool {
219        for (_mt, _mime, type_ext, _matcher) in self.iter_matchers() {
220            if ext == type_ext {
221                return true;
222            }
223        }
224
225        false
226    }
227
228    /// Returns whether a mime type is supported.
229    ///
230    /// # Examples
231    ///
232    /// ```rust
233    /// let info = infer::Infer::new();
234    /// assert!(info.is_mime_supported("image/jpeg"));
235    /// ```
236    pub fn is_mime_supported(&self, mime: &str) -> bool {
237        for (_mt, type_mime, _ext, _matcher) in self.iter_matchers() {
238            if mime == type_mime {
239                return true;
240            }
241        }
242
243        false
244    }
245
246    /// Determines whether a buffer is an application type.
247    ///
248    /// # Examples
249    ///
250    /// ```rust
251    /// use std::fs;
252    /// let info = infer::Infer::new();
253    /// assert!(info.is_app(&fs::read("testdata/sample.wasm").unwrap()));
254    /// ```
255    pub fn is_app(&self, buf: &[u8]) -> bool {
256        self.is_type(buf, map::MatcherType::APP)
257    }
258
259    /// Determines whether a buffer is an archive type.
260    /// # Examples
261    ///
262    /// ```rust
263    /// use std::fs;
264    /// let info = infer::Infer::new();
265    /// assert!(info.is_archive(&fs::read("testdata/sample.pdf").unwrap()));
266    /// ```
267    pub fn is_archive(&self, buf: &[u8]) -> bool {
268        self.is_type(buf, map::MatcherType::ARCHIVE)
269    }
270
271    /// Determines whether a buffer is an audio type.
272    ///
273    /// # Examples
274    ///
275    /// ```rust
276    /// // mp3
277    /// let info = infer::Infer::new();
278    /// let v = vec![0xff, 0xfb, 0x90, 0x44, 0x00];
279    /// assert!(info.is_audio(&v));
280    /// ```
281    pub fn is_audio(&self, buf: &[u8]) -> bool {
282        self.is_type(buf, map::MatcherType::AUDIO)
283    }
284
285    /// Determines whether a buffer is a document type.
286    ///
287    /// # Examples
288    ///
289    /// ```rust
290    /// use std::fs;
291    /// let info = infer::Infer::new();
292    /// assert!(info.is_document(&fs::read("testdata/sample.docx").unwrap()));
293    /// ```
294    pub fn is_document(&self, buf: &[u8]) -> bool {
295        self.is_type(buf, map::MatcherType::DOC)
296    }
297
298    /// Determines whether a buffer is a font type.
299    ///
300    /// # Examples
301    ///
302    /// ```rust
303    /// use std::fs;
304    /// let info = infer::Infer::new();
305    /// assert!(info.is_font(&fs::read("testdata/sample.ttf").unwrap()));
306    /// ```
307    pub fn is_font(&self, buf: &[u8]) -> bool {
308        self.is_type(buf, map::MatcherType::FONT)
309    }
310
311    /// Determines whether a buffer is an image type.
312    ///
313    /// # Examples
314    ///
315    /// ```rust
316    /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
317    /// let info = infer::Infer::new();
318    /// assert!(info.is_image(&v));
319    /// ```
320    pub fn is_image(&self, buf: &[u8]) -> bool {
321        self.is_type(buf, map::MatcherType::IMAGE)
322    }
323
324    /// Determines whether a buffer is a video type.
325    ///
326    /// # Examples
327    ///
328    /// ```rust
329    /// use std::fs;
330    /// let info = infer::Infer::new();
331    /// assert!(info.is_video(&fs::read("testdata/sample.mov").unwrap()));
332    /// ```
333    pub fn is_video(&self, buf: &[u8]) -> bool {
334        self.is_type(buf, map::MatcherType::VIDEO)
335    }
336
337    /// Determines whether a buffer is one of the custom types added.
338    ///
339    /// # Examples
340    ///
341    /// ```rust
342    /// fn custom_matcher(buf: &[u8]) -> bool {
343    ///     return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12;
344    /// }
345    ///
346    /// let mut info = infer::Infer::new();
347    /// info.add("custom/foo", "foo", custom_matcher);
348    /// let v = vec![0x10, 0x11, 0x12, 0x13];
349    /// assert!(info.is_custom(&v));
350    /// ```
351    pub fn is_custom(&self, buf: &[u8]) -> bool {
352        self.is_type(buf, map::MatcherType::CUSTOM)
353    }
354
355    /// Adds a custom matcher.
356    ///
357    /// # Examples
358    ///
359    /// ```rust
360    /// fn custom_matcher(buf: &[u8]) -> bool {
361    ///     return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12;
362    /// }
363    ///
364    /// let mut info = infer::Infer::new();
365    /// info.add("custom/foo", "foo", custom_matcher);
366    /// let v = vec![0x10, 0x11, 0x12, 0x13];
367    /// let res =  info.get(&v).unwrap();
368    /// assert_eq!("custom/foo", res.mime);
369    /// assert_eq!("foo", res.ext);
370    /// ```
371    pub fn add(&mut self, mime: &str, ext: &str, m: Matcher) {
372        self.mmap.push((
373            map::MatcherType::CUSTOM,
374            mime.to_string(),
375            ext.to_string(),
376            m,
377        ));
378    }
379
380    fn is_type(&self, buf: &[u8], typ: map::MatcherType) -> bool {
381        for (_mt, _mi, _ex, matcher) in self
382            .iter_matchers()
383            .filter(|(mt, _mime, _e, _matcher)| **mt == typ)
384        {
385            if matcher(buf) {
386                return true;
387            }
388        }
389
390        false
391    }
392}
393
394impl Default for Infer {
395    fn default() -> Self {
396        Infer::new()
397    }
398}
399
#[cfg(test)]
mod tests {
    use super::Infer;

    /// An empty buffer is not recognized as any type.
    #[test]
    fn test_get_unknown() {
        let empty: Vec<u8> = Vec::new();
        let detected = Infer::new().get(&empty);
        assert!(detected.is_none());
    }

    /// A buffer starting with the JPEG signature is reported as `jpg` / `image/jpeg`.
    #[test]
    fn test_get_jpeg() {
        let jpeg_header: Vec<u8> = vec![0xFF, 0xD8, 0xFF, 0xAA];
        let detected = Infer::new()
            .get(&jpeg_header)
            .expect("type info expected");

        assert_eq!(detected.ext, "jpg");
        assert_eq!(detected.mime, "image/jpeg");
    }
}