infer/lib.rs
1/*!
Small crate to infer a file's type and MIME type by checking its
[magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)) signature.
4
5# Examples
6
7### Get the type of a buffer
8
9```rust
10let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
11let info = infer::Infer::new();
12assert_eq!("image/jpeg", info.get(&v).unwrap().mime);
13assert_eq!("jpg", info.get(&v).unwrap().ext);
14```
15
16### Check path
17
18```rust
19let info = infer::Infer::new();
20let res = info.get_from_path("testdata/sample.jpg");
21assert!(res.is_ok());
22let o = res.unwrap();
23assert!(o.is_some());
24let typ = o.unwrap();
25assert_eq!("image/jpeg", typ.mime);
26assert_eq!("jpg", typ.ext);
27```
28
29### Check for specific type
30
Note that individual matcher functions do not require an `Infer` struct instance.
32
33```rust
34let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
35assert!(infer::image::is_jpeg(&v));
36```
37
38### Check for specific type class
39
40```rust
41let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
42let info = infer::Infer::new();
43assert!(info.is_image(&v));
44```
45
### Add a custom file type matcher
47
48```rust
49fn custom_matcher(buf: &[u8]) -> bool {
50 return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12;
51}
52
53let mut info = infer::Infer::new();
54info.add("custom/foo", "foo", custom_matcher);
55
56let v = vec![0x10, 0x11, 0x12, 0x13];
57let res = info.get(&v).unwrap();
58
59assert_eq!("custom/foo", res.mime);
60assert_eq!("foo", res.ext);
61```
62*/
63#![crate_name = "infer"]
64
65mod map;
66mod matchers;
67
68use std::fs::File;
69use std::io::Read;
70use std::path::Path;
71
72use map::{MatcherType, MATCHER_MAP};
73
74/// All the supported matchers categorized and exposed as functions
75pub use matchers::*;
76
/// Signature shared by every matcher function.
///
/// A matcher receives the bytes to inspect and returns `true` when it
/// recognizes them as its file type.
pub type Matcher = fn(buf: &[u8]) -> bool;
79
/// Generic information for a type.
///
/// Both fields are owned strings, so a `Type` is independent of the `Infer`
/// instance that produced it. It can be cloned, compared and used as a key in
/// hashed collections.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Type {
    /// The MIME type, e.g. `image/jpeg`.
    pub mime: String,

    /// The file extension without a leading dot, e.g. `jpg`.
    pub ext: String,
}
89
90/// Infer is the main struct of the module
91pub struct Infer {
92 mmap: Vec<(map::MatcherType, String, String, Matcher)>,
93}
94
95impl Infer {
96 /// Initialize a new instance of the infer struct.
97 pub fn new() -> Infer {
98 Infer { mmap: Vec::new() }
99 }
100
101 fn iter_matchers(&self) -> impl Iterator<Item = (&MatcherType, &str, &str, &Matcher)> {
102 let custom = self
103 .mmap
104 .iter()
105 .map(|(mt, mime, ext, matcher)| (mt, mime.as_str(), ext.as_str(), matcher));
106 MATCHER_MAP
107 .iter()
108 .map(|(mt, mime, ext, matcher)| (mt, *mime, *ext, matcher))
109 .chain(custom)
110 }
111
112 /// Returns the file type of the buffer.
113 ///
114 /// # Examples
115 ///
116 /// ```rust
117 /// let info = infer::Infer::new();
118 /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
119 /// assert_eq!("image/jpeg", info.get(&v).unwrap().mime);
120 /// assert_eq!("jpg", info.get(&v).unwrap().ext);
121 /// ```
122 pub fn get(&self, buf: &[u8]) -> Option<Type> {
123 for (_, mime, ext, matcher) in self.iter_matchers() {
124 if matcher(buf) {
125 return Some(Type {
126 mime: mime.to_string(),
127 ext: ext.to_string(),
128 });
129 }
130 }
131
132 None
133 }
134
135 /// Returns the file type of the file given a path.
136 ///
137 /// # Errors
138 ///
139 /// Returns an error if we fail to read the path.
140 ///
141 /// # Examples
142 ///
143 /// ```rust
144 /// let info = infer::Infer::new();
145 /// let res = info.get_from_path("testdata/sample.jpg");
146 /// assert!(res.is_ok());
147 /// let o = res.unwrap();
148 /// assert!(o.is_some());
149 /// let typ = o.unwrap();
150 /// assert_eq!("image/jpeg", typ.mime);
151 /// assert_eq!("jpg", typ.ext);
152 /// ```
153 pub fn get_from_path<P: AsRef<Path>>(&self, path: P) -> Result<Option<Type>, std::io::Error> {
154 let file = File::open(path)?;
155
156 let limit = file
157 .metadata()
158 .map(|m| std::cmp::min(m.len(), 8192) as usize + 1)
159 .unwrap_or(0);
160 let mut bytes = Vec::with_capacity(limit);
161 file.take(8192).read_to_end(&mut bytes)?;
162
163 Ok(self.get(&bytes))
164 }
165
166 /// Determines whether a buffer is of given extension.
167 ///
168 /// # Examples
169 ///
170 /// ```rust
171 /// let info = infer::Infer::new();
172 /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
173 /// assert!(info.is(&v, "jpg"));
174 /// ```
175 pub fn is(&self, buf: &[u8], ext: &str) -> bool {
176 if let Some((_mt, _mi, _e, matcher)) = self
177 .iter_matchers()
178 .find(|(_mt, _mime, ex, _matcher)| *ex == ext)
179 {
180 if matcher(buf) {
181 return true;
182 }
183 }
184
185 false
186 }
187
188 /// Determines whether a buffer is of given mime type.
189 ///
190 /// # Examples
191 ///
192 /// ```rust
193 /// let info = infer::Infer::new();
194 /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
195 /// assert!(info.is_mime(&v, "image/jpeg"));
196 /// ```
197 pub fn is_mime(&self, buf: &[u8], mime: &str) -> bool {
198 if let Some((_mt, _mi, _e, matcher)) = self
199 .iter_matchers()
200 .find(|(_mt, mi, _ext, _matcher)| *mi == mime)
201 {
202 if matcher(buf) {
203 return true;
204 }
205 }
206
207 false
208 }
209
210 /// Returns whether an extension is supported.
211 ///
212 /// # Examples
213 ///
214 /// ```rust
215 /// let info = infer::Infer::new();
216 /// assert!(info.is_supported("jpg"));
217 /// ```
218 pub fn is_supported(&self, ext: &str) -> bool {
219 for (_mt, _mime, type_ext, _matcher) in self.iter_matchers() {
220 if ext == type_ext {
221 return true;
222 }
223 }
224
225 false
226 }
227
228 /// Returns whether a mime type is supported.
229 ///
230 /// # Examples
231 ///
232 /// ```rust
233 /// let info = infer::Infer::new();
234 /// assert!(info.is_mime_supported("image/jpeg"));
235 /// ```
236 pub fn is_mime_supported(&self, mime: &str) -> bool {
237 for (_mt, type_mime, _ext, _matcher) in self.iter_matchers() {
238 if mime == type_mime {
239 return true;
240 }
241 }
242
243 false
244 }
245
246 /// Determines whether a buffer is an application type.
247 ///
248 /// # Examples
249 ///
250 /// ```rust
251 /// use std::fs;
252 /// let info = infer::Infer::new();
253 /// assert!(info.is_app(&fs::read("testdata/sample.wasm").unwrap()));
254 /// ```
255 pub fn is_app(&self, buf: &[u8]) -> bool {
256 self.is_type(buf, map::MatcherType::APP)
257 }
258
259 /// Determines whether a buffer is an archive type.
260 /// # Examples
261 ///
262 /// ```rust
263 /// use std::fs;
264 /// let info = infer::Infer::new();
265 /// assert!(info.is_archive(&fs::read("testdata/sample.pdf").unwrap()));
266 /// ```
267 pub fn is_archive(&self, buf: &[u8]) -> bool {
268 self.is_type(buf, map::MatcherType::ARCHIVE)
269 }
270
271 /// Determines whether a buffer is an audio type.
272 ///
273 /// # Examples
274 ///
275 /// ```rust
276 /// // mp3
277 /// let info = infer::Infer::new();
278 /// let v = vec![0xff, 0xfb, 0x90, 0x44, 0x00];
279 /// assert!(info.is_audio(&v));
280 /// ```
281 pub fn is_audio(&self, buf: &[u8]) -> bool {
282 self.is_type(buf, map::MatcherType::AUDIO)
283 }
284
285 /// Determines whether a buffer is a document type.
286 ///
287 /// # Examples
288 ///
289 /// ```rust
290 /// use std::fs;
291 /// let info = infer::Infer::new();
292 /// assert!(info.is_document(&fs::read("testdata/sample.docx").unwrap()));
293 /// ```
294 pub fn is_document(&self, buf: &[u8]) -> bool {
295 self.is_type(buf, map::MatcherType::DOC)
296 }
297
298 /// Determines whether a buffer is a font type.
299 ///
300 /// # Examples
301 ///
302 /// ```rust
303 /// use std::fs;
304 /// let info = infer::Infer::new();
305 /// assert!(info.is_font(&fs::read("testdata/sample.ttf").unwrap()));
306 /// ```
307 pub fn is_font(&self, buf: &[u8]) -> bool {
308 self.is_type(buf, map::MatcherType::FONT)
309 }
310
311 /// Determines whether a buffer is an image type.
312 ///
313 /// # Examples
314 ///
315 /// ```rust
316 /// let v = vec![0xFF, 0xD8, 0xFF, 0xAA];
317 /// let info = infer::Infer::new();
318 /// assert!(info.is_image(&v));
319 /// ```
320 pub fn is_image(&self, buf: &[u8]) -> bool {
321 self.is_type(buf, map::MatcherType::IMAGE)
322 }
323
324 /// Determines whether a buffer is a video type.
325 ///
326 /// # Examples
327 ///
328 /// ```rust
329 /// use std::fs;
330 /// let info = infer::Infer::new();
331 /// assert!(info.is_video(&fs::read("testdata/sample.mov").unwrap()));
332 /// ```
333 pub fn is_video(&self, buf: &[u8]) -> bool {
334 self.is_type(buf, map::MatcherType::VIDEO)
335 }
336
337 /// Determines whether a buffer is one of the custom types added.
338 ///
339 /// # Examples
340 ///
341 /// ```rust
342 /// fn custom_matcher(buf: &[u8]) -> bool {
343 /// return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12;
344 /// }
345 ///
346 /// let mut info = infer::Infer::new();
347 /// info.add("custom/foo", "foo", custom_matcher);
348 /// let v = vec![0x10, 0x11, 0x12, 0x13];
349 /// assert!(info.is_custom(&v));
350 /// ```
351 pub fn is_custom(&self, buf: &[u8]) -> bool {
352 self.is_type(buf, map::MatcherType::CUSTOM)
353 }
354
355 /// Adds a custom matcher.
356 ///
357 /// # Examples
358 ///
359 /// ```rust
360 /// fn custom_matcher(buf: &[u8]) -> bool {
361 /// return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12;
362 /// }
363 ///
364 /// let mut info = infer::Infer::new();
365 /// info.add("custom/foo", "foo", custom_matcher);
366 /// let v = vec![0x10, 0x11, 0x12, 0x13];
367 /// let res = info.get(&v).unwrap();
368 /// assert_eq!("custom/foo", res.mime);
369 /// assert_eq!("foo", res.ext);
370 /// ```
371 pub fn add(&mut self, mime: &str, ext: &str, m: Matcher) {
372 self.mmap.push((
373 map::MatcherType::CUSTOM,
374 mime.to_string(),
375 ext.to_string(),
376 m,
377 ));
378 }
379
380 fn is_type(&self, buf: &[u8], typ: map::MatcherType) -> bool {
381 for (_mt, _mi, _ex, matcher) in self
382 .iter_matchers()
383 .filter(|(mt, _mime, _e, _matcher)| **mt == typ)
384 {
385 if matcher(buf) {
386 return true;
387 }
388 }
389
390 false
391 }
392}
393
394impl Default for Infer {
395 fn default() -> Self {
396 Infer::new()
397 }
398}
399
#[cfg(test)]
mod tests {
    use super::Infer;

    /// An empty buffer must not be recognized as any type.
    #[test]
    fn test_get_unknown() {
        let empty: Vec<u8> = Vec::new();
        let info = Infer::new();
        assert!(info.get(&empty).is_none());
    }

    /// A JPEG signature must be reported with the `jpg` extension and the
    /// `image/jpeg` MIME type.
    #[test]
    fn test_get_jpeg() {
        let jpeg_header = vec![0xFF, 0xD8, 0xFF, 0xAA];
        let detected = Infer::new()
            .get(&jpeg_header)
            .expect("type info expected");
        assert_eq!(detected.ext, "jpg");
        assert_eq!(detected.mime, "image/jpeg");
    }
}