infer/matchers/
doc.rs
1use std::convert::TryInto;
2
/// Kind of Office Open XML package recognised by `msooxml`.
///
/// The specific variants are produced by `check_msooml` from the directory
/// prefix of a ZIP entry name; `OOXLM` is the fallback used by `msooxml`.
#[derive(Debug, PartialEq, Eq)]
enum DocType {
    /// Entry name starts with `word/`.
    DOCX,
    /// Entry name starts with `xl/`.
    XLSX,
    /// Entry name starts with `ppt/`.
    PPTX,
    /// Package accepted by `msooxml` without a more specific match.
    /// NOTE(review): spelling is presumably a typo for "OOXML"; kept because
    /// other functions in this file refer to the variant by this name.
    OOXLM,
}
13
/// Returns `true` when `buf` begins with the eight bytes
/// `D0 CF 11 E0 A1 B1 1A E1`.
///
/// Only this leading signature is examined (presumably the OLE2 / Compound
/// File container header), so any buffer sharing it is reported as a match.
pub fn is_doc(buf: &[u8]) -> bool {
    buf.starts_with(&[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])
}
26
27pub fn is_docx(buf: &[u8]) -> bool {
29 match msooxml(buf) {
30 Some(typ) => typ == DocType::DOCX,
31 None => false,
32 }
33}
34
/// Returns `true` when `buf` begins with the eight bytes
/// `D0 CF 11 E0 A1 B1 1A E1`.
///
/// Only this leading signature is examined (presumably the OLE2 / Compound
/// File container header), so any buffer sharing it is reported as a match.
pub fn is_xls(buf: &[u8]) -> bool {
    matches!(
        buf,
        [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, ..]
    )
}
47
48pub fn is_xlsx(buf: &[u8]) -> bool {
50 match msooxml(buf) {
51 Some(typ) => typ == DocType::XLSX,
52 None => false,
53 }
54}
55
/// Returns `true` when `buf` begins with the eight bytes
/// `D0 CF 11 E0 A1 B1 1A E1`.
///
/// Only this leading signature is examined (presumably the OLE2 / Compound
/// File container header), so any buffer sharing it is reported as a match.
pub fn is_ppt(buf: &[u8]) -> bool {
    const HEADER: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];

    // `get` yields `None` for buffers shorter than the header.
    buf.get(..HEADER.len()) == Some(&HEADER[..])
}
68
69pub fn is_pptx(buf: &[u8]) -> bool {
71 match msooxml(buf) {
72 Some(typ) => typ == DocType::PPTX,
73 None => false,
74 }
75}
76
77fn msooxml(buf: &[u8]) -> Option<DocType> {
78 let signature = [b'P', b'K', 0x03, 0x04];
79
80 if !compare_bytes(buf, &signature, 0) {
82 return None;
83 }
84
85 let v = check_msooml(buf, 0x1E);
86 if v.is_some() {
87 return v;
88 }
89
90 if !compare_bytes(buf, b"[Content_Types].xml", 0x1E)
91 && !compare_bytes(buf, b"_rels/.rels", 0x1E)
92 {
93 return None;
94 }
95
96 let mut start_offset = (u32::from_le_bytes(buf[18..22].try_into().unwrap()) + 49) as usize;
100 let idx = search(buf, start_offset, 6000)?;
101
102 start_offset += idx + 4 + 26;
105 let idx = search(buf, start_offset, 6000)?;
106
107 start_offset += idx + 4 + 26;
111 check_msooml(buf, start_offset)?;
112
113 start_offset += 26;
115 let idx = search(buf, start_offset, 6000);
116 match idx {
117 Some(idx) => start_offset += idx + 4 + 26,
118 None => return Some(DocType::OOXLM),
119 };
120
121 let typo = check_msooml(buf, start_offset);
122 if typo.is_some() {
123 return typo;
124 }
125
126 Some(DocType::OOXLM)
127}
128
/// Returns `true` when `sub_slice` occurs in `slice` exactly at
/// `start_offset`.
///
/// Returns `false` when `start_offset` lies beyond the end of `slice` or when
/// fewer than `sub_slice.len()` bytes remain from that offset. An empty
/// `sub_slice` matches at any offset up to and including `slice.len()`.
///
/// The offset is applied with checked slicing, so an arbitrarily large
/// `start_offset` cannot overflow.
fn compare_bytes(slice: &[u8], sub_slice: &[u8], start_offset: usize) -> bool {
    slice
        .get(start_offset..)
        .map_or(false, |tail| tail.starts_with(sub_slice))
}
146
147fn check_msooml(buf: &[u8], offset: usize) -> Option<DocType> {
148 if compare_bytes(buf, &[b'w', b'o', b'r', b'd', b'/'], offset) {
149 Some(DocType::DOCX)
150 } else if compare_bytes(buf, &[b'p', b'p', b't', b'/'], offset) {
151 Some(DocType::PPTX)
152 } else if compare_bytes(buf, &[b'x', b'l', b'/'], offset) {
153 Some(DocType::XLSX)
154 } else {
155 None
156 }
157}
158
/// Looks for the byte sequence `PK\x03\x04` inside `buf[start..start + range]`,
/// with the end of the window clamped to `buf.len()`.
///
/// Returns the index of the first occurrence **relative to `start`**, or
/// `None` when the window is empty, `start` lies at or beyond the end of the
/// buffer, or the sequence does not occur fully inside the window.
///
/// The window end is computed with saturating arithmetic, so a very large
/// `start` or `range` cannot overflow.
fn search(buf: &[u8], start: usize, range: usize) -> Option<usize> {
    const SIGNATURE: &[u8] = b"PK\x03\x04";

    let end = start.saturating_add(range).min(buf.len());

    // `get` yields `None` when `start > end`; an empty window yields no
    // candidates, so both cases end up as `None`.
    buf.get(start..end)?
        .windows(SIGNATURE.len())
        .position(|window| window == SIGNATURE)
}