1use std::io::{Read, Seek, SeekFrom};
2
3use memchr::memmem::{Finder, FinderRev};
4
5use crate::result::ZipResult;
6
7pub trait FinderDirection<'a> {
8 fn new(needle: &'a [u8]) -> Self;
9 fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64;
10 fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize);
11
12 fn needle(&self) -> &[u8];
13 fn find(&self, haystack: &[u8]) -> Option<usize>;
14 fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64>;
15 fn move_scope(&self, offset: usize) -> usize;
16}
17
18pub struct Forward<'a>(Finder<'a>);
19impl<'a> FinderDirection<'a> for Forward<'a> {
20 fn new(needle: &'a [u8]) -> Self {
21 Self(Finder::new(needle))
22 }
23
24 fn reset_cursor((start_inclusive, _): (u64, u64), _: usize) -> u64 {
25 start_inclusive
26 }
27
28 fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
29 (&window[mid_window_offset..], mid_window_offset)
30 }
31
32 fn find(&self, haystack: &[u8]) -> Option<usize> {
33 self.0.find(haystack)
34 }
35
36 fn needle(&self) -> &[u8] {
37 self.0.needle()
38 }
39
40 fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
41 let magic_overlap = self.needle().len().saturating_sub(1) as u64;
42 let next = cursor.saturating_add(window_size as u64 - magic_overlap);
43
44 if next >= bounds.1 {
45 None
46 } else {
47 Some(next)
48 }
49 }
50
51 fn move_scope(&self, offset: usize) -> usize {
52 offset + self.needle().len()
53 }
54}
55
56pub struct Backwards<'a>(FinderRev<'a>);
57impl<'a> FinderDirection<'a> for Backwards<'a> {
58 fn new(needle: &'a [u8]) -> Self {
59 Self(FinderRev::new(needle))
60 }
61
62 fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64 {
63 bounds
64 .1
65 .saturating_sub(window_size as u64)
66 .clamp(bounds.0, bounds.1)
67 }
68
69 fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
70 (&window[..mid_window_offset], 0)
71 }
72
73 fn find(&self, haystack: &[u8]) -> Option<usize> {
74 self.0.rfind(haystack)
75 }
76
77 fn needle(&self) -> &[u8] {
78 self.0.needle()
79 }
80
81 fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
82 let magic_overlap = self.needle().len().saturating_sub(1) as u64;
83
84 if cursor <= bounds.0 {
85 None
86 } else {
87 Some(
88 cursor
89 .saturating_add(magic_overlap)
90 .saturating_sub(window_size as u64)
91 .clamp(bounds.0, bounds.1),
92 )
93 }
94 }
95
96 fn move_scope(&self, offset: usize) -> usize {
97 offset
98 }
99}
100
101pub struct MagicFinder<Direction> {
105 buffer: Box<[u8]>,
106 pub(self) finder: Direction,
107 cursor: u64,
108 mid_buffer_offset: Option<usize>,
109 bounds: (u64, u64),
110}
111
112impl<'a, T: FinderDirection<'a>> MagicFinder<T> {
113 pub fn new(magic_bytes: &'a [u8], start_inclusive: u64, end_exclusive: u64) -> Self {
115 const BUFFER_SIZE: usize = 2048;
116
117 debug_assert!(BUFFER_SIZE >= magic_bytes.len());
120
121 Self {
122 buffer: vec![0; BUFFER_SIZE].into_boxed_slice(),
123 finder: T::new(magic_bytes),
124 cursor: T::reset_cursor((start_inclusive, end_exclusive), BUFFER_SIZE),
125 mid_buffer_offset: None,
126 bounds: (start_inclusive, end_exclusive),
127 }
128 }
129
130 pub fn repurpose(&mut self, magic_bytes: &'a [u8], bounds: (u64, u64)) -> &mut Self {
132 debug_assert!(self.buffer.len() >= magic_bytes.len());
133
134 self.finder = T::new(magic_bytes);
135 self.cursor = T::reset_cursor(bounds, self.buffer.len());
136 self.bounds = bounds;
137
138 self.mid_buffer_offset = None;
140
141 self
142 }
143
144 pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
146 loop {
147 if self.cursor < self.bounds.0 || self.cursor >= self.bounds.1 {
148 break;
150 }
151
152 let window_start = self.cursor;
154 let window_end = self
155 .cursor
156 .saturating_add(self.buffer.len() as u64)
157 .min(self.bounds.1);
158
159 if window_end <= window_start {
160 break;
162 }
163
164 let window = &mut self.buffer[..(window_end - window_start) as usize];
165
166 if self.mid_buffer_offset.is_none() {
167 reader.seek(SeekFrom::Start(window_start))?;
168 reader.read_exact(window)?;
169 }
170
171 let (window, window_start_offset) = match self.mid_buffer_offset {
172 Some(mid_buffer_offset) => T::scope_window(window, mid_buffer_offset),
173 None => (&*window, 0usize),
174 };
175
176 if let Some(offset) = self.finder.find(window) {
177 let magic_pos = window_start + window_start_offset as u64 + offset as u64;
178 reader.seek(SeekFrom::Start(magic_pos))?;
179
180 self.mid_buffer_offset = Some(self.finder.move_scope(window_start_offset + offset));
181
182 return Ok(Some(magic_pos));
183 }
184
185 self.mid_buffer_offset = None;
186
187 match self
188 .finder
189 .move_cursor(self.cursor, self.bounds, self.buffer.len())
190 {
191 Some(new_cursor) => {
192 self.cursor = new_cursor;
193 }
194 None => {
195 self.bounds.0 = self.bounds.1;
197 break;
198 }
199 }
200 }
201
202 Ok(None)
203 }
204}
205
206pub struct OptimisticMagicFinder<Direction> {
214 inner: MagicFinder<Direction>,
215 initial_guess: Option<(u64, bool)>,
216}
217
218const STACK_BUFFER_SIZE: usize = 8;
222
223impl<'a, Direction: FinderDirection<'a>> OptimisticMagicFinder<Direction> {
224 pub fn new_empty() -> Self {
226 Self {
227 inner: MagicFinder::new(&[], 0, 0),
228 initial_guess: None,
229 }
230 }
231
232 pub fn repurpose(
234 &mut self,
235 magic_bytes: &'a [u8],
236 bounds: (u64, u64),
237 initial_guess: Option<(u64, bool)>,
238 ) -> &mut Self {
239 debug_assert!(magic_bytes.len() <= STACK_BUFFER_SIZE);
240
241 self.inner.repurpose(magic_bytes, bounds);
242 self.initial_guess = initial_guess;
243
244 self
245 }
246
247 pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
250 if let Some((v, mandatory)) = self.initial_guess {
251 reader.seek(SeekFrom::Start(v))?;
252
253 let mut buffer = [0; STACK_BUFFER_SIZE];
254 let buffer = &mut buffer[..self.inner.finder.needle().len()];
255
256 if v.saturating_add(buffer.len() as u64) <= self.inner.bounds.1 {
258 reader.read_exact(buffer)?;
259
260 if self.inner.finder.needle() == buffer {
262 self.initial_guess.take();
263 reader.seek(SeekFrom::Start(v))?;
264 return Ok(Some(v));
265 }
266 }
267
268 if mandatory {
270 return Ok(None);
271 }
272
273 self.initial_guess.take();
275 }
276
277 self.inner.next(reader)
278 }
279}