encoding/
util.rs

1// This is a part of rust-encoding.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! Internal utilities.
6
7use std::{str, char, mem};
8use std::marker::PhantomData;
9use std::convert::Into;
10use std::default::Default;
11use types;
12
/// Converts a code point to `char` without a validity check in release builds.
///
/// The caller must pass a valid Unicode scalar value, i.e. anything other than
/// a surrogate (`0xD800` to `0xDFFF`) or a value above `0x10FFFF`.
/// Debug builds assert this; builds without debug assertions skip the check.
pub fn as_char(ch: u32) -> char {
    debug_assert!(char::from_u32(ch).is_some());
    // SAFETY: the validity of `ch` is the caller's contract (asserted above in
    // debug builds), which is exactly the precondition of `from_u32_unchecked`.
    unsafe { char::from_u32_unchecked(ch) }
}
18
/// Iterator over a string's characters, pairing each one with the half-open
/// byte range `(start, end)` it occupies.
pub struct StrCharIndexIterator<'r> {
    /// Running byte offset: the end of the most recently yielded character.
    index: usize,
    /// The underlying character iterator.
    chars: str::Chars<'r>,
}

impl<'r> Iterator for StrCharIndexIterator<'r> {
    type Item = ((usize,usize), char);

    /// Yields the next character together with its byte range, advancing the
    /// running offset by the character's UTF-8 length.
    #[inline]
    fn next(&mut self) -> Option<((usize,usize), char)> {
        let ch = match self.chars.next() {
            Some(ch) => ch,
            None => return None,
        };
        let start = self.index;
        let end = start + ch.len_utf8();
        self.index = end;
        Some(((start, end), ch))
    }
}
40
41/// A trait providing an `index_iter` method.
42pub trait StrCharIndex<'r> {
43    fn index_iter(&self) -> StrCharIndexIterator<'r>;
44}
45
46impl<'r> StrCharIndex<'r> for &'r str {
47    /// Iterates over each character with corresponding byte offset range.
48    fn index_iter(&self) -> StrCharIndexIterator<'r> {
49        StrCharIndexIterator { index: 0, chars: self.chars() }
50    }
51}
52
53/// A helper struct for the stateful decoder DSL.
54pub struct StatefulDecoderHelper<'a, St, Data: 'a> {
55    /// The current buffer.
56    pub buf: &'a [u8],
57    /// The current index to the buffer.
58    pub pos: usize,
59    /// The output buffer.
60    pub output: &'a mut (types::StringWriter + 'a),
61    /// The last codec error. The caller will later collect this.
62    pub err: Option<types::CodecError>,
63    /// The additional data attached for the use from transition functions.
64    pub data: &'a Data,
65    /// A marker for the phantom type parameter `St`.
66    _marker: PhantomData<St>,
67}
68
69impl<'a, St: Default, Data> StatefulDecoderHelper<'a, St, Data> {
70    /// Makes a new decoder context out of given buffer and output callback.
71    #[inline(always)]
72    pub fn new(buf: &'a [u8], output: &'a mut (types::StringWriter + 'a),
73               data: &'a Data) -> StatefulDecoderHelper<'a, St, Data> {
74        StatefulDecoderHelper { buf: buf, pos: 0, output: output, err: None,
75                                data: data, _marker: PhantomData }
76    }
77
78    /// Reads one byte from the buffer if any.
79    #[inline(always)]
80    pub fn read(&mut self) -> Option<u8> {
81        match self.buf.get(self.pos) {
82            Some(&c) => { self.pos += 1; Some(c) }
83            None => None
84        }
85    }
86
87    /// Resets back to the initial state.
88    /// This should be the last expr in the rules.
89    #[inline(always)]
90    pub fn reset(&self) -> St {
91        Default::default()
92    }
93
94    /// Writes one Unicode scalar value to the output.
95    /// There is intentionally no check for `c`, so the caller should ensure that it's valid.
96    /// If this is the last expr in the rules, also resets back to the initial state.
97    #[inline(always)]
98    pub fn emit(&mut self, c: u32) -> St {
99        self.output.write_char(unsafe {mem::transmute(c)});
100        Default::default()
101    }
102
103    /// Writes a Unicode string to the output.
104    /// If this is the last expr in the rules, also resets back to the initial state.
105    #[inline(always)]
106    pub fn emit_str(&mut self, s: &str) -> St {
107        self.output.write_str(s);
108        Default::default()
109    }
110
111    /// Issues a codec error with given message at the current position.
112    /// If this is the last expr in the rules, also resets back to the initial state.
113    #[inline(always)]
114    pub fn err(&mut self, msg: &'static str) -> St {
115        self.err = Some(types::CodecError { upto: self.pos as isize, cause: msg.into() });
116        Default::default()
117    }
118
119    /// Issues a codec error with given message at the current position minus `backup` bytes.
120    /// If this is the last expr in the rules, also resets back to the initial state.
121    ///
122    /// This should be used to implement "prepending byte to the stream" in the Encoding spec,
123    /// which corresponds to `ctx.backup_and_err(1, ...)`.
124    #[inline(always)]
125    pub fn backup_and_err(&mut self, backup: usize, msg: &'static str) -> St {
126        let upto = self.pos as isize - backup as isize;
127        self.err = Some(types::CodecError { upto: upto, cause: msg.into() });
128        Default::default()
129    }
130}
131
132/// Defines a stateful decoder from given state machine.
133macro_rules! stateful_decoder {
134    (
135        module $stmod:ident; // should be unique from other existing identifiers
136        $(internal $item:item)* // will only be visible from state functions
137    initial:
138        state $inist:ident($inictx:ident: Context) {
139            $(case $($inilhs:pat),+ => $($inirhs:expr),+;)+
140            final => $($inifin:expr),+;
141        }
142    checkpoint:
143        $(state $ckst:ident($ckctx:ident: Context $(, $ckarg:ident: $ckty:ty)*) {
144            $(case $($cklhs:pat),+ => $($ckrhs:expr),+;)+
145            final => $($ckfin:expr),+;
146        })*
147    transient:
148        $(state $st:ident($ctx:ident: Context $(, $arg:ident: $ty:ty)*) {
149            $(case $($lhs:pat),+ => $($rhs:expr),+;)+
150            final => $($fin:expr),+;
151        })*
152    ) => (
153        #[allow(non_snake_case)]
154        mod $stmod {
155            pub use self::State::*;
156
157            #[derive(PartialEq, Clone, Copy)]
158            pub enum State {
159                $inist,
160                $(
161                    $ckst(() $(, $ckty)*),
162                )*
163                $(
164                    $st(() $(, $ty)*),
165                )*
166            }
167
168            impl ::std::default::Default for State {
169                #[inline(always)] fn default() -> State { $inist }
170            }
171
172            pub mod internal {
173                pub type Context<'a, Data> = ::util::StatefulDecoderHelper<'a, super::State, Data>;
174
175                $($item)*
176            }
177
178            pub mod start {
179                use super::internal::*;
180
181                #[inline(always)]
182                pub fn $inist<T>($inictx: &mut Context<T>) -> super::State {
183                    // prohibits all kind of recursions, including self-recursions
184                    #[allow(unused_imports)] use super::transient::*;
185                    match $inictx.read() {
186                        None => super::$inist,
187                        Some(c) => match c { $($($inilhs)|+ => { $($inirhs);+ })+ },
188                    }
189                }
190
191                $(
192                    #[inline(always)]
193                    pub fn $ckst<T>($ckctx: &mut Context<T> $(, $ckarg: $ckty)*) -> super::State {
194                        // prohibits all kind of recursions, including self-recursions
195                        #[allow(unused_imports)] use super::transient::*;
196                        match $ckctx.read() {
197                            None => super::$ckst(() $(, $ckarg)*),
198                            Some(c) => match c { $($($cklhs)|+ => { $($ckrhs);+ })+ },
199                        }
200                    }
201                )*
202            }
203
204            pub mod transient {
205                use super::internal::*;
206
207                #[inline(always)]
208                #[allow(dead_code)]
209                pub fn $inist<T>(_: &mut Context<T>) -> super::State {
210                    super::$inist // do not recurse further
211                }
212
213                $(
214                    #[inline(always)]
215                    #[allow(dead_code)]
216                    pub fn $ckst<T>(_: &mut Context<T> $(, $ckarg: $ckty)*) -> super::State {
217                        super::$ckst(() $(, $ckarg)*) // do not recurse further
218                    }
219                )*
220
221                $(
222                    #[inline(always)]
223                    pub fn $st<T>($ctx: &mut Context<T> $(, $arg: $ty)*) -> super::State {
224                        match $inictx.read() {
225                            None => super::$st(() $(, $arg)*),
226                            Some(c) => match c { $($($lhs)|+ => { $($rhs);+ })+ },
227                        }
228                    }
229                )*
230            }
231
232            pub fn raw_feed<T>(mut st: State, input: &[u8], output: &mut ::types::StringWriter,
233                               data: &T) -> (State, usize, Option<::types::CodecError>) {
234                output.writer_hint(input.len());
235
236                let mut ctx = ::util::StatefulDecoderHelper::new(input, output, data);
237                let mut processed = 0;
238
239                let st_ = match st {
240                    $inist => $inist,
241                    $(
242                        $ckst(() $(, $ckarg)*) => start::$ckst(&mut ctx $(, $ckarg)*),
243                    )*
244                    $(
245                        $st(() $(, $arg)*) => transient::$st(&mut ctx $(, $arg)*),
246                    )*
247                };
248                match (ctx.err.take(), st_) {
249                    (None, $inist) $(| (None, $ckst(..)))* => { st = st_; processed = ctx.pos; }
250                    // XXX splitting the match case improves the performance somehow, but why?
251                    (None, _) => { return (st_, processed, None); }
252                    (Some(err), _) => { return (st_, processed, Some(err)); }
253                }
254
255                while ctx.pos < ctx.buf.len() {
256                    let st_ = match st {
257                        $inist => start::$inist(&mut ctx),
258                        $(
259                            $ckst(() $(, $ckarg)*) => start::$ckst(&mut ctx $(, $ckarg)*),
260                        )*
261                        _ => unreachable!(),
262                    };
263                    match (ctx.err.take(), st_) {
264                        (None, $inist) $(| (None, $ckst(..)))* => { st = st_; processed = ctx.pos; }
265                        // XXX splitting the match case improves the performance somehow, but why?
266                        (None, _) => { return (st_, processed, None); }
267                        (Some(err), _) => { return (st_, processed, Some(err)); }
268                    }
269                }
270
271                (st, processed, None)
272            }
273
274            pub fn raw_finish<T>(mut st: State, output: &mut ::types::StringWriter,
275                                 data: &T) -> (State, Option<::types::CodecError>) {
276                #![allow(unused_mut, unused_variables)]
277                let mut ctx = ::util::StatefulDecoderHelper::new(&[], output, data);
278                let st = match ::std::mem::replace(&mut st, $inist) {
279                    $inist => { let $inictx = &mut ctx; $($inifin);+ },
280                    $(
281                        $ckst(() $(, $ckarg)*) => { let $ckctx = &mut ctx; $($ckfin);+ },
282                    )*
283                    $(
284                        $st(() $(, $arg)*) => { let $ctx = &mut ctx; $($fin);+ },
285                    )*
286                };
287                (st, ctx.err.take())
288            }
289        }
290    );
291
292    // simplified rules: no checkpoint and default final actions
293    (
294        module $stmod:ident; // should be unique from other existing identifiers
295        $(internal $item:item)* // will only be visible from state functions
296    initial:
297        state $inist:ident($inictx:ident: Context) {
298            $(case $($inilhs:pat),+ => $($inirhs:expr),+;)+
299        }
300    transient:
301        $(state $st:ident($ctx:ident: Context $(, $arg:ident: $ty:ty)*) {
302            $(case $($lhs:pat),+ => $($rhs:expr),+;)+
303        })*
304    ) => (
305        stateful_decoder! {
306            module $stmod;
307            $(internal $item)*
308        initial:
309            state $inist($inictx: Context) {
310                $(case $($inilhs),+ => $($inirhs),+;)+
311                final => $inictx.reset();
312            }
313        checkpoint:
314        transient:
315            $(state $st($ctx: Context $(, $arg: $ty)*) {
316                $(case $($lhs),+ => $($rhs),+;)+
317                final => $ctx.err("incomplete sequence");
318            })*
319        }
320    );
321}
322