encoding/codec/
utf_16.rs

Help
1// This is a part of rust-encoding.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! UTF-16.
6
7use std::convert::Into;
8use std::marker::PhantomData;
9use util::as_char;
10use types::*;
11
/// Marker type selecting the little-endian byte order.
///
/// It carries no data; it is only meant to be plugged in as the type parameter of
/// `UTF16Encoding`, `UTF16Encoder` and `UTF16Decoder`.
#[derive(Clone, Copy, Debug)]
pub struct Little;
17
/// Marker type selecting the big-endian byte order.
///
/// It carries no data; it is only meant to be plugged in as the type parameter of
/// `UTF16Encoding`, `UTF16Encoder` and `UTF16Decoder`.
#[derive(Clone, Copy, Debug)]
pub struct Big;
23
24/// An internal trait used to customize UTF-16 implementations.
25#[doc(hidden)] // XXX never intended to be used publicly, should be gone later
26pub trait Endian: Clone + 'static {
27    fn name() -> &'static str;
28    fn whatwg_name() -> Option<&'static str>;
29    fn write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8);
30    fn concat_two_bytes(lead: u16, trail: u8) -> u16;
31}
32
33impl Endian for Little {
34    fn name() -> &'static str { "utf-16le" }
35    fn whatwg_name() -> Option<&'static str> { Some("utf-16le") }
36    fn write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8) {
37        output.write_byte(lsb);
38        output.write_byte(msb);
39    }
40    fn concat_two_bytes(lead: u16, trail: u8) -> u16 {
41        lead | ((trail as u16) << 8)
42    }
43}
44
45impl Endian for Big {
46    fn name() -> &'static str { "utf-16be" }
47    fn whatwg_name() -> Option<&'static str> { Some("utf-16be") }
48    fn write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8) {
49        output.write_byte(msb);
50        output.write_byte(lsb);
51    }
52    fn concat_two_bytes(lead: u16, trail: u8) -> u16 {
53        (lead << 8) | trail as u16
54    }
55}
56
/**
 * UTF-16 (UCS Transformation Format, 16-bit).
 *
 * This is a Unicode encoding where one codepoint takes either
 * 2 bytes (up to U+FFFF) or 4 bytes (up to U+10FFFF) depending on its value.
 * Codepoints outside the BMP are encoded with the "surrogate" mechanism:
 * each one is represented as an upper surrogate followed by a lower surrogate.
 * As a consequence, surrogate characters (U+D800..DFFF) cannot appear alone
 * and cannot be included in a valid Unicode string.
 *
 * ## Specialization
 *
 * This type is specialized with endianness type `E`,
 * which should be either `Little` (little endian) or `Big` (big endian).
 * The value itself is zero-sized; `E` is only tracked through `PhantomData`.
 */
#[derive(Clone, Copy, Debug)]
pub struct UTF16Encoding<E> {
    /// Ties the otherwise unused endianness parameter `E` to the type.
    _marker: PhantomData<E>
}
76
77/// A type for UTF-16 in little endian.
78pub type UTF16LEEncoding = UTF16Encoding<Little>;
79/// A type for UTF-16 in big endian.
80pub type UTF16BEEncoding = UTF16Encoding<Big>;
81
82/// An instance for UTF-16 in little endian.
83pub const UTF_16LE_ENCODING: UTF16LEEncoding = UTF16Encoding { _marker: PhantomData };
84/// An instance for UTF-16 in big endian.
85pub const UTF_16BE_ENCODING: UTF16BEEncoding = UTF16Encoding { _marker: PhantomData };
86
87impl<E: Endian> Encoding for UTF16Encoding<E> {
88    fn name(&self) -> &'static str { <E as Endian>::name() }
89    fn whatwg_name(&self) -> Option<&'static str> { <E as Endian>::whatwg_name() }
90    fn raw_encoder(&self) -> Box<RawEncoder> { UTF16Encoder::<E>::new() }
91    fn raw_decoder(&self) -> Box<RawDecoder> { UTF16Decoder::<E>::new() }
92}
93
/**
 * An encoder for UTF-16.
 *
 * The encoder keeps no state between calls; the only field is a zero-sized marker.
 *
 * ## Specialization
 *
 * This type is specialized with endianness type `E`,
 * which should be either `Little` (little endian) or `Big` (big endian).
 */
#[derive(Clone, Copy, Debug)]
pub struct UTF16Encoder<E> {
    /// Ties the otherwise unused endianness parameter `E` to the type.
    _marker: PhantomData<E>
}
106
107impl<E: Endian> UTF16Encoder<E> {
108    fn new() -> Box<RawEncoder> {
109        Box::new(UTF16Encoder::<E> { _marker: PhantomData })
110    }
111}
112
113impl<E: Endian> RawEncoder for UTF16Encoder<E> {
114    fn from_self(&self) -> Box<RawEncoder> { UTF16Encoder::<E>::new() }
115
116    fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
117        output.writer_hint(input.len() * 2);
118
119        let write_two_bytes = |output: &mut ByteWriter, msb: u8, lsb: u8|
120            <E as Endian>::write_two_bytes(output, msb, lsb);
121
122        for ch in input.chars() {
123            match ch {
124                '\u{0}'...'\u{d7ff}' | '\u{e000}'...'\u{ffff}' => {
125                    let ch = ch as u32;
126                    write_two_bytes(output, (ch >> 8) as u8, (ch & 0xff) as u8);
127                }
128                '\u{10000}'...'\u{10ffff}' => {
129                    let ch = ch as u32 - 0x10000;
130                    write_two_bytes(output, (0xd8 | (ch >> 18)) as u8,
131                                            ((ch >> 10) & 0xff) as u8);
132                    write_two_bytes(output, (0xdc | ((ch >> 8) & 0x3)) as u8,
133                                            (ch & 0xff) as u8);
134                }
135                _ => unreachable!() // XXX Rust issue #12483, this is redundant
136            }
137        }
138        (input.len(), None)
139    }
140
141    fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
142        None
143    }
144}
145
/**
 * A decoder for UTF-16.
 *
 * The decoder is stateful: input may be split at any byte boundary,
 * so up to three bytes (an upper surrogate plus one more byte)
 * can be carried over from one `raw_feed` call to the next.
 *
 * ## Specialization
 *
 * This type is specialized with endianness type `E`,
 * which should be either `Little` (little endian) or `Big` (big endian).
 */
#[derive(Debug)]
pub struct UTF16Decoder<E> {
    /// First byte of a 16-bit unit whose second byte has not arrived yet,
    /// or `0xffff` when no byte is pending.
    leadbyte: u16,
    /// Upper surrogate still waiting for its lower surrogate,
    /// or `0xffff` when no surrogate is pending.
    leadsurrogate: u16,
    /// Ties the otherwise unused endianness parameter `E` to the type.
    _marker: PhantomData<E>
}
159
160impl<E: Endian> UTF16Decoder<E> {
161    pub fn new() -> Box<RawDecoder> {
162        Box::new(UTF16Decoder::<E> { leadbyte: 0xffff, leadsurrogate: 0xffff,
163                                     _marker: PhantomData })
164    }
165}
166
167impl<E: Endian> RawDecoder for UTF16Decoder<E> {
168    fn from_self(&self) -> Box<RawDecoder> { UTF16Decoder::<E>::new() }
169
170    fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
171        output.writer_hint(input.len() / 2); // when every codepoint is U+0000..007F
172
173        let concat_two_bytes = |lead: u16, trail: u8|
174            <E as Endian>::concat_two_bytes(lead, trail);
175
176        let mut i = 0;
177        let mut processed = 0;
178        let len = input.len();
179
180        if i >= len { return (processed, None); }
181
182        if self.leadbyte != 0xffff {
183            let ch = concat_two_bytes(self.leadbyte, input[i]);
184            i += 1;
185            self.leadbyte = 0xffff;
186            if self.leadsurrogate != 0xffff { // `ch` is lower surrogate
187                let upper = self.leadsurrogate;
188                self.leadsurrogate = 0xffff;
189                match ch {
190                    0xdc00...0xdfff => {
191                        let ch = ((upper as u32 - 0xd800) << 10) + (ch as u32 - 0xdc00);
192                        output.write_char(as_char(ch + 0x10000));
193                        processed = i;
194                    }
195                    _ => {
196                        return (processed, Some(CodecError {
197                            upto: i as isize - 2, cause: "invalid sequence".into()
198                        }));
199                    }
200                }
201            } else {
202                match ch {
203                    0xd800...0xdbff => {
204                        self.leadsurrogate = ch;
205                        // pass through
206                    }
207                    0xdc00...0xdfff => {
208                        return (processed, Some(CodecError {
209                            upto: i as isize, cause: "invalid sequence".into()
210                        }));
211                    }
212                    _ => {
213                        output.write_char(as_char(ch as u32));
214                        processed = i;
215                    }
216                }
217            }
218            if i >= len { return (processed, None); }
219        }
220
221        if self.leadsurrogate != 0xffff {
222            i += 1;
223            if i >= len {
224                self.leadbyte = input[i-1] as u16;
225                return (processed, None);
226            }
227            let upper = self.leadsurrogate;
228            let ch = concat_two_bytes(input[i-1] as u16, input[i]);
229            i += 1;
230            match ch {
231                0xdc00...0xdfff => {
232                    let ch = ((upper as u32 - 0xd800) << 10) + (ch as u32 - 0xdc00);
233                    output.write_char(as_char(ch + 0x10000));
234                }
235                _ => {
236                    self.leadbyte = 0xffff;
237                    self.leadsurrogate = 0xffff;
238                    return (processed, Some(CodecError {
239                        upto: i as isize - 2, cause: "invalid sequence".into()
240                    }));
241                }
242            }
243        }
244
245        self.leadbyte = 0xffff;
246        self.leadsurrogate = 0xffff;
247        processed = i;
248        while i < len {
249            i += 1;
250            if i >= len {
251                self.leadbyte = input[i-1] as u16;
252                break;
253            }
254            let ch = concat_two_bytes(input[i-1] as u16, input[i]);
255            match ch {
256                0xd800...0xdbff => {
257                    i += 2;
258                    if i >= len {
259                        self.leadsurrogate = ch;
260                        if i-1 < len { self.leadbyte = input[i-1] as u16; }
261                        break;
262                    }
263                    let ch2 = concat_two_bytes(input[i-1] as u16, input[i]);
264                    match ch2 {
265                        0xdc00...0xdfff => {
266                            let ch = ((ch as u32 - 0xd800) << 10) + (ch2 as u32 - 0xdc00);
267                            output.write_char(as_char(ch + 0x10000));
268                        }
269                        _ => {
270                            return (processed, Some(CodecError {
271                                upto: i as isize - 1, cause: "invalid sequence".into()
272                            }));
273                        }
274                    }
275                }
276                0xdc00...0xdfff => {
277                    return (processed, Some(CodecError {
278                        upto: i as isize + 1, cause: "invalid sequence".into()
279                    }));
280                }
281                _ => {
282                    output.write_char(as_char(ch as u32));
283                }
284            }
285            i += 1;
286            processed = i;
287        }
288        (processed, None)
289    }
290
291    fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> {
292        let leadbyte = self.leadbyte;
293        let leadsurrogate = self.leadsurrogate;
294        self.leadbyte = 0xffff;
295        self.leadsurrogate = 0xffff;
296        if leadbyte != 0xffff || leadsurrogate != 0xffff {
297            Some(CodecError { upto: 0, cause: "incomplete sequence".into() })
298        } else {
299            None
300        }
301    }
302}
303
#[cfg(test)]
mod tests {
    // little endian and big endian are symmetric to each other, so there is no need to test both.
    // since big endian is easier to inspect we test UTF_16BE only.

    use super::UTF_16BE_ENCODING as UTF_16BE;
    use types::*;

    /// BMP and non-BMP codepoints are encoded to the expected big-endian bytes.
    #[test]
    fn test_encoder_valid() {
        let mut e = UTF_16BE.raw_encoder();
        assert_feed_ok!(e, "\u{0}\
                            \u{1}\u{02}\u{004}\u{0008}\
                            \u{10}\u{020}\u{0040}\u{80}\
                            \u{100}\u{0200}\u{400}\u{800}\
                            \u{1000}\u{2000}\u{4000}\u{8000}\
                            \u{ffff}", "",
                        [0x00, 0x00,
                         0x00, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08,
                         0x00, 0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80,
                         0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
                         0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
                         0xff, 0xff]);
        assert_feed_ok!(e, "\u{10000}\
                            \u{10001}\u{010002}\
                            \u{10004}\u{010008}\
                            \u{10010}\u{010020}\
                            \u{10040}\u{010080}\
                            \u{10100}\u{010200}\
                            \u{10400}\u{010800}\
                            \u{11000}\u{012000}\
                            \u{14000}\u{018000}\
                            \u{20000}\u{030000}\
                            \u{50000}\u{090000}\
                            \u{10FFFF}", "",
                        [0xd8, 0x00, 0xdc, 0x00,
                         0xd8, 0x00, 0xdc, 0x01, 0xd8, 0x00, 0xdc, 0x02,
                         0xd8, 0x00, 0xdc, 0x04, 0xd8, 0x00, 0xdc, 0x08,
                         0xd8, 0x00, 0xdc, 0x10, 0xd8, 0x00, 0xdc, 0x20,
                         0xd8, 0x00, 0xdc, 0x40, 0xd8, 0x00, 0xdc, 0x80,
                         0xd8, 0x00, 0xdd, 0x00, 0xd8, 0x00, 0xde, 0x00,
                         0xd8, 0x01, 0xdc, 0x00, 0xd8, 0x02, 0xdc, 0x00,
                         0xd8, 0x04, 0xdc, 0x00, 0xd8, 0x08, 0xdc, 0x00,
                         0xd8, 0x10, 0xdc, 0x00, 0xd8, 0x20, 0xdc, 0x00,
                         0xd8, 0x40, 0xdc, 0x00, 0xd8, 0x80, 0xdc, 0x00,
                         0xd9, 0x00, 0xdc, 0x00, 0xda, 0x00, 0xdc, 0x00,
                         0xdb, 0xff, 0xdf, 0xff]);
        assert_finish_ok!(e, []);
    }

    /// The byte sequences produced by the encoder test decode back to the same text.
    #[test]
    fn test_decoder_valid() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x00, 0x00,
                            0x00, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08,
                            0x00, 0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80,
                            0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
                            0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
                            0xff, 0xff], [],
                        "\u{0}\
                         \u{1}\u{02}\u{004}\u{0008}\
                         \u{10}\u{020}\u{0040}\u{80}\
                         \u{100}\u{0200}\u{400}\u{800}\
                         \u{1000}\u{2000}\u{4000}\u{8000}\
                         \u{ffff}");
        assert_feed_ok!(d, [0xd8, 0x00, 0xdc, 0x00,
                            0xd8, 0x00, 0xdc, 0x01, 0xd8, 0x00, 0xdc, 0x02,
                            0xd8, 0x00, 0xdc, 0x04, 0xd8, 0x00, 0xdc, 0x08,
                            0xd8, 0x00, 0xdc, 0x10, 0xd8, 0x00, 0xdc, 0x20,
                            0xd8, 0x00, 0xdc, 0x40, 0xd8, 0x00, 0xdc, 0x80,
                            0xd8, 0x00, 0xdd, 0x00, 0xd8, 0x00, 0xde, 0x00,
                            0xd8, 0x01, 0xdc, 0x00, 0xd8, 0x02, 0xdc, 0x00,
                            0xd8, 0x04, 0xdc, 0x00, 0xd8, 0x08, 0xdc, 0x00,
                            0xd8, 0x10, 0xdc, 0x00, 0xd8, 0x20, 0xdc, 0x00,
                            0xd8, 0x40, 0xdc, 0x00, 0xd8, 0x80, 0xdc, 0x00,
                            0xd9, 0x00, 0xdc, 0x00, 0xda, 0x00, 0xdc, 0x00,
                            0xdb, 0xff, 0xdf, 0xff], [],
                        "\u{10000}\
                         \u{10001}\u{010002}\
                         \u{10004}\u{010008}\
                         \u{10010}\u{010020}\
                         \u{10040}\u{010080}\
                         \u{10100}\u{010200}\
                         \u{10400}\u{010800}\
                         \u{11000}\u{012000}\
                         \u{14000}\u{018000}\
                         \u{20000}\u{030000}\
                         \u{50000}\u{090000}\
                         \u{10FFFF}");
        assert_finish_ok!(d, "");
    }

    /// BMP units split across feed calls are reassembled.
    #[test]
    fn test_decoder_valid_partial_bmp() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_feed_ok!(d, [0x34], [], "\u{1234}");
        assert_feed_ok!(d, [], [0x56], "");
        assert_feed_ok!(d, [0x78], [], "\u{5678}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_feed_ok!(d, [0x34], [0x56], "\u{1234}");
        assert_feed_ok!(d, [0x78, 0xab, 0xcd], [], "\u{5678}\u{abcd}");
        assert_finish_ok!(d, "");
    }

    /// Surrogate pairs split at every possible byte boundary are reassembled.
    #[test]
    fn test_decoder_valid_partial_non_bmp() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x08], "");
        assert_feed_ok!(d, [], [0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9], "\u{12345}");
        assert_feed_ok!(d, [], [0x5e], "");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_feed_ok!(d, [0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x08, 0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9, 0x5e], "\u{12345}");
        assert_feed_ok!(d, [0xdc, 0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08, 0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9, 0x5e, 0xdc], "\u{12345}");
        assert_feed_ok!(d, [0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");
    }

    /// Finishing with one, two or three pending bytes is an error.
    #[test]
    fn test_decoder_invalid_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08, 0xdf], "");
        assert_finish_err!(d, "");
    }

    /// An upper surrogate that is not followed by a lower surrogate is rejected.
    #[test]
    fn test_decoder_invalid_lone_upper_surrogate() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_feed_err!(d, [], [], [0x12, 0x34], "");
        assert_feed_err!(d, [], [0xd8, 0x00], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_feed_err!(d, [], [], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_feed_err!(d, [], [], [0x12, 0x34], "");
        assert_feed_err!(d, [], [0xdb, 0xff], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_feed_err!(d, [], [], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_finish_err!(d, "");
    }

    /// Same as above, with the lone upper surrogate split across feed calls.
    #[test]
    fn test_decoder_invalid_lone_upper_surrogate_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_err!(d, [], [0x00], [0x12, 0x34], "");
        assert_feed_ok!(d, [], [0xd8, 0x00, 0x56], "");
        assert_feed_err!(d, -1, [], [], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_err!(d, [], [0x00], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xd8, 0x00, 0xdb], "");
        assert_feed_err!(d, -1, [], [], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xd8], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdb], "");
        assert_feed_err!(d, [], [0xff], [0x12, 0x34], "");
        assert_feed_ok!(d, [], [0xdb, 0xff, 0x56], "");
        assert_feed_err!(d, -1, [], [], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xdb], "");
        assert_feed_err!(d, [], [0xff], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xdb, 0xff, 0xd8], "");
        assert_feed_err!(d, -1, [], [], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xdb], "");
        assert_finish_err!(d, "");
    }

    /// A lower surrogate without a preceding upper surrogate is rejected.
    #[test]
    fn test_decoder_invalid_lone_lower_surrogate() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_err!(d, [], [0xdc, 0x00], [], "");
        assert_feed_err!(d, [0x12, 0x34], [0xdc, 0x00], [0x56, 0x78], "\u{1234}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_err!(d, [], [0xdf, 0xff], [], "");
        assert_feed_err!(d, [0x12, 0x34], [0xdf, 0xff], [0x56, 0x78], "\u{1234}");
        assert_finish_ok!(d, "");
    }

    /// Same as above, with the lone lower surrogate split across feed calls.
    #[test]
    fn test_decoder_invalid_lone_lower_surrogate_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdc], "");
        assert_feed_err!(d, [], [0x00], [], "");
        assert_feed_ok!(d, [0x12, 0x34], [0xdc], "\u{1234}");
        assert_feed_err!(d, [], [0x00], [0x56, 0x78], "");
        assert_finish_ok!(d, "");

        // Start the second scenario from a fresh decoder, like every sibling test does.
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdf], "");
        assert_feed_err!(d, [], [0xff], [], "");
        assert_feed_ok!(d, [0x12, 0x34], [0xdf], "\u{1234}");
        assert_feed_err!(d, [], [0xff], [0x56, 0x78], "");
        assert_finish_ok!(d, "");
    }

    /// A single dangling byte at the end of the input is an error on finish.
    #[test]
    fn test_decoder_invalid_one_byte_before_finish() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0x56], "\u{1234}");
        assert_finish_err!(d, "");
    }

    /// An upper surrogate plus half of the next unit is an error on finish.
    #[test]
    fn test_decoder_invalid_three_bytes_before_finish() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x00, 0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8, 0x00, 0xdc], "\u{1234}");
        assert_finish_err!(d, "");
    }

    /// Same as above, with the three trailing bytes arriving in separate feed calls.
    #[test]
    fn test_decoder_invalid_three_bytes_before_finish_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x00], "");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8], "\u{1234}");
        assert_feed_ok!(d, [], [0x00, 0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8, 0x00], "\u{1234}");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_finish_err!(d, "");
    }

    /// The decoder can be reused after `raw_finish`, even after a failed one.
    #[test]
    fn test_decoder_feed_after_finish() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0x12], "\u{1234}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0x12, 0x34], [], "\u{1234}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8, 0x08, 0xdf], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8, 0x08], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [], "\u{12345}");
        assert_finish_ok!(d, "");
    }
}
594
encoding/codec/utf_16.rs

encoding/codec/
utf_16.rs