encoding_rs/
single_byte.rs

1// Copyright Mozilla Foundation. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use super::*;
11use crate::ascii::*;
12use crate::data::position;
13use crate::handles::*;
14use crate::variant::*;
15
/// Decoder for single-byte encodings whose non-ASCII half (bytes
/// `0x80..=0xFF`) is described by a 128-entry lookup table.
pub struct SingleByteDecoder {
    /// Indexed by `byte - 0x80`; each entry is the UTF-16 code unit the byte
    /// decodes to. The decode methods treat an entry of `0` as unmapped and
    /// report the byte as malformed.
    table: &'static [u16; 128],
}
19
20impl SingleByteDecoder {
21    pub fn new(data: &'static [u16; 128]) -> VariantDecoder {
22        VariantDecoder::SingleByte(SingleByteDecoder { table: data })
23    }
24
    /// Returns the number of UTF-16 code units needed to decode
    /// `byte_length` input bytes.
    ///
    /// Every table entry is a single `u16`, so the bound is one code unit per
    /// input byte. The result is always `Some`.
    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
        Some(byte_length)
    }
28
    /// Returns the number of UTF-8 bytes needed to decode `byte_length`
    /// input bytes, or `None` if `byte_length * 3` overflows `usize`.
    ///
    /// Each input byte maps to one `u16` value, and a code point that fits
    /// in a `u16` takes at most three bytes in UTF-8.
    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
        byte_length.checked_mul(3)
    }
32
    /// Returns the number of UTF-8 bytes needed to decode `byte_length`
    /// input bytes, or `None` if `byte_length * 3` overflows `usize`.
    ///
    /// Uses the same three-bytes-per-input-byte bound as
    /// `max_utf8_buffer_length_without_replacement`.
    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
        byte_length.checked_mul(3)
    }
36
    /// Decodes `src` into UTF-8 written to `dst`, without replacing
    /// malformed bytes.
    ///
    /// Returns `(result, read, written)`. Within this method the result is
    ///
    /// * `DecoderResult::InputEmpty` when the source has no more bytes,
    /// * `DecoderResult::OutputFull` when `check_space_bmp` reports that the
    ///   destination is full,
    /// * `DecoderResult::Malformed(1, 0)` when a non-ASCII byte maps to `0`
    ///   in `self.table`,
    ///
    /// or whatever `copy_ascii_from_check_space_bmp` hands back through
    /// `CopyAsciiResult::Stop`.
    ///
    /// `_last` is unused.
    pub fn decode_to_utf8_raw(
        &mut self,
        src: &[u8],
        dst: &mut [u8],
        _last: bool,
    ) -> (DecoderResult, usize, usize) {
        let mut source = ByteSource::new(src);
        let mut dest = Utf8Destination::new(dst);
        // 'outermost: bulk ASCII copy until a non-ASCII byte shows up.
        // 'middle:    decode one non-ASCII byte through the table.
        // 'innermost: handle ASCII bytes one at a time for as long as they
        //             are below 60 (see "punctuation" below).
        'outermost: loop {
            match dest.copy_ascii_from_check_space_bmp(&mut source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut non_ascii, mut handle)) => 'middle: loop {
                    // Start non-boilerplate
                    //
                    // Since the non-ASCIIness of `non_ascii` is hidden from
                    // the optimizer, it can't figure out that it's OK to
                    // statically omit the bound check when accessing
                    // `[u16; 128]` with an index
                    // `non_ascii as usize - 0x80usize`.
                    let mapped =
                        unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
                    // let mapped = self.table[non_ascii as usize - 0x80usize];
                    if mapped == 0u16 {
                        // A zero table entry marks an unmapped byte.
                        return (
                            DecoderResult::Malformed(1, 0),
                            source.consumed(),
                            handle.written(),
                        );
                    }
                    let dest_again = handle.write_bmp_excl_ascii(mapped);
                    // End non-boilerplate
                    match source.check_available() {
                        Space::Full(src_consumed) => {
                            return (
                                DecoderResult::InputEmpty,
                                src_consumed,
                                dest_again.written(),
                            );
                        }
                        Space::Available(source_handle) => {
                            match dest_again.check_space_bmp() {
                                Space::Full(dst_written) => {
                                    return (
                                        DecoderResult::OutputFull,
                                        source_handle.consumed(),
                                        dst_written,
                                    );
                                }
                                Space::Available(mut destination_handle) => {
                                    let (mut b, unread_handle) = source_handle.read();
                                    let source_again = unread_handle.commit();
                                    'innermost: loop {
                                        if b > 127 {
                                            // Another non-ASCII byte: decode it
                                            // without returning to the bulk
                                            // ASCII path.
                                            non_ascii = b;
                                            handle = destination_handle;
                                            continue 'middle;
                                        }
                                        // Testing on Haswell says that we should write the
                                        // byte unconditionally instead of trying to unread it
                                        // to make it part of the next SIMD stride.
                                        let dest_again_again = destination_handle.write_ascii(b);
                                        if b < 60 {
                                            // We've got punctuation
                                            match source_again.check_available() {
                                                Space::Full(src_consumed_again) => {
                                                    return (
                                                        DecoderResult::InputEmpty,
                                                        src_consumed_again,
                                                        dest_again_again.written(),
                                                    );
                                                }
                                                Space::Available(source_handle_again) => {
                                                    match dest_again_again.check_space_bmp() {
                                                        Space::Full(dst_written_again) => {
                                                            return (
                                                                DecoderResult::OutputFull,
                                                                source_handle_again.consumed(),
                                                                dst_written_again,
                                                            );
                                                        }
                                                        Space::Available(
                                                            destination_handle_again,
                                                        ) => {
                                                            let (b_again, _unread_handle_again) =
                                                                source_handle_again.read();
                                                            b = b_again;
                                                            destination_handle =
                                                                destination_handle_again;
                                                            continue 'innermost;
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                        // We've got markup or ASCII text
                                        continue 'outermost;
                                    }
                                }
                            }
                        }
                    }
                },
            }
        }
    }
142
    /// Decodes `src` into UTF-16 code units written to `dst`, without
    /// replacing malformed bytes.
    ///
    /// Exactly one code unit is written per input byte, so at most
    /// `min(src.len(), dst.len())` bytes are processed.
    ///
    /// Returns `(result, read, written)`:
    ///
    /// * `(DecoderResult::InputEmpty, n, n)` when all of `src` fit in `dst`,
    /// * `(DecoderResult::OutputFull, n, n)` when `dst` is shorter than
    ///   `src` and has been filled,
    /// * `(DecoderResult::Malformed(1, 0), i + 1, i)` when the byte at
    ///   index `i` maps to `0` in `self.table`.
    ///
    /// `_last` is unused.
    pub fn decode_to_utf16_raw(
        &mut self,
        src: &[u8],
        dst: &mut [u16],
        _last: bool,
    ) -> (DecoderResult, usize, usize) {
        // `length` is the number of units that can be converted in this
        // call; `pending` is what to report once all of them are done.
        let (pending, length) = if dst.len() < src.len() {
            (DecoderResult::OutputFull, dst.len())
        } else {
            (DecoderResult::InputEmpty, src.len())
        };
        // Number of input bytes consumed so far, which is also the number of
        // output units written so far.
        let mut converted = 0usize;
        'outermost: loop {
            // `converted <= length` and `length` is no larger than either
            // slice, so both offset pointers and the remaining length stay
            // within `src` and `dst`.
            match unsafe {
                ascii_to_basic_latin(
                    src.as_ptr().add(converted),
                    dst.as_mut_ptr().add(converted),
                    length - converted,
                )
            } {
                None => {
                    return (pending, length, length);
                }
                Some((mut non_ascii, consumed)) => {
                    converted += consumed;
                    'middle: loop {
                        // `converted` doesn't count the reading of `non_ascii` yet.
                        // Since the non-ASCIIness of `non_ascii` is hidden from
                        // the optimizer, it can't figure out that it's OK to
                        // statically omit the bound check when accessing
                        // `[u16; 128]` with an index
                        // `non_ascii as usize - 0x80usize`.
                        let mapped =
                            unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
                        // let mapped = self.table[non_ascii as usize - 0x80usize];
                        if mapped == 0u16 {
                            // A zero table entry marks an unmapped byte.
                            return (
                                DecoderResult::Malformed(1, 0),
                                converted + 1, // +1 for `non_ascii`
                                converted,
                            );
                        }
                        unsafe {
                            // The bound check has already been performed
                            *(dst.get_unchecked_mut(converted)) = mapped;
                        }
                        converted += 1;
                        // Next, handle ASCII punctuation and non-ASCII without
                        // going back to ASCII acceleration. Non-ASCII scripts
                        // use ASCII punctuation, so this avoids going to
                        // acceleration just for punctuation/space and then
                        // failing. This is a significant boost to non-ASCII
                        // scripts.
                        // TODO: Split out Latin converters without this part
                        // this stuff makes Latin script-conversion slower.
                        if converted == length {
                            return (pending, length, length);
                        }
                        // `converted < length <= src.len()` after the check above.
                        let mut b = unsafe { *(src.get_unchecked(converted)) };
                        'innermost: loop {
                            if b > 127 {
                                non_ascii = b;
                                continue 'middle;
                            }
                            // Testing on Haswell says that we should write the
                            // byte unconditionally instead of trying to unread it
                            // to make it part of the next SIMD stride.
                            unsafe {
                                *(dst.get_unchecked_mut(converted)) = u16::from(b);
                            }
                            converted += 1;
                            if b < 60 {
                                // We've got punctuation
                                if converted == length {
                                    return (pending, length, length);
                                }
                                b = unsafe { *(src.get_unchecked(converted)) };
                                continue 'innermost;
                            }
                            // We've got markup or ASCII text
                            continue 'outermost;
                        }
                    }
                }
            }
        }
    }
230
231    pub fn latin1_byte_compatible_up_to(&self, buffer: &[u8]) -> usize {
232        let mut bytes = buffer;
233        let mut total = 0;
234        loop {
235            if let Some((non_ascii, offset)) = validate_ascii(bytes) {
236                total += offset;
237                let mapped = unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
238                if mapped != u16::from(non_ascii) {
239                    return total;
240                }
241                total += 1;
242                bytes = &bytes[offset + 1..];
243            } else {
244                return total;
245            }
246        }
247    }
248}
249
/// Encoder for single-byte encodings, driven by the same 128-entry table
/// the decoder uses plus a description of one run of consecutive code units
/// that can be encoded by plain offset arithmetic.
pub struct SingleByteEncoder {
    /// Upper-half table: entry `i` is the code unit for byte `0x80 + i`.
    table: &'static [u16; 128],
    /// Code unit value at which the run starts.
    run_bmp_offset: usize,
    /// Table index (byte value minus `0x80`) at which the run starts.
    run_byte_offset: usize,
    /// Number of consecutive code units in the run.
    run_length: usize,
}
256
257impl SingleByteEncoder {
258    pub fn new(
259        encoding: &'static Encoding,
260        data: &'static [u16; 128],
261        run_bmp_offset: u16,
262        run_byte_offset: u8,
263        run_length: u8,
264    ) -> Encoder {
265        Encoder::new(
266            encoding,
267            VariantEncoder::SingleByte(SingleByteEncoder {
268                table: data,
269                run_bmp_offset: run_bmp_offset as usize,
270                run_byte_offset: run_byte_offset as usize,
271                run_length: run_length as usize,
272            }),
273        )
274    }
275
    /// Returns the number of output bytes needed to encode `u16_length`
    /// UTF-16 code units when unmappable characters are not replaced.
    ///
    /// The bound is one output byte per input code unit; the result is
    /// always `Some`.
    pub fn max_buffer_length_from_utf16_without_replacement(
        &self,
        u16_length: usize,
    ) -> Option<usize> {
        Some(u16_length)
    }
282
    /// Returns the number of output bytes needed to encode `byte_length`
    /// bytes of UTF-8 input when unmappable characters are not replaced.
    ///
    /// The bound is one output byte per input byte; the result is always
    /// `Some`.
    pub fn max_buffer_length_from_utf8_without_replacement(
        &self,
        byte_length: usize,
    ) -> Option<usize> {
        Some(byte_length)
    }
289
290    #[inline(always)]
291    fn encode_u16(&self, code_unit: u16) -> Option<u8> {
292        // First, we see if the code unit falls into a run of consecutive
293        // code units that can be mapped by offset. This is very efficient
294        // for most non-Latin encodings as well as Latin1-ish encodings.
295        //
296        // For encodings that don't fit this pattern, the run (which may
297        // have the length of just one) just establishes the starting point
298        // for the next rule.
299        //
300        // Next, we do a forward linear search in the part of the index
301        // after the run. Even in non-Latin1-ish Latin encodings (except
302        // macintosh), the lower case letters are here.
303        //
304        // Next, we search the third quadrant up to the start of the run
305        // (upper case letters in Latin encodings except macintosh, in
306        // Greek and in KOI encodings) and then the second quadrant,
307        // except if the run stared before the third quadrant, we search
308        // the second quadrant up to the run.
309        //
310        // Last, we search the first quadrant, which has unused controls
311        // or punctuation in most encodings. This is bad for macintosh
312        // and IBM866, but those are rare.
313
314        // Run of consecutive units
315        let unit_as_usize = code_unit as usize;
316        let offset = unit_as_usize.wrapping_sub(self.run_bmp_offset);
317        if offset < self.run_length {
318            return Some((128 + self.run_byte_offset + offset) as u8);
319        }
320
321        // Search after the run
322        let tail_start = self.run_byte_offset + self.run_length;
323        if let Some(pos) = position(&self.table[tail_start..], code_unit) {
324            return Some((128 + tail_start + pos) as u8);
325        }
326
327        if self.run_byte_offset >= 64 {
328            // Search third quadrant before the run
329            if let Some(pos) = position(&self.table[64..self.run_byte_offset], code_unit) {
330                return Some(((128 + 64) + pos) as u8);
331            }
332
333            // Search second quadrant
334            if let Some(pos) = position(&self.table[32..64], code_unit) {
335                return Some(((128 + 32) + pos) as u8);
336            }
337        } else if let Some(pos) = position(&self.table[32..self.run_byte_offset], code_unit) {
338            // windows-1252, windows-874, ISO-8859-15 and ISO-8859-5
339            // Search second quadrant before the run
340            return Some(((128 + 32) + pos) as u8);
341        }
342
343        // Search first quadrant
344        if let Some(pos) = position(&self.table[..32], code_unit) {
345            return Some((128 + pos) as u8);
346        }
347
348        None
349    }
350
    // Generates the UTF-8-input encode method through
    // `ascii_compatible_bmp_encoder_function!`, which is defined outside this
    // file. The method name `encode_from_utf8_raw` is passed as an argument
    // below.
    //
    // The first argument is the code run for a non-ASCII BMP code point
    // `bmp`: it is looked up with `encode_u16`; a hit is written with
    // `handle.write_one`, a miss returns an unmappable result together with
    // the read and written counts so far.
    //
    // NOTE(review): the meaning of the remaining positional arguments
    // (identifier names, the space-check method names, `str`, `Utf8Source`,
    // and the trailing `true`) is determined by the macro definition —
    // confirm there before changing them.
    ascii_compatible_bmp_encoder_function!(
        {
            match self.encode_u16(bmp) {
                Some(byte) => handle.write_one(byte),
                None => {
                    return (
                        EncoderResult::unmappable_from_bmp(bmp),
                        source.consumed(),
                        handle.written(),
                    );
                }
            }
        },
        bmp,
        self,
        source,
        handle,
        copy_ascii_to_check_space_one,
        check_space_one,
        encode_from_utf8_raw,
        str,
        Utf8Source,
        true
    );
375
376    pub fn encode_from_utf16_raw(
377        &mut self,
378        src: &[u16],
379        dst: &mut [u8],
380        _last: bool,
381    ) -> (EncoderResult, usize, usize) {
382        let (pending, length) = if dst.len() < src.len() {
383            (EncoderResult::OutputFull, dst.len())
384        } else {
385            (EncoderResult::InputEmpty, src.len())
386        };
387        let mut converted = 0usize;
388        'outermost: loop {
389            match unsafe {
390                basic_latin_to_ascii(
391                    src.as_ptr().add(converted),
392                    dst.as_mut_ptr().add(converted),
393                    length - converted,
394                )
395            } {
396                None => {
397                    return (pending, length, length);
398                }
399                Some((mut non_ascii, consumed)) => {
400                    converted += consumed;
401                    'middle: loop {
402                        // `converted` doesn't count the reading of `non_ascii` yet.
403                        match self.encode_u16(non_ascii) {
404                            Some(byte) => {
405                                unsafe {
406                                    *(dst.get_unchecked_mut(converted)) = byte;
407                                }
408                                converted += 1;
409                            }
410                            None => {
411                                // At this point, we need to know if we
412                                // have a surrogate.
413                                let high_bits = non_ascii & 0xFC00u16;
414                                if high_bits == 0xD800u16 {
415                                    // high surrogate
416                                    if converted + 1 == length {
417                                        // End of buffer. This surrogate is unpaired.
418                                        return (
419                                            EncoderResult::Unmappable('\u{FFFD}'),
420                                            converted + 1, // +1 `for non_ascii`
421                                            converted,
422                                        );
423                                    }
424                                    let second =
425                                        u32::from(unsafe { *src.get_unchecked(converted + 1) });
426                                    if second & 0xFC00u32 != 0xDC00u32 {
427                                        return (
428                                            EncoderResult::Unmappable('\u{FFFD}'),
429                                            converted + 1, // +1 `for non_ascii`
430                                            converted,
431                                        );
432                                    }
433                                    // The next code unit is a low surrogate.
434                                    let astral: char = unsafe {
435                                        ::std::char::from_u32_unchecked(
436                                            (u32::from(non_ascii) << 10) + second
437                                                - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
438                                        )
439                                    };
440                                    return (
441                                        EncoderResult::Unmappable(astral),
442                                        converted + 2, // +2 `for non_ascii` and `second`
443                                        converted,
444                                    );
445                                }
446                                if high_bits == 0xDC00u16 {
447                                    // Unpaired low surrogate
448                                    return (
449                                        EncoderResult::Unmappable('\u{FFFD}'),
450                                        converted + 1, // +1 `for non_ascii`
451                                        converted,
452                                    );
453                                }
454                                return (
455                                    EncoderResult::unmappable_from_bmp(non_ascii),
456                                    converted + 1, // +1 `for non_ascii`
457                                    converted,
458                                );
459                            }
460                        }
461                        // Next, handle ASCII punctuation and non-ASCII without
462                        // going back to ASCII acceleration. Non-ASCII scripts
463                        // use ASCII punctuation, so this avoid going to
464                        // acceleration just for punctuation/space and then
465                        // failing. This is a significant boost to non-ASCII
466                        // scripts.
467                        // TODO: Split out Latin converters without this part
468                        // this stuff makes Latin script-conversion slower.
469                        if converted == length {
470                            return (pending, length, length);
471                        }
472                        let mut unit = unsafe { *(src.get_unchecked(converted)) };
473                        'innermost: loop {
474                            if unit > 127 {
475                                non_ascii = unit;
476                                continue 'middle;
477                            }
478                            // Testing on Haswell says that we should write the
479                            // byte unconditionally instead of trying to unread it
480                            // to make it part of the next SIMD stride.
481                            unsafe {
482                                *(dst.get_unchecked_mut(converted)) = unit as u8;
483                            }
484                            converted += 1;
485                            if unit < 60 {
486                                // We've got punctuation
487                                if converted == length {
488                                    return (pending, length, length);
489                                }
490                                unit = unsafe { *(src.get_unchecked(converted)) };
491                                continue 'innermost;
492                            }
493                            // We've got markup or ASCII text
494                            continue 'outermost;
495                        }
496                    }
497                }
498            }
499        }
500    }
501}
502
503// Any copyright to the test code below this comment is dedicated to the
504// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
505
506#[cfg(test)]
507mod tests {
508    use super::super::testing::*;
509    use super::super::*;
510
511    #[test]
512    fn test_windows_1255_ca() {
513        decode(WINDOWS_1255, b"\xCA", "\u{05BA}");
514        encode(WINDOWS_1255, "\u{05BA}", b"\xCA");
515    }
516
517    #[test]
518    fn test_ascii_punctuation() {
519        let bytes = b"\xC1\xF5\xF4\xFC \xE5\xDF\xED\xE1\xE9 \xDD\xED\xE1 \xF4\xE5\xF3\xF4. \xC1\xF5\xF4\xFC \xE5\xDF\xED\xE1\xE9 \xDD\xED\xE1 \xF4\xE5\xF3\xF4.";
520        let characters = "\u{0391}\u{03C5}\u{03C4}\u{03CC} \
521                          \u{03B5}\u{03AF}\u{03BD}\u{03B1}\u{03B9} \u{03AD}\u{03BD}\u{03B1} \
522                          \u{03C4}\u{03B5}\u{03C3}\u{03C4}. \u{0391}\u{03C5}\u{03C4}\u{03CC} \
523                          \u{03B5}\u{03AF}\u{03BD}\u{03B1}\u{03B9} \u{03AD}\u{03BD}\u{03B1} \
524                          \u{03C4}\u{03B5}\u{03C3}\u{03C4}.";
525        decode(WINDOWS_1253, bytes, characters);
526        encode(WINDOWS_1253, characters, bytes);
527    }
528
529    #[test]
530    fn test_decode_malformed() {
531        decode(
532            WINDOWS_1253,
533            b"\xC1\xF5\xD2\xF4\xFC",
534            "\u{0391}\u{03C5}\u{FFFD}\u{03C4}\u{03CC}",
535        );
536    }
537
538    #[test]
539    fn test_encode_unmappables() {
540        encode(
541            WINDOWS_1253,
542            "\u{0391}\u{03C5}\u{2603}\u{03C4}\u{03CC}",
543            b"\xC1\xF5&#9731;\xF4\xFC",
544        );
545        encode(
546            WINDOWS_1253,
547            "\u{0391}\u{03C5}\u{1F4A9}\u{03C4}\u{03CC}",
548            b"\xC1\xF5&#128169;\xF4\xFC",
549        );
550    }
551
552    #[test]
553    fn test_encode_unpaired_surrogates() {
554        encode_from_utf16(
555            WINDOWS_1253,
556            &[0x0391u16, 0x03C5u16, 0xDCA9u16, 0x03C4u16, 0x03CCu16],
557            b"\xC1\xF5&#65533;\xF4\xFC",
558        );
559        encode_from_utf16(
560            WINDOWS_1253,
561            &[0x0391u16, 0x03C5u16, 0xD83Du16, 0x03C4u16, 0x03CCu16],
562            b"\xC1\xF5&#65533;\xF4\xFC",
563        );
564        encode_from_utf16(
565            WINDOWS_1253,
566            &[0x0391u16, 0x03C5u16, 0x03C4u16, 0x03CCu16, 0xD83Du16],
567            b"\xC1\xF5\xF4\xFC&#65533;",
568        );
569    }
570
571    pub const HIGH_BYTES: &'static [u8; 128] = &[
572        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E,
573        0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D,
574        0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC,
575        0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB,
576        0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA,
577        0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9,
578        0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8,
579        0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
580        0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
581    ];
582
583    fn decode_single_byte(encoding: &'static Encoding, data: &'static [u16; 128]) {
584        let mut with_replacement = [0u16; 128];
585        let mut it = data.iter().enumerate();
586        loop {
587            match it.next() {
588                Some((i, code_point)) => {
589                    if *code_point == 0 {
590                        with_replacement[i] = 0xFFFD;
591                    } else {
592                        with_replacement[i] = *code_point;
593                    }
594                }
595                None => {
596                    break;
597                }
598            }
599        }
600
601        decode_to_utf16(encoding, HIGH_BYTES, &with_replacement[..]);
602    }
603
604    fn encode_single_byte(encoding: &'static Encoding, data: &'static [u16; 128]) {
605        let mut with_zeros = [0u8; 128];
606        let mut it = data.iter().enumerate();
607        loop {
608            match it.next() {
609                Some((i, code_point)) => {
610                    if *code_point == 0 {
611                        with_zeros[i] = 0;
612                    } else {
613                        with_zeros[i] = HIGH_BYTES[i];
614                    }
615                }
616                None => {
617                    break;
618                }
619            }
620        }
621
622        encode_from_utf16(encoding, data, &with_zeros[..]);
623    }
624
625    #[test]
626    fn test_single_byte_from_two_low_surrogates() {
627        let expectation = b"&#65533;&#65533;";
628        let mut output = [0u8; 40];
629        let mut encoder = WINDOWS_1253.new_encoder();
630        let (result, read, written, had_errors) =
631            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
632        assert_eq!(result, CoderResult::InputEmpty);
633        assert_eq!(read, 2);
634        assert_eq!(written, expectation.len());
635        assert!(had_errors);
636        assert_eq!(&output[..written], expectation);
637    }
638
639    // These tests are so self-referential that they are pretty useless.
640
641    // BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
642    // Instead, please regenerate using generate-encoding-data.py
643
    /// Runs `decode_single_byte` for each single-byte encoding against its
    /// table in `data::SINGLE_BYTE_DATA`. Under Miri only the first two
    /// encodings are checked.
    #[test]
    fn test_single_byte_decode() {
        decode_single_byte(IBM866, &data::SINGLE_BYTE_DATA.ibm866);
        decode_single_byte(ISO_8859_10, &data::SINGLE_BYTE_DATA.iso_8859_10);
        if cfg!(miri) {
            // Miri is too slow
            return;
        }
        decode_single_byte(ISO_8859_13, &data::SINGLE_BYTE_DATA.iso_8859_13);
        decode_single_byte(ISO_8859_14, &data::SINGLE_BYTE_DATA.iso_8859_14);
        decode_single_byte(ISO_8859_15, &data::SINGLE_BYTE_DATA.iso_8859_15);
        decode_single_byte(ISO_8859_16, &data::SINGLE_BYTE_DATA.iso_8859_16);
        decode_single_byte(ISO_8859_2, &data::SINGLE_BYTE_DATA.iso_8859_2);
        decode_single_byte(ISO_8859_3, &data::SINGLE_BYTE_DATA.iso_8859_3);
        decode_single_byte(ISO_8859_4, &data::SINGLE_BYTE_DATA.iso_8859_4);
        decode_single_byte(ISO_8859_5, &data::SINGLE_BYTE_DATA.iso_8859_5);
        decode_single_byte(ISO_8859_6, &data::SINGLE_BYTE_DATA.iso_8859_6);
        decode_single_byte(ISO_8859_7, &data::SINGLE_BYTE_DATA.iso_8859_7);
        decode_single_byte(ISO_8859_8, &data::SINGLE_BYTE_DATA.iso_8859_8);
        decode_single_byte(KOI8_R, &data::SINGLE_BYTE_DATA.koi8_r);
        decode_single_byte(KOI8_U, &data::SINGLE_BYTE_DATA.koi8_u);
        decode_single_byte(MACINTOSH, &data::SINGLE_BYTE_DATA.macintosh);
        decode_single_byte(WINDOWS_1250, &data::SINGLE_BYTE_DATA.windows_1250);
        decode_single_byte(WINDOWS_1251, &data::SINGLE_BYTE_DATA.windows_1251);
        decode_single_byte(WINDOWS_1252, &data::SINGLE_BYTE_DATA.windows_1252);
        decode_single_byte(WINDOWS_1253, &data::SINGLE_BYTE_DATA.windows_1253);
        decode_single_byte(WINDOWS_1254, &data::SINGLE_BYTE_DATA.windows_1254);
        decode_single_byte(WINDOWS_1255, &data::SINGLE_BYTE_DATA.windows_1255);
        decode_single_byte(WINDOWS_1256, &data::SINGLE_BYTE_DATA.windows_1256);
        decode_single_byte(WINDOWS_1257, &data::SINGLE_BYTE_DATA.windows_1257);
        decode_single_byte(WINDOWS_1258, &data::SINGLE_BYTE_DATA.windows_1258);
        decode_single_byte(WINDOWS_874, &data::SINGLE_BYTE_DATA.windows_874);
        decode_single_byte(X_MAC_CYRILLIC, &data::SINGLE_BYTE_DATA.x_mac_cyrillic);
    }
678
    /// Runs `encode_single_byte` for each single-byte encoding against its
    /// table in `data::SINGLE_BYTE_DATA`. Under Miri only the first two
    /// encodings are checked.
    #[test]
    fn test_single_byte_encode() {
        encode_single_byte(IBM866, &data::SINGLE_BYTE_DATA.ibm866);
        encode_single_byte(ISO_8859_10, &data::SINGLE_BYTE_DATA.iso_8859_10);
        if cfg!(miri) {
            // Miri is too slow
            return;
        }
        encode_single_byte(ISO_8859_13, &data::SINGLE_BYTE_DATA.iso_8859_13);
        encode_single_byte(ISO_8859_14, &data::SINGLE_BYTE_DATA.iso_8859_14);
        encode_single_byte(ISO_8859_15, &data::SINGLE_BYTE_DATA.iso_8859_15);
        encode_single_byte(ISO_8859_16, &data::SINGLE_BYTE_DATA.iso_8859_16);
        encode_single_byte(ISO_8859_2, &data::SINGLE_BYTE_DATA.iso_8859_2);
        encode_single_byte(ISO_8859_3, &data::SINGLE_BYTE_DATA.iso_8859_3);
        encode_single_byte(ISO_8859_4, &data::SINGLE_BYTE_DATA.iso_8859_4);
        encode_single_byte(ISO_8859_5, &data::SINGLE_BYTE_DATA.iso_8859_5);
        encode_single_byte(ISO_8859_6, &data::SINGLE_BYTE_DATA.iso_8859_6);
        encode_single_byte(ISO_8859_7, &data::SINGLE_BYTE_DATA.iso_8859_7);
        encode_single_byte(ISO_8859_8, &data::SINGLE_BYTE_DATA.iso_8859_8);
        encode_single_byte(KOI8_R, &data::SINGLE_BYTE_DATA.koi8_r);
        encode_single_byte(KOI8_U, &data::SINGLE_BYTE_DATA.koi8_u);
        encode_single_byte(MACINTOSH, &data::SINGLE_BYTE_DATA.macintosh);
        encode_single_byte(WINDOWS_1250, &data::SINGLE_BYTE_DATA.windows_1250);
        encode_single_byte(WINDOWS_1251, &data::SINGLE_BYTE_DATA.windows_1251);
        encode_single_byte(WINDOWS_1252, &data::SINGLE_BYTE_DATA.windows_1252);
        encode_single_byte(WINDOWS_1253, &data::SINGLE_BYTE_DATA.windows_1253);
        encode_single_byte(WINDOWS_1254, &data::SINGLE_BYTE_DATA.windows_1254);
        encode_single_byte(WINDOWS_1255, &data::SINGLE_BYTE_DATA.windows_1255);
        encode_single_byte(WINDOWS_1256, &data::SINGLE_BYTE_DATA.windows_1256);
        encode_single_byte(WINDOWS_1257, &data::SINGLE_BYTE_DATA.windows_1257);
        encode_single_byte(WINDOWS_1258, &data::SINGLE_BYTE_DATA.windows_1258);
        encode_single_byte(WINDOWS_874, &data::SINGLE_BYTE_DATA.windows_874);
        encode_single_byte(X_MAC_CYRILLIC, &data::SINGLE_BYTE_DATA.x_mac_cyrillic);
    }
713    // END GENERATED CODE
714}