encoding_rs/
x_user_defined.rs

1// Copyright Mozilla Foundation. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use super::*;
11use crate::handles::*;
12use crate::variant::*;
13
14cfg_if! {
15    if #[cfg(feature = "simd-accel")] {
16        use simd_funcs::*;
17        use packed_simd::u16x8;
18
19        #[inline(always)]
20        fn shift_upper(unpacked: u16x8) -> u16x8 {
21            let highest_ascii = u16x8::splat(0x7F);
22            unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0))        }
23    } else {
24    }
25}
26
27pub struct UserDefinedDecoder;
28
29impl UserDefinedDecoder {
30    pub fn new() -> VariantDecoder {
31        VariantDecoder::UserDefined(UserDefinedDecoder)
32    }
33
34    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
35        Some(byte_length)
36    }
37
38    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
39        byte_length.checked_mul(3)
40    }
41
42    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
43        byte_length.checked_mul(3)
44    }
45
46    decoder_function!(
47        {},
48        {},
49        {},
50        {
51            if b < 0x80 {
52                // ASCII run not optimized, because binary data expected
53                destination_handle.write_ascii(b);
54                continue;
55            }
56            destination_handle.write_upper_bmp(u16::from(b) + 0xF700);
57            continue;
58        },
59        self,
60        src_consumed,
61        dest,
62        source,
63        b,
64        destination_handle,
65        _unread_handle,
66        check_space_bmp,
67        decode_to_utf8_raw,
68        u8,
69        Utf8Destination
70    );
71
72    #[cfg(not(feature = "simd-accel"))]
73    pub fn decode_to_utf16_raw(
74        &mut self,
75        src: &[u8],
76        dst: &mut [u16],
77        _last: bool,
78    ) -> (DecoderResult, usize, usize) {
79        let (pending, length) = if dst.len() < src.len() {
80            (DecoderResult::OutputFull, dst.len())
81        } else {
82            (DecoderResult::InputEmpty, src.len())
83        };
84        let src_trim = &src[..length];
85        let dst_trim = &mut dst[..length];
86        src_trim
87            .iter()
88            .zip(dst_trim.iter_mut())
89            .for_each(|(from, to)| {
90                *to = {
91                    let unit = *from;
92                    if unit < 0x80 {
93                        u16::from(unit)
94                    } else {
95                        u16::from(unit) + 0xF700
96                    }
97                }
98            });
99        (pending, length, length)
100    }
101
102    #[cfg(feature = "simd-accel")]
103    pub fn decode_to_utf16_raw(
104        &mut self,
105        src: &[u8],
106        dst: &mut [u16],
107        _last: bool,
108    ) -> (DecoderResult, usize, usize) {
109        let (pending, length) = if dst.len() < src.len() {
110            (DecoderResult::OutputFull, dst.len())
111        } else {
112            (DecoderResult::InputEmpty, src.len())
113        };
114        // Not bothering with alignment
115        let tail_start = length & !0xF;
116        let simd_iterations = length >> 4;
117        let src_ptr = src.as_ptr();
118        let dst_ptr = dst.as_mut_ptr();
119        for i in 0..simd_iterations {
120            let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
121            let (first, second) = simd_unpack(input);
122            unsafe {
123                store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
124                store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
125            }
126        }
127        let src_tail = &src[tail_start..length];
128        let dst_tail = &mut dst[tail_start..length];
129        src_tail
130            .iter()
131            .zip(dst_tail.iter_mut())
132            .for_each(|(from, to)| {
133                *to = {
134                    let unit = *from;
135                    if unit < 0x80 {
136                        u16::from(unit)
137                    } else {
138                        u16::from(unit) + 0xF700
139                    }
140                }
141            });
142        (pending, length, length)
143    }
144}
145
146pub struct UserDefinedEncoder;
147
148impl UserDefinedEncoder {
149    pub fn new(encoding: &'static Encoding) -> Encoder {
150        Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder))
151    }
152
153    pub fn max_buffer_length_from_utf16_without_replacement(
154        &self,
155        u16_length: usize,
156    ) -> Option<usize> {
157        Some(u16_length)
158    }
159
160    pub fn max_buffer_length_from_utf8_without_replacement(
161        &self,
162        byte_length: usize,
163    ) -> Option<usize> {
164        Some(byte_length)
165    }
166
167    encoder_functions!(
168        {},
169        {
170            if c <= '\u{7F}' {
171                // TODO optimize ASCII run
172                destination_handle.write_one(c as u8);
173                continue;
174            }
175            if c < '\u{F780}' || c > '\u{F7FF}' {
176                return (
177                    EncoderResult::Unmappable(c),
178                    unread_handle.consumed(),
179                    destination_handle.written(),
180                );
181            }
182            destination_handle.write_one((u32::from(c) - 0xF700) as u8);
183            continue;
184        },
185        self,
186        src_consumed,
187        source,
188        dest,
189        c,
190        destination_handle,
191        unread_handle,
192        check_space_one
193    );
194}
195
196// Any copyright to the test code below this comment is dedicated to the
197// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
198
#[cfg(test)]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Runs the shared `decode` check with `X_USER_DEFINED`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Runs the shared `encode` check with `X_USER_DEFINED`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        let cases: [(&[u8], &str); 4] = [
            // Empty
            (b"", ""),
            // ASCII
            (b"\x61\x62", "\u{0061}\u{0062}"),
            // Lowest and highest non-ASCII bytes
            (b"\x80\xFF", "\u{F780}\u{F7FF}"),
            // 20 bytes mixing non-ASCII and ASCII
            (
                b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62",
                "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}",
            ),
        ];
        for &(bytes, expect) in cases.iter() {
            decode_x_user_defined(bytes, expect);
        }
    }

    #[test]
    fn test_x_user_defined_encode() {
        let cases: [(&str, &[u8]); 4] = [
            // Empty
            ("", b""),
            // ASCII
            ("\u{0061}\u{0062}", b"\x61\x62"),
            // First and last characters of the mapped range
            ("\u{F780}\u{F7FF}", b"\x80\xFF"),
            // Characters just outside the mapped range on either side
            ("\u{F77F}\u{F800}", b"&#63359;&#63488;"),
        ];
        for &(string, expect) in cases.iter() {
            encode_x_user_defined(string, expect);
        }
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        let unpaired_low_surrogates = [0xDC00u16, 0xDEDEu16];
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&unpaired_low_surrogates, &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}
247        assert_eq!(&output[..written], expectation);
248    }
249}