1use super::*;
11use crate::data::*;
12use crate::handles::*;
13use crate::variant::*;
14use super::in_inclusive_range;
16use super::in_inclusive_range16;
17
/// Decoder state for Shift_JIS.
pub struct ShiftJisDecoder {
    /// Lead byte carried over between calls, if any. `None` is the neutral
    /// state reported by `in_neutral_state`.
    // NOTE(review): presumably set by the macro-generated decode functions
    // when input ends between the two bytes of a sequence — confirm against
    // the macro definition.
    lead: Option<u8>,
}
21
22impl ShiftJisDecoder {
23 pub fn new() -> VariantDecoder {
24 VariantDecoder::ShiftJis(ShiftJisDecoder { lead: None })
25 }
26
    /// Returns `true` when no lead byte is being held, i.e. `self.lead` is
    /// `None`.
    pub fn in_neutral_state(&self) -> bool {
        self.lead.is_none()
    }
30
31 fn plus_one_if_lead(&self, byte_length: usize) -> Option<usize> {
32 byte_length.checked_add(match self.lead {
33 None => 0,
34 Some(_) => 1,
35 })
36 }
37
    /// Returns an upper bound on the UTF-16 code units needed to decode
    /// `byte_length` further input bytes, or `None` on `usize` overflow.
    ///
    /// The bound is one code unit per input byte, with a pending lead byte
    /// counted as one extra input byte.
    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
        self.plus_one_if_lead(byte_length)
    }
41
    /// Returns an upper bound on the UTF-8 bytes needed to decode
    /// `byte_length` further input bytes without replacement.
    ///
    /// Delegates to `max_utf8_buffer_length`, so both variants report the
    /// same bound.
    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
        // Worst case is one input byte becoming three UTF-8 bytes: a
        // half-width katakana byte such as 0xA1 decodes to U+FF61.
        self.max_utf8_buffer_length(byte_length)
    }
46
    /// Returns an upper bound on the UTF-8 bytes needed to decode
    /// `byte_length` further input bytes: three times the input length,
    /// where the input length includes a pending lead byte.
    // NOTE(review): `checked_mul` is a helper defined outside this file;
    // presumably it yields `None` when its `Option` argument is `None` or
    // the multiplication overflows — confirm against its definition.
    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
        checked_mul(3, self.plus_one_if_lead(byte_length))
    }
50
    // Invokes a macro defined outside this file, passing two code blocks
    // followed by the identifiers and the loop label those blocks use.
    // The first block classifies a non-ASCII byte (`non_ascii`) and
    // evaluates to the lead byte minus its range offset; the second block
    // combines that value (`lead_minus_offset`) with the next byte (`byte`)
    // and writes the decoded code unit.
    // NOTE(review): how the macro wires these pieces together, and the
    // meaning of the trailing helper names and `false`, are not visible
    // here — see the macro definition.
    ascii_compatible_two_byte_decoder_functions!(
        {
            // Lead byte handling:
            //   0x81..=0x9F -> subtract 0x81
            //   0xE0..=0xFC -> subtract 0xC1
            //   0xA1..=0xDF -> single byte, mapped to U+FF61.. (half-width
            //                  katakana)
            //   0x80        -> single byte, passed through as U+0080
            //   otherwise   -> malformed
            // `wrapping_sub` followed by an unsigned `>` comparison tests
            // range membership with a single comparison.
            let mut non_ascii_minus_offset =
                non_ascii.wrapping_sub(0x81);
            if non_ascii_minus_offset > (0x9F - 0x81) {
                let non_ascii_minus_range_start = non_ascii.wrapping_sub(0xE0);
                if non_ascii_minus_range_start > (0xFC - 0xE0) {
                    let non_ascii_minus_half_with_katakana_start = non_ascii.wrapping_sub(0xA1);
                    if non_ascii_minus_half_with_katakana_start > (0xDF - 0xA1) {
                        if non_ascii == 0x80 {
                            handle.write_mid_bmp(0x80);
                            // Single-byte character: go back to the outer
                            // loop instead of expecting a trail byte.
                            continue 'outermost;
                        }
                        // Not a valid lead or single byte.
                        return (DecoderResult::Malformed(1, 0),
                                source.consumed(),
                                handle.written());
                    }
                    handle.write_upper_bmp(0xFF61 + u16::from(non_ascii_minus_half_with_katakana_start));
                    // Single-byte character: go back to the outer loop.
                    continue 'outermost;
                }
                // Lead is in 0xE0..=0xFC here, so this subtraction cannot
                // underflow.
                non_ascii_minus_offset = non_ascii - 0xC1;
            }
            non_ascii_minus_offset
        },
        {
            // Hiragana is checked first: offset lead 0x01 (byte 0x82) with
            // trail 0x9F..=0xF1 maps linearly to U+3041 onwards.
            let trail_minus_hiragana = byte.wrapping_sub(0x9F);
            if lead_minus_offset == 0x01 && trail_minus_hiragana < 0x53 {
                // Hiragana
                handle.write_upper_bmp(0x3041 + u16::from(trail_minus_hiragana))
            } else {
                // Trail byte handling:
                //   0x40..=0x7E -> subtract 0x40
                //   0x80..=0xFC -> subtract 0x41
                //   otherwise   -> malformed
                let mut trail_minus_offset =
                    byte.wrapping_sub(0x40);
                if trail_minus_offset > (0x7E - 0x40) {
                    let trail_minus_range_start =
                        byte.wrapping_sub(0x80);
                    if trail_minus_range_start > (0xFC - 0x80) {
                        if byte < 0x80 {
                            // Invalid trail in the ASCII range: only the
                            // lead is reported as malformed and the trail
                            // is unread, so it is decoded on its own
                            // (b"\x81\x3F" decodes to U+FFFD followed by
                            // "?" in the tests).
                            return (DecoderResult::Malformed(1, 0),
                                    unread_handle_trail.unread(),
                                    handle.written());
                        }
                        // Invalid non-ASCII trail: both bytes are reported.
                        return (DecoderResult::Malformed(2, 0),
                                unread_handle_trail.consumed(),
                                handle.written());
                    }
                    // Trail is in 0x80..=0xFC here, so this subtraction
                    // cannot underflow.
                    trail_minus_offset = byte - 0x41;
                }
                if lead_minus_offset == 0x02 &&
                   trail_minus_offset < 0x56 {
                    // Katakana: maps linearly to U+30A1 onwards.
                    handle.write_upper_bmp(0x30A1 + u16::from(trail_minus_offset))
                } else {
                    // General case: compute the table pointer (188 trail
                    // positions per lead) and try each table in turn.
                    let pointer = lead_minus_offset as usize *
                                  188usize +
                                  trail_minus_offset as usize;
                    // Level 1 kanji table starts at pointer 1410.
                    let level1_pointer = pointer.wrapping_sub(1410);
                    if level1_pointer < JIS0208_LEVEL1_KANJI.len() {
                        handle.write_upper_bmp(JIS0208_LEVEL1_KANJI[level1_pointer])
                    } else {
                        // Level 2 and additional kanji table starts at
                        // pointer 4418.
                        let level2_pointer = pointer.wrapping_sub(4418);
                        if level2_pointer <
                           JIS0208_LEVEL2_AND_ADDITIONAL_KANJI.len() {
                            handle.write_upper_bmp(JIS0208_LEVEL2_AND_ADDITIONAL_KANJI[level2_pointer])
                        } else {
                            // IBM kanji table is reachable from pointer
                            // 10744 ...
                            let upper_ibm_pointer = pointer.wrapping_sub(10744);
                            if upper_ibm_pointer < IBM_KANJI.len() {
                                handle.write_upper_bmp(IBM_KANJI[upper_ibm_pointer])
                            } else {
                                // ... and the same table is also reachable
                                // from pointer 8272.
                                let lower_ibm_pointer = pointer.wrapping_sub(8272);
                                if lower_ibm_pointer < IBM_KANJI.len() {
                                    handle.write_upper_bmp(IBM_KANJI[lower_ibm_pointer])
                                } else if in_inclusive_range(pointer, 8836, 10715) {
                                    // Pointers 8836..=10715 map linearly
                                    // onto U+E000 onwards (b"\xF0\x40" is
                                    // U+E000 in the tests).
                                    handle.write_upper_bmp((0xE000 - 8836 + pointer) as u16)
                                } else if let Some(bmp) = jis0208_symbol_decode(pointer) {
                                    handle.write_bmp_excl_ascii(bmp)
                                } else if let Some(bmp) = jis0208_range_decode(pointer) {
                                    handle.write_bmp_excl_ascii(bmp)
                                } else {
                                    // Unmapped pointer: same ASCII vs.
                                    // non-ASCII trail handling as for an
                                    // out-of-range trail above.
                                    if byte < 0x80 {
                                        return (DecoderResult::Malformed(1, 0),
                                                unread_handle_trail.unread(),
                                                handle.written());
                                    }
                                    return (DecoderResult::Malformed(2, 0),
                                            unread_handle_trail.consumed(),
                                            handle.written());
                                }
                            }
                        }
                    }
                }
            }
        },
        self,
        non_ascii,
        byte,
        lead_minus_offset,
        unread_handle_trail,
        source,
        handle,
        'outermost,
        copy_ascii_from_check_space_bmp,
        check_space_bmp,
        false);
172}
173
/// Encodes `bmp` as a Shift_JIS `(lead, trail)` byte pair, or returns `None`
/// if there is no mapping.
///
/// This variant is compiled only with the `fast-kanji-encode` feature and
/// delegates entirely to `jis0208_kanji_shift_jis_encode`.
#[cfg(feature = "fast-kanji-encode")]
#[inline(always)]
fn encode_kanji(bmp: u16) -> Option<(u8, u8)> {
    jis0208_kanji_shift_jis_encode(bmp)
}
179
180#[cfg(not(feature = "fast-kanji-encode"))]
181#[inline(always)]
182fn encode_kanji(bmp: u16) -> Option<(u8, u8)> {
183 if let Some((lead, trail)) = jis0208_level1_kanji_shift_jis_encode(bmp) {
184 return Some((lead, trail));
185 }
186 let pointer = if 0x4EDD == bmp {
187 23
189 } else if let Some(pos) = jis0208_level2_and_additional_kanji_encode(bmp) {
190 4418 + pos
191 } else if let Some(pos) = position(&IBM_KANJI[..], bmp) {
192 10744 + pos
193 } else {
194 return None;
195 };
196 let lead = pointer / 188;
197 let lead_offset = if lead < 0x1F { 0x81usize } else { 0xC1usize };
198 let trail = pointer % 188;
199 let trail_offset = if trail < 0x3F { 0x40usize } else { 0x41usize };
200 Some(((lead + lead_offset) as u8, (trail + trail_offset) as u8))
201}
202
/// Shift_JIS encoder. A unit struct: it carries no state of its own.
pub struct ShiftJisEncoder;
204
205impl ShiftJisEncoder {
    /// Builds an `Encoder` for `encoding` backed by the
    /// `VariantEncoder::ShiftJis` variant.
    pub fn new(encoding: &'static Encoding) -> Encoder {
        Encoder::new(encoding, VariantEncoder::ShiftJis(ShiftJisEncoder))
    }
209
    /// Returns an upper bound on the output bytes needed to encode
    /// `u16_length` UTF-16 code units without replacement, or `None` on
    /// `usize` overflow.
    ///
    /// The bound is two bytes per code unit; the encoder body only ever
    /// writes one or two bytes per BMP code unit.
    pub fn max_buffer_length_from_utf16_without_replacement(
        &self,
        u16_length: usize,
    ) -> Option<usize> {
        u16_length.checked_mul(2)
    }
216
    /// Returns an upper bound on the output bytes needed to encode
    /// `byte_length` bytes of UTF-8 without replacement, or `None` on
    /// `usize` overflow.
    ///
    /// The bound is the input length plus one.
    // NOTE(review): the extra byte presumably covers the encoder requiring
    // room for a two-byte write before it consumes a character (see the
    // `check_space_two` argument to the encoder macro) — confirm against
    // the macro definition.
    pub fn max_buffer_length_from_utf8_without_replacement(
        &self,
        byte_length: usize,
    ) -> Option<usize> {
        byte_length.checked_add(1)
    }
223
    // Invokes a macro defined outside this file, passing one code block
    // followed by the identifiers that block uses. The block encodes a
    // single non-ASCII BMP code unit (`bmp`): it either writes one or two
    // bytes through `handle` or returns an unmappable result.
    // NOTE(review): how the macro wires these pieces together, and the
    // meaning of the trailing helper names and `false`, are not visible
    // here — see the macro definition.
    ascii_compatible_bmp_encoder_functions!(
        {
            // Hiragana is checked first: U+3041.. maps linearly to lead
            // 0x82, trail 0x9F.. (mirrors the decoder's hiragana branch).
            let bmp_minus_hiragana = bmp.wrapping_sub(0x3041);
            if bmp_minus_hiragana < 0x53 {
                handle.write_two(0x82, 0x9F + bmp_minus_hiragana as u8)
            } else if in_inclusive_range16(bmp, 0x4E00, 0x9FA0) {
                // Code units in 0x4E00..=0x9FA0 go through `encode_kanji`.
                if let Some((lead, trail)) = encode_kanji(bmp) {
                    handle.write_two(lead, trail)
                } else {
                    return (
                        EncoderResult::unmappable_from_bmp(bmp),
                        source.consumed(),
                        handle.written(),
                    );
                }
            } else {
                // Katakana: U+30A1.. maps to lead 0x83. The trail offset
                // steps from 0x40 to 0x41 so that trail byte 0x7F is
                // skipped.
                let bmp_minus_katakana = bmp.wrapping_sub(0x30A1);
                if bmp_minus_katakana < 0x56 {
                    let trail_offset = if bmp_minus_katakana < 0x3F {
                        0x40
                    } else {
                        0x41
                    };
                    handle.write_two(0x83, (trail_offset + bmp_minus_katakana) as u8)
                } else {
                    // U+3000..=U+3002 map to 0x81 0x40..=0x81 0x42.
                    let bmp_minus_space = bmp.wrapping_sub(0x3000);
                    if bmp_minus_space < 3 {
                        handle.write_two(0x81, 0x40 + bmp_minus_space as u8)
                    } else if bmp == 0xA5 {
                        // U+00A5 is written as the single byte 0x5C.
                        handle.write_one(0x5Cu8)
                    } else if bmp == 0x80 {
                        // U+0080 round-trips as the single byte 0x80.
                        handle.write_one(0x80u8)
                    } else if bmp == 0x203E {
                        // U+203E is written as the single byte 0x7E.
                        handle.write_one(0x7Eu8)
                    } else if in_inclusive_range16(bmp, 0xFF61, 0xFF9F) {
                        // Half-width katakana: single bytes 0xA1..=0xDF.
                        handle.write_one((bmp - (0xFF61 - 0xA1)) as u8)
                    } else if bmp == 0x2212 {
                        // U+2212 is written as 0x81 0x7C.
                        handle.write_two(0x81u8, 0x7Cu8)
                    } else {
                        // Remaining cases resolve to a table pointer, which
                        // is then split into lead and trail bytes below.
                        let bmp_minus_roman = bmp.wrapping_sub(0x2170);
                        let pointer = if bmp_minus_roman <= (0x2179 - 0x2170) {
                            // U+2170..=U+2179 map to pointers 10716..=10725
                            // (U+2170 encodes to 0xFA 0x40 in the tests).
                            10716 + bmp_minus_roman as usize
                        } else if let Some(pointer) = jis0208_range_encode(bmp) {
                            pointer
                        } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D)
                            || bmp == 0xF929
                            || bmp == 0xF9DC
                        {
                            // NOTE(review): the `unwrap` assumes every code
                            // unit selected by this condition is present in
                            // IBM_KANJI; the table is not visible here —
                            // confirm.
                            let pos = position(&IBM_KANJI[..], bmp).unwrap();
                            // IBM kanji are based at pointer 10744.
                            10744 + pos
                        } else if let Some(pointer) = jis0208_symbol_encode(bmp) {
                            pointer
                        } else {
                            return (
                                EncoderResult::unmappable_from_bmp(bmp),
                                source.consumed(),
                                handle.written(),
                            );
                        };
                        // 188 trail positions per lead. The offsets invert
                        // the decoder's: leads resume at 0xE0 after 0x9F,
                        // and trails skip 0x7F.
                        let lead = pointer / 188;
                        let lead_offset = if lead < 0x1F { 0x81usize } else { 0xC1usize };
                        let trail = pointer % 188;
                        let trail_offset = if trail < 0x3F { 0x40usize } else { 0x41usize };
                        handle.write_two((lead + lead_offset) as u8, (trail + trail_offset) as u8)
                    }
                }
            }
        },
        bmp,
        self,
        source,
        handle,
        copy_ascii_to_check_space_two,
        check_space_two,
        false
    );
303}
304
305#[cfg(test)]
309mod tests {
310 use super::super::testing::*;
311 use super::super::*;
312
    /// Runs the shared `decode` test helper with the `SHIFT_JIS` encoding.
    fn decode_shift_jis(bytes: &[u8], expect: &str) {
        decode(SHIFT_JIS, bytes, expect);
    }
316
    /// Runs the shared `encode` test helper with the `SHIFT_JIS` encoding.
    fn encode_shift_jis(string: &str, expect: &[u8]) {
        encode(SHIFT_JIS, string, expect);
    }
320
321 #[test]
322 fn test_shift_jis_decode() {
323 decode_shift_jis(b"", &"");
325
326 decode_shift_jis(b"\x61\x62", "\u{0061}\u{0062}");
328
329 decode_shift_jis(b"\xA1", "\u{FF61}");
331 decode_shift_jis(b"\xDF", "\u{FF9F}");
332 decode_shift_jis(b"\xA0", "\u{FFFD}");
333 decode_shift_jis(b"\xE0", "\u{FFFD}");
334 decode_shift_jis(b"\xA0+", "\u{FFFD}+");
335 decode_shift_jis(b"\xE0+", "\u{FFFD}+");
336
337 decode_shift_jis(b"\xF0\x40", "\u{E000}");
339 decode_shift_jis(b"\xF9\xFC", "\u{E757}");
340 decode_shift_jis(b"\xEF\xFC", "\u{FFFD}");
341 decode_shift_jis(b"\xFA\x40", "\u{2170}");
342
343 decode_shift_jis(b"\x81\x40", "\u{3000}");
345 decode_shift_jis(b"\x81\x3F", "\u{FFFD}?");
346 decode_shift_jis(b"\xEE\xFC", "\u{FF02}");
347 decode_shift_jis(b"\xEE\xFD", "\u{FFFD}");
348 decode_shift_jis(b"\xFA\x40", "\u{2170}");
349 decode_shift_jis(b"\xFA\x3F", "\u{FFFD}?");
350 decode_shift_jis(b"\xFC\x4B", "\u{9ED1}");
351 decode_shift_jis(b"\xFC\x4C", "\u{FFFD}L");
352 }
354
355 #[test]
356 fn test_shift_jis_encode() {
357 encode_shift_jis("", b"");
359
360 encode_shift_jis("\u{0061}\u{0062}", b"\x61\x62");
362
363 encode_shift_jis("\u{0080}", b"\x80");
365 encode_shift_jis("\u{00A5}", b"\x5C");
366 encode_shift_jis("\u{203E}", b"\x7E");
367 encode_shift_jis("\u{2212}", b"\x81\x7C");
368
369 encode_shift_jis("\u{FF61}", b"\xA1");
371 encode_shift_jis("\u{FF9F}", b"\xDF");
372
373 encode_shift_jis("\u{E000}", b"");
375 encode_shift_jis("\u{E757}", b"");
376
377 encode_shift_jis("\u{02D8}", b"˘");
379
380 encode_shift_jis("\u{3000}", b"\x81\x40");
382 encode_shift_jis("\u{FF02}", b"\xFA\x57");
383 encode_shift_jis("\u{2170}", b"\xFA\x40");
384 encode_shift_jis("\u{9ED1}", b"\xFC\x4B");
385 }
386
387 #[test]
388 #[cfg_attr(miri, ignore)] fn test_shift_jis_decode_all() {
390 let input = include_bytes!("test_data/shift_jis_in.txt");
391 let expectation = include_str!("test_data/shift_jis_in_ref.txt");
392 let (cow, had_errors) = SHIFT_JIS.decode_without_bom_handling(input);
393 assert!(had_errors, "Should have had errors.");
394 assert_eq!(&cow[..], expectation);
395 }
396
397 #[test]
398 #[cfg_attr(miri, ignore)] fn test_shift_jis_encode_all() {
400 let input = include_str!("test_data/shift_jis_out.txt");
401 let expectation = include_bytes!("test_data/shift_jis_out_ref.txt");
402 let (cow, encoding, had_errors) = SHIFT_JIS.encode(input);
403 assert!(!had_errors, "Should not have had errors.");
404 assert_eq!(encoding, SHIFT_JIS);
405 assert_eq!(&cow[..], &expectation[..]);
406 }
407
408 #[test]
409 fn test_shift_jis_half_width_katakana_length() {
410 let mut output = [0u8; 20];
411 let mut decoder = SHIFT_JIS.new_decoder();
412 {
413 let needed = decoder
414 .max_utf8_buffer_length_without_replacement(1)
415 .unwrap();
416 let (result, read, written) =
417 decoder.decode_to_utf8_without_replacement(b"\xA1", &mut output[..needed], true);
418 assert_eq!(result, DecoderResult::InputEmpty);
419 assert_eq!(read, 1);
420 assert_eq!(written, 3);
421 assert_eq!(output[0], 0xEF);
422 assert_eq!(output[1], 0xBD);
423 assert_eq!(output[2], 0xA1);
424 }
425 }
426}