1use std::convert::Into;
8use std::marker::PhantomData;
9use std::default::Default;
10use util::StrCharIndex;
11use index_simpchinese as index;
12use types::*;
13
/// Marker type selecting the GBK flavour of the shared GB encoder.
///
/// Its `GBType` implementation in this file reports the name `"gbk"` and
/// turns the initial GBK flag on.
#[derive(Copy, Clone)]
pub struct GBK;
20
/// Marker type selecting the GB 18030 flavour of the shared GB encoder.
///
/// Its `GBType` implementation in this file reports the name `"gb18030"` and
/// leaves the initial GBK flag off.
#[derive(Copy, Clone)]
pub struct GB18030;
27
/// Compile-time description of one GB encoding variant.
///
/// All items are associated functions (no `self`), so a variant is selected
/// purely through the type parameter of `GBEncoding<T>` / `GBEncoder<T>`.
#[doc(hidden)]
pub trait GBType: Clone + 'static {
    /// Canonical name of the encoding variant.
    fn name() -> &'static str;

    /// WHATWG label of the encoding variant, if it has one.
    fn whatwg_name() -> Option<&'static str>;

    /// Whether the encoder starts with the GBK flag set.
    fn initial_gbk_flag() -> bool;
}
35
36impl GBType for GBK {
37 fn name() -> &'static str { "gbk" }
38 fn whatwg_name() -> Option<&'static str> { Some("gbk") }
39 fn initial_gbk_flag() -> bool { true }
40}
41
42impl GBType for GB18030 {
43 fn name() -> &'static str { "gb18030" }
44 fn whatwg_name() -> Option<&'static str> { Some("gb18030") }
45 fn initial_gbk_flag() -> bool { false }
46}
47
/// Encoding handle shared by the GB variants.
///
/// `T` only selects the variant at the type level; no value of `T` is stored,
/// hence the `PhantomData` field.
#[derive(Copy, Clone)]
pub struct GBEncoding<T> {
    _marker: PhantomData<T>,
}
80
81pub type GBKEncoding = GBEncoding<GBK>;
83pub type GB18030Encoding = GBEncoding<GB18030>;
85
86pub const GBK_ENCODING: GBKEncoding = GBEncoding { _marker: PhantomData };
88pub const GB18030_ENCODING: GB18030Encoding = GBEncoding { _marker: PhantomData };
90
91impl<T: GBType> Encoding for GBEncoding<T> {
92 fn name(&self) -> &'static str { <T as GBType>::name() }
93 fn whatwg_name(&self) -> Option<&'static str> { <T as GBType>::whatwg_name() }
94 fn raw_encoder(&self) -> Box<RawEncoder> { GBEncoder::<T>::new() }
95 fn raw_decoder(&self) -> Box<RawDecoder> { GB18030Decoder::new() }
96}
97
/// Encoder shared by the GB variants.
///
/// It keeps no per-stream state; `T` is carried only as a type-level marker.
#[derive(Copy, Clone)]
pub struct GBEncoder<T> {
    _marker: PhantomData<T>,
}
110
111impl<T: GBType> GBEncoder<T> {
112 pub fn new() -> Box<RawEncoder> {
113 Box::new(GBEncoder::<T> { _marker: PhantomData })
114 }
115}
116
117impl<T: GBType> RawEncoder for GBEncoder<T> {
118 fn from_self(&self) -> Box<RawEncoder> { GBEncoder::<T>::new() }
119 fn is_ascii_compatible(&self) -> bool { true }
120
121 fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
122 output.writer_hint(input.len());
123
124 let gbk_flag = <T as GBType>::initial_gbk_flag();
125 for ((i, j), ch) in input.index_iter() {
126 if ch < '\u{80}' {
127 output.write_byte(ch as u8);
128 } else if gbk_flag && ch == '\u{20AC}' {
129 output.write_byte('\u{80}' as u8)
130 } else {
131 let ptr = index::gb18030::backward(ch as u32);
132 if ptr == 0xffff {
133 if gbk_flag {
134 return (i, Some(CodecError {
135 upto: j as isize,
136 cause: "gbk doesn't support gb18030 extensions".into()
137 }));
138 }
139 let ptr = index::gb18030_ranges::backward(ch as u32);
140 assert!(ptr != 0xffffffff);
141 let (ptr, byte4) = (ptr / 10, ptr % 10);
142 let (ptr, byte3) = (ptr / 126, ptr % 126);
143 let (byte1, byte2) = (ptr / 10, ptr % 10);
144 output.write_byte((byte1 + 0x81) as u8);
145 output.write_byte((byte2 + 0x30) as u8);
146 output.write_byte((byte3 + 0x81) as u8);
147 output.write_byte((byte4 + 0x30) as u8);
148 } else {
149 let lead = ptr / 190 + 0x81;
150 let trail = ptr % 190;
151 let trailoffset = if trail < 0x3f {0x40} else {0x41};
152 output.write_byte(lead as u8);
153 output.write_byte((trail + trailoffset) as u8);
154 }
155 }
156 }
157 (input.len(), None)
158 }
159
160 fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
161 None
162 }
163}
164
165#[derive(Clone, Copy)]
167struct GB18030Decoder {
168 st: gb18030::State,
169}
170
171impl GB18030Decoder {
172 pub fn new() -> Box<RawDecoder> {
173 Box::new(GB18030Decoder { st: Default::default() })
174 }
175}
176
177impl RawDecoder for GB18030Decoder {
178 fn from_self(&self) -> Box<RawDecoder> { GB18030Decoder::new() }
179 fn is_ascii_compatible(&self) -> bool { true }
180
181 fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
182 let (st, processed, err) = gb18030::raw_feed(self.st, input, output, &());
183 self.st = st;
184 (processed, err)
185 }
186
187 fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
188 let (st, err) = gb18030::raw_finish(self.st, output, &());
189 self.st = st;
190 err
191 }
192}
193
194stateful_decoder! {
195 module gb18030;
196
197 internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
198 use index_simpchinese as index;
199
200 let lead = lead as u16;
201 let trail = trail as u16;
202 let index = match (lead, trail) {
203 (0x81...0xfe, 0x40...0x7e) | (0x81...0xfe, 0x80...0xfe) => {
204 let trailoffset = if trail < 0x7f {0x40} else {0x41};
205 (lead - 0x81) * 190 + trail - trailoffset
206 }
207 _ => 0xffff,
208 };
209 index::gb18030::forward(index)
210 }
211
212 internal pub fn map_four_bytes(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 {
213 use index_simpchinese as index;
214
215 let index = (b1 as u32 - 0x81) * 12600 + (b2 as u32 - 0x30) * 1260 +
217 (b3 as u32 - 0x81) * 10 + (b4 as u32 - 0x30);
218 index::gb18030_ranges::forward(index)
219 }
220
221initial:
222 state S0(ctx: Context) {
224 case b @ 0x00...0x7f => ctx.emit(b as u32);
225 case 0x80 => ctx.emit(0x20ac);
226 case b @ 0x81...0xfe => S1(ctx, b);
227 case _ => ctx.err("invalid sequence");
228 }
229
230transient:
231 state S1(ctx: Context, first: u8) {
233 case b @ 0x30...0x39 => S2(ctx, first, b);
234 case b => match map_two_bytes(first, b) {
235 0xffff => ctx.backup_and_err(1, "invalid sequence"), ch => ctx.emit(ch)
237 };
238 }
239
240 state S2(ctx: Context, first: u8, second: u8) {
242 case b @ 0x81...0xfe => S3(ctx, first, second, b);
243 case _ => ctx.backup_and_err(2, "invalid sequence");
244 }
245
246 state S3(ctx: Context, first: u8, second: u8, third: u8) {
248 case b @ 0x30...0x39 => match map_four_bytes(first, second, third, b) {
249 0xffffffff => ctx.backup_and_err(3, "invalid sequence"), ch => ctx.emit(ch)
251 };
252 case _ => ctx.backup_and_err(3, "invalid sequence");
253 }
254}
255
/// Tests and benchmarks for the GB 18030 encoder and the shared decoder.
#[cfg(test)]
mod gb18030_tests {
    extern crate test;
    use super::GB18030_ENCODING;
    use testutils;
    use types::*;

    #[test]
    fn test_encoder() {
        let mut encoder = GB18030_ENCODING.raw_encoder();
        assert_feed_ok!(encoder, "A", "", [0x41]);
        assert_feed_ok!(encoder, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(encoder, "", "", []);
        assert_feed_ok!(encoder,
                        "\u{4e2d}\u{534e}\u{4eba}\u{6c11}\u{5171}\u{548c}\u{56fd}", "",
                        [0xd6, 0xd0, 0xbb, 0xaa, 0xc8, 0xcb, 0xc3, 0xf1,
                         0xb9, 0xb2, 0xba, 0xcd, 0xb9, 0xfa]);
        // Unlike GBK, U+20AC takes the two-byte form here.
        assert_feed_ok!(encoder, "1\u{20ac}/m", "", [0x31, 0xa2, 0xe3, 0x2f, 0x6d]);
        assert_feed_ok!(encoder, "\u{ff21}\u{ff22}\u{ff23}", "",
                        [0xa3, 0xc1, 0xa3, 0xc2, 0xa3, 0xc3]);
        // Four-byte sequences, with one two-byte character (U+00A4) in between.
        assert_feed_ok!(encoder, "\u{80}", "", [0x81, 0x30, 0x81, 0x30]);
        assert_feed_ok!(encoder, "\u{81}", "", [0x81, 0x30, 0x81, 0x31]);
        assert_feed_ok!(encoder, "\u{a3}", "", [0x81, 0x30, 0x84, 0x35]);
        assert_feed_ok!(encoder, "\u{a4}", "", [0xa1, 0xe8]);
        assert_feed_ok!(encoder, "\u{a5}", "", [0x81, 0x30, 0x84, 0x36]);
        assert_feed_ok!(encoder, "\u{10ffff}", "", [0xe3, 0x32, 0x9a, 0x35]);
        assert_feed_ok!(encoder, "\u{2a6a5}\u{3007}", "",
                        [0x98, 0x35, 0xee, 0x37, 0xa9, 0x96]);
        assert_finish_ok!(encoder, []);
    }

    #[test]
    fn test_decoder_valid() {
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [0x41], [], "A");
        assert_feed_ok!(decoder, [0x42, 0x43], [], "BC");
        assert_feed_ok!(decoder, [], [], "");
        assert_feed_ok!(decoder,
                        [0xd6, 0xd0, 0xbb, 0xaa, 0xc8, 0xcb, 0xc3, 0xf1,
                         0xb9, 0xb2, 0xba, 0xcd, 0xb9, 0xfa], [],
                        "\u{4e2d}\u{534e}\u{4eba}\u{6c11}\u{5171}\u{548c}\u{56fd}");
        // The single byte 0x80 decodes to U+20AC.
        assert_feed_ok!(decoder, [0x31, 0x80, 0x2f, 0x6d], [], "1\u{20ac}/m");
        assert_feed_ok!(decoder, [0xa3, 0xc1, 0xa3, 0xc2, 0xa3, 0xc3], [],
                        "\u{ff21}\u{ff22}\u{ff23}");
        assert_feed_ok!(decoder, [0x81, 0x30, 0x81, 0x30], [], "\u{80}");
        assert_feed_ok!(decoder, [0x81, 0x30, 0x81, 0x31], [], "\u{81}");
        assert_feed_ok!(decoder, [0x81, 0x30, 0x84, 0x35], [], "\u{a3}");
        assert_feed_ok!(decoder, [0xa1, 0xe8], [], "\u{a4}");
        assert_feed_ok!(decoder, [0x81, 0x30, 0x84, 0x36], [], "\u{a5}");
        assert_feed_ok!(decoder, [0xe3, 0x32, 0x9a, 0x35], [], "\u{10ffff}");
        assert_feed_ok!(decoder, [0x98, 0x35, 0xee, 0x37, 0xa9, 0x96], [],
                        "\u{2a6a5}\u{3007}");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_valid_partial() {
        // Sequences split across feeds at every possible position.
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0xa1], "");
        assert_feed_ok!(decoder, [0xa1], [], "\u{3000}");
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_feed_ok!(decoder, [], [0x30], "");
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_feed_ok!(decoder, [0x30], [], "\u{80}");
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_feed_ok!(decoder, [], [0x30], "");
        assert_feed_ok!(decoder, [0x81, 0x31], [], "\u{81}");
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_feed_ok!(decoder, [0x30, 0x81, 0x32], [], "\u{82}");
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_feed_ok!(decoder, [], [0x30, 0x81], "");
        assert_feed_ok!(decoder, [0x33], [], "\u{83}");
        assert_feed_ok!(decoder, [], [0x81, 0x30], "");
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_feed_ok!(decoder, [0x34], [], "\u{84}");
        assert_feed_ok!(decoder, [], [0x81, 0x30], "");
        assert_feed_ok!(decoder, [0x81, 0x35], [], "\u{85}");
        assert_feed_ok!(decoder, [], [0x81, 0x30, 0x81], "");
        assert_feed_ok!(decoder, [0x36], [], "\u{86}");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_invalid_partial() {
        // Finishing with one, two or three pending bytes must report an error.
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0xa1], "");
        assert_finish_err!(decoder, "");

        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0x81], "");
        assert_finish_err!(decoder, "");

        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0x81, 0x30], "");
        assert_finish_err!(decoder, "");

        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0x81, 0x30, 0x81], "");
        assert_finish_err!(decoder, "");
    }

    #[test]
    fn test_decoder_invalid_out_of_range() {
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_err!(decoder, [], [0xff], [], "");
        assert_feed_err!(decoder, [], [0x81], [0x00], "");
        assert_feed_err!(decoder, [], [0x81], [0x7f], "");
        assert_feed_err!(decoder, [], [0x81], [0xff], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0x00], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0x80], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0xff], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0x81, 0x00], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0x81, 0x2f], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0x81, 0x3a], "");
        assert_feed_err!(decoder, [], [0x81], [0x31, 0x81, 0xff], "");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_invalid_boundary() {
        // E3 32 9A 35 is U+10FFFF (see `test_encoder`), so E3 32 9A 36 is one
        // step past it and must be rejected.
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0xe3], "");
        assert_feed_err!(decoder, [], [], [0x32, 0x9a, 0x36], "");
        assert_finish_ok!(decoder, "");

        // Same sequence, with the error reported two bytes before this feed.
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [], [0xe3], "");
        assert_feed_ok!(decoder, [], [0x32, 0x9a], "");
        assert_feed_err!(decoder, -2, [], [], [0x32, 0x9a, 0x36], "");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_feed_after_finish() {
        // A failed finish must leave the decoder usable for fresh input.
        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [0xd2, 0xbb], [0xd2], "\u{4e00}");
        assert_finish_err!(decoder, "");
        assert_feed_ok!(decoder, [0xd2, 0xbb], [], "\u{4e00}");
        assert_finish_ok!(decoder, "");

        let mut decoder = GB18030_ENCODING.raw_decoder();
        assert_feed_ok!(decoder, [0x98, 0x35, 0xee, 0x37], [0x98, 0x35, 0xee], "\u{2a6a5}");
        assert_finish_err!(decoder, "");
        assert_feed_ok!(decoder, [0x98, 0x35, 0xee, 0x37], [0x98, 0x35], "\u{2a6a5}");
        assert_finish_err!(decoder, "");
        assert_feed_ok!(decoder, [0x98, 0x35, 0xee, 0x37], [0x98], "\u{2a6a5}");
        assert_finish_err!(decoder, "");
        assert_feed_ok!(decoder, [0x98, 0x35, 0xee, 0x37], [], "\u{2a6a5}");
        assert_finish_ok!(decoder, "");
    }

    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let text = testutils::SIMPLIFIED_CHINESE_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| {
            test::black_box(GB18030_ENCODING.encode(&text, EncoderTrap::Strict))
        })
    }

    #[bench]
    fn bench_decode_short_text(bencher: &mut test::Bencher) {
        let encoded = GB18030_ENCODING
            .encode(testutils::SIMPLIFIED_CHINESE_TEXT, EncoderTrap::Strict)
            .ok()
            .unwrap();
        bencher.bytes = encoded.len() as u64;
        bencher.iter(|| {
            test::black_box(GB18030_ENCODING.decode(&encoded, DecoderTrap::Strict))
        })
    }
}
423
/// Tests and benchmarks for the GBK encoder. Decoding is shared with
/// GB 18030 and is covered by that encoding's tests.
#[cfg(test)]
mod gbk_tests {
    extern crate test;
    use super::GBK_ENCODING;
    use testutils;
    use types::*;

    #[test]
    fn test_encoder() {
        let mut encoder = GBK_ENCODING.raw_encoder();
        assert_feed_ok!(encoder, "A", "", [0x41]);
        assert_feed_ok!(encoder, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(encoder, "", "", []);
        assert_feed_ok!(encoder,
                        "\u{4e2d}\u{534e}\u{4eba}\u{6c11}\u{5171}\u{548c}\u{56fd}", "",
                        [0xd6, 0xd0, 0xbb, 0xaa, 0xc8, 0xcb, 0xc3, 0xf1,
                         0xb9, 0xb2, 0xba, 0xcd, 0xb9, 0xfa]);
        // GBK writes U+20AC as the single byte 0x80.
        assert_feed_ok!(encoder, "1\u{20ac}/m", "", [0x31, 0x80, 0x2f, 0x6d]);
        assert_feed_ok!(encoder, "\u{ff21}\u{ff22}\u{ff23}", "",
                        [0xa3, 0xc1, 0xa3, 0xc2, 0xa3, 0xc3]);
        // Characters that need the four-byte form are rejected by GBK.
        assert_feed_err!(encoder, "", "\u{80}", "", []);
        assert_feed_err!(encoder, "", "\u{81}", "", []);
        assert_feed_err!(encoder, "", "\u{a3}", "", []);
        assert_feed_ok!(encoder, "\u{a4}", "", [0xa1, 0xe8]);
        assert_feed_err!(encoder, "", "\u{a5}", "", []);
        assert_feed_err!(encoder, "", "\u{10ffff}", "", []);
        assert_feed_err!(encoder, "", "\u{2a6a5}", "\u{3007}", []);
        assert_feed_err!(encoder, "\u{3007}", "\u{2a6a5}", "", [0xa9, 0x96]);
        assert_finish_ok!(encoder, []);
    }

    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let text = testutils::SIMPLIFIED_CHINESE_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| {
            test::black_box(GBK_ENCODING.encode(&text, EncoderTrap::Strict))
        })
    }
}
464
/// Encoding handle for HZ: 7-bit text in which `~{` and `~}` switch between
/// ASCII and two-byte mode and `~~` stands for a literal `~`.
#[derive(Copy, Clone)]
pub struct HZEncoding;
476
477impl Encoding for HZEncoding {
478 fn name(&self) -> &'static str { "hz" }
479 fn whatwg_name(&self) -> Option<&'static str> { None }
480 fn raw_encoder(&self) -> Box<RawEncoder> { HZEncoder::new() }
481 fn raw_decoder(&self) -> Box<RawDecoder> { HZDecoder::new() }
482}
483
/// Encoder for HZ.
#[derive(Copy, Clone)]
pub struct HZEncoder {
    /// `true` while the output is inside a `~{` ... `~}` two-byte section.
    escaped: bool,
}
489
490impl HZEncoder {
491 pub fn new() -> Box<RawEncoder> { Box::new(HZEncoder { escaped: false }) }
492}
493
494impl RawEncoder for HZEncoder {
495 fn from_self(&self) -> Box<RawEncoder> { HZEncoder::new() }
496 fn is_ascii_compatible(&self) -> bool { false }
497
498 fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
499 output.writer_hint(input.len());
500
501 let mut escaped = self.escaped;
502 macro_rules! ensure_escaped(
503 () => (if !escaped { output.write_bytes(b"~{"); escaped = true; })
504 );
505 macro_rules! ensure_unescaped(
506 () => (if escaped { output.write_bytes(b"~}"); escaped = false; })
507 );
508
509 for ((i,j), ch) in input.index_iter() {
510 if ch < '\u{80}' {
511 ensure_unescaped!();
512 output.write_byte(ch as u8);
513 if ch == '~' { output.write_byte('~' as u8); }
514 } else {
515 let ptr = index::gb18030::backward(ch as u32);
516 if ptr == 0xffff {
517 self.escaped = escaped; return (i, Some(CodecError {
519 upto: j as isize, cause: "unrepresentable character".into()
520 }));
521 } else {
522 let lead = ptr / 190;
523 let trail = ptr % 190;
524 if lead < 0x21 - 1 || trail < 0x21 + 0x3f { self.escaped = escaped; return (i, Some(CodecError {
527 upto: j as isize, cause: "unrepresentable character".into()
528 }));
529 } else {
530 ensure_escaped!();
531 output.write_byte((lead + 1) as u8);
532 output.write_byte((trail - 0x3f) as u8);
533 }
534 }
535 }
536 }
537
538 self.escaped = escaped;
539 (input.len(), None)
540 }
541
542 fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
543 None
544 }
545}
546
547#[derive(Clone, Copy)]
549struct HZDecoder {
550 st: hz::State,
551}
552
553impl HZDecoder {
554 pub fn new() -> Box<RawDecoder> {
555 Box::new(HZDecoder { st: Default::default() })
556 }
557}
558
559impl RawDecoder for HZDecoder {
560 fn from_self(&self) -> Box<RawDecoder> { HZDecoder::new() }
561 fn is_ascii_compatible(&self) -> bool { true }
562
563 fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
564 let (st, processed, err) = hz::raw_feed(self.st, input, output, &());
565 self.st = st;
566 (processed, err)
567 }
568
569 fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
570 let (st, err) = hz::raw_finish(self.st, output, &());
571 self.st = st;
572 err
573 }
574}
575
576stateful_decoder! {
577 module hz;
578
579 internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
580 use index_simpchinese as index;
581
582 let lead = lead as u16;
583 let trail = trail as u16;
584 let index = match (lead, trail) {
585 (0x20...0x7f, 0x21...0x7e) => (lead - 1) * 190 + (trail + 0x3f),
586 _ => 0xffff,
587 };
588 index::gb18030::forward(index)
589 }
590
591initial:
592 state A0(ctx: Context) {
594 case 0x7e => A1(ctx);
595 case b @ 0x00...0x7f => ctx.emit(b as u32);
596 case _ => ctx.err("invalid sequence");
597 final => ctx.reset();
598 }
599
600checkpoint:
601 state B0(ctx: Context) {
603 case 0x7e => B1(ctx);
604 case b @ 0x20...0x7f => B2(ctx, b);
605 case 0x0a => ctx.err("invalid sequence"); case _ => ctx.err("invalid sequence"), B0(ctx);
607 final => ctx.reset();
608 }
609
610transient:
611 state A1(ctx: Context) {
613 case 0x7b => B0(ctx);
614 case 0x7d => A0(ctx);
615 case 0x7e => ctx.emit(0x7e), A0(ctx);
616 case 0x0a => A0(ctx);
617 case _ => ctx.backup_and_err(1, "invalid sequence");
618 final => ctx.err("incomplete sequence");
619 }
620
621 state B1(ctx: Context) {
623 case 0x7b => B0(ctx);
624 case 0x7d => A0(ctx);
625 case 0x7e => ctx.emit(0x7e), B0(ctx);
626 case 0x0a => A0(ctx);
627 case _ => ctx.backup_and_err(1, "invalid sequence"), B0(ctx);
628 final => ctx.err("incomplete sequence");
629 }
630
631 state B2(ctx: Context, lead: u8) {
633 case 0x0a => ctx.err("invalid sequence"); case b =>
635 match map_two_bytes(lead, b) {
636 0xffff => ctx.err("invalid sequence"),
637 ch => ctx.emit(ch)
638 },
639 B0(ctx);
640 final => ctx.err("incomplete sequence");
641 }
642}
643
/// Tests and benchmarks for the HZ encoder and decoder.
#[cfg(test)]
mod hz_tests {
    extern crate test;
    use super::HZEncoding;
    use testutils;
    use types::*;

    #[test]
    fn test_encoder_valid() {
        let mut encoder = HZEncoding.raw_encoder();
        assert_feed_ok!(encoder, "A", "", *b"A");
        assert_feed_ok!(encoder, "BC", "", *b"BC");
        assert_feed_ok!(encoder, "", "", *b"");
        assert_feed_ok!(encoder,
                        "\u{4e2d}\u{534e}\u{4eba}\u{6c11}\u{5171}\u{548c}\u{56fd}", "",
                        *b"~{VP;*HKCq92:M9z");
        // Still in two-byte mode from the previous feed: no new `~{`.
        assert_feed_ok!(encoder, "\u{ff21}\u{ff22}\u{ff23}", "", *b"#A#B#C");
        assert_feed_ok!(encoder, "1\u{20ac}/m", "", *b"~}1~{\"c~}/m");
        assert_feed_ok!(encoder, "~<\u{a4}~\u{0a4}>~", "", *b"~~<~{!h~}~~~{!h~}>~~");
        assert_finish_ok!(encoder, []);
    }

    #[test]
    fn test_encoder_invalid() {
        let mut encoder = HZEncoding.raw_encoder();
        assert_feed_err!(encoder, "", "\u{ffff}", "", []);
        assert_feed_err!(encoder, "?", "\u{ffff}", "!", [0x3f]);
        // U+3007 is accepted by the GBK and GB 18030 encoders but not by HZ.
        assert_feed_err!(encoder, "", "\u{3007}", "", []);
        assert_finish_ok!(encoder, []);
    }

    #[test]
    fn test_decoder_valid() {
        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"A", *b"", "A");
        assert_feed_ok!(decoder, *b"BC", *b"", "BC");
        assert_feed_ok!(decoder, *b"D~~E", *b"~", "D~E");
        assert_feed_ok!(decoder, *b"~F~\nG", *b"~", "~FG");
        assert_feed_ok!(decoder, *b"", *b"", "");
        assert_feed_ok!(decoder, *b"\nH", *b"~", "H");
        assert_feed_ok!(decoder, *b"{VP~}~{;*~{HKCq92:M9z", *b"",
                        "\u{4e2d}\u{534e}\u{4eba}\u{6c11}\u{5171}\u{548c}\u{56fd}");
        assert_feed_ok!(decoder, *b"", *b"#", "");
        assert_feed_ok!(decoder, *b"A", *b"~", "\u{ff21}");
        assert_feed_ok!(decoder, *b"~#B~~#C", *b"~", "~\u{ff22}~\u{ff23}");
        assert_feed_ok!(decoder, *b"", *b"", "");
        assert_feed_ok!(decoder, *b"\n#D~{#E~\n#F~{#G", *b"~", "#D\u{ff25}#F\u{ff27}");
        assert_feed_ok!(decoder, *b"}X~}YZ", *b"", "XYZ");
        assert_finish_ok!(decoder, "");
    }

    // NOTE(review): "out_or_range" looks like a typo for "out_of_range"; the
    // name is kept so existing test filters keep matching.
    #[test]
    fn test_decoder_invalid_out_or_range() {
        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"~{", *b"", "");
        assert_feed_err!(decoder, *b"", *b"\x20\x20", *b"", "");
        assert_feed_err!(decoder, *b"", *b"\x20\x7f", *b"", "");
        assert_feed_err!(decoder, *b"", *b"\x21\x7f", *b"", "");
        assert_feed_err!(decoder, *b"", *b"\x7f\x20", *b"", "");
        assert_feed_err!(decoder, *b"", *b"\x7f\x21", *b"", "");
        assert_feed_err!(decoder, *b"", *b"\x7f\x7f", *b"", "");
        assert_finish_ok!(decoder, "");
    }

    // NOTE(review): despite the name, the byte exercised here is `\n`
    // (line feed, 0x0a), not a carriage return.
    #[test]
    fn test_decoder_invalid_carriage_return() {
        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"~{#A", *b"", "\u{ff21}");
        assert_feed_err!(decoder, *b"", *b"\n", *b"", "");
        // After the error the decoder is back in ASCII mode.
        assert_feed_ok!(decoder, *b"#B~{#C", *b"", "#B\u{ff23}");
        assert_feed_err!(decoder, *b"", *b"#\n", *b"", "");
        assert_feed_ok!(decoder, *b"#D", *b"", "#D");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_invalid_partial() {
        // Finishing with a pending `~` or a pending lead byte is an error.
        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"", *b"~", "");
        assert_finish_err!(decoder, "");

        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"~{", *b"#", "");
        assert_finish_err!(decoder, "");

        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"~{#A", *b"~", "\u{ff21}");
        assert_finish_err!(decoder, "");
    }

    #[test]
    fn test_decoder_invalid_escape() {
        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"#A", *b"", "#A");
        assert_feed_err!(decoder, *b"", *b"~", *b"xy", "");
        assert_feed_ok!(decoder, *b"#B", *b"", "#B");
        assert_feed_ok!(decoder, *b"", *b"~", "");
        assert_feed_err!(decoder, *b"", *b"", *b"xy", "");
        assert_feed_ok!(decoder, *b"#C~{#D", *b"", "#C\u{ff24}");
        assert_feed_err!(decoder, *b"", *b"~", *b"xy", "");
        // An invalid escape inside two-byte mode does not leave that mode.
        assert_feed_ok!(decoder, *b"#E", *b"", "\u{ff25}");
        assert_feed_ok!(decoder, *b"", *b"~", "");
        assert_feed_err!(decoder, *b"", *b"", *b"xy", "");
        assert_feed_ok!(decoder, *b"#F~}#G", *b"", "\u{ff26}#G");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_feed_after_finish() {
        // A failed finish must leave the decoder usable, and back in ASCII mode.
        let mut decoder = HZEncoding.raw_decoder();
        assert_feed_ok!(decoder, *b"R;~{R;", *b"R", "R;\u{4e00}");
        assert_finish_err!(decoder, "");
        assert_feed_ok!(decoder, *b"R;~{R;", *b"", "R;\u{4e00}");
        assert_finish_ok!(decoder, "");
    }

    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let text = testutils::SIMPLIFIED_CHINESE_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| {
            test::black_box(HZEncoding.encode(&text, EncoderTrap::Strict))
        })
    }

    #[bench]
    fn bench_decode_short_text(bencher: &mut test::Bencher) {
        let encoded = HZEncoding
            .encode(testutils::SIMPLIFIED_CHINESE_TEXT, EncoderTrap::Strict)
            .ok()
            .unwrap();
        bencher.bytes = encoded.len() as u64;
        bencher.iter(|| {
            test::black_box(HZEncoding.decode(&encoded, DecoderTrap::Strict))
        })
    }
}
780