encoding/codec/
ascii.rs

1// This is a part of rust-encoding.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! 7-bit ASCII encoding.
6
7use std::mem;
8use std::convert::Into;
9use types::*;
10
11/**
12 * ASCII, also known as ISO/IEC 646:US.
13 *
14 * It is both a basis and a lowest common denominator of many other encodings
15 * including UTF-8, which Rust internally assumes.
16 */
17#[derive(Clone, Copy)]
18pub struct ASCIIEncoding;
19
20impl Encoding for ASCIIEncoding {
21    fn name(&self) -> &'static str { "ascii" }
22    fn raw_encoder(&self) -> Box<RawEncoder> { ASCIIEncoder::new() }
23    fn raw_decoder(&self) -> Box<RawDecoder> { ASCIIDecoder::new() }
24}
25
26/// An encoder for ASCII.
27#[derive(Clone, Copy)]
28pub struct ASCIIEncoder;
29
30impl ASCIIEncoder {
31    pub fn new() -> Box<RawEncoder> { Box::new(ASCIIEncoder) }
32}
33
34impl RawEncoder for ASCIIEncoder {
35    fn from_self(&self) -> Box<RawEncoder> { ASCIIEncoder::new() }
36    fn is_ascii_compatible(&self) -> bool { true }
37
38    fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
39        output.writer_hint(input.len());
40
41        match input.as_bytes().iter().position(|&ch| ch >= 0x80) {
42            Some(first_error) => {
43                output.write_bytes(&input.as_bytes()[..first_error]);
44                let len = input[first_error..].chars().next().unwrap().len_utf8();
45                (first_error, Some(CodecError {
46                    upto: (first_error + len) as isize, cause: "unrepresentable character".into()
47                }))
48            }
49            None => {
50                output.write_bytes(input.as_bytes());
51                (input.len(), None)
52            }
53        }
54    }
55
56    fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
57        None
58    }
59}
60
61/// A decoder for ASCII.
62#[derive(Clone, Copy)]
63pub struct ASCIIDecoder;
64
65impl ASCIIDecoder {
66    pub fn new() -> Box<RawDecoder> { Box::new(ASCIIDecoder) }
67}
68
69impl RawDecoder for ASCIIDecoder {
70    fn from_self(&self) -> Box<RawDecoder> { ASCIIDecoder::new() }
71    fn is_ascii_compatible(&self) -> bool { true }
72
73    fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
74        output.writer_hint(input.len());
75
76        fn write_ascii_bytes(output: &mut StringWriter, buf: &[u8]) {
77            output.write_str(unsafe {mem::transmute(buf)});
78        }
79
80        match input.iter().position(|&ch| ch >= 0x80) {
81            Some(first_error) => {
82                write_ascii_bytes(output, &input[..first_error]);
83                (first_error, Some(CodecError {
84                    upto: first_error as isize + 1, cause: "invalid sequence".into()
85                }))
86            }
87            None => {
88                write_ascii_bytes(output, input);
89                (input.len(), None)
90            }
91        }
92    }
93
94    fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> {
95        None
96    }
97}
98
#[cfg(test)]
mod tests {
    extern crate test;
    use super::ASCIIEncoding;
    use testutils;
    use types::*;

    /// ASCII text is encoded byte-for-byte; U+00A0 is rejected by the encoder.
    #[test]
    fn test_encoder() {
        let mut encoder = ASCIIEncoding.raw_encoder();
        assert_feed_ok!(encoder, "A", "", [0x41]);
        assert_feed_ok!(encoder, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(encoder, "", "", []);
        assert_feed_err!(encoder, "", "\u{a0}", "", []);
        assert_feed_err!(encoder, "X", "\u{a0}", "Z", [0x58]);
        assert_finish_ok!(encoder, []);
    }

    /// ASCII bytes are decoded byte-for-byte; the byte 0xA0 is rejected by the decoder.
    #[test]
    fn test_decoder() {
        let mut decoder = ASCIIEncoding.raw_decoder();
        assert_feed_ok!(decoder, [0x41], [], "A");
        assert_feed_ok!(decoder, [0x42, 0x43], [], "BC");
        assert_feed_ok!(decoder, [], [], "");
        assert_feed_err!(decoder, [], [0xa0], [], "");
        assert_feed_err!(decoder, [0x58], [0xa0], [0x5a], "X");
        assert_finish_ok!(decoder, "");
    }

    /// Benchmarks strict encoding of `testutils::ASCII_TEXT`.
    #[bench]
    fn bench_encode(bencher: &mut test::Bencher) {
        let text = testutils::ASCII_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.encode(text, EncoderTrap::Strict)))
    }

    /// Benchmarks strict decoding of the bytes of `testutils::ASCII_TEXT`.
    #[bench]
    fn bench_decode(bencher: &mut test::Bencher) {
        let raw = testutils::ASCII_TEXT.as_bytes();
        bencher.bytes = raw.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.decode(raw, DecoderTrap::Strict)))
    }

    /// Benchmarks encoding of `testutils::KOREAN_TEXT` with the `Replace` trap.
    #[bench]
    fn bench_encode_replace(bencher: &mut test::Bencher) {
        let text = testutils::KOREAN_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.encode(text, EncoderTrap::Replace)))
    }

    /// Benchmarks decoding of the bytes of `testutils::KOREAN_TEXT` with the `Replace` trap.
    #[bench]
    fn bench_decode_replace(bencher: &mut test::Bencher) {
        let raw = testutils::KOREAN_TEXT.as_bytes();
        bencher.bytes = raw.len() as u64;
        bencher.iter(|| test::black_box(ASCIIEncoding.decode(raw, DecoderTrap::Replace)))
    }
}