encoding/codec/
singlebyte.rs

1// This is a part of rust-encoding.
2// Copyright (c) 2013-2015, Kang Seonghoon.
3// See README.md and LICENSE.txt for details.
4
5//! Common codec implementation for single-byte encodings.
6
7use std::convert::Into;
8use util::{as_char, StrCharIndex};
9use types::*;
10
/// Shared description of an ASCII-based single-byte encoding.
///
/// The two function pointers supply the table lookups. The encoder and decoder
/// defined in this module only consult them for values above 0x7f; everything
/// at or below 0x7f is passed through unchanged.
#[derive(Clone, Copy)]
pub struct SingleByteEncoding {
    /// Name reported through `Encoding::name`.
    pub name: &'static str,
    /// Name reported through `Encoding::whatwg_name`, if there is one.
    pub whatwg_name: Option<&'static str>,
    /// Byte-to-code lookup handed to the decoder, which treats a result of
    /// `0xffff` as "no mapping".
    pub index_forward: extern "Rust" fn(u8) -> u16,
    /// Code-to-byte lookup handed to the encoder, which treats a result of
    /// `0` as "no mapping".
    pub index_backward: extern "Rust" fn(u32) -> u8,
}
19
20impl Encoding for SingleByteEncoding {
21    fn name(&self) -> &'static str { self.name }
22    fn whatwg_name(&self) -> Option<&'static str> { self.whatwg_name }
23    fn raw_encoder(&self) -> Box<RawEncoder> { SingleByteEncoder::new(self.index_backward) }
24    fn raw_decoder(&self) -> Box<RawDecoder> { SingleByteDecoder::new(self.index_forward) }
25}
26
/// Encoder half of an ASCII-based single-byte encoding.
///
/// Its only state is the code-to-byte lookup function.
#[derive(Copy, Clone)]
pub struct SingleByteEncoder {
    /// Code-to-byte lookup; a result of `0` is treated as "no mapping" when encoding.
    index_backward: extern "Rust" fn(u32) -> u8,
}
32
33impl SingleByteEncoder {
34    pub fn new(index_backward: extern "Rust" fn(u32) -> u8) -> Box<RawEncoder> {
35        Box::new(SingleByteEncoder { index_backward: index_backward })
36    }
37}
38
39impl RawEncoder for SingleByteEncoder {
40    fn from_self(&self) -> Box<RawEncoder> { SingleByteEncoder::new(self.index_backward) }
41    fn is_ascii_compatible(&self) -> bool { true }
42
43    fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
44        output.writer_hint(input.len());
45
46        for ((i,j), ch) in input.index_iter() {
47            if ch <= '\u{7f}' {
48                output.write_byte(ch as u8);
49                continue;
50            } else {
51                let index = (self.index_backward)(ch as u32);
52                if index != 0 {
53                    output.write_byte(index);
54                } else {
55                    return (i, Some(CodecError {
56                        upto: j as isize, cause: "unrepresentable character".into()
57                    }));
58                }
59            }
60        }
61        (input.len(), None)
62    }
63
64    fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
65        None
66    }
67}
68
/// Decoder half of an ASCII-based single-byte encoding.
///
/// Its only state is the byte-to-code lookup function.
#[derive(Copy, Clone)]
pub struct SingleByteDecoder {
    /// Byte-to-code lookup; a result of `0xffff` is treated as "no mapping" when decoding.
    index_forward: extern "Rust" fn(u8) -> u16,
}
74
75impl SingleByteDecoder {
76    pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<RawDecoder> {
77        Box::new(SingleByteDecoder { index_forward: index_forward })
78    }
79}
80
81impl RawDecoder for SingleByteDecoder {
82    fn from_self(&self) -> Box<RawDecoder> { SingleByteDecoder::new(self.index_forward) }
83    fn is_ascii_compatible(&self) -> bool { true }
84
85    fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
86        output.writer_hint(input.len());
87
88        let mut i = 0;
89        let len = input.len();
90        while i < len {
91            if input[i] <= 0x7f {
92                output.write_char(input[i] as char);
93            } else {
94                let ch = (self.index_forward)(input[i]);
95                if ch != 0xffff {
96                    output.write_char(as_char(ch as u32));
97                } else {
98                    return (i, Some(CodecError {
99                        upto: i as isize + 1, cause: "invalid sequence".into()
100                    }));
101                }
102            }
103            i += 1;
104        }
105        (i, None)
106    }
107
108    fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> {
109        None
110    }
111}
112
/// Computed (table-free) index functions for ISO 8859-1.
pub mod iso_8859_1 {
    /// Maps a byte to the code point with the same numeric value.
    #[inline]
    pub fn forward(code: u8) -> u16 {
        let widened = code as u16;
        widened
    }

    /// Maps a code point in `0x80..=0xff` to the byte with the same value.
    ///
    /// Every other input, including the range below 0x80, yields `0`.
    #[inline]
    pub fn backward(code: u32) -> u8 {
        let in_upper_half = 0x80 <= code && code <= 0xff;
        if in_upper_half {
            code as u8
        } else {
            0
        }
    }
}
118
#[cfg(test)]
mod tests {
    use all::ISO_8859_2;
    use types::*;

    /// Feeds the ISO 8859-2 encoder U+FFFF (the last BMP code point) and
    /// U+10000 (the first code point beyond the BMP), each between "A" and "B".
    #[test]
    fn test_encoder_non_bmp() {
        let mut encoder = ISO_8859_2.raw_encoder();
        // NOTE(review): `assert_feed_err!` is defined elsewhere; the arguments
        // are presumably (encoder, accepted prefix, offending part, unprocessed
        // rest, expected output bytes) — confirm against the macro definition.
        assert_feed_err!(encoder, "A", "\u{FFFF}", "B", [0x41]);
        assert_feed_err!(encoder, "A", "\u{10000}", "B", [0x41]);
    }
}
131