1use std::convert::Into;
8use std::default::Default;
9use util::StrCharIndex;
10use index_korean as index;
11use types::*;
12
/// Zero-sized handle for the Windows code page 949 encoding.
///
/// The `Encoding` implementation in this file reports it under the name
/// `"windows-949"` and maps it to the WHATWG name `"euc-kr"`.
#[derive(Copy, Clone)]
pub struct Windows949Encoding;
26
27impl Encoding for Windows949Encoding {
28 fn name(&self) -> &'static str { "windows-949" }
29 fn whatwg_name(&self) -> Option<&'static str> { Some("euc-kr") } fn raw_encoder(&self) -> Box<RawEncoder> { Windows949Encoder::new() }
31 fn raw_decoder(&self) -> Box<RawDecoder> { Windows949Decoder::new() }
32}
33
/// Encoder for Windows code page 949.
///
/// The type carries no fields, so it holds no state between feeds.
#[derive(Copy, Clone)]
pub struct Windows949Encoder;
37
38impl Windows949Encoder {
39 pub fn new() -> Box<RawEncoder> { Box::new(Windows949Encoder) }
40}
41
42impl RawEncoder for Windows949Encoder {
43 fn from_self(&self) -> Box<RawEncoder> { Windows949Encoder::new() }
44 fn is_ascii_compatible(&self) -> bool { true }
45
46 fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
47 output.writer_hint(input.len());
48
49 for ((i,j), ch) in input.index_iter() {
50 if ch <= '\u{7f}' {
51 output.write_byte(ch as u8);
52 } else {
53 let ptr = index::euc_kr::backward(ch as u32);
54 if ptr == 0xffff {
55 return (i, Some(CodecError {
56 upto: j as isize, cause: "unrepresentable character".into()
57 }));
58 } else {
59 output.write_byte((ptr / 190 + 0x81) as u8);
60 output.write_byte((ptr % 190 + 0x41) as u8);
61 }
62 }
63 }
64 (input.len(), None)
65 }
66
67 fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
68 None
69 }
70}
71
72#[derive(Clone, Copy)]
74struct Windows949Decoder {
75 st: windows949::State,
76}
77
78impl Windows949Decoder {
79 pub fn new() -> Box<RawDecoder> {
80 Box::new(Windows949Decoder { st: Default::default() })
81 }
82}
83
84impl RawDecoder for Windows949Decoder {
85 fn from_self(&self) -> Box<RawDecoder> { Windows949Decoder::new() }
86 fn is_ascii_compatible(&self) -> bool { true }
87
88 fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
89 let (st, processed, err) = windows949::raw_feed(self.st, input, output, &());
90 self.st = st;
91 (processed, err)
92 }
93
94 fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
95 let (st, err) = windows949::raw_finish(self.st, output, &());
96 self.st = st;
97 err
98 }
99}
100
101stateful_decoder! {
102 module windows949;
103
104 internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
105 use index_korean as index;
106
107 let lead = lead as u16;
108 let trail = trail as u16;
109 let index = match (lead, trail) {
110 (0x81...0xfe, 0x41...0xfe) => (lead - 0x81) * 190 + (trail - 0x41),
111 (_, _) => 0xffff,
112 };
113 index::euc_kr::forward(index)
114 }
115
116initial:
117 state S0(ctx: Context) {
119 case b @ 0x00...0x7f => ctx.emit(b as u32);
120 case b @ 0x81...0xfe => S1(ctx, b);
121 case _ => ctx.err("invalid sequence");
122 }
123
124transient:
125 state S1(ctx: Context, lead: u8) {
127 case b => match map_two_bytes(lead, b) {
128 0xffff => {
129 let backup = if b < 0x80 {1} else {0};
130 ctx.backup_and_err(backup, "invalid sequence")
131 },
132 ch => ctx.emit(ch as u32)
133 };
134 }
135}
136
#[cfg(test)]
mod windows949_tests {
    extern crate test;
    use super::Windows949Encoding;
    use testutils;
    use types::*;

    // ASCII and Hangul input encodes to the expected one- and two-byte forms.
    #[test]
    fn test_encoder_valid() {
        let mut encoder = Windows949Encoding.raw_encoder();
        assert_feed_ok!(encoder, "A", "", [0x41]);
        assert_feed_ok!(encoder, "BC", "", [0x42, 0x43]);
        assert_feed_ok!(encoder, "", "", []);
        assert_feed_ok!(encoder, "\u{ac00}", "", [0xb0, 0xa1]);
        assert_feed_ok!(encoder, "\u{b098}\u{b2e4}", "", [0xb3, 0xaa, 0xb4, 0xd9]);
        assert_feed_ok!(
            encoder,
            "\u{bdc1}\u{314b}\u{d7a3}",
            "",
            [0x94, 0xee, 0xa4, 0xbb, 0xc6, 0x52]
        );
        assert_finish_ok!(encoder, []);
    }

    // Characters without a mapping are reported as errors; output produced
    // before the offending character is kept.
    #[test]
    fn test_encoder_invalid() {
        let mut encoder = Windows949Encoding.raw_encoder();
        assert_feed_err!(encoder, "", "\u{ffff}", "", []);
        assert_feed_err!(encoder, "?", "\u{ffff}", "!", [0x3f]);
        assert_feed_err!(encoder, "?", "\u{fffd}", "!", [0x3f]);
        assert_finish_ok!(encoder, []);
    }

    // Complete byte sequences decode in a single feed.
    #[test]
    fn test_decoder_valid() {
        let mut decoder = Windows949Encoding.raw_decoder();
        assert_feed_ok!(decoder, [0x41], [], "A");
        assert_feed_ok!(decoder, [0x42, 0x43], [], "BC");
        assert_feed_ok!(decoder, [], [], "");
        assert_feed_ok!(decoder, [0xb0, 0xa1], [], "\u{ac00}");
        assert_feed_ok!(decoder, [0xb3, 0xaa, 0xb4, 0xd9], [], "\u{b098}\u{b2e4}");
        assert_feed_ok!(
            decoder,
            [0x94, 0xee, 0xa4, 0xbb, 0xc6, 0x52, 0xc1, 0x64],
            [],
            "\u{bdc1}\u{314b}\u{d7a3}\u{d58f}"
        );
        assert_finish_ok!(decoder, "");
    }

    // A two-byte sequence split across feeds is completed by the next feed.
    #[test]
    fn test_decoder_valid_partial() {
        let mut decoder = Windows949Encoding.raw_decoder();
        assert_feed_ok!(decoder, [], [0xb0], "");
        assert_feed_ok!(decoder, [0xa1], [], "\u{ac00}");
        assert_feed_ok!(decoder, [0xb3, 0xaa], [0xb4], "\u{b098}");
        assert_feed_ok!(decoder, [0xd9], [0x94], "\u{b2e4}");
        assert_feed_ok!(decoder, [0xee, 0xa4, 0xbb], [0xc6], "\u{bdc1}\u{314b}");
        assert_feed_ok!(decoder, [0x52, 0xc1, 0x64], [], "\u{d7a3}\u{d58f}");
        assert_finish_ok!(decoder, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lead_immediate_test_finish() {
        // Every valid lead byte is accepted on its own, but finishing while
        // it is still pending is an error.
        for lead in 0x81..0xff {
            let mut decoder = Windows949Encoding.raw_decoder();
            assert_feed_ok!(decoder, [], [lead], "");
            assert_finish_err!(decoder, "");
        }

        // 0x80 and 0xff are not lead bytes: they fail immediately and leave
        // nothing pending for `raw_finish`.
        let mut decoder = Windows949Encoding.raw_decoder();
        assert_feed_err!(decoder, [], [0x80], [], "");
        assert_feed_err!(decoder, [], [0xff], [], "");
        assert_finish_ok!(decoder, "");
    }

    // A non-ASCII byte followed by a space is an error, and the space is
    // left unprocessed.
    #[test]
    fn test_decoder_invalid_lone_lead_followed_by_space() {
        for value in 0x80..0x100 {
            // The range's upper bound does not fit in `u8`, so narrow here.
            let lead = value as u8;
            let mut decoder = Windows949Encoding.raw_decoder();
            assert_feed_err!(decoder, [], [lead], [0x20], "");
            assert_finish_ok!(decoder, "");
        }
    }

    #[test]
    fn test_decoder_invalid_lead_followed_by_invalid_trail() {
        for lead in 0x81..0xff {
            // Lead and invalid trail delivered in the same feed.
            let mut decoder = Windows949Encoding.raw_decoder();
            assert_feed_err!(decoder, [], [lead, 0x80], [0x20], "");
            assert_feed_err!(decoder, [], [lead, 0xff], [0x20], "");
            assert_finish_ok!(decoder, "");

            // Same sequences, with the lead byte delivered in a feed of its own.
            let mut decoder = Windows949Encoding.raw_decoder();
            assert_feed_ok!(decoder, [], [lead], "");
            assert_feed_err!(decoder, [], [0x80], [0x20], "");
            assert_feed_ok!(decoder, [], [lead], "");
            assert_feed_err!(decoder, [], [0xff], [0x20], "");
            assert_finish_ok!(decoder, "");
        }

        // 0x80 and 0xff are rejected on their own; the byte that follows
        // stays unprocessed.
        let mut decoder = Windows949Encoding.raw_decoder();
        assert_feed_err!(decoder, [], [0x80], [0x80], "");
        assert_feed_err!(decoder, [], [0x80], [0xff], "");
        assert_feed_err!(decoder, [], [0xff], [0x80], "");
        assert_feed_err!(decoder, [], [0xff], [0xff], "");
        assert_finish_ok!(decoder, "");
    }

    // 0xc6 0x52 decodes (see the valid tests); the next trail byte, 0x53,
    // must be rejected and left unprocessed.
    #[test]
    fn test_decoder_invalid_boundary() {
        let mut decoder = Windows949Encoding.raw_decoder();
        assert_feed_ok!(decoder, [], [0xc6], "");
        assert_feed_err!(decoder, [], [], [0x53], "");
        assert_finish_ok!(decoder, "");
    }

    // After a failed finish the decoder can be fed again from a clean state.
    #[test]
    fn test_decoder_feed_after_finish() {
        let mut decoder = Windows949Encoding.raw_decoder();
        assert_feed_ok!(decoder, [0xb0, 0xa1], [0xb0], "\u{ac00}");
        assert_finish_err!(decoder, "");
        assert_feed_ok!(decoder, [0xb0, 0xa1], [], "\u{ac00}");
        assert_finish_ok!(decoder, "");
    }

    // Throughput of strict encoding over the shared Korean sample text.
    #[bench]
    fn bench_encode_short_text(bencher: &mut test::Bencher) {
        let text = testutils::KOREAN_TEXT;
        bencher.bytes = text.len() as u64;
        bencher.iter(|| {
            test::black_box(Windows949Encoding.encode(&text, EncoderTrap::Strict))
        })
    }

    // Throughput of strict decoding over the encoded form of the same text.
    #[bench]
    fn bench_decode_short_text(bencher: &mut test::Bencher) {
        let encoded = Windows949Encoding
            .encode(testutils::KOREAN_TEXT, EncoderTrap::Strict)
            .ok()
            .unwrap();
        bencher.bytes = encoded.len() as u64;
        bencher.iter(|| {
            test::black_box(Windows949Encoding.decode(&encoded, DecoderTrap::Strict))
        })
    }
}
280