zip/
cp437.rs

1//! Convert a string in IBM codepage 437 to UTF-8
2
/// Conversion of IBM codepage 437 encoded data into the target type
pub trait FromCp437 {
    /// The type produced by the conversion
    type Target;

    /// Consumes `self`, interpreting it as cp437 data, and returns the converted value.
    ///
    /// Generally no allocation is needed when all data falls into the ASCII range.
    // A `from_*` name usually denotes a constructor without a `self` receiver, which is
    // what the lint checks for; here the receiver is the cp437 data being converted.
    #[allow(clippy::wrong_self_convention)]
    fn from_cp437(self) -> Self::Target;
}
13
14impl<'a> FromCp437 for &'a [u8] {
15    type Target = ::std::borrow::Cow<'a, str>;
16
17    fn from_cp437(self) -> Self::Target {
18        if self.iter().all(|c| *c < 0x80) {
19            ::std::str::from_utf8(self).unwrap().into()
20        } else {
21            self.iter().map(|c| to_char(*c)).collect::<String>().into()
22        }
23    }
24}
25
26impl FromCp437 for Box<[u8]> {
27    type Target = Box<str>;
28
29    fn from_cp437(self) -> Self::Target {
30        if self.iter().all(|c| *c < 0x80) {
31            String::from_utf8(self.into()).unwrap()
32        } else {
33            self.iter().copied().map(to_char).collect()
34        }
35        .into_boxed_str()
36    }
37}
38
/// Maps a single cp437 byte to the `char` it represents.
///
/// Bytes `0x00..=0x7f` map to the `char` with the same numeric value; bytes
/// `0x80..=0xff` are looked up in a table.
fn to_char(input: u8) -> char {
    /// Characters for bytes `0x80..=0xff`, indexed by `byte - 0x80`.
    const UPPER_HALF: [char; 128] = [
        // 0x80
        '\u{00c7}', '\u{00fc}', '\u{00e9}', '\u{00e2}', '\u{00e4}', '\u{00e0}', '\u{00e5}', '\u{00e7}',
        // 0x88
        '\u{00ea}', '\u{00eb}', '\u{00e8}', '\u{00ef}', '\u{00ee}', '\u{00ec}', '\u{00c4}', '\u{00c5}',
        // 0x90
        '\u{00c9}', '\u{00e6}', '\u{00c6}', '\u{00f4}', '\u{00f6}', '\u{00f2}', '\u{00fb}', '\u{00f9}',
        // 0x98
        '\u{00ff}', '\u{00d6}', '\u{00dc}', '\u{00a2}', '\u{00a3}', '\u{00a5}', '\u{20a7}', '\u{0192}',
        // 0xa0
        '\u{00e1}', '\u{00ed}', '\u{00f3}', '\u{00fa}', '\u{00f1}', '\u{00d1}', '\u{00aa}', '\u{00ba}',
        // 0xa8
        '\u{00bf}', '\u{2310}', '\u{00ac}', '\u{00bd}', '\u{00bc}', '\u{00a1}', '\u{00ab}', '\u{00bb}',
        // 0xb0
        '\u{2591}', '\u{2592}', '\u{2593}', '\u{2502}', '\u{2524}', '\u{2561}', '\u{2562}', '\u{2556}',
        // 0xb8
        '\u{2555}', '\u{2563}', '\u{2551}', '\u{2557}', '\u{255d}', '\u{255c}', '\u{255b}', '\u{2510}',
        // 0xc0
        '\u{2514}', '\u{2534}', '\u{252c}', '\u{251c}', '\u{2500}', '\u{253c}', '\u{255e}', '\u{255f}',
        // 0xc8
        '\u{255a}', '\u{2554}', '\u{2569}', '\u{2566}', '\u{2560}', '\u{2550}', '\u{256c}', '\u{2567}',
        // 0xd0
        '\u{2568}', '\u{2564}', '\u{2565}', '\u{2559}', '\u{2558}', '\u{2552}', '\u{2553}', '\u{256b}',
        // 0xd8
        '\u{256a}', '\u{2518}', '\u{250c}', '\u{2588}', '\u{2584}', '\u{258c}', '\u{2590}', '\u{2580}',
        // 0xe0
        '\u{03b1}', '\u{00df}', '\u{0393}', '\u{03c0}', '\u{03a3}', '\u{03c3}', '\u{00b5}', '\u{03c4}',
        // 0xe8
        '\u{03a6}', '\u{0398}', '\u{03a9}', '\u{03b4}', '\u{221e}', '\u{03c6}', '\u{03b5}', '\u{2229}',
        // 0xf0
        '\u{2261}', '\u{00b1}', '\u{2265}', '\u{2264}', '\u{2320}', '\u{2321}', '\u{00f7}', '\u{2248}',
        // 0xf8
        '\u{00b0}', '\u{2219}', '\u{00b7}', '\u{221a}', '\u{207f}', '\u{00b2}', '\u{25a0}', '\u{00a0}',
    ];

    match input.checked_sub(0x80) {
        // `offset` is at most 0x7f, so the index is always within the 128-entry table.
        Some(offset) => UPPER_HALF[usize::from(offset)],
        // Below 0x80 the byte value is the code point itself.
        None => char::from(input),
    }
}
173
#[cfg(test)]
mod test {
    /// Every possible byte value must convert without panicking.
    #[test]
    fn to_char_valid() {
        for i in 0x00_u32..0x100 {
            super::to_char(i as u8);
        }
    }

    /// The lower half of cp437 maps each byte to the `char` with the same value.
    #[test]
    fn ascii() {
        for i in 0x00..0x80 {
            assert_eq!(super::to_char(i), i as char);
        }
    }

    /// Non-ASCII cp437 bytes in a borrowed array are decoded to the expected text.
    #[test]
    #[allow(unknown_lints)] // invalid_from_utf8 was added in rust 1.72
    #[allow(invalid_from_utf8)]
    fn example_slice() {
        use super::FromCp437;
        let data = b"Cura\x87ao";
        assert!(::std::str::from_utf8(data).is_err());
        assert_eq!(data.from_cp437(), "Curaçao");
    }

    /// Box-drawing bytes held in a `Vec<u8>` are decoded to the expected text.
    ///
    /// Of the impls in this module, only the `&[u8]` one is reachable from a `Vec<u8>`
    /// receiver (via auto-deref), so this does not exercise the `Box<[u8]>` impl.
    #[test]
    fn example_vec() {
        use super::FromCp437;
        let data = vec![0xCC, 0xCD, 0xCD, 0xB9];
        assert!(String::from_utf8(data.clone()).is_err());
        assert_eq!(&*data.from_cp437(), "╠══╣");
    }

    /// The owned `Box<[u8]>` impl decodes non-ASCII bytes to the expected text.
    #[test]
    fn example_boxed() {
        use super::FromCp437;
        let data: Box<[u8]> = vec![0xCC, 0xCD, 0xCD, 0xB9].into_boxed_slice();
        // The annotation pins the call to the `Box<[u8]>` impl (`Target = Box<str>`).
        let text: Box<str> = data.from_cp437();
        assert_eq!(&*text, "╠══╣");
    }

    /// The owned `Box<[u8]>` impl passes pure-ASCII input through unchanged.
    #[test]
    fn ascii_boxed() {
        use super::FromCp437;
        let data: Box<[u8]> = b"plain.txt".to_vec().into_boxed_slice();
        let text: Box<str> = data.from_cp437();
        assert_eq!(&*text, "plain.txt");
    }

    /// Pure-ASCII slices are returned as a borrow of the input rather than a copy.
    #[test]
    fn ascii_slice_is_borrowed() {
        use super::FromCp437;
        let data: &[u8] = b"plain.txt";
        match data.from_cp437() {
            ::std::borrow::Cow::Borrowed(text) => assert_eq!(text, "plain.txt"),
            ::std::borrow::Cow::Owned(_) => panic!("pure-ASCII input should be borrowed"),
        }
    }

    /// Empty input is accepted by both impls and yields an empty string.
    #[test]
    fn empty_input() {
        use super::FromCp437;
        let borrowed: &[u8] = &[];
        assert_eq!(borrowed.from_cp437(), "");

        let boxed: Box<[u8]> = Vec::new().into_boxed_slice();
        let text: Box<str> = boxed.from_cp437();
        assert_eq!(&*text, "");
    }
}