1use encoding_rs::Encoding;
10use std::fmt;
11
12use crate::error::Error;
13
14#[derive(Debug, Clone, Copy, PartialEq, Eq)]
15pub struct Collation {
16 info: u32,
18 sort_id: u8,
20}
21
22impl Collation {
23 pub fn new(info: u32, sort_id: u8) -> Self {
24 Self { info, sort_id }
25 }
26
27 pub fn lcid(&self) -> u16 {
29 (self.info & 0xffff) as u16
30 }
31
32 pub fn sort_id(&self) -> u8 {
33 self.sort_id
34 }
35
36 pub fn info(&self) -> u32 {
37 self.info
38 }
39
40 pub fn encoding(&self) -> crate::Result<&'static Encoding> {
42 let res = if self.sort_id == 0 {
43 lcid_to_encoding(self.lcid())
44 } else {
45 sortid_to_encoding(self.sort_id)
46 };
47
48 res.ok_or_else(|| {
49 Error::Encoding(
50 format!(
51 "encoding: unspported encoding (LCID: {:#02x}, sort ID: {})",
52 self.lcid(),
53 self.sort_id(),
54 )
55 .into(),
56 )
57 })
58 }
59}
60
61impl fmt::Display for Collation {
62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 match self.encoding() {
64 Ok(encoding) => write!(f, "{}", encoding.name()),
65 _ => write!(f, "None"),
66 }
67 }
68}
69
70pub fn lcid_to_encoding(locale: u16) -> Option<&'static Encoding> {
80 match locale {
81 0x0401 => Some(encoding_rs::WINDOWS_1256),
82 0x0402 => Some(encoding_rs::WINDOWS_1251),
83 0x0403 => Some(encoding_rs::WINDOWS_1252),
84 0x0404 | 0x0c04 | 0x1404 => Some(encoding_rs::BIG5),
86 0x0405 => Some(encoding_rs::WINDOWS_1250),
87 0x0406 => Some(encoding_rs::WINDOWS_1252),
88 0x0407 => Some(encoding_rs::WINDOWS_1252),
89 0x0408 => Some(encoding_rs::WINDOWS_1253),
90 0x0409 => Some(encoding_rs::WINDOWS_1252),
91 0x040a => Some(encoding_rs::WINDOWS_1252),
92 0x040b => Some(encoding_rs::WINDOWS_1252),
93 0x040c => Some(encoding_rs::WINDOWS_1252),
94 0x040d => Some(encoding_rs::WINDOWS_1255),
95 0x040e => Some(encoding_rs::WINDOWS_1250),
96 0x040f => Some(encoding_rs::WINDOWS_1252),
97 0x0410 => Some(encoding_rs::WINDOWS_1252),
98 0x0411 => Some(encoding_rs::SHIFT_JIS),
100 0x0412 => Some(encoding_rs::EUC_KR),
101 0x0413 => Some(encoding_rs::WINDOWS_1252),
102 0x0414 => Some(encoding_rs::WINDOWS_1252),
103 0x0415 => Some(encoding_rs::WINDOWS_1250),
104 0x0416 => Some(encoding_rs::WINDOWS_1252),
105 0x0417 => Some(encoding_rs::WINDOWS_1252),
106 0x0418 => Some(encoding_rs::WINDOWS_1250),
107 0x0419 => Some(encoding_rs::WINDOWS_1251),
108 0x041a => Some(encoding_rs::WINDOWS_1250),
109 0x041b => Some(encoding_rs::WINDOWS_1250),
110 0x041c => Some(encoding_rs::WINDOWS_1250),
111 0x041d => Some(encoding_rs::WINDOWS_1252),
112 0x041e => Some(encoding_rs::WINDOWS_874),
113 0x041f => Some(encoding_rs::WINDOWS_1254),
114 0x0420 => Some(encoding_rs::WINDOWS_1256),
115 0x0421 => Some(encoding_rs::WINDOWS_1252),
116 0x0422 => Some(encoding_rs::WINDOWS_1251),
117 0x0423 => Some(encoding_rs::WINDOWS_1251),
118 0x0424 => Some(encoding_rs::WINDOWS_1250),
119 0x0425 => Some(encoding_rs::WINDOWS_1257),
120 0x0426 => Some(encoding_rs::WINDOWS_1257),
121 0x0427 => Some(encoding_rs::WINDOWS_1257),
122 0x0428 => Some(encoding_rs::WINDOWS_1251),
123 0x0429 => Some(encoding_rs::WINDOWS_1256),
124 0x042a => Some(encoding_rs::WINDOWS_1258),
125 0x042b => Some(encoding_rs::WINDOWS_1252),
126 0x042c => Some(encoding_rs::WINDOWS_1254),
127 0x042d => Some(encoding_rs::WINDOWS_1252),
128 0x042e => Some(encoding_rs::WINDOWS_1252),
129 0x042f => Some(encoding_rs::WINDOWS_1251),
130 0x0432 => Some(encoding_rs::WINDOWS_1252),
131 0x0434 => Some(encoding_rs::WINDOWS_1252),
132 0x0435 => Some(encoding_rs::WINDOWS_1252),
133 0x0436 => Some(encoding_rs::WINDOWS_1252),
134 0x0437 => Some(encoding_rs::WINDOWS_1252),
135 0x0438 => Some(encoding_rs::WINDOWS_1252),
136 0x0439 => Some(encoding_rs::UTF_16LE),
137 0x043a => Some(encoding_rs::UTF_16LE),
138 0x043b => Some(encoding_rs::WINDOWS_1252),
139 0x043e => Some(encoding_rs::WINDOWS_1252),
140 0x043f => Some(encoding_rs::WINDOWS_1251),
141 0x0440 => Some(encoding_rs::WINDOWS_1251),
142 0x0441 => Some(encoding_rs::WINDOWS_1252),
143 0x0442 => Some(encoding_rs::WINDOWS_1250),
144 0x0443 => Some(encoding_rs::WINDOWS_1254),
145 0x0444 => Some(encoding_rs::WINDOWS_1251),
146 0x0445 => Some(encoding_rs::UTF_16LE),
147 0x0446 => Some(encoding_rs::UTF_16LE),
148 0x0447 => Some(encoding_rs::UTF_16LE),
149 0x0448 => Some(encoding_rs::UTF_16LE),
150 0x0449 => Some(encoding_rs::UTF_16LE),
151 0x044a => Some(encoding_rs::UTF_16LE),
152 0x044b => Some(encoding_rs::UTF_16LE),
153 0x044c => Some(encoding_rs::UTF_16LE),
154 0x044d => Some(encoding_rs::UTF_16LE),
155 0x044e => Some(encoding_rs::UTF_16LE),
156 0x044f => Some(encoding_rs::UTF_16LE),
157 0x0450 => Some(encoding_rs::WINDOWS_1251),
158 0x0451 => Some(encoding_rs::UTF_16LE),
159 0x0452 => Some(encoding_rs::WINDOWS_1252),
160 0x0453 => Some(encoding_rs::UTF_16LE),
161 0x0454 => Some(encoding_rs::UTF_16LE),
162 0x0456 => Some(encoding_rs::WINDOWS_1252),
163 0x0457 => Some(encoding_rs::UTF_16LE),
164 0x045a => Some(encoding_rs::UTF_16LE),
165 0x045b => Some(encoding_rs::UTF_16LE),
166 0x045d => Some(encoding_rs::WINDOWS_1252),
167 0x045e => Some(encoding_rs::WINDOWS_1252),
168 0x0461 => Some(encoding_rs::UTF_16LE),
169 0x0462 => Some(encoding_rs::WINDOWS_1252),
170 0x0463 => Some(encoding_rs::UTF_16LE),
171 0x0464 => Some(encoding_rs::WINDOWS_1252),
172 0x0465 => Some(encoding_rs::UTF_16LE),
173 0x0468 => Some(encoding_rs::WINDOWS_1252),
174 0x046a => Some(encoding_rs::WINDOWS_1252),
175 0x046b => Some(encoding_rs::WINDOWS_1252),
176 0x046c => Some(encoding_rs::WINDOWS_1252),
177 0x046d => Some(encoding_rs::WINDOWS_1251),
178 0x046e => Some(encoding_rs::WINDOWS_1252),
179 0x046f => Some(encoding_rs::WINDOWS_1252),
180 0x0470 => Some(encoding_rs::WINDOWS_1252),
181 0x0478 => Some(encoding_rs::WINDOWS_1252),
182 0x047a => Some(encoding_rs::WINDOWS_1252),
183 0x047c => Some(encoding_rs::WINDOWS_1252),
184 0x047e => Some(encoding_rs::WINDOWS_1252),
185 0x0480 => Some(encoding_rs::WINDOWS_1256),
186 0x0481 => Some(encoding_rs::UTF_16LE),
187 0x0482 => Some(encoding_rs::WINDOWS_1252),
188 0x0483 => Some(encoding_rs::WINDOWS_1252),
189 0x0484 => Some(encoding_rs::WINDOWS_1252),
190 0x0485 => Some(encoding_rs::WINDOWS_1251),
191 0x0486 => Some(encoding_rs::WINDOWS_1252),
192 0x0487 => Some(encoding_rs::WINDOWS_1252),
193 0x0488 => Some(encoding_rs::WINDOWS_1252),
194 0x048c => Some(encoding_rs::WINDOWS_1256),
195 0x0801 => Some(encoding_rs::WINDOWS_1256),
196 0x0804 | 0x1004 => Some(encoding_rs::GB18030),
198 0x0807 => Some(encoding_rs::WINDOWS_1252),
199 0x0809 => Some(encoding_rs::WINDOWS_1252),
200 0x080a => Some(encoding_rs::WINDOWS_1252),
201 0x080c => Some(encoding_rs::WINDOWS_1252),
202 0x0810 => Some(encoding_rs::WINDOWS_1252),
203 0x0813 => Some(encoding_rs::WINDOWS_1252),
204 0x0814 => Some(encoding_rs::WINDOWS_1252),
205 0x0816 => Some(encoding_rs::WINDOWS_1252),
206 0x081a => Some(encoding_rs::WINDOWS_1250),
207 0x081d => Some(encoding_rs::WINDOWS_1252),
208 0x0827 => Some(encoding_rs::WINDOWS_1257),
209 0x082c => Some(encoding_rs::WINDOWS_1251),
210 0x082e => Some(encoding_rs::WINDOWS_1252),
211 0x083b => Some(encoding_rs::WINDOWS_1252),
212 0x083c => Some(encoding_rs::WINDOWS_1252),
213 0x083e => Some(encoding_rs::WINDOWS_1252),
214 0x0843 => Some(encoding_rs::WINDOWS_1251),
215 0x0845 => Some(encoding_rs::UTF_16LE),
216 0x0850 => Some(encoding_rs::WINDOWS_1251),
217 0x085d => Some(encoding_rs::WINDOWS_1252),
218 0x085f => Some(encoding_rs::WINDOWS_1252),
219 0x086b => Some(encoding_rs::WINDOWS_1252),
220 0x0c01 => Some(encoding_rs::WINDOWS_1256),
221 0x0c07 => Some(encoding_rs::WINDOWS_1252),
222 0x0c09 => Some(encoding_rs::WINDOWS_1252),
223 0x0c0a => Some(encoding_rs::WINDOWS_1252),
224 0x0c0c => Some(encoding_rs::WINDOWS_1252),
225 0x0c1a => Some(encoding_rs::WINDOWS_1251),
226 0x0c3b => Some(encoding_rs::WINDOWS_1252),
227 0x0c6b => Some(encoding_rs::WINDOWS_1252),
228 0x1001 => Some(encoding_rs::WINDOWS_1256),
229 0x1007 => Some(encoding_rs::WINDOWS_1252),
230 0x1009 => Some(encoding_rs::WINDOWS_1252),
231 0x100a => Some(encoding_rs::WINDOWS_1252),
232 0x100c => Some(encoding_rs::WINDOWS_1252),
233 0x101a => Some(encoding_rs::WINDOWS_1250),
234 0x103b => Some(encoding_rs::WINDOWS_1252),
235 0x1401 => Some(encoding_rs::WINDOWS_1256),
236 0x1407 => Some(encoding_rs::WINDOWS_1252),
237 0x1409 => Some(encoding_rs::WINDOWS_1252),
238 0x140a => Some(encoding_rs::WINDOWS_1252),
239 0x140c => Some(encoding_rs::WINDOWS_1252),
240 0x141a => Some(encoding_rs::WINDOWS_1250),
241 0x143b => Some(encoding_rs::WINDOWS_1252),
242 0x1801 => Some(encoding_rs::WINDOWS_1256),
243 0x1809 => Some(encoding_rs::WINDOWS_1252),
244 0x180a => Some(encoding_rs::WINDOWS_1252),
245 0x180c => Some(encoding_rs::WINDOWS_1252),
246 0x181a => Some(encoding_rs::WINDOWS_1250),
247 0x183b => Some(encoding_rs::WINDOWS_1252),
248 0x1c01 => Some(encoding_rs::WINDOWS_1256),
249 0x1c09 => Some(encoding_rs::WINDOWS_1252),
250 0x1c0a => Some(encoding_rs::WINDOWS_1252),
251 0x1c1a => Some(encoding_rs::WINDOWS_1251),
252 0x1c3b => Some(encoding_rs::WINDOWS_1252),
253 0x2001 => Some(encoding_rs::WINDOWS_1256),
254 0x2009 => Some(encoding_rs::WINDOWS_1252),
255 0x200a => Some(encoding_rs::WINDOWS_1252),
256 0x201a => Some(encoding_rs::WINDOWS_1251),
257 0x203b => Some(encoding_rs::WINDOWS_1252),
258 0x2401 => Some(encoding_rs::WINDOWS_1256),
259 0x2409 => Some(encoding_rs::WINDOWS_1252),
260 0x240a => Some(encoding_rs::WINDOWS_1252),
261 0x243b => Some(encoding_rs::WINDOWS_1252),
262 0x2801 => Some(encoding_rs::WINDOWS_1256),
263 0x2809 => Some(encoding_rs::WINDOWS_1252),
264 0x280a => Some(encoding_rs::WINDOWS_1252),
265 0x2c01 => Some(encoding_rs::WINDOWS_1256),
266 0x2c09 => Some(encoding_rs::WINDOWS_1252),
267 0x2c0a => Some(encoding_rs::WINDOWS_1252),
268 0x3001 => Some(encoding_rs::WINDOWS_1256),
269 0x3009 => Some(encoding_rs::WINDOWS_1252),
270 0x300a => Some(encoding_rs::WINDOWS_1252),
271 0x3401 => Some(encoding_rs::WINDOWS_1256),
272 0x3409 => Some(encoding_rs::WINDOWS_1252),
273 0x340a => Some(encoding_rs::WINDOWS_1252),
274 0x3801 => Some(encoding_rs::WINDOWS_1256),
275 0x380a => Some(encoding_rs::WINDOWS_1252),
276 0x3c01 => Some(encoding_rs::WINDOWS_1256),
277 0x3c0a => Some(encoding_rs::WINDOWS_1252),
278 0x4001 => Some(encoding_rs::WINDOWS_1256),
279 0x4009 => Some(encoding_rs::WINDOWS_1252),
280 0x400a => Some(encoding_rs::WINDOWS_1252),
281 0x4409 => Some(encoding_rs::WINDOWS_1252),
282 0x440a => Some(encoding_rs::WINDOWS_1252),
283 0x4809 => Some(encoding_rs::WINDOWS_1252),
284 0x480a => Some(encoding_rs::WINDOWS_1252),
285 0x4c0a => Some(encoding_rs::WINDOWS_1252),
286 0x500a => Some(encoding_rs::WINDOWS_1252),
287 0x540a => Some(encoding_rs::WINDOWS_1252),
288 _ => None,
289 }
290}
291
292pub fn sortid_to_encoding(sort_id: u8) -> Option<&'static Encoding> {
301 match sort_id {
302 50 => Some(encoding_rs::WINDOWS_1252),
305 51 => Some(encoding_rs::WINDOWS_1252),
306 52 => Some(encoding_rs::WINDOWS_1252),
307 53 => Some(encoding_rs::WINDOWS_1252),
308 54 => Some(encoding_rs::WINDOWS_1252),
309 71 => Some(encoding_rs::WINDOWS_1252),
311 72 => Some(encoding_rs::WINDOWS_1252),
312 73 => Some(encoding_rs::WINDOWS_1252),
313 74 => Some(encoding_rs::WINDOWS_1252),
314 75 => Some(encoding_rs::WINDOWS_1252),
315 80 => Some(encoding_rs::WINDOWS_1250),
316 81 => Some(encoding_rs::WINDOWS_1250),
317 82 => Some(encoding_rs::WINDOWS_1250),
318 83 => Some(encoding_rs::WINDOWS_1250),
319 84 => Some(encoding_rs::WINDOWS_1250),
320 85 => Some(encoding_rs::WINDOWS_1250),
321 86 => Some(encoding_rs::WINDOWS_1250),
322 87 => Some(encoding_rs::WINDOWS_1250),
323 88 => Some(encoding_rs::WINDOWS_1250),
324 89 => Some(encoding_rs::WINDOWS_1250),
325 90 => Some(encoding_rs::WINDOWS_1250),
326 91 => Some(encoding_rs::WINDOWS_1250),
327 92 => Some(encoding_rs::WINDOWS_1250),
328 93 => Some(encoding_rs::WINDOWS_1250),
329 94 => Some(encoding_rs::WINDOWS_1250),
330 95 => Some(encoding_rs::WINDOWS_1250),
331 96 => Some(encoding_rs::WINDOWS_1250),
332 97 => Some(encoding_rs::WINDOWS_1250),
333 98 => Some(encoding_rs::WINDOWS_1250),
334 104 => Some(encoding_rs::WINDOWS_1251),
335 105 => Some(encoding_rs::WINDOWS_1251),
336 106 => Some(encoding_rs::WINDOWS_1251),
337 107 => Some(encoding_rs::WINDOWS_1251),
338 108 => Some(encoding_rs::WINDOWS_1251),
339 112 => Some(encoding_rs::WINDOWS_1253),
340 113 => Some(encoding_rs::WINDOWS_1253),
341 114 => Some(encoding_rs::WINDOWS_1253),
342 120 => Some(encoding_rs::WINDOWS_1253),
343 121 => Some(encoding_rs::WINDOWS_1253),
344 122 => Some(encoding_rs::WINDOWS_1253),
345 124 => Some(encoding_rs::WINDOWS_1253),
346 128 => Some(encoding_rs::WINDOWS_1254),
347 129 => Some(encoding_rs::WINDOWS_1254),
348 130 => Some(encoding_rs::WINDOWS_1254),
349 136 => Some(encoding_rs::WINDOWS_1255),
350 137 => Some(encoding_rs::WINDOWS_1255),
351 138 => Some(encoding_rs::WINDOWS_1255),
352 144 => Some(encoding_rs::WINDOWS_1256),
353 145 => Some(encoding_rs::WINDOWS_1256),
354 146 => Some(encoding_rs::WINDOWS_1256),
355 152 => Some(encoding_rs::WINDOWS_1257),
356 153 => Some(encoding_rs::WINDOWS_1257),
357 154 => Some(encoding_rs::WINDOWS_1257),
358 155 => Some(encoding_rs::WINDOWS_1257),
359 156 => Some(encoding_rs::WINDOWS_1257),
360 157 => Some(encoding_rs::WINDOWS_1257),
361 158 => Some(encoding_rs::WINDOWS_1257),
362 159 => Some(encoding_rs::WINDOWS_1257),
363 160 => Some(encoding_rs::WINDOWS_1257),
364 183 => Some(encoding_rs::WINDOWS_1252),
365 184 => Some(encoding_rs::WINDOWS_1252),
366 185 => Some(encoding_rs::WINDOWS_1252),
367 186 => Some(encoding_rs::WINDOWS_1252),
368 192 | 193 | 200 => Some(encoding_rs::SHIFT_JIS),
370 194 => Some(encoding_rs::EUC_KR),
371 195 => Some(encoding_rs::EUC_KR),
372 196 | 197 | 202 => Some(encoding_rs::BIG5),
374 198 | 199 | 203 => Some(encoding_rs::GB18030),
376 201 => Some(encoding_rs::BIG5),
377 204 => Some(encoding_rs::WINDOWS_874),
378 205 => Some(encoding_rs::WINDOWS_874),
379 206 => Some(encoding_rs::WINDOWS_874),
380 210 => Some(encoding_rs::WINDOWS_1252),
381 211 => Some(encoding_rs::WINDOWS_1252),
382 212 => Some(encoding_rs::WINDOWS_1252),
383 213 => Some(encoding_rs::WINDOWS_1252),
384 214 => Some(encoding_rs::WINDOWS_1252),
385 215 => Some(encoding_rs::WINDOWS_1252),
386 216 => Some(encoding_rs::WINDOWS_1252),
387 217 => Some(encoding_rs::WINDOWS_1252),
388 _ => None,
389 }
390}
391
392