1use crate::byte_chunk::{ByteChunk, Chunk};
4
5#[inline]
9pub fn count(text: &str) -> usize {
10 count_impl::<Chunk>(text.as_bytes())
11}
12
13#[inline]
22pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
23 let bytes = text.as_bytes();
24
25 let mut i = byte_idx;
28 while Some(true) == bytes.get(i).map(|byte| (*byte & 0xC0) == 0x80) {
29 i -= 1;
30 }
31
32 count_impl::<Chunk>(&bytes[0..i.min(bytes.len())])
33}
34
35#[inline]
41pub fn to_byte_idx(text: &str, char_idx: usize) -> usize {
42 to_byte_idx_impl::<Chunk>(text, char_idx)
43}
44
45#[inline(always)]
48fn to_byte_idx_impl<T: ByteChunk>(text: &str, char_idx: usize) -> usize {
49 let (start, middle, _) = unsafe { text.as_bytes().align_to::<T>() };
54
55 let mut byte_count = 0;
56 let mut char_count = 0;
57
58 for byte in start.iter() {
60 char_count += ((*byte & 0xC0) != 0x80) as usize;
61 if char_count > char_idx {
62 break;
63 }
64 byte_count += 1;
65 }
66
67 let mut chunks = middle;
69 let mut max_round_len = char_idx.saturating_sub(char_count) / T::MAX_ACC;
70 while max_round_len > 0 && !chunks.is_empty() {
71 let round_len = T::MAX_ACC.min(max_round_len).min(chunks.len());
75 max_round_len -= round_len;
76 let round = &chunks[..round_len];
77 chunks = &chunks[round_len..];
78
79 let mut acc = T::zero();
81 for chunk in round.iter() {
82 acc = acc.add(chunk.bitand(T::splat(0xc0)).cmp_eq_byte(0x80));
83 }
84 char_count += (T::SIZE * round_len) - acc.sum_bytes();
85 byte_count += T::SIZE * round_len;
86 }
87
88 for chunk in chunks.iter() {
90 let new_char_count =
91 char_count + T::SIZE - chunk.bitand(T::splat(0xc0)).cmp_eq_byte(0x80).sum_bytes();
92 if new_char_count >= char_idx {
93 break;
94 }
95 char_count = new_char_count;
96 byte_count += T::SIZE;
97 }
98
99 let end = &text.as_bytes()[byte_count..];
101 for byte in end.iter() {
102 char_count += ((*byte & 0xC0) != 0x80) as usize;
103 if char_count > char_idx {
104 break;
105 }
106 byte_count += 1;
107 }
108
109 byte_count
110}
111
112#[inline(always)]
113pub(crate) fn count_impl<T: ByteChunk>(text: &[u8]) -> usize {
114 if text.len() < T::SIZE {
115 text.iter()
118 .map(|byte| ((byte & 0xC0) != 0x80) as usize)
119 .sum()
120 } else {
121 let (start, middle, end) = unsafe { text.align_to::<T>() };
123
124 let mut inv_count = 0;
125
126 for byte in start.iter() {
128 inv_count += ((byte & 0xC0) == 0x80) as usize;
129 }
130
131 for chunks in middle.chunks(T::MAX_ACC) {
133 let mut acc = T::zero();
134 for chunk in chunks.iter() {
135 acc = acc.add(chunk.bitand(T::splat(0xc0)).cmp_eq_byte(0x80));
136 }
137 inv_count += acc.sum_bytes();
138 }
139
140 for byte in end.iter() {
142 inv_count += ((byte & 0xC0) == 0x80) as usize;
143 }
144
145 text.len() - inv_count
146 }
147}
148
149#[cfg(test)]
152mod tests {
153 use super::*;
154
155 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
157 a fine day, isn't it?\nAren't you glad \
158 we're alive?\nこんにちは、みんなさん!";
159
160 #[test]
161 fn count_01() {
162 let text = "Hello せかい! Hello せかい! Hello せかい! Hello せかい! Hello せかい!";
163
164 assert_eq!(54, count(text));
165 }
166
167 #[test]
168 fn count_02() {
169 assert_eq!(100, count(TEXT_LINES));
170 }
171
172 #[test]
173 fn from_byte_idx_01() {
174 let text = "Hello せかい!";
175 assert_eq!(0, from_byte_idx(text, 0));
176 assert_eq!(1, from_byte_idx(text, 1));
177 assert_eq!(6, from_byte_idx(text, 6));
178 assert_eq!(6, from_byte_idx(text, 7));
179 assert_eq!(6, from_byte_idx(text, 8));
180 assert_eq!(7, from_byte_idx(text, 9));
181 assert_eq!(7, from_byte_idx(text, 10));
182 assert_eq!(7, from_byte_idx(text, 11));
183 assert_eq!(8, from_byte_idx(text, 12));
184 assert_eq!(8, from_byte_idx(text, 13));
185 assert_eq!(8, from_byte_idx(text, 14));
186 assert_eq!(9, from_byte_idx(text, 15));
187 assert_eq!(10, from_byte_idx(text, 16));
188 assert_eq!(10, from_byte_idx(text, 17));
189 assert_eq!(10, from_byte_idx(text, 18));
190 assert_eq!(10, from_byte_idx(text, 19));
191 }
192
193 #[test]
194 fn from_byte_idx_02() {
195 let text = "";
196 assert_eq!(0, from_byte_idx(text, 0));
197 assert_eq!(0, from_byte_idx(text, 1));
198
199 let text = "h";
200 assert_eq!(0, from_byte_idx(text, 0));
201 assert_eq!(1, from_byte_idx(text, 1));
202 assert_eq!(1, from_byte_idx(text, 2));
203
204 let text = "hi";
205 assert_eq!(0, from_byte_idx(text, 0));
206 assert_eq!(1, from_byte_idx(text, 1));
207 assert_eq!(2, from_byte_idx(text, 2));
208 assert_eq!(2, from_byte_idx(text, 3));
209 }
210
211 #[test]
212 fn from_byte_idx_03() {
213 let text = "せかい";
214 assert_eq!(0, from_byte_idx(text, 0));
215 assert_eq!(0, from_byte_idx(text, 1));
216 assert_eq!(0, from_byte_idx(text, 2));
217 assert_eq!(1, from_byte_idx(text, 3));
218 assert_eq!(1, from_byte_idx(text, 4));
219 assert_eq!(1, from_byte_idx(text, 5));
220 assert_eq!(2, from_byte_idx(text, 6));
221 assert_eq!(2, from_byte_idx(text, 7));
222 assert_eq!(2, from_byte_idx(text, 8));
223 assert_eq!(3, from_byte_idx(text, 9));
224 assert_eq!(3, from_byte_idx(text, 10));
225 assert_eq!(3, from_byte_idx(text, 11));
226 assert_eq!(3, from_byte_idx(text, 12));
227 }
228
229 #[test]
230 fn from_byte_idx_04() {
231 for i in 0..88 {
233 assert_eq!(i, from_byte_idx(TEXT_LINES, i));
234 }
235
236 for i in 88..125 {
238 assert_eq!(88 + ((i - 88) / 3), from_byte_idx(TEXT_LINES, i));
239 }
240
241 for i in 125..130 {
243 assert_eq!(100, from_byte_idx(TEXT_LINES, i));
244 }
245 }
246
247 #[test]
248 fn to_byte_idx_01() {
249 let text = "Hello せかい!";
250 assert_eq!(0, to_byte_idx(text, 0));
251 assert_eq!(1, to_byte_idx(text, 1));
252 assert_eq!(2, to_byte_idx(text, 2));
253 assert_eq!(5, to_byte_idx(text, 5));
254 assert_eq!(6, to_byte_idx(text, 6));
255 assert_eq!(12, to_byte_idx(text, 8));
256 assert_eq!(15, to_byte_idx(text, 9));
257 assert_eq!(16, to_byte_idx(text, 10));
258 }
259
260 #[test]
261 fn to_byte_idx_02() {
262 let text = "せかい";
263 assert_eq!(0, to_byte_idx(text, 0));
264 assert_eq!(3, to_byte_idx(text, 1));
265 assert_eq!(6, to_byte_idx(text, 2));
266 assert_eq!(9, to_byte_idx(text, 3));
267 }
268
269 #[test]
270 fn to_byte_idx_03() {
271 let text = "Hello world!";
272 assert_eq!(0, to_byte_idx(text, 0));
273 assert_eq!(1, to_byte_idx(text, 1));
274 assert_eq!(8, to_byte_idx(text, 8));
275 assert_eq!(11, to_byte_idx(text, 11));
276 assert_eq!(12, to_byte_idx(text, 12));
277 }
278
279 #[test]
280 fn to_byte_idx_04() {
281 let text = "Hello world! Hello せかい! Hello world! Hello せかい! \
282 Hello world! Hello せかい! Hello world! Hello せかい! \
283 Hello world! Hello せかい! Hello world! Hello せかい! \
284 Hello world! Hello せかい! Hello world! Hello せかい!";
285 assert_eq!(0, to_byte_idx(text, 0));
286 assert_eq!(30, to_byte_idx(text, 24));
287 assert_eq!(60, to_byte_idx(text, 48));
288 assert_eq!(90, to_byte_idx(text, 72));
289 assert_eq!(115, to_byte_idx(text, 93));
290 assert_eq!(120, to_byte_idx(text, 96));
291 assert_eq!(150, to_byte_idx(text, 120));
292 assert_eq!(180, to_byte_idx(text, 144));
293 assert_eq!(210, to_byte_idx(text, 168));
294 assert_eq!(239, to_byte_idx(text, 191));
295 }
296
297 #[test]
298 fn to_byte_idx_05() {
299 for i in 0..88 {
301 assert_eq!(i, to_byte_idx(TEXT_LINES, i));
302 }
303
304 for i in 88..100 {
306 assert_eq!(88 + ((i - 88) * 3), to_byte_idx(TEXT_LINES, i));
307 }
308
309 for i in 100..110 {
311 assert_eq!(124, to_byte_idx(TEXT_LINES, i));
312 }
313 }
314}