1use crate::byte_chunk::{ByteChunk, Chunk};
13
14#[inline]
18pub fn count_breaks(text: &str) -> usize {
19 count_breaks_impl::<Chunk>(text.as_bytes())
20}
21
22#[inline]
33pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
34 let i = byte_idx.min(text.len());
35 let nl_count = count_breaks_impl::<Chunk>(&text.as_bytes()[..i]);
36 if crate::is_not_crlf_middle(i, text.as_bytes()) {
37 nl_count
38 } else {
39 nl_count - 1
40 }
41}
42
43#[inline]
53pub fn to_byte_idx(text: &str, line_idx: usize) -> usize {
54 to_byte_idx_impl::<Chunk>(text.as_bytes(), line_idx)
55}
56
57#[inline(always)]
60fn to_byte_idx_impl<T: ByteChunk>(text: &[u8], line_idx: usize) -> usize {
61 let (start, middle, _) = unsafe { text.align_to::<T>() };
66
67 let mut byte_count = 0;
68 let mut break_count = 0;
69
70 for byte in start.iter() {
72 if break_count == line_idx {
73 break;
74 }
75 break_count +=
76 (*byte == 0x0A || (*byte == 0x0D && text.get(byte_count + 1) != Some(&0x0A))) as usize;
77 byte_count += 1;
78 }
79
80 let mut chunks = middle;
82 let mut max_round_len = (line_idx - break_count) / T::MAX_ACC;
83 while max_round_len > 0 && !chunks.is_empty() {
84 let round_len = T::MAX_ACC.min(max_round_len).min(chunks.len());
88 max_round_len -= round_len;
89 let round = &chunks[..round_len];
90 chunks = &chunks[round_len..];
91
92 let mut acc = T::zero();
94 for chunk in round.iter() {
95 let lf_flags = chunk.cmp_eq_byte(0x0A);
96 let cr_flags = chunk.cmp_eq_byte(0x0D);
97 let crlf_flags = cr_flags.bitand(lf_flags.shift_back_lex(1));
98 acc = acc.add(lf_flags).add(cr_flags.sub(crlf_flags));
99 }
100 break_count += acc.sum_bytes();
101
102 let mut i = byte_count;
104 while i < (byte_count + T::SIZE * round_len) {
105 i += T::SIZE;
106 break_count -= (text[i - 1] == 0x0D && text.get(i) == Some(&0x0A)) as usize;
107 }
108
109 byte_count += T::SIZE * round_len;
110 }
111
112 for chunk in chunks.iter() {
114 let breaks = {
115 let lf_flags = chunk.cmp_eq_byte(0x0A);
116 let cr_flags = chunk.cmp_eq_byte(0x0D);
117 let crlf_flags = cr_flags.bitand(lf_flags.shift_back_lex(1));
118 lf_flags.add(cr_flags.sub(crlf_flags)).sum_bytes()
119 };
120 let boundary_crlf = {
121 let i = byte_count + T::SIZE;
122 (text[i - 1] == 0x0D && text.get(i) == Some(&0x0A)) as usize
123 };
124 let new_break_count = break_count + breaks - boundary_crlf;
125 if new_break_count >= line_idx {
126 break;
127 }
128 break_count = new_break_count;
129 byte_count += T::SIZE;
130 }
131
132 let end = &text[byte_count..];
134 for byte in end.iter() {
135 if break_count == line_idx {
136 break;
137 }
138 break_count +=
139 (*byte == 0x0A || (*byte == 0x0D && text.get(byte_count + 1) != Some(&0x0A))) as usize;
140 byte_count += 1;
141 }
142
143 byte_count
145}
146
147#[inline(always)]
152fn count_breaks_impl<T: ByteChunk>(text: &[u8]) -> usize {
153 let (start, middle, end) = unsafe { text.align_to::<T>() };
155
156 let mut count = 0;
157
158 let mut last_was_cr = false;
160 for byte in start.iter().copied() {
161 let is_lf = byte == 0x0A;
162 let is_cr = byte == 0x0D;
163 count += (is_cr | (is_lf & !last_was_cr)) as usize;
164 last_was_cr = is_cr;
165 }
166
167 for chunks in middle.chunks(T::MAX_ACC) {
169 let mut acc = T::zero();
170 for chunk in chunks.iter() {
171 let lf_flags = chunk.cmp_eq_byte(0x0A);
172 let cr_flags = chunk.cmp_eq_byte(0x0D);
173 let crlf_flags = cr_flags.bitand(lf_flags.shift_back_lex(1));
174 acc = acc.add(lf_flags).add(cr_flags.sub(crlf_flags));
175 }
176 count += acc.sum_bytes();
177 }
178
179 let mut i = start.len();
181 while i < (text.len() - end.len()) {
182 if text[i] == 0x0A {
183 count -= (text.get(i.saturating_sub(1)) == Some(&0x0D)) as usize;
184 }
185 i += T::SIZE;
186 }
187
188 let mut last_was_cr = text.get((text.len() - end.len()).saturating_sub(1)) == Some(&0x0D);
190 for byte in end.iter().copied() {
191 let is_lf = byte == 0x0A;
192 let is_cr = byte == 0x0D;
193 count += (is_cr | (is_lf & !last_was_cr)) as usize;
194 last_was_cr = is_cr;
195 }
196
197 count
198}
199
200#[cfg(test)]
203mod tests {
204 use super::*;
205
206 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
208 a fine day, isn't it?\nAren't you glad \
209 we're alive?\nこんにちは、みんなさん!";
210
211 #[test]
212 fn count_breaks_01() {
213 let text = "\u{000A}Hello\u{000D}\u{000A}せ\u{000B}か\u{000C}い\u{0085}. \
214 There\u{000A}is something.\u{2029}";
215 assert_eq!(45, text.len());
216 assert_eq!(3, count_breaks(text));
217 }
218
219 #[test]
220 fn from_byte_idx_01() {
221 let text = "Here\nare\nsome\nwords";
222 assert_eq!(0, from_byte_idx(text, 0));
223 assert_eq!(0, from_byte_idx(text, 4));
224 assert_eq!(1, from_byte_idx(text, 5));
225 assert_eq!(1, from_byte_idx(text, 8));
226 assert_eq!(2, from_byte_idx(text, 9));
227 assert_eq!(2, from_byte_idx(text, 13));
228 assert_eq!(3, from_byte_idx(text, 14));
229 assert_eq!(3, from_byte_idx(text, 19));
230 }
231
232 #[test]
233 fn from_byte_idx_02() {
234 let text = "\nHere\nare\nsome\nwords\n";
235 assert_eq!(0, from_byte_idx(text, 0));
236 assert_eq!(1, from_byte_idx(text, 1));
237 assert_eq!(1, from_byte_idx(text, 5));
238 assert_eq!(2, from_byte_idx(text, 6));
239 assert_eq!(2, from_byte_idx(text, 9));
240 assert_eq!(3, from_byte_idx(text, 10));
241 assert_eq!(3, from_byte_idx(text, 14));
242 assert_eq!(4, from_byte_idx(text, 15));
243 assert_eq!(4, from_byte_idx(text, 20));
244 assert_eq!(5, from_byte_idx(text, 21));
245 }
246
247 #[test]
248 fn from_byte_idx_03() {
249 let text = "Here\r\nare\r\nsome\r\nwords";
250 assert_eq!(0, from_byte_idx(text, 0));
251 assert_eq!(0, from_byte_idx(text, 4));
252 assert_eq!(0, from_byte_idx(text, 5));
253 assert_eq!(1, from_byte_idx(text, 6));
254 assert_eq!(1, from_byte_idx(text, 9));
255 assert_eq!(1, from_byte_idx(text, 10));
256 assert_eq!(2, from_byte_idx(text, 11));
257 assert_eq!(2, from_byte_idx(text, 15));
258 assert_eq!(2, from_byte_idx(text, 16));
259 assert_eq!(3, from_byte_idx(text, 17));
260 }
261
262 #[test]
263 fn from_byte_idx_04() {
264 for i in 0..32 {
266 assert_eq!(0, from_byte_idx(TEXT_LINES, i));
267 }
268
269 for i in 32..59 {
271 assert_eq!(1, from_byte_idx(TEXT_LINES, i));
272 }
273
274 for i in 59..88 {
276 assert_eq!(2, from_byte_idx(TEXT_LINES, i));
277 }
278
279 for i in 88..125 {
281 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
282 }
283
284 for i in 125..130 {
286 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
287 }
288 }
289
290 #[test]
291 fn to_byte_idx_01() {
292 let text = "Here\r\nare\r\nsome\r\nwords";
293 assert_eq!(0, to_byte_idx(text, 0));
294 assert_eq!(6, to_byte_idx(text, 1));
295 assert_eq!(11, to_byte_idx(text, 2));
296 assert_eq!(17, to_byte_idx(text, 3));
297 }
298
299 #[test]
300 fn to_byte_idx_02() {
301 let text = "\nHere\nare\nsome\nwords\n";
302 assert_eq!(0, to_byte_idx(text, 0));
303 assert_eq!(1, to_byte_idx(text, 1));
304 assert_eq!(6, to_byte_idx(text, 2));
305 assert_eq!(10, to_byte_idx(text, 3));
306 assert_eq!(15, to_byte_idx(text, 4));
307 assert_eq!(21, to_byte_idx(text, 5));
308 }
309
310 #[test]
311 fn to_byte_idx_03() {
312 assert_eq!(0, to_byte_idx(TEXT_LINES, 0));
313 assert_eq!(32, to_byte_idx(TEXT_LINES, 1));
314 assert_eq!(59, to_byte_idx(TEXT_LINES, 2));
315 assert_eq!(88, to_byte_idx(TEXT_LINES, 3));
316
317 assert_eq!(124, to_byte_idx(TEXT_LINES, 4));
319 assert_eq!(124, to_byte_idx(TEXT_LINES, 5));
320 assert_eq!(124, to_byte_idx(TEXT_LINES, 6));
321 }
322
323 #[test]
324 fn line_byte_round_trip() {
325 let text = "\nHere\nare\nsome\nwords\n";
326 assert_eq!(6, to_byte_idx(text, from_byte_idx(text, 6)));
327 assert_eq!(2, from_byte_idx(text, to_byte_idx(text, 2)));
328
329 assert_eq!(0, to_byte_idx(text, from_byte_idx(text, 0)));
330 assert_eq!(0, from_byte_idx(text, to_byte_idx(text, 0)));
331
332 assert_eq!(21, to_byte_idx(text, from_byte_idx(text, 21)));
333 assert_eq!(5, from_byte_idx(text, to_byte_idx(text, 5)));
334 }
335}