1use crate::byte_chunk::{ByteChunk, Chunk};
10
11#[inline]
15pub fn count_breaks(text: &str) -> usize {
16 count_breaks_impl::<Chunk>(text.as_bytes())
17}
18
19#[inline]
30pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
31 let i = byte_idx.min(text.len());
32 count_breaks_impl::<Chunk>(&text.as_bytes()[..i])
33}
34
35#[inline]
45pub fn to_byte_idx(text: &str, line_idx: usize) -> usize {
46 to_byte_idx_impl::<Chunk>(text.as_bytes(), line_idx)
47}
48
49#[inline(always)]
52fn to_byte_idx_impl<T: ByteChunk>(text: &[u8], line_idx: usize) -> usize {
53 let (start, middle, _) = unsafe { text.align_to::<T>() };
58
59 let mut byte_count = 0;
60 let mut lf_count = 0;
61
62 for byte in start.iter() {
64 if lf_count == line_idx {
65 break;
66 }
67 lf_count += (*byte == 0x0A) as usize;
68 byte_count += 1;
69 }
70
71 let mut chunks = middle;
73 let mut max_round_len = (line_idx - lf_count) / T::MAX_ACC;
74 while max_round_len > 0 && !chunks.is_empty() {
75 let round_len = T::MAX_ACC.min(max_round_len).min(chunks.len());
79 max_round_len -= round_len;
80 let round = &chunks[..round_len];
81 chunks = &chunks[round_len..];
82
83 let mut acc = T::zero();
85 for chunk in round.iter() {
86 acc = acc.add(chunk.cmp_eq_byte(0x0A));
87 }
88 lf_count += acc.sum_bytes();
89 byte_count += T::SIZE * round_len;
90 }
91
92 for chunk in chunks.iter() {
94 let new_lf_count = lf_count + chunk.cmp_eq_byte(0x0A).sum_bytes();
95 if new_lf_count >= line_idx {
96 break;
97 }
98 lf_count = new_lf_count;
99 byte_count += T::SIZE;
100 }
101
102 let end = &text[byte_count..];
104 for byte in end.iter() {
105 if lf_count == line_idx {
106 break;
107 }
108 lf_count += (*byte == 0x0A) as usize;
109 byte_count += 1;
110 }
111
112 byte_count
114}
115
116#[inline(always)]
121fn count_breaks_impl<T: ByteChunk>(text: &[u8]) -> usize {
122 if text.len() < T::SIZE {
123 text.iter().map(|byte| (*byte == 0x0A) as usize).sum()
126 } else {
127 let (start, middle, end) = unsafe { text.align_to::<T>() };
129
130 let mut count = 0;
131
132 for byte in start.iter() {
134 count += (*byte == 0x0A) as usize;
135 }
136
137 for chunks in middle.chunks(T::MAX_ACC) {
139 let mut acc = T::zero();
140 for chunk in chunks.iter() {
141 acc = acc.add(chunk.cmp_eq_byte(0x0A));
142 }
143 count += acc.sum_bytes();
144 }
145
146 for byte in end.iter() {
148 count += (*byte == 0x0A) as usize;
149 }
150
151 count
152 }
153}
154
155#[cfg(test)]
158mod tests {
159 use super::*;
160
161 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
163 a fine day, isn't it?\nAren't you glad \
164 we're alive?\nこんにちは、みんなさん!";
165
166 #[test]
167 fn count_breaks_01() {
168 let text = "\nHello\u{000D}\nせ\u{000B}か\u{000C}い\u{0085}. \
169 There\nis something.\u{2029}";
170 assert_eq!(45, text.len());
171 assert_eq!(3, count_breaks(text));
172 }
173
174 #[test]
175 fn from_byte_idx_01() {
176 let text = "Here\nare\nsome\nwords";
177 assert_eq!(0, from_byte_idx(text, 0));
178 assert_eq!(0, from_byte_idx(text, 4));
179 assert_eq!(1, from_byte_idx(text, 5));
180 assert_eq!(1, from_byte_idx(text, 8));
181 assert_eq!(2, from_byte_idx(text, 9));
182 assert_eq!(2, from_byte_idx(text, 13));
183 assert_eq!(3, from_byte_idx(text, 14));
184 assert_eq!(3, from_byte_idx(text, 19));
185 }
186
187 #[test]
188 fn from_byte_idx_02() {
189 let text = "\nHere\nare\nsome\nwords\n";
190 assert_eq!(0, from_byte_idx(text, 0));
191 assert_eq!(1, from_byte_idx(text, 1));
192 assert_eq!(1, from_byte_idx(text, 5));
193 assert_eq!(2, from_byte_idx(text, 6));
194 assert_eq!(2, from_byte_idx(text, 9));
195 assert_eq!(3, from_byte_idx(text, 10));
196 assert_eq!(3, from_byte_idx(text, 14));
197 assert_eq!(4, from_byte_idx(text, 15));
198 assert_eq!(4, from_byte_idx(text, 20));
199 assert_eq!(5, from_byte_idx(text, 21));
200 }
201
202 #[test]
203 fn from_byte_idx_03() {
204 let text = "Here\r\nare\r\nsome\r\nwords";
205 assert_eq!(0, from_byte_idx(text, 0));
206 assert_eq!(0, from_byte_idx(text, 4));
207 assert_eq!(0, from_byte_idx(text, 5));
208 assert_eq!(1, from_byte_idx(text, 6));
209 assert_eq!(1, from_byte_idx(text, 9));
210 assert_eq!(1, from_byte_idx(text, 10));
211 assert_eq!(2, from_byte_idx(text, 11));
212 assert_eq!(2, from_byte_idx(text, 15));
213 assert_eq!(2, from_byte_idx(text, 16));
214 assert_eq!(3, from_byte_idx(text, 17));
215 }
216
217 #[test]
218 fn from_byte_idx_04() {
219 for i in 0..32 {
221 assert_eq!(0, from_byte_idx(TEXT_LINES, i));
222 }
223
224 for i in 32..59 {
226 assert_eq!(1, from_byte_idx(TEXT_LINES, i));
227 }
228
229 for i in 59..88 {
231 assert_eq!(2, from_byte_idx(TEXT_LINES, i));
232 }
233
234 for i in 88..125 {
236 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
237 }
238
239 for i in 125..130 {
241 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
242 }
243 }
244
245 #[test]
246 fn to_byte_idx_01() {
247 let text = "Here\r\nare\r\nsome\r\nwords";
248 assert_eq!(0, to_byte_idx(text, 0));
249 assert_eq!(6, to_byte_idx(text, 1));
250 assert_eq!(11, to_byte_idx(text, 2));
251 assert_eq!(17, to_byte_idx(text, 3));
252 }
253
254 #[test]
255 fn to_byte_idx_02() {
256 let text = "\nHere\nare\nsome\nwords\n";
257 assert_eq!(0, to_byte_idx(text, 0));
258 assert_eq!(1, to_byte_idx(text, 1));
259 assert_eq!(6, to_byte_idx(text, 2));
260 assert_eq!(10, to_byte_idx(text, 3));
261 assert_eq!(15, to_byte_idx(text, 4));
262 assert_eq!(21, to_byte_idx(text, 5));
263 }
264
265 #[test]
266 fn to_byte_idx_03() {
267 assert_eq!(0, to_byte_idx(TEXT_LINES, 0));
268 assert_eq!(32, to_byte_idx(TEXT_LINES, 1));
269 assert_eq!(59, to_byte_idx(TEXT_LINES, 2));
270 assert_eq!(88, to_byte_idx(TEXT_LINES, 3));
271
272 assert_eq!(124, to_byte_idx(TEXT_LINES, 4));
274 assert_eq!(124, to_byte_idx(TEXT_LINES, 5));
275 assert_eq!(124, to_byte_idx(TEXT_LINES, 6));
276 }
277
278 #[test]
279 fn line_byte_round_trip() {
280 let text = "\nHere\nare\nsome\nwords\n";
281 assert_eq!(6, to_byte_idx(text, from_byte_idx(text, 6)));
282 assert_eq!(2, from_byte_idx(text, to_byte_idx(text, 2)));
283
284 assert_eq!(0, to_byte_idx(text, from_byte_idx(text, 0)));
285 assert_eq!(0, from_byte_idx(text, to_byte_idx(text, 0)));
286
287 assert_eq!(21, to_byte_idx(text, from_byte_idx(text, 21)));
288 assert_eq!(5, from_byte_idx(text, to_byte_idx(text, 5)));
289 }
290}