/// Tells whether `text` may be split at `byte_idx`.
///
/// The start (`0`) and the end (`text.len()`) are always accepted. Any other
/// index is accepted only when both of the following hold:
///
/// * the byte at `byte_idx` does not have `10` as its top two bits, i.e. it
///   is not a UTF-8 continuation byte;
/// * the index does not sit between a CR (`0x0D`) and an immediately
///   following LF (`0x0A`).
///
/// `byte_idx` must not exceed `text.len()`; this is verified only by a
/// `debug_assert!`.
#[inline]
pub fn is_break(byte_idx: usize, text: &[u8]) -> bool {
    debug_assert!(byte_idx <= text.len());

    // Both edges of the buffer are unconditionally valid.
    if byte_idx == 0 || byte_idx == text.len() {
        return true;
    }

    let here = text[byte_idx];
    if here >> 6 == 0b10 {
        // Landing on a continuation byte would cut a code point in half.
        return false;
    }

    // The only remaining way to be invalid is to separate CR from LF.
    let before = text[byte_idx - 1];
    (before, here) != (0x0D, 0x0A)
}
/// Tells whether the junction between two adjacent byte chunks is a valid
/// split point, i.e. whether `left` followed by `right` may stay separated.
///
/// The junction is rejected when the first byte of `right` is a UTF-8
/// continuation byte (top two bits `10`), or when `left` ends with CR
/// (`0x0D`) and `right` starts with LF (`0x0A`).
///
/// Both slices are expected to be non-empty; this is verified only by a
/// `debug_assert!`.
#[inline]
pub fn seam_is_break(left: &[u8], right: &[u8]) -> bool {
    debug_assert!(!left.is_empty() && !right.is_empty());

    let head = right[0];
    if head >> 6 == 0b10 {
        // `right` begins in the middle of a multi-byte code point.
        return false;
    }

    // `left` is only inspected once the code-point check has passed.
    let tail = left[left.len() - 1];
    (tail, head) != (0x0D, 0x0A)
}
/// Returns the closest index strictly below `byte_idx` for which
/// [`is_break`] holds, or `0` when `byte_idx` is already `0`.
///
/// `byte_idx` must not exceed `text.len()`; this is verified only by a
/// `debug_assert!`.
#[inline]
pub fn prev_break(byte_idx: usize, text: &[u8]) -> usize {
    debug_assert!(byte_idx <= text.len());

    // Walk backwards from `byte_idx - 1`. `is_break` accepts index 0, so a
    // non-empty range always yields a hit; the fallback only covers the
    // empty range produced by `byte_idx == 0`.
    (0..byte_idx)
        .rev()
        .find(|&candidate| is_break(candidate, text))
        .unwrap_or(0)
}
/// Returns the closest index strictly above `byte_idx` for which
/// [`is_break`] holds, or `text.len()` when `byte_idx` is already at the end.
///
/// `byte_idx` must not exceed `text.len()`; this is verified only by a
/// `debug_assert!`.
#[inline]
pub fn next_break(byte_idx: usize, text: &[u8]) -> usize {
    debug_assert!(byte_idx <= text.len());

    if byte_idx == text.len() {
        return byte_idx;
    }

    // `is_break` accepts `text.len()`, so the scan stops there at the latest.
    let mut candidate = byte_idx;
    loop {
        candidate += 1;
        if is_break(candidate, text) {
            return candidate;
        }
    }
}
/// Returns the break closest to `byte_idx`, steering away from the two ends
/// of `text` where it can.
///
/// Two candidates are considered:
///
/// * *before*: `byte_idx` itself when it is a break and not `text.len()`,
///   otherwise the result of [`prev_break`];
/// * *after*: the result of [`next_break`].
///
/// The *after* candidate is returned when *before* is `0`, or when *after*
/// is not `text.len()` and is at least as close to `byte_idx` as *before*
/// (so ties go to *after*). In every other case *before* is returned.
///
/// Because *after* wins whenever *before* is `0`, the result can still be
/// `text.len()` when there is no break strictly inside `text`.
///
/// `byte_idx` must not exceed `text.len()`; this is verified only by a
/// `debug_assert!`.
#[inline]
pub fn nearest_internal_break(byte_idx: usize, text: &[u8]) -> usize {
    debug_assert!(byte_idx <= text.len());

    let end = text.len();

    let before = if byte_idx != end && is_break(byte_idx, text) {
        byte_idx
    } else {
        prev_break(byte_idx, text)
    };
    let after = next_break(byte_idx, text);

    // Distances are only computed once both candidates are known to be
    // usable, mirroring the short-circuit order of the conditions.
    let take_after = before == 0 || (after != end && byte_idx - before >= after - byte_idx);

    if take_after {
        after
    } else {
        before
    }
}
/// Picks a split position at or near `byte_idx`.
///
/// When `byte_idx` already satisfies [`is_break`] it is returned unchanged.
/// Otherwise the neighbouring breaks from [`prev_break`] and [`next_break`]
/// are looked up and one of them is chosen:
///
/// * with `bias_left` set, the previous break is used unless it is `0`, in
///   which case the next break is used;
/// * with `bias_left` clear, the next break is used unless it is
///   `text.len()`, in which case the previous break is used.
///
/// The fallback is returned as-is, so the result may still be `0` or
/// `text.len()`.
///
/// `byte_idx` must not exceed `text.len()`; this is verified only by a
/// `debug_assert!`.
#[inline]
pub fn find_good_split(byte_idx: usize, text: &[u8], bias_left: bool) -> usize {
    debug_assert!(byte_idx <= text.len());

    if is_break(byte_idx, text) {
        return byte_idx;
    }

    let before = prev_break(byte_idx, text);
    let after = next_break(byte_idx, text);

    // The preferred side is kept only when it is not an edge of `text`.
    let (preferred, fallback, preferred_is_inside) = if bias_left {
        (before, after, before > 0)
    } else {
        (after, before, after < text.len())
    };

    if preferred_is_inside {
        preferred
    } else {
        fallback
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `nearest_internal_break` against `(byte_idx, expected)` pairs.
    fn check_nearest(text: &[u8], cases: &[(usize, usize)]) {
        for &(byte_idx, expected) in cases {
            assert_eq!(
                expected,
                nearest_internal_break(byte_idx, text),
                "byte_idx = {}",
                byte_idx
            );
        }
    }

    #[test]
    fn crlf_segmenter_01() {
        assert!(is_break(0, b""));

        let text = b"Hello world!\r\nHow's it going?";
        let cases = [
            (0usize, true),
            (12, true),
            (13, false),
            (14, true),
            (19, true),
        ];
        for &(byte_idx, expected) in cases.iter() {
            assert_eq!(expected, is_break(byte_idx, text), "byte_idx = {}", byte_idx);
        }
    }

    #[test]
    fn crlf_segmenter_02() {
        let l: &[u8] = b"Hello world!\r";
        let r: &[u8] = b"\nHow's it going?";
        let cr: &[u8] = b"\r";
        let lf: &[u8] = b"\n";

        // CR on the left meeting LF on the right must stay together.
        let joined = [(l, r), (l, lf), (cr, r), (cr, lf)];
        for &(left, right) in joined.iter() {
            assert!(!seam_is_break(left, right));
        }

        // The reverse order (LF then CR) is a legitimate seam.
        let separable = [(r, l), (lf, cr)];
        for &(left, right) in separable.iter() {
            assert!(seam_is_break(left, right));
        }
    }

    #[test]
    fn nearest_internal_break_01() {
        check_nearest(b"Hello world!", &[(0, 1), (6, 6), (12, 11)]);
    }

    #[test]
    fn nearest_internal_break_02() {
        check_nearest(b"Hello\r\n world!", &[(5, 5), (6, 7), (7, 7)]);
    }

    #[test]
    fn nearest_internal_break_03() {
        check_nearest(
            b"\r\nHello world!\r\n",
            &[(0, 2), (1, 2), (2, 2), (14, 14), (15, 14), (16, 14)],
        );
    }

    #[test]
    fn nearest_internal_break_04() {
        check_nearest(b"\r\n", &[(0, 2), (1, 2), (2, 2)]);
    }

    #[test]
    fn is_break_01() {
        let text = b"\n\r\n\r\n\r\n\r\n\r\n\r";

        for &byte_idx in [0usize, 12, 3].iter() {
            assert!(is_break(byte_idx, text), "byte_idx = {}", byte_idx);
        }
        assert!(!is_break(6, text));
    }

    #[test]
    fn seam_is_break_01() {
        // Left ends with LF, right starts with CR.
        assert!(seam_is_break(b"\r\n\r\n\r\n", b"\r\n\r\n"));
    }

    #[test]
    fn seam_is_break_02() {
        // Left ends with CR, right starts with LF.
        assert!(!seam_is_break(b"\r\n\r\n\r", b"\n\r\n\r\n"));
    }
}