use anstyle_parse::state::state_change;
use anstyle_parse::state::Action;
use anstyle_parse::state::State;
#[inline]
pub fn strip_str(data: &str) -> StrippedStr<'_> {
StrippedStr::new(data)
}
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StrippedStr<'s> {
bytes: &'s [u8],
state: State,
}
impl<'s> StrippedStr<'s> {
#[inline]
fn new(data: &'s str) -> Self {
Self {
bytes: data.as_bytes(),
state: State::Ground,
}
}
#[inline]
#[allow(clippy::inherent_to_string_shadow_display)] pub fn to_string(&self) -> String {
use std::fmt::Write as _;
let mut stripped = String::with_capacity(self.bytes.len());
let _ = write!(&mut stripped, "{self}");
stripped
}
}
impl<'s> std::fmt::Display for StrippedStr<'s> {
#[inline]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let iter = Self {
bytes: self.bytes,
state: self.state,
};
for printable in iter {
printable.fmt(f)?;
}
Ok(())
}
}
impl<'s> Iterator for StrippedStr<'s> {
type Item = &'s str;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
next_str(&mut self.bytes, &mut self.state)
}
}
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StripStr {
state: State,
}
impl StripStr {
pub fn new() -> Self {
Default::default()
}
pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
StripStrIter {
bytes: data.as_bytes(),
state: &mut self.state,
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct StripStrIter<'s> {
bytes: &'s [u8],
state: &'s mut State,
}
impl<'s> Iterator for StripStrIter<'s> {
type Item = &'s str;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
next_str(&mut self.bytes, self.state)
}
}
#[inline]
fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
let offset = bytes.iter().copied().position(|b| {
let (next_state, action) = state_change(*state, b);
if next_state != State::Anywhere {
*state = next_state;
}
is_printable_bytes(action, b)
});
let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
*bytes = next;
*state = State::Ground;
let offset = bytes.iter().copied().position(|b| {
let (_next_state, action) = state_change(State::Ground, b);
!(is_printable_bytes(action, b) || is_utf8_continuation(b))
});
let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
*bytes = next;
if printable.is_empty() {
None
} else {
let printable = unsafe {
from_utf8_unchecked(
printable,
"`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
)
};
Some(printable)
}
}
#[inline]
unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
unsafe {
if cfg!(debug_assertions) {
std::str::from_utf8(bytes).expect(safety_justification)
} else {
std::str::from_utf8_unchecked(bytes)
}
}
}
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
matches!(b, 0x80..=0xbf)
}
#[inline]
pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
StrippedBytes::new(data)
}
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StrippedBytes<'s> {
bytes: &'s [u8],
state: State,
utf8parser: Utf8Parser,
}
impl<'s> StrippedBytes<'s> {
#[inline]
pub fn new(bytes: &'s [u8]) -> Self {
Self {
bytes,
state: State::Ground,
utf8parser: Default::default(),
}
}
#[inline]
pub fn extend(&mut self, bytes: &'s [u8]) {
debug_assert!(
self.is_empty(),
"current bytes must be processed to ensure we end at the right state"
);
self.bytes = bytes;
}
#[inline]
pub fn is_empty(&self) -> bool {
self.bytes.is_empty()
}
#[inline]
pub fn into_vec(self) -> Vec<u8> {
let mut stripped = Vec::with_capacity(self.bytes.len());
for printable in self {
stripped.extend(printable);
}
stripped
}
}
impl<'s> Iterator for StrippedBytes<'s> {
type Item = &'s [u8];
#[inline]
fn next(&mut self) -> Option<Self::Item> {
next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser)
}
}
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct StripBytes {
state: State,
utf8parser: Utf8Parser,
}
impl StripBytes {
pub fn new() -> Self {
Default::default()
}
pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
StripBytesIter {
bytes,
state: &mut self.state,
utf8parser: &mut self.utf8parser,
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct StripBytesIter<'s> {
bytes: &'s [u8],
state: &'s mut State,
utf8parser: &'s mut Utf8Parser,
}
impl<'s> Iterator for StripBytesIter<'s> {
type Item = &'s [u8];
#[inline]
fn next(&mut self) -> Option<Self::Item> {
next_bytes(&mut self.bytes, self.state, self.utf8parser)
}
}
#[inline]
fn next_bytes<'s>(
bytes: &mut &'s [u8],
state: &mut State,
utf8parser: &mut Utf8Parser,
) -> Option<&'s [u8]> {
let offset = bytes.iter().copied().position(|b| {
if *state == State::Utf8 {
true
} else {
let (next_state, action) = state_change(*state, b);
if next_state != State::Anywhere {
*state = next_state;
}
is_printable_bytes(action, b)
}
});
let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
*bytes = next;
let offset = bytes.iter().copied().position(|b| {
if *state == State::Utf8 {
if utf8parser.add(b) {
*state = State::Ground;
}
false
} else {
let (next_state, action) = state_change(State::Ground, b);
if next_state != State::Anywhere {
*state = next_state;
}
if *state == State::Utf8 {
utf8parser.add(b);
false
} else {
!is_printable_bytes(action, b)
}
}
});
let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
*bytes = next;
if printable.is_empty() {
None
} else {
Some(printable)
}
}
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub(crate) struct Utf8Parser {
utf8_parser: utf8parse::Parser,
}
impl Utf8Parser {
fn add(&mut self, byte: u8) -> bool {
let mut b = false;
let mut receiver = VtUtf8Receiver(&mut b);
self.utf8_parser.advance(&mut receiver, byte);
b
}
}
struct VtUtf8Receiver<'a>(&'a mut bool);
impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> {
fn codepoint(&mut self, _: char) {
*self.0 = true;
}
fn invalid_sequence(&mut self) {
*self.0 = true;
}
}
#[inline]
fn is_printable_bytes(action: Action, byte: u8) -> bool {
const DEL: u8 = 0x7f;
(action == Action::Print && byte != DEL)
|| action == Action::BeginUtf8
|| (action == Action::Execute && byte.is_ascii_whitespace())
}
#[cfg(test)]
mod test {
use super::*;
use proptest::prelude::*;
fn parser_strip(bytes: &[u8]) -> String {
#[derive(Default)]
struct Strip(String);
impl Strip {
fn with_capacity(capacity: usize) -> Self {
Self(String::with_capacity(capacity))
}
}
impl anstyle_parse::Perform for Strip {
fn print(&mut self, c: char) {
self.0.push(c);
}
fn execute(&mut self, byte: u8) {
if byte.is_ascii_whitespace() {
self.0.push(byte as char);
}
}
}
let mut stripped = Strip::with_capacity(bytes.len());
let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
for byte in bytes {
parser.advance(&mut stripped, *byte);
}
stripped.0
}
fn strip_char(mut s: &str) -> String {
let mut result = String::new();
let mut state = StripStr::new();
while !s.is_empty() {
let mut indices = s.char_indices();
indices.next(); let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len());
let (current, remainder) = s.split_at(offset);
for printable in state.strip_next(current) {
result.push_str(printable);
}
s = remainder;
}
result
}
fn strip_byte(s: &[u8]) -> Vec<u8> {
let mut result = Vec::new();
let mut state = StripBytes::default();
for start in 0..s.len() {
let current = &s[start..=start];
for printable in state.strip_next(current) {
result.extend(printable);
}
}
result
}
#[test]
fn test_strip_bytes_multibyte() {
let bytes = [240, 145, 141, 139];
let expected = parser_strip(&bytes);
let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
assert_eq!(expected, actual);
}
#[test]
fn test_strip_byte_multibyte() {
let bytes = [240, 145, 141, 139];
let expected = parser_strip(&bytes);
let actual = String::from_utf8(strip_byte(&bytes).clone()).unwrap();
assert_eq!(expected, actual);
}
#[test]
fn test_strip_str_del() {
let input = std::str::from_utf8(&[0x7f]).unwrap();
let expected = "";
let actual = strip_str(input).to_string();
assert_eq!(expected, actual);
}
#[test]
fn test_strip_byte_del() {
let bytes = [0x7f];
let expected = "";
let actual = String::from_utf8(strip_byte(&bytes).clone()).unwrap();
assert_eq!(expected, actual);
}
#[test]
fn test_strip_str_handles_broken_sequence() {
let s = "ö\x1b😀hello😀goodbye";
let mut it = strip_str(s);
assert_eq!("ö", it.next().unwrap());
assert_eq!("ello😀goodbye", it.next().unwrap());
}
proptest! {
#[test]
#[cfg_attr(miri, ignore)] fn strip_str_no_escapes(s in "\\PC*") {
let expected = parser_strip(s.as_bytes());
let actual = strip_str(&s).to_string();
assert_eq!(expected, actual);
}
#[test]
#[cfg_attr(miri, ignore)] fn strip_char_no_escapes(s in "\\PC*") {
let expected = parser_strip(s.as_bytes());
let actual = strip_char(&s);
assert_eq!(expected, actual);
}
#[test]
#[cfg_attr(miri, ignore)] fn strip_bytes_no_escapes(s in "\\PC*") {
dbg!(&s);
dbg!(s.as_bytes());
let expected = parser_strip(s.as_bytes());
let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
assert_eq!(expected, actual);
}
#[test]
#[cfg_attr(miri, ignore)] fn strip_byte_no_escapes(s in "\\PC*") {
dbg!(&s);
dbg!(s.as_bytes());
let expected = parser_strip(s.as_bytes());
let actual = String::from_utf8(strip_byte(s.as_bytes()).clone()).unwrap();
assert_eq!(expected, actual);
}
}
}