papergrid/util/
string.rs

//! This module contains various functions which are used by the [`IterGrid`].
//!
//! You should use it if you want to comply with how [`IterGrid`] handles text.
//!
//! [`IterGrid`]: crate::grid::iterable::IterGrid
6
/// Returns the number of lines of a string and the width of its widest line,
/// as a `(line count, width)` pair.
///
/// It's a combination of [`count_lines`] and [`get_text_width`],
/// calculated in one pass over the lines yielded by [`get_lines`].
#[cfg(feature = "std")]
pub fn get_text_dimension(text: &str) -> (usize, usize) {
    let (mut height, mut width) = (0usize, 0usize);
    for line in get_lines(text) {
        height += 1;
        width = width.max(get_line_width(&line));
    }

    (height, width)
}
15
16/// Returns a string width.
17pub fn get_line_width(text: &str) -> usize {
18    #[cfg(not(feature = "ansi"))]
19    {
20        get_string_width(text)
21    }
22
23    #[cfg(feature = "ansi")]
24    {
25        // we need to strip ansi because of terminal links
26        // and they're can't be stripped by ansi_str.
27
28        ansitok::parse_ansi(text)
29            .filter(|e| e.kind() == ansitok::ElementKind::Text)
30            .map(|e| &text[e.start()..e.end()])
31            .map(get_string_width)
32            .sum()
33    }
34}
35
36/// Returns a max string width of a line.
37pub fn get_text_width(text: &str) -> usize {
38    text.lines().map(get_line_width).max().unwrap_or(0)
39}
40
41/// Returns a char width.
42pub fn get_char_width(c: char) -> usize {
43    unicode_width::UnicodeWidthChar::width(c).unwrap_or_default()
44}
45
46/// Returns a string width (accouting all characters).
47pub fn get_string_width(text: &str) -> usize {
48    unicode_width::UnicodeWidthStr::width(text)
49}
50
51/// Calculates a number of lines.
52pub fn count_lines(s: &str) -> usize {
53    if s.is_empty() {
54        return 1;
55    }
56
57    bytecount::count(s.as_bytes(), b'\n') + 1
58}
59
60/// Returns a list of tabs (`\t`) in a string..
61pub fn count_tabs(s: &str) -> usize {
62    bytecount::count(s.as_bytes(), b'\t')
63}
64
/// Splits the string into lines on `\n`.
///
/// The returned [`Lines`] iterator treats a trailing `\n` as the start of one more line.
#[cfg(feature = "std")]
pub fn get_lines(text: &str) -> Lines<'_> {
    // `split()` is called instead of `lines()` so that the plain implementation
    // matches the colored one, specifically in how a trailing '\n' character is treated.
    #[cfg(not(feature = "ansi"))]
    let inner = text.split('\n');

    #[cfg(feature = "ansi")]
    let inner = ansi_str::AnsiStr::ansi_split(text, "\n");

    Lines { inner }
}
84
/// Iterator over the lines of a string, returned by [`get_lines`].
///
/// In comparison to `std::str::Lines`, it treats trailing '\n' as a new line.
#[allow(missing_debug_implementations)]
#[cfg(feature = "std")]
pub struct Lines<'a> {
    // The `str::split` iterator, used when the `ansi` feature is disabled.
    #[cfg(not(feature = "ansi"))]
    inner: std::str::Split<'a, char>,
    // The splitter provided by `ansi_str`, used when the `ansi` feature is enabled.
    #[cfg(feature = "ansi")]
    inner: ansi_str::AnsiSplit<'a>,
}
#[cfg(feature = "std")]
impl<'a> Iterator for Lines<'a> {
    type Item = std::borrow::Cow<'a, str>;

    /// Yields the next line, or `None` once the underlying splitter is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        #[cfg(not(feature = "ansi"))]
        {
            // the plain splitter yields `&str`, which only needs a borrowed `Cow` around it
            let line = self.inner.next()?;
            Some(std::borrow::Cow::Borrowed(line))
        }

        #[cfg(feature = "ansi")]
        {
            // the items of the ansi splitter are passed through as they are
            self.inner.next()
        }
    }
}
112
/// Replaces tabs in a string with a given width of spaces.
///
/// The input is returned borrowed when it contains no tab character at all;
/// otherwise a new string is allocated.
#[cfg(feature = "std")]
pub fn replace_tab(text: &str, n: usize) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    if !text.contains('\t') {
        return Cow::Borrowed(text);
    }

    // A width of 4 is handled by a plain `str::replace`, presumably because it's
    // the most common case.
    //
    // NOTE(review): this shortcut replaces every tab, while `replace_tab_range`
    // keeps a tab which directly follows a backslash — confirm the difference is intended.
    if n == 4 {
        return Cow::Owned(text.replace('\t', "    "));
    }

    let mut replaced = String::from(text);
    replace_tab_range(&mut replaced, n);

    Cow::Owned(replaced)
}
131
132#[cfg(feature = "std")]
/// Replaces every unescaped tab (`\t`) in `cell` with `n` spaces, in place.
///
/// A tab which directly follows a backslash is treated as escaped and is kept as it is.
/// With `n == 0` the unescaped tabs are simply removed.
///
/// Returns the updated content of `cell`.
fn replace_tab_range(cell: &mut String, n: usize) -> &str {
    if !cell.contains('\t') {
        return cell;
    }

    // The result is built in a single pass over the characters, so the work is linear
    // in the length of the string: the tail is not shifted and the replacement
    // is not allocated again for each tab.
    let spaces = " ".repeat(n);
    let mut replaced = String::with_capacity(cell.len());
    let mut after_backslash = false;
    for c in cell.chars() {
        if c == '\t' && !after_backslash {
            replaced.push_str(&spaces);
        } else {
            replaced.push(c);
        }

        // only a backslash escapes the following tab; an escaped tab itself
        // does not escape a tab which comes right after it.
        after_backslash = c == '\\';
    }

    *cell = replaced;
    cell
}
158
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn string_width_emojie_test() {
        // ...emojis such as “joy”, which normally take up two columns when printed in a terminal
        // https://github.com/mgeisler/textwrap/pull/276
        let lines = [("🎩", 2), ("Rust 💕", 7)];
        for &(line, width) in &lines {
            assert_eq!(get_line_width(line), width);
        }

        assert_eq!(get_text_width("Go 👍\nC 😎"), 5);
    }

    #[cfg(feature = "ansi")]
    #[test]
    fn colored_string_width_test() {
        let lines = [
            ("\u{1b}[34mhello world\u{1b}[0m", 11),
            ("\u{1b}[34m0\u{1b}[0m", 1),
            ("\u{1b}[34m\u{1b}[34m\u{1b}[34m0\u{1b}[0m", 1),
        ];
        for &(line, width) in &lines {
            assert_eq!(get_line_width(line), width);
        }

        let text = "\u{1b}[34mhello\u{1b}[0m\n\u{1b}[34mworld\u{1b}[0m";
        assert_eq!(get_text_width(text), 5);
    }

    #[test]
    fn count_lines_test() {
        let colored = "\u{1b}[37mnow is the time for all good men\n\u{1b}[0m";
        let plain = "now is the time for all good men\n";

        assert_eq!(count_lines(colored), 2);
        assert_eq!(count_lines(plain), 2);
    }

    #[cfg(feature = "ansi")]
    #[test]
    fn string_width_multinline_for_link() {
        let link = "\u{1b}]8;;file:///home/nushell/asd.zip\u{1b}\\asd.zip\u{1b}]8;;\u{1b}\\";
        assert_eq!(get_text_width(link), 7);
    }

    #[cfg(feature = "ansi")]
    #[test]
    fn string_width_for_link() {
        let link = "\u{1b}]8;;file:///home/nushell/asd.zip\u{1b}\\asd.zip\u{1b}]8;;\u{1b}\\";
        assert_eq!(get_line_width(link), 7);
    }

    #[cfg(feature = "std")]
    #[test]
    fn string_dimension_test() {
        // the expected width of the colored text depends on whether `ansi` is enabled
        let colored_width = if cfg!(feature = "ansi") { 32 } else { 37 };
        assert_eq!(
            get_text_dimension("\u{1b}[37mnow is the time for all good men\n\u{1b}[0m"),
            (2, colored_width)
        );

        let plain = [
            ("now is the time for all good men\n", (2, 32)),
            ("asd", (1, 3)),
            ("", (1, 0)),
        ];
        for &(text, dimension) in &plain {
            assert_eq!(get_text_dimension(text), dimension);
        }
    }

    #[cfg(feature = "std")]
    #[test]
    fn replace_tab_test() {
        let cases = [
            ("123\t\tabc\t", 3, "123      abc   "),
            ("\t", 0, ""),
            ("\t", 3, "   "),
            ("123\tabc", 3, "123   abc"),
            ("123\tabc\tzxc", 0, "123abczxc"),
            // a backslash followed by the letter 't' holds no tab character at all
            ("\\t", 0, "\\t"),
            ("\\t", 4, "\\t"),
            ("123\\tabc", 0, "123\\tabc"),
            ("123\\tabc", 4, "123\\tabc"),
        ];
        for &(text, n, expected) in &cases {
            assert_eq!(replace_tab(text, n), expected);
        }
    }
}