papergrid/
util.rs

//! This module contains various functions which are used by the [`Grid`].
2//!
3//! You should use it if you want to comply with how [`Grid`] works.
4//!
5//! [`Grid`]: crate::Grid
6
7use std::borrow::Cow;
8
9/// Get string at
10///
11/// BE AWARE: width is expected to be in bytes.
12pub fn spplit_str_at(text: &str, at: usize) -> (Cow<'_, str>, Cow<'_, str>) {
13    #[cfg(feature = "color")]
14    {
15        const REPLACEMENT: char = '\u{FFFD}';
16
17        let stripped = ansi_str::AnsiStr::ansi_strip(text);
18        let (length, count_unknowns, _) = split_at_pos(&stripped, at);
19
20        let mut buf = ansi_str::AnsiStr::ansi_cut(text, ..length);
21
22        if count_unknowns > 0 {
23            let mut b = buf.into_owned();
24            b.extend(std::iter::repeat(REPLACEMENT).take(count_unknowns));
25            buf = Cow::Owned(b);
26        }
27
28        let rest = ansi_str::AnsiStr::ansi_cut(text, length..);
29
30        (buf, rest)
31    }
32    #[cfg(not(feature = "color"))]
33    {
34        const REPLACEMENT: char = '\u{FFFD}';
35
36        let (length, count_unknowns, _) = split_at_pos(text, at);
37        let buf = &text[..length];
38        let rest = &text[length..];
39        if count_unknowns == 0 {
40            return (Cow::Borrowed(buf), Cow::Borrowed(rest));
41        }
42
43        let mut buf = buf.to_owned();
44        buf.extend(std::iter::repeat(REPLACEMENT).take(count_unknowns));
45
46        return (Cow::Owned(buf), Cow::Borrowed(rest));
47    }
48}
49
50/// The function cuts the string to a specific width.
51///
52/// BE AWARE: width is expected to be in bytes.
53pub fn cut_str(s: &str, width: usize) -> Cow<'_, str> {
54    #[cfg(feature = "color")]
55    {
56        const REPLACEMENT: char = '\u{FFFD}';
57
58        let stripped = ansi_str::AnsiStr::ansi_strip(s);
59        let (length, count_unknowns, _) = split_at_pos(&stripped, width);
60
61        let mut buf = ansi_str::AnsiStr::ansi_cut(s, ..length);
62        if count_unknowns > 0 {
63            let mut b = buf.into_owned();
64            b.extend(std::iter::repeat(REPLACEMENT).take(count_unknowns));
65            buf = Cow::Owned(b);
66        }
67
68        buf
69    }
70    #[cfg(not(feature = "color"))]
71    {
72        cut_str_basic(s, width)
73    }
74}
75
76/// The function cuts the string to a specific width.
77///
78/// BE AWARE: width is expected to be in bytes.
79pub fn cut_str_basic(s: &str, width: usize) -> Cow<'_, str> {
80    const REPLACEMENT: char = '\u{FFFD}';
81
82    let (length, count_unknowns, _) = split_at_pos(s, width);
83    let buf = &s[..length];
84    if count_unknowns == 0 {
85        return Cow::Borrowed(buf);
86    }
87
88    let mut buf = buf.to_owned();
89    buf.extend(std::iter::repeat(REPLACEMENT).take(count_unknowns));
90
91    Cow::Owned(buf)
92}
93
94/// The function splits a string in the position and
95/// returns a exact number of bytes before the position and in case of a split in an unicode grapheme
96/// a width of a character which was tried to be splited in.
97///
98/// BE AWARE: pos is expected to be in bytes.
99pub fn split_at_pos(s: &str, pos: usize) -> (usize, usize, usize) {
100    let mut length = 0;
101    let mut i = 0;
102    for c in s.chars() {
103        if i == pos {
104            break;
105        };
106
107        let c_width = unicode_width::UnicodeWidthChar::width(c).unwrap_or(0);
108
109        // We cut the chars which takes more then 1 symbol to display,
110        // in order to archive the necessary width.
111        if i + c_width > pos {
112            let count = pos - i;
113            return (length, count, c.len_utf8());
114        }
115
116        i += c_width;
117        length += c.len_utf8();
118    }
119
120    (length, 0, 0)
121}
122
123/// Returns a string width.
124#[cfg(not(feature = "color"))]
125pub fn string_width(text: &str) -> usize {
126    unicode_width::UnicodeWidthStr::width(text)
127}
128
129/// Returns a string width.
130#[cfg(feature = "color")]
131pub fn string_width(text: &str) -> usize {
132    // we need to strip ansi because of terminal links
133    // and they're can't be stripped by ansi_str.
134
135    ansitok::parse_ansi(text)
136        .filter(|e| e.kind() == ansitok::ElementKind::Text)
137        .map(|e| &text[e.start()..e.end()])
138        .map(unicode_width::UnicodeWidthStr::width)
139        .sum()
140}
141
142/// Returns a max string width of a line.
143#[cfg(not(feature = "color"))]
144pub fn string_width_multiline(text: &str) -> usize {
145    text.lines()
146        .map(unicode_width::UnicodeWidthStr::width)
147        .max()
148        .unwrap_or(0)
149}
150
151/// Returns a max string width of a line.
152#[cfg(feature = "color")]
153pub fn string_width_multiline(text: &str) -> usize {
154    text.lines().map(string_width).max().unwrap_or(0)
155}
156
157/// Calculates a number of lines.
158pub fn count_lines(s: &str) -> usize {
159    if s.is_empty() {
160        return 1;
161    }
162
163    bytecount::count(s.as_bytes(), b'\n') + 1
164}
165
166/// Returns a string width with correction to tab width.
167pub fn string_width_tab(text: &str, tab_width: usize) -> usize {
168    let width = string_width(text);
169    let count_tabs = count_tabs(text);
170
171    width + count_tabs * tab_width
172}
173
174/// Returns a max per line string width with correction to tab width.
175pub fn string_width_multiline_tab(text: &str, tab_width: usize) -> usize {
176    text.lines()
177        .map(|line| string_width_tab(line, tab_width))
178        .max()
179        .unwrap_or(0)
180}
181
182/// Trims a string.
183#[cfg(not(feature = "color"))]
184pub fn string_trim(text: &str) -> Cow<'_, str> {
185    text.trim().into()
186}
187
188/// Trims a string.
189#[cfg(feature = "color")]
190pub fn string_trim(text: &str) -> Cow<'_, str> {
191    ansi_str::AnsiStr::ansi_trim(text)
192}
193
194/// Returns a list of tabs (`\t`) in a string..
195pub fn count_tabs(s: &str) -> usize {
196    bytecount::count(s.as_bytes(), b'\t')
197}
198
199/// Splits the string by lines.
200#[cfg(not(feature = "color"))]
201pub fn get_lines(text: &str) -> impl Iterator<Item = Cow<'_, str>> {
202    // we call split but not `lines()` in order to match colored implementation
203    text.split('\n').map(Cow::Borrowed)
204}
205
206/// Splits the string by lines.
207#[cfg(feature = "color")]
208pub fn get_lines(text: &str) -> impl Iterator<Item = Cow<'_, str>> {
209    ansi_str::AnsiStr::ansi_split(text, "\n")
210}
211
212/// Replaces tabs in a string with a given width of spaces.
213pub fn replace_tab(text: &str, n: usize) -> String {
214    // it's a general case which probably must be faster?
215    if n == 4 {
216        text.replace('\t', "    ")
217    } else {
218        let mut text = text.to_owned();
219        replace_tab_range(&mut text, n);
220        text
221    }
222}
223
224/// Strip OSC codes from `s`. If `s` is a single OSC8 hyperlink, with no other text, then return
225/// (s_with_all_hyperlinks_removed, Some(url)). If `s` does not meet this description, then return
226/// (s_with_all_hyperlinks_removed, None). Any ANSI color sequences in `s` will be retained. See
227/// <https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda>
228///
229/// The function is based on Dan Davison <https://github.com/dandavison> delta <https://github.com/dandavison/delta> ansi library.
230#[cfg(feature = "color")]
231pub fn strip_osc(text: &str) -> (String, Option<String>) {
232    #[derive(Debug)]
233    enum ExtractOsc8HyperlinkState {
234        ExpectOsc8Url,
235        ExpectFirstText,
236        ExpectMoreTextOrTerminator,
237        SeenOneHyperlink,
238        WillNotReturnUrl,
239    }
240
241    use ExtractOsc8HyperlinkState::*;
242
243    let mut url = None;
244    let mut state = ExpectOsc8Url;
245    let mut buf = String::with_capacity(text.len());
246
247    for el in ansitok::parse_ansi(text) {
248        match el.kind() {
249            ansitok::ElementKind::Osc => match state {
250                ExpectOsc8Url => {
251                    url = Some(&text[el.start()..el.end()]);
252                    state = ExpectFirstText;
253                }
254                ExpectMoreTextOrTerminator => state = SeenOneHyperlink,
255                _ => state = WillNotReturnUrl,
256            },
257            ansitok::ElementKind::Sgr => buf.push_str(&text[el.start()..el.end()]),
258            ansitok::ElementKind::Csi => buf.push_str(&text[el.start()..el.end()]),
259            ansitok::ElementKind::Esc => {}
260            ansitok::ElementKind::Text => {
261                buf.push_str(&text[el.start()..el.end()]);
262                match state {
263                    ExpectFirstText => state = ExpectMoreTextOrTerminator,
264                    ExpectMoreTextOrTerminator => {}
265                    _ => state = WillNotReturnUrl,
266                }
267            }
268        }
269    }
270
271    match state {
272        WillNotReturnUrl => (buf, None),
273        _ => {
274            let url = url.and_then(|s| {
275                s.strip_prefix("\x1b]8;;")
276                    .and_then(|s| s.strip_suffix('\x1b'))
277            });
278            if let Some(url) = url {
279                (buf, Some(url.to_string()))
280            } else {
281                (buf, None)
282            }
283        }
284    }
285}
286
287fn replace_tab_range(cell: &mut String, n: usize) -> &str {
288    let mut skip = 0;
289    while let &Some(pos) = &cell[skip..].find('\t') {
290        let pos = skip + pos;
291
292        let is_escaped = pos > 0 && cell.get(pos - 1..pos) == Some("\\");
293        if is_escaped {
294            skip = pos + 1;
295        } else if n == 0 {
296            cell.remove(pos);
297            skip = pos;
298        } else {
299            // I'am not sure which version is faster a loop of 'replace'
300            // or allacation of a string for replacement;
301            cell.replace_range(pos..=pos, &" ".repeat(n));
302            skip = pos + 1;
303        }
304
305        if cell.is_empty() || skip >= cell.len() {
306            break;
307        }
308    }
309    cell
310}
311
312#[cfg(test)]
313mod tests {
314    use super::*;
315
316    #[test]
317    fn replace_tab_test() {
318        assert_eq!(replace_tab("123\t\tabc\t", 3), "123      abc   ");
319
320        assert_eq!(replace_tab("\t", 0), "");
321        assert_eq!(replace_tab("\t", 3), "   ");
322        assert_eq!(replace_tab("123\tabc", 3), "123   abc");
323        assert_eq!(replace_tab("123\tabc\tzxc", 0), "123abczxc");
324
325        assert_eq!(replace_tab("\\t", 0), "\\t");
326        assert_eq!(replace_tab("\\t", 4), "\\t");
327        assert_eq!(replace_tab("123\\tabc", 0), "123\\tabc");
328        assert_eq!(replace_tab("123\\tabc", 4), "123\\tabc");
329    }
330
331    #[test]
332    fn string_width_emojie_test() {
333        // ...emojis such as “joy”, which normally take up two columns when printed in a terminal
334        // https://github.com/mgeisler/textwrap/pull/276
335        assert_eq!(string_width("🎩"), 2);
336        assert_eq!(string_width("Rust 💕"), 7);
337        assert_eq!(string_width_multiline("Go 👍\nC 😎"), 5);
338    }
339
340    #[cfg(feature = "color")]
341    #[test]
342    fn colored_string_width_test() {
343        use owo_colors::OwoColorize;
344        assert_eq!(string_width(&"hello world".red().to_string()), 11);
345        assert_eq!(
346            string_width_multiline(&"hello\nworld".blue().to_string()),
347            5
348        );
349        assert_eq!(string_width("\u{1b}[34m0\u{1b}[0m"), 1);
350        assert_eq!(string_width(&"0".red().to_string()), 1);
351    }
352
353    #[test]
354    fn strip_test() {
355        assert_eq!(cut_str("123456", 0), "");
356        assert_eq!(cut_str("123456", 3), "123");
357        assert_eq!(cut_str("123456", 10), "123456");
358
359        assert_eq!(cut_str("a week ago", 4), "a we");
360
361        assert_eq!(cut_str("😳😳😳😳😳", 0), "");
362        assert_eq!(cut_str("😳😳😳😳😳", 3), "😳�");
363        assert_eq!(cut_str("😳😳😳😳😳", 4), "😳😳");
364        assert_eq!(cut_str("😳😳😳😳😳", 20), "😳😳😳😳😳");
365
366        assert_eq!(cut_str("🏳️🏳️", 0), "");
367        assert_eq!(cut_str("🏳️🏳️", 1), "🏳");
368        assert_eq!(cut_str("🏳️🏳️", 2), "🏳\u{fe0f}🏳");
369        assert_eq!(string_width("🏳️🏳️"), string_width("🏳\u{fe0f}🏳"));
370
371        assert_eq!(cut_str("🎓", 1), "�");
372        assert_eq!(cut_str("🎓", 2), "🎓");
373
374        assert_eq!(cut_str("🥿", 1), "�");
375        assert_eq!(cut_str("🥿", 2), "🥿");
376
377        assert_eq!(cut_str("🩰", 1), "�");
378        assert_eq!(cut_str("🩰", 2), "🩰");
379
380        assert_eq!(cut_str("👍🏿", 1), "�");
381        assert_eq!(cut_str("👍🏿", 2), "👍");
382        assert_eq!(cut_str("👍🏿", 3), "👍�");
383        assert_eq!(cut_str("👍🏿", 4), "👍🏿");
384
385        assert_eq!(cut_str("🇻🇬", 1), "🇻");
386        assert_eq!(cut_str("🇻🇬", 2), "🇻🇬");
387        assert_eq!(cut_str("🇻🇬", 3), "🇻🇬");
388        assert_eq!(cut_str("🇻🇬", 4), "🇻🇬");
389    }
390
391    #[cfg(feature = "color")]
392    #[test]
393    fn strip_color_test() {
394        use owo_colors::OwoColorize;
395
396        let numbers = "123456".red().on_bright_black().to_string();
397
398        assert_eq!(cut_str(&numbers, 0), "\u{1b}[31;100m\u{1b}[39m\u{1b}[49m");
399        assert_eq!(
400            cut_str(&numbers, 3),
401            "\u{1b}[31;100m123\u{1b}[39m\u{1b}[49m"
402        );
403        assert_eq!(cut_str(&numbers, 10), "\u{1b}[31;100m123456\u{1b}[0m");
404
405        let emojies = "😳😳😳😳😳".red().on_bright_black().to_string();
406
407        assert_eq!(cut_str(&emojies, 0), "\u{1b}[31;100m\u{1b}[39m\u{1b}[49m");
408        assert_eq!(
409            cut_str(&emojies, 3),
410            "\u{1b}[31;100m😳\u{1b}[39m\u{1b}[49m�"
411        );
412        assert_eq!(
413            cut_str(&emojies, 4),
414            "\u{1b}[31;100m😳😳\u{1b}[39m\u{1b}[49m"
415        );
416        assert_eq!(cut_str(&emojies, 20), "\u{1b}[31;100m😳😳😳😳😳\u{1b}[0m");
417
418        let emojies = "🏳️🏳️".red().on_bright_black().to_string();
419
420        assert_eq!(cut_str(&emojies, 0), "\u{1b}[31;100m\u{1b}[39m\u{1b}[49m");
421        assert_eq!(cut_str(&emojies, 1), "\u{1b}[31;100m🏳\u{1b}[39m\u{1b}[49m");
422        assert_eq!(
423            cut_str(&emojies, 2),
424            "\u{1b}[31;100m🏳\u{fe0f}🏳\u{1b}[39m\u{1b}[49m"
425        );
426        assert_eq!(
427            string_width(&emojies),
428            string_width("\u{1b}[31;100m🏳\u{fe0f}🏳\u{1b}[39m\u{1b}[49m")
429        );
430    }
431
432    #[test]
433    fn count_lines_test() {
434        assert_eq!(
435            count_lines("\u{1b}[37mnow is the time for all good men\n\u{1b}[0m"),
436            2
437        );
438        assert_eq!(count_lines("now is the time for all good men\n"), 2);
439    }
440
441    #[cfg(feature = "color")]
442    #[test]
443    fn string_width_multinline_for_link() {
444        assert_eq!(
445            string_width_multiline(
446                "\u{1b}]8;;file:///home/nushell/asd.zip\u{1b}\\asd.zip\u{1b}]8;;\u{1b}\\"
447            ),
448            7
449        );
450    }
451
452    #[cfg(feature = "color")]
453    #[test]
454    fn string_width_for_link() {
455        assert_eq!(
456            string_width("\u{1b}]8;;file:///home/nushell/asd.zip\u{1b}\\asd.zip\u{1b}]8;;\u{1b}\\"),
457            7
458        );
459    }
460}