convert_case/
boundary.rs

1use unicode_segmentation::UnicodeSegmentation;
2
3use alloc::vec::Vec;
4
/// Returns `true` when every byte of the grapheme is an ASCII digit (`0`-`9`).
///
/// Any non-ASCII scalar value is encoded with bytes `>= 0x80`, none of which
/// are ASCII digits, so checking bytes is equivalent to checking `char`s.
fn grapheme_is_digit(c: &&str) -> bool {
    c.bytes().all(|byte| byte.is_ascii_digit())
}
8
/// Returns `true` when the grapheme is cased (its uppercase and lowercase
/// forms differ) and it is already equal to its uppercase form.
fn grapheme_is_uppercase(c: &&str) -> bool {
    let upper = c.to_uppercase();
    if upper == c.to_lowercase() {
        // Caseless graphemes (digits, punctuation, ...) are never uppercase.
        return false;
    }
    upper == **c
}
12
/// Returns `true` when the grapheme is cased (its uppercase and lowercase
/// forms differ) and it is already equal to its lowercase form.
fn grapheme_is_lowercase(c: &&str) -> bool {
    let lower = c.to_lowercase();
    if c.to_uppercase() == lower {
        // Caseless graphemes (digits, punctuation, ...) are never lowercase.
        return false;
    }
    lower == **c
}
16
/// Conditions for splitting an identifier into words.
///
/// Some boundaries, [`HYPHEN`](Boundary::HYPHEN), [`UNDERSCORE`](Boundary::UNDERSCORE), and [`SPACE`](Boundary::SPACE),
/// consume the character they split on, whereas the other boundaries do not.
///
/// `Boundary` includes methods that return useful groups of boundaries.  It also
/// contains the [`defaults_from`](Boundary::defaults_from) method which will generate a subset
/// of default boundaries based on the boundaries present in a string.
///
/// You can also create custom delimiter boundaries using the [`from_delim`](Boundary::from_delim)
/// method or directly instantiate `Boundary` for complex boundary conditions.
///
/// Equality and hashing are both based solely on [`name`](Boundary::name), so
/// two boundaries with the same name are interchangeable in comparisons and
/// in hashed collections.
///
/// ```
/// use convert_case::{Boundary, Case, Casing, Converter};
///
/// assert_eq!(
///     "transformations_in_3d",
///     "TransformationsIn3D"
///         .from_case(Case::Camel)
///         .without_boundaries(&Boundary::digit_letter())
///         .to_case(Case::Snake)
/// );
///
/// let conv = Converter::new()
///     .set_boundaries(&Boundary::defaults_from("aA "))
///     .to_case(Case::Title);
/// assert_eq!("7empest By Tool", conv.convert("7empest byTool"));
/// ```
#[derive(Debug, Eq, Clone, Copy)]
pub struct Boundary {
    /// A unique name used for comparison (and hashing).
    pub name: &'static str,
    /// A function that determines if this boundary is present at the start
    /// of the string.  The first argument is the remaining graphemes of the
    /// input; the second argument is the `arg` field.
    pub condition: fn(&[&str], Option<&'static str>) -> bool,
    /// An optional string passed to `condition` at runtime.  Used
    /// internally for [`Boundary::from_delim`] method.
    pub arg: Option<&'static str>,
    /// Where the beginning of the boundary is, as a grapheme offset from the
    /// position at which `condition` matched.
    pub start: usize,
    /// The length of the boundary.  This is the number of graphemes that
    /// are removed when splitting.
    pub len: usize,
}

// Boundaries are identified purely by `name`; the other fields do not take
// part in comparisons.
impl PartialEq for Boundary {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
    }
}

// `Hash` must agree with `PartialEq`: values that compare equal have to hash
// identically.  Deriving `Hash` would mix in every field, so it is written by
// hand to cover only `name`.
impl core::hash::Hash for Boundary {
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        core::hash::Hash::hash(self.name, state);
    }
}
66
67impl Boundary {
68    /// Splits on `_`, consuming the character on segmentation.
69    /// ```
70    /// # use convert_case::Boundary;
71    /// assert_eq!(
72    ///     vec![Boundary::UNDERSCORE],
73    ///     Boundary::defaults_from("_")
74    /// );
75    /// ```
76    pub const UNDERSCORE: Boundary = Boundary {
77        name: "Underscore",
78        condition: |s, _| s.get(0) == Some(&"_"),
79        arg: None,
80        start: 0,
81        len: 1,
82    };
83
84    /// Splits on `-`, consuming the character on segmentation.
85    /// ```
86    /// # use convert_case::Boundary;
87    /// assert_eq!(
88    ///     vec![Boundary::HYPHEN],
89    ///     Boundary::defaults_from("-")
90    /// );
91    /// ```
92    pub const HYPHEN: Boundary = Boundary {
93        name: "Hyphen",
94        condition: |s, _| s.get(0) == Some(&"-"),
95        arg: None,
96        start: 0,
97        len: 1,
98    };
99
100    /// Splits on space, consuming the character on segmentation.
101    /// ```
102    /// # use convert_case::Boundary;
103    /// assert_eq!(
104    ///     vec![Boundary::SPACE],
105    ///     Boundary::defaults_from(" ")
106    /// );
107    /// ```
108    pub const SPACE: Boundary = Boundary {
109        name: "Space",
110        condition: |s, _| s.get(0) == Some(&" "),
111        arg: None,
112        start: 0,
113        len: 1,
114    };
115
116    /// Splits where a lowercase letter is followed by an uppercase letter.
117    /// ```
118    /// # use convert_case::Boundary;
119    /// assert_eq!(
120    ///     vec![Boundary::LOWER_UPPER],
121    ///     Boundary::defaults_from("aA")
122    /// );
123    /// ```
124    pub const LOWER_UPPER: Boundary = Boundary {
125        name: "LowerUpper",
126        condition: |s, _| {
127            s.get(0).map(grapheme_is_lowercase) == Some(true)
128                && s.get(1).map(grapheme_is_uppercase) == Some(true)
129        },
130        arg: None,
131        start: 1,
132        len: 0,
133    };
134    /// Splits where an uppercase letter is followed by a lowercase letter.  This is seldom used,
135    /// and is **not** included in the [defaults](Boundary::defaults).
136    /// ```
137    /// # use convert_case::Boundary;
138    /// assert!(
139    ///     Boundary::defaults_from("Aa").len() == 0
140    /// );
141    /// ```
142    pub const UPPER_LOWER: Boundary = Boundary {
143        name: "UpperLower",
144        condition: |s, _| {
145            s.get(0).map(grapheme_is_uppercase) == Some(true)
146                && s.get(1).map(grapheme_is_lowercase) == Some(true)
147        },
148        arg: None,
149        start: 1,
150        len: 0,
151    };
152
153    /// Acronyms are identified by two uppercase letters followed by a lowercase letter.
154    /// The word boundary is between the two uppercase letters.  For example, "HTTPRequest"
155    /// would have an acronym boundary identified at "PRe" and split into "HTTP" and "Request".
156    /// ```
157    /// # use convert_case::Boundary;
158    /// assert_eq!(
159    ///     vec![Boundary::ACRONYM],
160    ///     Boundary::defaults_from("AAa")
161    /// );
162    /// ```
163    pub const ACRONYM: Boundary = Boundary {
164        name: "Acronym",
165        condition: |s, _| {
166            s.get(0).map(grapheme_is_uppercase) == Some(true)
167                && s.get(1).map(grapheme_is_uppercase) == Some(true)
168                && s.get(2).map(grapheme_is_lowercase) == Some(true)
169        },
170        arg: None,
171        start: 1,
172        len: 0,
173    };
174
175    /// Splits where a lowercase letter is followed by a digit.
176    /// ```
177    /// # use convert_case::Boundary;
178    /// assert_eq!(
179    ///     vec![Boundary::LOWER_DIGIT],
180    ///     Boundary::defaults_from("a1")
181    /// );
182    /// ```
183    pub const LOWER_DIGIT: Boundary = Boundary {
184        name: "LowerDigit",
185        condition: |s, _| {
186            s.get(0).map(grapheme_is_lowercase) == Some(true)
187                && s.get(1).map(grapheme_is_digit) == Some(true)
188        },
189        arg: None,
190        start: 1,
191        len: 0,
192    };
193
194    /// Splits where an uppercase letter is followed by a digit.
195    /// ```
196    /// # use convert_case::Boundary;
197    /// assert_eq!(
198    ///     vec![Boundary::UPPER_DIGIT],
199    ///     Boundary::defaults_from("A1")
200    /// );
201    /// ```
202    pub const UPPER_DIGIT: Boundary = Boundary {
203        name: "UpperDigit",
204        condition: |s, _| {
205            s.get(0).map(grapheme_is_uppercase) == Some(true)
206                && s.get(1).map(grapheme_is_digit) == Some(true)
207        },
208        arg: None,
209        start: 1,
210        len: 0,
211    };
212
213    /// Splits where digit is followed by a lowercase letter.
214    /// ```
215    /// # use convert_case::Boundary;
216    /// assert_eq!(
217    ///     vec![Boundary::DIGIT_LOWER],
218    ///     Boundary::defaults_from("1a")
219    /// );
220    /// ```
221    pub const DIGIT_LOWER: Boundary = Boundary {
222        name: "DigitLower",
223        condition: |s, _| {
224            s.get(0).map(grapheme_is_digit) == Some(true)
225                && s.get(1).map(grapheme_is_lowercase) == Some(true)
226        },
227        arg: None,
228        start: 1,
229        len: 0,
230    };
231
232    /// Splits where digit is followed by an uppercase letter.
233    /// ```
234    /// # use convert_case::Boundary;
235    /// assert_eq!(
236    ///     vec![Boundary::DIGIT_UPPER],
237    ///     Boundary::defaults_from("1A")
238    /// );
239    /// ```
240    pub const DIGIT_UPPER: Boundary = Boundary {
241        name: "DigitUpper",
242        condition: |s, _| {
243            s.get(0).map(grapheme_is_digit) == Some(true)
244                && s.get(1).map(grapheme_is_uppercase) == Some(true)
245        },
246        arg: None,
247        start: 1,
248        len: 0,
249    };
250
251    /// Create a new boundary based on a delimiter.
252    /// ```
253    /// # use convert_case::{Case, Converter, Boundary};
254    /// let conv = Converter::new()
255    ///     .set_boundaries(&[Boundary::from_delim("::")])
256    ///     .to_case(Case::Camel);
257    /// assert_eq!(
258    ///     "myVarName",
259    ///     conv.convert("my::var::name")
260    /// )
261    /// ```
262    pub const fn from_delim(delim: &'static str) -> Boundary {
263        Boundary {
264            name: delim,
265            arg: Some(delim),
266            condition: |s, arg| s.join("").starts_with(arg.unwrap()),
267            start: 0,
268            len: delim.len(),
269        }
270    }
271
272    /// The default list of boundaries used when `Casing::to_case` is called directly
273    /// and in a `Converter` generated from `Converter::new()`.
274    /// ```
275    /// # use convert_case::Boundary;
276    /// assert_eq!(
277    ///     [
278    ///         Boundary::UNDERSCORE,
279    ///         Boundary::HYPHEN,
280    ///         Boundary::SPACE,
281    ///         Boundary::LOWER_UPPER,
282    ///         Boundary::LOWER_DIGIT,
283    ///         Boundary::UPPER_DIGIT,
284    ///         Boundary::DIGIT_LOWER,
285    ///         Boundary::DIGIT_UPPER,
286    ///         Boundary::ACRONYM,
287    ///     ],
288    ///     Boundary::defaults()
289    /// );
290    /// ```
291    pub const fn defaults() -> [Boundary; 9] {
292        [
293            Boundary::UNDERSCORE,
294            Boundary::HYPHEN,
295            Boundary::SPACE,
296            Boundary::LOWER_UPPER,
297            Boundary::LOWER_DIGIT,
298            Boundary::UPPER_DIGIT,
299            Boundary::DIGIT_LOWER,
300            Boundary::DIGIT_UPPER,
301            Boundary::ACRONYM,
302        ]
303    }
304
305    /// Returns the boundaries that involve digits.
306    /// ```
307    /// # use convert_case::Boundary;
308    /// assert_eq!(
309    ///     [
310    ///         Boundary::LOWER_DIGIT,
311    ///         Boundary::UPPER_DIGIT,
312    ///         Boundary::DIGIT_LOWER,
313    ///         Boundary::DIGIT_UPPER,
314    ///     ],
315    ///     Boundary::digits()
316    /// );
317    /// ```
318    pub const fn digits() -> [Boundary; 4] {
319        [
320            Boundary::LOWER_DIGIT,
321            Boundary::UPPER_DIGIT,
322            Boundary::DIGIT_LOWER,
323            Boundary::DIGIT_UPPER,
324        ]
325    }
326
327    /// Returns the boundaries that are letters followed by digits.
328    /// ```
329    /// # use convert_case::Boundary;
330    /// assert_eq!(
331    ///     [
332    ///         Boundary::LOWER_DIGIT,
333    ///         Boundary::UPPER_DIGIT,
334    ///     ],
335    ///     Boundary::letter_digit()
336    /// );
337    /// ```
338    pub const fn letter_digit() -> [Boundary; 2] {
339        [Boundary::LOWER_DIGIT, Boundary::UPPER_DIGIT]
340    }
341
342    /// Returns the boundaries that are digits followed by letters.
343    /// ```
344    /// # use convert_case::Boundary;
345    /// assert_eq!(
346    ///     [
347    ///         Boundary::DIGIT_LOWER,
348    ///         Boundary::DIGIT_UPPER
349    ///     ],
350    ///     Boundary::digit_letter()
351    /// );
352    /// ```
353    pub const fn digit_letter() -> [Boundary; 2] {
354        [Boundary::DIGIT_LOWER, Boundary::DIGIT_UPPER]
355    }
356
357    /// Returns a list of all boundaries that are identified within the given string.
358    /// Could be a short of writing out all the boundaries in a list directly.  This will not
359    /// identify boundary `UpperLower` if it also used as part of `Acronym`.
360    ///
361    /// If you want to be very explicit and not overlap boundaries, it is recommended to use a colon
362    /// character.
363    /// ```
364    /// # use convert_case::Boundary;
365    /// assert_eq!(
366    ///     vec![
367    ///         Boundary::HYPHEN,
368    ///         Boundary::SPACE,
369    ///         Boundary::LOWER_UPPER,
370    ///         Boundary::UPPER_DIGIT,
371    ///         Boundary::DIGIT_LOWER,
372    ///     ],
373    ///     Boundary::defaults_from("aA8a -")
374    /// );
375    /// assert_eq!(
376    ///     vec![
377    ///         Boundary::UNDERSCORE,
378    ///         Boundary::LOWER_UPPER,
379    ///         Boundary::DIGIT_UPPER,
380    ///         Boundary::ACRONYM,
381    ///     ],
382    ///     Boundary::defaults_from("bD:0B:_:AAa")
383    /// );
384    /// ```
385    pub fn defaults_from(pattern: &str) -> Vec<Boundary> {
386        let mut boundaries = Vec::new();
387        for boundary in Boundary::defaults() {
388            let parts = split(&pattern, &[boundary]);
389            if parts.len() > 1 || parts.len() == 0 || parts[0] != pattern {
390                boundaries.push(boundary);
391            }
392        }
393        boundaries
394    }
395}
396
397/// Split an identifier into a list of words using the list of boundaries.
398///
399/// This is used internally for splitting an identifier before mutating by
400/// a pattern and joining again with a delimiter.
401/// ```
402/// use convert_case::{Boundary, split};
403/// assert_eq!(
404///     vec!["one", "two", "three.four"],
405///     split(&"one_two-three.four", &[Boundary::UNDERSCORE, Boundary::HYPHEN]),
406/// )
407/// ```
408pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
409where
410    T: AsRef<str>,
411{
412    let s = s.as_ref();
413
414    if s.len() == 0 {
415        return Vec::new();
416    }
417
418    let mut words = Vec::new();
419    let mut last_boundary_end = 0;
420
421    let (indices, graphemes): (Vec<_>, Vec<_>) = s.grapheme_indices(true).unzip();
422    let grapheme_length = indices[graphemes.len() - 1] + graphemes[graphemes.len() - 1].len();
423
424    for i in 0..graphemes.len() {
425        for boundary in boundaries {
426            //let byte_index = indices[i];
427
428            if (boundary.condition)(&graphemes[i..], boundary.arg) {
429                // What if we find a condition at the end of the array?
430                // Maybe we can stop early based on length
431                // To do this, need to switch the loops
432                // TODO
433                let boundary_byte_start: usize =
434                    *indices.get(i + boundary.start).unwrap_or(&grapheme_length);
435                let boundary_byte_end: usize = *indices
436                    .get(i + boundary.start + boundary.len)
437                    .unwrap_or(&grapheme_length);
438
439                // todo clean this up a bit
440                words.push(&s[last_boundary_end..boundary_byte_start]);
441                last_boundary_end = boundary_byte_end;
442                break;
443            }
444        }
445    }
446    words.push(&s[last_boundary_end..]);
447    words.into_iter().filter(|s| !s.is_empty()).collect()
448}
449
450// ascii version
451//pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
452//where
453//    T: AsRef<str>,
454//{
455//    let s = s.as_ref();
456//
457//    let mut words = Vec::new();
458//    let mut last_end = 0;
459//    for i in 0..s.len() {
460//        for boundary in boundaries {
461//            if (boundary.condition)(&s[i..]) {
462//                words.push(&s[last_end..i + boundary.start]);
463//                last_end = i + boundary.start + boundary.len;
464//                break;
465//            }
466//        }
467//    }
468//    words.push(&s[last_end..]);
469//    words
470//}
471
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_input() {
        let s = "";
        let v = split(&s, &[Boundary::HYPHEN]);
        assert!(v.is_empty());
    }

    #[test]
    fn hyphen() {
        let s = "a-b-c";
        let v = split(&s, &[Boundary::HYPHEN]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn underscore() {
        let s = "a_b_c";
        let v = split(&s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn space() {
        let s = "a b c";
        let v = split(&s, &[Boundary::SPACE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn delimiters() {
        let s = "aaa-bbb_ccc ddd ddd-eee";
        let v = split(
            &s,
            &[Boundary::SPACE, Boundary::UNDERSCORE, Boundary::HYPHEN],
        );
        assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    #[test]
    fn lower_upper() {
        let s = "lowerUpperUpper";
        let v = split(&s, &[Boundary::LOWER_UPPER]);
        assert_eq!(v, vec!["lower", "Upper", "Upper"]);
    }

    #[test]
    fn upper_lower() {
        // The split lands after the uppercase letter, not before it.
        let s = "AbcDef";
        let v = split(&s, &[Boundary::UPPER_LOWER]);
        assert_eq!(v, vec!["A", "bcD", "ef"]);
    }

    #[test]
    fn acronym() {
        let s = "XMLRequest";
        let v = split(&s, &[Boundary::ACRONYM]);
        assert_eq!(v, vec!["XML", "Request"]);
    }

    #[test]
    fn lower_digit() {
        let s = "abc123def";
        let v = split(&s, &[Boundary::LOWER_DIGIT]);
        assert_eq!(v, vec!["abc", "123def"]);
    }

    #[test]
    fn upper_digit() {
        let s = "ABC123";
        let v = split(&s, &[Boundary::UPPER_DIGIT]);
        assert_eq!(v, vec!["ABC", "123"]);
    }

    #[test]
    fn digit_lower() {
        let s = "123abc";
        let v = split(&s, &[Boundary::DIGIT_LOWER]);
        assert_eq!(v, vec!["123", "abc"]);
    }

    #[test]
    fn digit_upper() {
        let s = "123ABC";
        let v = split(&s, &[Boundary::DIGIT_UPPER]);
        assert_eq!(v, vec!["123", "ABC"]);
    }

    #[test]
    fn boundaries_found_in_string() {
        // upper lower is no longer a default
        assert_eq!(Vec::<Boundary>::new(), Boundary::defaults_from(".Aaaa"));
        assert_eq!(
            vec![Boundary::LOWER_UPPER, Boundary::LOWER_DIGIT,],
            Boundary::defaults_from("a8.Aa.aA")
        );
        assert_eq!(
            Boundary::digits().to_vec(),
            Boundary::defaults_from("b1B1b")
        );
        assert_eq!(
            vec![
                Boundary::UNDERSCORE,
                Boundary::HYPHEN,
                Boundary::SPACE,
                Boundary::ACRONYM,
            ],
            Boundary::defaults_from("AAa -_")
        );
    }

    #[test]
    fn boundary_consts_same() {
        assert_eq!(Boundary::SPACE, Boundary::SPACE);
    }

    #[test]
    fn from_delim_dot() {
        let boundary = Boundary::from_delim(".");
        let s = "lower.Upper.Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "Upper", "Upper"], v)
    }

    #[test]
    fn from_delim_double_colon() {
        let boundary = Boundary::from_delim("::");
        let s = "lower::lowerUpper::Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "lowerUpper", "Upper"], v)
    }
}