convert_case/boundary.rs
1use unicode_segmentation::UnicodeSegmentation;
2
3use alloc::vec::Vec;
4
/// Reports whether every `char` of the grapheme is an ASCII digit (`0`-`9`).
///
/// Takes `&&str` so it can be passed directly to `Option::map` on the result
/// of a slice lookup over `&[&str]`.
fn grapheme_is_digit(c: &&str) -> bool {
    c.chars().all(|ch| matches!(ch, '0'..='9'))
}
8
/// Reports whether the grapheme is cased and equal to its own uppercase form.
///
/// Graphemes whose uppercase and lowercase forms coincide (digits,
/// punctuation, caseless scripts) are never considered uppercase.
fn grapheme_is_uppercase(c: &&str) -> bool {
    let upper = c.to_uppercase();
    if upper == c.to_lowercase() {
        // No case distinction exists for this grapheme.
        return false;
    }
    *c == upper
}
12
/// Reports whether the grapheme is cased and equal to its own lowercase form.
///
/// Graphemes whose uppercase and lowercase forms coincide (digits,
/// punctuation, caseless scripts) are never considered lowercase.
fn grapheme_is_lowercase(c: &&str) -> bool {
    let lowered = c.to_lowercase();
    // Caseless graphemes map to the same text in both directions.
    let is_cased = c.to_uppercase() != lowered;
    is_cased && *c == lowered
}
16
/// Conditions for splitting an identifier into words.
///
/// Some boundaries, [`HYPHEN`](Boundary::HYPHEN), [`UNDERSCORE`](Boundary::UNDERSCORE), and [`SPACE`](Boundary::SPACE),
/// consume the character they split on, whereas the other boundaries do not.
///
/// `Boundary` includes methods that return useful groups of boundaries.  It also
/// contains the [`defaults_from`](Boundary::defaults_from) method which will generate a subset
/// of default boundaries based on the boundaries present in a string.
///
/// You can also create custom delimiter boundaries using the [`from_delim`](Boundary::from_delim)
/// method or directly instantiate `Boundary` for complex boundary conditions.
///
/// Two boundaries compare equal, and hash identically, when their `name`s are
/// equal; the remaining fields do not take part in comparison or hashing.
/// ```
/// use convert_case::{Boundary, Case, Casing, Converter};
///
/// assert_eq!(
///     "transformations_in_3d",
///     "TransformationsIn3D"
///         .from_case(Case::Camel)
///         .without_boundaries(&Boundary::digit_letter())
///         .to_case(Case::Snake)
/// );
///
/// let conv = Converter::new()
///     .set_boundaries(&Boundary::defaults_from("aA "))
///     .to_case(Case::Title);
/// assert_eq!("7empest By Tool", conv.convert("7empest byTool"));
/// ```
#[derive(Debug, Eq, Clone, Copy)]
pub struct Boundary {
    /// A unique name used for comparison.
    pub name: &'static str,
    /// A function that determines if this boundary is present at the start
    /// of the string.  Second argument is the `arg` field.
    pub condition: fn(&[&str], Option<&'static str>) -> bool,
    /// An optional string passed to `condition` at runtime.  Used
    /// internally for [`Boundary::from_delim`] method.
    pub arg: Option<&'static str>,
    /// Where the beginning of the boundary is.
    pub start: usize,
    /// The length of the boundary.  This is the number of graphemes that
    /// are removed when splitting.
    pub len: usize,
}

impl PartialEq for Boundary {
    /// Boundaries are identified solely by their `name`.
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
    }
}

// `Hash` must agree with `PartialEq`: values that compare equal have to hash
// equally.  Equality only looks at `name`, so hashing does the same instead of
// deriving an implementation that would also feed in the other fields.
impl core::hash::Hash for Boundary {
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        core::hash::Hash::hash(self.name, state);
    }
}
66
67impl Boundary {
68 /// Splits on `_`, consuming the character on segmentation.
69 /// ```
70 /// # use convert_case::Boundary;
71 /// assert_eq!(
72 /// vec![Boundary::UNDERSCORE],
73 /// Boundary::defaults_from("_")
74 /// );
75 /// ```
76 pub const UNDERSCORE: Boundary = Boundary {
77 name: "Underscore",
78 condition: |s, _| s.get(0) == Some(&"_"),
79 arg: None,
80 start: 0,
81 len: 1,
82 };
83
84 /// Splits on `-`, consuming the character on segmentation.
85 /// ```
86 /// # use convert_case::Boundary;
87 /// assert_eq!(
88 /// vec![Boundary::HYPHEN],
89 /// Boundary::defaults_from("-")
90 /// );
91 /// ```
92 pub const HYPHEN: Boundary = Boundary {
93 name: "Hyphen",
94 condition: |s, _| s.get(0) == Some(&"-"),
95 arg: None,
96 start: 0,
97 len: 1,
98 };
99
100 /// Splits on space, consuming the character on segmentation.
101 /// ```
102 /// # use convert_case::Boundary;
103 /// assert_eq!(
104 /// vec![Boundary::SPACE],
105 /// Boundary::defaults_from(" ")
106 /// );
107 /// ```
108 pub const SPACE: Boundary = Boundary {
109 name: "Space",
110 condition: |s, _| s.get(0) == Some(&" "),
111 arg: None,
112 start: 0,
113 len: 1,
114 };
115
116 /// Splits where a lowercase letter is followed by an uppercase letter.
117 /// ```
118 /// # use convert_case::Boundary;
119 /// assert_eq!(
120 /// vec![Boundary::LOWER_UPPER],
121 /// Boundary::defaults_from("aA")
122 /// );
123 /// ```
124 pub const LOWER_UPPER: Boundary = Boundary {
125 name: "LowerUpper",
126 condition: |s, _| {
127 s.get(0).map(grapheme_is_lowercase) == Some(true)
128 && s.get(1).map(grapheme_is_uppercase) == Some(true)
129 },
130 arg: None,
131 start: 1,
132 len: 0,
133 };
134 /// Splits where an uppercase letter is followed by a lowercase letter. This is seldom used,
135 /// and is **not** included in the [defaults](Boundary::defaults).
136 /// ```
137 /// # use convert_case::Boundary;
138 /// assert!(
139 /// Boundary::defaults_from("Aa").len() == 0
140 /// );
141 /// ```
142 pub const UPPER_LOWER: Boundary = Boundary {
143 name: "UpperLower",
144 condition: |s, _| {
145 s.get(0).map(grapheme_is_uppercase) == Some(true)
146 && s.get(1).map(grapheme_is_lowercase) == Some(true)
147 },
148 arg: None,
149 start: 1,
150 len: 0,
151 };
152
153 /// Acronyms are identified by two uppercase letters followed by a lowercase letter.
154 /// The word boundary is between the two uppercase letters. For example, "HTTPRequest"
155 /// would have an acronym boundary identified at "PRe" and split into "HTTP" and "Request".
156 /// ```
157 /// # use convert_case::Boundary;
158 /// assert_eq!(
159 /// vec![Boundary::ACRONYM],
160 /// Boundary::defaults_from("AAa")
161 /// );
162 /// ```
163 pub const ACRONYM: Boundary = Boundary {
164 name: "Acronym",
165 condition: |s, _| {
166 s.get(0).map(grapheme_is_uppercase) == Some(true)
167 && s.get(1).map(grapheme_is_uppercase) == Some(true)
168 && s.get(2).map(grapheme_is_lowercase) == Some(true)
169 },
170 arg: None,
171 start: 1,
172 len: 0,
173 };
174
175 /// Splits where a lowercase letter is followed by a digit.
176 /// ```
177 /// # use convert_case::Boundary;
178 /// assert_eq!(
179 /// vec![Boundary::LOWER_DIGIT],
180 /// Boundary::defaults_from("a1")
181 /// );
182 /// ```
183 pub const LOWER_DIGIT: Boundary = Boundary {
184 name: "LowerDigit",
185 condition: |s, _| {
186 s.get(0).map(grapheme_is_lowercase) == Some(true)
187 && s.get(1).map(grapheme_is_digit) == Some(true)
188 },
189 arg: None,
190 start: 1,
191 len: 0,
192 };
193
194 /// Splits where an uppercase letter is followed by a digit.
195 /// ```
196 /// # use convert_case::Boundary;
197 /// assert_eq!(
198 /// vec![Boundary::UPPER_DIGIT],
199 /// Boundary::defaults_from("A1")
200 /// );
201 /// ```
202 pub const UPPER_DIGIT: Boundary = Boundary {
203 name: "UpperDigit",
204 condition: |s, _| {
205 s.get(0).map(grapheme_is_uppercase) == Some(true)
206 && s.get(1).map(grapheme_is_digit) == Some(true)
207 },
208 arg: None,
209 start: 1,
210 len: 0,
211 };
212
213 /// Splits where digit is followed by a lowercase letter.
214 /// ```
215 /// # use convert_case::Boundary;
216 /// assert_eq!(
217 /// vec![Boundary::DIGIT_LOWER],
218 /// Boundary::defaults_from("1a")
219 /// );
220 /// ```
221 pub const DIGIT_LOWER: Boundary = Boundary {
222 name: "DigitLower",
223 condition: |s, _| {
224 s.get(0).map(grapheme_is_digit) == Some(true)
225 && s.get(1).map(grapheme_is_lowercase) == Some(true)
226 },
227 arg: None,
228 start: 1,
229 len: 0,
230 };
231
232 /// Splits where digit is followed by an uppercase letter.
233 /// ```
234 /// # use convert_case::Boundary;
235 /// assert_eq!(
236 /// vec![Boundary::DIGIT_UPPER],
237 /// Boundary::defaults_from("1A")
238 /// );
239 /// ```
240 pub const DIGIT_UPPER: Boundary = Boundary {
241 name: "DigitUpper",
242 condition: |s, _| {
243 s.get(0).map(grapheme_is_digit) == Some(true)
244 && s.get(1).map(grapheme_is_uppercase) == Some(true)
245 },
246 arg: None,
247 start: 1,
248 len: 0,
249 };
250
251 /// Create a new boundary based on a delimiter.
252 /// ```
253 /// # use convert_case::{Case, Converter, Boundary};
254 /// let conv = Converter::new()
255 /// .set_boundaries(&[Boundary::from_delim("::")])
256 /// .to_case(Case::Camel);
257 /// assert_eq!(
258 /// "myVarName",
259 /// conv.convert("my::var::name")
260 /// )
261 /// ```
262 pub const fn from_delim(delim: &'static str) -> Boundary {
263 Boundary {
264 name: delim,
265 arg: Some(delim),
266 condition: |s, arg| s.join("").starts_with(arg.unwrap()),
267 start: 0,
268 len: delim.len(),
269 }
270 }
271
272 /// The default list of boundaries used when `Casing::to_case` is called directly
273 /// and in a `Converter` generated from `Converter::new()`.
274 /// ```
275 /// # use convert_case::Boundary;
276 /// assert_eq!(
277 /// [
278 /// Boundary::UNDERSCORE,
279 /// Boundary::HYPHEN,
280 /// Boundary::SPACE,
281 /// Boundary::LOWER_UPPER,
282 /// Boundary::LOWER_DIGIT,
283 /// Boundary::UPPER_DIGIT,
284 /// Boundary::DIGIT_LOWER,
285 /// Boundary::DIGIT_UPPER,
286 /// Boundary::ACRONYM,
287 /// ],
288 /// Boundary::defaults()
289 /// );
290 /// ```
291 pub const fn defaults() -> [Boundary; 9] {
292 [
293 Boundary::UNDERSCORE,
294 Boundary::HYPHEN,
295 Boundary::SPACE,
296 Boundary::LOWER_UPPER,
297 Boundary::LOWER_DIGIT,
298 Boundary::UPPER_DIGIT,
299 Boundary::DIGIT_LOWER,
300 Boundary::DIGIT_UPPER,
301 Boundary::ACRONYM,
302 ]
303 }
304
305 /// Returns the boundaries that involve digits.
306 /// ```
307 /// # use convert_case::Boundary;
308 /// assert_eq!(
309 /// [
310 /// Boundary::LOWER_DIGIT,
311 /// Boundary::UPPER_DIGIT,
312 /// Boundary::DIGIT_LOWER,
313 /// Boundary::DIGIT_UPPER,
314 /// ],
315 /// Boundary::digits()
316 /// );
317 /// ```
318 pub const fn digits() -> [Boundary; 4] {
319 [
320 Boundary::LOWER_DIGIT,
321 Boundary::UPPER_DIGIT,
322 Boundary::DIGIT_LOWER,
323 Boundary::DIGIT_UPPER,
324 ]
325 }
326
327 /// Returns the boundaries that are letters followed by digits.
328 /// ```
329 /// # use convert_case::Boundary;
330 /// assert_eq!(
331 /// [
332 /// Boundary::LOWER_DIGIT,
333 /// Boundary::UPPER_DIGIT,
334 /// ],
335 /// Boundary::letter_digit()
336 /// );
337 /// ```
338 pub const fn letter_digit() -> [Boundary; 2] {
339 [Boundary::LOWER_DIGIT, Boundary::UPPER_DIGIT]
340 }
341
342 /// Returns the boundaries that are digits followed by letters.
343 /// ```
344 /// # use convert_case::Boundary;
345 /// assert_eq!(
346 /// [
347 /// Boundary::DIGIT_LOWER,
348 /// Boundary::DIGIT_UPPER
349 /// ],
350 /// Boundary::digit_letter()
351 /// );
352 /// ```
353 pub const fn digit_letter() -> [Boundary; 2] {
354 [Boundary::DIGIT_LOWER, Boundary::DIGIT_UPPER]
355 }
356
357 /// Returns a list of all boundaries that are identified within the given string.
358 /// Could be a short of writing out all the boundaries in a list directly. This will not
359 /// identify boundary `UpperLower` if it also used as part of `Acronym`.
360 ///
361 /// If you want to be very explicit and not overlap boundaries, it is recommended to use a colon
362 /// character.
363 /// ```
364 /// # use convert_case::Boundary;
365 /// assert_eq!(
366 /// vec![
367 /// Boundary::HYPHEN,
368 /// Boundary::SPACE,
369 /// Boundary::LOWER_UPPER,
370 /// Boundary::UPPER_DIGIT,
371 /// Boundary::DIGIT_LOWER,
372 /// ],
373 /// Boundary::defaults_from("aA8a -")
374 /// );
375 /// assert_eq!(
376 /// vec![
377 /// Boundary::UNDERSCORE,
378 /// Boundary::LOWER_UPPER,
379 /// Boundary::DIGIT_UPPER,
380 /// Boundary::ACRONYM,
381 /// ],
382 /// Boundary::defaults_from("bD:0B:_:AAa")
383 /// );
384 /// ```
385 pub fn defaults_from(pattern: &str) -> Vec<Boundary> {
386 let mut boundaries = Vec::new();
387 for boundary in Boundary::defaults() {
388 let parts = split(&pattern, &[boundary]);
389 if parts.len() > 1 || parts.len() == 0 || parts[0] != pattern {
390 boundaries.push(boundary);
391 }
392 }
393 boundaries
394 }
395}
396
397/// Split an identifier into a list of words using the list of boundaries.
398///
399/// This is used internally for splitting an identifier before mutating by
400/// a pattern and joining again with a delimiter.
401/// ```
402/// use convert_case::{Boundary, split};
403/// assert_eq!(
404/// vec!["one", "two", "three.four"],
405/// split(&"one_two-three.four", &[Boundary::UNDERSCORE, Boundary::HYPHEN]),
406/// )
407/// ```
408pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
409where
410 T: AsRef<str>,
411{
412 let s = s.as_ref();
413
414 if s.len() == 0 {
415 return Vec::new();
416 }
417
418 let mut words = Vec::new();
419 let mut last_boundary_end = 0;
420
421 let (indices, graphemes): (Vec<_>, Vec<_>) = s.grapheme_indices(true).unzip();
422 let grapheme_length = indices[graphemes.len() - 1] + graphemes[graphemes.len() - 1].len();
423
424 for i in 0..graphemes.len() {
425 for boundary in boundaries {
426 //let byte_index = indices[i];
427
428 if (boundary.condition)(&graphemes[i..], boundary.arg) {
429 // What if we find a condition at the end of the array?
430 // Maybe we can stop early based on length
431 // To do this, need to switch the loops
432 // TODO
433 let boundary_byte_start: usize =
434 *indices.get(i + boundary.start).unwrap_or(&grapheme_length);
435 let boundary_byte_end: usize = *indices
436 .get(i + boundary.start + boundary.len)
437 .unwrap_or(&grapheme_length);
438
439 // todo clean this up a bit
440 words.push(&s[last_boundary_end..boundary_byte_start]);
441 last_boundary_end = boundary_byte_end;
442 break;
443 }
444 }
445 }
446 words.push(&s[last_boundary_end..]);
447 words.into_iter().filter(|s| !s.is_empty()).collect()
448}
449
450// ascii version
451//pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
452//where
453// T: AsRef<str>,
454//{
455// let s = s.as_ref();
456//
457// let mut words = Vec::new();
458// let mut last_end = 0;
459// for i in 0..s.len() {
460// for boundary in boundaries {
461// if (boundary.condition)(&s[i..]) {
462// words.push(&s[last_end..i + boundary.start]);
463// last_end = i + boundary.start + boundary.len;
464// break;
465// }
466// }
467// }
468// words.push(&s[last_end..]);
469// words
470//}
471
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hyphen() {
        let s = "a-b-c";
        let v = split(&s, &[Boundary::HYPHEN]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn underscore() {
        let s = "a_b_c";
        let v = split(&s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn space() {
        let s = "a b c";
        let v = split(&s, &[Boundary::SPACE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn delimiters() {
        let s = "aaa-bbb_ccc ddd ddd-eee";
        let v = split(
            &s,
            &[Boundary::SPACE, Boundary::UNDERSCORE, Boundary::HYPHEN],
        );
        assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    #[test]
    fn lower_upper() {
        let s = "lowerUpperUpper";
        let v = split(&s, &[Boundary::LOWER_UPPER]);
        assert_eq!(v, vec!["lower", "Upper", "Upper"]);
    }

    #[test]
    fn upper_lower() {
        // The split happens between the uppercase and the lowercase letter.
        let s = "ABc";
        let v = split(&s, &[Boundary::UPPER_LOWER]);
        assert_eq!(v, vec!["AB", "c"]);
    }

    #[test]
    fn acronym() {
        let s = "XMLRequest";
        let v = split(&s, &[Boundary::ACRONYM]);
        assert_eq!(v, vec!["XML", "Request"]);
    }

    #[test]
    fn lower_digit() {
        let s = "abc123def";
        let v = split(&s, &[Boundary::LOWER_DIGIT]);
        assert_eq!(v, vec!["abc", "123def"]);
    }

    #[test]
    fn upper_digit() {
        let s = "ABC123DEF";
        let v = split(&s, &[Boundary::UPPER_DIGIT]);
        assert_eq!(v, vec!["ABC", "123DEF"]);
    }

    #[test]
    fn digit_lower() {
        let s = "123abc456";
        let v = split(&s, &[Boundary::DIGIT_LOWER]);
        assert_eq!(v, vec!["123", "abc456"]);
    }

    #[test]
    fn digit_upper() {
        let s = "123ABC456";
        let v = split(&s, &[Boundary::DIGIT_UPPER]);
        assert_eq!(v, vec!["123", "ABC456"]);
    }

    #[test]
    fn empty_string() {
        let s = "";
        let v = split(&s, &Boundary::defaults());
        assert!(v.is_empty());
    }

    #[test]
    fn defaults_mixed() {
        let s = "XMLHttpRequest";
        let v = split(&s, &Boundary::defaults());
        assert_eq!(v, vec!["XML", "Http", "Request"]);
    }

    #[test]
    fn boundaries_found_in_string() {
        // upper lower is no longer a default
        assert_eq!(Vec::<Boundary>::new(), Boundary::defaults_from(".Aaaa"));
        assert_eq!(
            vec![Boundary::LOWER_UPPER, Boundary::LOWER_DIGIT,],
            Boundary::defaults_from("a8.Aa.aA")
        );
        assert_eq!(
            Boundary::digits().to_vec(),
            Boundary::defaults_from("b1B1b")
        );
        assert_eq!(
            vec![
                Boundary::UNDERSCORE,
                Boundary::HYPHEN,
                Boundary::SPACE,
                Boundary::ACRONYM,
            ],
            Boundary::defaults_from("AAa -_")
        );
    }

    #[test]
    fn boundary_consts_same() {
        assert_eq!(Boundary::SPACE, Boundary::SPACE);
    }

    #[test]
    fn from_delim_dot() {
        let boundary = Boundary::from_delim(".");
        let s = "lower.Upper.Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "Upper", "Upper"], v)
    }

    #[test]
    fn from_delim_double_colon() {
        let boundary = Boundary::from_delim("::");
        let s = "lower::lowerUpper::Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "lowerUpper", "Upper"], v)
    }
}