1use mz_repr::adt::regex::Regex;
11
12pub fn regexp_split_to_array<'a>(text: &'a str, regexp: &Regex) -> Vec<&'a str> {
13 let mut finder = regexp.find_iter(text);
19 let mut last = 0;
20 let mut found = Vec::new();
21 loop {
22 match finder.next() {
23 None => {
24 if last <= text.len() {
25 let s = &text[last..];
26 found.push(s);
27 }
28 break;
29 }
30 Some(m) => {
31 if m.end() > 0 && m.start() < text.len() {
33 let matched = &text[last..m.start()];
34 last = m.end();
35 found.push(matched);
36 }
37 }
38 }
39 }
40 found
41}
42
#[cfg(test)]
mod tests {
    use mz_repr::adt::regex::Regex;

    use crate::regexp_split_to_array;

    /// Compiles `needle` into a [`Regex`], taking case sensitivity from `flags`.
    ///
    /// Flags are processed left to right: `i` enables case-insensitive matching and `c`
    /// disables it again, so the last flag wins. Any other character yields an error.
    fn build_regex(needle: &str, flags: &str) -> Result<Regex, anyhow::Error> {
        let mut case_insensitive = false;
        for f in flags.chars() {
            case_insensitive = match f {
                'i' => true,
                'c' => false,
                _ => anyhow::bail!("unexpected regex flags"),
            };
        }
        Ok(Regex::new(needle, case_insensitive)?)
    }

    /// Cross-checks `regexp_split_to_array` against a live PostgreSQL server.
    ///
    /// Does nothing unless the `POSTGRES_URL` environment variable is set. For every
    /// input/regexp combination it asserts that both implementations agree and prints a
    /// `TestCase { .. }` literal that can be pasted into the table in
    /// `test_regexp_split_array`.
    #[mz_ore::test]
    #[cfg_attr(miri, ignore)]
    fn test_pg_regexp_split_array() {
        let Ok(postgres_url) = std::env::var("POSTGRES_URL") else {
            return;
        };
        let mut client = postgres::Client::connect(&postgres_url, postgres::NoTls).unwrap();

        // Note: the third and fifth inputs contain TWO consecutive spaces.
        let inputs = vec!["", " ", "  ", "12 34", "12  34", " 12 34 "];
        let regexps = vec!["", "\\s", "\\s+", "\\s*"];
        for input in inputs {
            for re in &regexps {
                let regex = build_regex(re, "").unwrap();
                let pg: Vec<String> = client
                    .query_one("select regexp_split_to_array($1, $2)", &[&input, re])
                    .unwrap()
                    .get(0);
                let mz = regexp_split_to_array(input, &regex);
                assert_eq!(pg, mz, "input: `{input}`, regex: `{re}`");
                // Backslashes are doubled so the printed pattern is a valid Rust literal.
                println!(
                    r#"TestCase {{
                text: "{input}",
                regexp: "{}",
                expect: &{pg:?},
            }},"#,
                    re.replace('\\', "\\\\"),
                );
            }
        }
    }

    /// Checks `regexp_split_to_array` against a fixed table of expected results.
    ///
    /// The entries use the layout printed by `test_pg_regexp_split_array`. All mismatches are
    /// collected first so that a single run reports every failing case, and the test then
    /// fails if any were found.
    #[mz_ore::test]
    #[cfg_attr(miri, ignore)]
    fn test_regexp_split_array() {
        struct TestCase {
            text: &'static str,
            regexp: &'static str,
            expect: &'static [&'static str],
        }
        let tests = vec![
            // Empty input.
            TestCase {
                text: "",
                regexp: "",
                expect: &[""],
            },
            TestCase {
                text: "",
                regexp: "\\s",
                expect: &[""],
            },
            TestCase {
                text: "",
                regexp: "\\s+",
                expect: &[""],
            },
            TestCase {
                text: "",
                regexp: "\\s*",
                expect: &[""],
            },
            // A single space.
            TestCase {
                text: " ",
                regexp: "",
                expect: &[" "],
            },
            TestCase {
                text: " ",
                regexp: "\\s",
                expect: &["", ""],
            },
            TestCase {
                text: " ",
                regexp: "\\s+",
                expect: &["", ""],
            },
            TestCase {
                text: " ",
                regexp: "\\s*",
                expect: &["", ""],
            },
            // Two consecutive spaces.
            TestCase {
                text: "  ",
                regexp: "",
                expect: &[" ", " "],
            },
            TestCase {
                text: "  ",
                regexp: "\\s",
                expect: &["", "", ""],
            },
            TestCase {
                text: "  ",
                regexp: "\\s+",
                expect: &["", ""],
            },
            TestCase {
                text: "  ",
                regexp: "\\s*",
                expect: &["", ""],
            },
            // Two tokens separated by one space.
            TestCase {
                text: "12 34",
                regexp: "",
                expect: &["1", "2", " ", "3", "4"],
            },
            TestCase {
                text: "12 34",
                regexp: "\\s",
                expect: &["12", "34"],
            },
            TestCase {
                text: "12 34",
                regexp: "\\s+",
                expect: &["12", "34"],
            },
            TestCase {
                text: "12 34",
                regexp: "\\s*",
                expect: &["1", "2", "3", "4"],
            },
            // Two tokens separated by two consecutive spaces.
            TestCase {
                text: "12  34",
                regexp: "",
                expect: &["1", "2", " ", " ", "3", "4"],
            },
            TestCase {
                text: "12  34",
                regexp: "\\s",
                expect: &["12", "", "34"],
            },
            TestCase {
                text: "12  34",
                regexp: "\\s+",
                expect: &["12", "34"],
            },
            TestCase {
                text: "12  34",
                regexp: "\\s*",
                expect: &["1", "2", "3", "4"],
            },
            // Leading and trailing space.
            TestCase {
                text: " 12 34 ",
                regexp: "",
                expect: &[" ", "1", "2", " ", "3", "4", " "],
            },
            TestCase {
                text: " 12 34 ",
                regexp: "\\s",
                expect: &["", "12", "34", ""],
            },
            TestCase {
                text: " 12 34 ",
                regexp: "\\s+",
                expect: &["", "12", "34", ""],
            },
            TestCase {
                text: " 12 34 ",
                regexp: "\\s*",
                expect: &["", "1", "2", "3", "4", ""],
            },
        ];

        let mut failures = Vec::new();
        for tc in tests {
            let regex = build_regex(tc.regexp, "").unwrap();
            let result = regexp_split_to_array(tc.text, &regex);
            if tc.expect != result {
                failures.push(format!(
                    "input: `{}`, regex: `{}`, got: {:?}, expect: {:?}",
                    tc.text, tc.regexp, result, tc.expect
                ));
            }
        }
        // A mismatch must fail the test; printing alone would let regressions slip through.
        assert!(
            failures.is_empty(),
            "{} case(s) failed:\n{}",
            failures.len(),
            failures.join("\n")
        );
    }
}