1use mz_repr::adt::regex::Regex;
11
12pub fn regexp_split_to_array<'a>(text: &'a str, regexp: &Regex) -> Vec<&'a str> {
13 let mut finder = regexp.find_iter(text);
19 let mut last = 0;
20 let mut found = Vec::new();
21 loop {
22 match finder.next() {
23 None => {
24 if last <= text.len() {
25 let s = &text[last..];
26 found.push(s);
27 }
28 break;
29 }
30 Some(m) => {
31 if m.end() > 0 && m.start() < text.len() {
33 let matched = &text[last..m.start()];
34 last = m.end();
35 found.push(matched);
36 }
37 }
38 }
39 }
40 found
41}
42
43#[cfg(test)]
44mod tests {
45 use mz_repr::adt::regex::Regex;
46
47 use crate::regexp_split_to_array;
48
49 fn build_regex(needle: &str, flags: &str) -> Result<Regex, anyhow::Error> {
50 let mut case_insensitive = false;
51 for f in flags.chars() {
53 match f {
54 'i' => {
55 case_insensitive = true;
56 }
57 'c' => {
58 case_insensitive = false;
59 }
60 _ => anyhow::bail!("unexpected regex flags"),
61 }
62 }
63 Regex::new(needle, case_insensitive).map_err(|e| anyhow::anyhow!("{}", e))
64 }
65
66 #[mz_ore::test]
68 #[cfg_attr(miri, ignore)] fn test_pg_regexp_split_array() {
70 let Ok(postgres_url) = std::env::var("POSTGRES_URL") else {
71 return;
72 };
73 let mut client = postgres::Client::connect(&postgres_url, postgres::NoTls).unwrap();
74
75 let inputs = vec!["", " ", " ", "12 34", "12 34", " 12 34 "];
76 let regexps = vec!["", "\\s", "\\s+", "\\s*"];
77 for input in inputs {
78 for re in ®exps {
79 let regex = build_regex(re, "").unwrap();
80 let pg: Vec<String> = client
83 .query_one("select regexp_split_to_array($1, $2)", &[&input, re])
84 .unwrap()
85 .get(0);
86 let mz = regexp_split_to_array(input, ®ex);
87 assert_eq!(pg, mz);
88 println!(
90 r#"TestCase {{
91 text: "{input}",
92 regexp: "{}",
93 expect: &{pg:?},
94 }},"#,
95 re.replace('\\', "\\\\"),
96 );
97 }
98 }
99 }
100
101 #[mz_ore::test]
102 #[cfg_attr(miri, ignore)] fn test_regexp_split_array() {
104 struct TestCase {
106 text: &'static str,
107 regexp: &'static str,
108 expect: &'static [&'static str],
109 }
110 let tests = vec![
111 TestCase {
112 text: "",
113 regexp: "",
114 expect: &[""],
115 },
116 TestCase {
117 text: "",
118 regexp: "\\s",
119 expect: &[""],
120 },
121 TestCase {
122 text: "",
123 regexp: "\\s+",
124 expect: &[""],
125 },
126 TestCase {
127 text: "",
128 regexp: "\\s*",
129 expect: &[""],
130 },
131 TestCase {
132 text: " ",
133 regexp: "",
134 expect: &[" "],
135 },
136 TestCase {
137 text: " ",
138 regexp: "\\s",
139 expect: &["", ""],
140 },
141 TestCase {
142 text: " ",
143 regexp: "\\s+",
144 expect: &["", ""],
145 },
146 TestCase {
147 text: " ",
148 regexp: "\\s*",
149 expect: &["", ""],
150 },
151 TestCase {
152 text: " ",
153 regexp: "",
154 expect: &[" ", " "],
155 },
156 TestCase {
157 text: " ",
158 regexp: "\\s",
159 expect: &["", "", ""],
160 },
161 TestCase {
162 text: " ",
163 regexp: "\\s+",
164 expect: &["", ""],
165 },
166 TestCase {
167 text: " ",
168 regexp: "\\s*",
169 expect: &["", ""],
170 },
171 TestCase {
172 text: "12 34",
173 regexp: "",
174 expect: &["1", "2", " ", "3", "4"],
175 },
176 TestCase {
177 text: "12 34",
178 regexp: "\\s",
179 expect: &["12", "34"],
180 },
181 TestCase {
182 text: "12 34",
183 regexp: "\\s+",
184 expect: &["12", "34"],
185 },
186 TestCase {
187 text: "12 34",
188 regexp: "\\s*",
189 expect: &["1", "2", "3", "4"],
190 },
191 TestCase {
192 text: "12 34",
193 regexp: "",
194 expect: &["1", "2", " ", " ", "3", "4"],
195 },
196 TestCase {
197 text: "12 34",
198 regexp: "\\s",
199 expect: &["12", "", "34"],
200 },
201 TestCase {
202 text: "12 34",
203 regexp: "\\s+",
204 expect: &["12", "34"],
205 },
206 TestCase {
207 text: "12 34",
208 regexp: "\\s*",
209 expect: &["1", "2", "3", "4"],
210 },
211 TestCase {
212 text: " 12 34 ",
213 regexp: "",
214 expect: &[" ", "1", "2", " ", "3", "4", " "],
215 },
216 TestCase {
217 text: " 12 34 ",
218 regexp: "\\s",
219 expect: &["", "12", "34", ""],
220 },
221 TestCase {
222 text: " 12 34 ",
223 regexp: "\\s+",
224 expect: &["", "12", "34", ""],
225 },
226 TestCase {
227 text: " 12 34 ",
228 regexp: "\\s*",
229 expect: &["", "1", "2", "3", "4", ""],
230 },
231 ];
232 for tc in tests {
233 let regex = build_regex(tc.regexp, "").unwrap();
234 let result = regexp_split_to_array(tc.text, ®ex);
235 if tc.expect != result {
236 println!(
237 "input: `{}`, regex: `{}`, got: {:?}, expect: {:?}",
238 tc.text, tc.regexp, result, tc.expect
239 );
240 }
241 }
242 }
243}