arrow_string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Provide SQL's LIKE operators for Arrow's string arrays
19
20use crate::predicate::Predicate;
21use arrow_array::cast::AsArray;
22use arrow_array::*;
23use arrow_schema::*;
24use arrow_select::take::take;
25use iterator::ArrayIter;
26use std::sync::Arc;
27
/// The string predicate requested by one of the public kernels in this module.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is the negation flag: `true` selects
/// the `NOT LIKE` / `NOT ILIKE` form of the operator.
#[derive(Debug)]
enum Op {
    /// `LIKE` when the flag is `false`, `NOT LIKE` when it is `true`
    Like(bool),
    /// `ILIKE` when the flag is `false`, `NOT ILIKE` when it is `true`
    ILike(bool),
    /// Substring test
    Contains,
    /// Prefix test
    StartsWith,
    /// Suffix test
    EndsWith,
}
36
37impl std::fmt::Display for Op {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            Op::Like(false) => write!(f, "LIKE"),
41            Op::Like(true) => write!(f, "NLIKE"),
42            Op::ILike(false) => write!(f, "ILIKE"),
43            Op::ILike(true) => write!(f, "NILIKE"),
44            Op::Contains => write!(f, "CONTAINS"),
45            Op::StartsWith => write!(f, "STARTS_WITH"),
46            Op::EndsWith => write!(f, "ENDS_WITH"),
47        }
48    }
49}
50
51/// Perform SQL `left LIKE right`
52///
53/// There are two wildcards supported with the LIKE operator:
54///
55/// 1. `%` - The percent sign represents zero, one, or multiple characters
56/// 2. `_` - The underscore represents a single character
57///
58/// For example:
59/// ```
60/// # use arrow_array::{StringArray, BooleanArray};
61/// # use arrow_string::like::like;
62/// #
63/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
64/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
65///
66/// let result = like(&strings, &patterns).unwrap();
67/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
68/// ```
69pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
70    like_op(Op::Like(false), left, right)
71}
72
73/// Perform SQL `left ILIKE right`
74///
75/// This is a case-insensitive version of [`like`]
76///
77/// Note: this only implements loose matching as defined by the Unicode standard. For example,
78/// the `ff` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS`
79pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
80    like_op(Op::ILike(false), left, right)
81}
82
83/// Perform SQL `left NOT LIKE right`
84///
85/// See the documentation on [`like`] for more details
86pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
87    like_op(Op::Like(true), left, right)
88}
89
90/// Perform SQL `left NOT ILIKE right`
91///
92/// See the documentation on [`ilike`] for more details
93pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
94    like_op(Op::ILike(true), left, right)
95}
96
97/// Perform SQL `STARTSWITH(left, right)`
98pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
99    like_op(Op::StartsWith, left, right)
100}
101
102/// Perform SQL `ENDSWITH(left, right)`
103pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
104    like_op(Op::EndsWith, left, right)
105}
106
107/// Perform SQL `CONTAINS(left, right)`
108pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
109    like_op(Op::Contains, left, right)
110}
111
112fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
113    use arrow_schema::DataType::*;
114    let (l, l_s) = lhs.get();
115    let (r, r_s) = rhs.get();
116
117    if l.len() != r.len() && !l_s && !r_s {
118        return Err(ArrowError::InvalidArgumentError(format!(
119            "Cannot compare arrays of different lengths, got {} vs {}",
120            l.len(),
121            r.len()
122        )));
123    }
124
125    let l_v = l.as_any_dictionary_opt();
126    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
127
128    let r_v = r.as_any_dictionary_opt();
129    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
130
131    match (l.data_type(), r.data_type()) {
132        (Utf8, Utf8) => {
133            apply::<&GenericStringArray<i32>>(op, l.as_string(), l_s, l_v, r.as_string(), r_s, r_v)
134        }
135        (LargeUtf8, LargeUtf8) => {
136            apply::<&GenericStringArray<i64>>(op, l.as_string(), l_s, l_v, r.as_string(), r_s, r_v)
137        }
138        (Utf8View, Utf8View) => apply::<&StringViewArray>(
139            op,
140            l.as_string_view(),
141            l_s,
142            l_v,
143            r.as_string_view(),
144            r_s,
145            r_v,
146        ),
147        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
148            "Invalid string operation: {l_t} {op} {r_t}"
149        ))),
150    }
151}
152
153/// A trait for Arrow String Arrays, currently three types are supported:
154/// - `StringArray`
155/// - `LargeStringArray`
156/// - `StringViewArray`
157///
158/// This trait helps to abstract over the different types of string arrays
159/// so that we don't need to duplicate the implementation for each type.
160pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
161    /// Returns true if all data within this string array is ASCII
162    fn is_ascii(&self) -> bool;
163    /// Constructs a new iterator
164    fn iter(&self) -> ArrayIter<Self>;
165}
166
167impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
168    fn is_ascii(&self) -> bool {
169        GenericStringArray::<O>::is_ascii(self)
170    }
171
172    fn iter(&self) -> ArrayIter<Self> {
173        GenericStringArray::<O>::iter(self)
174    }
175}
176impl<'a> StringArrayType<'a> for &'a StringViewArray {
177    fn is_ascii(&self) -> bool {
178        StringViewArray::is_ascii(self)
179    }
180
181    fn iter(&self) -> ArrayIter<Self> {
182        StringViewArray::iter(self)
183    }
184}
185
186fn apply<'a, T: StringArrayType<'a> + 'a>(
187    op: Op,
188    l: T,
189    l_s: bool,
190    l_v: Option<&'a dyn AnyDictionaryArray>,
191    r: T,
192    r_s: bool,
193    r_v: Option<&'a dyn AnyDictionaryArray>,
194) -> Result<BooleanArray, ArrowError> {
195    let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
196    if r_s {
197        let idx = match r_v {
198            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
199            Some(dict) => dict.normalized_keys()[0],
200            None => 0,
201        };
202        if r.is_null(idx) {
203            return Ok(BooleanArray::new_null(l_len));
204        }
205        op_scalar::<T>(op, l, l_v, r.value(idx))
206    } else {
207        match (l_s, l_v, r_v) {
208            (true, None, None) => {
209                let v = l.is_valid(0).then(|| l.value(0));
210                op_binary(op, std::iter::repeat(v), r.iter())
211            }
212            (true, Some(l_v), None) => {
213                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
214                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
215                op_binary(op, std::iter::repeat(v), r.iter())
216            }
217            (true, None, Some(r_v)) => {
218                let v = l.is_valid(0).then(|| l.value(0));
219                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
220            }
221            (true, Some(l_v), Some(r_v)) => {
222                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
223                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
224                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
225            }
226            (false, None, None) => op_binary(op, l.iter(), r.iter()),
227            (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
228            (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
229            (false, Some(l_v), Some(r_v)) => {
230                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
231            }
232        }
233    }
234}
235
236#[inline(never)]
237fn op_scalar<'a, T: StringArrayType<'a>>(
238    op: Op,
239    l: T,
240    l_v: Option<&dyn AnyDictionaryArray>,
241    r: &str,
242) -> Result<BooleanArray, ArrowError> {
243    let r = match op {
244        Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
245        Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
246        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
247        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
248        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
249    };
250
251    Ok(match l_v {
252        Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
253        None => r,
254    })
255}
256
257fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
258    a: T,
259    a_v: &'a dyn AnyDictionaryArray,
260) -> impl Iterator<Item = Option<&'a str>> + 'a {
261    let nulls = a_v.nulls();
262    let keys = a_v.normalized_keys();
263    keys.into_iter().enumerate().map(move |(idx, key)| {
264        if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
265            return None;
266        }
267        Some(a.value(key))
268    })
269}
270
271#[inline(never)]
272fn op_binary<'a>(
273    op: Op,
274    l: impl Iterator<Item = Option<&'a str>>,
275    r: impl Iterator<Item = Option<&'a str>>,
276) -> Result<BooleanArray, ArrowError> {
277    match op {
278        Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
279        Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
280        Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
281        Op::StartsWith => Ok(l
282            .zip(r)
283            .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
284            .collect()),
285        Op::EndsWith => Ok(l
286            .zip(r)
287            .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
288            .collect()),
289    }
290}
291
292fn str_contains(haystack: &str, needle: &str) -> bool {
293    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
294}
295
296fn binary_predicate<'a>(
297    l: impl Iterator<Item = Option<&'a str>>,
298    r: impl Iterator<Item = Option<&'a str>>,
299    neg: bool,
300    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
301) -> Result<BooleanArray, ArrowError> {
302    let mut previous = None;
303    l.zip(r)
304        .map(|(l, r)| match (l, r) {
305            (Some(l), Some(r)) => {
306                let p: &Predicate = match previous {
307                    Some((expr, ref predicate)) if expr == r => predicate,
308                    _ => &previous.insert((r, f(r)?)).1,
309                };
310                Ok(Some(p.evaluate(l) != neg))
311            }
312            _ => Ok(None),
313        })
314        .collect()
315}
316
317// Deprecated kernels
318
319fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
320    match data_type {
321        DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
322        DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
323        DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
324        d => Err(ArrowError::InvalidArgumentError(format!(
325            "Unsupported string scalar data type {d:?}",
326        ))),
327    }
328}
329
// Generates the four deprecated entry points that predate the `Datum`-based kernels.
//
// * `$fn_datum` - the current kernel that every generated function forwards to
// * `$fn_array` - array vs. array, for `GenericStringArray`
// * `$fn_scalar` - array vs. `&str`, for `GenericStringArray`
// * `$fn_array_dyn` - array vs. array, for `&dyn Array`
// * `$fn_scalar_dyn` - array vs. `&str`, for `&dyn Array`
// * `$deprecation` - the note attached to each generated function's `#[deprecated]`
macro_rules! legacy_kernels {
    (
        $fn_datum:ident,
        $fn_array:ident,
        $fn_scalar:ident,
        $fn_array_dyn:ident,
        $fn_scalar_dyn:ident,
        $deprecation:expr
    ) => {
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            // Wrap the `&str` in a one-element array of the same offset width as `left`.
            let pattern = GenericStringArray::<O>::from_iter_values([right]);
            $fn_datum(left, &Scalar::new(&pattern))
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            // The scalar's array type is derived from the data type of `left`.
            let pattern = make_scalar(left.data_type(), right)?;
            $fn_datum(&left, &Scalar::new(&pattern))
        }
    };
}
368
369legacy_kernels!(
370    like,
371    like_utf8,
372    like_utf8_scalar,
373    like_dyn,
374    like_utf8_scalar_dyn,
375    "Use arrow_string::like::like"
376);
377legacy_kernels!(
378    ilike,
379    ilike_utf8,
380    ilike_utf8_scalar,
381    ilike_dyn,
382    ilike_utf8_scalar_dyn,
383    "Use arrow_string::like::ilike"
384);
385legacy_kernels!(
386    nlike,
387    nlike_utf8,
388    nlike_utf8_scalar,
389    nlike_dyn,
390    nlike_utf8_scalar_dyn,
391    "Use arrow_string::like::nlike"
392);
393legacy_kernels!(
394    nilike,
395    nilike_utf8,
396    nilike_utf8_scalar,
397    nilike_dyn,
398    nilike_utf8_scalar_dyn,
399    "Use arrow_string::like::nilike"
400);
401legacy_kernels!(
402    contains,
403    contains_utf8,
404    contains_utf8_scalar,
405    contains_dyn,
406    contains_utf8_scalar_dyn,
407    "Use arrow_string::like::contains"
408);
409legacy_kernels!(
410    starts_with,
411    starts_with_utf8,
412    starts_with_utf8_scalar,
413    starts_with_dyn,
414    starts_with_utf8_scalar_dyn,
415    "Use arrow_string::like::starts_with"
416);
417
418legacy_kernels!(
419    ends_with,
420    ends_with_utf8,
421    ends_with_utf8_scalar,
422    ends_with_dyn,
423    ends_with_utf8_scalar_dyn,
424    "Use arrow_string::like::ends_with"
425);
426
427#[cfg(test)]
428#[allow(deprecated)]
429mod tests {
430    use super::*;
431    use arrow_array::types::Int8Type;
432    use std::iter::zip;
433
434    /// Applying `op(left, right)`, both sides are arrays
435    /// The macro tests four types of array implementations:
436    /// - `StringArray`
437    /// - `LargeStringArray`
438    /// - `StringViewArray`
439    /// - `DictionaryArray`
440    macro_rules! test_utf8 {
441        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
442            #[test]
443            fn $test_name() {
444                let expected = BooleanArray::from($expected);
445
446                let left = StringArray::from($left);
447                let right = StringArray::from($right);
448                let res = $op(&left, &right).unwrap();
449                assert_eq!(res, expected);
450
451                let left = LargeStringArray::from($left);
452                let right = LargeStringArray::from($right);
453                let res = $op(&left, &right).unwrap();
454                assert_eq!(res, expected);
455
456                let left = StringViewArray::from($left);
457                let right = StringViewArray::from($right);
458                let res = $op(&left, &right).unwrap();
459                assert_eq!(res, expected);
460
461                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
462                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
463                let res = $op(&left, &right).unwrap();
464                assert_eq!(res, expected);
465            }
466        };
467    }
468
469    /// Applying `op(left, right)`, left side is array, right side is scalar
470    /// The macro tests four types of array implementations:
471    /// - `StringArray`
472    /// - `LargeStringArray`
473    /// - `StringViewArray`
474    /// - `DictionaryArray`
475    macro_rules! test_utf8_scalar {
476        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
477            #[test]
478            fn $test_name() {
479                let expected = BooleanArray::from($expected);
480
481                let left = StringArray::from($left);
482                let right = StringArray::from_iter_values([$right]);
483                let res = $op(&left, &Scalar::new(&right)).unwrap();
484                assert_eq!(res, expected);
485
486                let left = LargeStringArray::from($left);
487                let right = LargeStringArray::from_iter_values([$right]);
488                let res = $op(&left, &Scalar::new(&right)).unwrap();
489                assert_eq!(res, expected);
490
491                let left = StringViewArray::from($left);
492                let right = StringViewArray::from_iter_values([$right]);
493                let res = $op(&left, &Scalar::new(&right)).unwrap();
494                assert_eq!(res, expected);
495
496                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
497                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
498                let res = $op(&left, &Scalar::new(&right)).unwrap();
499                assert_eq!(res, expected);
500            }
501        };
502    }
503
504    test_utf8!(
505        test_utf8_array_like,
506        vec![
507            "arrow",
508            "arrow_long_string_more than 12 bytes",
509            "arrow",
510            "arrow",
511            "arrow",
512            "arrows",
513            "arrow",
514            "arrow"
515        ],
516        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
517        like,
518        vec![true, true, true, false, false, true, false, false]
519    );
520
521    test_utf8_scalar!(
522        test_utf8_array_like_scalar_escape_testing,
523        vec![
524            "varchar(255)",
525            "int(255)longer than 12 bytes",
526            "varchar",
527            "int"
528        ],
529        "%(%)%",
530        like,
531        vec![true, true, false, false]
532    );
533
534    test_utf8_scalar!(
535        test_utf8_array_like_scalar_escape_regex,
536        vec![".*", "a", "*"],
537        ".*",
538        like,
539        vec![true, false, false]
540    );
541
542    test_utf8_scalar!(
543        test_utf8_array_like_scalar_escape_regex_dot,
544        vec![".", "a", "*"],
545        ".",
546        like,
547        vec![true, false, false]
548    );
549
550    test_utf8_scalar!(
551        test_utf8_array_like_scalar,
552        vec![
553            "arrow",
554            "parquet",
555            "datafusion",
556            "flight",
557            "long string arrow test 12 bytes"
558        ],
559        "%ar%",
560        like,
561        vec![true, true, false, false, true]
562    );
563
564    test_utf8_scalar!(
565        test_utf8_array_like_scalar_start,
566        vec![
567            "arrow",
568            "parrow",
569            "arrows",
570            "arr",
571            "arrow long string longer than 12 bytes"
572        ],
573        "arrow%",
574        like,
575        vec![true, false, true, false, true]
576    );
577
    // Replicates `test_utf8_array_like_scalar_start` and
    // `test_utf8_array_like_scalar_dyn_start` to demonstrate that SQL `STARTSWITH` works as
    // expected.
580    test_utf8_scalar!(
581        test_utf8_array_starts_with_scalar_start,
582        vec![
583            "arrow",
584            "parrow",
585            "arrows",
586            "arr",
587            "arrow long string longer than 12 bytes"
588        ],
589        "arrow",
590        starts_with,
591        vec![true, false, true, false, true]
592    );
593
594    test_utf8_scalar!(
595        test_utf8_array_like_scalar_end,
596        vec![
597            "arrow",
598            "parrow",
599            "arrows",
600            "arr",
601            "arrow long string longer than 12 bytes"
602        ],
603        "%arrow",
604        like,
605        vec![true, true, false, false, false]
606    );
607
    // Replicates `test_utf8_array_like_scalar_end` and
    // `test_utf8_array_like_scalar_dyn_end` to demonstrate that SQL `ENDSWITH` works as
    // expected.
610    test_utf8_scalar!(
611        test_utf8_array_ends_with_scalar_end,
612        vec![
613            "arrow",
614            "parrow",
615            "arrows",
616            "arr",
617            "arrow long string longer than 12 bytes"
618        ],
619        "arrow",
620        ends_with,
621        vec![true, true, false, false, false]
622    );
623
624    test_utf8_scalar!(
625        test_utf8_array_like_scalar_equals,
626        vec![
627            "arrow",
628            "parrow",
629            "arrows",
630            "arr",
631            "arrow long string longer than 12 bytes"
632        ],
633        "arrow",
634        like,
635        vec![true, false, false, false, false]
636    );
637
638    test_utf8_scalar!(
639        test_utf8_array_like_scalar_one,
640        vec![
641            "arrow",
642            "arrows",
643            "parrow",
644            "arr",
645            "arrow long string longer than 12 bytes"
646        ],
647        "arrow_",
648        like,
649        vec![false, true, false, false, false]
650    );
651
652    test_utf8_scalar!(
653        test_utf8_scalar_like_escape,
654        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
655        "a\\%",
656        like,
657        vec![true, false, false]
658    );
659
660    test_utf8_scalar!(
661        test_utf8_scalar_like_escape_contains,
662        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
663        "%a\\%",
664        like,
665        vec![true, false, false]
666    );
667
668    test_utf8!(
669        test_utf8_scalar_ilike_regex,
670        vec!["%%%"],
671        vec![r"\%_\%"],
672        ilike,
673        vec![true]
674    );
675
676    test_utf8!(
677        test_utf8_array_nlike,
678        vec![
679            "arrow",
680            "arrow",
681            "arrow long string longer than 12 bytes",
682            "arrow",
683            "arrow",
684            "arrows",
685            "arrow"
686        ],
687        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
688        nlike,
689        vec![false, false, false, true, true, false, true]
690    );
691
692    test_utf8_scalar!(
693        test_utf8_array_nlike_escape_testing,
694        vec![
695            "varchar(255)",
696            "int(255) arrow long string longer than 12 bytes",
697            "varchar",
698            "int"
699        ],
700        "%(%)%",
701        nlike,
702        vec![false, false, true, true]
703    );
704
705    test_utf8_scalar!(
706        test_utf8_array_nlike_scalar_escape_regex,
707        vec![".*", "a", "*"],
708        ".*",
709        nlike,
710        vec![false, true, true]
711    );
712
713    test_utf8_scalar!(
714        test_utf8_array_nlike_scalar_escape_regex_dot,
715        vec![".", "a", "*"],
716        ".",
717        nlike,
718        vec![false, true, true]
719    );
720    test_utf8_scalar!(
721        test_utf8_array_nlike_scalar,
722        vec![
723            "arrow",
724            "parquet",
725            "datafusion",
726            "flight",
727            "arrow long string longer than 12 bytes"
728        ],
729        "%ar%",
730        nlike,
731        vec![false, false, true, true, false]
732    );
733
734    test_utf8_scalar!(
735        test_utf8_array_nlike_scalar_start,
736        vec![
737            "arrow",
738            "parrow",
739            "arrows",
740            "arr",
741            "arrow long string longer than 12 bytes"
742        ],
743        "arrow%",
744        nlike,
745        vec![false, true, false, true, false]
746    );
747
748    test_utf8_scalar!(
749        test_utf8_array_nlike_scalar_end,
750        vec![
751            "arrow",
752            "parrow",
753            "arrows",
754            "arr",
755            "arrow long string longer than 12 bytes"
756        ],
757        "%arrow",
758        nlike,
759        vec![false, false, true, true, true]
760    );
761
762    test_utf8_scalar!(
763        test_utf8_array_nlike_scalar_equals,
764        vec![
765            "arrow",
766            "parrow",
767            "arrows",
768            "arr",
769            "arrow long string longer than 12 bytes"
770        ],
771        "arrow",
772        nlike,
773        vec![false, true, true, true, true]
774    );
775
776    test_utf8_scalar!(
777        test_utf8_array_nlike_scalar_one,
778        vec![
779            "arrow",
780            "arrows",
781            "parrow",
782            "arr",
783            "arrow long string longer than 12 bytes"
784        ],
785        "arrow_",
786        nlike,
787        vec![true, false, true, true, true]
788    );
789
790    test_utf8!(
791        test_utf8_array_ilike,
792        vec![
793            "arrow",
794            "arrow",
795            "ARROW long string longer than 12 bytes",
796            "arrow",
797            "ARROW",
798            "ARROWS",
799            "arROw"
800        ],
801        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
802        ilike,
803        vec![true, true, true, false, false, true, false]
804    );
805
806    test_utf8_scalar!(
807        ilike_utf8_scalar_escape_testing,
808        vec![
809            "varchar(255)",
810            "int(255) long string longer than 12 bytes",
811            "varchar",
812            "int"
813        ],
814        "%(%)%",
815        ilike,
816        vec![true, true, false, false]
817    );
818
819    test_utf8_scalar!(
820        test_utf8_array_ilike_scalar,
821        vec![
822            "arrow",
823            "parquet",
824            "datafusion",
825            "flight",
826            "arrow long string longer than 12 bytes"
827        ],
828        "%AR%",
829        ilike,
830        vec![true, true, false, false, true]
831    );
832
833    test_utf8_scalar!(
834        test_utf8_array_ilike_scalar_start,
835        vec![
836            "arrow",
837            "parrow",
838            "arrows",
839            "ARR",
840            "arrow long string longer than 12 bytes"
841        ],
842        "aRRow%",
843        ilike,
844        vec![true, false, true, false, true]
845    );
846
847    test_utf8_scalar!(
848        test_utf8_array_ilike_scalar_end,
849        vec![
850            "ArroW",
851            "parrow",
852            "ARRowS",
853            "arr",
854            "arrow long string longer than 12 bytes"
855        ],
856        "%arrow",
857        ilike,
858        vec![true, true, false, false, false]
859    );
860
861    test_utf8_scalar!(
862        test_utf8_array_ilike_scalar_equals,
863        vec![
864            "arrow",
865            "parrow",
866            "arrows",
867            "arr",
868            "arrow long string longer than 12 bytes"
869        ],
870        "Arrow",
871        ilike,
872        vec![true, false, false, false, false]
873    );
874
875    // We only implement loose matching
876    test_utf8_scalar!(
877        test_utf8_array_ilike_unicode,
878        vec![
879            "FFkoß",
880            "FFkoSS",
881            "FFkoss",
882            "FFkoS",
883            "FFkos",
884            "ffkoSS",
885            "ffkoß",
886            "FFKoSS",
887            "longer than 12 bytes FFKoSS"
888        ],
889        "FFkoSS",
890        ilike,
891        vec![false, true, true, false, false, false, false, true, false]
892    );
893
894    test_utf8_scalar!(
895        test_utf8_array_ilike_unicode_starts,
896        vec![
897            "FFkoßsdlkdf",
898            "FFkoSSsdlkdf",
899            "FFkosssdlkdf",
900            "FFkoS",
901            "FFkos",
902            "ffkoSS",
903            "ffkoß",
904            "FfkosSsdfd",
905            "FFKoSS",
906            "longer than 12 bytes FFKoSS",
907        ],
908        "FFkoSS%",
909        ilike,
910        vec![false, true, true, false, false, false, false, true, true, false]
911    );
912
913    test_utf8_scalar!(
914        test_utf8_array_ilike_unicode_ends,
915        vec![
916            "sdlkdfFFkoß",
917            "sdlkdfFFkoSS",
918            "sdlkdfFFkoss",
919            "FFkoS",
920            "FFkos",
921            "ffkoSS",
922            "ffkoß",
923            "h😃klFfkosS",
924            "FFKoSS",
925            "longer than 12 bytes FFKoSS",
926        ],
927        "%FFkoSS",
928        ilike,
929        vec![false, true, true, false, false, false, false, true, true, true]
930    );
931
932    test_utf8_scalar!(
933        test_utf8_array_ilike_unicode_contains,
934        vec![
935            "sdlkdfFkoßsdfs",
936            "sdlkdfFkoSSdggs",
937            "sdlkdfFkosssdsd",
938            "FkoS",
939            "Fkos",
940            "ffkoSS",
941            "ffkoß",
942            "😃sadlksffkosSsh😃klF",
943            "😱slgffkosSsh😃klF",
944            "FFKoSS",
945            "longer than 12 bytes FFKoSS",
946        ],
947        "%FFkoSS%",
948        ilike,
949        vec![false, true, true, false, false, false, false, true, true, true, true]
950    );
951
952    // Replicates `test_utf8_array_ilike_unicode_contains` and
953    // `test_utf8_array_ilike_unicode_contains_dyn` to
954    // demonstrate that `SQL CONTAINS` works as expected.
955    //
956    // NOTE: 5 of the values were changed because the original used a case insensitive `ilike`.
957    test_utf8_scalar!(
958        test_utf8_array_contains_unicode_contains,
959        vec![
960            "sdlkdfFkoßsdfs",
961            "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs"
962            "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd"
963            "FkoS",
964            "Fkos",
965            "ffkoSS",
966            "ffkoß",
967            "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF"
968            "😱slgFFkoSSsh😃klF",    // Original was case insensitive "😱slgffkosSsh😃klF"
969            "FFkoSS",                // "FFKoSS"
970            "longer than 12 bytes FFKoSS",
971        ],
972        "FFkoSS",
973        contains,
974        vec![false, true, true, false, false, false, false, true, true, true, false]
975    );
976
977    test_utf8_scalar!(
978        test_utf8_array_ilike_unicode_complex,
979        vec![
980            "sdlkdfFooßsdfs",
981            "sdlkdfFooSSdggs",
982            "sdlkdfFoosssdsd",
983            "FooS",
984            "Foos",
985            "ffooSS",
986            "ffooß",
987            "😃sadlksffofsSsh😃klF",
988            "😱slgffoesSsh😃klF",
989            "FFKoSS",
990            "longer than 12 bytes FFKoSS",
991        ],
992        "%FF__SS%",
993        ilike,
994        vec![false, true, true, false, false, false, false, true, true, true, true]
995    );
996
    // 😈 is four bytes long.
    //
    // Case-sensitive `like` with a leading `%` followed by a literal suffix that
    // contains a multi-byte character: only the two values ending in
    // `Ssh😈klF` are expected to match.
    //
    // NOTE(review): the test name reads `uff8`, presumably a typo for `utf8`;
    // left unchanged here so the generated test keeps its name.
    test_utf8_scalar!(
        test_uff8_array_like_multibyte,
        vec![
            "sdlkdfFooßsdfs",
            "sdlkdfFooSSdggs",
            "sdlkdfFoosssdsd",
            "FooS",
            "Foos",
            "ffooSS",
            "ffooß",
            "😃sadlksffofsSsh😈klF",
            "😱slgffoesSsh😈klF",
            "FFKoSS",
            "longer than 12 bytes FFKoSS",
        ],
        "%Ssh😈klF",
        like,
        vec![false, false, false, false, false, false, false, true, true, false, false]
    );
1017
    // `ilike` against the scalar pattern `arrow_`: `_` stands for exactly one
    // character, so only "arrows" is expected to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        ilike,
        vec![false, true, false, false, false]
    );
1031
    // Array-vs-array `nilike` (negated, case-insensitive LIKE). The value and
    // pattern vectors have the same length; presumably the macro pairs them
    // index by index — the expected vector is consistent with that reading
    // (e.g. "arrow" vs "foo" -> true, "ARROWS" vs "arrow_" -> false).
    test_utf8!(
        test_utf8_array_nilike,
        vec![
            "arrow",
            "arrow",
            "ARROW longer than 12 bytes string",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        nilike,
        vec![false, false, false, true, true, false, true]
    );
1047
    // `nilike` with literal parentheses in the pattern: `(` and `)` must be
    // matched verbatim, so only the values without a `(...)` group are
    // expected to yield `true`.
    test_utf8_scalar!(
        nilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) longer than 12 bytes string",
            "varchar",
            "int"
        ],
        "%(%)%",
        nilike,
        vec![false, false, true, true]
    );
1060
    // `nilike` with `%AR%`: values containing "ar" in any letter case are
    // expected to yield `false`, all others `true`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        nilike,
        vec![false, false, true, true, false]
    );
1074
    // `nilike` with a trailing `%` (`aRRow%`): values starting with "arrow" in
    // any letter case are expected to yield `false`, all others `true`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        nilike,
        vec![false, true, false, true, false]
    );
1088
    // `nilike` with a leading `%` (`%arrow`): values ending with "arrow" in any
    // letter case are expected to yield `false`, all others `true`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nilike,
        vec![false, false, true, true, true]
    );
1102
    // `nilike` with a wildcard-free pattern (`Arrow`): only the value equal to
    // "arrow" ignoring letter case is expected to yield `false`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_equals,
        vec![
            "arRow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        nilike,
        vec![false, true, true, true, true]
    );
1116
    // `nilike` with `arrow_`: `_` stands for exactly one character, so only
    // "arrows" matches the pattern and is expected to yield `false`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nilike,
        vec![true, false, true, true, true]
    );
1130
1131    #[test]
1132    fn test_dict_like_kernels() {
1133        let data = vec![
1134            Some("Earth"),
1135            Some("Fire"),
1136            Some("Water"),
1137            Some("Air"),
1138            None,
1139            Some("Air"),
1140            Some("bbbbb\nAir"),
1141        ];
1142
1143        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1144
1145        assert_eq!(
1146            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1147            BooleanArray::from(vec![
1148                Some(false),
1149                Some(false),
1150                Some(false),
1151                Some(true),
1152                None,
1153                Some(true),
1154                Some(false),
1155            ]),
1156        );
1157
1158        assert_eq!(
1159            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1160            BooleanArray::from(vec![
1161                Some(false),
1162                Some(false),
1163                Some(false),
1164                Some(true),
1165                None,
1166                Some(true),
1167                Some(false),
1168            ]),
1169        );
1170
1171        assert_eq!(
1172            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1173            BooleanArray::from(vec![
1174                Some(false),
1175                Some(false),
1176                Some(true),
1177                Some(false),
1178                None,
1179                Some(false),
1180                Some(false),
1181            ]),
1182        );
1183
1184        assert_eq!(
1185            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1186            BooleanArray::from(vec![
1187                Some(false),
1188                Some(false),
1189                Some(true),
1190                Some(false),
1191                None,
1192                Some(false),
1193                Some(false),
1194            ]),
1195        );
1196
1197        assert_eq!(
1198            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1199            BooleanArray::from(vec![
1200                Some(false),
1201                Some(false),
1202                Some(true),
1203                Some(true),
1204                None,
1205                Some(true),
1206                Some(true),
1207            ]),
1208        );
1209
1210        assert_eq!(
1211            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1212            BooleanArray::from(vec![
1213                Some(false),
1214                Some(false),
1215                Some(true),
1216                Some(true),
1217                None,
1218                Some(true),
1219                Some(true),
1220            ]),
1221        );
1222
1223        assert_eq!(
1224            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1225            BooleanArray::from(vec![
1226                Some(false),
1227                Some(true),
1228                Some(false),
1229                Some(true),
1230                None,
1231                Some(true),
1232                Some(true),
1233            ]),
1234        );
1235
1236        assert_eq!(
1237            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1238            BooleanArray::from(vec![
1239                Some(false),
1240                Some(true),
1241                Some(false),
1242                Some(true),
1243                None,
1244                Some(true),
1245                Some(true),
1246            ]),
1247        );
1248
1249        assert_eq!(
1250            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1251            BooleanArray::from(vec![
1252                Some(true),
1253                Some(false),
1254                Some(true),
1255                Some(false),
1256                None,
1257                Some(false),
1258                Some(false),
1259            ]),
1260        );
1261
1262        assert_eq!(
1263            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1264            BooleanArray::from(vec![
1265                Some(true),
1266                Some(false),
1267                Some(true),
1268                Some(false),
1269                None,
1270                Some(false),
1271                Some(false),
1272            ]),
1273        );
1274    }
1275
1276    #[test]
1277    fn test_dict_nlike_kernels() {
1278        let data = vec![
1279            Some("Earth"),
1280            Some("Fire"),
1281            Some("Water"),
1282            Some("Air"),
1283            None,
1284            Some("Air"),
1285            Some("bbbbb\nAir"),
1286        ];
1287
1288        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1289
1290        assert_eq!(
1291            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1292            BooleanArray::from(vec![
1293                Some(true),
1294                Some(true),
1295                Some(true),
1296                Some(false),
1297                None,
1298                Some(false),
1299                Some(true),
1300            ]),
1301        );
1302
1303        assert_eq!(
1304            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1305            BooleanArray::from(vec![
1306                Some(true),
1307                Some(true),
1308                Some(true),
1309                Some(false),
1310                None,
1311                Some(false),
1312                Some(true),
1313            ]),
1314        );
1315
1316        assert_eq!(
1317            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1318            BooleanArray::from(vec![
1319                Some(true),
1320                Some(true),
1321                Some(false),
1322                Some(true),
1323                None,
1324                Some(true),
1325                Some(true),
1326            ]),
1327        );
1328
1329        assert_eq!(
1330            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1331            BooleanArray::from(vec![
1332                Some(true),
1333                Some(true),
1334                Some(false),
1335                Some(true),
1336                None,
1337                Some(true),
1338                Some(true),
1339            ]),
1340        );
1341
1342        assert_eq!(
1343            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1344            BooleanArray::from(vec![
1345                Some(true),
1346                Some(true),
1347                Some(false),
1348                Some(false),
1349                None,
1350                Some(false),
1351                Some(false),
1352            ]),
1353        );
1354
1355        assert_eq!(
1356            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1357            BooleanArray::from(vec![
1358                Some(true),
1359                Some(true),
1360                Some(false),
1361                Some(false),
1362                None,
1363                Some(false),
1364                Some(false),
1365            ]),
1366        );
1367
1368        assert_eq!(
1369            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1370            BooleanArray::from(vec![
1371                Some(true),
1372                Some(false),
1373                Some(true),
1374                Some(false),
1375                None,
1376                Some(false),
1377                Some(false),
1378            ]),
1379        );
1380
1381        assert_eq!(
1382            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1383            BooleanArray::from(vec![
1384                Some(true),
1385                Some(false),
1386                Some(true),
1387                Some(false),
1388                None,
1389                Some(false),
1390                Some(false),
1391            ]),
1392        );
1393
1394        assert_eq!(
1395            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1396            BooleanArray::from(vec![
1397                Some(false),
1398                Some(true),
1399                Some(false),
1400                Some(true),
1401                None,
1402                Some(true),
1403                Some(true),
1404            ]),
1405        );
1406
1407        assert_eq!(
1408            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1409            BooleanArray::from(vec![
1410                Some(false),
1411                Some(true),
1412                Some(false),
1413                Some(true),
1414                None,
1415                Some(true),
1416                Some(true),
1417            ]),
1418        );
1419    }
1420
1421    #[test]
1422    fn test_dict_ilike_kernels() {
1423        let data = vec![
1424            Some("Earth"),
1425            Some("Fire"),
1426            Some("Water"),
1427            Some("Air"),
1428            None,
1429            Some("Air"),
1430            Some("bbbbb\nAir"),
1431        ];
1432
1433        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1434
1435        assert_eq!(
1436            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1437            BooleanArray::from(vec![
1438                Some(false),
1439                Some(false),
1440                Some(false),
1441                Some(true),
1442                None,
1443                Some(true),
1444                Some(false),
1445            ]),
1446        );
1447
1448        assert_eq!(
1449            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1450            BooleanArray::from(vec![
1451                Some(false),
1452                Some(false),
1453                Some(false),
1454                Some(true),
1455                None,
1456                Some(true),
1457                Some(false),
1458            ]),
1459        );
1460
1461        assert_eq!(
1462            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1463            BooleanArray::from(vec![
1464                Some(false),
1465                Some(false),
1466                Some(true),
1467                Some(false),
1468                None,
1469                Some(false),
1470                Some(false),
1471            ]),
1472        );
1473
1474        assert_eq!(
1475            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1476            BooleanArray::from(vec![
1477                Some(false),
1478                Some(false),
1479                Some(true),
1480                Some(false),
1481                None,
1482                Some(false),
1483                Some(false),
1484            ]),
1485        );
1486
1487        assert_eq!(
1488            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1489            BooleanArray::from(vec![
1490                Some(false),
1491                Some(false),
1492                Some(true),
1493                Some(true),
1494                None,
1495                Some(true),
1496                Some(true),
1497            ]),
1498        );
1499
1500        assert_eq!(
1501            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1502            BooleanArray::from(vec![
1503                Some(false),
1504                Some(false),
1505                Some(true),
1506                Some(true),
1507                None,
1508                Some(true),
1509                Some(true),
1510            ]),
1511        );
1512
1513        assert_eq!(
1514            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1515            BooleanArray::from(vec![
1516                Some(false),
1517                Some(true),
1518                Some(false),
1519                Some(true),
1520                None,
1521                Some(true),
1522                Some(true),
1523            ]),
1524        );
1525
1526        assert_eq!(
1527            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1528            BooleanArray::from(vec![
1529                Some(false),
1530                Some(true),
1531                Some(false),
1532                Some(true),
1533                None,
1534                Some(true),
1535                Some(true),
1536            ]),
1537        );
1538
1539        assert_eq!(
1540            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1541            BooleanArray::from(vec![
1542                Some(true),
1543                Some(false),
1544                Some(true),
1545                Some(true),
1546                None,
1547                Some(true),
1548                Some(true),
1549            ]),
1550        );
1551
1552        assert_eq!(
1553            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1554            BooleanArray::from(vec![
1555                Some(true),
1556                Some(false),
1557                Some(true),
1558                Some(true),
1559                None,
1560                Some(true),
1561                Some(true),
1562            ]),
1563        );
1564    }
1565
1566    #[test]
1567    fn test_dict_nilike_kernels() {
1568        let data = vec![
1569            Some("Earth"),
1570            Some("Fire"),
1571            Some("Water"),
1572            Some("Air"),
1573            None,
1574            Some("Air"),
1575            Some("bbbbb\nAir"),
1576        ];
1577
1578        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1579
1580        assert_eq!(
1581            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1582            BooleanArray::from(vec![
1583                Some(true),
1584                Some(true),
1585                Some(true),
1586                Some(false),
1587                None,
1588                Some(false),
1589                Some(true),
1590            ]),
1591        );
1592
1593        assert_eq!(
1594            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1595            BooleanArray::from(vec![
1596                Some(true),
1597                Some(true),
1598                Some(true),
1599                Some(false),
1600                None,
1601                Some(false),
1602                Some(true),
1603            ]),
1604        );
1605
1606        assert_eq!(
1607            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1608            BooleanArray::from(vec![
1609                Some(true),
1610                Some(true),
1611                Some(false),
1612                Some(true),
1613                None,
1614                Some(true),
1615                Some(true),
1616            ]),
1617        );
1618
1619        assert_eq!(
1620            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1621            BooleanArray::from(vec![
1622                Some(true),
1623                Some(true),
1624                Some(false),
1625                Some(true),
1626                None,
1627                Some(true),
1628                Some(true),
1629            ]),
1630        );
1631
1632        assert_eq!(
1633            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1634            BooleanArray::from(vec![
1635                Some(true),
1636                Some(true),
1637                Some(false),
1638                Some(false),
1639                None,
1640                Some(false),
1641                Some(false),
1642            ]),
1643        );
1644
1645        assert_eq!(
1646            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1647            BooleanArray::from(vec![
1648                Some(true),
1649                Some(true),
1650                Some(false),
1651                Some(false),
1652                None,
1653                Some(false),
1654                Some(false),
1655            ]),
1656        );
1657
1658        assert_eq!(
1659            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1660            BooleanArray::from(vec![
1661                Some(true),
1662                Some(false),
1663                Some(true),
1664                Some(false),
1665                None,
1666                Some(false),
1667                Some(false),
1668            ]),
1669        );
1670
1671        assert_eq!(
1672            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1673            BooleanArray::from(vec![
1674                Some(true),
1675                Some(false),
1676                Some(true),
1677                Some(false),
1678                None,
1679                Some(false),
1680                Some(false),
1681            ]),
1682        );
1683
1684        assert_eq!(
1685            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1686            BooleanArray::from(vec![
1687                Some(false),
1688                Some(true),
1689                Some(false),
1690                Some(false),
1691                None,
1692                Some(false),
1693                Some(false),
1694            ]),
1695        );
1696
1697        assert_eq!(
1698            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1699            BooleanArray::from(vec![
1700                Some(false),
1701                Some(true),
1702                Some(false),
1703                Some(false),
1704                None,
1705                Some(false),
1706                Some(false),
1707            ]),
1708        );
1709    }
1710
1711    #[test]
1712    fn string_null_like_pattern() {
1713        // Different patterns have different execution code paths
1714        for pattern in &[
1715            "",           // can execute as equality check
1716            "_",          // can execute as length check
1717            "%",          // can execute as starts_with("") or non-null check
1718            "a%",         // can execute as starts_with("a")
1719            "%a",         // can execute as ends_with("")
1720            "a%b",        // can execute as starts_with("a") && ends_with("b")
1721            "%a%",        // can_execute as contains("a")
1722            "%a%b_c_d%e", // can_execute as regular expression
1723        ] {
1724            // These tests focus on the null handling, but are case-insensitive
1725            for like_f in [like, ilike, nlike, nilike] {
1726                let a = Scalar::new(StringArray::new_null(1));
1727                let b = StringArray::new_scalar(pattern);
1728                let r = like_f(&a, &b).unwrap();
1729                assert_eq!(r.len(), 1, "With pattern {pattern}");
1730                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1731                assert!(r.is_null(0), "With pattern {pattern}");
1732
1733                let a = Scalar::new(StringArray::new_null(1));
1734                let b = StringArray::from_iter_values([pattern]);
1735                let r = like_f(&a, &b).unwrap();
1736                assert_eq!(r.len(), 1, "With pattern {pattern}");
1737                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1738                assert!(r.is_null(0), "With pattern {pattern}");
1739
1740                let a = StringArray::new_null(1);
1741                let b = StringArray::from_iter_values([pattern]);
1742                let r = like_f(&a, &b).unwrap();
1743                assert_eq!(r.len(), 1, "With pattern {pattern}");
1744                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1745                assert!(r.is_null(0), "With pattern {pattern}");
1746
1747                let a = StringArray::new_null(1);
1748                let b = StringArray::new_scalar(pattern);
1749                let r = like_f(&a, &b).unwrap();
1750                assert_eq!(r.len(), 1, "With pattern {pattern}");
1751                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1752                assert!(r.is_null(0), "With pattern {pattern}");
1753            }
1754        }
1755    }
1756
1757    #[test]
1758    fn string_view_null_like_pattern() {
1759        // Different patterns have different execution code paths
1760        for pattern in &[
1761            "",           // can execute as equality check
1762            "_",          // can execute as length check
1763            "%",          // can execute as starts_with("") or non-null check
1764            "a%",         // can execute as starts_with("a")
1765            "%a",         // can execute as ends_with("")
1766            "a%b",        // can execute as starts_with("a") && ends_with("b")
1767            "%a%",        // can_execute as contains("a")
1768            "%a%b_c_d%e", // can_execute as regular expression
1769        ] {
1770            // These tests focus on the null handling, but are case-insensitive
1771            for like_f in [like, ilike, nlike, nilike] {
1772                let a = Scalar::new(StringViewArray::new_null(1));
1773                let b = StringViewArray::new_scalar(pattern);
1774                let r = like_f(&a, &b).unwrap();
1775                assert_eq!(r.len(), 1, "With pattern {pattern}");
1776                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1777                assert!(r.is_null(0), "With pattern {pattern}");
1778
1779                let a = Scalar::new(StringViewArray::new_null(1));
1780                let b = StringViewArray::from_iter_values([pattern]);
1781                let r = like_f(&a, &b).unwrap();
1782                assert_eq!(r.len(), 1, "With pattern {pattern}");
1783                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1784                assert!(r.is_null(0), "With pattern {pattern}");
1785
1786                let a = StringViewArray::new_null(1);
1787                let b = StringViewArray::from_iter_values([pattern]);
1788                let r = like_f(&a, &b).unwrap();
1789                assert_eq!(r.len(), 1, "With pattern {pattern}");
1790                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1791                assert!(r.is_null(0), "With pattern {pattern}");
1792
1793                let a = StringViewArray::new_null(1);
1794                let b = StringViewArray::new_scalar(pattern);
1795                let r = like_f(&a, &b).unwrap();
1796                assert_eq!(r.len(), 1, "With pattern {pattern}");
1797                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1798                assert!(r.is_null(0), "With pattern {pattern}");
1799            }
1800        }
1801    }
1802
1803    #[test]
1804    fn string_like_scalar_null() {
1805        for like_f in [like, ilike, nlike, nilike] {
1806            let a = StringArray::new_scalar("a");
1807            let b = Scalar::new(StringArray::new_null(1));
1808            let r = like_f(&a, &b).unwrap();
1809            assert_eq!(r.len(), 1);
1810            assert_eq!(r.null_count(), 1);
1811            assert!(r.is_null(0));
1812
1813            let a = StringArray::from_iter_values(["a"]);
1814            let b = Scalar::new(StringArray::new_null(1));
1815            let r = like_f(&a, &b).unwrap();
1816            assert_eq!(r.len(), 1);
1817            assert_eq!(r.null_count(), 1);
1818            assert!(r.is_null(0));
1819
1820            let a = StringArray::from_iter_values(["a"]);
1821            let b = StringArray::new_null(1);
1822            let r = like_f(&a, &b).unwrap();
1823            assert_eq!(r.len(), 1);
1824            assert_eq!(r.null_count(), 1);
1825            assert!(r.is_null(0));
1826
1827            let a = StringArray::new_scalar("a");
1828            let b = StringArray::new_null(1);
1829            let r = like_f(&a, &b).unwrap();
1830            assert_eq!(r.len(), 1);
1831            assert_eq!(r.null_count(), 1);
1832            assert!(r.is_null(0));
1833        }
1834    }
1835
1836    #[test]
1837    fn string_view_like_scalar_null() {
1838        for like_f in [like, ilike, nlike, nilike] {
1839            let a = StringViewArray::new_scalar("a");
1840            let b = Scalar::new(StringViewArray::new_null(1));
1841            let r = like_f(&a, &b).unwrap();
1842            assert_eq!(r.len(), 1);
1843            assert_eq!(r.null_count(), 1);
1844            assert!(r.is_null(0));
1845
1846            let a = StringViewArray::from_iter_values(["a"]);
1847            let b = Scalar::new(StringViewArray::new_null(1));
1848            let r = like_f(&a, &b).unwrap();
1849            assert_eq!(r.len(), 1);
1850            assert_eq!(r.null_count(), 1);
1851            assert!(r.is_null(0));
1852
1853            let a = StringViewArray::from_iter_values(["a"]);
1854            let b = StringViewArray::new_null(1);
1855            let r = like_f(&a, &b).unwrap();
1856            assert_eq!(r.len(), 1);
1857            assert_eq!(r.null_count(), 1);
1858            assert!(r.is_null(0));
1859
1860            let a = StringViewArray::new_scalar("a");
1861            let b = StringViewArray::new_null(1);
1862            let r = like_f(&a, &b).unwrap();
1863            assert_eq!(r.len(), 1);
1864            assert_eq!(r.null_count(), 1);
1865            assert!(r.is_null(0));
1866        }
1867    }
1868
1869    #[test]
1870    fn like_escape() {
1871        // (value, pattern, expected)
1872        let test_cases = vec![
1873            // Empty pattern
1874            (r"", r"", true),
1875            (r"\", r"", false),
1876            // Sole (dangling) escape (some engines consider this invalid pattern)
1877            (r"", r"\", false),
1878            (r"\", r"\", true),
1879            (r"\\", r"\", false),
1880            (r"a", r"\", false),
1881            (r"\a", r"\", false),
1882            (r"\\a", r"\", false),
1883            // Sole escape
1884            (r"", r"\\", false),
1885            (r"\", r"\\", true),
1886            (r"\\", r"\\", false),
1887            (r"a", r"\\", false),
1888            (r"\a", r"\\", false),
1889            (r"\\a", r"\\", false),
1890            // Sole escape and dangling escape
1891            (r"", r"\\\", false),
1892            (r"\", r"\\\", false),
1893            (r"\\", r"\\\", true),
1894            (r"\\\", r"\\\", false),
1895            (r"\\\\", r"\\\", false),
1896            (r"a", r"\\\", false),
1897            (r"\a", r"\\\", false),
1898            (r"\\a", r"\\\", false),
1899            // Sole two escapes
1900            (r"", r"\\\\", false),
1901            (r"\", r"\\\\", false),
1902            (r"\\", r"\\\\", true),
1903            (r"\\\", r"\\\\", false),
1904            (r"\\\\", r"\\\\", false),
1905            (r"\\\\\", r"\\\\", false),
1906            (r"a", r"\\\\", false),
1907            (r"\a", r"\\\\", false),
1908            (r"\\a", r"\\\\", false),
1909            // Escaped non-wildcard
1910            (r"", r"\a", false),
1911            (r"\", r"\a", false),
1912            (r"\\", r"\a", false),
1913            (r"a", r"\a", true),
1914            (r"\a", r"\a", false),
1915            (r"\\a", r"\a", false),
1916            // Escaped _ wildcard
1917            (r"", r"\_", false),
1918            (r"\", r"\_", false),
1919            (r"\\", r"\_", false),
1920            (r"a", r"\_", false),
1921            (r"_", r"\_", true),
1922            (r"%", r"\_", false),
1923            (r"\a", r"\_", false),
1924            (r"\\a", r"\_", false),
1925            (r"\_", r"\_", false),
1926            (r"\\_", r"\_", false),
1927            // Escaped % wildcard
1928            (r"", r"\%", false),
1929            (r"\", r"\%", false),
1930            (r"\\", r"\%", false),
1931            (r"a", r"\%", false),
1932            (r"_", r"\%", false),
1933            (r"%", r"\%", true),
1934            (r"\a", r"\%", false),
1935            (r"\\a", r"\%", false),
1936            (r"\%", r"\%", false),
1937            (r"\\%", r"\%", false),
1938            // Escape and non-wildcard
1939            (r"", r"\\a", false),
1940            (r"\", r"\\a", false),
1941            (r"\\", r"\\a", false),
1942            (r"a", r"\\a", false),
1943            (r"\a", r"\\a", true),
1944            (r"\\a", r"\\a", false),
1945            (r"\\\a", r"\\a", false),
1946            // Escape and _ wildcard
1947            (r"", r"\\_", false),
1948            (r"\", r"\\_", false),
1949            (r"\\", r"\\_", true),
1950            (r"a", r"\\_", false),
1951            (r"_", r"\\_", false),
1952            (r"%", r"\\_", false),
1953            (r"\a", r"\\_", true),
1954            (r"\\a", r"\\_", false),
1955            (r"\_", r"\\_", true),
1956            (r"\\_", r"\\_", false),
1957            (r"\\\_", r"\\_", false),
1958            // Escape and % wildcard
1959            (r"", r"\\%", false),
1960            (r"\", r"\\%", true),
1961            (r"\\", r"\\%", true),
1962            (r"a", r"\\%", false),
1963            (r"ab", r"\\%", false),
1964            (r"a%", r"\\%", false),
1965            (r"_", r"\\%", false),
1966            (r"%", r"\\%", false),
1967            (r"\a", r"\\%", true),
1968            (r"\\a", r"\\%", true),
1969            (r"\%", r"\\%", true),
1970            (r"\\%", r"\\%", true),
1971            (r"\\\%", r"\\%", true),
1972            // %... pattern with dangling wildcard
1973            (r"\", r"%\", true),
1974            (r"\\", r"%\", true),
1975            (r"%\", r"%\", true),
1976            (r"%\\", r"%\", true),
1977            (r"abc\", r"%\", true),
1978            (r"abc", r"%\", false),
1979            // %... pattern with wildcard
1980            (r"\", r"%\\", true),
1981            (r"\\", r"%\\", true),
1982            (r"%\\", r"%\\", true),
1983            (r"%\\\", r"%\\", true),
1984            (r"abc\", r"%\\", true),
1985            (r"abc", r"%\\", false),
1986            // %... pattern including escaped non-wildcard
1987            (r"ac", r"%a\c", true),
1988            (r"xyzac", r"%a\c", true),
1989            (r"abc", r"%a\c", false),
1990            (r"a\c", r"%a\c", false),
1991            (r"%a\c", r"%a\c", false),
1992            // %... pattern including escape
1993            (r"\", r"%a\\c", false),
1994            (r"\\", r"%a\\c", false),
1995            (r"ac", r"%a\\c", false),
1996            (r"a\c", r"%a\\c", true),
1997            (r"a\\c", r"%a\\c", false),
1998            (r"abc", r"%a\\c", false),
1999            (r"xyza\c", r"%a\\c", true),
2000            (r"xyza\\c", r"%a\\c", false),
2001            (r"%a\\c", r"%a\\c", false),
2002            // ...% pattern with wildcard
2003            (r"\", r"\\%", true),
2004            (r"\\", r"\\%", true),
2005            (r"\\%", r"\\%", true),
2006            (r"\\\%", r"\\%", true),
2007            (r"\abc", r"\\%", true),
2008            (r"a", r"\\%", false),
2009            (r"abc", r"\\%", false),
2010            // ...% pattern including escaped non-wildcard
2011            (r"ac", r"a\c%", true),
2012            (r"acxyz", r"a\c%", true),
2013            (r"abc", r"a\c%", false),
2014            (r"a\c", r"a\c%", false),
2015            (r"a\c%", r"a\c%", false),
2016            (r"a\\c%", r"a\c%", false),
2017            // ...% pattern including escape
2018            (r"ac", r"a\\c%", false),
2019            (r"a\c", r"a\\c%", true),
2020            (r"a\cxyz", r"a\\c%", true),
2021            (r"a\\c", r"a\\c%", false),
2022            (r"a\\cxyz", r"a\\c%", false),
2023            (r"abc", r"a\\c%", false),
2024            (r"abcxyz", r"a\\c%", false),
2025            (r"a\\c%", r"a\\c%", false),
2026            // %...% pattern including escaped non-wildcard
2027            (r"ac", r"%a\c%", true),
2028            (r"xyzacxyz", r"%a\c%", true),
2029            (r"abc", r"%a\c%", false),
2030            (r"a\c", r"%a\c%", false),
2031            (r"xyza\cxyz", r"%a\c%", false),
2032            (r"%a\c%", r"%a\c%", false),
2033            (r"%a\\c%", r"%a\c%", false),
2034            // %...% pattern including escape
2035            (r"ac", r"%a\\c%", false),
2036            (r"a\c", r"%a\\c%", true),
2037            (r"xyza\cxyz", r"%a\\c%", true),
2038            (r"a\\c", r"%a\\c%", false),
2039            (r"xyza\\cxyz", r"%a\\c%", false),
2040            (r"abc", r"%a\\c%", false),
2041            (r"xyzabcxyz", r"%a\\c%", false),
2042            (r"%a\\c%", r"%a\\c%", false),
2043            // Odd (7) backslashes and % wildcard
2044            (r"\\%", r"\\\\\\\%", false),
2045            (r"\\\", r"\\\\\\\%", false),
2046            (r"\\\%", r"\\\\\\\%", true),
2047            (r"\\\\", r"\\\\\\\%", false),
2048            (r"\\\\%", r"\\\\\\\%", false),
2049            (r"\\\\\\\%", r"\\\\\\\%", false),
2050            // Odd (7) backslashes and _ wildcard
2051            (r"\\\", r"\\\\\\\_", false),
2052            (r"\\\\", r"\\\\\\\_", false),
2053            (r"\\\_", r"\\\\\\\_", true),
2054            (r"\\\\", r"\\\\\\\_", false),
2055            (r"\\\a", r"\\\\\\\_", false),
2056            (r"\\\\_", r"\\\\\\\_", false),
2057            (r"\\\\\\\_", r"\\\\\\\_", false),
2058            // Even (8) backslashes and % wildcard
2059            (r"\\\", r"\\\\\\\\%", false),
2060            (r"\\\\", r"\\\\\\\\%", true),
2061            (r"\\\\\", r"\\\\\\\\%", true),
2062            (r"\\\\xyz", r"\\\\\\\\%", true),
2063            (r"\\\\\\\\%", r"\\\\\\\\%", true),
2064            // Even (8) backslashes and _ wildcard
2065            (r"\\\", r"\\\\\\\\_", false),
2066            (r"\\\\", r"\\\\\\\\_", false),
2067            (r"\\\\\", r"\\\\\\\\_", true),
2068            (r"\\\\a", r"\\\\\\\\_", true),
2069            (r"\\\\\a", r"\\\\\\\\_", false),
2070            (r"\\\\ab", r"\\\\\\\\_", false),
2071            (r"\\\\\\\\_", r"\\\\\\\\_", false),
2072        ];
2073
2074        for (value, pattern, expected) in test_cases {
2075            let unexpected = BooleanArray::from(vec![!expected]);
2076            let expected = BooleanArray::from(vec![expected]);
2077
2078            for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
2079                for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip(
2080                    make_datums(value, &string_type),
2081                    make_datums(pattern, &string_type),
2082                ) {
2083                    let value_datum = value_datum.as_ref();
2084                    let pattern_datum = pattern_datum.as_ref();
2085                    assert_eq!(
2086                        like(value_datum, pattern_datum).unwrap(),
2087                        expected,
2088                        "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
2089                    );
2090                    assert_eq!(
2091                        ilike(value_datum, pattern_datum).unwrap(),
2092                        expected,
2093                        "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
2094                    );
2095                    assert_eq!(
2096                        nlike(value_datum, pattern_datum).unwrap(),
2097                        unexpected,
2098                        "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
2099                    );
2100                    assert_eq!(
2101                        nilike(value_datum, pattern_datum).unwrap(),
2102                        unexpected,
2103                        "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
2104                    );
2105                }
2106            }
2107        }
2108    }
2109
2110    #[test]
2111    fn like_escape_many() {
2112        // (value, pattern, expected)
2113        let test_cases = vec![
2114            (r"", r"", true),
2115            (r"\", r"", false),
2116            (r"\\", r"", false),
2117            (r"\\\", r"", false),
2118            (r"\\\\", r"", false),
2119            (r"a", r"", false),
2120            (r"\a", r"", false),
2121            (r"\\a", r"", false),
2122            (r"%", r"", false),
2123            (r"\%", r"", false),
2124            (r"\\%", r"", false),
2125            (r"%%", r"", false),
2126            (r"\%%", r"", false),
2127            (r"\\%%", r"", false),
2128            (r"_", r"", false),
2129            (r"\_", r"", false),
2130            (r"\\_", r"", false),
2131            (r"__", r"", false),
2132            (r"\__", r"", false),
2133            (r"\\__", r"", false),
2134            (r"abc", r"", false),
2135            (r"a_c", r"", false),
2136            (r"a\bc", r"", false),
2137            (r"a\_c", r"", false),
2138            (r"%abc", r"", false),
2139            (r"\%abc", r"", false),
2140            (r"a\\_c%", r"", false),
2141            (r"", r"\", false),
2142            (r"\", r"\", true),
2143            (r"\\", r"\", false),
2144            (r"\\\", r"\", false),
2145            (r"\\\\", r"\", false),
2146            (r"a", r"\", false),
2147            (r"\a", r"\", false),
2148            (r"\\a", r"\", false),
2149            (r"%", r"\", false),
2150            (r"\%", r"\", false),
2151            (r"\\%", r"\", false),
2152            (r"%%", r"\", false),
2153            (r"\%%", r"\", false),
2154            (r"\\%%", r"\", false),
2155            (r"_", r"\", false),
2156            (r"\_", r"\", false),
2157            (r"\\_", r"\", false),
2158            (r"__", r"\", false),
2159            (r"\__", r"\", false),
2160            (r"\\__", r"\", false),
2161            (r"abc", r"\", false),
2162            (r"a_c", r"\", false),
2163            (r"a\bc", r"\", false),
2164            (r"a\_c", r"\", false),
2165            (r"%abc", r"\", false),
2166            (r"\%abc", r"\", false),
2167            (r"a\\_c%", r"\", false),
2168            (r"", r"\\", false),
2169            (r"\", r"\\", true),
2170            (r"\\", r"\\", false),
2171            (r"\\\", r"\\", false),
2172            (r"\\\\", r"\\", false),
2173            (r"a", r"\\", false),
2174            (r"\a", r"\\", false),
2175            (r"\\a", r"\\", false),
2176            (r"%", r"\\", false),
2177            (r"\%", r"\\", false),
2178            (r"\\%", r"\\", false),
2179            (r"%%", r"\\", false),
2180            (r"\%%", r"\\", false),
2181            (r"\\%%", r"\\", false),
2182            (r"_", r"\\", false),
2183            (r"\_", r"\\", false),
2184            (r"\\_", r"\\", false),
2185            (r"__", r"\\", false),
2186            (r"\__", r"\\", false),
2187            (r"\\__", r"\\", false),
2188            (r"abc", r"\\", false),
2189            (r"a_c", r"\\", false),
2190            (r"a\bc", r"\\", false),
2191            (r"a\_c", r"\\", false),
2192            (r"%abc", r"\\", false),
2193            (r"\%abc", r"\\", false),
2194            (r"a\\_c%", r"\\", false),
2195            (r"", r"\\\", false),
2196            (r"\", r"\\\", false),
2197            (r"\\", r"\\\", true),
2198            (r"\\\", r"\\\", false),
2199            (r"\\\\", r"\\\", false),
2200            (r"a", r"\\\", false),
2201            (r"\a", r"\\\", false),
2202            (r"\\a", r"\\\", false),
2203            (r"%", r"\\\", false),
2204            (r"\%", r"\\\", false),
2205            (r"\\%", r"\\\", false),
2206            (r"%%", r"\\\", false),
2207            (r"\%%", r"\\\", false),
2208            (r"\\%%", r"\\\", false),
2209            (r"_", r"\\\", false),
2210            (r"\_", r"\\\", false),
2211            (r"\\_", r"\\\", false),
2212            (r"__", r"\\\", false),
2213            (r"\__", r"\\\", false),
2214            (r"\\__", r"\\\", false),
2215            (r"abc", r"\\\", false),
2216            (r"a_c", r"\\\", false),
2217            (r"a\bc", r"\\\", false),
2218            (r"a\_c", r"\\\", false),
2219            (r"%abc", r"\\\", false),
2220            (r"\%abc", r"\\\", false),
2221            (r"a\\_c%", r"\\\", false),
2222            (r"", r"\\\\", false),
2223            (r"\", r"\\\\", false),
2224            (r"\\", r"\\\\", true),
2225            (r"\\\", r"\\\\", false),
2226            (r"\\\\", r"\\\\", false),
2227            (r"a", r"\\\\", false),
2228            (r"\a", r"\\\\", false),
2229            (r"\\a", r"\\\\", false),
2230            (r"%", r"\\\\", false),
2231            (r"\%", r"\\\\", false),
2232            (r"\\%", r"\\\\", false),
2233            (r"%%", r"\\\\", false),
2234            (r"\%%", r"\\\\", false),
2235            (r"\\%%", r"\\\\", false),
2236            (r"_", r"\\\\", false),
2237            (r"\_", r"\\\\", false),
2238            (r"\\_", r"\\\\", false),
2239            (r"__", r"\\\\", false),
2240            (r"\__", r"\\\\", false),
2241            (r"\\__", r"\\\\", false),
2242            (r"abc", r"\\\\", false),
2243            (r"a_c", r"\\\\", false),
2244            (r"a\bc", r"\\\\", false),
2245            (r"a\_c", r"\\\\", false),
2246            (r"%abc", r"\\\\", false),
2247            (r"\%abc", r"\\\\", false),
2248            (r"a\\_c%", r"\\\\", false),
2249            (r"", r"a", false),
2250            (r"\", r"a", false),
2251            (r"\\", r"a", false),
2252            (r"\\\", r"a", false),
2253            (r"\\\\", r"a", false),
2254            (r"a", r"a", true),
2255            (r"\a", r"a", false),
2256            (r"\\a", r"a", false),
2257            (r"%", r"a", false),
2258            (r"\%", r"a", false),
2259            (r"\\%", r"a", false),
2260            (r"%%", r"a", false),
2261            (r"\%%", r"a", false),
2262            (r"\\%%", r"a", false),
2263            (r"_", r"a", false),
2264            (r"\_", r"a", false),
2265            (r"\\_", r"a", false),
2266            (r"__", r"a", false),
2267            (r"\__", r"a", false),
2268            (r"\\__", r"a", false),
2269            (r"abc", r"a", false),
2270            (r"a_c", r"a", false),
2271            (r"a\bc", r"a", false),
2272            (r"a\_c", r"a", false),
2273            (r"%abc", r"a", false),
2274            (r"\%abc", r"a", false),
2275            (r"a\\_c%", r"a", false),
2276            (r"", r"\a", false),
2277            (r"\", r"\a", false),
2278            (r"\\", r"\a", false),
2279            (r"\\\", r"\a", false),
2280            (r"\\\\", r"\a", false),
2281            (r"a", r"\a", true),
2282            (r"\a", r"\a", false),
2283            (r"\\a", r"\a", false),
2284            (r"%", r"\a", false),
2285            (r"\%", r"\a", false),
2286            (r"\\%", r"\a", false),
2287            (r"%%", r"\a", false),
2288            (r"\%%", r"\a", false),
2289            (r"\\%%", r"\a", false),
2290            (r"_", r"\a", false),
2291            (r"\_", r"\a", false),
2292            (r"\\_", r"\a", false),
2293            (r"__", r"\a", false),
2294            (r"\__", r"\a", false),
2295            (r"\\__", r"\a", false),
2296            (r"abc", r"\a", false),
2297            (r"a_c", r"\a", false),
2298            (r"a\bc", r"\a", false),
2299            (r"a\_c", r"\a", false),
2300            (r"%abc", r"\a", false),
2301            (r"\%abc", r"\a", false),
2302            (r"a\\_c%", r"\a", false),
2303            (r"", r"\\a", false),
2304            (r"\", r"\\a", false),
2305            (r"\\", r"\\a", false),
2306            (r"\\\", r"\\a", false),
2307            (r"\\\\", r"\\a", false),
2308            (r"a", r"\\a", false),
2309            (r"\a", r"\\a", true),
2310            (r"\\a", r"\\a", false),
2311            (r"%", r"\\a", false),
2312            (r"\%", r"\\a", false),
2313            (r"\\%", r"\\a", false),
2314            (r"%%", r"\\a", false),
2315            (r"\%%", r"\\a", false),
2316            (r"\\%%", r"\\a", false),
2317            (r"_", r"\\a", false),
2318            (r"\_", r"\\a", false),
2319            (r"\\_", r"\\a", false),
2320            (r"__", r"\\a", false),
2321            (r"\__", r"\\a", false),
2322            (r"\\__", r"\\a", false),
2323            (r"abc", r"\\a", false),
2324            (r"a_c", r"\\a", false),
2325            (r"a\bc", r"\\a", false),
2326            (r"a\_c", r"\\a", false),
2327            (r"%abc", r"\\a", false),
2328            (r"\%abc", r"\\a", false),
2329            (r"a\\_c%", r"\\a", false),
2330            (r"", r"%", true),
2331            (r"\", r"%", true),
2332            (r"\\", r"%", true),
2333            (r"\\\", r"%", true),
2334            (r"\\\\", r"%", true),
2335            (r"a", r"%", true),
2336            (r"\a", r"%", true),
2337            (r"\\a", r"%", true),
2338            (r"%", r"%", true),
2339            (r"\%", r"%", true),
2340            (r"\\%", r"%", true),
2341            (r"%%", r"%", true),
2342            (r"\%%", r"%", true),
2343            (r"\\%%", r"%", true),
2344            (r"_", r"%", true),
2345            (r"\_", r"%", true),
2346            (r"\\_", r"%", true),
2347            (r"__", r"%", true),
2348            (r"\__", r"%", true),
2349            (r"\\__", r"%", true),
2350            (r"abc", r"%", true),
2351            (r"a_c", r"%", true),
2352            (r"a\bc", r"%", true),
2353            (r"a\_c", r"%", true),
2354            (r"%abc", r"%", true),
2355            (r"\%abc", r"%", true),
2356            (r"a\\_c%", r"%", true),
2357            (r"", r"\%", false),
2358            (r"\", r"\%", false),
2359            (r"\\", r"\%", false),
2360            (r"\\\", r"\%", false),
2361            (r"\\\\", r"\%", false),
2362            (r"a", r"\%", false),
2363            (r"\a", r"\%", false),
2364            (r"\\a", r"\%", false),
2365            (r"%", r"\%", true),
2366            (r"\%", r"\%", false),
2367            (r"\\%", r"\%", false),
2368            (r"%%", r"\%", false),
2369            (r"\%%", r"\%", false),
2370            (r"\\%%", r"\%", false),
2371            (r"_", r"\%", false),
2372            (r"\_", r"\%", false),
2373            (r"\\_", r"\%", false),
2374            (r"__", r"\%", false),
2375            (r"\__", r"\%", false),
2376            (r"\\__", r"\%", false),
2377            (r"abc", r"\%", false),
2378            (r"a_c", r"\%", false),
2379            (r"a\bc", r"\%", false),
2380            (r"a\_c", r"\%", false),
2381            (r"%abc", r"\%", false),
2382            (r"\%abc", r"\%", false),
2383            (r"a\\_c%", r"\%", false),
2384            (r"", r"\\%", false),
2385            (r"\", r"\\%", true),
2386            (r"\\", r"\\%", true),
2387            (r"\\\", r"\\%", true),
2388            (r"\\\\", r"\\%", true),
2389            (r"a", r"\\%", false),
2390            (r"\a", r"\\%", true),
2391            (r"\\a", r"\\%", true),
2392            (r"%", r"\\%", false),
2393            (r"\%", r"\\%", true),
2394            (r"\\%", r"\\%", true),
2395            (r"%%", r"\\%", false),
2396            (r"\%%", r"\\%", true),
2397            (r"\\%%", r"\\%", true),
2398            (r"_", r"\\%", false),
2399            (r"\_", r"\\%", true),
2400            (r"\\_", r"\\%", true),
2401            (r"__", r"\\%", false),
2402            (r"\__", r"\\%", true),
2403            (r"\\__", r"\\%", true),
2404            (r"abc", r"\\%", false),
2405            (r"a_c", r"\\%", false),
2406            (r"a\bc", r"\\%", false),
2407            (r"a\_c", r"\\%", false),
2408            (r"%abc", r"\\%", false),
2409            (r"\%abc", r"\\%", true),
2410            (r"a\\_c%", r"\\%", false),
2411            (r"", r"%%", true),
2412            (r"\", r"%%", true),
2413            (r"\\", r"%%", true),
2414            (r"\\\", r"%%", true),
2415            (r"\\\\", r"%%", true),
2416            (r"a", r"%%", true),
2417            (r"\a", r"%%", true),
2418            (r"\\a", r"%%", true),
2419            (r"%", r"%%", true),
2420            (r"\%", r"%%", true),
2421            (r"\\%", r"%%", true),
2422            (r"%%", r"%%", true),
2423            (r"\%%", r"%%", true),
2424            (r"\\%%", r"%%", true),
2425            (r"_", r"%%", true),
2426            (r"\_", r"%%", true),
2427            (r"\\_", r"%%", true),
2428            (r"__", r"%%", true),
2429            (r"\__", r"%%", true),
2430            (r"\\__", r"%%", true),
2431            (r"abc", r"%%", true),
2432            (r"a_c", r"%%", true),
2433            (r"a\bc", r"%%", true),
2434            (r"a\_c", r"%%", true),
2435            (r"%abc", r"%%", true),
2436            (r"\%abc", r"%%", true),
2437            (r"a\\_c%", r"%%", true),
2438            (r"", r"\%%", false),
2439            (r"\", r"\%%", false),
2440            (r"\\", r"\%%", false),
2441            (r"\\\", r"\%%", false),
2442            (r"\\\\", r"\%%", false),
2443            (r"a", r"\%%", false),
2444            (r"\a", r"\%%", false),
2445            (r"\\a", r"\%%", false),
2446            (r"%", r"\%%", true),
2447            (r"\%", r"\%%", false),
2448            (r"\\%", r"\%%", false),
2449            (r"%%", r"\%%", true),
2450            (r"\%%", r"\%%", false),
2451            (r"\\%%", r"\%%", false),
2452            (r"_", r"\%%", false),
2453            (r"\_", r"\%%", false),
2454            (r"\\_", r"\%%", false),
2455            (r"__", r"\%%", false),
2456            (r"\__", r"\%%", false),
2457            (r"\\__", r"\%%", false),
2458            (r"abc", r"\%%", false),
2459            (r"a_c", r"\%%", false),
2460            (r"a\bc", r"\%%", false),
2461            (r"a\_c", r"\%%", false),
2462            (r"%abc", r"\%%", true),
2463            (r"\%abc", r"\%%", false),
2464            (r"a\\_c%", r"\%%", false),
2465            (r"", r"\\%%", false),
2466            (r"\", r"\\%%", true),
2467            (r"\\", r"\\%%", true),
2468            (r"\\\", r"\\%%", true),
2469            (r"\\\\", r"\\%%", true),
2470            (r"a", r"\\%%", false),
2471            (r"\a", r"\\%%", true),
2472            (r"\\a", r"\\%%", true),
2473            (r"%", r"\\%%", false),
2474            (r"\%", r"\\%%", true),
2475            (r"\\%", r"\\%%", true),
2476            (r"%%", r"\\%%", false),
2477            (r"\%%", r"\\%%", true),
2478            (r"\\%%", r"\\%%", true),
2479            (r"_", r"\\%%", false),
2480            (r"\_", r"\\%%", true),
2481            (r"\\_", r"\\%%", true),
2482            (r"__", r"\\%%", false),
2483            (r"\__", r"\\%%", true),
2484            (r"\\__", r"\\%%", true),
2485            (r"abc", r"\\%%", false),
2486            (r"a_c", r"\\%%", false),
2487            (r"a\bc", r"\\%%", false),
2488            (r"a\_c", r"\\%%", false),
2489            (r"%abc", r"\\%%", false),
2490            (r"\%abc", r"\\%%", true),
2491            (r"a\\_c%", r"\\%%", false),
2492            (r"", r"_", false),
2493            (r"\", r"_", true),
2494            (r"\\", r"_", false),
2495            (r"\\\", r"_", false),
2496            (r"\\\\", r"_", false),
2497            (r"a", r"_", true),
2498            (r"\a", r"_", false),
2499            (r"\\a", r"_", false),
2500            (r"%", r"_", true),
2501            (r"\%", r"_", false),
2502            (r"\\%", r"_", false),
2503            (r"%%", r"_", false),
2504            (r"\%%", r"_", false),
2505            (r"\\%%", r"_", false),
2506            (r"_", r"_", true),
2507            (r"\_", r"_", false),
2508            (r"\\_", r"_", false),
2509            (r"__", r"_", false),
2510            (r"\__", r"_", false),
2511            (r"\\__", r"_", false),
2512            (r"abc", r"_", false),
2513            (r"a_c", r"_", false),
2514            (r"a\bc", r"_", false),
2515            (r"a\_c", r"_", false),
2516            (r"%abc", r"_", false),
2517            (r"\%abc", r"_", false),
2518            (r"a\\_c%", r"_", false),
2519            (r"", r"\_", false),
2520            (r"\", r"\_", false),
2521            (r"\\", r"\_", false),
2522            (r"\\\", r"\_", false),
2523            (r"\\\\", r"\_", false),
2524            (r"a", r"\_", false),
2525            (r"\a", r"\_", false),
2526            (r"\\a", r"\_", false),
2527            (r"%", r"\_", false),
2528            (r"\%", r"\_", false),
2529            (r"\\%", r"\_", false),
2530            (r"%%", r"\_", false),
2531            (r"\%%", r"\_", false),
2532            (r"\\%%", r"\_", false),
2533            (r"_", r"\_", true),
2534            (r"\_", r"\_", false),
2535            (r"\\_", r"\_", false),
2536            (r"__", r"\_", false),
2537            (r"\__", r"\_", false),
2538            (r"\\__", r"\_", false),
2539            (r"abc", r"\_", false),
2540            (r"a_c", r"\_", false),
2541            (r"a\bc", r"\_", false),
2542            (r"a\_c", r"\_", false),
2543            (r"%abc", r"\_", false),
2544            (r"\%abc", r"\_", false),
2545            (r"a\\_c%", r"\_", false),
2546            (r"", r"\\_", false),
2547            (r"\", r"\\_", false),
2548            (r"\\", r"\\_", true),
2549            (r"\\\", r"\\_", false),
2550            (r"\\\\", r"\\_", false),
2551            (r"a", r"\\_", false),
2552            (r"\a", r"\\_", true),
2553            (r"\\a", r"\\_", false),
2554            (r"%", r"\\_", false),
2555            (r"\%", r"\\_", true),
2556            (r"\\%", r"\\_", false),
2557            (r"%%", r"\\_", false),
2558            (r"\%%", r"\\_", false),
2559            (r"\\%%", r"\\_", false),
2560            (r"_", r"\\_", false),
2561            (r"\_", r"\\_", true),
2562            (r"\\_", r"\\_", false),
2563            (r"__", r"\\_", false),
2564            (r"\__", r"\\_", false),
2565            (r"\\__", r"\\_", false),
2566            (r"abc", r"\\_", false),
2567            (r"a_c", r"\\_", false),
2568            (r"a\bc", r"\\_", false),
2569            (r"a\_c", r"\\_", false),
2570            (r"%abc", r"\\_", false),
2571            (r"\%abc", r"\\_", false),
2572            (r"a\\_c%", r"\\_", false),
2573            (r"", r"__", false),
2574            (r"\", r"__", false),
2575            (r"\\", r"__", true),
2576            (r"\\\", r"__", false),
2577            (r"\\\\", r"__", false),
2578            (r"a", r"__", false),
2579            (r"\a", r"__", true),
2580            (r"\\a", r"__", false),
2581            (r"%", r"__", false),
2582            (r"\%", r"__", true),
2583            (r"\\%", r"__", false),
2584            (r"%%", r"__", true),
2585            (r"\%%", r"__", false),
2586            (r"\\%%", r"__", false),
2587            (r"_", r"__", false),
2588            (r"\_", r"__", true),
2589            (r"\\_", r"__", false),
2590            (r"__", r"__", true),
2591            (r"\__", r"__", false),
2592            (r"\\__", r"__", false),
2593            (r"abc", r"__", false),
2594            (r"a_c", r"__", false),
2595            (r"a\bc", r"__", false),
2596            (r"a\_c", r"__", false),
2597            (r"%abc", r"__", false),
2598            (r"\%abc", r"__", false),
2599            (r"a\\_c%", r"__", false),
2600            (r"", r"\__", false),
2601            (r"\", r"\__", false),
2602            (r"\\", r"\__", false),
2603            (r"\\\", r"\__", false),
2604            (r"\\\\", r"\__", false),
2605            (r"a", r"\__", false),
2606            (r"\a", r"\__", false),
2607            (r"\\a", r"\__", false),
2608            (r"%", r"\__", false),
2609            (r"\%", r"\__", false),
2610            (r"\\%", r"\__", false),
2611            (r"%%", r"\__", false),
2612            (r"\%%", r"\__", false),
2613            (r"\\%%", r"\__", false),
2614            (r"_", r"\__", false),
2615            (r"\_", r"\__", false),
2616            (r"\\_", r"\__", false),
2617            (r"__", r"\__", true),
2618            (r"\__", r"\__", false),
2619            (r"\\__", r"\__", false),
2620            (r"abc", r"\__", false),
2621            (r"a_c", r"\__", false),
2622            (r"a\bc", r"\__", false),
2623            (r"a\_c", r"\__", false),
2624            (r"%abc", r"\__", false),
2625            (r"\%abc", r"\__", false),
2626            (r"a\\_c%", r"\__", false),
2627            (r"", r"\\__", false),
2628            (r"\", r"\\__", false),
2629            (r"\\", r"\\__", false),
2630            (r"\\\", r"\\__", true),
2631            (r"\\\\", r"\\__", false),
2632            (r"a", r"\\__", false),
2633            (r"\a", r"\\__", false),
2634            (r"\\a", r"\\__", true),
2635            (r"%", r"\\__", false),
2636            (r"\%", r"\\__", false),
2637            (r"\\%", r"\\__", true),
2638            (r"%%", r"\\__", false),
2639            (r"\%%", r"\\__", true),
2640            (r"\\%%", r"\\__", false),
2641            (r"_", r"\\__", false),
2642            (r"\_", r"\\__", false),
2643            (r"\\_", r"\\__", true),
2644            (r"__", r"\\__", false),
2645            (r"\__", r"\\__", true),
2646            (r"\\__", r"\\__", false),
2647            (r"abc", r"\\__", false),
2648            (r"a_c", r"\\__", false),
2649            (r"a\bc", r"\\__", false),
2650            (r"a\_c", r"\\__", false),
2651            (r"%abc", r"\\__", false),
2652            (r"\%abc", r"\\__", false),
2653            (r"a\\_c%", r"\\__", false),
2654            (r"", r"abc", false),
2655            (r"\", r"abc", false),
2656            (r"\\", r"abc", false),
2657            (r"\\\", r"abc", false),
2658            (r"\\\\", r"abc", false),
2659            (r"a", r"abc", false),
2660            (r"\a", r"abc", false),
2661            (r"\\a", r"abc", false),
2662            (r"%", r"abc", false),
2663            (r"\%", r"abc", false),
2664            (r"\\%", r"abc", false),
2665            (r"%%", r"abc", false),
2666            (r"\%%", r"abc", false),
2667            (r"\\%%", r"abc", false),
2668            (r"_", r"abc", false),
2669            (r"\_", r"abc", false),
2670            (r"\\_", r"abc", false),
2671            (r"__", r"abc", false),
2672            (r"\__", r"abc", false),
2673            (r"\\__", r"abc", false),
2674            (r"abc", r"abc", true),
2675            (r"a_c", r"abc", false),
2676            (r"a\bc", r"abc", false),
2677            (r"a\_c", r"abc", false),
2678            (r"%abc", r"abc", false),
2679            (r"\%abc", r"abc", false),
2680            (r"a\\_c%", r"abc", false),
2681            (r"", r"a_c", false),
2682            (r"\", r"a_c", false),
2683            (r"\\", r"a_c", false),
2684            (r"\\\", r"a_c", false),
2685            (r"\\\\", r"a_c", false),
2686            (r"a", r"a_c", false),
2687            (r"\a", r"a_c", false),
2688            (r"\\a", r"a_c", false),
2689            (r"%", r"a_c", false),
2690            (r"\%", r"a_c", false),
2691            (r"\\%", r"a_c", false),
2692            (r"%%", r"a_c", false),
2693            (r"\%%", r"a_c", false),
2694            (r"\\%%", r"a_c", false),
2695            (r"_", r"a_c", false),
2696            (r"\_", r"a_c", false),
2697            (r"\\_", r"a_c", false),
2698            (r"__", r"a_c", false),
2699            (r"\__", r"a_c", false),
2700            (r"\\__", r"a_c", false),
2701            (r"abc", r"a_c", true),
2702            (r"a_c", r"a_c", true),
2703            (r"a\bc", r"a_c", false),
2704            (r"a\_c", r"a_c", false),
2705            (r"%abc", r"a_c", false),
2706            (r"\%abc", r"a_c", false),
2707            (r"a\\_c%", r"a_c", false),
2708            (r"", r"a\bc", false),
2709            (r"\", r"a\bc", false),
2710            (r"\\", r"a\bc", false),
2711            (r"\\\", r"a\bc", false),
2712            (r"\\\\", r"a\bc", false),
2713            (r"a", r"a\bc", false),
2714            (r"\a", r"a\bc", false),
2715            (r"\\a", r"a\bc", false),
2716            (r"%", r"a\bc", false),
2717            (r"\%", r"a\bc", false),
2718            (r"\\%", r"a\bc", false),
2719            (r"%%", r"a\bc", false),
2720            (r"\%%", r"a\bc", false),
2721            (r"\\%%", r"a\bc", false),
2722            (r"_", r"a\bc", false),
2723            (r"\_", r"a\bc", false),
2724            (r"\\_", r"a\bc", false),
2725            (r"__", r"a\bc", false),
2726            (r"\__", r"a\bc", false),
2727            (r"\\__", r"a\bc", false),
2728            (r"abc", r"a\bc", true),
2729            (r"a_c", r"a\bc", false),
2730            (r"a\bc", r"a\bc", false),
2731            (r"a\_c", r"a\bc", false),
2732            (r"%abc", r"a\bc", false),
2733            (r"\%abc", r"a\bc", false),
2734            (r"a\\_c%", r"a\bc", false),
2735            (r"", r"a\_c", false),
2736            (r"\", r"a\_c", false),
2737            (r"\\", r"a\_c", false),
2738            (r"\\\", r"a\_c", false),
2739            (r"\\\\", r"a\_c", false),
2740            (r"a", r"a\_c", false),
2741            (r"\a", r"a\_c", false),
2742            (r"\\a", r"a\_c", false),
2743            (r"%", r"a\_c", false),
2744            (r"\%", r"a\_c", false),
2745            (r"\\%", r"a\_c", false),
2746            (r"%%", r"a\_c", false),
2747            (r"\%%", r"a\_c", false),
2748            (r"\\%%", r"a\_c", false),
2749            (r"_", r"a\_c", false),
2750            (r"\_", r"a\_c", false),
2751            (r"\\_", r"a\_c", false),
2752            (r"__", r"a\_c", false),
2753            (r"\__", r"a\_c", false),
2754            (r"\\__", r"a\_c", false),
2755            (r"abc", r"a\_c", false),
2756            (r"a_c", r"a\_c", true),
2757            (r"a\bc", r"a\_c", false),
2758            (r"a\_c", r"a\_c", false),
2759            (r"%abc", r"a\_c", false),
2760            (r"\%abc", r"a\_c", false),
2761            (r"a\\_c%", r"a\_c", false),
2762            (r"", r"%abc", false),
2763            (r"\", r"%abc", false),
2764            (r"\\", r"%abc", false),
2765            (r"\\\", r"%abc", false),
2766            (r"\\\\", r"%abc", false),
2767            (r"a", r"%abc", false),
2768            (r"\a", r"%abc", false),
2769            (r"\\a", r"%abc", false),
2770            (r"%", r"%abc", false),
2771            (r"\%", r"%abc", false),
2772            (r"\\%", r"%abc", false),
2773            (r"%%", r"%abc", false),
2774            (r"\%%", r"%abc", false),
2775            (r"\\%%", r"%abc", false),
2776            (r"_", r"%abc", false),
2777            (r"\_", r"%abc", false),
2778            (r"\\_", r"%abc", false),
2779            (r"__", r"%abc", false),
2780            (r"\__", r"%abc", false),
2781            (r"\\__", r"%abc", false),
2782            (r"abc", r"%abc", true),
2783            (r"a_c", r"%abc", false),
2784            (r"a\bc", r"%abc", false),
2785            (r"a\_c", r"%abc", false),
2786            (r"%abc", r"%abc", true),
2787            (r"\%abc", r"%abc", true),
2788            (r"a\\_c%", r"%abc", false),
2789            (r"", r"\%abc", false),
2790            (r"\", r"\%abc", false),
2791            (r"\\", r"\%abc", false),
2792            (r"\\\", r"\%abc", false),
2793            (r"\\\\", r"\%abc", false),
2794            (r"a", r"\%abc", false),
2795            (r"\a", r"\%abc", false),
2796            (r"\\a", r"\%abc", false),
2797            (r"%", r"\%abc", false),
2798            (r"\%", r"\%abc", false),
2799            (r"\\%", r"\%abc", false),
2800            (r"%%", r"\%abc", false),
2801            (r"\%%", r"\%abc", false),
2802            (r"\\%%", r"\%abc", false),
2803            (r"_", r"\%abc", false),
2804            (r"\_", r"\%abc", false),
2805            (r"\\_", r"\%abc", false),
2806            (r"__", r"\%abc", false),
2807            (r"\__", r"\%abc", false),
2808            (r"\\__", r"\%abc", false),
2809            (r"abc", r"\%abc", false),
2810            (r"a_c", r"\%abc", false),
2811            (r"a\bc", r"\%abc", false),
2812            (r"a\_c", r"\%abc", false),
2813            (r"%abc", r"\%abc", true),
2814            (r"\%abc", r"\%abc", false),
2815            (r"a\\_c%", r"\%abc", false),
2816            (r"", r"a\\_c%", false),
2817            (r"\", r"a\\_c%", false),
2818            (r"\\", r"a\\_c%", false),
2819            (r"\\\", r"a\\_c%", false),
2820            (r"\\\\", r"a\\_c%", false),
2821            (r"a", r"a\\_c%", false),
2822            (r"\a", r"a\\_c%", false),
2823            (r"\\a", r"a\\_c%", false),
2824            (r"%", r"a\\_c%", false),
2825            (r"\%", r"a\\_c%", false),
2826            (r"\\%", r"a\\_c%", false),
2827            (r"%%", r"a\\_c%", false),
2828            (r"\%%", r"a\\_c%", false),
2829            (r"\\%%", r"a\\_c%", false),
2830            (r"_", r"a\\_c%", false),
2831            (r"\_", r"a\\_c%", false),
2832            (r"\\_", r"a\\_c%", false),
2833            (r"__", r"a\\_c%", false),
2834            (r"\__", r"a\\_c%", false),
2835            (r"\\__", r"a\\_c%", false),
2836            (r"abc", r"a\\_c%", false),
2837            (r"a_c", r"a\\_c%", false),
2838            (r"a\bc", r"a\\_c%", true),
2839            (r"a\_c", r"a\\_c%", true),
2840            (r"%abc", r"a\\_c%", false),
2841            (r"\%abc", r"a\\_c%", false),
2842            (r"a\\_c%", r"a\\_c%", false),
2843        ];
2844
2845        let values = test_cases
2846            .iter()
2847            .map(|(value, _, _)| *value)
2848            .collect::<Vec<_>>();
2849        let patterns = test_cases
2850            .iter()
2851            .map(|(_, pattern, _)| *pattern)
2852            .collect::<Vec<_>>();
2853        let expected = BooleanArray::from(
2854            test_cases
2855                .iter()
2856                .map(|(_, _, expected)| *expected)
2857                .collect::<Vec<_>>(),
2858        );
2859        let unexpected = BooleanArray::from(
2860            test_cases
2861                .iter()
2862                .map(|(_, _, expected)| !*expected)
2863                .collect::<Vec<_>>(),
2864        );
2865
2866        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
2867            let values = make_array(values.iter(), &string_type);
2868            let patterns = make_array(patterns.iter(), &string_type);
2869            let (values, patterns) = (values.as_ref(), patterns.as_ref());
2870
2871            assert_eq!(like(&values, &patterns).unwrap(), expected,);
2872            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
2873            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
2874            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
2875        }
2876    }
2877
2878    fn make_datums(
2879        value: impl AsRef<str>,
2880        data_type: &DataType,
2881    ) -> Vec<(Box<dyn Datum>, DatumType)> {
2882        match data_type {
2883            DataType::Utf8 => {
2884                let array = StringArray::from_iter_values([value]);
2885                vec![
2886                    (Box::new(array.clone()), DatumType::Array),
2887                    (Box::new(Scalar::new(array)), DatumType::Scalar),
2888                ]
2889            }
2890            DataType::LargeUtf8 => {
2891                let array = LargeStringArray::from_iter_values([value]);
2892                vec![
2893                    (Box::new(array.clone()), DatumType::Array),
2894                    (Box::new(Scalar::new(array)), DatumType::Scalar),
2895                ]
2896            }
2897            DataType::Utf8View => {
2898                let array = StringViewArray::from_iter_values([value]);
2899                vec![
2900                    (Box::new(array.clone()), DatumType::Array),
2901                    (Box::new(Scalar::new(array)), DatumType::Scalar),
2902                ]
2903            }
2904            _ => unimplemented!(),
2905        }
2906    }
2907
2908    fn make_array(
2909        values: impl IntoIterator<Item: AsRef<str>>,
2910        data_type: &DataType,
2911    ) -> Box<dyn Array> {
2912        match data_type {
2913            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
2914            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
2915            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
2916            _ => unimplemented!(),
2917        }
2918    }
2919
    /// Tag recording which form a datum produced by `make_datums` takes.
    #[derive(Debug)]
    enum DatumType {
        /// The datum is the one-element array itself.
        Array,
        /// The datum is the one-element array wrapped in `Scalar`.
        Scalar,
    }
2925}