mz_transform/literal_constraints.rs
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! See if there are predicates of the form `<expr> = literal` that can be sped up using an index.
//! More specifically, look for an MFP on top of a Get, where the MFP has an appropriate filter, and
//! the Get has a matching index. Convert these to `IndexedFilter` joins, which is a semi-join with
//! a constant collection.
//!
//! E.g.: Logically, we go from something like
//! `SELECT f1, f2, f3 FROM t WHERE t.f1 = lit1 AND t.f2 = lit2`
//! to
//! `SELECT f1, f2, f3 FROM t, (SELECT * FROM (VALUES (lit1, lit2))) as filter_list
//! WHERE t.f1 = filter_list.column1 AND t.f2 = filter_list.column2`
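//!
//! At the MIR level, the result looks roughly like the following sketch (for orientation only, not
//! actual `EXPLAIN` output; column numbers and arrangement details are illustrative):
//!
//! ```text
//! Project (the original columns of t)
//!   Join // implementation: IndexedFilter
//!     ArrangeBy (the key of the matching index on t)
//!       Get t
//!     ArrangeBy (all of its columns)
//!       Constant [(lit1, lit2)]
//! ```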

use std::collections::{BTreeMap, BTreeSet};

use itertools::Itertools;
use mz_expr::JoinImplementation::IndexedFilter;
use mz_expr::canonicalize::canonicalize_predicates;
use mz_expr::visit::{Visit, VisitChildren};
use mz_expr::{BinaryFunc, Id, MapFilterProject, MirRelationExpr, MirScalarExpr, VariadicFunc};
use mz_ore::collections::CollectionExt;
use mz_ore::iter::IteratorExt;
use mz_ore::stack::RecursionLimitError;
use mz_ore::vec::swap_remove_multiple;
use mz_repr::{Diff, GlobalId, RelationType, Row};

use crate::TransformCtx;
use crate::canonicalize_mfp::CanonicalizeMfp;
use crate::notice::IndexTooWideForLiteralConstraints;

/// Convert literal constraints into `IndexedFilter` joins.
#[derive(Debug)]
pub struct LiteralConstraints;

impl crate::Transform for LiteralConstraints {
    fn name(&self) -> &'static str {
        "LiteralConstraints"
    }

    #[mz_ore::instrument(
        target = "optimizer",
        level = "debug",
        fields(path.segment = "literal_constraints")
    )]
    fn actually_perform_transform(
        &self,
        relation: &mut MirRelationExpr,
        ctx: &mut TransformCtx,
    ) -> Result<(), crate::TransformError> {
        let result = self.action(relation, ctx);
        mz_repr::explain::trace_plan(&*relation);
        result
    }
}

impl LiteralConstraints {
    fn action(
        &self,
        relation: &mut MirRelationExpr,
        transform_ctx: &mut TransformCtx,
    ) -> Result<(), crate::TransformError> {
        let mut mfp = MapFilterProject::extract_non_errors_from_expr_mut(relation);
        relation.try_visit_mut_children(|e| self.action(e, transform_ctx))?;

        if let MirRelationExpr::Get {
            id: Id::Global(id),
            ref typ,
            ..
        } = *relation
        {
            let orig_mfp = mfp.clone();

            // Preparation for the literal constraints detection.
            Self::inline_literal_constraints(&mut mfp);
            Self::list_of_predicates_to_and_of_predicates(&mut mfp);
            Self::distribute_and_over_or(&mut mfp)?;
            Self::unary_and(&mut mfp);

            /// The above preparation might make the MFP more complicated, so we'll later want to
            /// either undo the preparation transformations or get back to `orig_mfp`.
            fn undo_preparation(
                mfp: &mut MapFilterProject,
                orig_mfp: &MapFilterProject,
                relation: &MirRelationExpr,
                relation_type: RelationType,
            ) {
                // undo list_of_predicates_to_and_of_predicates, distribute_and_over_or, unary_and
                // (It undoes the latter 2 through `MirScalarExpr::reduce`.)
                LiteralConstraints::canonicalize_predicates(mfp, relation, relation_type);
                // undo inline_literal_constraints
                mfp.optimize();
                // We can usually undo, but sometimes not (see comment on `distribute_and_over_or`),
                // so in those cases we might have a more complicated MFP than the original MFP
                // (despite the removal of the literal constraints and/or contradicting OR args).
                // So let's use the simpler one.
                if LiteralConstraints::predicates_size(orig_mfp)
                    < LiteralConstraints::predicates_size(mfp)
                {
                    *mfp = orig_mfp.clone();
                }
            }

            let removed_contradicting_or_args = Self::remove_impossible_or_args(&mut mfp)?;

            // todo: We might want to also call `canonicalize_equivalences`,
            // see near the end of literal_constraints.slt.

            let inp_typ = typ.clone();

            let key_val = Self::detect_literal_constraints(&mfp, id, transform_ctx);

            match key_val {
                None => {
                    // We didn't find a usable index, so no chance to remove literal constraints.
                    // But, we might have removed contradicting OR args.
                    if removed_contradicting_or_args {
                        undo_preparation(&mut mfp, &orig_mfp, relation, inp_typ);
                    } else {
                        // We didn't remove anything, so let's go with the original MFP.
                        mfp = orig_mfp;
                    }
                }
                Some((idx_id, key, possible_vals)) => {
                    // We found a usable index. We'll try to remove the corresponding literal
                    // constraints.
                    if Self::remove_literal_constraints(&mut mfp, &key)
                        || removed_contradicting_or_args
                    {
                        // We were able to remove the literal constraints or contradicting OR args,
                        // so we would like to use this new MFP, so we try undoing the preparation.
                        undo_preparation(&mut mfp, &orig_mfp, relation, inp_typ.clone());
                    } else {
                        // We were not able to remove the literal constraint, so `mfp` is
                        // equivalent to `orig_mfp`, but `orig_mfp` is often simpler (or the same).
                        mfp = orig_mfp;
                    }

                    // We transform the Get into a semi-join with a constant collection.

                    let inp_id = id.clone();
                    let filter_list = MirRelationExpr::Constant {
                        rows: Ok(possible_vals
                            .iter()
                            .map(|val| (val.clone(), Diff::ONE))
                            .collect()),
                        typ: mz_repr::RelationType {
                            column_types: key
                                .iter()
                                .map(|e| {
                                    e.typ(&inp_typ.column_types)
                                        // We make sure to not include a null in `expr_eq_literal`.
                                        .nullable(false)
                                })
                                .collect(),
                            // (Note that the key inference for `MirRelationExpr::Constant` inspects
                            // the constant values to detect keys not listed within the node, but it
                            // can only detect a single-column key this way. A multi-column key is
                            // common here, so we explicitly add it.)
                            keys: vec![(0..key.len()).collect()],
                        },
                    }
                    .arrange_by(&[(0..key.len()).map(MirScalarExpr::column).collect_vec()]);

                    if possible_vals.is_empty() {
                        // Even better than what we were hoping for: Found contradicting
                        // literal constraints, so the whole relation is empty.
                        relation.take_safely(Some(inp_typ));
                    } else {
                        // The common case: We need to build the join which is the main point of
                        // this transform.
                        *relation = MirRelationExpr::Join {
                            // It's important to keep the `filter_list` in the second position.
                            // Both the lowering and EXPLAIN depend on this.
                            inputs: vec![
                                relation.clone().arrange_by(std::slice::from_ref(&key)),
                                filter_list,
                            ],
                            equivalences: key
                                .iter()
                                .enumerate()
                                .map(|(i, e)| {
                                    vec![(*e).clone(), MirScalarExpr::column(i + inp_typ.arity())]
                                })
                                .collect(),
                            implementation: IndexedFilter(
                                inp_id,
                                idx_id,
                                key.clone(),
                                possible_vals,
                            ),
                        };

                        // Rebuild the MFP to add the projection that removes the columns coming from
                        // the filter_list side of the join.
                        let (map, filter, project) = mfp.as_map_filter_project();
                        mfp = MapFilterProject::new(inp_typ.arity() + key.len())
                            .project(0..inp_typ.arity()) // make the join semi
                            .map(map)
                            .filter(filter)
                            .project(project);
                        mfp.optimize()
                    }
                }
            }
        }

        CanonicalizeMfp::rebuild_mfp(mfp, relation);

        Ok(())
    }

    /// Detects literal constraints in an MFP on top of a Get of `id`, and a matching index that can
    /// be used to speed up the Filter of the MFP.
    ///
    /// For example, if there is an index on `(f1, f2)`, and the Filter is
    /// `(f1 = 3 AND f2 = 5) OR (f1 = 7 AND f2 = 9)`, it returns `Some([f1, f2], [[3,5], [7,9]])`.
    ///
    /// We can use an index if each argument of the OR includes a literal constraint on each of the
    /// key fields of the index. Extra predicates inside the OR arguments are ok.
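    /// For example, the same `(f1, f2)` index cannot be used for
    /// `(f1 = 3 AND f2 = 5) OR (f1 = 7)`, because the second OR argument has no literal constraint
    /// on `f2`. (In such cases we may emit an `IndexTooWideForLiteralConstraints` notice, see below.)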
    ///
    /// Returns (idx_id, idx_key, values to lookup in the index).
    fn detect_literal_constraints(
        mfp: &MapFilterProject,
        get_id: GlobalId,
        transform_ctx: &mut TransformCtx,
    ) -> Option<(GlobalId, Vec<MirScalarExpr>, Vec<Row>)> {
        // Checks whether an index with the specified key can be used to speed up the given filter.
        // See comment of `IndexMatch`.
        fn match_index(key: &[MirScalarExpr], or_args: &Vec<MirScalarExpr>) -> IndexMatch {
            if key.is_empty() {
                // Nothing to do with an index that has an empty key.
                return IndexMatch::UnusableNoSubset;
            }
            if !key.iter().all_unique() {
                // This is a weird index. Why does it have duplicate key expressions?
                return IndexMatch::UnusableNoSubset;
            }
            let mut literal_values = Vec::new();
            let mut inv_cast_any = false;
            // This starts with all key fields of the index.
            // At the end, it will contain a subset S of index key fields such that if the index had
            // only S as its key, then the index would be usable.
            let mut usable_key_fields = key.iter().collect::<BTreeSet<_>>();
            let mut usable = true;
            for or_arg in or_args {
                let mut row = Row::default();
                let mut packer = row.packer();
                for key_field in key {
                    let and_args = or_arg.and_or_args(VariadicFunc::And);
                    // Let's find a constraint for this key field
                    if let Some((literal, inv_cast)) = and_args
                        .iter()
                        .find_map(|and_arg| and_arg.expr_eq_literal(key_field))
                    {
                        // (Note that the above find_map can find only 0 or 1 result, because
                        // of `remove_impossible_or_args`.)
                        packer.push(literal.unpack_first());
                        inv_cast_any |= inv_cast;
                    } else {
                        // There is an `or_arg` where we didn't find a constraint for a key field,
                        // so the index is unusable. Throw out the field from the usable fields.
                        usable = false;
                        usable_key_fields.remove(key_field);
                        if usable_key_fields.is_empty() {
                            return IndexMatch::UnusableNoSubset;
                        }
                    }
                }
                literal_values.push(row);
            }
            if usable {
                // We should deduplicate, because a constraint can be duplicated by
                // `distribute_and_over_or`. For example: `IN ('l1', 'l2') AND (a > 0 OR a < 5)`:
                // the 2 args of the OR will cause the IN constraints to be duplicated. This doesn't
                // alter the meaning of the expression when evaluated as a filter, but if we extract
                // those literals 2 times into `literal_values` then the Peek code will look up
                // those keys from the index 2 times, leading to duplicate results.
                literal_values.sort();
                literal_values.dedup();
                IndexMatch::Usable(literal_values, inv_cast_any)
            } else {
                if usable_key_fields.is_empty() {
                    IndexMatch::UnusableNoSubset
                } else {
                    IndexMatch::UnusableTooWide(
                        usable_key_fields.into_iter().cloned().collect_vec(),
                    )
                }
            }
        }

        let or_args = Self::get_or_args(mfp);

        let index_matches = transform_ctx
            .indexes
            .indexes_on(get_id)
            .map(|(index_id, key)| (index_id, key.to_owned(), match_index(key, &or_args)))
            .collect_vec();

        let result = index_matches
            .iter()
            .cloned()
            .filter_map(|(idx_id, key, index_match)| match index_match {
                IndexMatch::Usable(vals, inv_cast) => Some((idx_id, key, vals, inv_cast)),
                _ => None,
            })
            // Maximize:
            // 1. number of predicates that are sped up using a single index.
            // 2. whether we are using a simpler index by having removed a cast from the key expr.
            .max_by_key(|(_idx_id, key, _vals, inv_cast)| (key.len(), *inv_cast))
            .map(|(idx_id, key, vals, _inv_cast)| (idx_id, key, vals));

        if result.is_none() && !or_args.is_empty() {
            // Let's see if we can give a hint to the user.
            index_matches
                .into_iter()
                .for_each(|(index_id, index_key, index_match)| {
                    match index_match {
                        IndexMatch::UnusableTooWide(usable_subset) => {
                            // see comment of `UnusableTooWide`
                            assert!(!usable_subset.is_empty());
                            // Determine literal values that we would get if the index was on
                            // `usable_subset`.
                            let literal_values = match match_index(&usable_subset, &or_args) {
                                IndexMatch::Usable(literal_vals, _) => literal_vals,
                                _ => unreachable!(), // `usable_subset` would make the index usable.
                            };

                            // Let's come up with a recommendation for what columns to index:
                            // Intersect literal constraints across all OR args. (Which might
                            // include columns that are NOT in this index, and therefore not in
                            // `usable_subset`.)
                            let recommended_key = or_args
                                .iter()
                                .map(|or_arg| {
                                    let and_args = or_arg.and_or_args(VariadicFunc::And);
                                    and_args
                                        .iter()
                                        .filter_map(|and_arg| and_arg.any_expr_eq_literal())
                                        .collect::<BTreeSet<_>>()
                                })
                                .reduce(|fields1, fields2| {
                                    fields1.intersection(&fields2).cloned().collect()
                                })
                                // The unwrap is safe because above we checked `!or_args.is_empty()`
                                .unwrap()
                                .into_iter()
                                .collect_vec();

                            transform_ctx.df_meta.push_optimizer_notice_dedup(
                                IndexTooWideForLiteralConstraints {
                                    index_id,
                                    index_key,
                                    usable_subset,
                                    literal_values,
                                    index_on_id: get_id,
                                    recommended_key,
                                },
                            )
                        }
                        _ => (),
                    }
                });
        }

        result
    }

    /// Removes the expressions that [LiteralConstraints::detect_literal_constraints] found, if
    /// possible. Returns whether it removed anything.
    /// For example, if the key of the detected literal constraint is just `f1`, and we have the
    /// expression
    /// `(f1 = 3 AND f2 = 5) OR (f1 = 7 AND f2 = 5)`, then this modifies it to `f2 = 5`.
    /// However, if OR branches differ in their non-key parts, then we cannot remove the literal
    /// constraint. For example, given
    /// `(f1 = 3 AND f2 = 5) OR (f1 = 7 AND f2 = 555)`, we cannot remove the `f1` parts,
    /// because then the filter wouldn't know whether to check `f2 = 5` or `f2 = 555`.
    fn remove_literal_constraints(mfp: &mut MapFilterProject, key: &Vec<MirScalarExpr>) -> bool {
        let or_args = Self::get_or_args(mfp);
        if or_args.len() == 0 {
            return false;
        }

        // In simple situations it would be enough to check here that if we remove the detected
        // literal constraints from each OR arg, then the residual OR args are all equal.
        // However, this wouldn't be able to perform the removal when the expression that should
        // remain in the end has an OR. This is because conversion to DNF makes duplicates of
        // every literal constraint, with different residuals. To also handle this case, we collect
        // the possible residuals for every literal constraint row, and check that all sets are
        // equal. Example: The user wrote
        // `WHERE ((a=1 AND b=1) OR (a=2 AND b=2)) AND (c OR (d AND e))`.
        // The DNF of this is
        // `(a=1 AND b=1 AND c) OR (a=1 AND b=1 AND d AND e) OR (a=2 AND b=2 AND c) OR (a=2 AND b=2 AND d AND e)`.
        // Then `constraints_to_residual_sets` will be:
        // [
        //   [`a=1`, `b=1`] -> {[`c`], [`d`, `e`]},
        //   [`a=2`, `b=2`] -> {[`c`], [`d`, `e`]}
        // ]
        // After removing the literal constraints we have
        // `c OR (d AND e)`
        let mut constraints_to_residual_sets = BTreeMap::new();
        or_args.iter().for_each(|or_arg| {
            let and_args = or_arg.and_or_args(VariadicFunc::And);
            let (mut constraints, mut residual): (Vec<_>, Vec<_>) =
                and_args.iter().cloned().partition(|and_arg| {
                    key.iter()
                        .any(|key_field| matches!(and_arg.expr_eq_literal(key_field), Some(..)))
                });
            // In every or_arg there has to be at least one literal constraint, otherwise
            // `detect_literal_constraints` would have returned None.
            assert!(constraints.len() >= 1);
            // `remove_impossible_or_args` made sure that inside each or_arg, each
            // expression can be literal constrained only once. So if we find one of the
            // key fields being literal constrained, then it's definitely that literal
            // constraint that detect_literal_constraints based one of its return values on.
            //
            // This is important, because without `remove_impossible_or_args`, we might
            // have the situation here that or_arg would be something like
            // `a = 5 AND a = 8`, of which `detect_literal_constraints` found only the `a = 5`,
            // but here we would remove both the `a = 5` and the `a = 8`.
            constraints.sort();
            residual.sort();
            let entry = constraints_to_residual_sets
                .entry(constraints)
                .or_insert_with(BTreeSet::new);
            entry.insert(residual);
        });
        let residual_sets = constraints_to_residual_sets
            .into_iter()
            .map(|(_constraints, residual_set)| residual_set)
            .collect::<Vec<_>>();
        if residual_sets.iter().all_equal() {
            // We can remove the literal constraint
            assert!(residual_sets.len() >= 1); // We already checked `or_args.len() == 0` above
            let residual_set = residual_sets.into_iter().into_first();
            let new_pred = MirScalarExpr::CallVariadic {
                func: VariadicFunc::Or,
                exprs: residual_set
                    .into_iter()
                    .map(|residual| MirScalarExpr::CallVariadic {
                        func: VariadicFunc::And,
                        exprs: residual,
                    })
                    .collect::<Vec<_>>(),
            };
            let (map, _predicates, project) = mfp.as_map_filter_project();
            *mfp = MapFilterProject::new(mfp.input_arity)
                .map(map)
                .filter(std::iter::once(new_pred))
                .project(project);

            true
        } else {
            false
        }
    }

    /// 1. Removes such OR args in which there are contradicting literal constraints.
    /// 2. Also, if an OR arg doesn't have any contradiction, this fn just deduplicates
    ///    the AND arg list of that OR arg. (Might additionally sort all AND arg lists.)
    ///
    /// Returns whether it performed any removal or deduplication.
    ///
    /// Example for 1:
    /// `<arg1> OR (a = 5 AND a = 5 AND a = 8) OR <arg3>`
    /// -->
    /// `<arg1> OR <arg3>`
    ///
    /// Example for 2:
    /// `<arg1> OR (a = 5 AND a = 5 AND b = 8) OR <arg3>`
    /// -->
    /// `<arg1> OR (a = 5 AND b = 8) OR <arg3>`
    fn remove_impossible_or_args(mfp: &mut MapFilterProject) -> Result<bool, RecursionLimitError> {
        let mut or_args = Self::get_or_args(mfp);
        if or_args.len() == 0 {
            return Ok(false);
        }
        let mut to_remove = Vec::new();
        let mut changed = false;
        or_args.iter_mut().enumerate().for_each(|(i, or_arg)| {
            if let MirScalarExpr::CallVariadic {
                func: VariadicFunc::And,
                exprs: and_args,
            } = or_arg
            {
                if and_args
                    .iter()
                    .any(|e| e.impossible_literal_equality_because_types())
                {
                    changed = true;
                    to_remove.push(i);
                } else {
                    and_args.sort_by_key(|e: &MirScalarExpr| e.invert_casts_on_expr_eq_literal());
                    let and_args_before_dedup = and_args.clone();
                    and_args
                        .dedup_by_key(|e: &mut MirScalarExpr| e.invert_casts_on_expr_eq_literal());
                    if *and_args != and_args_before_dedup {
                        changed = true;
                    }
                    // Deduplicated, so we cannot have something like `a = 5 AND a = 5`.
                    // This means that if we now have `<expr1> = <literal1> AND <expr1> = <literal2>`,
                    // then `literal1` is definitely not the same as `literal2`. This means that this
                    // whole or_arg is a contradiction, because it's something like `a = 5 AND a = 8`.
                    let mut literal_constrained_exprs = and_args
                        .iter()
                        .filter_map(|and_arg| and_arg.any_expr_eq_literal());
                    if !literal_constrained_exprs.all_unique() {
                        changed = true;
                        to_remove.push(i);
                    }
                }
            } else {
                // `unary_and` made sure that each OR arg is an AND
                unreachable!("OR arg was not an AND in remove_impossible_or_args");
            }
        });
        // We remove the marked OR args.
        // (If the OR has 0 or 1 args remaining, then `reduce_and_canonicalize_and_or` will later
        // further simplify.)
        swap_remove_multiple(&mut or_args, to_remove);
        // Rebuild the MFP if needed
        if changed {
            let new_predicates = vec![MirScalarExpr::CallVariadic {
                func: VariadicFunc::Or,
                exprs: or_args,
            }];
            let (map, _predicates, project) = mfp.as_map_filter_project();
            *mfp = MapFilterProject::new(mfp.input_arity)
                .map(map)
                .filter(new_predicates)
                .project(project);
            Ok(true)
        } else {
            Ok(false)
        }
    }

    /// Returns the arguments of the predicate's top-level OR as a Vec.
    /// If there is no top-level OR, then interpret the predicate as a 1-arg OR, i.e., return a
    /// 1-element Vec.
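    /// For example, `(f1 = 3 AND f2 = 5) OR (f1 = 7)` yields the two OR arguments as a 2-element
    /// Vec, while `f1 = 3` (no top-level OR) yields the 1-element Vec `[f1 = 3]`.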
    ///
    /// Assumes that [LiteralConstraints::list_of_predicates_to_and_of_predicates] has already run.
    fn get_or_args(mfp: &MapFilterProject) -> Vec<MirScalarExpr> {
        assert_eq!(mfp.predicates.len(), 1); // list_of_predicates_to_and_of_predicates ensured this
        let (_, pred) = mfp.predicates.get(0).unwrap();
        pred.and_or_args(VariadicFunc::Or)
    }

    /// Makes the job of [LiteralConstraints::detect_literal_constraints] easier by undoing some CSE to
    /// reconstruct literal constraints.
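    ///
    /// For example (a hypothetical MFP shape), if common subexpression elimination turned `f1 = 3`
    /// into a mapped expression `#n := (f1 = 3)` that a predicate then references, inlining `#n`
    /// back into the predicate makes the `f1 = 3` constraint visible again.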
    fn inline_literal_constraints(mfp: &mut MapFilterProject) {
        let mut should_inline = vec![false; mfp.input_arity + mfp.expressions.len()];
        // Mark those expressions for inlining that contain a subexpression of the form
        // `<xxx> = <lit>` or `<lit> = <xxx>`.
        for (i, e) in mfp.expressions.iter().enumerate() {
            e.visit_pre(|s| {
                if s.any_expr_eq_literal().is_some() {
                    should_inline[i + mfp.input_arity] = true;
                }
            });
        }
        // Whenever
        // `<Column(i)> = <lit>` or `<lit> = <Column(i)>`
        // appears in a predicate, mark the ith expression to be inlined.
        for (_before, p) in mfp.predicates.iter() {
            p.visit_pre(|e| {
                if let MirScalarExpr::CallBinary {
                    func: BinaryFunc::Eq,
                    expr1,
                    expr2,
                } = e
                {
                    if matches!(**expr1, MirScalarExpr::Literal(..)) {
                        if let MirScalarExpr::Column(col, _) = **expr2 {
                            if col >= mfp.input_arity {
                                should_inline[col] = true;
                            }
                        }
                    }
                    if matches!(**expr2, MirScalarExpr::Literal(..)) {
                        if let MirScalarExpr::Column(col, _) = **expr1 {
                            if col >= mfp.input_arity {
                                should_inline[col] = true;
                            }
                        }
                    }
                }
            });
        }
        // Perform the marked inlinings.
        mfp.perform_inlining(should_inline);
    }

    /// MFPs have a Vec of predicates `[p1, p2, ...]`, which logically represents `p1 AND p2 AND ...`.
    /// This function turns that Vec into a single variadic AND of the original predicates. Note
    /// that the AND might have 0 or 1 args (if the Vec had 0 or 1 predicates); either way, the
    /// resulting predicate Vec has exactly 1 element.
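    /// For example, the predicate list `[f1 = 3, f2 = 5]` becomes the single predicate
    /// `(f1 = 3) AND (f2 = 5)`.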
    fn list_of_predicates_to_and_of_predicates(mfp: &mut MapFilterProject) {
        // Rebuild the MFP. (Unfortunately, we cannot modify the predicates in place, because MFP
        // predicates also have a "before" field, which we would need to update; `filter` will
        // recompute these for us.)
        let (map, _predicates, project) = mfp.as_map_filter_project();
        let new_predicates = vec![MirScalarExpr::CallVariadic {
            func: VariadicFunc::And,
            exprs: mfp.predicates.iter().map(|(_, p)| p.clone()).collect(),
        }];
        *mfp = MapFilterProject::new(mfp.input_arity)
            .map(map)
            .filter(new_predicates)
            .project(project);
    }

    /// Call [mz_expr::canonicalize::canonicalize_predicates] on each of the predicates in the MFP.
    fn canonicalize_predicates(
        mfp: &mut MapFilterProject,
        relation: &MirRelationExpr,
        relation_type: RelationType,
    ) {
        let (map, mut predicates, project) = mfp.as_map_filter_project();
        let typ_after_map = relation
            .clone()
            .map(map.clone())
            .typ_with_input_types(&[relation_type]);
        canonicalize_predicates(&mut predicates, &typ_after_map.column_types);
        // Rebuild the MFP with the new predicates.
        *mfp = MapFilterProject::new(mfp.input_arity)
            .map(map)
            .filter(predicates)
            .project(project);
    }

    /// Distribute AND over OR + do flatten_and_or until fixed point.
    /// This effectively converts to disjunctive normal form (DNF) (i.e., an OR of ANDs), because
    /// [MirScalarExpr::reduce] applied De Morgan's laws and double-negation elimination. So after
    /// [MirScalarExpr::reduce], we get here a tree of AND/OR nodes. A distribution step lifts an OR
    /// up the tree by 1 level, and a [MirScalarExpr::flatten_associative] merges two ORs that are at
    /// adjacent levels, so eventually we'll end up with just one OR that is at the top of the tree,
    /// with ANDs below it.
    /// For example:
    /// (a || b) && (c || d)
    /// ->
    /// ((a || b) && c) || ((a || b) && d)
    /// ->
    /// (a && c) || (b && c) || (a && d) || (b && d)
    /// (This is a variadic OR with 4 arguments.)
    ///
    /// Example:
    /// User wrote `WHERE (a,b) IN ((1,2), (1,4), (8,5))`,
    /// from which [MirScalarExpr::undistribute_and_or] made this before us:
    /// (#0 = 1 AND (#1 = 2 OR #1 = 4)) OR (#0 = 8 AND #1 = 5)
    /// And now we distribute the first AND over the first OR in 2 steps: First to
    /// ((#0 = 1 AND #1 = 2) OR (#0 = 1 AND #1 = 4)) OR (#0 = 8 AND #1 = 5)
    /// then [MirScalarExpr::flatten_associative]:
    /// (#0 = 1 AND #1 = 2) OR (#0 = 1 AND #1 = 4) OR (#0 = 8 AND #1 = 5)
    ///
    /// Note that [MirScalarExpr::undistribute_and_or] is not exactly an inverse to this because
    /// 1) it can undistribute both AND over OR and OR over AND.
    /// 2) it cannot always undo the distribution, because an expression might have multiple
    ///    overlapping undistribution opportunities, see comment there.
    fn distribute_and_over_or(mfp: &mut MapFilterProject) -> Result<(), RecursionLimitError> {
        mfp.predicates.iter_mut().try_for_each(|(_, p)| {
            let mut old_p = MirScalarExpr::column(0);
            while old_p != *p {
                let size = p.size();
                // We might make the expression exponentially larger, so we should have some limit.
                // Below 1000 (e.g., a single IN list of ~300 elements, or 3 IN lists of 4-5
                // elements each), we are <10 ms for a single IN list, and even less for multiple IN
                // lists.
                if size > 1000 {
                    break;
                }
                old_p = p.clone();
                p.visit_mut_post(&mut |e: &mut MirScalarExpr| {
                    if let MirScalarExpr::CallVariadic {
                        func: VariadicFunc::And,
                        exprs: and_args,
                    } = e
                    {
                        if let Some((i, _)) = and_args.iter().enumerate().find(|(_i, a)| {
                            matches!(
                                a,
                                MirScalarExpr::CallVariadic {
                                    func: VariadicFunc::Or,
                                    ..
                                }
                            )
                        }) {
                            // We found an AND whose ith argument is an OR. We'll distribute the other
                            // args of the AND over this OR.
                            let mut or = and_args.swap_remove(i);
                            let to_distribute = MirScalarExpr::CallVariadic {
                                func: VariadicFunc::And,
                                exprs: (*and_args).clone(),
                            };
                            if let MirScalarExpr::CallVariadic {
                                func: VariadicFunc::Or,
                                exprs: ref mut or_args,
                            } = or
                            {
                                or_args.iter_mut().for_each(|a| {
                                    *a = a.clone().and(to_distribute.clone());
                                });
                            } else {
                                unreachable!(); // because the `find` found a match already
                            }
                            *e = or; // The modified OR will be the new top-level expr.
                        }
                    }
                })?;
                p.visit_mut_post(&mut |e: &mut MirScalarExpr| {
                    e.flatten_associative();
                })?;
            }
            Ok(())
        })
    }

    /// For each of the arguments of the top-level OR (if no top-level OR, then interpret the whole
    /// expression as a 1-arg OR, see [LiteralConstraints::get_or_args]), check if it's an AND, and
    /// if not, then wrap it in a 1-arg AND.
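    ///
    /// For example, `(f1 = 3) OR (f1 = 7 AND f2 = 9)` becomes
    /// `AND(f1 = 3) OR (f1 = 7 AND f2 = 9)`, where `AND(..)` denotes a variadic AND with a single
    /// argument.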
    fn unary_and(mfp: &mut MapFilterProject) {
        let mut or_args = Self::get_or_args(mfp);
        let mut changed = false;
        or_args.iter_mut().for_each(|or_arg| {
            if !matches!(
                or_arg,
                MirScalarExpr::CallVariadic {
                    func: VariadicFunc::And,
                    ..
                }
            ) {
                *or_arg = MirScalarExpr::CallVariadic {
                    func: VariadicFunc::And,
                    exprs: vec![or_arg.clone()],
                };
                changed = true;
            }
        });
        if changed {
            let new_predicates = vec![MirScalarExpr::CallVariadic {
                func: VariadicFunc::Or,
                exprs: or_args,
            }];
            let (map, _predicates, project) = mfp.as_map_filter_project();
            *mfp = MapFilterProject::new(mfp.input_arity)
                .map(map)
                .filter(new_predicates)
                .project(project);
        }
    }

    fn predicates_size(mfp: &MapFilterProject) -> usize {
        let mut sum = 0;
        for (_, p) in mfp.predicates.iter() {
            sum = sum + p.size();
        }
        sum
    }
}

/// Whether an index is usable to speed up a Filter with literal constraints.
#[derive(Clone)]
enum IndexMatch {
    /// The index is usable, that is, each OR argument constrains each key field.
    ///
    /// The `Vec<Row>` has the constraining literal values, where each Row corresponds to one OR
    /// argument, and each value in the Row corresponds to one key field.
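    /// For example, for the filter `(f1 = 3 AND f2 = 5) OR (f1 = 7 AND f2 = 9)` matched against an
    /// index on `(f1, f2)`, the rows would be `[3, 5]` and `[7, 9]`.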
    ///
    /// The `bool` indicates whether we needed to inverse cast equalities to match them up with key
    /// fields. The inverse cast enables index usage when an implicit cast is wrapping a key field.
    /// E.g., if `a` is smallint, and the user writes `a = 5`, then HIR inserts an implicit cast:
    /// `smallint_to_integer(a) = 5`, which we invert to `a = 5`, where the `5` is a smallint
    /// literal. For more details on the inversion, see `invert_casts_on_expr_eq_literal_inner`.
    Usable(Vec<Row>, bool),
    /// The index is unusable. However, there is a subset of key fields such that if the index were
    /// only on this subset, then it would be usable.
    /// Note: this Vec is never empty. (If it were empty, then we'd get `UnusableNoSubset` instead.)
    UnusableTooWide(Vec<MirScalarExpr>),
    /// The index is unusable. Moreover, none of its key fields could be used as an alternate index
    /// to speed up this filter.
    UnusableNoSubset,
}