misc.python.materialize.sqlancer

Shared code for running SQLancer and SQLancer++ against Materialize.

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
 10"""
 11Shared code for running SQLancer and SQLancer++ against Materialize.
 12"""
 13
 14import argparse
 15import random
 16import re
 17import shutil
 18import subprocess
 19from collections.abc import Callable
 20from pathlib import Path
 21from threading import Thread
 22
 23from materialize import buildkite, spawn
 24from materialize.mzcompose.composition import (
 25    Composition,
 26    WorkflowArgumentParser,
 27)
 28from materialize.mzcompose.service import Service
 29from materialize.mzcompose.services.materialized import Materialized
 30
 31IGNORED_ERROR_PATTERNS = [
 32    r"^(?!.*panic).*not yet supported",
 33    r"does not exist",
 34    r"must have",
 35    r"overflow",
 36    r"invalid input",
 37    r"cannot be matched",
 38    r"implicitly casting",
 39    r"is not unique",
 40    r"invalid digit",
 41    r"is not defined",
 42    r"is defined",
 43    r"unterminated",
 44    r"Expected ",
 45    r"is out of range",
 46    r"of a negative",
 47    r"does not support",
 48    r"ANALYZE",
 49    r"is of type",
 50    r"must appear in the",
 51    r"negative substring length",
 52    r"is ambiguous",
 53    r"out of range",
 54    r"division by zero",
 55    r"is only defined for finite",
 56    r"cannot canonicalize predicates that are not of type",
 57    r"unexpected character in input",
 58    r"could not determine polymorphic type",
 59    r"cannot reference pseudo type",
 60    r"string is not a valid identifier",
 61    r"invalid regular expression",
 62    r" violates not-null constraint",
 63    r"could not convert type",
 64    r"are not allowed",
 65    r"requires a record",
 66    r"unrecognized privilege type",
 67    r"cannot cast",
 68    r"requires an OVER",
 69    r"value too long",
 70    r"may only refer to user-defined",
 71    r"cannot materialize call to",
 72    r"invalid hash algorithm",
 73    r"expected id",
 74    r"expected exactly one statement",
 75    r"regex parse error",
 76    r"invalid encoding name",
 77    r"must specify at least one capture group",
 78    r"requires a string literal",
 79    r"too large for encoding",
 80    r"must be a positive integer",
 81    r"bound must be less than",
 82    r"not recognized",
 83    r"invalid escape string",
 84    r"canceling statement due to statement timeout",
 85    r"invalid time zone",
 86    r"result exceeds max size",
 87    r"calls to mz_now in write statements",
 88    r"out of valid range",
 89    r"more than one record produced in subquery",
 90    r"input of anonymous composite types is not implemented",
 91    r"lists must all be the same length",
 92    r"invalid IANA Time Zone Database identifier",
 93    r"unknown schema",
 94    r"octal escapes are not supported",
 95    r"attempt to create relation with too many columns",
 96    r"column notation applied to type text",
 97    r"Unexpected EOF",
 98    r"missing required exponent",
 99    r"invalid unicode escape",
100    r"dimension array or low bound array must not be null",
101    r"LIKE pattern exceeds maximum length",
102    r"cannot return complex numbers",
103    r"must be greater than zero",
104    r"null character not permitted",
105    r"invalid datepart",
106    r"must use value within",
107    r"null character not permitted",
108    r"expressions must appear in select list",
109    r"expressions must match initial",
110    r"invalid selection: operation may only",
111    r"array size exceeds the maximum allowed",
112    r"does not allow subqueries",
113    r"must use value within",
114    r"zero raised to a negative power is undefined",
115    r"range type over",
116]
117
118
119def check_query_errors(logs_dir: Path) -> list[str]:
120    unexpected_errors = []
121    log_files = list(logs_dir.glob("*.log"))
122
123    for log_file in log_files:
124        try:
125            content = log_file.read_text()
126        except Exception:
127            continue
128
129        for line in content.splitlines():
130            if "ERROR" not in line:
131                continue
132
133            is_ignored = False
134            for pattern in IGNORED_ERROR_PATTERNS:
135                if re.search(pattern, line):
136                    is_ignored = True
137                    break
138
139            if not is_ignored:
140                unexpected_errors.append(f"{log_file.name}: {line}")
141
142    return unexpected_errors
143
144
145def create_services(service_name: str) -> list[Service | Materialized]:
146    """Create the services needed for SQLancer or SQLancer++ tests."""
147    return [
148        # Auto-restart so we can keep testing even after we ran into a panic
149        Materialized(
150            restart="on-failure",
151            default_replication_factor=1,
152            additional_system_parameter_defaults={
153                "enable_alter_table_add_column": "true",
154                "enable_statement_lifecycle_logging": "false",
155                "enable_internal_statement_logging": "false",
156                "statement_logging_default_sample_rate": "0",
157                "statement_logging_max_sample_rate": "0",
158                "enable_repeat_row": "true",
159                "enable_list_length_max": "true",
160                "enable_list_n_layers": "true",
161                "enable_time_at_time_zone": "true",
162                "enable_date_bin_hopping": "true",
163            },
164        ),
165        Service(
166            service_name,
167            {
168                "mzbuild": service_name,
169            },
170        ),
171    ]
172
173
174def _print_logs(container_id: str) -> None:
175    spawn.runv(["docker", "logs", "-f", container_id])
176
177
178def run_sqlancer(
179    c: Composition,
180    parser: WorkflowArgumentParser,
181    *,
182    service_name: str,
183    default_oracle: str,
184    build_run_args: Callable[[argparse.Namespace, int], list[str]],
185    docker_logs_path: str,
186    log_prefix: str,
187) -> None:
188    parser.add_argument("--runtime", default=600, type=int)
189    parser.add_argument("--num-tries", default=100000, type=int)
190    parser.add_argument("--num-threads", default=16, type=int)
191    parser.add_argument("--seed", default=None, type=int)
192    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
193    parser.add_argument("--oracle", default=default_oracle, type=str)
194    args = parser.parse_args()
195
196    c.up("materialized")
197
198    c.sql(
199        "ALTER SYSTEM SET max_databases TO 1000",
200        user="mz_system",
201        port=6877,
202    )
203    c.sql(
204        "ALTER SYSTEM SET max_tables TO 1000",
205        user="mz_system",
206        port=6877,
207    )
208    c.sql(
209        "ALTER SYSTEM SET max_materialized_views TO 1000",
210        user="mz_system",
211        port=6877,
212    )
213
214    seed = args.seed or random.randint(0, 2**31)
215
216    run_args = build_run_args(args, seed)
217
218    print("--- Run in progress")
219    result = c.run(
220        service_name,
221        *run_args,
222        check=False,
223        detach=True,
224        capture=True,
225    )
226    container_id = result.stdout.strip()
227
228    # Print logs in a background thread so that we get immediate output in CI,
229    # and also when running SQLancer locally
230    thread = Thread(target=_print_logs, args=(container_id,))
231    thread.start()
232    # At the same time capture the logs to analyze for finding new issues
233    stdout = spawn.capture(
234        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
235    )
236
237    in_assertion = False
238    for line in stdout.splitlines():
239        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
240            continue
241        if line.startswith("--java.lang."):
242            in_assertion = True
243            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
244        elif line == "":
245            in_assertion = False
246        elif in_assertion:
247            print(line)
248    print(f"--- {result.stdout.splitlines()[-1]}")
249
250    # Check for unexpected query errors in the logs
251    logs = Path("logs")
252    if logs.exists() and logs.is_dir():
253        shutil.rmtree(logs)
254    spawn.runv(
255        [
256            "docker",
257            "cp",
258            f"{container_id}:{docker_logs_path}",
259            str(logs),
260        ]
261    )
262    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
263    buildkite.upload_artifact("logs.tar.gz")
264    unexpected_errors = check_query_errors(logs)
265    if unexpected_errors:
266        print("--- Unexpected query errors found:")
267        for error in unexpected_errors:
268            print(error)
269        raise Exception(
270            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
271        )
IGNORED_ERROR_PATTERNS = ['^(?!.*panic).*not yet supported', 'does not exist', 'must have', 'overflow', 'invalid input', 'cannot be matched', 'implicitly casting', 'is not unique', 'invalid digit', 'is not defined', 'is defined', 'unterminated', 'Expected ', 'is out of range', 'of a negative', 'does not support', 'ANALYZE', 'is of type', 'must appear in the', 'negative substring length', 'is ambiguous', 'out of range', 'division by zero', 'is only defined for finite', 'cannot canonicalize predicates that are not of type', 'unexpected character in input', 'could not determine polymorphic type', 'cannot reference pseudo type', 'string is not a valid identifier', 'invalid regular expression', ' violates not-null constraint', 'could not convert type', 'are not allowed', 'requires a record', 'unrecognized privilege type', 'cannot cast', 'requires an OVER', 'value too long', 'may only refer to user-defined', 'cannot materialize call to', 'invalid hash algorithm', 'expected id', 'expected exactly one statement', 'regex parse error', 'invalid encoding name', 'must specify at least one capture group', 'requires a string literal', 'too large for encoding', 'must be a positive integer', 'bound must be less than', 'not recognized', 'invalid escape string', 'canceling statement due to statement timeout', 'invalid time zone', 'result exceeds max size', 'calls to mz_now in write statements', 'out of valid range', 'more than one record produced in subquery', 'input of anonymous composite types is not implemented', 'lists must all be the same length', 'invalid IANA Time Zone Database identifier', 'unknown schema', 'octal escapes are not supported', 'attempt to create relation with too many columns', 'column notation applied to type text', 'Unexpected EOF', 'missing required exponent', 'invalid unicode escape', 'dimension array or low bound array must not be null', 'LIKE pattern exceeds maximum length', 'cannot return complex numbers', 'must be greater than zero', 'null character not permitted', 'invalid datepart', 'must use value within', 'null character not permitted', 'expressions must appear in select list', 'expressions must match initial', 'invalid selection: operation may only', 'array size exceeds the maximum allowed', 'does not allow subqueries', 'must use value within', 'zero raised to a negative power is undefined', 'range type over']
def check_query_errors(logs_dir: pathlib.Path) -> list[str]:
120def check_query_errors(logs_dir: Path) -> list[str]:
121    unexpected_errors = []
122    log_files = list(logs_dir.glob("*.log"))
123
124    for log_file in log_files:
125        try:
126            content = log_file.read_text()
127        except Exception:
128            continue
129
130        for line in content.splitlines():
131            if "ERROR" not in line:
132                continue
133
134            is_ignored = False
135            for pattern in IGNORED_ERROR_PATTERNS:
136                if re.search(pattern, line):
137                    is_ignored = True
138                    break
139
140            if not is_ignored:
141                unexpected_errors.append(f"{log_file.name}: {line}")
142
143    return unexpected_errors
def create_services( service_name: str) -> list[materialize.mzcompose.service.Service | materialize.mzcompose.services.materialized.Materialized]:
146def create_services(service_name: str) -> list[Service | Materialized]:
147    """Create the services needed for SQLancer or SQLancer++ tests."""
148    return [
149        # Auto-restart so we can keep testing even after we ran into a panic
150        Materialized(
151            restart="on-failure",
152            default_replication_factor=1,
153            additional_system_parameter_defaults={
154                "enable_alter_table_add_column": "true",
155                "enable_statement_lifecycle_logging": "false",
156                "enable_internal_statement_logging": "false",
157                "statement_logging_default_sample_rate": "0",
158                "statement_logging_max_sample_rate": "0",
159                "enable_repeat_row": "true",
160                "enable_list_length_max": "true",
161                "enable_list_n_layers": "true",
162                "enable_time_at_time_zone": "true",
163                "enable_date_bin_hopping": "true",
164            },
165        ),
166        Service(
167            service_name,
168            {
169                "mzbuild": service_name,
170            },
171        ),
172    ]

Create the services needed for SQLancer or SQLancer++ tests.

def run_sqlancer( c: materialize.mzcompose.composition.Composition, parser: materialize.mzcompose.composition.WorkflowArgumentParser, *, service_name: str, default_oracle: str, build_run_args: Callable[[argparse.Namespace, int], list[str]], docker_logs_path: str, log_prefix: str) -> None:
179def run_sqlancer(
180    c: Composition,
181    parser: WorkflowArgumentParser,
182    *,
183    service_name: str,
184    default_oracle: str,
185    build_run_args: Callable[[argparse.Namespace, int], list[str]],
186    docker_logs_path: str,
187    log_prefix: str,
188) -> None:
189    parser.add_argument("--runtime", default=600, type=int)
190    parser.add_argument("--num-tries", default=100000, type=int)
191    parser.add_argument("--num-threads", default=16, type=int)
192    parser.add_argument("--seed", default=None, type=int)
193    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
194    parser.add_argument("--oracle", default=default_oracle, type=str)
195    args = parser.parse_args()
196
197    c.up("materialized")
198
199    c.sql(
200        "ALTER SYSTEM SET max_databases TO 1000",
201        user="mz_system",
202        port=6877,
203    )
204    c.sql(
205        "ALTER SYSTEM SET max_tables TO 1000",
206        user="mz_system",
207        port=6877,
208    )
209    c.sql(
210        "ALTER SYSTEM SET max_materialized_views TO 1000",
211        user="mz_system",
212        port=6877,
213    )
214
215    seed = args.seed or random.randint(0, 2**31)
216
217    run_args = build_run_args(args, seed)
218
219    print("--- Run in progress")
220    result = c.run(
221        service_name,
222        *run_args,
223        check=False,
224        detach=True,
225        capture=True,
226    )
227    container_id = result.stdout.strip()
228
229    # Print logs in a background thread so that we get immediate output in CI,
230    # and also when running SQLancer locally
231    thread = Thread(target=_print_logs, args=(container_id,))
232    thread.start()
233    # At the same time capture the logs to analyze for finding new issues
234    stdout = spawn.capture(
235        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
236    )
237
238    in_assertion = False
239    for line in stdout.splitlines():
240        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
241            continue
242        if line.startswith("--java.lang."):
243            in_assertion = True
244            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
245        elif line == "":
246            in_assertion = False
247        elif in_assertion:
248            print(line)
249    print(f"--- {result.stdout.splitlines()[-1]}")
250
251    # Check for unexpected query errors in the logs
252    logs = Path("logs")
253    if logs.exists() and logs.is_dir():
254        shutil.rmtree(logs)
255    spawn.runv(
256        [
257            "docker",
258            "cp",
259            f"{container_id}:{docker_logs_path}",
260            str(logs),
261        ]
262    )
263    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
264    buildkite.upload_artifact("logs.tar.gz")
265    unexpected_errors = check_query_errors(logs)
266    if unexpected_errors:
267        print("--- Unexpected query errors found:")
268        for error in unexpected_errors:
269            print(error)
270        raise Exception(
271            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
272        )