misc.python.materialize.sqlancer

Shared code for running SQLancer and SQLancer++ against Materialize.

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
 10"""
 11Shared code for running SQLancer and SQLancer++ against Materialize.
 12"""
 13
 14import argparse
 15import random
 16import re
 17import shutil
 18import subprocess
 19from collections.abc import Callable
 20from pathlib import Path
 21from threading import Thread
 22
 23from materialize import buildkite, spawn
 24from materialize.mzcompose.composition import (
 25    Composition,
 26    WorkflowArgumentParser,
 27)
 28from materialize.mzcompose.service import Service
 29from materialize.mzcompose.services.materialized import Materialized
 30
 31IGNORED_ERROR_PATTERNS = [
 32    r"^(?!.*panic).*not yet supported",
 33    r"does not exist",
 34    r"must have",
 35    r"overflow",
 36    r"invalid input",
 37    r"cannot be matched",
 38    r"implicitly casting",
 39    r"is not unique",
 40    r"invalid digit",
 41    r"is not defined",
 42    r"is defined",
 43    r"unterminated",
 44    r"Expected ",
 45    r"is out of range",
 46    r"of a negative",
 47    r"does not support",
 48    r"ANALYZE",
 49    r"is of type",
 50    r"must appear in the",
 51    r"negative substring length",
 52    r"is ambiguous",
 53    r"out of range",
 54    r"division by zero",
 55    r"is only defined for finite",
 56    r"cannot canonicalize predicates that are not of type",
 57    r"unexpected character in input",
 58    r"could not determine polymorphic type",
 59    r"cannot reference pseudo type",
 60    r"string is not a valid identifier",
 61    r"invalid regular expression",
 62    r" violates not-null constraint",
 63    r"could not convert type",
 64    r"are not allowed",
 65    r"requires a record",
 66    r"unrecognized privilege type",
 67    r"cannot cast",
 68    r"requires an OVER",
 69    r"value too long",
 70    r"may only refer to user-defined",
 71    r"cannot materialize call to",
 72    r"invalid hash algorithm",
 73    r"expected id",
 74    r"expected exactly one statement",
 75    r"regex parse error",
 76    r"invalid encoding name",
 77    r"must specify at least one capture group",
 78    r"requires a string literal",
 79    r"too large for encoding",
 80    r"must be a positive integer",
 81    r"bound must be less than",
 82    r"not recognized",
 83    r"invalid escape string",
 84    r"canceling statement due to statement timeout",
 85    r"invalid time zone",
 86    r"result exceeds max size",
 87    r"calls to mz_now in write statements",
 88    r"out of valid range",
 89    r"more than one record produced in subquery",
 90    r"input of anonymous composite types is not implemented",
 91    r"lists must all be the same length",
 92    r"invalid IANA Time Zone Database identifier",
 93    r"unknown schema",
 94    r"octal escapes are not supported",
 95    r"attempt to create relation with too many columns",
 96    r"column notation applied to type text",
 97    r"Unexpected EOF",
 98    r"missing required exponent",
 99    r"invalid unicode escape",
100    r"dimension array or low bound array must not be null",
101    r"dimension values must not be null",
102    r"LIKE pattern exceeds maximum length",
103    r"cannot return complex numbers",
104    r"must be greater than zero",
105    r"null character not permitted",
106    r"invalid datepart",
107    r"must use value within",
108    r"null character not permitted",
109    r"expressions must appear in select list",
110    r"expressions must match initial",
111    r"invalid selection: operation may only",
112    r"array size exceeds the maximum allowed",
113    r"does not allow subqueries",
114    r"must use value within",
115    r"zero raised to a negative power is undefined",
116    r"range type over",
117]
118
119
120def check_query_errors(logs_dir: Path) -> list[str]:
121    unexpected_errors = []
122    log_files = list(logs_dir.glob("*.log"))
123
124    for log_file in log_files:
125        try:
126            content = log_file.read_text()
127        except Exception:
128            continue
129
130        for line in content.splitlines():
131            if "ERROR" not in line:
132                continue
133
134            is_ignored = False
135            for pattern in IGNORED_ERROR_PATTERNS:
136                if re.search(pattern, line):
137                    is_ignored = True
138                    break
139
140            if not is_ignored:
141                unexpected_errors.append(f"{log_file.name}: {line}")
142
143    return unexpected_errors
144
145
146def create_services(service_name: str) -> list[Service | Materialized]:
147    """Create the services needed for SQLancer or SQLancer++ tests."""
148    return [
149        # Auto-restart so we can keep testing even after we ran into a panic
150        Materialized(
151            restart="on-failure",
152            default_replication_factor=1,
153            additional_system_parameter_defaults={
154                "enable_alter_table_add_column": "true",
155                "enable_statement_lifecycle_logging": "false",
156                "enable_internal_statement_logging": "false",
157                "statement_logging_default_sample_rate": "0",
158                "statement_logging_max_sample_rate": "0",
159                "enable_repeat_row": "true",
160                "enable_list_length_max": "true",
161                "enable_list_n_layers": "true",
162                "enable_time_at_time_zone": "true",
163                "enable_date_bin_hopping": "true",
164            },
165        ),
166        Service(
167            service_name,
168            {
169                "mzbuild": service_name,
170            },
171        ),
172    ]
173
174
175def _print_logs(container_id: str) -> None:
176    spawn.runv(["docker", "logs", "-f", container_id])
177
178
179def run_sqlancer(
180    c: Composition,
181    parser: WorkflowArgumentParser,
182    *,
183    service_name: str,
184    default_oracle: str,
185    build_run_args: Callable[[argparse.Namespace, int], list[str]],
186    docker_logs_path: str,
187    log_prefix: str,
188) -> None:
189    parser.add_argument("--runtime", default=600, type=int)
190    parser.add_argument("--num-tries", default=100000, type=int)
191    parser.add_argument("--num-threads", default=16, type=int)
192    parser.add_argument("--seed", default=None, type=int)
193    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
194    parser.add_argument("--oracle", default=default_oracle, type=str)
195    args = parser.parse_args()
196
197    c.up("materialized")
198
199    c.sql(
200        "ALTER SYSTEM SET max_databases TO 1000",
201        user="mz_system",
202        port=6877,
203    )
204    c.sql(
205        "ALTER SYSTEM SET max_tables TO 1000",
206        user="mz_system",
207        port=6877,
208    )
209    c.sql(
210        "ALTER SYSTEM SET max_materialized_views TO 1000",
211        user="mz_system",
212        port=6877,
213    )
214
215    seed = args.seed or random.randint(0, 2**31)
216
217    run_args = build_run_args(args, seed)
218
219    print("--- Run in progress")
220    result = c.run(
221        service_name,
222        *run_args,
223        check=False,
224        detach=True,
225        capture=True,
226    )
227    container_id = result.stdout.strip()
228
229    # Print logs in a background thread so that we get immediate output in CI,
230    # and also when running SQLancer locally
231    thread = Thread(target=_print_logs, args=(container_id,))
232    thread.start()
233    # At the same time capture the logs to analyze for finding new issues
234    stdout = spawn.capture(
235        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
236    )
237
238    in_assertion = False
239    for line in stdout.splitlines():
240        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
241            continue
242        if line.startswith("--java.lang."):
243            in_assertion = True
244            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
245        elif line == "":
246            in_assertion = False
247        elif in_assertion:
248            print(line)
249    print(f"--- {result.stdout.splitlines()[-1]}")
250
251    # Check for unexpected query errors in the logs
252    logs = Path("logs")
253    if logs.exists() and logs.is_dir():
254        shutil.rmtree(logs)
255    spawn.runv(
256        [
257            "docker",
258            "cp",
259            f"{container_id}:{docker_logs_path}",
260            str(logs),
261        ]
262    )
263    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
264    buildkite.upload_artifact("logs.tar.gz")
265    unexpected_errors = check_query_errors(logs)
266    if unexpected_errors:
267        print("--- Unexpected query errors found:")
268        for error in unexpected_errors:
269            print(error)
270        raise Exception(
271            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
272        )
IGNORED_ERROR_PATTERNS = ['^(?!.*panic).*not yet supported', 'does not exist', 'must have', 'overflow', 'invalid input', 'cannot be matched', 'implicitly casting', 'is not unique', 'invalid digit', 'is not defined', 'is defined', 'unterminated', 'Expected ', 'is out of range', 'of a negative', 'does not support', 'ANALYZE', 'is of type', 'must appear in the', 'negative substring length', 'is ambiguous', 'out of range', 'division by zero', 'is only defined for finite', 'cannot canonicalize predicates that are not of type', 'unexpected character in input', 'could not determine polymorphic type', 'cannot reference pseudo type', 'string is not a valid identifier', 'invalid regular expression', ' violates not-null constraint', 'could not convert type', 'are not allowed', 'requires a record', 'unrecognized privilege type', 'cannot cast', 'requires an OVER', 'value too long', 'may only refer to user-defined', 'cannot materialize call to', 'invalid hash algorithm', 'expected id', 'expected exactly one statement', 'regex parse error', 'invalid encoding name', 'must specify at least one capture group', 'requires a string literal', 'too large for encoding', 'must be a positive integer', 'bound must be less than', 'not recognized', 'invalid escape string', 'canceling statement due to statement timeout', 'invalid time zone', 'result exceeds max size', 'calls to mz_now in write statements', 'out of valid range', 'more than one record produced in subquery', 'input of anonymous composite types is not implemented', 'lists must all be the same length', 'invalid IANA Time Zone Database identifier', 'unknown schema', 'octal escapes are not supported', 'attempt to create relation with too many columns', 'column notation applied to type text', 'Unexpected EOF', 'missing required exponent', 'invalid unicode escape', 'dimension array or low bound array must not be null', 'dimension values must not be null', 'LIKE pattern exceeds maximum length', 'cannot return complex numbers', 'must be greater than zero', 'null character not permitted', 'invalid datepart', 'must use value within', 'null character not permitted', 'expressions must appear in select list', 'expressions must match initial', 'invalid selection: operation may only', 'array size exceeds the maximum allowed', 'does not allow subqueries', 'must use value within', 'zero raised to a negative power is undefined', 'range type over']
def check_query_errors(logs_dir: pathlib.Path) -> list[str]:
121def check_query_errors(logs_dir: Path) -> list[str]:
122    unexpected_errors = []
123    log_files = list(logs_dir.glob("*.log"))
124
125    for log_file in log_files:
126        try:
127            content = log_file.read_text()
128        except Exception:
129            continue
130
131        for line in content.splitlines():
132            if "ERROR" not in line:
133                continue
134
135            is_ignored = False
136            for pattern in IGNORED_ERROR_PATTERNS:
137                if re.search(pattern, line):
138                    is_ignored = True
139                    break
140
141            if not is_ignored:
142                unexpected_errors.append(f"{log_file.name}: {line}")
143
144    return unexpected_errors
def create_services( service_name: str) -> list[materialize.mzcompose.service.Service | materialize.mzcompose.services.materialized.Materialized]:
147def create_services(service_name: str) -> list[Service | Materialized]:
148    """Create the services needed for SQLancer or SQLancer++ tests."""
149    return [
150        # Auto-restart so we can keep testing even after we ran into a panic
151        Materialized(
152            restart="on-failure",
153            default_replication_factor=1,
154            additional_system_parameter_defaults={
155                "enable_alter_table_add_column": "true",
156                "enable_statement_lifecycle_logging": "false",
157                "enable_internal_statement_logging": "false",
158                "statement_logging_default_sample_rate": "0",
159                "statement_logging_max_sample_rate": "0",
160                "enable_repeat_row": "true",
161                "enable_list_length_max": "true",
162                "enable_list_n_layers": "true",
163                "enable_time_at_time_zone": "true",
164                "enable_date_bin_hopping": "true",
165            },
166        ),
167        Service(
168            service_name,
169            {
170                "mzbuild": service_name,
171            },
172        ),
173    ]

Create the services needed for SQLancer or SQLancer++ tests.

def run_sqlancer( c: materialize.mzcompose.composition.Composition, parser: materialize.mzcompose.composition.WorkflowArgumentParser, *, service_name: str, default_oracle: str, build_run_args: Callable[[argparse.Namespace, int], list[str]], docker_logs_path: str, log_prefix: str) -> None:
180def run_sqlancer(
181    c: Composition,
182    parser: WorkflowArgumentParser,
183    *,
184    service_name: str,
185    default_oracle: str,
186    build_run_args: Callable[[argparse.Namespace, int], list[str]],
187    docker_logs_path: str,
188    log_prefix: str,
189) -> None:
190    parser.add_argument("--runtime", default=600, type=int)
191    parser.add_argument("--num-tries", default=100000, type=int)
192    parser.add_argument("--num-threads", default=16, type=int)
193    parser.add_argument("--seed", default=None, type=int)
194    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
195    parser.add_argument("--oracle", default=default_oracle, type=str)
196    args = parser.parse_args()
197
198    c.up("materialized")
199
200    c.sql(
201        "ALTER SYSTEM SET max_databases TO 1000",
202        user="mz_system",
203        port=6877,
204    )
205    c.sql(
206        "ALTER SYSTEM SET max_tables TO 1000",
207        user="mz_system",
208        port=6877,
209    )
210    c.sql(
211        "ALTER SYSTEM SET max_materialized_views TO 1000",
212        user="mz_system",
213        port=6877,
214    )
215
216    seed = args.seed or random.randint(0, 2**31)
217
218    run_args = build_run_args(args, seed)
219
220    print("--- Run in progress")
221    result = c.run(
222        service_name,
223        *run_args,
224        check=False,
225        detach=True,
226        capture=True,
227    )
228    container_id = result.stdout.strip()
229
230    # Print logs in a background thread so that we get immediate output in CI,
231    # and also when running SQLancer locally
232    thread = Thread(target=_print_logs, args=(container_id,))
233    thread.start()
234    # At the same time capture the logs to analyze for finding new issues
235    stdout = spawn.capture(
236        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
237    )
238
239    in_assertion = False
240    for line in stdout.splitlines():
241        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
242            continue
243        if line.startswith("--java.lang."):
244            in_assertion = True
245            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
246        elif line == "":
247            in_assertion = False
248        elif in_assertion:
249            print(line)
250    print(f"--- {result.stdout.splitlines()[-1]}")
251
252    # Check for unexpected query errors in the logs
253    logs = Path("logs")
254    if logs.exists() and logs.is_dir():
255        shutil.rmtree(logs)
256    spawn.runv(
257        [
258            "docker",
259            "cp",
260            f"{container_id}:{docker_logs_path}",
261            str(logs),
262        ]
263    )
264    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
265    buildkite.upload_artifact("logs.tar.gz")
266    unexpected_errors = check_query_errors(logs)
267    if unexpected_errors:
268        print("--- Unexpected query errors found:")
269        for error in unexpected_errors:
270            print(error)
271        raise Exception(
272            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
273        )