misc.python.materialize.sqlancer
Shared code for running SQLancer and SQLancer++ against Materialize.
1# Copyright Materialize, Inc. and contributors. All rights reserved. 2# 3# Use of this software is governed by the Business Source License 4# included in the LICENSE file at the root of this repository. 5# 6# As of the Change Date specified in that file, in accordance with 7# the Business Source License, use of this software will be governed 8# by the Apache License, Version 2.0. 9 10""" 11Shared code for running SQLancer and SQLancer++ against Materialize. 12""" 13 14import argparse 15import random 16import re 17import shutil 18import subprocess 19from collections.abc import Callable 20from pathlib import Path 21from threading import Thread 22 23from materialize import buildkite, spawn 24from materialize.mzcompose.composition import ( 25 Composition, 26 WorkflowArgumentParser, 27) 28from materialize.mzcompose.service import Service 29from materialize.mzcompose.services.materialized import Materialized 30 31IGNORED_ERROR_PATTERNS = [ 32 r"^(?!.*panic).*not yet supported", 33 r"does not exist", 34 r"must have", 35 r"overflow", 36 r"invalid input", 37 r"cannot be matched", 38 r"implicitly casting", 39 r"is not unique", 40 r"invalid digit", 41 r"is not defined", 42 r"is defined", 43 r"unterminated", 44 r"Expected ", 45 r"is out of range", 46 r"of a negative", 47 r"does not support", 48 r"ANALYZE", 49 r"is of type", 50 r"must appear in the", 51 r"negative substring length", 52 r"is ambiguous", 53 r"out of range", 54 r"division by zero", 55 r"is only defined for finite", 56 r"cannot canonicalize predicates that are not of type", 57 r"unexpected character in input", 58 r"could not determine polymorphic type", 59 r"cannot reference pseudo type", 60 r"string is not a valid identifier", 61 r"invalid regular expression", 62 r" violates not-null constraint", 63 r"could not convert type", 64 r"are not allowed", 65 r"requires a record", 66 r"unrecognized privilege type", 67 r"cannot cast", 68 r"requires an OVER", 69 r"value too long", 70 r"may only refer to user-defined", 71 r"cannot materialize call to", 72 r"invalid hash algorithm", 73 r"expected id", 74 r"expected exactly one statement", 75 r"regex parse error", 76 r"invalid encoding name", 77 r"must specify at least one capture group", 78 r"requires a string literal", 79 r"too large for encoding", 80 r"must be a positive integer", 81 r"bound must be less than", 82 r"not recognized", 83 r"invalid escape string", 84 r"canceling statement due to statement timeout", 85 r"invalid time zone", 86 r"result exceeds max size", 87 r"calls to mz_now in write statements", 88 r"out of valid range", 89 r"more than one record produced in subquery", 90 r"input of anonymous composite types is not implemented", 91 r"lists must all be the same length", 92 r"invalid IANA Time Zone Database identifier", 93 r"unknown schema", 94 r"octal escapes are not supported", 95 r"attempt to create relation with too many columns", 96 r"column notation applied to type text", 97 r"Unexpected EOF", 98 r"missing required exponent", 99 r"invalid unicode escape", 100 r"dimension array or low bound array must not be null", 101 r"LIKE pattern exceeds maximum length", 102 r"cannot return complex numbers", 103 r"must be greater than zero", 104 r"null character not permitted", 105 r"invalid datepart", 106 r"must use value within", 107 r"null character not permitted", 108 r"expressions must appear in select list", 109 r"expressions must match initial", 110 r"invalid selection: operation may only", 111 r"array size exceeds the maximum allowed", 112 r"does not allow subqueries", 113 r"must use value within", 114 r"zero raised to a negative power is undefined", 115 r"range type over", 116] 117 118 119def check_query_errors(logs_dir: Path) -> list[str]: 120 unexpected_errors = [] 121 log_files = list(logs_dir.glob("*.log")) 122 123 for log_file in log_files: 124 try: 125 content = log_file.read_text() 126 except Exception: 127 continue 128 129 for line in content.splitlines(): 130 if "ERROR" not in line: 131 continue 132 133 is_ignored = False 134 for pattern in IGNORED_ERROR_PATTERNS: 135 if re.search(pattern, line): 136 is_ignored = True 137 break 138 139 if not is_ignored: 140 unexpected_errors.append(f"{log_file.name}: {line}") 141 142 return unexpected_errors 143 144 145def create_services(service_name: str) -> list[Service | Materialized]: 146 """Create the services needed for SQLancer or SQLancer++ tests.""" 147 return [ 148 # Auto-restart so we can keep testing even after we ran into a panic 149 Materialized( 150 restart="on-failure", 151 default_replication_factor=1, 152 additional_system_parameter_defaults={ 153 "enable_alter_table_add_column": "true", 154 "enable_statement_lifecycle_logging": "false", 155 "enable_internal_statement_logging": "false", 156 "statement_logging_default_sample_rate": "0", 157 "statement_logging_max_sample_rate": "0", 158 "enable_repeat_row": "true", 159 "enable_list_length_max": "true", 160 "enable_list_n_layers": "true", 161 "enable_time_at_time_zone": "true", 162 "enable_date_bin_hopping": "true", 163 }, 164 ), 165 Service( 166 service_name, 167 { 168 "mzbuild": service_name, 169 }, 170 ), 171 ] 172 173 174def _print_logs(container_id: str) -> None: 175 spawn.runv(["docker", "logs", "-f", container_id]) 176 177 178def run_sqlancer( 179 c: Composition, 180 parser: WorkflowArgumentParser, 181 *, 182 service_name: str, 183 default_oracle: str, 184 build_run_args: Callable[[argparse.Namespace, int], list[str]], 185 docker_logs_path: str, 186 log_prefix: str, 187) -> None: 188 parser.add_argument("--runtime", default=600, type=int) 189 parser.add_argument("--num-tries", default=100000, type=int) 190 parser.add_argument("--num-threads", default=16, type=int) 191 parser.add_argument("--seed", default=None, type=int) 192 parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction) 193 parser.add_argument("--oracle", default=default_oracle, type=str) 194 args = parser.parse_args() 195 196 c.up("materialized") 197 198 c.sql( 199 "ALTER SYSTEM SET max_databases TO 1000", 200 user="mz_system", 201 port=6877, 202 ) 203 c.sql( 204 "ALTER SYSTEM SET max_tables TO 1000", 205 user="mz_system", 206 port=6877, 207 ) 208 c.sql( 209 "ALTER SYSTEM SET max_materialized_views TO 1000", 210 user="mz_system", 211 port=6877, 212 ) 213 214 seed = args.seed or random.randint(0, 2**31) 215 216 run_args = build_run_args(args, seed) 217 218 print("--- Run in progress") 219 result = c.run( 220 service_name, 221 *run_args, 222 check=False, 223 detach=True, 224 capture=True, 225 ) 226 container_id = result.stdout.strip() 227 228 # Print logs in a background thread so that we get immediate output in CI, 229 # and also when running SQLancer locally 230 thread = Thread(target=_print_logs, args=(container_id,)) 231 thread.start() 232 # At the same time capture the logs to analyze for finding new issues 233 stdout = spawn.capture( 234 ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT 235 ) 236 237 in_assertion = False 238 for line in stdout.splitlines(): 239 if "OutOfMemoryError" in line or "IgnoreMeException" in line: 240 continue 241 if line.startswith("--java.lang."): 242 in_assertion = True 243 print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}") 244 elif line == "": 245 in_assertion = False 246 elif in_assertion: 247 print(line) 248 print(f"--- {result.stdout.splitlines()[-1]}") 249 250 # Check for unexpected query errors in the logs 251 logs = Path("logs") 252 if logs.exists() and logs.is_dir(): 253 shutil.rmtree(logs) 254 spawn.runv( 255 [ 256 "docker", 257 "cp", 258 f"{container_id}:{docker_logs_path}", 259 str(logs), 260 ] 261 ) 262 spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)]) 263 buildkite.upload_artifact("logs.tar.gz") 264 unexpected_errors = check_query_errors(logs) 265 if unexpected_errors: 266 print("--- Unexpected query errors found:") 267 for error in unexpected_errors: 268 print(error) 269 raise Exception( 270 f"Found {len(unexpected_errors)} unexpected query error(s) in logs" 271 )
IGNORED_ERROR_PATTERNS =
['^(?!.*panic).*not yet supported', 'does not exist', 'must have', 'overflow', 'invalid input', 'cannot be matched', 'implicitly casting', 'is not unique', 'invalid digit', 'is not defined', 'is defined', 'unterminated', 'Expected ', 'is out of range', 'of a negative', 'does not support', 'ANALYZE', 'is of type', 'must appear in the', 'negative substring length', 'is ambiguous', 'out of range', 'division by zero', 'is only defined for finite', 'cannot canonicalize predicates that are not of type', 'unexpected character in input', 'could not determine polymorphic type', 'cannot reference pseudo type', 'string is not a valid identifier', 'invalid regular expression', ' violates not-null constraint', 'could not convert type', 'are not allowed', 'requires a record', 'unrecognized privilege type', 'cannot cast', 'requires an OVER', 'value too long', 'may only refer to user-defined', 'cannot materialize call to', 'invalid hash algorithm', 'expected id', 'expected exactly one statement', 'regex parse error', 'invalid encoding name', 'must specify at least one capture group', 'requires a string literal', 'too large for encoding', 'must be a positive integer', 'bound must be less than', 'not recognized', 'invalid escape string', 'canceling statement due to statement timeout', 'invalid time zone', 'result exceeds max size', 'calls to mz_now in write statements', 'out of valid range', 'more than one record produced in subquery', 'input of anonymous composite types is not implemented', 'lists must all be the same length', 'invalid IANA Time Zone Database identifier', 'unknown schema', 'octal escapes are not supported', 'attempt to create relation with too many columns', 'column notation applied to type text', 'Unexpected EOF', 'missing required exponent', 'invalid unicode escape', 'dimension array or low bound array must not be null', 'LIKE pattern exceeds maximum length', 'cannot return complex numbers', 'must be greater than zero', 'null character not permitted', 'invalid datepart', 'must use value within', 'null character not permitted', 'expressions must appear in select list', 'expressions must match initial', 'invalid selection: operation may only', 'array size exceeds the maximum allowed', 'does not allow subqueries', 'must use value within', 'zero raised to a negative power is undefined', 'range type over']
def
check_query_errors(logs_dir: pathlib.Path) -> list[str]:
120def check_query_errors(logs_dir: Path) -> list[str]: 121 unexpected_errors = [] 122 log_files = list(logs_dir.glob("*.log")) 123 124 for log_file in log_files: 125 try: 126 content = log_file.read_text() 127 except Exception: 128 continue 129 130 for line in content.splitlines(): 131 if "ERROR" not in line: 132 continue 133 134 is_ignored = False 135 for pattern in IGNORED_ERROR_PATTERNS: 136 if re.search(pattern, line): 137 is_ignored = True 138 break 139 140 if not is_ignored: 141 unexpected_errors.append(f"{log_file.name}: {line}") 142 143 return unexpected_errors
def
create_services( service_name: str) -> list[materialize.mzcompose.service.Service | materialize.mzcompose.services.materialized.Materialized]:
146def create_services(service_name: str) -> list[Service | Materialized]: 147 """Create the services needed for SQLancer or SQLancer++ tests.""" 148 return [ 149 # Auto-restart so we can keep testing even after we ran into a panic 150 Materialized( 151 restart="on-failure", 152 default_replication_factor=1, 153 additional_system_parameter_defaults={ 154 "enable_alter_table_add_column": "true", 155 "enable_statement_lifecycle_logging": "false", 156 "enable_internal_statement_logging": "false", 157 "statement_logging_default_sample_rate": "0", 158 "statement_logging_max_sample_rate": "0", 159 "enable_repeat_row": "true", 160 "enable_list_length_max": "true", 161 "enable_list_n_layers": "true", 162 "enable_time_at_time_zone": "true", 163 "enable_date_bin_hopping": "true", 164 }, 165 ), 166 Service( 167 service_name, 168 { 169 "mzbuild": service_name, 170 }, 171 ), 172 ]
Create the services needed for SQLancer or SQLancer++ tests.
def
run_sqlancer( c: materialize.mzcompose.composition.Composition, parser: materialize.mzcompose.composition.WorkflowArgumentParser, *, service_name: str, default_oracle: str, build_run_args: Callable[[argparse.Namespace, int], list[str]], docker_logs_path: str, log_prefix: str) -> None:
179def run_sqlancer( 180 c: Composition, 181 parser: WorkflowArgumentParser, 182 *, 183 service_name: str, 184 default_oracle: str, 185 build_run_args: Callable[[argparse.Namespace, int], list[str]], 186 docker_logs_path: str, 187 log_prefix: str, 188) -> None: 189 parser.add_argument("--runtime", default=600, type=int) 190 parser.add_argument("--num-tries", default=100000, type=int) 191 parser.add_argument("--num-threads", default=16, type=int) 192 parser.add_argument("--seed", default=None, type=int) 193 parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction) 194 parser.add_argument("--oracle", default=default_oracle, type=str) 195 args = parser.parse_args() 196 197 c.up("materialized") 198 199 c.sql( 200 "ALTER SYSTEM SET max_databases TO 1000", 201 user="mz_system", 202 port=6877, 203 ) 204 c.sql( 205 "ALTER SYSTEM SET max_tables TO 1000", 206 user="mz_system", 207 port=6877, 208 ) 209 c.sql( 210 "ALTER SYSTEM SET max_materialized_views TO 1000", 211 user="mz_system", 212 port=6877, 213 ) 214 215 seed = args.seed or random.randint(0, 2**31) 216 217 run_args = build_run_args(args, seed) 218 219 print("--- Run in progress") 220 result = c.run( 221 service_name, 222 *run_args, 223 check=False, 224 detach=True, 225 capture=True, 226 ) 227 container_id = result.stdout.strip() 228 229 # Print logs in a background thread so that we get immediate output in CI, 230 # and also when running SQLancer locally 231 thread = Thread(target=_print_logs, args=(container_id,)) 232 thread.start() 233 # At the same time capture the logs to analyze for finding new issues 234 stdout = spawn.capture( 235 ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT 236 ) 237 238 in_assertion = False 239 for line in stdout.splitlines(): 240 if "OutOfMemoryError" in line or "IgnoreMeException" in line: 241 continue 242 if line.startswith("--java.lang."): 243 in_assertion = True 244 print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}") 245 elif line == "": 246 in_assertion = False 247 elif in_assertion: 248 print(line) 249 print(f"--- {result.stdout.splitlines()[-1]}") 250 251 # Check for unexpected query errors in the logs 252 logs = Path("logs") 253 if logs.exists() and logs.is_dir(): 254 shutil.rmtree(logs) 255 spawn.runv( 256 [ 257 "docker", 258 "cp", 259 f"{container_id}:{docker_logs_path}", 260 str(logs), 261 ] 262 ) 263 spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)]) 264 buildkite.upload_artifact("logs.tar.gz") 265 unexpected_errors = check_query_errors(logs) 266 if unexpected_errors: 267 print("--- Unexpected query errors found:") 268 for error in unexpected_errors: 269 print(error) 270 raise Exception( 271 f"Found {len(unexpected_errors)} unexpected query error(s) in logs" 272 )