misc.python.materialize.sqlancer
Shared code for running SQLancer and SQLancer++ against Materialize.
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

"""
Shared code for running SQLancer and SQLancer++ against Materialize.
"""

import argparse
import random
import re
import shutil
import subprocess
from collections.abc import Callable
from pathlib import Path
from threading import Thread

from materialize import buildkite, spawn
from materialize.mzcompose.composition import (
    Composition,
    WorkflowArgumentParser,
)
from materialize.mzcompose.service import Service
from materialize.mzcompose.services.materialized import Materialized

# Error messages Materialize emits for invalid-but-expected queries generated
# by SQLancer. Each entry is matched with re.search against every ERROR line
# of the query logs; matching lines are not treated as failures.
# (Duplicate entries removed.)
IGNORED_ERROR_PATTERNS = [
    r"^(?!.*panic).*not yet supported",
    r"does not exist",
    r"must have",
    r"overflow",
    r"invalid input",
    r"cannot be matched",
    r"implicitly casting",
    r"is not unique",
    r"invalid digit",
    r"is not defined",
    r"is defined",
    r"unterminated",
    r"Expected ",
    r"is out of range",
    r"of a negative",
    r"does not support",
    r"ANALYZE",
    r"is of type",
    r"must appear in the",
    r"negative substring length",
    r"is ambiguous",
    r"out of range",
    r"division by zero",
    r"is only defined for finite",
    r"cannot canonicalize predicates that are not of type",
    r"unexpected character in input",
    r"could not determine polymorphic type",
    r"cannot reference pseudo type",
    r"string is not a valid identifier",
    r"invalid regular expression",
    r" violates not-null constraint",
    r"could not convert type",
    r"are not allowed",
    r"requires a record",
    r"unrecognized privilege type",
    r"cannot cast",
    r"requires an OVER",
    r"value too long",
    r"may only refer to user-defined",
    r"cannot materialize call to",
    r"invalid hash algorithm",
    r"expected id",
    r"expected exactly one statement",
    r"regex parse error",
    r"invalid encoding name",
    r"must specify at least one capture group",
    r"requires a string literal",
    r"too large for encoding",
    r"must be a positive integer",
    r"bound must be less than",
    r"not recognized",
    r"invalid escape string",
    r"canceling statement due to statement timeout",
    r"invalid time zone",
    r"result exceeds max size",
    r"calls to mz_now in write statements",
    r"out of valid range",
    r"more than one record produced in subquery",
    r"input of anonymous composite types is not implemented",
    r"lists must all be the same length",
    r"invalid IANA Time Zone Database identifier",
    r"unknown schema",
    r"octal escapes are not supported",
    r"attempt to create relation with too many columns",
    r"column notation applied to type text",
    r"Unexpected EOF",
    r"missing required exponent",
    r"invalid unicode escape",
    r"dimension array or low bound array must not be null",
    r"dimension values must not be null",
    r"LIKE pattern exceeds maximum length",
    r"cannot return complex numbers",
    r"must be greater than zero",
    r"null character not permitted",
    r"invalid datepart",
    r"must use value within",
    r"expressions must appear in select list",
    r"expressions must match initial",
    r"invalid selection: operation may only",
    r"array size exceeds the maximum allowed",
    r"does not allow subqueries",
    r"zero raised to a negative power is undefined",
    r"range type over",
]


def check_query_errors(logs_dir: Path) -> list[str]:
    """Scan the SQLancer query logs for unexpected ERROR lines.

    Args:
        logs_dir: Directory containing the *.log files copied from the
            SQLancer container.

    Returns:
        A list of "<file name>: <log line>" entries for every ERROR line
        that matches none of the IGNORED_ERROR_PATTERNS.
    """
    # Compile every pattern once, instead of once per log line.
    compiled_patterns = [re.compile(pattern) for pattern in IGNORED_ERROR_PATTERNS]
    unexpected_errors: list[str] = []

    for log_file in logs_dir.glob("*.log"):
        try:
            content = log_file.read_text()
        except Exception:
            # Best effort: skip files that can't be read (e.g. bad encoding).
            continue

        for line in content.splitlines():
            if "ERROR" not in line:
                continue
            if not any(p.search(line) for p in compiled_patterns):
                unexpected_errors.append(f"{log_file.name}: {line}")

    return unexpected_errors


def create_services(service_name: str) -> list[Service | Materialized]:
    """Create the services needed for SQLancer or SQLancer++ tests."""
    return [
        # Auto-restart so we can keep testing even after we ran into a panic
        Materialized(
            restart="on-failure",
            default_replication_factor=1,
            # Feature flags and limits that SQLancer's generated queries use.
            additional_system_parameter_defaults={
                "enable_alter_table_add_column": "true",
                "enable_statement_lifecycle_logging": "false",
                "enable_internal_statement_logging": "false",
                "statement_logging_default_sample_rate": "0",
                "statement_logging_max_sample_rate": "0",
                "enable_repeat_row": "true",
                "enable_list_length_max": "true",
                "enable_list_n_layers": "true",
                "enable_time_at_time_zone": "true",
                "enable_date_bin_hopping": "true",
            },
        ),
        Service(
            service_name,
            {
                "mzbuild": service_name,
            },
        ),
    ]


def _print_logs(container_id: str) -> None:
    """Follow and print the docker logs of the given container (blocking)."""
    spawn.runv(["docker", "logs", "-f", container_id])


def run_sqlancer(
    c: Composition,
    parser: WorkflowArgumentParser,
    *,
    service_name: str,
    default_oracle: str,
    build_run_args: Callable[[argparse.Namespace, int], list[str]],
    docker_logs_path: str,
    log_prefix: str,
) -> None:
    """Run SQLancer (or SQLancer++) against Materialize and analyze its logs.

    Args:
        c: The mzcompose composition to run services in.
        parser: Workflow argument parser; the common SQLancer flags are added.
        service_name: Name of the SQLancer docker service to run.
        default_oracle: Oracle used when --oracle is not given.
        build_run_args: Builds the service command line from parsed args + seed.
        docker_logs_path: Path inside the container where query logs live.
        log_prefix: Prefix printed in front of captured assertion failures.

    Raises:
        Exception: If unexpected query errors are found in the logs.
    """
    parser.add_argument("--runtime", default=600, type=int)
    parser.add_argument("--num-tries", default=100000, type=int)
    parser.add_argument("--num-threads", default=16, type=int)
    parser.add_argument("--seed", default=None, type=int)
    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
    parser.add_argument("--oracle", default=default_oracle, type=str)
    args = parser.parse_args()

    c.up("materialized")

    # SQLancer creates many objects, so raise the default limits.
    for statement in (
        "ALTER SYSTEM SET max_databases TO 1000",
        "ALTER SYSTEM SET max_tables TO 1000",
        "ALTER SYSTEM SET max_materialized_views TO 1000",
    ):
        c.sql(statement, user="mz_system", port=6877)

    # `args.seed or ...` would silently discard an explicit `--seed 0`,
    # so compare against None instead.
    seed = args.seed if args.seed is not None else random.randint(0, 2**31)

    run_args = build_run_args(args, seed)

    print("--- Run in progress")
    result = c.run(
        service_name,
        *run_args,
        check=False,
        detach=True,
        capture=True,
    )
    container_id = result.stdout.strip()

    # Print logs in a background thread so that we get immediate output in CI,
    # and also when running SQLancer locally
    thread = Thread(target=_print_logs, args=(container_id,))
    thread.start()
    # At the same time capture the logs to analyze for finding new issues
    stdout = spawn.capture(
        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
    )
    # The container has exited, so the `docker logs -f` in the printing
    # thread terminates as well; reap it.
    thread.join()

    # Reconstruct Java assertion failures (e.g. AssertionError blocks) from
    # the captured output, dropping known-noise exceptions.
    in_assertion = False
    for line in stdout.splitlines():
        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
            continue
        if line.startswith("--java.lang."):
            in_assertion = True
            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
        elif line == "":
            in_assertion = False
        elif in_assertion:
            print(line)
    print(f"--- {result.stdout.splitlines()[-1]}")

    # Check for unexpected query errors in the logs
    logs = Path("logs")
    if logs.exists() and logs.is_dir():
        shutil.rmtree(logs)
    spawn.runv(
        [
            "docker",
            "cp",
            f"{container_id}:{docker_logs_path}",
            str(logs),
        ]
    )
    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
    buildkite.upload_artifact("logs.tar.gz")
    unexpected_errors = check_query_errors(logs)
    if unexpected_errors:
        print("--- Unexpected query errors found:")
        for error in unexpected_errors:
            print(error)
        raise Exception(
            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
        )
# Error messages Materialize emits for invalid-but-expected queries generated
# by SQLancer. Each entry is matched with re.search against every ERROR line
# of the query logs; matching lines are not treated as failures.
# (Duplicate entries "null character not permitted" and "must use value
# within" removed.)
IGNORED_ERROR_PATTERNS = [
    r"^(?!.*panic).*not yet supported",
    r"does not exist",
    r"must have",
    r"overflow",
    r"invalid input",
    r"cannot be matched",
    r"implicitly casting",
    r"is not unique",
    r"invalid digit",
    r"is not defined",
    r"is defined",
    r"unterminated",
    r"Expected ",
    r"is out of range",
    r"of a negative",
    r"does not support",
    r"ANALYZE",
    r"is of type",
    r"must appear in the",
    r"negative substring length",
    r"is ambiguous",
    r"out of range",
    r"division by zero",
    r"is only defined for finite",
    r"cannot canonicalize predicates that are not of type",
    r"unexpected character in input",
    r"could not determine polymorphic type",
    r"cannot reference pseudo type",
    r"string is not a valid identifier",
    r"invalid regular expression",
    r" violates not-null constraint",
    r"could not convert type",
    r"are not allowed",
    r"requires a record",
    r"unrecognized privilege type",
    r"cannot cast",
    r"requires an OVER",
    r"value too long",
    r"may only refer to user-defined",
    r"cannot materialize call to",
    r"invalid hash algorithm",
    r"expected id",
    r"expected exactly one statement",
    r"regex parse error",
    r"invalid encoding name",
    r"must specify at least one capture group",
    r"requires a string literal",
    r"too large for encoding",
    r"must be a positive integer",
    r"bound must be less than",
    r"not recognized",
    r"invalid escape string",
    r"canceling statement due to statement timeout",
    r"invalid time zone",
    r"result exceeds max size",
    r"calls to mz_now in write statements",
    r"out of valid range",
    r"more than one record produced in subquery",
    r"input of anonymous composite types is not implemented",
    r"lists must all be the same length",
    r"invalid IANA Time Zone Database identifier",
    r"unknown schema",
    r"octal escapes are not supported",
    r"attempt to create relation with too many columns",
    r"column notation applied to type text",
    r"Unexpected EOF",
    r"missing required exponent",
    r"invalid unicode escape",
    r"dimension array or low bound array must not be null",
    r"dimension values must not be null",
    r"LIKE pattern exceeds maximum length",
    r"cannot return complex numbers",
    r"must be greater than zero",
    r"null character not permitted",
    r"invalid datepart",
    r"must use value within",
    r"expressions must appear in select list",
    r"expressions must match initial",
    r"invalid selection: operation may only",
    r"array size exceeds the maximum allowed",
    r"does not allow subqueries",
    r"zero raised to a negative power is undefined",
    r"range type over",
]
def check_query_errors(logs_dir: Path) -> list[str]:
    """Scan the SQLancer query logs for unexpected ERROR lines.

    Args:
        logs_dir: Directory containing the *.log files copied from the
            SQLancer container.

    Returns:
        A list of "<file name>: <log line>" entries for every ERROR line
        that matches none of the IGNORED_ERROR_PATTERNS.
    """
    # Compile every pattern once, instead of once per log line.
    compiled_patterns = [re.compile(pattern) for pattern in IGNORED_ERROR_PATTERNS]
    unexpected_errors: list[str] = []

    for log_file in logs_dir.glob("*.log"):
        try:
            content = log_file.read_text()
        except Exception:
            # Best effort: skip files that can't be read (e.g. bad encoding).
            continue

        for line in content.splitlines():
            if "ERROR" not in line:
                continue
            if not any(p.search(line) for p in compiled_patterns):
                unexpected_errors.append(f"{log_file.name}: {line}")

    return unexpected_errors
def create_services(service_name: str) -> list[Service | Materialized]:
    """Create the services needed for SQLancer or SQLancer++ tests."""
    # Feature flags and limits that SQLancer's generated queries rely on.
    system_parameter_defaults = {
        "enable_alter_table_add_column": "true",
        "enable_statement_lifecycle_logging": "false",
        "enable_internal_statement_logging": "false",
        "statement_logging_default_sample_rate": "0",
        "statement_logging_max_sample_rate": "0",
        "enable_repeat_row": "true",
        "enable_list_length_max": "true",
        "enable_list_n_layers": "true",
        "enable_time_at_time_zone": "true",
        "enable_date_bin_hopping": "true",
    }
    # Auto-restart so we can keep testing even after we ran into a panic
    materialized = Materialized(
        restart="on-failure",
        default_replication_factor=1,
        additional_system_parameter_defaults=system_parameter_defaults,
    )
    sqlancer = Service(service_name, {"mzbuild": service_name})
    return [materialized, sqlancer]
Create the services needed for SQLancer or SQLancer++ tests.
def run_sqlancer(
    c: Composition,
    parser: WorkflowArgumentParser,
    *,
    service_name: str,
    default_oracle: str,
    build_run_args: Callable[[argparse.Namespace, int], list[str]],
    docker_logs_path: str,
    log_prefix: str,
) -> None:
    """Run SQLancer (or SQLancer++) against Materialize and analyze its logs.

    Args:
        c: The mzcompose composition to run services in.
        parser: Workflow argument parser; the common SQLancer flags are added.
        service_name: Name of the SQLancer docker service to run.
        default_oracle: Oracle used when --oracle is not given.
        build_run_args: Builds the service command line from parsed args + seed.
        docker_logs_path: Path inside the container where query logs live.
        log_prefix: Prefix printed in front of captured assertion failures.

    Raises:
        Exception: If unexpected query errors are found in the logs.
    """
    parser.add_argument("--runtime", default=600, type=int)
    parser.add_argument("--num-tries", default=100000, type=int)
    parser.add_argument("--num-threads", default=16, type=int)
    parser.add_argument("--seed", default=None, type=int)
    parser.add_argument("--qpg", default=True, action=argparse.BooleanOptionalAction)
    parser.add_argument("--oracle", default=default_oracle, type=str)
    args = parser.parse_args()

    c.up("materialized")

    # SQLancer creates many objects, so raise the default limits.
    for statement in (
        "ALTER SYSTEM SET max_databases TO 1000",
        "ALTER SYSTEM SET max_tables TO 1000",
        "ALTER SYSTEM SET max_materialized_views TO 1000",
    ):
        c.sql(statement, user="mz_system", port=6877)

    # `args.seed or ...` would silently discard an explicit `--seed 0`,
    # so compare against None instead.
    seed = args.seed if args.seed is not None else random.randint(0, 2**31)

    run_args = build_run_args(args, seed)

    print("--- Run in progress")
    result = c.run(
        service_name,
        *run_args,
        check=False,
        detach=True,
        capture=True,
    )
    container_id = result.stdout.strip()

    # Print logs in a background thread so that we get immediate output in CI,
    # and also when running SQLancer locally
    thread = Thread(target=_print_logs, args=(container_id,))
    thread.start()
    # At the same time capture the logs to analyze for finding new issues
    stdout = spawn.capture(
        ["docker", "logs", "-f", container_id], stderr=subprocess.STDOUT
    )
    # The container has exited, so the `docker logs -f` in the printing
    # thread terminates as well; reap it.
    thread.join()

    # Reconstruct Java assertion-failure blocks from the captured output,
    # dropping known-noise exceptions.
    in_assertion = False
    for line in stdout.splitlines():
        if "OutOfMemoryError" in line or "IgnoreMeException" in line:
            continue
        if line.startswith("--java.lang."):
            in_assertion = True
            print(f"--- [{log_prefix}] {line.removeprefix('--java.lang.')}")
        elif line == "":
            in_assertion = False
        elif in_assertion:
            print(line)
    print(f"--- {result.stdout.splitlines()[-1]}")

    # Check for unexpected query errors in the logs
    logs = Path("logs")
    if logs.exists() and logs.is_dir():
        shutil.rmtree(logs)
    spawn.runv(
        [
            "docker",
            "cp",
            f"{container_id}:{docker_logs_path}",
            str(logs),
        ]
    )
    spawn.runv(["tar", "cfz", "logs.tar.gz", str(logs)])
    buildkite.upload_artifact("logs.tar.gz")
    unexpected_errors = check_query_errors(logs)
    if unexpected_errors:
        print("--- Unexpected query errors found:")
        for error in unexpected_errors:
            print(error)
        raise Exception(
            f"Found {len(unexpected_errors)} unexpected query error(s) in logs"
        )