misc.python.materialize.cli.namescore
Calculate the namescore---the percentage of column references with
name information---of an EXPLAIN PLAN. By default, runs on all SLT
files in $MZ_ROOT/test/sqllogictest.
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

"""Calculate the `namescore`---the percentage of column references with
name information---of an `EXPLAIN PLAN`. By default, runs on all SLT
files in $MZ_ROOT/test/sqllogictest."""

import argparse
import os
import re

from materialize import MZ_ROOT

SLT_ROOT = MZ_ROOT / "test" / "sqllogictest"

# Matches a column reference such as `#3`, optionally followed by a
# brace-delimited annotation (e.g. `#3{name}`) captured as group 1.
COLUMN_REF_RE = re.compile(
    r"""
    \#[0-9]+({[^}]+})?
    """,
    re.VERBOSE,
)


def find_slt_files() -> list[str]:
    """Find all .slt files in the $MZ_ROOT/test/sqllogictest directory."""
    slt_files = []
    for root, _dirs, files in os.walk(SLT_ROOT):
        for file in files:
            if file.endswith(".slt"):
                slt_files.append(os.path.join(root, file))
    return slt_files


def namescore(filename: str) -> tuple[int, int, int]:
    """Calculate the namescore of a file.

    Returns a tuple `(named_refs, unknown_cols, refs)`:

    * `refs` is the number of column references (`#N`) in the file,
    * `named_refs` is how many of them carry a `{...}` annotation,
    * `unknown_cols` is how many annotations are exactly `{"?column?"}`.
    """
    named_refs = 0
    unknown_cols = 0
    refs = 0
    # Decode explicitly as UTF-8 so results do not depend on the locale's
    # default encoding.
    with open(filename, encoding="utf-8") as f:
        content = f.read()
    for match in COLUMN_REF_RE.finditer(content):
        refs += 1

        if match.group(1):
            named_refs += 1
            if match.group(1) == '{"?column?"}':
                unknown_cols += 1
    return (named_refs, unknown_cols, refs)


def main() -> None:
    """Print the namescore of each given file, then an overall summary.

    Files come from the command line; when none are given, every SLT file
    found by `find_slt_files` is used. Files without any column reference
    are not listed individually but are counted in the summary.
    """
    parser = argparse.ArgumentParser(
        prog="namescore",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
calculates the `namescore` (percentage of column references with names)
of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)""",
    )

    parser.add_argument(
        "tests",
        nargs="*",
        help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
    )
    args = parser.parse_args()

    tests = args.tests or find_slt_files()

    named_refs = 0
    unknown_cols = 0
    refs = 0
    nonames = 0
    total = len(tests)
    for test in tests:
        nr, uc, r = namescore(test)
        if r == 0:
            assert nr == 0
            assert uc == 0
            nonames += 1
            continue

        print(
            f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r}; {uc} unknown columns)"
        )
        named_refs += nr
        unknown_cols += uc
        refs += r

    # With no column references at all (no files, or none containing any),
    # the percentage is undefined; report "n/a" instead of dividing by zero.
    overall = f"{named_refs / refs * 100:.2f}%" if refs else "n/a"
    print(
        f"\nOverall namescore: {overall} ({named_refs} / {refs}; {unknown_cols} unknown columns); {nonames} files with no column references / {total} total files"
    )


if __name__ == "__main__":
    main()
SLT_ROOT =
PosixPath('/var/lib/buildkite-agent/builds/buildkite-15f2293-i-04e0d725fdf8304db-1/materialize/deploy/test/sqllogictest')
COLUMN_REF_RE =
re.compile('\n \\#[0-9]+({[^}]+})?\n ', re.VERBOSE)
def
find_slt_files() -> list[str]:
def find_slt_files() -> list[str]:
    """Collect the path of every `.slt` file found beneath SLT_ROOT.

    The tree is traversed with `os.walk`, and paths are returned in the
    order the walk yields them.
    """
    found: list[str] = []
    for dirpath, _subdirs, names in os.walk(SLT_ROOT):
        found.extend(
            os.path.join(dirpath, name) for name in names if name.endswith(".slt")
        )
    return found
Find all .slt files in the $MZ_ROOT/test/sqllogictest directory.
def
namescore(filename: str) -> tuple[int, int, int]:
def namescore(filename: str) -> tuple[int, int, int]:
    """Calculate the namescore of a file.

    Returns a tuple `(named_refs, unknown_cols, refs)`:

    * `refs` is the number of matches of `COLUMN_REF_RE` in the file,
    * `named_refs` is how many of those matches have a non-empty group 1,
    * `unknown_cols` is how many have group 1 equal to `{"?column?"}`.
    """
    named_refs = 0
    unknown_cols = 0
    refs = 0
    # Decode explicitly as UTF-8 so results do not depend on the locale's
    # default encoding.
    with open(filename, encoding="utf-8") as f:
        content = f.read()
    for match in COLUMN_REF_RE.finditer(content):
        refs += 1

        annotation = match.group(1)
        if annotation:
            named_refs += 1
            if annotation == '{"?column?"}':
                unknown_cols += 1
    return (named_refs, unknown_cols, refs)
Calculate the namescore of a file
def
main() -> None:
def main() -> None:
    """Print the namescore of each given file, then an overall summary.

    Files come from the command line; when none are given, every SLT file
    found by `find_slt_files` is used. Files without any column reference
    are not listed individually but are counted in the summary.
    """
    parser = argparse.ArgumentParser(
        prog="namescore",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
calculates the `namescore` (percentage of column references with names)
of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)""",
    )

    parser.add_argument(
        "tests",
        nargs="*",
        help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
    )
    args = parser.parse_args()

    tests = args.tests or find_slt_files()

    named_refs = 0
    unknown_cols = 0
    refs = 0
    nonames = 0
    total = len(tests)
    for test in tests:
        nr, uc, r = namescore(test)
        if r == 0:
            assert nr == 0
            assert uc == 0
            nonames += 1
            continue

        # Show paths relative to SLT_ROOT when the file lives under it.
        print(
            f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r}; {uc} unknown columns)"
        )
        named_refs += nr
        unknown_cols += uc
        refs += r

    # With no column references at all (no files, or none containing any),
    # the percentage is undefined; report "n/a" instead of dividing by zero.
    overall = f"{named_refs / refs * 100:.2f}%" if refs else "n/a"
    print(
        f"\nOverall namescore: {overall} ({named_refs} / {refs}; {unknown_cols} unknown columns); {nonames} files with no column references / {total} total files"
    )