misc.python.materialize.cli.namescore

Calculate the namescore---the percentage of column references with name information---of an EXPLAIN PLAN. By default, runs on all SLT files in $MZ_ROOT/test/sqllogictest.

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
 10"""Calculate the `namescore`---the percentage of column references with
 11name information---of an `EXPLAIN PLAN`. By default, runs on all SLT
 12files in $MZ_ROOT/test/sqllogictest."""
 13
 14import argparse
 15import os
 16import re
 17
 18from materialize import MZ_ROOT
 19
 20SLT_ROOT = MZ_ROOT / "test" / "sqllogictest"
 21
 22COLUMN_REF_RE = re.compile(
 23    r"""
 24    \#[0-9]+({[^}]+})?
 25    """,
 26    re.VERBOSE,
 27)
 28
 29
 30def find_slt_files() -> list[str]:
 31    """Find all .slt files in $MZ_ROOT/test/sqllogictest directory"""
 32    slt_files = []
 33    for root, _dirs, files in os.walk(SLT_ROOT):
 34        for file in files:
 35            if file.endswith(".slt"):
 36                slt_files.append(os.path.join(root, file))
 37    return slt_files
 38
 39
 40def namescore(filename: str) -> tuple[int, int, int]:
 41    """Calculate the namescore of a file"""
 42    named_refs = 0
 43    unknown_cols = 0
 44    refs = 0
 45    with open(filename) as f:
 46        content = f.read()
 47        for match in COLUMN_REF_RE.finditer(content):
 48            refs += 1
 49
 50            if match.group(1):
 51                named_refs += 1
 52                if match.group(1) == '{"?column?"}':
 53                    unknown_cols += 1
 54    return (named_refs, unknown_cols, refs)
 55
 56
 57def main() -> None:
 58    parser = argparse.ArgumentParser(
 59        prog="namescore",
 60        formatter_class=argparse.RawDescriptionHelpFormatter,
 61        description="""
 62calculates the `namescore` (percentage of column references with names)
 63of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)""",
 64    )
 65
 66    parser.add_argument(
 67        "tests",
 68        nargs="*",
 69        help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
 70    )
 71    args = parser.parse_args()
 72
 73    tests = args.tests or find_slt_files()
 74
 75    named_refs = 0
 76    unknown_cols = 0
 77    refs = 0
 78    nonames = 0
 79    total = len(tests)
 80    for test in tests:
 81        nr, uc, r = namescore(test)
 82        if r == 0:
 83            assert nr == 0
 84            assert uc == 0
 85            nonames += 1
 86            continue
 87
 88        print(
 89            f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r}; {uc} unknown columns)"
 90        )
 91        named_refs += nr
 92        unknown_cols += uc
 93        refs += r
 94    print(
 95        f"\nOverall namescore: {named_refs / refs * 100:.2f}% ({named_refs} / {refs}; {unknown_cols} unknown columns); {nonames} files with no column references / {total} total files"
 96    )
 97
 98
 99if __name__ == "__main__":
100    main()
SLT_ROOT = PosixPath('/var/lib/buildkite-agent/builds/buildkite-15f2293-i-04e0d725fdf8304db-1/materialize/deploy/test/sqllogictest')
COLUMN_REF_RE = re.compile('\n \\#[0-9]+({[^}]+})?\n ', re.VERBOSE)
def find_slt_files() -> list[str]:
def find_slt_files() -> list[str]:
    """Return the path of every `.slt` file found beneath SLT_ROOT.

    The tree is walked recursively with `os.walk`; each result is the
    containing directory joined with the file name.
    """
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(SLT_ROOT)
        for name in names
        if name.endswith(".slt")
    ]

Find all .slt files in $MZ_ROOT/test/sqllogictest directory

def namescore(filename: str) -> tuple[int, int, int]:
def namescore(filename: str) -> tuple[int, int, int]:
    """Count the column references in a file and how many of them are named.

    A column reference is any match of ``COLUMN_REF_RE``. It counts as named
    when the pattern's optional ``{...}`` group is present.

    Args:
        filename: path of the file to scan.

    Returns:
        A ``(named_refs, unknown_cols, refs)`` tuple: the number of
        references carrying a ``{...}`` annotation, the number of those whose
        annotation is exactly ``{"?column?"}``, and the total number of
        references found.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 (the files are expected to be UTF-8 text) so
    # the result does not depend on the platform's locale encoding.
    with open(filename, encoding="utf-8") as f:
        content = f.read()

    named_refs = 0
    unknown_cols = 0
    refs = 0
    for match in COLUMN_REF_RE.finditer(content):
        refs += 1

        annotation = match.group(1)
        if annotation:
            named_refs += 1
            # `?column?` annotations are tallied separately, but they still
            # count as named references.
            if annotation == '{"?column?"}':
                unknown_cols += 1
    return (named_refs, unknown_cols, refs)

Calculate the namescore of a file

def main() -> None:
def main() -> None:
    """Parse command-line arguments and print per-file and overall namescores.

    Scores every file named on the command line, or every file returned by
    ``find_slt_files()`` when none is given. A file with at least one column
    reference gets its own report line; a file with none is only counted.
    A summary line with the aggregate figures is printed last.
    """
    parser = argparse.ArgumentParser(
        prog="namescore",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
calculates the `namescore` (percentage of column references with names)
of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)""",
    )

    parser.add_argument(
        "tests",
        nargs="*",
        help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
    )
    args = parser.parse_args()

    tests = args.tests or find_slt_files()

    named_refs = 0
    unknown_cols = 0
    refs = 0
    nonames = 0
    total = len(tests)
    for test in tests:
        nr, uc, r = namescore(test)
        if r == 0:
            # Without any references there cannot be named or unknown ones.
            assert nr == 0
            assert uc == 0
            nonames += 1
            continue

        print(
            f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r}; {uc} unknown columns)"
        )
        named_refs += nr
        unknown_cols += uc
        refs += r

    # If no scanned file contained a column reference there is no percentage
    # to report; print a placeholder instead of dividing by zero.
    overall = f"{named_refs / refs * 100:.2f}%" if refs else "n/a"
    print(
        f"\nOverall namescore: {overall} ({named_refs} / {refs}; {unknown_cols} unknown columns); {nonames} files with no column references / {total} total files"
    )
97    )