Module materialize.cli.ci_closed_issues_detect

Expand source code Browse git
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
#
# ci_closed_issues_detect.py - Detect references to already closed issues.

import argparse
import os
import re
import sys
from collections.abc import Iterator
from dataclasses import dataclass
from typing import IO

import requests

from materialize import buildkite, spawn

ISSUE_RE = re.compile(
    r"""
    ( TimelyDataflow/timely-dataflow\#(?P<timelydataflow>[0-9]+)
    | ( materialize\# | materialize/issues/ | \# ) (?P<materialize>[0-9]+)
    )
    """,
    re.VERBOSE,
)

GROUP_REPO = {
    "timelydataflow": "TimelyDataflow/timely-dataflow",
    "materialize": "MaterializeInc/materialize",
}

REFERENCE_RE = re.compile(
    r"""
    ( reenable
    | re-enable
    | reconsider
    | TODO
    # Used in Buildkite pipeline config files
    | skip:
    # Used in platform-checks
    | @disabled
    # Used in pytest
    | @pytest.mark.skip
    # Used in output-consistency framework
    | YesIgnore
    | tracked\ with
    # Used in proto files
    | //\ buf\ breaking:\ ignore
    )
    """,
    re.VERBOSE | re.IGNORECASE,
)

IGNORE_RE = re.compile(
    r"""
    ( discussion\ of\ this\ in
    | discussed\ in
    | [sS]ee\ \<
    # is_null_propagation.slt
    | isnull\(\#0\)
    # src/transform/tests/test_transforms/column_knowledge.spec
    | \(\#1\)\ IS\ NULL
    # test/sqllogictest/cockroach/*.slt
    | cockroach\#
    | Liquibase
    # cloud repo
    | cloud\#
    # ci/test/lint-buf/README.md
    | Ignore\ because\ of\ #99999
    # src/storage-client/src/controller.rs
    | issues/20211\>
    # src/sql/src/plan/statement.rs
    | issues/20019\>
    )
    """,
    re.VERBOSE | re.IGNORECASE,
)

COMMENT_RE = re.compile(r"#|//")

IGNORE_FILENAME_RE = re.compile(
    r"""
    ( .*\.(svg|png|jpg|jpeg|avro|ico)
    | doc/developer/design/20230223_stabilize_with_mutually_recursive.md
    )
    """,
    re.VERBOSE,
)

FILENAME_REFERENCE_RE = re.compile(r".*\.(td|slt|test)\.gh(?P<materialize>[0-9]+)")


@dataclass
class IssueRef:
    repository: str
    issue_id: int
    filename: str
    line_number: int
    text: str | None


@dataclass
class CommentBlock:
    char: str
    pos: int
    text: str
    line_number: int


def comment_blocks(file: IO) -> Iterator[tuple[int, str]]:
    comment: CommentBlock | None = None

    for line_number, line in enumerate(file):
        if comment_match := COMMENT_RE.search(line):
            char = comment_match.group(0)
            pos = comment_match.span()[0]
            if comment is None:
                comment = CommentBlock(char, pos, line, line_number + 1)
                continue
            if char == comment.char and pos == comment.pos:
                comment.text += line
                continue
            yield (comment.line_number, comment.text)
            comment = CommentBlock(char, pos, line, line_number + 1)
            continue
        if comment is not None:
            yield (comment.line_number, comment.text)
            comment = None
        yield (line_number + 1, line)

    if comment is not None:
        yield (comment.line_number, comment.text)


def detect_referenced_issues(filename: str) -> list[IssueRef]:
    issue_refs: list[IssueRef] = []

    with open(filename) as file:
        for line_number, text in comment_blocks(file):
            if not REFERENCE_RE.search(text) or IGNORE_RE.search(text):
                continue

            offset = 0
            while issue_match := ISSUE_RE.search(text, offset):
                offset = issue_match.span()[1]

                groups = [
                    (key, value)
                    for key, value in issue_match.groupdict().items()
                    if value
                ]
                assert len(groups) == 1, f"Expected only 1 element in {groups}"
                group, issue_id = groups[0]

                is_referenced_with_url = "issues/" in issue_match.group(0)

                # Explain plans can look like issue references
                if int(issue_id) < 100 and not is_referenced_with_url:
                    continue

                issue_refs.append(
                    IssueRef(
                        GROUP_REPO[group],
                        int(issue_id),
                        filename,
                        line_number,
                        text.strip(),
                    )
                )

    return issue_refs


def is_issue_closed_on_github(repository: str, issue_id: int) -> bool:
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    if token := os.getenv("GITHUB_TOKEN"):
        headers["Authorization"] = f"Bearer {token}"

    url = f"https://api.github.com/repos/{repository}/issues/{issue_id}"
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        raise ValueError(
            f"Bad return code from GitHub on {url}: {response.status_code}: {response.text}"
        )

    issue_json = response.json()
    assert (
        issue_json["number"] == issue_id
    ), f"Returned issue number {issue_json['number']} is not the expected issue number {issue_id}"
    return issue_json["state"] == "closed"


def filter_changed_lines(issue_refs: list[IssueRef]) -> list[IssueRef]:
    changed_lines = buildkite.find_modified_lines()
    return [
        issue_ref
        for issue_ref in issue_refs
        if issue_ref.text is not None
        and any(
            (issue_ref.filename, issue_ref.line_number + i) in changed_lines
            for i in range(issue_ref.text.count("\n"))
        )
    ]


def filter_closed_issues(issue_refs: list[IssueRef]) -> list[IssueRef]:
    issues = {(issue_ref.repository, issue_ref.issue_id) for issue_ref in issue_refs}
    closed_issues = {
        (repository, issue)
        for repository, issue in issues
        if is_issue_closed_on_github(repository, issue)
    }
    return [
        issue_ref
        for issue_ref in issue_refs
        if (issue_ref.repository, issue_ref.issue_id) in closed_issues
    ]


def main() -> int:
    parser = argparse.ArgumentParser(
        prog="ci-closed-issues-detect",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="ci-closed-issues-detect detects references to already closed GitHub issues.",
    )

    parser.add_argument(
        "--changed-lines-only",
        action="store_true",
        help="only report issues in changed files/lines",
    )

    args = parser.parse_args()

    filenames = spawn.capture(
        ["git", "ls-tree", "--full-tree", "-r", "--name-only", "HEAD"]
    )

    issue_refs: list[IssueRef] = []
    for filename in filenames.splitlines():
        if issue_match := FILENAME_REFERENCE_RE.search(filename):
            groups = [
                (key, value) for key, value in issue_match.groupdict().items() if value
            ]
            assert len(groups) == 1, f"Expected only 1 element in {groups}"
            group, issue_id = groups[0]
            issue_refs.append(
                IssueRef(
                    GROUP_REPO[group],
                    int(issue_id),
                    filename,
                    0,
                    None,
                )
            )
        # Files without any ending can be interesting datadriven test files
        if (
            not IGNORE_FILENAME_RE.match(filename)
            and not os.path.isdir(filename)
            and not os.path.islink(filename)
        ):
            issue_refs.extend(detect_referenced_issues(filename))

    if args.changed_lines_only:
        issue_refs = filter_changed_lines(issue_refs)

    issue_refs = filter_closed_issues(issue_refs)

    for issue_ref in issue_refs:
        url = buildkite.inline_link(
            f"https://github.com/{issue_ref.repository}/issues/{issue_ref.issue_id}",
            f"#{issue_ref.issue_id}",
        )
        print(f"--- Issue is referenced in comment but already closed: {url}")
        if issue_ref.text is not None:
            print(f"{issue_ref.filename}:{issue_ref.line_number}:")
            print(issue_ref.text)
        else:
            print(f"{issue_ref.filename} (filename)")

    return 1 if len(issue_refs) else 0


if __name__ == "__main__":
    sys.exit(main())

Functions

def comment_blocks(file: ) ‑> collections.abc.Iterator[tuple[int, str]]
Expand source code Browse git
def comment_blocks(file: IO) -> Iterator[tuple[int, str]]:
    comment: CommentBlock | None = None

    for line_number, line in enumerate(file):
        if comment_match := COMMENT_RE.search(line):
            char = comment_match.group(0)
            pos = comment_match.span()[0]
            if comment is None:
                comment = CommentBlock(char, pos, line, line_number + 1)
                continue
            if char == comment.char and pos == comment.pos:
                comment.text += line
                continue
            yield (comment.line_number, comment.text)
            comment = CommentBlock(char, pos, line, line_number + 1)
            continue
        if comment is not None:
            yield (comment.line_number, comment.text)
            comment = None
        yield (line_number + 1, line)

    if comment is not None:
        yield (comment.line_number, comment.text)
def detect_referenced_issues(filename: str) ‑> list[IssueRef]
Expand source code Browse git
def detect_referenced_issues(filename: str) -> list[IssueRef]:
    issue_refs: list[IssueRef] = []

    with open(filename) as file:
        for line_number, text in comment_blocks(file):
            if not REFERENCE_RE.search(text) or IGNORE_RE.search(text):
                continue

            offset = 0
            while issue_match := ISSUE_RE.search(text, offset):
                offset = issue_match.span()[1]

                groups = [
                    (key, value)
                    for key, value in issue_match.groupdict().items()
                    if value
                ]
                assert len(groups) == 1, f"Expected only 1 element in {groups}"
                group, issue_id = groups[0]

                is_referenced_with_url = "issues/" in issue_match.group(0)

                # Explain plans can look like issue references
                if int(issue_id) < 100 and not is_referenced_with_url:
                    continue

                issue_refs.append(
                    IssueRef(
                        GROUP_REPO[group],
                        int(issue_id),
                        filename,
                        line_number,
                        text.strip(),
                    )
                )

    return issue_refs
def filter_changed_lines(issue_refs: list[IssueRef]) ‑> list[IssueRef]
Expand source code Browse git
def filter_changed_lines(issue_refs: list[IssueRef]) -> list[IssueRef]:
    changed_lines = buildkite.find_modified_lines()
    return [
        issue_ref
        for issue_ref in issue_refs
        if issue_ref.text is not None
        and any(
            (issue_ref.filename, issue_ref.line_number + i) in changed_lines
            for i in range(issue_ref.text.count("\n"))
        )
    ]
def filter_closed_issues(issue_refs: list[IssueRef]) ‑> list[IssueRef]
Expand source code Browse git
def filter_closed_issues(issue_refs: list[IssueRef]) -> list[IssueRef]:
    issues = {(issue_ref.repository, issue_ref.issue_id) for issue_ref in issue_refs}
    closed_issues = {
        (repository, issue)
        for repository, issue in issues
        if is_issue_closed_on_github(repository, issue)
    }
    return [
        issue_ref
        for issue_ref in issue_refs
        if (issue_ref.repository, issue_ref.issue_id) in closed_issues
    ]
def is_issue_closed_on_github(repository: str, issue_id: int) ‑> bool
Expand source code Browse git
def is_issue_closed_on_github(repository: str, issue_id: int) -> bool:
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    if token := os.getenv("GITHUB_TOKEN"):
        headers["Authorization"] = f"Bearer {token}"

    url = f"https://api.github.com/repos/{repository}/issues/{issue_id}"
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        raise ValueError(
            f"Bad return code from GitHub on {url}: {response.status_code}: {response.text}"
        )

    issue_json = response.json()
    assert (
        issue_json["number"] == issue_id
    ), f"Returned issue number {issue_json['number']} is not the expected issue number {issue_id}"
    return issue_json["state"] == "closed"
def main() ‑> int
Expand source code Browse git
def main() -> int:
    parser = argparse.ArgumentParser(
        prog="ci-closed-issues-detect",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="ci-closed-issues-detect detects references to already closed GitHub issues.",
    )

    parser.add_argument(
        "--changed-lines-only",
        action="store_true",
        help="only report issues in changed files/lines",
    )

    args = parser.parse_args()

    filenames = spawn.capture(
        ["git", "ls-tree", "--full-tree", "-r", "--name-only", "HEAD"]
    )

    issue_refs: list[IssueRef] = []
    for filename in filenames.splitlines():
        if issue_match := FILENAME_REFERENCE_RE.search(filename):
            groups = [
                (key, value) for key, value in issue_match.groupdict().items() if value
            ]
            assert len(groups) == 1, f"Expected only 1 element in {groups}"
            group, issue_id = groups[0]
            issue_refs.append(
                IssueRef(
                    GROUP_REPO[group],
                    int(issue_id),
                    filename,
                    0,
                    None,
                )
            )
        # Files without any ending can be interesting datadriven test files
        if (
            not IGNORE_FILENAME_RE.match(filename)
            and not os.path.isdir(filename)
            and not os.path.islink(filename)
        ):
            issue_refs.extend(detect_referenced_issues(filename))

    if args.changed_lines_only:
        issue_refs = filter_changed_lines(issue_refs)

    issue_refs = filter_closed_issues(issue_refs)

    for issue_ref in issue_refs:
        url = buildkite.inline_link(
            f"https://github.com/{issue_ref.repository}/issues/{issue_ref.issue_id}",
            f"#{issue_ref.issue_id}",
        )
        print(f"--- Issue is referenced in comment but already closed: {url}")
        if issue_ref.text is not None:
            print(f"{issue_ref.filename}:{issue_ref.line_number}:")
            print(issue_ref.text)
        else:
            print(f"{issue_ref.filename} (filename)")

    return 1 if len(issue_refs) else 0

Classes

class CommentBlock (char: str, pos: int, text: str, line_number: int)

CommentBlock(char: str, pos: int, text: str, line_number: int)

Expand source code Browse git
@dataclass
class CommentBlock:
    char: str
    pos: int
    text: str
    line_number: int

Class variables

var char : str
var line_number : int
var pos : int
var text : str
class IssueRef (repository: str, issue_id: int, filename: str, line_number: int, text: str | None)

IssueRef(repository: str, issue_id: int, filename: str, line_number: int, text: str | None)

Expand source code Browse git
@dataclass
class IssueRef:
    repository: str
    issue_id: int
    filename: str
    line_number: int
    text: str | None

Class variables

var filename : str
var issue_id : int
var line_number : int
var repository : str
var text : str | None