Module `materialize.feature_benchmark.scenarios.optbench`

Expand source code Browse git

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.


import re
from pathlib import Path
from typing import Any

from parameterized import parameterized_class  # type: ignore

import materialize.optbench
import materialize.optbench.sql
from materialize.feature_benchmark.action import Action
from materialize.feature_benchmark.executor import Executor
from materialize.feature_benchmark.measurement import MeasurementType
from materialize.feature_benchmark.measurement_source import (
    MeasurementSource,
    Timestamp,
)
from materialize.feature_benchmark.scenario import Scenario

# for pdoc ignores
__pdoc__ = {}


class OptbenchInit(Action):
    """Action that loads an optbench schema file through the executor.

    The statements are read from
    ``misc/python/materialize/optbench/schema/<scenario>.sql`` (a relative
    path). When ``no_indexes`` is set, statements that begin with
    ``create index``, ``create default index`` or ``drop index`` are left out.
    """

    def __init__(self, scenario: str, no_indexes: bool = False) -> None:
        self._no_indexes = no_indexes
        self._scenario = scenario
        # May be assigned from outside; run() falls back to it when no
        # executor argument is given.
        self._executor: Executor | None = None

    def run(self, executor: Executor | None = None) -> None:
        """Parse the schema file and submit its statements as one SQL script.

        Args:
            executor: Executor to use; when falsy, ``self._executor`` is used.
        """
        target = executor or self._executor
        schema_path = Path(
            f"misc/python/materialize/optbench/schema/{self._scenario}.sql"
        )
        ddl = materialize.optbench.sql.parse_from_file(schema_path)

        if self._no_indexes:
            # The pattern is anchored at the start of the lower-cased statement.
            index_stmt = re.compile(r"(create|create\s+default|drop)\s+index\s+")
            ddl = [stmt for stmt in ddl if index_stmt.match(stmt.lower()) is None]

        script = "\n".join(ddl)
        target._composition.sql(script)  # type: ignore


class OptbenchRun(MeasurementSource):
    """Measurement source that reports the optimization time of one query.

    The query is picked by its 1-based position in
    ``misc/python/materialize/optbench/workload/<optbench_scenario>.sql``
    (a relative path) and is explained with timing enabled.
    """

    def __init__(self, optbench_scenario: str, query: int):
        self._query = query
        self._optbench_scenario = optbench_scenario
        # May be assigned from outside; run() requires that exactly one of
        # this attribute and its argument is set.
        self._executor: Executor | None = None

    def run(self, executor: Executor | None = None) -> list[Timestamp]:
        """Explain the selected query and return ``[0, optimization_time / 3]``.

        Args:
            executor: Executor to use. Exactly one of this argument and
                ``self._executor`` must be set (enforced with assertions).

        Returns:
            A two-element list: ``0`` followed by the scaled optimization time.
        """
        # Exactly one executor source must be available: at least one ...
        assert executor is not None or self._executor is not None
        # ... and not both.
        assert executor is None or self._executor is None
        runner = executor or self._executor

        workload_path = Path(
            f"misc/python/materialize/optbench/workload/{self._optbench_scenario}.sql"
        )
        workload = materialize.optbench.sql.parse_from_file(workload_path)

        # self._query is a 1-based index into the workload file.
        assert 1 <= self._query <= len(workload)
        selected = workload[self._query - 1]

        explain_sql = materialize.optbench.sql.Query(selected).explain(timing=True)
        rows = runner._composition.sql_query(explain_sql)  # type: ignore
        # The first column of the first row is handed to ExplainOutput.
        plan = materialize.optbench.sql.ExplainOutput(rows[0][0])

        # Optimization time is in microseconds, divide by 3 to get a more readable number (still in wrong unit)
        scaled_time = float(plan.optimization_time()) / 3  # type: ignore
        return [0, scaled_time]


def name_with_query(
    cls: type["OptbenchTPCH"], num: int, params_dict: dict[str, Any]
) -> str:
    """Build the class name for one parameterized query variant.

    Only ``params_dict["QUERY"]`` is used; ``cls`` and ``num`` are accepted
    but ignored. The query number is zero-padded to at least two digits,
    e.g. ``3`` gives ``OptbenchTPCHQ03``.
    """
    query_number = params_dict["QUERY"]
    return "OptbenchTPCHQ" + format(query_number, "02d")


# Register the per-query class names OptbenchTPCHQ01 .. OptbenchTPCHQ22 (the
# names name_with_query produces for QUERY 1..22) in __pdoc__ with the value
# False, so that pdoc ignores them.
for i in range(1, 23):
    __pdoc__[f"OptbenchTPCHQ{i:02d}"] = False


@parameterized_class(
    [{"QUERY": query_number} for query_number in range(1, 23)],
    class_name_func=name_with_query,
)
class OptbenchTPCH(Scenario):
    """Run optbench TPCH for optimizer benchmarks

    ``parameterized_class`` receives one parameter set per query number from
    1 to 22, each supplying ``QUERY``; ``name_with_query`` provides the names
    ``OptbenchTPCHQ01`` .. ``OptbenchTPCHQ22`` for the variants.
    """

    # 1-based query number; this is the value used when no parameter set
    # overrides it.
    QUERY = 1

    # Relative thresholds keyed by measurement type.
    RELATIVE_THRESHOLD: dict[MeasurementType, float] = {
        MeasurementType.WALLCLOCK: 0.20,  # increased because it's easy to regress
        MeasurementType.MESSAGES: 0.10,
        MeasurementType.MEMORY: 0.10,
    }

    def init(self) -> list[Action]:
        """Return the setup actions: a single load of the ``tpch`` schema."""
        return [OptbenchInit(scenario="tpch")]

    def benchmark(self) -> MeasurementSource:
        """Return the measurement source for this class's ``QUERY``."""
        return OptbenchRun(optbench_scenario="tpch", query=self.QUERY)

Functions

def name_with_query(cls: type['OptbenchTPCH'], num: int, params_dict: dict[str, typing.Any]) ‑> str

Expand source code Browse git

def name_with_query(
    cls: type["OptbenchTPCH"], num: int, params_dict: dict[str, Any]
) -> str:
    """Build the class name for one parameterized query variant.

    Only ``params_dict["QUERY"]`` is used; ``cls`` and ``num`` are accepted
    but ignored. The query number is zero-padded to at least two digits,
    e.g. ``3`` gives ``OptbenchTPCHQ03``.
    """
    query_number = params_dict["QUERY"]
    return "OptbenchTPCHQ" + format(query_number, "02d")

Classes

class OptbenchInit (scenario: str, no_indexes: bool = False)

Expand source code Browse git

class OptbenchInit(Action):
    """Action that loads an optbench schema file through the executor.

    The statements are read from
    ``misc/python/materialize/optbench/schema/<scenario>.sql`` (a relative
    path). When ``no_indexes`` is set, statements that begin with
    ``create index``, ``create default index`` or ``drop index`` are left out.
    """

    def __init__(self, scenario: str, no_indexes: bool = False) -> None:
        self._no_indexes = no_indexes
        self._scenario = scenario
        # May be assigned from outside; run() falls back to it when no
        # executor argument is given.
        self._executor: Executor | None = None

    def run(self, executor: Executor | None = None) -> None:
        """Parse the schema file and submit its statements as one SQL script.

        Args:
            executor: Executor to use; when falsy, ``self._executor`` is used.
        """
        target = executor or self._executor
        schema_path = Path(
            f"misc/python/materialize/optbench/schema/{self._scenario}.sql"
        )
        ddl = materialize.optbench.sql.parse_from_file(schema_path)

        if self._no_indexes:
            # The pattern is anchored at the start of the lower-cased statement.
            index_stmt = re.compile(r"(create|create\s+default|drop)\s+index\s+")
            ddl = [stmt for stmt in ddl if index_stmt.match(stmt.lower()) is None]

        script = "\n".join(ddl)
        target._composition.sql(script)  # type: ignore

Ancestors

Action

Methods

def run(self, executor: Executor | None = None) ‑> None

Expand source code Browse git

def run(self, executor: Executor | None = None) -> None:
    """Parse the schema file and submit its statements as one SQL script.

    Statements come from
    ``misc/python/materialize/optbench/schema/<scenario>.sql``. When
    ``self._no_indexes`` is set, statements that begin with ``create index``,
    ``create default index`` or ``drop index`` are left out.

    Args:
        executor: Executor to use; when falsy, ``self._executor`` is used.
    """
    target = executor or self._executor
    schema_path = Path(
        f"misc/python/materialize/optbench/schema/{self._scenario}.sql"
    )
    ddl = materialize.optbench.sql.parse_from_file(schema_path)

    if self._no_indexes:
        # The pattern is anchored at the start of the lower-cased statement.
        index_stmt = re.compile(r"(create|create\s+default|drop)\s+index\s+")
        ddl = [stmt for stmt in ddl if index_stmt.match(stmt.lower()) is None]

    script = "\n".join(ddl)
    target._composition.sql(script)  # type: ignore

class OptbenchRun (optbench_scenario: str, query: int)

Expand source code Browse git

class OptbenchRun(MeasurementSource):
    """Measurement source that reports the optimization time of one query.

    The query is picked by its 1-based position in
    ``misc/python/materialize/optbench/workload/<optbench_scenario>.sql``
    (a relative path) and is explained with timing enabled.
    """

    def __init__(self, optbench_scenario: str, query: int):
        self._query = query
        self._optbench_scenario = optbench_scenario
        # May be assigned from outside; run() requires that exactly one of
        # this attribute and its argument is set.
        self._executor: Executor | None = None

    def run(self, executor: Executor | None = None) -> list[Timestamp]:
        """Explain the selected query and return ``[0, optimization_time / 3]``.

        Args:
            executor: Executor to use. Exactly one of this argument and
                ``self._executor`` must be set (enforced with assertions).

        Returns:
            A two-element list: ``0`` followed by the scaled optimization time.
        """
        # Exactly one executor source must be available: at least one ...
        assert executor is not None or self._executor is not None
        # ... and not both.
        assert executor is None or self._executor is None
        runner = executor or self._executor

        workload_path = Path(
            f"misc/python/materialize/optbench/workload/{self._optbench_scenario}.sql"
        )
        workload = materialize.optbench.sql.parse_from_file(workload_path)

        # self._query is a 1-based index into the workload file.
        assert 1 <= self._query <= len(workload)
        selected = workload[self._query - 1]

        explain_sql = materialize.optbench.sql.Query(selected).explain(timing=True)
        rows = runner._composition.sql_query(explain_sql)  # type: ignore
        # The first column of the first row is handed to ExplainOutput.
        plan = materialize.optbench.sql.ExplainOutput(rows[0][0])

        # Optimization time is in microseconds, divide by 3 to get a more readable number (still in wrong unit)
        scaled_time = float(plan.optimization_time()) / 3  # type: ignore
        return [0, scaled_time]

Ancestors

MeasurementSource

Methods

def run(self, executor: Executor | None = None) ‑> list[float]

Expand source code Browse git

def run(self, executor: Executor | None = None) -> list[Timestamp]:
    """Explain the selected query and return ``[0, optimization_time / 3]``.

    The query is picked by its 1-based position (``self._query``) in
    ``misc/python/materialize/optbench/workload/<optbench_scenario>.sql``.

    Args:
        executor: Executor to use. Exactly one of this argument and
            ``self._executor`` must be set (enforced with assertions).

    Returns:
        A two-element list: ``0`` followed by the scaled optimization time.
    """
    # Exactly one executor source must be available: at least one ...
    assert executor is not None or self._executor is not None
    # ... and not both.
    assert executor is None or self._executor is None
    runner = executor or self._executor

    workload_path = Path(
        f"misc/python/materialize/optbench/workload/{self._optbench_scenario}.sql"
    )
    workload = materialize.optbench.sql.parse_from_file(workload_path)

    # self._query is a 1-based index into the workload file.
    assert 1 <= self._query <= len(workload)
    selected = workload[self._query - 1]

    explain_sql = materialize.optbench.sql.Query(selected).explain(timing=True)
    rows = runner._composition.sql_query(explain_sql)  # type: ignore
    # The first column of the first row is handed to ExplainOutput.
    plan = materialize.optbench.sql.ExplainOutput(rows[0][0])

    # Optimization time is in microseconds, divide by 3 to get a more readable number (still in wrong unit)
    scaled_time = float(plan.optimization_time()) / 3  # type: ignore
    return [0, scaled_time]

class OptbenchTPCH (scale: float, mz_version: MzVersion, default_size: int, seed: int)

Run optbench TPCH for optimizer benchmarks

Expand source code Browse git

@parameterized_class(
    [{"QUERY": query_number} for query_number in range(1, 23)],
    class_name_func=name_with_query,
)
class OptbenchTPCH(Scenario):
    """Run optbench TPCH for optimizer benchmarks

    ``parameterized_class`` receives one parameter set per query number from
    1 to 22, each supplying ``QUERY``; ``name_with_query`` provides the names
    ``OptbenchTPCHQ01`` .. ``OptbenchTPCHQ22`` for the variants.
    """

    # 1-based query number; this is the value used when no parameter set
    # overrides it.
    QUERY = 1

    # Relative thresholds keyed by measurement type.
    RELATIVE_THRESHOLD: dict[MeasurementType, float] = {
        MeasurementType.WALLCLOCK: 0.20,  # increased because it's easy to regress
        MeasurementType.MESSAGES: 0.10,
        MeasurementType.MEMORY: 0.10,
    }

    def init(self) -> list[Action]:
        """Return the setup actions: a single load of the ``tpch`` schema."""
        return [OptbenchInit(scenario="tpch")]

    def benchmark(self) -> MeasurementSource:
        """Return the measurement source for this class's ``QUERY``."""
        return OptbenchRun(optbench_scenario="tpch", query=self.QUERY)

Ancestors

Subclasses

materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ01
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ02
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ03
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ04
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ05
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ06
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ07
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ08
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ09
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ10
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ11
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ12
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ13
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ14
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ15
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ16
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ17
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ18
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ19
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ20
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ21
materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ22

Class variables

var QUERY
var RELATIVE_THRESHOLD : dict[MeasurementType, float]

Methods

def benchmark(self) ‑> MeasurementSource

Expand source code Browse git

def benchmark(self) -> MeasurementSource:
    """Return an ``OptbenchRun`` for the ``tpch`` workload and ``self.QUERY``."""
    return OptbenchRun(optbench_scenario="tpch", query=self.QUERY)

def init(self) ‑> list[Action]

Expand source code Browse git

def init(self) -> list[Action]:
    """Return the setup actions: a single ``OptbenchInit`` for ``tpch``."""
    setup_actions = [OptbenchInit(scenario="tpch")]
    return setup_actions

Inherited members

Scenario:
- join
- name_with_scale
- table_ten
- unique_values