Module materialize.feature_benchmark.scenarios.optbench
Expand source code Browse git
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
import re
from pathlib import Path
from typing import Any
from parameterized import parameterized_class # type: ignore
import materialize.optbench
import materialize.optbench.sql
from materialize.feature_benchmark.action import Action
from materialize.feature_benchmark.executor import Executor
from materialize.feature_benchmark.measurement import MeasurementType
from materialize.feature_benchmark.measurement_source import (
MeasurementSource,
Timestamp,
)
from materialize.feature_benchmark.scenario import Scenario
# for pdoc ignores: pdoc consults this module-level mapping, and an entry set
# to False asks it to leave that name out of the generated documentation.
__pdoc__ = {}
class OptbenchInit(Action):
    """Action that loads an optbench schema into the system under test.

    The schema statements are read from
    ``misc/python/materialize/optbench/schema/<scenario>.sql`` (a path that is
    relative to the current working directory) and submitted in a single
    ``sql`` call on the executor's composition.

    Args:
        scenario: Base name of the schema file, without the ``.sql`` suffix.
        no_indexes: When true, statements that start with ``CREATE INDEX``,
            ``CREATE DEFAULT INDEX`` or ``DROP INDEX`` (case-insensitive) are
            left out before the schema is submitted.
    """

    def __init__(self, scenario: str, no_indexes: bool = False) -> None:
        self._executor: Executor | None = None
        self._scenario = scenario
        self._no_indexes = no_indexes

    def run(self, executor: Executor | None = None) -> None:
        """Parse the schema file and execute its statements.

        Args:
            executor: Executor to run against; falls back to ``self._executor``
                when omitted.

        Raises:
            AssertionError: If neither ``executor`` nor ``self._executor`` is
                available.
        """
        e = executor or self._executor
        # Fail fast with a clear message instead of an AttributeError on None
        # further down (the attribute access below is hidden from the type
        # checker by a "type: ignore").
        assert e is not None, "OptbenchInit.run() requires an executor"

        statements = materialize.optbench.sql.parse_from_file(
            Path(f"misc/python/materialize/optbench/schema/{self._scenario}.sql")
        )

        if self._no_indexes:
            # The pattern is anchored at the start of the lower-cased statement
            # because re.match only matches at position 0.
            idx_re = re.compile(r"(create|create\s+default|drop)\s+index\s+")
            statements = [
                statement
                for statement in statements
                if not idx_re.match(statement.lower())
            ]

        e._composition.sql("\n".join(statements))  # type: ignore
class OptbenchRun(MeasurementSource):
    """Measurement source that reports the optimization time of one query.

    The query is picked by its 1-based position in
    ``misc/python/materialize/optbench/workload/<optbench_scenario>.sql``.
    """

    def __init__(self, optbench_scenario: str, query: int):
        self._executor: Executor | None = None
        self._optbench_scenario = optbench_scenario
        self._query = query

    def run(self, executor: Executor | None = None) -> list[Timestamp]:
        """Explain the selected query with timing and return a timestamp pair.

        Returns a two-element list: ``0`` followed by the reported optimization
        time divided by 3.
        """
        # Exactly one of the argument and the stored executor may be set.
        assert not (executor is None and self._executor is None)
        assert not (executor is not None and self._executor is not None)
        active_executor = executor or self._executor

        workload_path = Path(
            f"misc/python/materialize/optbench/workload/{self._optbench_scenario}.sql"
        )
        workload = materialize.optbench.sql.parse_from_file(workload_path)

        # Query numbers are 1-based positions into the workload file.
        assert 1 <= self._query <= len(workload)
        selected_sql = workload[self._query - 1]

        explain_sql = materialize.optbench.sql.Query(selected_sql).explain(timing=True)
        # The EXPLAIN text is taken from the first column of the first row.
        result_rows = active_executor._composition.sql_query(explain_sql)  # type: ignore
        explain_output = materialize.optbench.sql.ExplainOutput(result_rows[0][0])

        # Optimization time is in microseconds, divide by 3 to get a more
        # readable number (still in wrong unit)
        optimization_time = float(explain_output.optimization_time())  # type: ignore
        return [0, optimization_time / 3]
def name_with_query(
    cls: type["OptbenchTPCH"], num: int, params_dict: dict[str, Any]
) -> str:
    """Return the class name for one parameterized TPCH query scenario.

    Only ``params_dict["QUERY"]`` is used; it is rendered as a zero-padded,
    at-least-two-digit decimal suffix. ``cls`` and ``num`` are accepted to
    satisfy the ``class_name_func`` callback signature and are ignored.
    """
    query_number = params_dict["QUERY"]
    return "OptbenchTPCHQ" + format(query_number, "02d")
# Mark the per-query class names OptbenchTPCHQ01 .. OptbenchTPCHQ22 as ignored
# for pdoc. The key format matches the names built by name_with_query.
for i in range(1, 23):
    __pdoc__[f"OptbenchTPCHQ{i:02d}"] = False
@parameterized_class(
    [dict(QUERY=query_number) for query_number in range(1, 23)],
    class_name_func=name_with_query,
)
class OptbenchTPCH(Scenario):
    """Run optbench TPCH for optimizer benchmarks

    The class is parameterized over query numbers 1 through 22; each variant
    is named by ``name_with_query`` and benchmarks the query in ``QUERY``.
    """

    # Query number used by benchmark(); each parameter set supplies its own.
    QUERY = 1

    RELATIVE_THRESHOLD: dict[MeasurementType, float] = {
        MeasurementType.WALLCLOCK: 0.20,  # increased because it's easy to regress
        MeasurementType.MESSAGES: 0.10,
        MeasurementType.MEMORY: 0.10,
    }

    def init(self) -> list[Action]:
        """Return the setup actions: a single load of the "tpch" schema."""
        schema_setup = OptbenchInit("tpch")
        return [schema_setup]

    def benchmark(self) -> MeasurementSource:
        """Return the measurement source for this variant's TPCH query."""
        return OptbenchRun(optbench_scenario="tpch", query=self.QUERY)
Functions
def name_with_query(cls: type['OptbenchTPCH'], num: int, params_dict: dict[str, typing.Any]) ‑> str
-
Expand source code Browse git
def name_with_query( cls: type["OptbenchTPCH"], num: int, params_dict: dict[str, Any] ) -> str: return f"OptbenchTPCHQ{params_dict['QUERY']:02d}"
Classes
class OptbenchInit (scenario: str, no_indexes: bool = False)
-
Expand source code Browse git
class OptbenchInit(Action): def __init__(self, scenario: str, no_indexes: bool = False) -> None: self._executor: Executor | None = None self._scenario = scenario self._no_indexes = no_indexes def run(self, executor: Executor | None = None) -> None: e = executor or self._executor statements = materialize.optbench.sql.parse_from_file( Path(f"misc/python/materialize/optbench/schema/{self._scenario}.sql") ) if self._no_indexes: idx_re = re.compile(r"(create|create\s+default|drop)\s+index\s+") statements = [ statement for statement in statements if not idx_re.match(statement.lower()) ] e._composition.sql("\n".join(statements)) # type: ignore
Ancestors
Methods
def run(self, executor: Executor | None = None) ‑> None
-
Expand source code Browse git
def run(self, executor: Executor | None = None) -> None: e = executor or self._executor statements = materialize.optbench.sql.parse_from_file( Path(f"misc/python/materialize/optbench/schema/{self._scenario}.sql") ) if self._no_indexes: idx_re = re.compile(r"(create|create\s+default|drop)\s+index\s+") statements = [ statement for statement in statements if not idx_re.match(statement.lower()) ] e._composition.sql("\n".join(statements)) # type: ignore
class OptbenchRun (optbench_scenario: str, query: int)
-
Expand source code Browse git
class OptbenchRun(MeasurementSource): def __init__(self, optbench_scenario: str, query: int): self._executor: Executor | None = None self._optbench_scenario = optbench_scenario self._query = query def run(self, executor: Executor | None = None) -> list[Timestamp]: assert not (executor is None and self._executor is None) assert not (executor is not None and self._executor is not None) e = executor or self._executor queries = materialize.optbench.sql.parse_from_file( Path( f"misc/python/materialize/optbench/workload/{self._optbench_scenario}.sql" ) ) assert 1 <= self._query <= len(queries) query = queries[self._query - 1] explain_query = materialize.optbench.sql.Query(query).explain(timing=True) explain_output = materialize.optbench.sql.ExplainOutput( e._composition.sql_query(explain_query)[0][0] # type: ignore ) # Optimization time is in microseconds, divide by 3 to get a more readable number (still in wrong unit) timestamps = [0, float(explain_output.optimization_time()) / 3] # type: ignore return timestamps
Ancestors
Methods
def run(self, executor: Executor | None = None) ‑> list[float]
-
Expand source code Browse git
def run(self, executor: Executor | None = None) -> list[Timestamp]: assert not (executor is None and self._executor is None) assert not (executor is not None and self._executor is not None) e = executor or self._executor queries = materialize.optbench.sql.parse_from_file( Path( f"misc/python/materialize/optbench/workload/{self._optbench_scenario}.sql" ) ) assert 1 <= self._query <= len(queries) query = queries[self._query - 1] explain_query = materialize.optbench.sql.Query(query).explain(timing=True) explain_output = materialize.optbench.sql.ExplainOutput( e._composition.sql_query(explain_query)[0][0] # type: ignore ) # Optimization time is in microseconds, divide by 3 to get a more readable number (still in wrong unit) timestamps = [0, float(explain_output.optimization_time()) / 3] # type: ignore return timestamps
class OptbenchTPCH (scale: float, mz_version: MzVersion, default_size: int, seed: int)
-
Run optbench TPCH for optimizer benchmarks
Expand source code Browse git
@parameterized_class( [{"QUERY": i} for i in range(1, 23)], class_name_func=name_with_query ) class OptbenchTPCH(Scenario): """Run optbench TPCH for optimizer benchmarks""" QUERY = 1 RELATIVE_THRESHOLD: dict[MeasurementType, float] = { MeasurementType.WALLCLOCK: 0.20, # increased because it's easy to regress MeasurementType.MESSAGES: 0.10, MeasurementType.MEMORY: 0.10, } def init(self) -> list[Action]: return [OptbenchInit("tpch")] def benchmark(self) -> MeasurementSource: return OptbenchRun("tpch", self.QUERY)
Ancestors
Subclasses
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ01
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ02
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ03
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ04
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ05
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ06
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ07
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ08
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ09
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ10
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ11
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ12
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ13
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ14
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ15
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ16
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ17
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ18
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ19
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ20
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ21
- materialize.feature_benchmark.scenarios.optbench.OptbenchTPCHQ22
Class variables
var QUERY
var RELATIVE_THRESHOLD : dict[MeasurementType, float]
Methods
def benchmark(self) ‑> MeasurementSource
-
Expand source code Browse git
def benchmark(self) -> MeasurementSource: return OptbenchRun("tpch", self.QUERY)
def init(self) ‑> list[Action]
-
Expand source code Browse git
def init(self) -> list[Action]: return [OptbenchInit("tpch")]
Inherited members