misc.python.materialize.version_ancestor_overrides
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.


from __future__ import annotations

from typing import Any

from materialize.mz_version import MzVersion


def get_ancestor_overrides_for_performance_regressions(
    scenario_class: type[Any], scale: str | None
) -> dict[str, MzVersion]:
    """
    Return the ancestor overrides that apply to the given performance scenario.

    Git revisions that are based on commits listed as keys require at least the version specified in the value.
    Note that specified versions do not necessarily need to be already published.
    Commits must be ordered descending by their date.

    Args:
        scenario_class: the scenario class; only its ``__name__`` is used to select entries.
        scale: not read by this function.

    Returns:
        A mapping from commit hash to minimum ancestor version. Scenario-specific
        entries are inserted first, followed by the legacy entries that are added
        for every scenario.
    """

    scenario_class_name = scenario_class.__name__

    min_ancestor_mz_version_per_commit: dict[str, MzVersion] = {}

    if scenario_class_name in ("CrossJoin", "AccumulateReductions"):
        # PR#31501 (Remove ChunkedStack and related) increases latency for inserts
        min_ancestor_mz_version_per_commit[
            "e91f9d5e47f5dddf1d5d1a3afa3c27907bdbb0a7"
        ] = MzVersion.parse_mz("v0.134.0")

    if scenario_class_name == "ManySmallInserts":
        # PR#31309 ([adapter] don't block on builtin table write in Session creation) increases latency for inserts
        min_ancestor_mz_version_per_commit[
            "e8c42c65afb7acd55eb7e530a92c89a9165f2e33"
        ] = MzVersion.parse_mz("v0.133.0")

    if scenario_class_name == "SwapSchema":
        # PR#30883 (Columnar in logging dataflows) increases Mz memory usage
        min_ancestor_mz_version_per_commit[
            "a077232ffcb76ef7498da7637fbc9e80aa88765c"
        ] = MzVersion.parse_mz("v0.131.0")

    if scenario_class_name == "FastPathOrderByLimit":
        # PR#30872 (rust: Upgrade to 1.83.0) increases wallclock
        min_ancestor_mz_version_per_commit[
            "74ebdd68dd2e9ec860837d52866ab9db61a0a49e"
        ] = MzVersion.parse_mz("v0.129.0")

    if scenario_class_name == "OptbenchTPCHQ01":
        # PR#30806 ([optimizer] report per-transform metrics) increases wallclock
        min_ancestor_mz_version_per_commit[
            "a5355b2e89fedef9f7a04a96b737f7434a8e3f62"
        ] = MzVersion.parse_mz("v0.128.0")

    if scenario_class_name in ("KafkaUpsert", "KafkaUpsertUnique", "ParallelIngestion"):
        # PR#30617 (storage/kafka: use separate consumer for metadata probing)
        # adds 1s of delay to Kafka source startup
        min_ancestor_mz_version_per_commit[
            "9f7b634e6824f73d0effcdfa86c2b8b1642a4784"
        ] = MzVersion.parse_mz("v0.127.0")

    if scenario_class_name == "InsertMultiRow":
        # PR#30622 (Refactor how we run FoldConstants) increases wallclock
        min_ancestor_mz_version_per_commit[
            "a558d6bdc4b29abf79457eaba52914a0d6c805b7"
        ] = MzVersion.parse_mz("v0.127.0")

    if "OptbenchTPCH" in scenario_class_name:
        # PR#30602 (Replace ColumnKnowledge with EquivalencePropagation) increases wallclock
        min_ancestor_mz_version_per_commit[
            "1bd45336f8335b3487153beb7ce57f6391a7cf9c"
        ] = MzVersion.parse_mz("v0.126.0")

    if "OptbenchTPCH" in scenario_class_name:
        # PR#30506 (Remove NonNullable transform) increases wallclock
        min_ancestor_mz_version_per_commit[
            "6981cb35f6a64748293867beb67e74b804f9e723"
        ] = MzVersion.parse_mz("v0.126.0")

    if scenario_class_name == "KafkaUpsertUnique":
        # PR#29718 (storage: continual feedback upsert operator) increases CPU and memory
        min_ancestor_mz_version_per_commit[
            "b16b6a2c71f6e52adcbe37988cb262c15074a63f"
        ] = MzVersion.parse_mz("v0.125.0")

    if scenario_class_name in (
        "SmallClusters",
        "AccumulateReductions",
        "CreateIndex",
        "ManySmallUpdates",
        "FastPathOrderByLimit",
        "FastPathFilterIndex",
        "ParallelIngestion",
        "SubscribeParallelTableWithIndex",
        "DeltaJoinMaintained",
        "Update",
        "Retraction",
    ):
        # PR#28307 (Render regions for object build and let bindings) increases messages
        min_ancestor_mz_version_per_commit[
            "ffcafa5b5c3e83845a868cf6103048c045b4f155"
        ] = MzVersion.parse_mz("v0.113.0")

    if "OptbenchTPCH" in scenario_class_name:
        # PR#28664 (Introduce MirScalarExpr::reduce_safely) increases wallclock
        min_ancestor_mz_version_per_commit[
            "0a570022e1b78a205d5d9d3ebcb640b714e738c2"
        ] = MzVersion.parse_mz("v0.111.0")

    # A tuple is used here for consistency with the other membership checks.
    if scenario_class_name in ("OptbenchTPCHQ02", "OptbenchTPCHQ18", "OptbenchTPCHQ21"):
        # PR#28566 (Incorporate non-null information, and prevent its deletion) increased wallclock
        min_ancestor_mz_version_per_commit[
            "45d78090f8fea353dbdff9f1b2de463d475fabc3"
        ] = MzVersion.parse_mz("v0.111.0")

    if scenario_class_name == "ManyKafkaSourcesOnSameCluster":
        # PR#28359 (Reapply "storage: wire up new reclock implementation") increased wallclock
        min_ancestor_mz_version_per_commit[
            "1937ca8b444a919e3077843980c97d61fc072252"
        ] = MzVersion.parse_mz("v0.110.0")

    if scenario_class_name == "ManyKafkaSourcesOnSameCluster":
        # PR#28228 (storage/kafka: round-robin partition/worker assignment) increased wallclock
        min_ancestor_mz_version_per_commit[
            "256e1f839ba5243293e738bcd78d0f36c1be8f3e"
        ] = MzVersion.parse_mz("v0.109.0")

    if scenario_class_name == "MinMax":
        # PR#27988 (adapter: always declare MV imports non-monotonic) increased wallclock and memory
        min_ancestor_mz_version_per_commit[
            "c18aa43828a7d2e9527151a0251c1f75a06d1469"
        ] = MzVersion.parse_mz("v0.108.0")

    if scenario_class_name == "AccumulateReductions":
        # PR#26807 (compute: hydration status based on output frontiers) increased messages
        min_ancestor_mz_version_per_commit[
            "be0e50041169a5cac80c033b083c920b067d049f"
        ] = MzVersion.parse_mz("v0.106.0")

    if scenario_class_name == "SwapSchema":
        # PR#27607 (catalog: Listen for updates in transactions) increased wallclock
        min_ancestor_mz_version_per_commit[
            "eef900de75d25fe854524dff9feeed8057e4bf79"
        ] = MzVersion.parse_mz("v0.105.0")

    if scenario_class_name == "MySqlInitialLoad":
        # PR#27058 (storage: wire up new reclock implementation) increased memory usage
        min_ancestor_mz_version_per_commit[
            "10abb1cca257ffc3d605c99ed961e037bbf3fa51"
        ] = MzVersion.parse_mz("v0.103.0")

    if "OptbenchTPCH" in scenario_class_name:
        # PR#26652 (explain: fix tracing fast path regression) significantly increased wallclock for OptbenchTPCH
        min_ancestor_mz_version_per_commit[
            "96c22562745f59010860bd825de5b4007a172c70"
        ] = MzVersion.parse_mz("v0.97.0")
        # PR#24155 (equivalence propagation) significantly increased wallclock for OptbenchTPCH
        min_ancestor_mz_version_per_commit[
            "3cfaa8207faa7df087942cd44311a3e7b4534c25"
        ] = MzVersion.parse_mz("v0.92.0")

    if scenario_class_name == "FastPathFilterNoIndex":
        # PR#26084 (Optimize OffsetList) increased wallclock
        min_ancestor_mz_version_per_commit[
            "2abcd90ac3201b0235ea41c5db81bdd931a0fda0"
        ] = MzVersion.parse_mz("v0.96.0")

    if scenario_class_name == "ParallelDataflows":
        # PR#26020 (Stage flatmap execution to consolidate as it goes) significantly increased wallclock
        min_ancestor_mz_version_per_commit[
            "da35946d636607a11fa27d5a8ea6e9939bf9525e"
        ] = MzVersion.parse_mz("v0.93.0")

    # add legacy entries (these are added regardless of the scenario)
    min_ancestor_mz_version_per_commit.update(
        {
            # insert newer commits at the top
            # PR#25502 (JoinFusion across MFPs) increased number of messages
            "62ea182963be5b956e13115b8ad39f7835fc4351": MzVersion.parse_mz("v0.91.0"),
            # PR#24906 (Compute operator hydration status logging) increased number of messages against v0.88.1
            "067ae870eef724f7eb5851b5745b9ff52b881481": MzVersion.parse_mz("v0.89.0"),
            # PR#24918 (txn-wal: switch to a new operator protocol for lazy) increased number of messages against v0.86.1 (but got reverted in 0.87.1)
            "b648576b52b8ba9bb3a4732f7022ab5c06ebed32": MzVersion.parse_mz("v0.87.0"),
            # PR#23659 (txn-wal: enable in CI with "eager uppers") introduces regressions against v0.79.0
            "c4f520a57a3046e5074939d2ea345d1c72be7079": MzVersion.parse_mz("v0.80.0"),
            # PR#23421 (coord: smorgasbord of improvements for the crdb-backed timestamp oracle) introduces regressions against 0.78.13
            "5179ebd39aea4867622357a832aaddcde951b411": MzVersion.parse_mz("v0.79.0"),
            # insert newer commits at the top
        }
    )

    return min_ancestor_mz_version_per_commit


_MIN_ANCESTOR_MZ_VERSION_PER_COMMIT_TO_ACCOUNT_FOR_SCALABILITY_REGRESSIONS: dict[
    str, MzVersion
] = {
    # insert newer commits at the top
    # PR#31309 ([adapter] don't block on builtin table write in Session creation) increases latency for inserts
    "e8c42c65afb7acd55eb7e530a92c89a9165f2e33": MzVersion.parse_mz("v0.133.0"),
    # PR#30238 (adapter: Remove the global write lock) introduces regressions against v0.123.0
    "98678454a334a470ceea46b126586c7e60a0d8a5": MzVersion.parse_mz("v0.124.0"),
    # PR#28307 (Render regions for object build and let bindings) introduces regressions against v0.112.0
    "ffcafa5b5c3e83845a868cf6103048c045b4f155": MzVersion.parse_mz("v0.113.0"),
    # PR#23659 (txn-wal: enable in CI with "eager uppers") introduces regressions against v0.79.0
    "c4f520a57a3046e5074939d2ea345d1c72be7079": MzVersion.parse_mz("v0.80.0"),
    # PR#23421 (coord: smorgasbord of improvements for the crdb-backed timestamp oracle) introduces regressions against 0.78.13
    "5179ebd39aea4867622357a832aaddcde951b411": MzVersion.parse_mz("v0.79.0"),
    # insert newer commits at the top
}
"""
Git revisions that are based on commits listed as keys require at least the version specified in the value.
Note that specified versions do not necessarily need to be already published.
Commits must be ordered descending by their date.
"""

_MIN_ANCESTOR_MZ_VERSION_PER_COMMIT_TO_ACCOUNT_FOR_CORRECTNESS_REGRESSIONS: dict[
    str, MzVersion
] = {
    # insert newer commits at the top
    # PR#29179: Add client_address to session
    "deb8beb77ddb69895aad899cf2eab90a0a78585d": MzVersion.parse_mz("v0.118.0"),
    # PR#24497 (Make sure variance never returns a negative number) changes DFR or CTF handling compared to v0.84.0
    "82a5130a8466525c5b3bdb3eff845c7c34585774": MzVersion.parse_mz("v0.85.0"),
}
"""
Git revisions that are based on commits listed as keys require at least the version specified in the value.
Note that specified versions do not necessarily need to be already published.
Commits must be ordered descending by their date.
See also: #_MIN_ANCESTOR_MZ_VERSION_PER_COMMIT_TO_ACCOUNT_FOR_SCALABILITY_REGRESSIONS
"""

# Public aliases for the private override tables defined in this module.
ANCESTOR_OVERRIDES_FOR_SCALABILITY_REGRESSIONS = (
    _MIN_ANCESTOR_MZ_VERSION_PER_COMMIT_TO_ACCOUNT_FOR_SCALABILITY_REGRESSIONS
)
ANCESTOR_OVERRIDES_FOR_CORRECTNESS_REGRESSIONS = (
    _MIN_ANCESTOR_MZ_VERSION_PER_COMMIT_TO_ACCOUNT_FOR_CORRECTNESS_REGRESSIONS
)
def
get_ancestor_overrides_for_performance_regressions( scenario_class: type[typing.Any], scale: str | None) -> dict[str, materialize.mz_version.MzVersion]:
def get_ancestor_overrides_for_performance_regressions(
    scenario_class: type[Any], scale: str | None
) -> dict[str, MzVersion]:
    """
    Collect the ancestor overrides that apply to the given performance scenario.

    Every key is a commit; git revisions that are based on it need at least the
    version stored as the value. The versions named here are not required to be
    published yet. Entries have to stay sorted by commit date, newest first.
    """

    name = scenario_class.__name__
    is_optbench_tpch = "OptbenchTPCH" in name

    # Ordered rules: (applies to this scenario, commit, minimum ancestor version).
    # The version string is only parsed for rules that apply.
    scenario_rules: list[tuple[bool, str, str]] = [
        # PR#31501 (Remove ChunkedStack and related) increases latency for inserts
        (
            name in ("CrossJoin", "AccumulateReductions"),
            "e91f9d5e47f5dddf1d5d1a3afa3c27907bdbb0a7",
            "v0.134.0",
        ),
        # PR#31309 ([adapter] don't block on builtin table write in Session creation) increases latency for inserts
        (
            name == "ManySmallInserts",
            "e8c42c65afb7acd55eb7e530a92c89a9165f2e33",
            "v0.133.0",
        ),
        # PR#30883 (Columnar in logging dataflows) increases Mz memory usage
        (
            name == "SwapSchema",
            "a077232ffcb76ef7498da7637fbc9e80aa88765c",
            "v0.131.0",
        ),
        # PR#30872 (rust: Upgrade to 1.83.0) increases wallclock
        (
            name == "FastPathOrderByLimit",
            "74ebdd68dd2e9ec860837d52866ab9db61a0a49e",
            "v0.129.0",
        ),
        # PR#30806 ([optimizer] report per-transform metrics) increases wallclock
        (
            name == "OptbenchTPCHQ01",
            "a5355b2e89fedef9f7a04a96b737f7434a8e3f62",
            "v0.128.0",
        ),
        # PR#30617 (storage/kafka: use separate consumer for metadata probing)
        # adds 1s of delay to Kafka source startup
        (
            name in ("KafkaUpsert", "KafkaUpsertUnique", "ParallelIngestion"),
            "9f7b634e6824f73d0effcdfa86c2b8b1642a4784",
            "v0.127.0",
        ),
        # PR#30622 (Refactor how we run FoldConstants) increases wallclock
        (
            name == "InsertMultiRow",
            "a558d6bdc4b29abf79457eaba52914a0d6c805b7",
            "v0.127.0",
        ),
        # PR#30602 (Replace ColumnKnowledge with EquivalencePropagation) increases wallclock
        (
            is_optbench_tpch,
            "1bd45336f8335b3487153beb7ce57f6391a7cf9c",
            "v0.126.0",
        ),
        # PR#30506 (Remove NonNullable transform) increases wallclock
        (
            is_optbench_tpch,
            "6981cb35f6a64748293867beb67e74b804f9e723",
            "v0.126.0",
        ),
        # PR#29718 (storage: continual feedback upsert operator) increases CPU and memory
        (
            name == "KafkaUpsertUnique",
            "b16b6a2c71f6e52adcbe37988cb262c15074a63f",
            "v0.125.0",
        ),
        # PR#28307 (Render regions for object build and let bindings) increases messages
        (
            name
            in (
                "SmallClusters",
                "AccumulateReductions",
                "CreateIndex",
                "ManySmallUpdates",
                "FastPathOrderByLimit",
                "FastPathFilterIndex",
                "ParallelIngestion",
                "SubscribeParallelTableWithIndex",
                "DeltaJoinMaintained",
                "Update",
                "Retraction",
            ),
            "ffcafa5b5c3e83845a868cf6103048c045b4f155",
            "v0.113.0",
        ),
        # PR#28664 (Introduce MirScalarExpr::reduce_safely) increases wallclock
        (
            is_optbench_tpch,
            "0a570022e1b78a205d5d9d3ebcb640b714e738c2",
            "v0.111.0",
        ),
        # PR#28566 (Incorporate non-null information, and prevent its deletion) increased wallclock
        (
            name in {"OptbenchTPCHQ02", "OptbenchTPCHQ18", "OptbenchTPCHQ21"},
            "45d78090f8fea353dbdff9f1b2de463d475fabc3",
            "v0.111.0",
        ),
        # PR#28359 (Reapply "storage: wire up new reclock implementation") increased wallclock
        (
            name == "ManyKafkaSourcesOnSameCluster",
            "1937ca8b444a919e3077843980c97d61fc072252",
            "v0.110.0",
        ),
        # PR#28228 (storage/kafka: round-robin partition/worker assignment) increased wallclock
        (
            name == "ManyKafkaSourcesOnSameCluster",
            "256e1f839ba5243293e738bcd78d0f36c1be8f3e",
            "v0.109.0",
        ),
        # PR#27988 (adapter: always declare MV imports non-monotonic) increased wallclock and memory
        (
            name == "MinMax",
            "c18aa43828a7d2e9527151a0251c1f75a06d1469",
            "v0.108.0",
        ),
        # PR#26807 (compute: hydration status based on output frontiers) increased messages
        (
            name == "AccumulateReductions",
            "be0e50041169a5cac80c033b083c920b067d049f",
            "v0.106.0",
        ),
        # PR#27607 (catalog: Listen for updates in transactions) increased wallclock
        (
            name == "SwapSchema",
            "eef900de75d25fe854524dff9feeed8057e4bf79",
            "v0.105.0",
        ),
        # PR#27058 (storage: wire up new reclock implementation) increased memory usage
        (
            name == "MySqlInitialLoad",
            "10abb1cca257ffc3d605c99ed961e037bbf3fa51",
            "v0.103.0",
        ),
        # PR#26652 (explain: fix tracing fast path regression) significantly increased wallclock for OptbenchTPCH
        (
            is_optbench_tpch,
            "96c22562745f59010860bd825de5b4007a172c70",
            "v0.97.0",
        ),
        # PR#24155 (equivalence propagation) significantly increased wallclock for OptbenchTPCH
        (
            is_optbench_tpch,
            "3cfaa8207faa7df087942cd44311a3e7b4534c25",
            "v0.92.0",
        ),
        # PR#26084 (Optimize OffsetList) increased wallclock
        (
            name == "FastPathFilterNoIndex",
            "2abcd90ac3201b0235ea41c5db81bdd931a0fda0",
            "v0.96.0",
        ),
        # PR#26020 (Stage flatmap execution to consolidate as it goes) significantly increased wallclock
        (
            name == "ParallelDataflows",
            "da35946d636607a11fa27d5a8ea6e9939bf9525e",
            "v0.93.0",
        ),
    ]

    # Legacy entries are appended for every scenario, after the scenario-specific ones.
    # insert newer commits at the top
    legacy_rules: list[tuple[str, str]] = [
        # PR#25502 (JoinFusion across MFPs) increased number of messages
        ("62ea182963be5b956e13115b8ad39f7835fc4351", "v0.91.0"),
        # PR#24906 (Compute operator hydration status logging) increased number of messages against v0.88.1
        ("067ae870eef724f7eb5851b5745b9ff52b881481", "v0.89.0"),
        # PR#24918 (txn-wal: switch to a new operator protocol for lazy) increased number of messages against v0.86.1 (but got reverted in 0.87.1)
        ("b648576b52b8ba9bb3a4732f7022ab5c06ebed32", "v0.87.0"),
        # PR#23659 (txn-wal: enable in CI with "eager uppers") introduces regressions against v0.79.0
        ("c4f520a57a3046e5074939d2ea345d1c72be7079", "v0.80.0"),
        # PR#23421 (coord: smorgasbord of improvements for the crdb-backed timestamp oracle) introduces regressions against 0.78.13
        ("5179ebd39aea4867622357a832aaddcde951b411", "v0.79.0"),
    ]

    overrides: dict[str, MzVersion] = {
        commit: MzVersion.parse_mz(version)
        for applies, commit, version in scenario_rules
        if applies
    }
    for commit, version in legacy_rules:
        overrides[commit] = MzVersion.parse_mz(version)

    return overrides
Git revisions that are based on commits listed as keys require at least the version specified in the value. Note that specified versions do not necessarily need to be already published. Commits must be ordered descending by their date.
ANCESTOR_OVERRIDES_FOR_SCALABILITY_REGRESSIONS =
{'e8c42c65afb7acd55eb7e530a92c89a9165f2e33': MzVersion(major=0, minor=133, patch=0, prerelease=None, build=None), '98678454a334a470ceea46b126586c7e60a0d8a5': MzVersion(major=0, minor=124, patch=0, prerelease=None, build=None), 'ffcafa5b5c3e83845a868cf6103048c045b4f155': MzVersion(major=0, minor=113, patch=0, prerelease=None, build=None), 'c4f520a57a3046e5074939d2ea345d1c72be7079': MzVersion(major=0, minor=80, patch=0, prerelease=None, build=None), '5179ebd39aea4867622357a832aaddcde951b411': MzVersion(major=0, minor=79, patch=0, prerelease=None, build=None)}
ANCESTOR_OVERRIDES_FOR_CORRECTNESS_REGRESSIONS =
{'deb8beb77ddb69895aad899cf2eab90a0a78585d': MzVersion(major=0, minor=118, patch=0, prerelease=None, build=None), '82a5130a8466525c5b3bdb3eff845c7c34585774': MzVersion(major=0, minor=85, patch=0, prerelease=None, build=None)}