misc.python.materialize.xcompile

Support for cross-compiling to Linux.

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
 10"""Support for cross-compiling to Linux."""
 11
 12import os
 13import platform
 14import sys
 15from enum import Enum
 16
 17from materialize import MZ_ROOT, spawn
 18from materialize.rustc_flags import Sanitizer
 19
 20
 21class Arch(Enum):
 22    """A CPU architecture."""
 23
 24    X86_64 = "x86_64"
 25    """The 64-bit x86 architecture."""
 26
 27    AARCH64 = "aarch64"
 28    """The 64-bit ARM architecture."""
 29
 30    def __str__(self) -> str:
 31        return self.value
 32
 33    def go_str(self) -> str:
 34        """Return the architecture name in Go nomenclature: amd64 or arm64."""
 35        if self == Arch.X86_64:
 36            return "amd64"
 37        elif self == Arch.AARCH64:
 38            return "arm64"
 39        else:
 40            raise RuntimeError("unreachable")
 41
 42    @staticmethod
 43    def host() -> "Arch":
 44        if platform.machine() == "x86_64":
 45            return Arch.X86_64
 46        elif platform.machine() in ["aarch64", "arm64"]:
 47            return Arch.AARCH64
 48        else:
 49            raise RuntimeError(f"unknown host architecture {platform.machine()}")
 50
 51
 52def target(arch: Arch) -> str:
 53    """Construct a Linux target triple for the specified architecture."""
 54    return f"{arch}-unknown-linux-gnu"
 55
 56
 57def target_cpu(arch: Arch) -> str:
 58    """
 59    Return the CPU micro architecture, assuming a Linux target, we should use for Rust compilation.
 60
 61    Sync: This target-cpu should be kept in sync with the one in ci-builder and .cargo/config.
 62    """
 63    if arch == Arch.X86_64:
 64        return "x86-64-v3"
 65    elif arch == Arch.AARCH64:
 66        return "neoverse-n1"
 67    else:
 68        raise RuntimeError("unreachable")
 69
 70
 71def target_features(arch: Arch) -> list[str]:
 72    """
 73    Returns a list of CPU features we should enable for Rust compilation.
 74
 75    Note: We also specify the CPU target when compiling Rust which should enable the majority of
 76    available CPU features.
 77
 78    Sync: This list of features should be kept in sync with the one in ci-builder and .cargo/config.
 79    """
 80    if arch == Arch.X86_64:
 81        return ["+aes", "+pclmulqdq"]
 82    elif arch == Arch.AARCH64:
 83        return ["+aes", "+sha2"]
 84    else:
 85        raise RuntimeError("unreachable")
 86
 87
 88def bazel(
 89    arch: Arch,
 90    subcommand: str,
 91    rustflags: list[str],
 92    extra_env: dict[str, str] = {},
 93) -> list[str]:
 94    """Construct a Bazel invocation for cross compiling.
 95
 96    Args:
 97        arch: The CPU architecture to build for.
 98        subcommand: The Bazel subcommand to invoke.
 99        rustflags: Override the flags passed to the Rust compiler. If the list
100            is empty, the default flags are used.
101        extra_env: Extra environment variables to set for the execution of
102            Bazel.
103        is_tagged_build: Should this build be stamped with release info.
104    """
105    # Note: Unlike `cargo`, Bazel does not use CI_BUILDER and all of the cross
106    # compilation is handled at a higher level.
107
108    platform = f"--platforms=@toolchains_llvm//platforms:linux-{str(arch)}"
109    assert not (
110        sys.platform == "darwin" and arch == Arch.X86_64
111    ), "cross compiling to Linux x86_64 is not supported from macOS"
112
113    bazel_flags = ["--config=linux"]
114
115    rustc_flags = [
116        f"--@rules_rust//:extra_rustc_flag={flag}"
117        for flag in rustflags
118        # We apply `tokio_unstable` at the `WORKSPACE` level so skip it here to
119        # prevent changing the compile options and possibly missing cache hits.
120        if "tokio_unstable" not in flag
121    ]
122
123    return ["bazel", subcommand, platform, *bazel_flags, *rustc_flags]
124
125
def cargo(
    arch: Arch,
    subcommand: str,
    rustflags: list[str],
    channel: str | None = None,
    extra_env: dict[str, str] = {},
) -> list[str]:
    """Construct a Cargo invocation for cross compiling.

    Args:
        arch: The CPU architecture to build for.
        subcommand: The Cargo subcommand to invoke.
        rustflags: Flags to pass to the Rust compiler. The flags required for
            cross compiling are appended to a copy of this list; the list
            passed in is not modified.
        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
        extra_env: Extra environment variables to set for the execution of
            Cargo. Variables set by this function (e.g. `RUSTFLAGS`) take
            precedence over entries with the same name.

    Returns:
        A list of arguments specifying the beginning of the command to invoke.
    """
    _target = target(arch)
    _target_env = _target.upper().replace("-", "_")

    # Copy so that neither the caller's dict nor the shared default is touched.
    env = {
        **extra_env,
    }

    # Build a new list rather than using `+=` on the parameter: augmented
    # assignment extends the caller's list in place, which would make the
    # flags below accumulate across calls that share the same list object.
    rustflags = [
        *rustflags,
        "-Clink-arg=-Wl,--compress-debug-sections=zlib",
        "-Clink-arg=-Wl,-O3",
        "-Csymbol-mangling-version=v0",
        "--cfg=tokio_unstable",
    ]

    if sys.platform == "darwin":
        _bootstrap_darwin(arch)
        lld_prefix = spawn.capture(["brew", "--prefix", "lld"]).strip()
        sysroot = spawn.capture([f"{_target}-cc", "-print-sysroot"]).strip()
        rustflags += [
            f"-L{sysroot}/lib",
            "-Clink-arg=-fuse-ld=lld",
            f"-Clink-arg=-B{lld_prefix}/bin",
        ]
        env.update(
            {
                "CMAKE_SYSTEM_NAME": "Linux",
                f"CARGO_TARGET_{_target_env}_LINKER": f"{_target}-cc",
                "CARGO_TARGET_DIR": str(MZ_ROOT / "target-xcompile"),
                "TARGET_AR": f"{_target}-ar",
                "TARGET_CPP": f"{_target}-cpp",
                "TARGET_CC": f"{_target}-cc",
                "TARGET_CXX": f"{_target}-c++",
                "TARGET_CXXSTDLIB": "static=stdc++",
                "TARGET_LD": f"{_target}-ld",
                "TARGET_RANLIB": f"{_target}-ranlib",
            }
        )
    else:
        # NOTE(benesch): The required Rust flags have to be duplicated with
        # their definitions in ci/builder/Dockerfile because `rustc` has no way
        # to merge together Rust flags from different sources.
        rustflags += [
            "-Clink-arg=-fuse-ld=lld",
            f"-L/opt/x-tools/{_target}/{_target}/sysroot/lib",
        ]

    env.update({"RUSTFLAGS": " ".join(rustflags)})

    return [
        *_enter_builder(arch, channel),
        "env",
        *(f"{k}={v}" for k, v in env.items()),
        "cargo",
        subcommand,
        "--target",
        _target,
    ]
202
203
def tool(
    arch: Arch, name: str, channel: str | None = None, prefix_name: bool = True
) -> list[str]:
    """Build the start of a command line that runs a cross-compiling binutils tool.

    Args:
        arch: The CPU architecture to build for.
        name: The name of the binutils tool to invoke.
        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
        prefix_name: When true, the tool name is prefixed with the target
            triple for `arch`; otherwise `name` is used verbatim.

    Returns:
        A list of arguments specifying the beginning of the command to invoke.
    """
    # On macOS the cross-compiling toolchain is bootstrapped on the host first.
    if sys.platform == "darwin":
        _bootstrap_darwin(arch)

    if prefix_name:
        executable = f"{target(arch)}-{name}"
    else:
        executable = name

    invocation = list(_enter_builder(arch, channel))
    invocation.append(executable)
    return invocation
226
227
def _enter_builder(arch: Arch, channel: str | None = None) -> list[str]:
    """Return the command prefix that runs a command via `bin/ci-builder`.

    The prefix is empty on macOS and whenever `MZ_DEV_CI_BUILDER` is set in
    the environment. Otherwise it invokes `bin/ci-builder run` with
    `MZ_DEV_CI_BUILDER_ARCH` set to `arch`.

    Args:
        arch: The CPU architecture to build for.
        channel: The Rust toolchain channel to use. When not given, the channel
            is "stable" unless the `CI_SANITIZER` environment variable selects
            a sanitizer other than `none`, in which case it is "nightly".
    """
    if sys.platform == "darwin" or "MZ_DEV_CI_BUILDER" in os.environ:
        return []

    # The sanitizer lookup happens even when `channel` is supplied, so an
    # unrecognized `CI_SANITIZER` value always surfaces as an error.
    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]
    if sanitizer == Sanitizer.none:
        default_channel = "stable"
    else:
        default_channel = "nightly"

    return [
        "env",
        f"MZ_DEV_CI_BUILDER_ARCH={arch}",
        "bin/ci-builder",
        "run",
        channel or default_channel,
    ]
244
245
def _bootstrap_darwin(arch: Arch) -> None:
    """Install the host cross-compiling toolchain for `arch` if it is not current.

    A marker file under `target-xcompile/<target triple>/` records the
    bootstrap version that was last installed; when it already holds the
    current version, nothing is done.
    """
    # Building in Docker for Mac is painfully slow, so we install a
    # cross-compiling toolchain on the host and use that instead.

    bootstrap_version = "5"
    triple = target(arch)
    marker = MZ_ROOT / "target-xcompile" / triple / ".xcompile-bootstrap"

    try:
        up_to_date = marker.read_text() == bootstrap_version
    except FileNotFoundError:
        up_to_date = False
    if up_to_date:
        return

    spawn.runv(["brew", "install", "lld", f"materializeinc/crosstools/{triple}"])
    spawn.runv(["rustup", "target", "add", triple])

    # Record the version only after both installs have been run.
    marker.parent.mkdir(parents=True, exist_ok=True)
    marker.write_text(bootstrap_version)
class Arch(enum.Enum):
class Arch(Enum):
    """A CPU architecture that can be targeted when cross-compiling."""

    X86_64 = "x86_64"
    """The 64-bit x86 architecture."""

    AARCH64 = "aarch64"
    """The 64-bit ARM architecture."""

    def __str__(self) -> str:
        # Render as the bare architecture name (the enum value) so that a
        # member can be interpolated directly into strings.
        return self.value

    def go_str(self) -> str:
        """Return the architecture name in Go nomenclature: amd64 or arm64."""
        go_names = {
            Arch.X86_64: "amd64",
            Arch.AARCH64: "arm64",
        }
        go_name = go_names.get(self)
        if go_name is None:
            raise RuntimeError("unreachable")
        return go_name

    @staticmethod
    def host() -> "Arch":
        """Return the architecture of the machine running this code.

        Raises:
            RuntimeError: If `platform.machine()` reports a name that does not
                correspond to a known architecture.
        """
        machine = platform.machine()
        if machine == "x86_64":
            return Arch.X86_64
        if machine in ["aarch64", "arm64"]:
            return Arch.AARCH64
        raise RuntimeError(f"unknown host architecture {machine}")

A CPU architecture.

X86_64 = <Arch.X86_64: 'x86_64'>

The 64-bit x86 architecture.

AARCH64 = <Arch.AARCH64: 'aarch64'>

The 64-bit ARM architecture.

def go_str(self) -> str:
34    def go_str(self) -> str:
35        """Return the architecture name in Go nomenclature: amd64 or arm64."""
36        if self == Arch.X86_64:
37            return "amd64"
38        elif self == Arch.AARCH64:
39            return "arm64"
40        else:
41            raise RuntimeError("unreachable")

Return the architecture name in Go nomenclature: amd64 or arm64.

@staticmethod
def host() -> Arch:
43    @staticmethod
44    def host() -> "Arch":
45        if platform.machine() == "x86_64":
46            return Arch.X86_64
47        elif platform.machine() in ["aarch64", "arm64"]:
48            return Arch.AARCH64
49        else:
50            raise RuntimeError(f"unknown host architecture {platform.machine()}")
def target(arch: Arch) -> str:
def target(arch: Arch) -> str:
    """Build the `<arch>-unknown-linux-gnu` target triple for `arch`."""
    triple_suffix = "unknown-linux-gnu"
    return f"{arch}-{triple_suffix}"

Construct a Linux target triple for the specified architecture.

def target_cpu(arch: Arch) -> str:
58def target_cpu(arch: Arch) -> str:
59    """
60    Return the CPU micro architecture, assuming a Linux target, we should use for Rust compilation.
61
62    Sync: This target-cpu should be kept in sync with the one in ci-builder and .cargo/config.
63    """
64    if arch == Arch.X86_64:
65        return "x86-64-v3"
66    elif arch == Arch.AARCH64:
67        return "neoverse-n1"
68    else:
69        raise RuntimeError("unreachable")

Return the CPU microarchitecture that we should target for Rust compilation, assuming a Linux target.

Sync: This target-cpu should be kept in sync with the one in ci-builder and .cargo/config.

def target_features(arch: Arch) -> list[str]:
72def target_features(arch: Arch) -> list[str]:
73    """
74    Returns a list of CPU features we should enable for Rust compilation.
75
76    Note: We also specify the CPU target when compiling Rust which should enable the majority of
77    available CPU features.
78
79    Sync: This list of features should be kept in sync with the one in ci-builder and .cargo/config.
80    """
81    if arch == Arch.X86_64:
82        return ["+aes", "+pclmulqdq"]
83    elif arch == Arch.AARCH64:
84        return ["+aes", "+sha2"]
85    else:
86        raise RuntimeError("unreachable")

Returns a list of CPU features we should enable for Rust compilation.

Note: We also specify the CPU target when compiling Rust which should enable the majority of available CPU features.

Sync: This list of features should be kept in sync with the one in ci-builder and .cargo/config.

def bazel( arch: Arch, subcommand: str, rustflags: list[str], extra_env: dict[str, str] = {}) -> list[str]:
 89def bazel(
 90    arch: Arch,
 91    subcommand: str,
 92    rustflags: list[str],
 93    extra_env: dict[str, str] = {},
 94) -> list[str]:
 95    """Construct a Bazel invocation for cross compiling.
 96
 97    Args:
 98        arch: The CPU architecture to build for.
 99        subcommand: The Bazel subcommand to invoke.
100        rustflags: Override the flags passed to the Rust compiler. If the list
101            is empty, the default flags are used.
102        extra_env: Extra environment variables to set for the execution of
103            Bazel.
104        is_tagged_build: Should this build be stamped with release info.
105    """
106    # Note: Unlike `cargo`, Bazel does not use CI_BUILDER and all of the cross
107    # compilation is handled at a higher level.
108
109    platform = f"--platforms=@toolchains_llvm//platforms:linux-{str(arch)}"
110    assert not (
111        sys.platform == "darwin" and arch == Arch.X86_64
112    ), "cross compiling to Linux x86_64 is not supported from macOS"
113
114    bazel_flags = ["--config=linux"]
115
116    rustc_flags = [
117        f"--@rules_rust//:extra_rustc_flag={flag}"
118        for flag in rustflags
119        # We apply `tokio_unstable` at the `WORKSPACE` level so skip it here to
120        # prevent changing the compile options and possibly missing cache hits.
121        if "tokio_unstable" not in flag
122    ]
123
124    return ["bazel", subcommand, platform, *bazel_flags, *rustc_flags]

Construct a Bazel invocation for cross compiling.

Args:
- arch: The CPU architecture to build for.
- subcommand: The Bazel subcommand to invoke.
- rustflags: Override the flags passed to the Rust compiler. If the list is empty, the default flags are used.
- extra_env: Extra environment variables to set for the execution of Bazel.
- is_tagged_build: Whether this build should be stamped with release info. (Note: the signature above has no `is_tagged_build` parameter, so this entry appears to be stale.)

def cargo( arch: Arch, subcommand: str, rustflags: list[str], channel: str | None = None, extra_env: dict[str, str] = {}) -> list[str]:
def cargo(
    arch: Arch,
    subcommand: str,
    rustflags: list[str],
    channel: str | None = None,
    extra_env: dict[str, str] = {},
) -> list[str]:
    """Construct a Cargo invocation for cross compiling.

    Args:
        arch: The CPU architecture to build for.
        subcommand: The Cargo subcommand to invoke.
        rustflags: Flags to pass to the Rust compiler. The flags required for
            cross compiling are appended to a copy of this list; the list
            passed in is not modified.
        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
        extra_env: Extra environment variables to set for the execution of
            Cargo. Variables set by this function (e.g. `RUSTFLAGS`) take
            precedence over entries with the same name.

    Returns:
        A list of arguments specifying the beginning of the command to invoke.
    """
    _target = target(arch)
    _target_env = _target.upper().replace("-", "_")

    # Copy so that neither the caller's dict nor the shared default is touched.
    env = {
        **extra_env,
    }

    # Build a new list rather than using `+=` on the parameter: augmented
    # assignment extends the caller's list in place, which would make the
    # flags below accumulate across calls that share the same list object.
    rustflags = [
        *rustflags,
        "-Clink-arg=-Wl,--compress-debug-sections=zlib",
        "-Clink-arg=-Wl,-O3",
        "-Csymbol-mangling-version=v0",
        "--cfg=tokio_unstable",
    ]

    if sys.platform == "darwin":
        _bootstrap_darwin(arch)
        lld_prefix = spawn.capture(["brew", "--prefix", "lld"]).strip()
        sysroot = spawn.capture([f"{_target}-cc", "-print-sysroot"]).strip()
        rustflags += [
            f"-L{sysroot}/lib",
            "-Clink-arg=-fuse-ld=lld",
            f"-Clink-arg=-B{lld_prefix}/bin",
        ]
        env.update(
            {
                "CMAKE_SYSTEM_NAME": "Linux",
                f"CARGO_TARGET_{_target_env}_LINKER": f"{_target}-cc",
                "CARGO_TARGET_DIR": str(MZ_ROOT / "target-xcompile"),
                "TARGET_AR": f"{_target}-ar",
                "TARGET_CPP": f"{_target}-cpp",
                "TARGET_CC": f"{_target}-cc",
                "TARGET_CXX": f"{_target}-c++",
                "TARGET_CXXSTDLIB": "static=stdc++",
                "TARGET_LD": f"{_target}-ld",
                "TARGET_RANLIB": f"{_target}-ranlib",
            }
        )
    else:
        # NOTE(benesch): The required Rust flags have to be duplicated with
        # their definitions in ci/builder/Dockerfile because `rustc` has no way
        # to merge together Rust flags from different sources.
        rustflags += [
            "-Clink-arg=-fuse-ld=lld",
            f"-L/opt/x-tools/{_target}/{_target}/sysroot/lib",
        ]

    env.update({"RUSTFLAGS": " ".join(rustflags)})

    return [
        *_enter_builder(arch, channel),
        "env",
        *(f"{k}={v}" for k, v in env.items()),
        "cargo",
        subcommand,
        "--target",
        _target,
    ]

Construct a Cargo invocation for cross compiling.

Args: arch: The CPU architecture to build for. subcommand: The Cargo subcommand to invoke. rustflags: Override the flags passed to the Rust compiler. If the list is empty, the default flags are used. channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".

Returns: A list of arguments specifying the beginning of the command to invoke.

def tool( arch: Arch, name: str, channel: str | None = None, prefix_name: bool = True) -> list[str]:
def tool(
    arch: Arch, name: str, channel: str | None = None, prefix_name: bool = True
) -> list[str]:
    """Build the start of a command line that runs a cross-compiling binutils tool.

    Args:
        arch: The CPU architecture to build for.
        name: The name of the binutils tool to invoke.
        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
        prefix_name: When true, the tool name is prefixed with the target
            triple for `arch`; otherwise `name` is used verbatim.

    Returns:
        A list of arguments specifying the beginning of the command to invoke.
    """
    # On macOS the cross-compiling toolchain is bootstrapped on the host first.
    if sys.platform == "darwin":
        _bootstrap_darwin(arch)

    if prefix_name:
        executable = f"{target(arch)}-{name}"
    else:
        executable = name

    invocation = list(_enter_builder(arch, channel))
    invocation.append(executable)
    return invocation

Constructs a cross-compiling binutils tool invocation.

Args: arch: The CPU architecture to build for. name: The name of the binutils tool to invoke. channel: The Rust toolchain channel to use. Either None/"stable" or "nightly". prefix_name: Whether or not the tool name should be prefixed with the target architecture.

Returns: A list of arguments specifying the beginning of the command to invoke.