misc.python.materialize.xcompile

Support for cross-compiling to Linux.

  1# Copyright Materialize, Inc. and contributors. All rights reserved.
  2#
  3# Use of this software is governed by the Business Source License
  4# included in the LICENSE file at the root of this repository.
  5#
  6# As of the Change Date specified in that file, in accordance with
  7# the Business Source License, use of this software will be governed
  8# by the Apache License, Version 2.0.
  9
 10"""Support for cross-compiling to Linux."""
 11
 12import os
 13import platform
 14import sys
 15from enum import Enum
 16
 17from materialize import MZ_ROOT, spawn
 18from materialize.rustc_flags import Sanitizer
 19
 20
 21class Arch(Enum):
 22    """A CPU architecture."""
 23
 24    X86_64 = "x86_64"
 25    """The 64-bit x86 architecture."""
 26
 27    AARCH64 = "aarch64"
 28    """The 64-bit ARM architecture."""
 29
 30    def __str__(self) -> str:
 31        return self.value
 32
 33    def go_str(self) -> str:
 34        """Return the architecture name in Go nomenclature: amd64 or arm64."""
 35        if self == Arch.X86_64:
 36            return "amd64"
 37        elif self == Arch.AARCH64:
 38            return "arm64"
 39        else:
 40            raise RuntimeError("unreachable")
 41
 42    @staticmethod
 43    def host() -> "Arch":
 44        if platform.machine() == "x86_64":
 45            return Arch.X86_64
 46        elif platform.machine() in ["aarch64", "arm64"]:
 47            return Arch.AARCH64
 48        else:
 49            raise RuntimeError(f"unknown host architecture {platform.machine()}")
 50
 51
 52def target(arch: Arch) -> str:
 53    """Construct a Linux target triple for the specified architecture."""
 54    return f"{arch}-unknown-linux-gnu"
 55
 56
 57def target_cpu(arch: Arch) -> str:
 58    """
 59    Return the CPU micro architecture, assuming a Linux target, we should use for Rust compilation.
 60
 61    Sync: This target-cpu should be kept in sync with the one in ci-builder and .cargo/config.
 62    """
 63    if arch == Arch.X86_64:
 64        return "x86-64-v3"
 65    elif arch == Arch.AARCH64:
 66        return "neoverse-n1"
 67    else:
 68        raise RuntimeError("unreachable")
 69
 70
 71def target_features(arch: Arch) -> list[str]:
 72    """
 73    Returns a list of CPU features we should enable for Rust compilation.
 74
 75    Note: We also specify the CPU target when compiling Rust which should enable the majority of
 76    available CPU features.
 77
 78    Sync: This list of features should be kept in sync with the one in ci-builder and .cargo/config.
 79    """
 80    if arch == Arch.X86_64:
 81        return ["+aes", "+pclmulqdq"]
 82    elif arch == Arch.AARCH64:
 83        return ["+aes", "+sha2"]
 84    else:
 85        raise RuntimeError("unreachable")
 86
 87
 88def cargo(
 89    arch: Arch,
 90    subcommand: str,
 91    rustflags: list[str],
 92    channel: str | None = None,
 93    extra_env: dict[str, str] = {},
 94) -> list[str]:
 95    """Construct a Cargo invocation for cross compiling.
 96
 97    Args:
 98        arch: The CPU architecture to build for.
 99        subcommand: The Cargo subcommand to invoke.
100        rustflags: Override the flags passed to the Rust compiler. If the list
101            is empty, the default flags are used.
102        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
103
104    Returns:
105        A list of arguments specifying the beginning of the command to invoke.
106    """
107    _target = target(arch)
108    _target_env = _target.upper().replace("-", "_")
109    _target_cpu = target_cpu(arch)
110    _target_features = ",".join(target_features(arch))
111
112    env = {
113        **extra_env,
114    }
115
116    rustflags += [
117        "-Clink-arg=-Wl,--compress-debug-sections=zlib",
118        "-Clink-arg=-Wl,-O3",
119        "-Csymbol-mangling-version=v0",
120        f"-Ctarget-cpu={_target_cpu}",
121        f"-Ctarget-feature={_target_features}",
122        "--cfg=tokio_unstable",
123    ]
124
125    if sys.platform == "darwin":
126        _bootstrap_darwin(arch)
127        lld_prefix = spawn.capture(["brew", "--prefix", "lld"]).strip()
128        libfdb_c_prefix = spawn.capture(
129            ["brew", "--prefix", f"libfdb-c-{target(arch)}"]
130        ).strip()
131        sysroot = spawn.capture([f"{_target}-cc", "-print-sysroot"]).strip()
132        rustflags += [
133            f"-L{sysroot}/lib",
134            f"-L{libfdb_c_prefix}/lib",
135            "-Clink-arg=-fuse-ld=lld",
136            f"-Clink-arg=-B{lld_prefix}/bin",
137        ]
138        env.update(
139            {
140                "CMAKE_SYSTEM_NAME": "Linux",
141                f"CARGO_TARGET_{_target_env}_LINKER": f"{_target}-cc",
142                "CARGO_TARGET_DIR": str(MZ_ROOT / "target-xcompile"),
143                "TARGET_AR": f"{_target}-ar",
144                "TARGET_CPP": f"{_target}-cpp",
145                "TARGET_CC": f"{_target}-cc",
146                "TARGET_CXX": f"{_target}-c++",
147                "TARGET_CXXSTDLIB": "static=stdc++",
148                "TARGET_LD": f"{_target}-ld",
149                "TARGET_RANLIB": f"{_target}-ranlib",
150            }
151        )
152    else:
153        # NOTE(benesch): The required Rust flags have to be duplicated with
154        # their definitions in ci/builder/Dockerfile because `rustc` has no way
155        # to merge together Rust flags from different sources.
156        rustflags += [
157            "-Clink-arg=-fuse-ld=lld",
158            f"-L/opt/x-tools/{_target}/{_target}/sysroot/lib",
159        ]
160
161    env.update({"RUSTFLAGS": " ".join(rustflags)})
162
163    return [
164        *_enter_builder(arch, channel),
165        "env",
166        *(f"{k}={v}" for k, v in env.items()),
167        "cargo",
168        subcommand,
169        "--target",
170        _target,
171    ]
172
173
def tool(
    arch: Arch, name: str, channel: str | None = None, prefix_name: bool = True
) -> list[str]:
    """Construct the start of a cross-compiling binutils tool invocation.

    On macOS the host cross-compiling toolchain is bootstrapped first (see
    `_bootstrap_darwin`).

    Args:
        arch: The CPU architecture to build for.
        name: The name of the binutils tool to invoke.
        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
        prefix_name: Whether to prefix the tool name with the target triple
            for `arch` (e.g. `<triple>-<name>`) or to use `name` as is.

    Returns:
        A list of arguments specifying the beginning of the command to invoke.
    """
    if sys.platform == "darwin":
        _bootstrap_darwin(arch)

    if prefix_name:
        binary = f"{target(arch)}-{name}"
    else:
        binary = name

    command = list(_enter_builder(arch, channel))
    command.append(binary)
    return command
196
197
def _enter_builder(arch: Arch, channel: str | None = None) -> list[str]:
    """Return the command prefix that runs a command via `bin/ci-builder`.

    The prefix is empty when the `MZ_DEV_CI_BUILDER` environment variable is
    set (presumably meaning we are already inside the builder -- confirm) or
    when running on macOS, where a host toolchain is used instead.

    Args:
        arch: The CPU architecture, exported as `MZ_DEV_CI_BUILDER_ARCH`.
        channel: The Rust toolchain channel. When not given, "stable" is used
            unless the `CI_SANITIZER` environment variable selects a sanitizer
            other than `none`, in which case "nightly" is used.

    Returns:
        The (possibly empty) list of leading command arguments.
    """
    runs_on_host = "MZ_DEV_CI_BUILDER" in os.environ or sys.platform == "darwin"
    if runs_on_host:
        return []

    # The sanitizer name is resolved even when an explicit channel is given,
    # so an unknown `CI_SANITIZER` value fails here in either case.
    sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]
    fallback_channel = "stable" if sanitizer == Sanitizer.none else "nightly"

    return [
        "env",
        f"MZ_DEV_CI_BUILDER_ARCH={arch}",
        "bin/ci-builder",
        "run",
        channel or fallback_channel,
    ]
214
215
def _bootstrap_darwin(arch: Arch) -> None:
    """Install the host cross-compiling toolchain for `arch` if needed.

    A marker file under `target-xcompile/<triple>/` records the bootstrap
    version that was last installed. When it already holds the current
    version nothing is done; otherwise the toolchain is (re)installed and the
    marker is rewritten.
    """
    # Building in Docker for Mac is painfully slow, so we install a
    # cross-compiling toolchain on the host and use that instead.

    triple = target(arch)
    expected_version = "6"
    marker = MZ_ROOT / "target-xcompile" / triple / ".xcompile-bootstrap"

    try:
        installed_version = marker.read_text()
    except FileNotFoundError:
        # No marker yet: treat as never bootstrapped.
        installed_version = ""

    if installed_version != expected_version:
        brew_packages = [
            "lld",
            f"materializeinc/crosstools/{triple}",
            f"materializeinc/crosstools/libfdb-c-{triple}",
        ]
        spawn.runv(["brew", "install", *brew_packages])
        spawn.runv(["rustup", "target", "add", triple])

        # Only record the version once both installs have been run.
        marker.parent.mkdir(parents=True, exist_ok=True)
        marker.write_text(expected_version)
class Arch(enum.Enum):
class Arch(Enum):
    """A CPU architecture that can be targeted when cross-compiling.

    The value of each member is the architecture's canonical name, and
    converting a member to a string yields that value.
    """

    X86_64 = "x86_64"
    """The 64-bit x86 architecture."""

    AARCH64 = "aarch64"
    """The 64-bit ARM architecture."""

    def __str__(self) -> str:
        return self.value

    def go_str(self) -> str:
        """Return the architecture name in Go nomenclature: amd64 or arm64."""
        go_names = {Arch.X86_64: "amd64", Arch.AARCH64: "arm64"}
        go_name = go_names.get(self)
        if go_name is None:
            raise RuntimeError("unreachable")
        return go_name

    @staticmethod
    def host() -> "Arch":
        """Return the architecture of the machine running this code.

        Raises:
            RuntimeError: If `platform.machine()` reports a name that is not
                one of the recognized spellings of a supported architecture.
        """
        machine = platform.machine()
        if machine == "x86_64":
            return Arch.X86_64
        # 64-bit ARM may be reported under either of these two names.
        if machine in ("aarch64", "arm64"):
            return Arch.AARCH64
        raise RuntimeError(f"unknown host architecture {machine}")

A CPU architecture.

X86_64 = <Arch.X86_64: 'x86_64'>

The 64-bit x86 architecture.

AARCH64 = <Arch.AARCH64: 'aarch64'>

The 64-bit ARM architecture.

def go_str(self) -> str:
34    def go_str(self) -> str:
35        """Return the architecture name in Go nomenclature: amd64 or arm64."""
36        if self == Arch.X86_64:
37            return "amd64"
38        elif self == Arch.AARCH64:
39            return "arm64"
40        else:
41            raise RuntimeError("unreachable")

Return the architecture name in Go nomenclature: amd64 or arm64.

@staticmethod
def host() -> Arch:
43    @staticmethod
44    def host() -> "Arch":
45        if platform.machine() == "x86_64":
46            return Arch.X86_64
47        elif platform.machine() in ["aarch64", "arm64"]:
48            return Arch.AARCH64
49        else:
50            raise RuntimeError(f"unknown host architecture {platform.machine()}")
def target(arch: Arch) -> str:
def target(arch: Arch) -> str:
    """Return the Linux target triple for `arch`.

    The triple is the string form of the architecture followed by the fixed
    `unknown-linux-gnu` vendor/OS/ABI suffix.
    """
    suffix = "unknown-linux-gnu"
    return f"{arch}-{suffix}"

Construct a Linux target triple for the specified architecture.

def target_cpu(arch: Arch) -> str:
def target_cpu(arch: Arch) -> str:
    """
    Return the CPU microarchitecture to use for Rust compilation, assuming a Linux target.

    Sync: This target-cpu should be kept in sync with the one in ci-builder and .cargo/config.
    """
    cpu_by_arch = (
        (Arch.X86_64, "x86-64-v3"),
        (Arch.AARCH64, "neoverse-n1"),
    )
    for candidate, cpu in cpu_by_arch:
        if arch == candidate:
            return cpu
    raise RuntimeError("unreachable")

Return the CPU microarchitecture that we should use for Rust compilation, assuming a Linux target.

Sync: This target-cpu should be kept in sync with the one in ci-builder and .cargo/config.

def target_features(arch: Arch) -> list[str]:
def target_features(arch: Arch) -> list[str]:
    """
    Return the CPU features to enable for Rust compilation.

    Each entry is a feature name prefixed with `+`. A new list is returned on
    every call.

    Note: We also specify the CPU target when compiling Rust which should enable the majority of
    available CPU features.

    Sync: This list of features should be kept in sync with the one in ci-builder and .cargo/config.
    """
    if arch == Arch.X86_64:
        x86_64_features = ["+aes", "+pclmulqdq"]
        return x86_64_features
    if arch == Arch.AARCH64:
        aarch64_features = ["+aes", "+sha2"]
        return aarch64_features
    raise RuntimeError("unreachable")

Returns a list of CPU features we should enable for Rust compilation.

Note: We also specify the CPU target when compiling Rust which should enable the majority of available CPU features.

Sync: This list of features should be kept in sync with the one in ci-builder and .cargo/config.

def cargo( arch: Arch, subcommand: str, rustflags: list[str], channel: str | None = None, extra_env: dict[str, str] = {}) -> list[str]:
 89def cargo(
 90    arch: Arch,
 91    subcommand: str,
 92    rustflags: list[str],
 93    channel: str | None = None,
 94    extra_env: dict[str, str] = {},
 95) -> list[str]:
 96    """Construct a Cargo invocation for cross compiling.
 97
 98    Args:
 99        arch: The CPU architecture to build for.
100        subcommand: The Cargo subcommand to invoke.
101        rustflags: Override the flags passed to the Rust compiler. If the list
102            is empty, the default flags are used.
103        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
104
105    Returns:
106        A list of arguments specifying the beginning of the command to invoke.
107    """
108    _target = target(arch)
109    _target_env = _target.upper().replace("-", "_")
110    _target_cpu = target_cpu(arch)
111    _target_features = ",".join(target_features(arch))
112
113    env = {
114        **extra_env,
115    }
116
117    rustflags += [
118        "-Clink-arg=-Wl,--compress-debug-sections=zlib",
119        "-Clink-arg=-Wl,-O3",
120        "-Csymbol-mangling-version=v0",
121        f"-Ctarget-cpu={_target_cpu}",
122        f"-Ctarget-feature={_target_features}",
123        "--cfg=tokio_unstable",
124    ]
125
126    if sys.platform == "darwin":
127        _bootstrap_darwin(arch)
128        lld_prefix = spawn.capture(["brew", "--prefix", "lld"]).strip()
129        libfdb_c_prefix = spawn.capture(
130            ["brew", "--prefix", f"libfdb-c-{target(arch)}"]
131        ).strip()
132        sysroot = spawn.capture([f"{_target}-cc", "-print-sysroot"]).strip()
133        rustflags += [
134            f"-L{sysroot}/lib",
135            f"-L{libfdb_c_prefix}/lib",
136            "-Clink-arg=-fuse-ld=lld",
137            f"-Clink-arg=-B{lld_prefix}/bin",
138        ]
139        env.update(
140            {
141                "CMAKE_SYSTEM_NAME": "Linux",
142                f"CARGO_TARGET_{_target_env}_LINKER": f"{_target}-cc",
143                "CARGO_TARGET_DIR": str(MZ_ROOT / "target-xcompile"),
144                "TARGET_AR": f"{_target}-ar",
145                "TARGET_CPP": f"{_target}-cpp",
146                "TARGET_CC": f"{_target}-cc",
147                "TARGET_CXX": f"{_target}-c++",
148                "TARGET_CXXSTDLIB": "static=stdc++",
149                "TARGET_LD": f"{_target}-ld",
150                "TARGET_RANLIB": f"{_target}-ranlib",
151            }
152        )
153    else:
154        # NOTE(benesch): The required Rust flags have to be duplicated with
155        # their definitions in ci/builder/Dockerfile because `rustc` has no way
156        # to merge together Rust flags from different sources.
157        rustflags += [
158            "-Clink-arg=-fuse-ld=lld",
159            f"-L/opt/x-tools/{_target}/{_target}/sysroot/lib",
160        ]
161
162    env.update({"RUSTFLAGS": " ".join(rustflags)})
163
164    return [
165        *_enter_builder(arch, channel),
166        "env",
167        *(f"{k}={v}" for k, v in env.items()),
168        "cargo",
169        subcommand,
170        "--target",
171        _target,
172    ]

Construct a Cargo invocation for cross compiling.

Args: arch: The CPU architecture to build for. subcommand: The Cargo subcommand to invoke. rustflags: Extra flags to pass to the Rust compiler; the flags required for cross compiling are always appended to them, whether or not the list is empty. channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".

Returns: A list of arguments specifying the beginning of the command to invoke.

def tool( arch: Arch, name: str, channel: str | None = None, prefix_name: bool = True) -> list[str]:
def tool(
    arch: Arch, name: str, channel: str | None = None, prefix_name: bool = True
) -> list[str]:
    """Construct the start of a cross-compiling binutils tool invocation.

    On macOS the host cross-compiling toolchain is bootstrapped first (see
    `_bootstrap_darwin`).

    Args:
        arch: The CPU architecture to build for.
        name: The name of the binutils tool to invoke.
        channel: The Rust toolchain channel to use. Either None/"stable" or "nightly".
        prefix_name: Whether to prefix the tool name with the target triple
            for `arch` (e.g. `<triple>-<name>`) or to use `name` as is.

    Returns:
        A list of arguments specifying the beginning of the command to invoke.
    """
    if sys.platform == "darwin":
        _bootstrap_darwin(arch)

    if prefix_name:
        binary = f"{target(arch)}-{name}"
    else:
        binary = name

    command = list(_enter_builder(arch, channel))
    command.append(binary)
    return command

Constructs a cross-compiling binutils tool invocation.

Args: arch: The CPU architecture to build for. name: The name of the binutils tool to invoke. channel: The Rust toolchain channel to use. Either None/"stable" or "nightly". prefix_name: Whether or not the tool name should be prefixed with the target architecture.

Returns: A list of arguments specifying the beginning of the command to invoke.