From 405ca31d03180ed1369058a5f611f5cac5f4e812 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 11 Jun 2026 12:43:25 +0200 Subject: [PATCH 1/5] Don't leak gpg-agent when signing with gpg mkosi signs SHA256SUMS by running gpg, which autostarts a gpg-agent if none is running. As mkosi's sandbox has no PID namespace, that agent daemonizes and is leaked when the sandbox goes away. This is even worse when running unprivileged, as the leaked agents hold systemd-nsresourced dynamic UID ranges and eventually exhaust the pool (`io.systemd.NamespaceResource.NoDynamicRange`). But the process is leaked either way. Shut the agent down after signing. Note that this will also kill a "real" user agent if one was running already; but that is hard/racy to avoid, and gpg auto-starts a new one anyway. --- mkosi/__init__.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mkosi/__init__.py b/mkosi/__init__.py index 57555d704..9e35c53db 100644 --- a/mkosi/__init__.py +++ b/mkosi/__init__.py @@ -2565,11 +2565,22 @@ def calculate_signature_gpg(context: Context) -> None: ] # fmt: skip with complete_step("Signing SHA256SUMS…"): - run( - cmdline, - env=env, - sandbox=context.sandbox(options=options), - ) + try: + run( + cmdline, + env=env, + sandbox=context.sandbox(options=options), + ) + finally: + # gpg autostarts a gpg-agent to sign and, as the sandbox has no PID namespace, that agent is + # leaked when the sandbox goes away. Note this will also kill a "real" user agent, but + # gpg auto-starts a new one. + run( + ["gpgconf", "--kill", "gpg-agent"], + env=env, + sandbox=context.sandbox(options=options), + check=False, + ) def calculate_signature_sop(context: Context) -> None: From c41ef8c40679f45386109b7bd0c17ace66b76962 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Sat, 13 Jun 2026 05:08:08 +0200 Subject: [PATCH 2/5] tests: Reduce VM RAM size Run VMs with 1.5 GiB of RAM by default, which is enough for most tests. 
This doubles the test density for parallel runs, as GitHub's default runners have a little less than 4 GiB in total, which could not even fit two parallel runs before. The only exception is `test_initrd_luks`: repart's default LUKS2 KDF (Argon2id) is memory-hard and needs ~1 GiB of RAM just to derive the key. Run that with 2 GiB as before. --- tests/__init__.py | 9 +++++++-- tests/test_initrd.py | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 02757774e..a04f45781 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -129,7 +129,12 @@ def boot(self, options: Sequence[str] = (), args: Sequence[str] = ()) -> Complet return result - def vm(self, options: Sequence[str] = (), args: Sequence[str] = ()) -> CompletedProcess: + def vm( + self, + options: Sequence[str] = (), + args: Sequence[str] = (), + ram: str = "1536M", + ) -> CompletedProcess: need_hyperv_workaround = os.uname().machine == "x86_64" result = self.mkosi( @@ -139,7 +144,7 @@ def vm(self, options: Sequence[str] = (), args: Sequence[str] = ()) -> Completed "--vsock=yes", # TODO: Drop once both Hyper-V bugs are fixed in Github Actions. *(["--qemu-args=-cpu max,pcid=off"] if need_hyperv_workaround else []), - "--ram=2G", + f"--ram={ram}", "--ephemeral=yes", "--register=no", *options, diff --git a/tests/test_initrd.py b/tests/test_initrd.py index 2cc618975..ef77c4806 100644 --- a/tests/test_initrd.py +++ b/tests/test_initrd.py @@ -150,7 +150,9 @@ def test_initrd_luks(config: ImageConfig, passphrase: Path) -> None: with Image(config) as image: image.build(["--repart-directory", repartd, "--passphrase", passphrase, "--format=disk"]) - image.vm(["--credential=cryptsetup.passphrase=mkosi"]) + # repart's default LUKS2 KDF (Argon2id) is memory-hard and needs ~1 GiB of RAM just to derive + # the key, so the default VM RAM isn't enough to unlock the root. 
+ image.vm(["--credential=cryptsetup.passphrase=mkosi"], ram="2G") @pytest.mark.skipif(os.getuid() != 0, reason="mkosi-initrd LUKS+LVM test can only be executed as root") From cab4b6f38c15940c73d6abbbfafbc11d5db891ba Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Sat, 13 Jun 2026 07:58:11 +0200 Subject: [PATCH 3/5] tests: Use unique machine names Without `--machine`, mkosi defaults to name "mkosi". This breaks parallel test runs. The vsock CID name is derived from the machine name, so this automatically becomes unique as well. --- tests/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/__init__.py b/tests/__init__.py index a04f45781..d37c47b68 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -37,7 +37,10 @@ def __enter__(self) -> "Image": else: tmpdir = Path("/var/tmp") - self.output_dir = Path(os.getenv("TMPDIR", tmpdir)) / uuid.uuid4().hex[:16] + token = uuid.uuid4().hex[:16] + self.output_dir = Path(os.getenv("TMPDIR", tmpdir)) / token + # Unique VM name to support parallel runs; CID name is derived from machine name + self.machine = f"mkosi-{token}" return self @@ -117,6 +120,8 @@ def boot(self, options: Sequence[str] = (), args: Sequence[str] = ()) -> Complet "--runtime-build-sources=no", "--ephemeral=yes", "--register=no", + "--machine", + self.machine, *options, ], args, @@ -147,6 +152,8 @@ def vm( f"--ram={ram}", "--ephemeral=yes", "--register=no", + "--machine", + self.machine, *options, ], args, From b7a0525f0f3cef4aacd938f520d814cdfa97f7c4 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Sat, 13 Jun 2026 08:31:08 +0200 Subject: [PATCH 4/5] Put build history into the output directory Running e.g. `test_initrd` and `test_initrd_luks` in parallel fails one of them with "Image 'main' has not been built yet". 
The integration tests build into a per-test `--output-directory`, but `vm()`/`boot()` did not pass it, so those verbs recovered the build configuration from the *shared* global history in `.mkosi-private/history/latest.json` under the config directory. With concurrent builds that file holds whatever the last build wrote, so a verb reads back another build's config (e.g. the wrong `Format=`). Tie the build history to the output directory: when an output directory is given on the CLI, store and read the history under it instead of in the config directory. Each build's history is then isolated, and a verb pointed at a given `--output-directory` reads back exactly that build's configuration. In the tests, pass `--output-directory` to `vm()` and `boot()` as well. As a consequence, `mkosi vm` (and the other verbs that consume a previous build) now requires `-O`/`--output-directory` when the build used one. This is a behaviour change, but unbreaks having more than one output dir. Note: If a config file sets `OutputDirectory=`, the history continues to be in the config dir, as before. The computation of the history directory (necessarily) happens before parsing the config files/includes, so the relocation *only* applies when the output directory is given on the CLI. Rejected alternatives: * This cannot be worked around with `--history=no` in the tests: that only disables *writing* history, not *reading* it, so vm/boot still pick up a stale (in our setup, empty) `latest.json` and fall back to the wrong config. * A dedicated `--history-dir` option would just be redundant with `--output-directory`. 
--- mkosi/config.py | 12 ++++++++++-- tests/__init__.py | 6 ++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/mkosi/config.py b/mkosi/config.py index 100849507..9513b488c 100644 --- a/mkosi/config.py +++ b/mkosi/config.py @@ -5445,7 +5445,15 @@ def want_default_initrd(config: Config) -> bool: return Path("default") in config.initrds -def finalize_historydir(args: Args) -> Path: +def finalize_historydir(args: Args, output_dir: Optional[Path] = None) -> Path: + # When an output dir is given on the CLI, store the build history there so that concurrent builds with + # different output dirs don't clobber a shared history. Don't check the finalized OutputDirectory= + # config, only the CLI value: the former isn't known yet here (config files and includes are + # parsed later) and vm/boot can't see it anyway since they recover the config from the history instead of + # parsing it. An output dir set only in config files keeps the history in the config dir. + if output_dir is not None: + return output_dir / ".mkosi-private/history" + configdir = finalize_configdir(args.directory) return (configdir or Path.cwd()) / ".mkosi-private/history" @@ -5502,7 +5510,7 @@ def parse_config( return args, None, () configdir = finalize_configdir(args.directory) - historydir = finalize_historydir(args) + historydir = finalize_historydir(args, context.cli.get("output_dir")) if have_history(args, historydir): history = Config.from_partial_json((historydir / "latest.json").read_text()) diff --git a/tests/__init__.py b/tests/__init__.py index d37c47b68..22fedaacc 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -122,12 +122,13 @@ def boot(self, options: Sequence[str] = (), args: Sequence[str] = ()) -> Complet "--register=no", "--machine", self.machine, + "--output-directory", self.output_dir, *options, ], args, stdin=sys.stdin if sys.stdin.isatty() else None, check=False, - ) + ) # fmt: skip if result.returncode != 123: raise 
subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr) @@ -154,12 +155,13 @@ def vm( "--register=no", "--machine", self.machine, + "--output-directory", self.output_dir, *options, ], args, stdin=sys.stdin if sys.stdin.isatty() else None, check=False, - ) + ) # fmt: skip if result.returncode != 123: raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr) From d876b6366151775cd08c2f1a2c709145412dd534 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Sun, 14 Jun 2026 14:43:06 +0200 Subject: [PATCH 5/5] Lock the package cache during package manager invocations The package cache directory is shared between all mkosi builds of the same distribution (see `Config.package_cache_dir_or_default()`) and is bind mounted read-write into every package manager sandbox by `mounts()`. When multiple builds run in parallel, they download packages into it concurrently, which corrupts in-flight cache files: dnf's rpm gets truncated mid-unpack ("Errors occurred during transaction"), zypper can't hardlink its preloaded rpm into place ("Can't hardlink/copy ... .preload/..."), etc. Observed when running `test_addon` and `test_confext` in parallel: both build extension images with `--incremental=no --package=lsof`, so they download lsof into the shared cache at the same time and clobber each other. Lock the package cache directory for the duration of every package manager invocation to serialize writes. Every package manager operation goes through `sandbox()`, so locking there covers install, sync, and remove across all package managers. Builds of different distributions use different cache directories and so don't contend, and cached/incremental builds that don't invoke the package manager never take the lock, so parallelism is preserved where it matters. 
Co-Authored-By: Claude Opus 4.8 --- mkosi/installer/__init__.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/mkosi/installer/__init__.py b/mkosi/installer/__init__.py index 9ebec1b2c..c65ea612e 100644 --- a/mkosi/installer/__init__.py +++ b/mkosi/installer/__init__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-2.1-or-later -from collections.abc import Sequence -from contextlib import AbstractContextManager +import contextlib +from collections.abc import Iterator, Sequence from pathlib import Path from mkosi.config import Config, ConfigFeature, OutputFormat @@ -9,7 +9,7 @@ from mkosi.mounts import finalize_certificate_mounts from mkosi.run import apivfs_options, finalize_interpreter, finalize_passwd_symlinks, find_binary from mkosi.tree import rmtree -from mkosi.util import PathString, flatten, startswith +from mkosi.util import PathString, flatten, flock, startswith class PackageManager: @@ -153,22 +153,32 @@ def apivfs_script_cmd(cls, context: Context) -> list[PathString]: ] # fmt: skip @classmethod + @contextlib.contextmanager def sandbox( cls, context: Context, *, apivfs: bool, options: Sequence[PathString] = (), - ) -> AbstractContextManager[list[PathString]]: - return context.sandbox( - network=True, - options=[ - *context.rootoptions(), - *cls.mounts(context), - *cls.options(root=context.root, apivfs=apivfs), - *options, - ], - ) # fmt: skip + ) -> Iterator[list[PathString]]: + # The package cache directory is shared between all mkosi builds of the same distribution (see + # Config.package_cache_dir_or_default()) and is bind mounted read-write into every package manager + # sandbox by mounts(). Avoid concurrent downloads, as those result in corruption/truncation and + # unnecessary duplicate fetches. Builds of different distributions use different cache directories + # and so don't contend. 
+ with ( + flock(context.config.package_cache_dir_or_default()), + context.sandbox( + network=True, + options=[ + *context.rootoptions(), + *cls.mounts(context), + *cls.options(root=context.root, apivfs=apivfs), + *options, + ], + ) as sandbox, + ): + yield sandbox @classmethod def install(