From a9adfc631ad71832c2effc561fcc5803e037b1bb Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Fri, 9 May 2025 10:07:15 +0200 Subject: [PATCH 1/3] nixos/test-driver: allow assigning other vsock number ranges I'm a little annoyed at myself that I only realized this _after_ #392030 got merged. But I realized that if something else is using AF_VSOCK or you simply have another interactive test running (e.g. by another user on a larger builder), starting up VMs in the driver fails with qemu-system-x86_64: -device vhost-vsock-pci,guest-cid=3: vhost-vsock: unable to set guest cid: Address already in use Multi-user setups are broken anyway because you usually don't have permissions to remove the VM state from another user and thus starting the driver fails with PermissionError: [Errno 13] Permission denied: PosixPath('/tmp/vm-state-machine') but this is something you can work around at least. I considered generating random offsets, but that's not feasible given we need to know the numbers at eval time to inject them into the QEMU args. Also, while we could do this via the test-driver, we would then also have to probe whether the vsock numbers are unused, making the code even more complex for a use-case I consider rather uncommon. Hence the solution is to set sshBackdoor.vsockOffset = 23542; when encountering conflicts. 
--- ...nning-nixos-tests-interactively.section.md | 24 ++++++++++++++++++- nixos/doc/manual/redirects.json | 3 +++ .../test-driver/src/test_driver/__init__.py | 6 ++--- .../lib/test-driver/src/test_driver/driver.py | 4 ++-- nixos/lib/testing/nodes.nix | 21 ++++++++++++++-- .../modules/testing/test-instrumentation.nix | 19 ++++++++++++++- 6 files changed, 68 insertions(+), 9 deletions(-) diff --git a/nixos/doc/manual/development/running-nixos-tests-interactively.section.md b/nixos/doc/manual/development/running-nixos-tests-interactively.section.md index 2b0f44a04e44..b29f6df5bdaa 100644 --- a/nixos/doc/manual/development/running-nixos-tests-interactively.section.md +++ b/nixos/doc/manual/development/running-nixos-tests-interactively.section.md @@ -87,11 +87,33 @@ $ ssh vsock/3 -o User=root The socket numbers correspond to the node number of the test VM, but start at three instead of one because that's the lowest possible -vsock number. +vsock number. The exact SSH commands are also printed out when starting +`nixos-test-driver`. On non-NixOS systems you'll probably need to enable the SSH config from {manpage}`systemd-ssh-proxy(1)` yourself. +If starting a VM fails with an error like + +``` +qemu-system-x86_64: -device vhost-vsock-pci,guest-cid=3: vhost-vsock: unable to set guest cid: Address already in use +``` + +it means that the vsock numbers for the VMs are already in use. This can happen +if another interactive test with the SSH backdoor enabled is running on the machine. + +In that case, you need to assign another range of vsock numbers. You can pick another +offset with + +```nix +{ + sshBackdoor = { + enable = true; + vsockOffset = 23542; + }; +} +``` + ## Port forwarding to NixOS test VMs {#sec-nixos-test-port-forwarding} If your test has only a single VM, you may use e.g. 
diff --git a/nixos/doc/manual/redirects.json b/nixos/doc/manual/redirects.json index 2c9b0242e9e8..f3548fc6ea04 100644 --- a/nixos/doc/manual/redirects.json +++ b/nixos/doc/manual/redirects.json @@ -1826,6 +1826,9 @@ "test-opt-sshBackdoor.enable": [ "index.html#test-opt-sshBackdoor.enable" ], + "test-opt-sshBackdoor.vsockOffset": [ + "index.html#test-opt-sshBackdoor.vsockOffset" + ], "test-opt-defaults": [ "index.html#test-opt-defaults" ], diff --git a/nixos/lib/test-driver/src/test_driver/__init__.py b/nixos/lib/test-driver/src/test_driver/__init__.py index ff6922e5b90e..86e663da9b7d 100755 --- a/nixos/lib/test-driver/src/test_driver/__init__.py +++ b/nixos/lib/test-driver/src/test_driver/__init__.py @@ -112,7 +112,7 @@ def main() -> None: arg_parser.add_argument( "--dump-vsocks", help="indicates that the interactive SSH backdoor is active and dumps information about it on start", - action="store_true", + type=int, ) args = arg_parser.parse_args() @@ -141,8 +141,8 @@ def main() -> None: if args.interactive: history_dir = os.getcwd() history_path = os.path.join(history_dir, ".nixos-test-history") - if args.dump_vsocks: - driver.dump_machine_ssh() + if offset := args.dump_vsocks: + driver.dump_machine_ssh(offset) ptpython.ipython.embed( user_ns=driver.test_symbols(), history_filename=history_path, diff --git a/nixos/lib/test-driver/src/test_driver/driver.py b/nixos/lib/test-driver/src/test_driver/driver.py index e65c6c5ba511..bf3dda06a617 100644 --- a/nixos/lib/test-driver/src/test_driver/driver.py +++ b/nixos/lib/test-driver/src/test_driver/driver.py @@ -178,14 +178,14 @@ class Driver: ) return {**general_symbols, **machine_symbols, **vlan_symbols} - def dump_machine_ssh(self) -> None: + def dump_machine_ssh(self, offset: int) -> None: print("SSH backdoor enabled, the machines can be accessed like this:") print( f"{Style.BRIGHT}Note:{Style.RESET_ALL} this requires {Style.BRIGHT}systemd-ssh-proxy(1){Style.RESET_ALL} to be enabled (default on NixOS 25.05 and newer)." 
) names = [machine.name for machine in self.machines] longest_name = len(max(names, key=len)) - for num, name in enumerate(names, start=3): + for num, name in enumerate(names, start=offset + 1): spaces = " " * (longest_name - len(name) + 2) print( f" {name}:{spaces}{Style.BRIGHT}ssh -o User=root vsock/{num}{Style.RESET_ALL}" diff --git a/nixos/lib/testing/nodes.nix b/nixos/lib/testing/nodes.nix index 38b32dbb6701..258888fefa0a 100644 --- a/nixos/lib/testing/nodes.nix +++ b/nixos/lib/testing/nodes.nix @@ -84,6 +84,21 @@ in type = types.bool; description = "Whether to turn on the VSOCK-based access to all VMs. This provides an unauthenticated access intended for debugging."; }; + vsockOffset = mkOption { + default = 2; + type = types.ints.between 2 4294967296; + description = '' + By default this assigns vsock numbers starting at 3 to the nodes. + On e.g. large builders used by multiple people, this would cause conflicts + between multiple users doing interactive debugging. + + This option allows to assign an offset to each vsock number to + resolve this. + + This is a 32bit number. The lowest possible vsock number is `3` + (i.e. with the lowest node number being `1`, this is 2+1). 
+ ''; + }; }; node.type = mkOption { @@ -182,7 +197,7 @@ in passthru.nodes = config.nodesCompat; extraDriverArgs = mkIf config.sshBackdoor.enable [ - "--dump-vsocks" + "--dump-vsocks=${toString config.sshBackdoor.vsockOffset}" ]; defaults = mkMerge [ @@ -191,7 +206,9 @@ in imports = [ ../../modules/misc/nixpkgs/read-only.nix ]; }) (mkIf config.sshBackdoor.enable { - testing.sshBackdoor.enable = true; + testing.sshBackdoor = { + inherit (config.sshBackdoor) enable vsockOffset; + }; }) ]; diff --git a/nixos/modules/testing/test-instrumentation.nix b/nixos/modules/testing/test-instrumentation.nix index 8ace4ab30abf..38784ab1c5fc 100644 --- a/nixos/modules/testing/test-instrumentation.nix +++ b/nixos/modules/testing/test-instrumentation.nix @@ -89,6 +89,21 @@ in sshBackdoor = { enable = mkEnableOption "vsock-based ssh backdoor for the VM"; + vsockOffset = mkOption { + default = 2; + type = types.ints.between 2 4294967296; + description = '' + By default this assigns vsock numbers starting at 3 to the nodes. + On e.g. large builders used by multiple people, this would cause conflicts + between multiple users doing interactive debugging. + + This option allows to assign an offset to each vsock number to + resolve this. + + This is a 32bit number. The lowest possible vsock number is `3` + (i.e. with the lowest node number being `1`, this is 2+1). 
+ ''; + }; }; }; @@ -193,7 +208,9 @@ in package = lib.mkDefault pkgs.qemu_test; options = mkIf config.testing.sshBackdoor.enable [ - "-device vhost-vsock-pci,guest-cid=${toString (config.virtualisation.test.nodeNumber + 2)}" + "-device vhost-vsock-pci,guest-cid=${ + toString (config.virtualisation.test.nodeNumber + config.testing.sshBackdoor.vsockOffset) + }" ]; }; }; From 12c544e008ad9db38835c1eff99d94571738b5db Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Fri, 9 May 2025 19:15:51 +0200 Subject: [PATCH 2/3] nixos/testing: improve wording of `vsockOffset` description Co-authored-by: Jacek Galowicz --- nixos/lib/testing/nodes.nix | 7 ++++--- nixos/modules/testing/test-instrumentation.nix | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/nixos/lib/testing/nodes.nix b/nixos/lib/testing/nodes.nix index 258888fefa0a..2afafe52952e 100644 --- a/nixos/lib/testing/nodes.nix +++ b/nixos/lib/testing/nodes.nix @@ -88,9 +88,10 @@ in default = 2; type = types.ints.between 2 4294967296; description = '' - By default this assigns vsock numbers starting at 3 to the nodes. - On e.g. large builders used by multiple people, this would cause conflicts - between multiple users doing interactive debugging. + This field is only relevant when multiple users run the (interactive) + driver outside the sandbox and with the SSH backdoor activated. + The typical symptom for this being a problem are error messages like this: + `vhost-vsock: unable to set guest cid: Address already in use` This option allows to assign an offset to each vsock number to resolve this. 
diff --git a/nixos/modules/testing/test-instrumentation.nix b/nixos/modules/testing/test-instrumentation.nix index 38784ab1c5fc..72098f76ddc5 100644 --- a/nixos/modules/testing/test-instrumentation.nix +++ b/nixos/modules/testing/test-instrumentation.nix @@ -93,9 +93,10 @@ in default = 2; type = types.ints.between 2 4294967296; description = '' - By default this assigns vsock numbers starting at 3 to the nodes. - On e.g. large builders used by multiple people, this would cause conflicts - between multiple users doing interactive debugging. + This field is only relevant when multiple users run the (interactive) + driver outside the sandbox and with the SSH backdoor activated. + The typical symptom for this being a problem are error messages like this: + `vhost-vsock: unable to set guest cid: Address already in use` This option allows to assign an offset to each vsock number to resolve this. From b8b86834b74b0b350be3ff3b2d1cdf5146db9dfa Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sat, 10 May 2025 10:38:38 +0200 Subject: [PATCH 3/3] nixos/test-driver: move sshBackdoor cfg from test-instrumentation to driver That way, we don't need to duplicate the sshBackdoor options on NixOS-level. Suggested-by: Jacek Galowicz --- nixos/lib/testing/nodes.nix | 32 ++++++++++++--- .../modules/testing/test-instrumentation.nix | 39 ------------------- 2 files changed, 26 insertions(+), 45 deletions(-) diff --git a/nixos/lib/testing/nodes.nix b/nixos/lib/testing/nodes.nix index 2afafe52952e..b2352c478110 100644 --- a/nixos/lib/testing/nodes.nix +++ b/nixos/lib/testing/nodes.nix @@ -88,7 +88,7 @@ in default = 2; type = types.ints.between 2 4294967296; description = '' - This field is only relevant when multiple users run the (interactive) + This field is only relevant when multiple users run the (interactive) driver outside the sandbox and with the SSH backdoor activated. 
The typical symptom for this being a problem are error messages like this: `vhost-vsock: unable to set guest cid: Address already in use` @@ -206,11 +206,31 @@ in nixpkgs.pkgs = config.node.pkgs; imports = [ ../../modules/misc/nixpkgs/read-only.nix ]; }) - (mkIf config.sshBackdoor.enable { - testing.sshBackdoor = { - inherit (config.sshBackdoor) enable vsockOffset; - }; - }) + (mkIf config.sshBackdoor.enable ( + let + inherit (config.sshBackdoor) vsockOffset; + in + { config, ... }: + { + services.openssh = { + enable = true; + settings = { + PermitRootLogin = "yes"; + PermitEmptyPasswords = "yes"; + }; + }; + + security.pam.services.sshd = { + allowNullPassword = true; + }; + + virtualisation.qemu.options = [ + "-device vhost-vsock-pci,guest-cid=${ + toString (config.virtualisation.test.nodeNumber + vsockOffset) + }" + ]; + } + )) ]; }; diff --git a/nixos/modules/testing/test-instrumentation.nix b/nixos/modules/testing/test-instrumentation.nix index 72098f76ddc5..80852be51f1d 100644 --- a/nixos/modules/testing/test-instrumentation.nix +++ b/nixos/modules/testing/test-instrumentation.nix @@ -86,27 +86,6 @@ in enables commands to be sent to test and debug stage 1. Use machine.switch_root() to leave stage 1 and proceed to stage 2 ''; - - sshBackdoor = { - enable = mkEnableOption "vsock-based ssh backdoor for the VM"; - vsockOffset = mkOption { - default = 2; - type = types.ints.between 2 4294967296; - description = '' - This field is only relevant when multiple users run the (interactive) - driver outside the sandbox and with the SSH backdoor activated. - The typical symptom for this being a problem are error messages like this: - `vhost-vsock: unable to set guest cid: Address already in use` - - This option allows to assign an offset to each vsock number to - resolve this. - - This is a 32bit number. The lowest possible vsock number is `3` - (i.e. with the lowest node number being `1`, this is 2+1). 
- ''; - }; - }; - }; config = { @@ -120,18 +99,6 @@ in } ]; - services.openssh = mkIf config.testing.sshBackdoor.enable { - enable = true; - settings = { - PermitRootLogin = "yes"; - PermitEmptyPasswords = "yes"; - }; - }; - - security.pam.services.sshd = mkIf config.testing.sshBackdoor.enable { - allowNullPassword = true; - }; - systemd.services.backdoor = lib.mkMerge [ backdoorService { @@ -207,12 +174,6 @@ in # we avoid defining attributes if not possible. # TODO: refactor such that test-instrumentation can import qemu-vm package = lib.mkDefault pkgs.qemu_test; - - options = mkIf config.testing.sshBackdoor.enable [ - "-device vhost-vsock-pci,guest-cid=${ - toString (config.virtualisation.test.nodeNumber + config.testing.sshBackdoor.vsockOffset) - }" - ]; }; };