Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

112 lines
3.0 KiB
Nix
Raw Permalink Normal View History

{
config,
lib,
pkgs,
...
}:
let
domain = "example.test";
in
{
# Caddy only supports the useACMEHost integration, so it is exercised in its
# own, separate test suite.
name = "caddy";

meta.maintainers = lib.teams.acme.members;
# Hard timeout in seconds; an average run takes about 60 seconds.
meta.timeout = 180;
nodes = {
  # Fake ACME server that responds to the client's requests.
  acme =
    { nodes, ... }:
    {
      imports = [ ../common/acme/server ];
    };

  # Machine running caddy; it pulls in the shared ACME client configuration.
  caddy =
    { nodes, config, ... }:
    let
      inherit (config.networking) fqdn;
    in
    {
      imports = [ ../common/acme/client ];

      networking = {
        inherit domain;
        firewall.allowedTCPPorts = [ 80 443 ];
        # Resolve the additional vhost name the easy way: point it at loopback.
        hosts."127.0.0.1" = [ "caddy-alt.${domain}" ];
      };

      # OpenSSL is used for more thorough certificate validation.
      environment.systemPackages = [ pkgs.openssl ];

      security.acme.certs.${fqdn} = {
        listenHTTP = ":8080";
        reloadServices = [ "caddy.service" ];
      };

      # Put the caddy service user into the "acme" group.
      users.users.${config.services.caddy.user}.extraGroups = [ "acme" ];

      services.caddy = {
        enable = true;
        # FIXME: reloading caddy is not sufficient to load new certs.
        # Restart it manually until this is fixed.
        enableReload = false;
        globalConfig = ''
          auto_https off
        '';
        virtualHosts = {
          # TLS vhost served with the certificate of the ACME host above.
          "${fqdn}:443".useACMEHost = fqdn;
          # Plain HTTP is forwarded to port 8080, the listenHTTP address
          # configured for the certificate.
          ":80".extraConfig = ''
            reverse_proxy localhost:8080
          '';
        };
      };

      # Variant of the system whose certificate also covers the alternative name.
      specialisation.add_domain.configuration = {
        security.acme.certs.${fqdn}.extraDomainNames = [ "caddy-alt.${domain}" ];
      };
    };
};
# Python test script run by the NixOS test driver.
#
# Flow:
#   1. Boot caddy alone and verify it serves a certificate issued by "minica"
#      (the self-signed stage).
#   2. Start the ACME server node and wait until it listens on port 443.
#   3. Restart the cert's acme unit and verify the certificate is now issued
#      by "pebble" and that connections validate against the downloaded CA.
#   4. Switch to the `add_domain` specialisation and verify the alternative
#      vhost name becomes reachable over TLS.
#
# Helpers such as check_issuer, check_connection, check_domain,
# download_ca_certs, wait_for_running and switch_to are not defined in this
# file; presumably they come from `pythonUtils` in ./utils.nix.
#
# NOTE: the body of every `with subtest(...)` block must stay indented
# relative to its `with` line; the script is Python.
testScript =
  { nodes, ... }:
  ''
    ${(import ./utils.nix).pythonUtils}

    domain = "${domain}"
    ca_domain = "${nodes.acme.test-support.acme.caDomain}"
    fqdn = "${nodes.caddy.networking.fqdn}"

    with subtest("Boot and start with selfsigned certificates"):
        caddy.start()
        caddy.wait_for_unit("caddy.service")
        check_issuer(caddy, fqdn, "minica")
        # Check that the web server has picked up the selfsigned cert
        check_connection(caddy, fqdn, minica=True)

    acme.start()
    wait_for_running(acme)
    acme.wait_for_open_port(443)

    with subtest("Acquire a new cert"):
        caddy.succeed(f"systemctl restart acme-{fqdn}.service")
        check_issuer(caddy, fqdn, "pebble")
        check_domain(caddy, fqdn, fqdn)
        download_ca_certs(caddy, ca_domain)
        check_connection(caddy, fqdn)

    with subtest("security.acme changes reflect on caddy"):
        check_connection(caddy, f"caddy-alt.{domain}", fail=True)
        switch_to(caddy, "add_domain")
        check_connection(caddy, f"caddy-alt.{domain}")
  '';
}