nixos/postgresql: set Restart=always for postgresql.service

...including a slightly more careful config around restarts, i.e.

* We have intervals of 5 seconds between restarts instead of 100ms.

* If we exceed 5 start attempts in 5*120s (with 120s being the timeout),
  the start job gets rate-limited and thus aborted. Do note that at most
  5 start attempts are allowed in ~625s by default. If the startup
  fails very quickly, either wait until the rate-limit is over or reset
  the counter using `systemctl reset-failed postgresql.service`.

* The interval of 625s (plus 5s of buffer) is automatically derived
  from RestartSec & TimeoutSec. Changing either will also affect
  StartLimitIntervalSec unless overridden with `mkForce`.
This commit is contained in:
Maximilian Bosch 2025-07-26 19:11:54 +02:00
parent 03d0fed6f8
commit 6ae194e419
No known key found for this signature in database
2 changed files with 22 additions and 1 deletions

View File

@ -826,6 +826,8 @@ in
ExecStart = "${cfg.finalPackage}/bin/postgres";
Restart = "always";
# Hardening
CapabilityBoundingSet = [ "" ];
DevicePolicy = "closed";
@ -877,7 +879,20 @@ in
})
];
unitConfig.RequiresMountsFor = "${cfg.dataDir}";
unitConfig =
  let
    # Start timeout taken from this unit's own serviceConfig, so the
    # rate-limit window below follows whatever timeout is configured there.
    # NOTE(review): the arithmetic below assumes this is a plain number of
    # seconds (not a time-span string) — confirm against the module defaults.
    startTimeout = config.systemd.services.postgresql.serviceConfig.TimeoutSec;
    # Number of start attempts permitted inside one rate-limit window.
    allowedStarts = 5;
    # Extra seconds added on top of the computed window.
    slackSec = 5;
  in
  {
    # Allow up to `allowedStarts` start attempts per window ...
    StartLimitBurst = allowedStarts;
    # ... where the window is long enough for every one of those attempts
    # to run into the start timeout, plus `slackSec` of headroom.
    StartLimitIntervalSec = allowedStarts * startTimeout + slackSec;
    # Only start once the filesystem holding the data directory is mounted.
    RequiresMountsFor = "${cfg.dataDir}";
  };
};
systemd.services.postgresql-setup = {

View File

@ -101,6 +101,12 @@ let
machine.fail(check_count("SELECT * FROM sth;", 4))
machine.succeed(check_count("SELECT xpath('/test/text()', doc) FROM xmltest;", 1))
with subtest("killing postgres process should trigger an automatic restart"):
machine.succeed("systemctl kill -s KILL postgresql")
machine.wait_until_succeeds("systemctl is-active postgresql.service")
machine.wait_until_succeeds("systemctl is-active postgresql.target")
with subtest("Backup service works"):
machine.succeed(
"systemctl start ${backupService}.service",