nixos/prometheus: Extend prometheus-pair test to verify first compaction succeeds

This commit is contained in:
Jonathan Davies 2025-05-28 20:10:59 +00:00
parent 36965c62be
commit ddf9a3eba5
No known key found for this signature in database

View File

@ -14,6 +14,9 @@
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
# Set the TSDB minimum block duration to 15s; presumably this lets the first
# compaction happen within the test run so the test script can verify it.
extraFlags = [
"--storage.tsdb.min-block-duration=15s"
];
scrapeConfigs = [
{
job_name = "prometheus";
@ -40,6 +43,9 @@
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
# Set the TSDB minimum block duration to 15s; presumably this lets the first
# compaction happen within the test run so the test script can verify it.
extraFlags = [
"--storage.tsdb.min-block-duration=15s"
];
scrapeConfigs = [
{
job_name = "prometheus";
@ -86,6 +92,38 @@
+ "jq '.data.result[0].value[1]' | grep '\"2\"'"
)
# Poll the local query API until prometheus_tsdb_head_series_created_total for
# instance prometheus1:9090 yields an output line other than "0".
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_head_series_created_total\{instance=\"prometheus1:9090\"\}' | "
+ "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
)
with subtest("Compaction verification"):
  for machine in prometheus1, prometheus2:
    # Continue once the block write is logged OR an error-level line shows up.
    # The error alternative uses the same level=ERROR key that the later
    # no-errors check greps for; the previous log=ERROR spelling did not
    # correspond to that check.
    machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep -E '(level=ERROR|write block)'")

    # Remaining compaction log messages, waited for in this fixed order.
    machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Head GC completed'")
    machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Creating checkpoint'")
    machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'WAL checkpoint complete'")
    machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'compact blocks'")
    machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Deleting obsolete block'")

    # The compaction counter for prometheus1:9090 must yield an output line
    # other than "0".
    machine.wait_until_succeeds(
      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_total\{instance=\"prometheus1:9090\"\}' | "
      + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
    )

    # The failed-compaction counter for prometheus1:9090 must be "0".
    machine.wait_until_succeeds(
      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_failed_total\{instance=\"prometheus1:9090\"\}' | "
      + "jq '.data.result[0].value[1]' | grep '\"0\"'"
    )
# Neither instance may have logged an error-level line: fail() requires the
# grep to find nothing.
for machine in prometheus1, prometheus2:
  machine.fail("journalctl -o cat -u prometheus.service | grep 'level=ERROR'")

# Log the systemd-analyze security lines that do not carry the check mark.
# The pattern must not be empty: an empty pattern matches every line, so with
# -v grep selects nothing and exits 1, which makes succeed() raise.
prometheus1.log(prometheus1.succeed("systemd-analyze security prometheus.service | grep -v '✓'"))
'';
}