nixos/prometheus: Move config reload test to subtest

This commit is contained in:
Jonathan Davies 2024-06-02 11:48:57 +01:00
parent 0c99c5f8b7
commit 2c6830c47e
No known key found for this signature in database
3 changed files with 117 additions and 58 deletions

View File

@ -0,0 +1,116 @@
import ../make-test-python.nix ({ lib, pkgs, ... }:
{
name = "prometheus-config-reload";
nodes = {
prometheus = { config, pkgs, ... }: {
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
enableReload = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "prometheus";
static_configs = [
{
targets = [
"prometheus:${toString config.services.prometheus.port}"
];
}
];
}
];
};
specialisation = {
"prometheus-config-change" = {
configuration = {
environment.systemPackages = [ pkgs.yq ];
# This configuration just adds a new prometheus job
# to scrape the node_exporter metrics of the s3 machine.
services.prometheus = {
scrapeConfigs = [
{
job_name = "node";
static_configs = [
{
targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ];
}
];
}
];
};
};
};
};
};
};
testScript = ''
prometheus.wait_for_unit("prometheus")
prometheus.wait_for_open_port(9090)
# Check if switching to a NixOS configuration that changes the prometheus
# configuration reloads (instead of restarts) prometheus before the switch
# finishes successfully:
with subtest("config change reloads prometheus"):
import json
# We check if prometheus has finished reloading by looking for the message
# "Completed loading of configuration file" in the journal between the start
# and finish of switching to the new NixOS configuration.
#
# To mark the start we record the journal cursor before starting the switch:
cursor_before_switching = json.loads(
prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
)["__CURSOR"]
# Now we switch:
prometheus_config_change = prometheus.succeed(
"readlink /run/current-system/specialisation/prometheus-config-change"
).strip()
prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")
# Next we retrieve all logs since the start of switching:
logs_after_starting_switching = prometheus.succeed(
"""
journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
""".format(
cursor_before_switching=cursor_before_switching
)
)
# Finally we check if the message "Completed loading of configuration file"
# occurs before the "finished switching to system configuration" message:
finished_switching_msg = (
"finished switching to system configuration " + prometheus_config_change
)
reloaded_before_switching_finished = False
finished_switching = False
for log_line in logs_after_starting_switching.split("\n"):
msg = json.loads(log_line)["MESSAGE"]
if "Completed loading of configuration file" in msg:
reloaded_before_switching_finished = True
if msg == finished_switching_msg:
finished_switching = True
break
assert reloaded_before_switching_finished
assert finished_switching
# Check if the reloaded config includes the new node job:
prometheus.succeed(
"""
curl -sf http://127.0.0.1:9090/api/v1/status/config \
| jq -r .data.yaml \
| yq '.scrape_configs | any(.job_name == "node")' \
| grep true
"""
)
'';
})

View File

@ -5,6 +5,7 @@
{
alertmanager = import ./alertmanager.nix { inherit system pkgs; };
config-reload = import ./config-reload.nix { inherit system pkgs; };
federation = import ./federation.nix { inherit system pkgs; };
prometheus-pair = import ./prometheus-pair.nix { inherit system pkgs; };
pushgateway = import ./pushgateway.nix { inherit system pkgs; };

View File

@ -212,8 +212,6 @@ in import ./make-test-python.nix {
};
testScript = { nodes, ... } : ''
import json
# Before starting the other machines we first make sure that our S3 service is online
# and has a bucket added for thanos:
s3.start()
@ -289,61 +287,5 @@ in import ./make-test-python.nix {
+ "jq .thanos.labels.some_label | "
+ "grep 'required by thanos'"
)
# Check if switching to a NixOS configuration that changes the prometheus
# configuration reloads (instead of restarts) prometheus before the switch
# finishes successfully:
with subtest("config change reloads prometheus"):
# We check if prometheus has finished reloading by looking for the message
# "Completed loading of configuration file" in the journal between the start
# and finish of switching to the new NixOS configuration.
#
# To mark the start we record the journal cursor before starting the switch:
cursor_before_switching = json.loads(
prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
)["__CURSOR"]
# Now we switch:
prometheus_config_change = prometheus.succeed(
"readlink /run/current-system/specialisation/prometheus-config-change"
).strip()
prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")
# Next we retrieve all logs since the start of switching:
logs_after_starting_switching = prometheus.succeed(
"""
journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
""".format(
cursor_before_switching=cursor_before_switching
)
)
# Finally we check if the message "Completed loading of configuration file"
# occurs before the "finished switching to system configuration" message:
finished_switching_msg = (
"finished switching to system configuration " + prometheus_config_change
)
reloaded_before_switching_finished = False
finished_switching = False
for log_line in logs_after_starting_switching.split("\n"):
msg = json.loads(log_line)["MESSAGE"]
if "Completed loading of configuration file" in msg:
reloaded_before_switching_finished = True
if msg == finished_switching_msg:
finished_switching = True
break
assert reloaded_before_switching_finished
assert finished_switching
# Check if the reloaded config includes the new s3-node_exporter job:
prometheus.succeed(
"""
curl -sf http://127.0.0.1:${toString queryPort}/api/v1/status/config \
| jq -r .data.yaml \
| yq '.scrape_configs | any(.job_name == "s3-node_exporter")' \
| grep true
"""
)
'';
}