From 94b9049bcc66171e46d7dfa9166d6c2d45c4f5bf Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Mon, 2 Jun 2025 06:13:51 +0000 Subject: [PATCH] Add consul exporter --- .deploy.gitlab-ci.yml | 1 + .gitlab-ci.yml | 2 +- README.md | 1 + .../station/server-monitoring.levant.nomad | 31 ++++++++++++++++++- infra/station/consul/consul-agent.hcl.j2 | 18 ++++++++--- infra/station/consul/consul.hcl.j2 | 1 + tangostationcontrol/VERSION | 2 +- 7 files changed, 49 insertions(+), 7 deletions(-) diff --git a/.deploy.gitlab-ci.yml b/.deploy.gitlab-ci.yml index f76272dde..1657d6899 100644 --- a/.deploy.gitlab-ci.yml +++ b/.deploy.gitlab-ci.yml @@ -32,6 +32,7 @@ deploy_nomad: - dsconfig - ec-sim - jupyter + - server-monitoring - network-monitoring - landing-page - rpc-server diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 77e653921..8a873c01d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -87,7 +87,7 @@ run_shellcheck: DEV_STATIONS: dts-lab rs311 # Repeat this list in .deploy.gitlab-ci.yml - COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter network-monitoring landing-page rpc-server + COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter server-monitoring network-monitoring landing-page rpc-server # Generate the station-specific TangoDB configuration based on LOFAR1 information. # Run manually to bootstrap CDB/stations/$station.json for stations diff --git a/README.md b/README.md index a7742b80b..530c93ff5 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Next change the version in the following places: through [https://git.astron.nl/lofar2.0/tango/-/tags](Deploy Tags) # Release Notes +* 0.51.7 Add metrics for consul services health * 0.51.6 Fix powering off antennas * 0.51.5 Use MinIO credentials from Vault * 0.51.4 Ping FPGAs and PIs as part of monitoring diff --git a/infra/jobs/station/server-monitoring.levant.nomad b/infra/jobs/station/server-monitoring.levant.nomad index a71dcb7b6..563255235 100644 --- a/infra/jobs/station/server-monitoring.levant.nomad +++ b/infra/jobs/station/server-monitoring.levant.nomad @@ -75,7 +75,6 @@ job "server-monitoring" { config { image = "quay.io/superq/chrony-exporter:latest" - ports = ["http"] # NB: For --collector.serverstats, we need to connect to chrony over the UNIX domain socket, # see https://github.com/SuperQ/chrony_exporter/issues/66 args = ["--collector.sources", "--chrony.address=10.99.250.250:323", "--web.listen-address=:80"] @@ -86,4 +85,34 @@ job "server-monitoring" { } } } + + group "consul-exporter" { + count = 1 + + network { + mode = "cni/station" + } + + service { + tags = ["scrape"] + name = "consul-exporter" + task = "consul-exporter" + port = "9107" + address_mode = "alloc" + } + + task "consul-exporter" { + driver = "docker" + + config { + image = "prom/consul-exporter:latest" + args = ["--consul.server=consul.service.consul:8500"] + } + + resources { + cpu = 100 + memory = 100 + } + } + } } diff --git a/infra/station/consul/consul-agent.hcl.j2 b/infra/station/consul/consul-agent.hcl.j2 index 7f83413b6..abad97334 100644 --- a/infra/station/consul/consul-agent.hcl.j2 +++ b/infra/station/consul/consul-agent.hcl.j2 @@ -18,11 +18,21 @@ connect { telemetry { prometheus_retention_time = "24h" + disable_hostname = true } -verify_incoming = true -verify_outgoing = true -verify_server_hostname = true -ca_file = "/host/agent-certs/ca.crt" + +tls { + defaults { + verify_incoming = true + verify_outgoing = true + ca_file = "/host/agent-certs/ca.crt" + } + + internal_rpc { + verify_server_hostname = true + } +} + auto_encrypt { tls = true } diff --git a/infra/station/consul/consul.hcl.j2 b/infra/station/consul/consul.hcl.j2 index c82369292..8429d6cf3 100644 --- a/infra/station/consul/consul.hcl.j2 +++ b/infra/station/consul/consul.hcl.j2 @@ -55,4 +55,5 @@ ui_config { telemetry { prometheus_retention_time = "24h" + disable_hostname = true } diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION index 6114cad8f..d1321c4b8 100644 --- a/tangostationcontrol/VERSION +++ b/tangostationcontrol/VERSION @@ -1 +1 @@ -0.51.6 +0.51.7 -- GitLab