diff --git a/.deploy.gitlab-ci.yml b/.deploy.gitlab-ci.yml index f76272dde035f239385cdecb23c7a7231aac9109..1657d689929e5d22d4cde2025bb06f2e6db208c8 100644 --- a/.deploy.gitlab-ci.yml +++ b/.deploy.gitlab-ci.yml @@ -32,6 +32,7 @@ deploy_nomad: - dsconfig - ec-sim - jupyter + - server-monitoring - network-monitoring - landing-page - rpc-server diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 77e6539216e8dbb68b143732abb2ae25ff2f2de4..8a873c01dd5b9424b4374defb45a90cd2be4090c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -87,7 +87,7 @@ run_shellcheck: DEV_STATIONS: dts-lab rs311 # Repeat this list in .deploy.gitlab-ci.yml - COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter network-monitoring landing-page rpc-server + COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter server-monitoring network-monitoring landing-page rpc-server # Generate the station-specific TangoDB configuration based on LOFAR1 information. # Run manually to bootstrap CDB/stations/$station.json for stations diff --git a/README.md b/README.md index a7742b80b7cd3a7b0709989c1b6b7b185d5dbcd5..530c93ff5ef13dbe4d67fb08084576ae6b1cf2a9 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Next change the version in the following places: through [https://git.astron.nl/lofar2.0/tango/-/tags](Deploy Tags) # Release Notes +* 0.51.7 Add metrics for consul services health * 0.51.6 Fix powering off antennas * 0.51.5 Use MinIO credentials from Vault * 0.51.4 Ping FPGAs and PIs as part of monitoring diff --git a/infra/jobs/station/server-monitoring.levant.nomad b/infra/jobs/station/server-monitoring.levant.nomad index a71dcb7b6a216a4688379e575ff8783515fc7e27..56325523597b658bc068550d94facc24f839bba5 100644 --- a/infra/jobs/station/server-monitoring.levant.nomad +++ b/infra/jobs/station/server-monitoring.levant.nomad @@ -75,7 +75,6 @@ job "server-monitoring" { config { image = "quay.io/superq/chrony-exporter:latest" - ports = ["http"] # NB: For --collector.serverstats, we need to connect to chrony over the UNIX domain socket, # see https://github.com/SuperQ/chrony_exporter/issues/66 args = ["--collector.sources", "--chrony.address=10.99.250.250:323", "--web.listen-address=:80"] @@ -86,4 +85,34 @@ job "server-monitoring" { } } } + + group "consul-exporter" { + count = 1 + + network { + mode = "cni/station" + } + + service { + tags = ["scrape"] + name = "consul-exporter" + task = "consul-exporter" + port = "9107" + address_mode = "alloc" + } + + task "consul-exporter" { + driver = "docker" + + config { + image = "prom/consul-exporter:latest" + args = ["--consul.server=consul.service.consul:8500"] + } + + resources { + cpu = 100 + memory = 100 + } + } + } } diff --git a/infra/station/consul/consul-agent.hcl.j2 b/infra/station/consul/consul-agent.hcl.j2 index 7f83413b68e1642ba7c9d761ec869da62f7ff515..abad9733449c7cc68441f4b9f29f0f31e3c58dbc 100644 --- a/infra/station/consul/consul-agent.hcl.j2 +++ b/infra/station/consul/consul-agent.hcl.j2 @@ -18,11 +18,21 @@ connect { telemetry { prometheus_retention_time = "24h" + disable_hostname = true } -verify_incoming = true -verify_outgoing = true -verify_server_hostname = true -ca_file = "/host/agent-certs/ca.crt" + +tls { + defaults { + verify_incoming = true + verify_outgoing = true + ca_file = "/host/agent-certs/ca.crt" + } + + internal_rpc { + verify_server_hostname = true + } +} + auto_encrypt { tls = true } diff --git a/infra/station/consul/consul.hcl.j2 b/infra/station/consul/consul.hcl.j2 index c8236929247d4cdf466e6d41c699bf7ba6a4b631..8429d6cf37a3951b0c47ec68f9c58d08259b207e 100644 --- a/infra/station/consul/consul.hcl.j2 +++ b/infra/station/consul/consul.hcl.j2 @@ -55,4 +55,5 @@ ui_config { telemetry { prometheus_retention_time = "24h" + disable_hostname = true } diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION index 6114cad8f05b7ec55eccdede6d6f179bf4c5f8b5..d1321c4b8a675ad89029a2b30f65530f2f0f011a 100644 --- a/tangostationcontrol/VERSION +++ b/tangostationcontrol/VERSION @@ -1 +1 @@ -0.51.6 +0.51.7