diff --git a/README.md b/README.md index 8697c5556c729726fa7e1e3271fb3fb338fcc0b7..5a273c70f6426ffb9fbc0e459090470b35190f23 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,9 @@ Next change the version in the following places: through [https://git.astron.nl/lofar2.0/tango/-/tags](Deploy Tags) # Release Notes +* 0.52.2 Expose Prometheus interface at http://hostname/prometheus. + Scrape HAProxy metrics. + Fix scraping node exporter. * 0.52.1 Fix beamlet source and destination addresses. Dropped landing page dependency on nomad-consul link. Always serve it. * 0.52.0 Remove ansible scripts, assume ICT-installed base system. diff --git a/docker/landing-page/conf.d/default.conf b/docker/landing-page/conf.d/default.conf index 6697cfebec80b628d1bd653bbb17490c559c4bc1..ce4741f2b0b6eed6ab9dcf9ffe15ed2716ea0772 100644 --- a/docker/landing-page/conf.d/default.conf +++ b/docker/landing-page/conf.d/default.conf @@ -25,6 +25,17 @@ server { proxy_pass http://grafana-backend; } + location /prometheus { + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Host $http_host; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_pass http://prometheus-backend; + } + location /jupyter { proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; diff --git a/docker/landing-page/html/index.html b/docker/landing-page/html/index.html index 543b28f00cbfcf205f261fdc0d064ac85f2b3039..e318150393d746285fdc5bf84e526957a9db09ef 100644 --- a/docker/landing-page/html/index.html +++ b/docker/landing-page/html/index.html @@ -6,8 +6,9 @@ <body> Welcome to this LOFAR2 station! The following web interfaces are available to you: <ul> - <li><a href="/grafana"><img src="/grafana/public/img/apple-touch-icon.png" height="12pt"/> Grafana</a> (monitoring)</li> - <li><a href="/jupyter"><img src="/jupyter/static/logo/logo.png" height="12pt"/> Jupyter Labs</a> (control)</li> + <li><a href="/grafana"><img src="/grafana/public/img/apple-touch-icon.png" height="12pt"/>Grafana</a> (monitoring)</li> + <li><a href="/jupyter"><img src="/jupyter/static/logo/logo.png" height="12pt"/>Jupyter Labs</a> (control)</li> + <li><a href="/prometheus"><img src="/prometheus/favicon.svg" height="12pt"/>Prometheus</a> (metrics)</li> <li><a href="/minio"><img src="/icon-minio.png" height="12pt"/>Minio</a> (object-storage)</li> <li><a href="/nomad"><img src="/icon-nomad.ico" height="12pt"/>Nomad</a> (workload / depoyment management)</li> <li><a href="/consul"><img src="/icon-consul.png" height="12pt"/>Consul</a> (service mesh / virtual networking)</li> diff --git a/infra/jobs/station/landing-page.levant.nomad b/infra/jobs/station/landing-page.levant.nomad index 49c524a826b8a23a6a503a433137396609b17884..ce3fb56e9fb0975c4962b9e34f0730defe48df75 100644 --- a/infra/jobs/station/landing-page.levant.nomad +++ b/infra/jobs/station/landing-page.levant.nomad @@ -28,6 +28,32 @@ job "landing-page" { } } + service { + name = "landing-page" + port = "http" + + check { + type = "http" + path = "/" + interval = "10s" + timeout = "20s" + } + } + + service { + tags = ["scrape"] + name = "haproxy" + port = "8405" + address_mode = "alloc" + + check { + type = "tcp" + interval = "10s" + timeout = "20s" + address_mode = "alloc" + } + } + task "nginx" { driver = "docker" @@ -49,6 +75,10 @@ job "landing-page" { template { destination = "conf.d/load-balancer.conf" data = <<EOF +upstream prometheus-backend { + server prometheus.service.consul:9090; +} + upstream grafana-backend { server grafana.service.consul:3000; } @@ -82,11 +112,18 @@ resolvers consul hold valid 5s nameserver consul 10.99.250.250:8600 +frontend prometheus + bind :8405 + mode http + http-request use-service prometheus-exporter if { path /metrics } + no log + frontend minio_console bind *:9001 bind :::9001 mode tcp default_backend b_minio_console + frontend minio bind *:9000 bind :::9000 diff --git a/infra/jobs/station/monitoring.levant.nomad b/infra/jobs/station/monitoring.levant.nomad index 75c89bc2722a0a93095b045bd5d11f7658ac09ec..3998047202411b1e7f43b47b25e228cc268e8afb 100644 --- a/infra/jobs/station/monitoring.levant.nomad +++ b/infra/jobs/station/monitoring.levant.nomad @@ -205,11 +205,14 @@ postgres.service.consul:5432:grafana:postgres:password port = "9090" address_mode = "alloc" + meta { + metrics_path = "/prometheus/metrics" + } check { type = "http" name = "prometheus_health" - path = "/-/healthy" + path = "/prometheus/-/healthy" interval = "20s" timeout = "30s" address_mode = "alloc" @@ -230,7 +233,8 @@ postgres.service.consul:5432:grafana:postgres:password args = [ "--config.file=/local/prometheus.yaml", "--web.enable-remote-write-receiver", - "--storage.tsdb.retention.time=10y" + "--storage.tsdb.retention.time=10y", + "--web.external-url=http://[[.station]]-lcu.lofar.eu/prometheus", ] } diff --git a/infra/jobs/station/network-monitoring.levant.nomad b/infra/jobs/station/network-monitoring.levant.nomad index 4f8ce4931e7dba39fc889c97bd3af2ef290c3b7a..5a9631cf7f4489fcaa1c2a8c58970da363c168e1 100644 --- a/infra/jobs/station/network-monitoring.levant.nomad +++ b/infra/jobs/station/network-monitoring.levant.nomad @@ -153,22 +153,22 @@ EOF services { name = "ipmi-exporter" - address = "[[ .station ]]-lcu.lofar.eu" + address = "10.99.250.250" port = 9290 tags = ["scrape"] check { - http = "http://[[ .station ]]-lcu.lofar.eu:9290/" + http = "http://10.99.250.250:9290/" interval = "10s" } } services { name = "node-exporter" - address = "[[ .station ]]-lcu.lofar.eu" + address = "10.99.250.250" port = 9100 tags = ["scrape"] check { - http = "http://[[ .station ]]-lcu.lofar.eu:9100/" + http = "http://10.99.250.250:9100/" interval = "10s" } } diff --git a/requirements.txt b/requirements.txt index d9ef5aae97dbe2ff992a1a9e2733f5502742f7ba..9d00be23a27ed6767e19c2f3e304e0ccd14b5feb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # order of appearance. Changing the order has an impact on the overall # integration process, which may cause wedges in the gate later. -lofar-sid >= 1.1.0 # Apache 2 +lofar-sid == 1.1.0 # Apache 2 lofar-lotus>=0.0.4 # Apache 2 PyTango>=10.0.0 # LGPL v3 numpy>=1.21.6 # BSD3 diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION index 27d68a6aacb8ca9af0aa47553133a61824bd93e7..3f01561613eb05fe601dba9da13816b3d9fbfe39 100644 --- a/tangostationcontrol/VERSION +++ b/tangostationcontrol/VERSION @@ -1 +1 @@ -0.52.1 +0.52.2 diff --git a/tangostationcontrol/devices/base_device_classes/antennafield_device.py b/tangostationcontrol/devices/base_device_classes/antennafield_device.py index b1e1a28b7d629f3c5276cb81d3a595de66a7b524..44d65fd463e15da7ba1fab67920cce8f13330658 100644 --- a/tangostationcontrol/devices/base_device_classes/antennafield_device.py +++ b/tangostationcontrol/devices/base_device_classes/antennafield_device.py @@ -658,8 +658,7 @@ class AF(LOFARDevice): return len(self.Control_to_RECV_mapping) // 2 def read_Antenna_Names_R(self): - antenna_names = numpy.array(self.Antenna_Names) - antenna_names.resize(self.nr_antennas) + antenna_names = numpy.array(self.Antenna_Names)[: self.nr_antennas] return antenna_names @command(dtype_in=str, dtype_out=DevVarBooleanArray) @@ -748,8 +747,7 @@ class AF(LOFARDevice): ) def read_Antenna_Cables_R(self): - antenna_cables = numpy.array(self.Antenna_Cables) - antenna_cables.resize(self.nr_antennas) + antenna_cables = numpy.array(self.Antenna_Cables)[: self.nr_antennas] return antenna_cables def read_Antenna_Cables_Delay_R(self): @@ -782,18 +780,15 @@ class AF(LOFARDevice): ) def read_Antenna_Needs_Power_R(self): - antenna_needs_power = numpy.array(self.Antenna_Needs_Power) - antenna_needs_power.resize(self.nr_antennas) + antenna_needs_power = numpy.array(self.Antenna_Needs_Power)[: self.nr_antennas] return antenna_needs_power def read_Antenna_Use_R(self): - antenna_use = numpy.array(self.Antenna_Use) - antenna_use.resize(self.nr_antennas) + antenna_use = numpy.array(self.Antenna_Use)[: self.nr_antennas] return numpy.array([AntennaUse(x) for x in antenna_use], dtype=AntennaUse) def read_Antenna_Status_R(self): - antenna_status = numpy.array(self.Antenna_Status) - antenna_status.resize(self.nr_antennas) + antenna_status = numpy.array(self.Antenna_Status)[: self.nr_antennas] return numpy.array( [AntennaStatus(x) for x in antenna_status], dtype=AntennaStatus )