From 8e91d3a01f23e4000a147aa9307d3057ef24e933 Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Thu, 15 May 2025 07:37:39 +0000 Subject: [PATCH] Resolve L2SS-2313 "Ping devices" --- .gitlab-ci.yml | 6 +- README.md | 2 + infra/env/cs.yaml | 39 ++++++++ infra/env/rs.yaml | 38 ++++++++ .../station/network-monitoring.levant.nomad | 94 +++++++++++++++++++ infra/jobs/station/snmp-exporter.levant.nomad | 40 -------- tangostationcontrol/VERSION | 2 +- 7 files changed, 177 insertions(+), 44 deletions(-) create mode 100644 infra/jobs/station/network-monitoring.levant.nomad delete mode 100644 infra/jobs/station/snmp-exporter.levant.nomad diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4d2630056..b1d4bb9a0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -49,7 +49,7 @@ include: # only disregard them. As such, additional # rules are always needed, either to filter # further or to always accept. - - if: $CI_PIPELINE_SOURCE == "pipeline" + - if: $CI_PIPELINE_SOURCE == "pipeline" && $STATION != "" when: never # Prepare image to run ci on @@ -85,7 +85,7 @@ run_shellcheck: LOFAR1_STATIONS: cs002 cs003 cs004 cs005 cs006 cs007 cs011 cs013 cs017 cs021 cs024 cs026 cs028 cs030 cs031 cs101 cs103 cs201 cs301 cs302 cs401 cs501 rs106 rs205 rs208 rs210 rs305 rs306 rs310 rs406 rs407 rs409 rs503 rs508 rs509 LOFAR2_STATIONS: cs001 cs032 rs307 DEV_STATIONS: dts-lab - COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter snmp-exporter landing-page rpc-server + COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter network-monitoring landing-page rpc-server # Generate the station-specific TangoDB configuration based on LOFAR1 information. 
# Run manually to bootstrap CDB/stations/$station.json for stations @@ -666,7 +666,7 @@ deploy_auto: stage: deploy extends: .components rules: - - if: $CI_PIPELINE_SOURCE == "pipeline" + - if: $CI_PIPELINE_SOURCE == "pipeline" && $STATION != "" variables: PARENT_PIPELINE_ID: $CI_PIPELINE_ID trigger: diff --git a/README.md b/README.md index 871bfe24e..6edc2dcea 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Next change the version in the following places: through [https://git.astron.nl/lofar2.0/tango/-/tags](Deploy Tags) # Release Notes +* 0.51.4 Ping FPGAs and PIs as part of monitoring * 0.51.3 Mirror central calibration tables into Calibration device using a volume and sync nomad job. * 0.51.2 Allow automated deployment from a multi-project pipeline in Gitlab CI/CD * 0.51.1 Generate caltables from LOFAR1 data or by generating dummies diff --git a/infra/env/cs.yaml b/infra/env/cs.yaml index a7526f21b..61d819d5f 100644 --- a/infra/env/cs.yaml +++ b/infra/env/cs.yaml @@ -4,9 +4,48 @@ sdptr: lba: first_gn: 0 fpgas: 16 + ips: + - 10.99.0.1 + - 10.99.0.2 + - 10.99.0.3 + - 10.99.0.4 + - 10.99.1.1 + - 10.99.1.2 + - 10.99.1.3 + - 10.99.1.4 + - 10.99.2.1 + - 10.99.2.2 + - 10.99.2.3 + - 10.99.2.4 + - 10.99.3.1 + - 10.99.3.2 + - 10.99.3.3 + - 10.99.3.4 hba0: first_gn: 16 fpgas: 4 + ips: + - 10.99.4.1 + - 10.99.4.2 + - 10.99.4.3 + - 10.99.4.4 hba1: first_gn: 20 fpgas: 4 + ips: + - 10.99.5.1 + - 10.99.5.2 + - 10.99.5.3 + - 10.99.5.4 + +pi: + instances: + l0: + ip: 10.99.0.100 + l1: + ip: 10.99.1.100 + h0: + ip: 10.99.2.100 + ccd: + ip: 10.99.250.90 + diff --git a/infra/env/rs.yaml b/infra/env/rs.yaml index 9458b8ee6..8329426f6 100644 --- a/infra/env/rs.yaml +++ b/infra/env/rs.yaml @@ -4,6 +4,44 @@ sdptr: lba: first_gn: 0 fpgas: 16 + ips: + - 10.99.0.1 + - 10.99.0.2 + - 10.99.0.3 + - 10.99.0.4 + - 10.99.1.1 + - 10.99.1.2 + - 10.99.1.3 + - 10.99.1.4 + - 10.99.2.1 + - 
10.99.2.2 + - 10.99.2.3 + - 10.99.2.4 + - 10.99.3.1 + - 10.99.3.2 + - 10.99.3.3 + - 10.99.3.4 hba: first_gn: 16 fpgas: 8 + ips: + - 10.99.4.1 + - 10.99.4.2 + - 10.99.4.3 + - 10.99.4.4 + - 10.99.5.1 + - 10.99.5.2 + - 10.99.5.3 + - 10.99.5.4 + +pi: + instances: + l0: + ip: 10.99.0.100 + l1: + ip: 10.99.1.100 + h0: + ip: 10.99.2.100 + ccd: + ip: 10.99.250.90 + diff --git a/infra/jobs/station/network-monitoring.levant.nomad b/infra/jobs/station/network-monitoring.levant.nomad new file mode 100644 index 000000000..b0ef5555c --- /dev/null +++ b/infra/jobs/station/network-monitoring.levant.nomad @@ -0,0 +1,94 @@ +job "network-monitoring" { + region = "[[.region]]" + datacenters = ["stat"] + type = "service" + + group "snmp-exporter" { + count = 1 + + network { + mode = "cni/station" + } + + service { + tags = ["scrape"] + name = "snmp-exporter" + task = "snmp-exporter" + port = "9116" + address_mode = "alloc" + + meta { + metrics_address = "snmp-exporter.service.consul:9116" + metrics_path = "/metrics" + } + } + + task "snmp-exporter" { + driver = "docker" + + config { + image = "[[.registry.astron.url]]/snmp-exporter:[[.image_tag]]" + ports = ["http"] + args = ["--config.file=/etc/snmp_exporter/snmp.yml", "--snmp.module-concurrency=3", "--snmp.wrap-large-counters"] + } + resources { + cpu = 100 + memory = 100 + } + } + } + + group "ping-exporter" { + count = 1 + + network { + mode = "cni/station" + } + + service { + tags = ["scrape"] + name = "ping-exporter" + port = "9427" + address_mode = "alloc" + } + + task "ping-exporter" { + driver = "docker" + + config { + image = "czerwonk/ping_exporter:latest" + cap_add = ["net_raw"] + args = ["./ping_exporter", "--config.path=/local/config.yml"] + } + resources { + cpu = 100 + memory = 100 + } + template { + destination = "local/config.yml" + data = <<EOF +targets: + [[ range $name, $sdp := $.sdptr.instances ]] + [[ range $ip := $sdp.ips ]] + - "[[ $ip ]]": + type: "sdp-fpga" + name: "[[ $name ]]" + [[ end ]] + [[ end ]] + + [[ 
range $name, $pi := $.pi.instances ]] + - "[[ $pi.ip ]]": + type: "pi" + name: "[[ $name ]]" + [[ end ]] + + {{/* `services` yields only Name and Tags; instance addresses come from `service <name>` */}}{{range services}}{{ if and (in .Tags "snmp-scrape") (.Name | contains "sidecar" | not) }}{{ range service .Name }} + - "{{ .Address }}": + type: "snmp" + name: "{{ .Name }}" + {{ end }}{{ end }}{{ end }} +EOF + } + } + } +} diff --git a/infra/jobs/station/snmp-exporter.levant.nomad b/infra/jobs/station/snmp-exporter.levant.nomad deleted file mode 100644 index ab6588023..000000000 --- a/infra/jobs/station/snmp-exporter.levant.nomad +++ /dev/null @@ -1,40 +0,0 @@ -job "snmp-exporter" { - region = "[[.region]]" - datacenters = ["stat"] - type = "service" - - group "snmp-exporter" { - count = 1 - - network { - mode = "cni/station" - } - - service { - tags = ["scrape"] - name = "snmp-exporter" - task = "snmp-exporter" - port = "9116" - address_mode = "alloc" - - meta { - metrics_address = "snmp-exporter.service.consul:9116" - metrics_path = "/metrics" - } - } - - task "snmp-exporter" { - driver = "docker" - - config { - image = "[[.registry.astron.url]]/snmp-exporter:[[.image_tag]]" - ports = ["http"] - args = ["--config.file=/etc/snmp_exporter/snmp.yml", "--snmp.module-concurrency=3", "--snmp.wrap-large-counters"] - } - resources { - cpu = 100 - memory = 100 - } - } - } -} diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION index cf2529d2b..0c2d2af31 100644 --- a/tangostationcontrol/VERSION +++ b/tangostationcontrol/VERSION @@ -1 +1 @@ -0.51.3 +0.51.4 -- GitLab