From 8e91d3a01f23e4000a147aa9307d3057ef24e933 Mon Sep 17 00:00:00 2001
From: Jan David Mol <mol@astron.nl>
Date: Thu, 15 May 2025 07:37:39 +0000
Subject: [PATCH] Resolve L2SS-2313 "Ping devices"

---
 .gitlab-ci.yml                                |  6 +-
 README.md                                     |  2 +
 infra/env/cs.yaml                             | 39 ++++++++
 infra/env/rs.yaml                             | 38 ++++++++
 .../station/network-monitoring.levant.nomad   | 94 +++++++++++++++++++
 infra/jobs/station/snmp-exporter.levant.nomad | 40 --------
 tangostationcontrol/VERSION                   |  2 +-
 7 files changed, 177 insertions(+), 44 deletions(-)
 create mode 100644 infra/jobs/station/network-monitoring.levant.nomad
 delete mode 100644 infra/jobs/station/snmp-exporter.levant.nomad

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4d2630056..b1d4bb9a0 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -49,7 +49,7 @@ include:
     # only disregard them. As such, additional
     # rules are always needed, either to filter
     # further or to always accept.
-    - if: $CI_PIPELINE_SOURCE == "pipeline"
+    - if: $CI_PIPELINE_SOURCE == "pipeline" && $STATION != ""
       when: never
 
 # Prepare image to run ci on
@@ -85,7 +85,7 @@ run_shellcheck:
     LOFAR1_STATIONS: cs002 cs003 cs004 cs005 cs006 cs007 cs011 cs013 cs017 cs021 cs024 cs026 cs028 cs030 cs031 cs101 cs103 cs201 cs301 cs302 cs401 cs501 rs106 rs205 rs208 rs210 rs305 rs306 rs310 rs406 rs407 rs409 rs503 rs508 rs509
     LOFAR2_STATIONS: cs001 cs032 rs307
     DEV_STATIONS: dts-lab
-    COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter snmp-exporter landing-page rpc-server
+    COMPONENTS: mesh-gateway monitoring logging tango object-storage object-replication sdptr device-server dsconfig ec-sim jupyter network-monitoring landing-page rpc-server
 
 # Generate the station-specific TangoDB configuration based on LOFAR1 information.
 # Run manually to bootstrap CDB/stations/$station.json for stations
@@ -666,7 +666,7 @@ deploy_auto:
   stage: deploy
   extends: .components
   rules:
-    - if: $CI_PIPELINE_SOURCE == "pipeline"
+    - if: $CI_PIPELINE_SOURCE == "pipeline" && $STATION != ""
   variables:
     PARENT_PIPELINE_ID: $CI_PIPELINE_ID
   trigger:
diff --git a/README.md b/README.md
index 871bfe24e..6edc2dcea 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,7 @@ Next change the version in the following places:
    through [https://git.astron.nl/lofar2.0/tango/-/tags](Deploy Tags)
 
 # Release Notes
+* 0.51.4 Ping FPGAs and PIs as part of monitoring
 * 0.51.3 Mirror central calibration tables into Calibration device using a volume and sync nomad job.
 * 0.51.2 Allow automated deployment from a multi-project pipeline in Gitlab CI/CD
 * 0.51.1 Generate caltables from LOFAR1 data or by generating dummies
diff --git a/infra/env/cs.yaml b/infra/env/cs.yaml
index a7526f21b..61d819d5f 100644
--- a/infra/env/cs.yaml
+++ b/infra/env/cs.yaml
@@ -4,9 +4,48 @@ sdptr:
     lba:
       first_gn: 0
       fpgas: 16
+      ips:  # 16 entries, matching fpgas above
+        - 10.99.0.1
+        - 10.99.0.2
+        - 10.99.0.3
+        - 10.99.0.4
+        - 10.99.1.1
+        - 10.99.1.2
+        - 10.99.1.3
+        - 10.99.1.4
+        - 10.99.2.1
+        - 10.99.2.2
+        - 10.99.2.3
+        - 10.99.2.4
+        - 10.99.3.1
+        - 10.99.3.2
+        - 10.99.3.3
+        - 10.99.3.4
     hba0:
       first_gn: 16
       fpgas: 4
+      ips:  # 4 entries, matching fpgas above
+        - 10.99.4.1
+        - 10.99.4.2
+        - 10.99.4.3
+        - 10.99.4.4
     hba1:
       first_gn: 20
       fpgas: 4
+      ips:  # 4 entries, matching fpgas above
+        - 10.99.5.1
+        - 10.99.5.2
+        - 10.99.5.3
+        - 10.99.5.4
+
+pi:  # hosts the network-monitoring job pings with type "pi"
+  instances:  # the instance key becomes the "name" label of the ping target
+    l0:
+      ip: 10.99.0.100
+    l1:
+      ip: 10.99.1.100
+    h0:
+      ip: 10.99.2.100
+    ccd:
+      ip: 10.99.250.90
+
diff --git a/infra/env/rs.yaml b/infra/env/rs.yaml
index 9458b8ee6..8329426f6 100644
--- a/infra/env/rs.yaml
+++ b/infra/env/rs.yaml
@@ -4,6 +4,44 @@ sdptr:
     lba:
       first_gn: 0
       fpgas: 16
+      ips:  # 16 entries, matching fpgas above
+        - 10.99.0.1
+        - 10.99.0.2
+        - 10.99.0.3
+        - 10.99.0.4
+        - 10.99.1.1
+        - 10.99.1.2
+        - 10.99.1.3
+        - 10.99.1.4
+        - 10.99.2.1
+        - 10.99.2.2
+        - 10.99.2.3
+        - 10.99.2.4
+        - 10.99.3.1
+        - 10.99.3.2
+        - 10.99.3.3
+        - 10.99.3.4
     hba:
       first_gn: 16
       fpgas: 8
+      ips:  # 8 entries, matching fpgas above
+        - 10.99.4.1
+        - 10.99.4.2
+        - 10.99.4.3
+        - 10.99.4.4
+        - 10.99.5.1
+        - 10.99.5.2
+        - 10.99.5.3
+        - 10.99.5.4
+
+pi:  # hosts the network-monitoring job pings with type "pi"
+  instances:  # the instance key becomes the "name" label of the ping target
+    l0:
+      ip: 10.99.0.100
+    l1:
+      ip: 10.99.1.100
+    h0:
+      ip: 10.99.2.100
+    ccd:
+      ip: 10.99.250.90
+
diff --git a/infra/jobs/station/network-monitoring.levant.nomad b/infra/jobs/station/network-monitoring.levant.nomad
new file mode 100644
index 000000000..b0ef5555c
--- /dev/null
+++ b/infra/jobs/station/network-monitoring.levant.nomad
@@ -0,0 +1,94 @@
+job "network-monitoring" {
+  region      = "[[.region]]"
+  datacenters = ["stat"]
+  type        = "service"
+
+  group "snmp-exporter" {
+    count = 1
+
+    network {
+      mode = "cni/station"
+    }
+
+    # Service registration; the meta block carries the metrics endpoint of this exporter.
+    service {
+      name         = "snmp-exporter"
+      task         = "snmp-exporter"
+      port         = "9116"
+      address_mode = "alloc"
+      tags         = ["scrape"]
+      meta {
+        metrics_path    = "/metrics"
+        metrics_address = "snmp-exporter.service.consul:9116"
+      }
+    }
+
+    task "snmp-exporter" {
+      driver = "docker"
+      resources {
+        memory = 100
+        cpu    = 100
+      }
+      # Image registry and tag are filled in from the deployment variables.
+      config {
+        image = "[[.registry.astron.url]]/snmp-exporter:[[.image_tag]]"
+        args  = ["--config.file=/etc/snmp_exporter/snmp.yml", "--snmp.module-concurrency=3", "--snmp.wrap-large-counters"]
+        ports = ["http"]
+      }
+    }
+  }
+
+  group "ping-exporter" {
+    count = 1
+
+    network {
+      mode = "cni/station"
+    }
+
+    service {
+      tags = ["scrape"]
+      name = "ping-exporter"
+      port = "9427"
+      address_mode = "alloc"
+    }
+
+    task "ping-exporter" {
+      driver = "docker"
+      # NOTE(review): "latest" is a floating tag; consider pinning a released version.
+      config {
+        image   = "czerwonk/ping_exporter:latest"
+        cap_add = ["net_raw"]  # presumably required for the raw ICMP sockets used to ping
+        args    = ["./ping_exporter", "--config.path=/local/config.yml"]
+      }
+      resources {
+        cpu    = 100
+        memory = 100
+      }
+      template {  # target list read by the exporter through --config.path
+        destination     = "local/config.yml"
+        data = <<EOF
+targets:  # every target is a map of address to its type and name labels
+    [[ range $name, $sdp := $.sdptr.instances ]]
+    [[ range $ip := $sdp.ips ]]
+    - "[[ $ip ]]":
+        type: "sdp-fpga"
+        name: "[[ $name ]]"
+    [[ end ]]
+    [[ end ]]
+    # PIs, from the pi.instances variables
+    [[ range $name, $pi := $.pi.instances ]]
+    - "[[ $pi.ip ]]":
+        type: "pi"
+        name: "[[ $name ]]"
+    [[ end ]]
+    # Consul services tagged snmp-scrape (sidecars excluded), one target per instance
+    {{ range services }}{{ if and (in .Tags "snmp-scrape") (.Name | contains "sidecar" | not) }}{{ range service .Name }}
+    - "{{ .Address }}":
+        type: "snmp"
+        name: "{{ .Name }}"
+    {{ end }}{{ end }}{{ end }}
+EOF
+      }
+    }
+  }
+}
diff --git a/infra/jobs/station/snmp-exporter.levant.nomad b/infra/jobs/station/snmp-exporter.levant.nomad
deleted file mode 100644
index ab6588023..000000000
--- a/infra/jobs/station/snmp-exporter.levant.nomad
+++ /dev/null
@@ -1,40 +0,0 @@
-job "snmp-exporter" {
-  region      = "[[.region]]"
-  datacenters = ["stat"]
-  type        = "service"
-
-  group "snmp-exporter" {
-    count = 1
-
-    network {
-      mode = "cni/station"
-    }
-
-    service {
-      tags = ["scrape"]
-      name = "snmp-exporter"
-      task = "snmp-exporter"
-      port = "9116"
-      address_mode = "alloc"
-
-      meta {
-        metrics_address = "snmp-exporter.service.consul:9116"
-        metrics_path = "/metrics"
-      }
-    }
-
-    task "snmp-exporter" {
-      driver = "docker"
-
-      config {
-        image   = "[[.registry.astron.url]]/snmp-exporter:[[.image_tag]]"
-        ports   = ["http"]
-        args    = ["--config.file=/etc/snmp_exporter/snmp.yml", "--snmp.module-concurrency=3", "--snmp.wrap-large-counters"]
-      }
-      resources {
-        cpu    = 100
-        memory = 100
-      }
-    }
-  }
-}
diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION
index cf2529d2b..0c2d2af31 100644
--- a/tangostationcontrol/VERSION
+++ b/tangostationcontrol/VERSION
@@ -1 +1 @@
-0.51.3
+0.51.4
-- 
GitLab