Add blackbox-exporter HTTP probing to the central observability stack.

  .gitlab-ci.yml
      Render and deploy the new "monitoring" Levant job.
  infra/logs-central.levant.nomad
      Set allow_structured_metadata to false, remove one "configs:" line
      and move the schema period start from 2025-01-09 to 2025-01-11.
  infra/metrics-central.levant.nomad
      Add a "blackbox" scrape job whose probes are relabelled to
      blackbox.service.consul:9115, set metrics_path and params on the
      "nomad" scrape job, and remove max_global_series_per_user.
  infra/monitoring-central.levant.nomad
      New Nomad job running blackbox-exporter on static port 9115. The
      service's task reference is "blackbox-exporter" so that it names the
      task defined in the same group.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch; verify them against the target
files (or apply with whitespace-tolerant options) before use. The blob id on
the new file's index line predates the task-reference correction.

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5d090b31f2f1d9dbbf27ade3fa7b10171ff959fa..b2d57046f025abe47abac39d8b5fdd486bebba03 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -49,6 +49,7 @@ render_levant:
           - logs
           - grafana-lofar
           - metrics
+          - monitoring
           - tracing
   script:
     - |
@@ -155,6 +156,7 @@ deploy-levant:
           - logs
           - grafana-lofar
           - metrics
+          - monitoring
           - tracing
   environment:
     name: ${SERVER}.control.lofar
diff --git a/infra/logs-central.levant.nomad b/infra/logs-central.levant.nomad
index a3b2d5b88c4c46d04db7ab8507c4b7cf8db8d838..ed63563080dd625c28e413e8998b363e9b70d039 100644
--- a/infra/logs-central.levant.nomad
+++ b/infra/logs-central.levant.nomad
@@ -62,6 +62,7 @@ job "logs" {
       region: NL
 
 limits_config:
+  allow_structured_metadata: false
   ingestion_rate_mb: 100
   ingestion_burst_size_mb: 1000
 
@@ -78,8 +79,7 @@ job "logs" {
       object_store: s3
       schema: v12
       store: tsdb
-  configs:
-    - from: "2025-01-09"
+    - from: "2025-01-11"
       index:
         period: 24h
         prefix: index_
diff --git a/infra/metrics-central.levant.nomad b/infra/metrics-central.levant.nomad
index 8ce4869ea1372e9e0e6721d918895f591bafcd85..0a7230eda71d9ba3b62a824d529b7d8bc38ffd65 100644
--- a/infra/metrics-central.levant.nomad
+++ b/infra/metrics-central.levant.nomad
@@ -73,6 +73,28 @@ job "metrics" {
   - job_name: 'lta_ingest_transfer_service'
     static_configs:
       - targets: ['lexar003.control.lofar:8001']
+  - job_name: 'blackbox'
+    params:
+      module:
+        - http_2xx
+    metrics_path: '/probe'
+    static_configs:
+      - targets:
+        - vault.lofar.net
+        - s3.lofar.net
+        - tmss.lofar.eu
+        - lta.lofar.eu
+        - git.astron.nl
+        {{range services}}{{if in .Tags "cert"}}{{ if .Name | regexMatch "(.+)-sidecar-proxy$" }}{{ else }}
+        - '{{.Name}}.lofar.net'
+        {{end}}{{end}}{{end}}
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: 'blackbox.service.consul:9115'
   - job_name: 'consul'
     metrics_path: '/v1/agent/metrics'
     params:
@@ -80,6 +102,9 @@ job "metrics" {
     static_configs:
       - targets: ['consul.service.consul:8500']
   - job_name: 'nomad'
+    metrics_path: '/v1/metrics'
+    params:
+      format: ['prometheus']
     consul_sd_configs:
       - server: 'consul.service.consul:8500'
         services: ['nomad']
@@ -191,7 +216,6 @@ job "metrics" {
   skip_label_name_validation_header_enabled: true
 
 limits:
-  max_global_series_per_user: 2000000
   max_label_value_length: 100000
   max_label_name_length: 100000
   ingestion_rate: 100000
diff --git a/infra/monitoring-central.levant.nomad b/infra/monitoring-central.levant.nomad
new file mode 100644
index 0000000000000000000000000000000000000000..bd5d597e4b7b39ff11e4b68c71784c6bcfeca382
--- /dev/null
+++ b/infra/monitoring-central.levant.nomad
@@ -0,0 +1,37 @@
+job "monitoring" {
+  datacenters = ["nl-north"]
+  type        = "service"
+  namespace   = "observability"
+
+  # blackbox monitors http/https/tcp endpoints
+  group "blackbox" {
+    count = 1
+
+    network {
+      port "http" {
+        to     = 9115
+        static = 9115
+      }
+    }
+
+    service {
+      tags = ["scrape"]
+      name = "blackbox"
+      task = "blackbox-exporter"
+      port = "http"
+    }
+
+    task "blackbox-exporter" {
+      driver = "docker"
+
+      config {
+        image = "quay.io/prometheus/blackbox-exporter:latest"
+        ports = ["http"]
+      }
+      resources {
+        cpu    = 100
+        memory = 100
+      }
+    }
+  }
+}