Commit 26de9962 authored by Hannes Feldt

fix

parent 2927b09b
4 merge requests: !752 Resolve L2SS-1525 "Migrate minio", !751 Resolve L2SS-1340 "Deploy monitoring to nomad", !749 Resolve L2SS-1342 "Migrate jupyter", !685 Resolve L2SS-1340 "Deploy monitoring to nomad"
@@ -9,6 +9,18 @@ plugin "docker" {
 }
 client {
+  host_volume "monitoring-postgresql-data" {
+    path = "/localdata/volumes/monitoring-postgresql-data"
+  }
+  host_volume "monitoring-loki-data" {
+    path = "/localdata/volumes/monitoring-loki-data"
+  }
+  host_volume "monitoring-prometheus-data" {
+    path = "/localdata/volumes/monitoring-prometheus-data"
+  }
   host_volume "tango-database" {
     path = "/localdata/volumes/tango-database"
   }
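For reference, a Nomad job consumes these client-registered host volumes with a group-level volume block and a task-level volume_mount. A minimal sketch, assuming a hypothetical group and task (the volume name matches the client config above):

group "postgres" {
  # Claim the host volume registered on the Nomad client above.
  volume "postgresql" {
    type      = "host"
    source    = "monitoring-postgresql-data"
    read_only = false
  }

  task "postgres" {
    driver = "docker"

    # Mount the claimed volume into the container's data directory.
    volume_mount {
      volume      = "postgresql"
      destination = "/var/lib/postgresql/data"
      read_only   = false
    }
  }
}

The monitoring job further down in this commit uses exactly this pattern for its postgres, prometheus and loki groups.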
@@ -118,3 +118,14 @@ resource "nomad_job" "tango" {
     jobs = ["tango"]
   }
 }
+resource "nomad_job" "monitoring" {
+  cluster = resource.nomad_cluster.station
+  paths = ["./jobs/station/monitoring.nomad", "./jobs/station/logging.nomad"]
+  health_check {
+    timeout = "300s"
+    jobs = ["monitoring"]
+  }
+}
@@ -18,6 +18,7 @@ job "log-scraping" {
       mode = "fail"
     }
     network {
+      mode = "bridge"
       port "api" {
         to = 8686
       }
@@ -25,7 +26,7 @@ job "log-scraping" {
     volume "docker-sock" {
       type = "host"
-      source = "docker-sock"
+      source = "docker-sock-ro"
       read_only = true
     }
@@ -34,6 +35,23 @@ job "log-scraping" {
       sticky = true
     }
+    service {
+      connect {
+        sidecar_service {
+          proxy {
+            upstreams {
+              destination_name = "prometheus"
+              local_bind_port = 9090
+            }
+            upstreams {
+              destination_name = "loki"
+              local_bind_port = 3100
+            }
+          }
+        }
+      }
+    }
     task "vector" {
       driver = "docker"
       config {
@@ -61,36 +79,64 @@ job "log-scraping" {
         destination = "local/vector.toml"
         change_mode = "signal"
         change_signal = "SIGHUP"
-        left_delimiter = "##"
-        right_delimiter = "##"
-        data = <<EOH
+        left_delimiter = "(("
+        right_delimiter = "))"
+        data = <<EOF
 data_dir = "alloc/data/vector/"
+healthchecks.require_healthy = true
 [api]
 enabled = true
 address = "0.0.0.0:8686"
-playground = true
+playground = false
-[sources.logs]
+[sources.docker-local]
 type = "docker_logs"
-[sinks.out]
-type = "console"
-inputs = [ "logs" ]
-encoding.codec = "json"
+docker_host = "/var/run/docker.sock"
+exclude_containers = ["vector-"]
+include_containers = ["grafana-", "loki-"]
+[transforms.nomad-flags]
+inputs = ["docker-local"]
+type = "remap"
+source = '''
+structured =
+  parse_syslog(.message) ??
+  parse_json(.message) ??
+  parse_common_log(.message) ??
+  parse_key_value!(.message)
+. = merge!(., structured)
+.nomad.job = .label."com.hashicorp.nomad.job_name"
+.nomad.task = .label."com.hashicorp.nomad.task_name"
+.nomad.group = .label."com.hashicorp.nomad.task_group_name"
+.nomad.namespace = .label."com.hashicorp.nomad.namespace"
+.nomad.node = .label."com.hashicorp.nomad.node_name"
+.nomad.job_id = .label."com.hashicorp.nomad.job_id"
+.nomad.node_id = .label."com.hashicorp.nomad.node_id"
+'''
 [sinks.loki]
 type = "loki"
-inputs = ["logs"]
-endpoint = "http://## range service "loki" #### .Address ##:## .Port #### end ##"
+inputs = [ "nomad-flags" ]
+endpoint = "http://localhost:3100"
 encoding.codec = "json"
 healthcheck.enabled = true
-# since . is used by Vector to denote a parent-child relationship, and Nomad's Docker labels contain ".",
-# we need to escape them twice, once for TOML, once for Vector
-labels.job = "{{ label.com\\.hashicorp\\.nomad\\.job_name }}"
-labels.task = "{{ label.com\\.hashicorp\\.nomad\\.task_name }}"
-labels.group = "{{ label.com\\.hashicorp\\.nomad\\.task_group_name }}"
-labels.namespace = "{{ label.com\\.hashicorp\\.nomad\\.namespace }}"
-labels.node = "{{ label.com\\.hashicorp\\.nomad\\.node_name }}"
-# remove fields that have been converted to labels to avoid having the field twice
 remove_label_fields = true
-EOH
+[sinks.loki.labels]
+"nomad_*" = '{{ nomad }}'
+[sources.host_metrics]
+type = "host_metrics"
+scrape_interval_secs = 10
+[sources.nomad_metrics]
+type = "prometheus_scrape"
+scrape_interval_secs = 10
+endpoints = [ "http://(( env "attr.unique.network.ip-address" )):4646/v1/metrics?format=prometheus" ]
+[sinks.prometheus_remote_write]
+type = "prometheus_remote_write"
+inputs = [ "host_metrics", "nomad_metrics" ]
+endpoint = "http://localhost:9090/api/v1/write"
+healthcheck.enabled = false
+EOF
       }
       service {
        check {
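Two things tie this hunk together. First, the Consul Connect upstreams declared in the new service block bind the loki and prometheus services to localhost inside the allocation, which is why the Vector sinks can simply target http://localhost:3100 and http://localhost:9090/api/v1/write. Second, the template delimiters change from ## to (( and )), presumably so Nomad's templating no longer collides with Vector's own {{ ... }} syntax used in '{{ nomad }}'. A minimal sketch of the upstream pattern, using the loki service from the diff above:

service {
  connect {
    sidecar_service {
      proxy {
        # Inside this allocation, 127.0.0.1:3100 reaches the "loki"
        # service through the Envoy sidecar, matching the Vector sink
        # endpoint "http://localhost:3100" in the template above.
        upstreams {
          destination_name = "loki"
          local_bind_port  = 3100
        }
      }
    }
  }
}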
job "monitoring" {
datacenters = ["stat"]
type = "service"
group "grafana" {
network {
mode = "bridge"
port "http" {
# should be migrated to port 3000 once this fully replaces the docker-compose setup
static = 3001
to = 3000
}
}
service {
tags = ["haproxy", "scrape"]
name = "grafana"
port = "http"
connect {
sidecar_service {
proxy {
upstreams {
destination_name = "postgres"
local_bind_port = 5432
}
upstreams {
destination_name = "prometheus"
local_bind_port = 9090
}
upstreams {
destination_name = "loki"
local_bind_port = 3100
}
}
}
}
}
task "grafana" {
driver = "docker"
config {
image = "git.astron.nl:5000/lofar2.0/tango/grafana:[[.image_tag]]"
ports = ["http"]
mount {
type = "bind"
source = "local/datasource-prometheus.yaml"
target = "/etc/grafana/provisioning/datasources/prometheus.yaml"
}
mount {
type = "bind"
source = "local/datasource-loki.yaml"
target = "/etc/grafana/provisioning/datasources/loki.yaml"
}
}
env {
GF_SERVER_DOMAIN = "[[.station]]c.control.lofar"
GF_DATABASE_TYPE = "postgres"
GF_DATABASE_HOST = "localhost:5432"
GF_DATABASE_NAME = "grafana"
GF_DATABASE_USER = "postgres"
GF_DATABASE_PASSWORD = "password"
}
template {
data = <<EOH
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
uid: prometheus
url: http://localhost:9090
EOH
destination = "local/datasource-prometheus.yaml"
}
template {
data = <<EOH
apiVersion: 1
datasources:
- name: Loki
type: loki
access: proxy
url: http://localhost:3100
jsonData:
esVersion: 7.10.0
includeFrozen: false
logLevelField:
logMessageField:
maxConcurrentShardRequests: 5
timeField: "@timestamp"
EOH
destination = "local/datasource-loki.yaml"
}
resources {
cpu = 250
memory = 256
}
}
}
group "postgres" {
count = 1
network {
mode = "bridge"
port "postgres" {
to = 5432
}
}
volume "postgresql" {
type = "host"
read_only = false
source = "monitoring-postgresql-data"
}
service {
name = "postgres"
port = "postgres"
task = "postgres"
address_mode = "alloc"
connect {
sidecar_service {}
}
}
task "postgres" {
driver = "docker"
volume_mount {
volume = "postgresql"
destination = "/var/lib/postgresql/data"
read_only = false
}
config {
image = "postgres:[[.monitoring.db.version]]"
ports = ["postgres"]
}
env {
POSTGRES_DB = "grafana"
POSTGRES_USER = "postgres"
POSTGRES_PASSWORD = "password"
}
resources {
cpu = 250
memory = 512
}
}
}
group "prometheus" {
network {
mode = "bridge"
port "prometheus" {
to = 9090
# should be activated once this fully replaces the docker-compose setup
#static = 9090
}
}
volume "prometheus" {
type = "host"
read_only = false
source = "monitoring-prometheus-data"
}
service {
tags = ["haproxy", "scrape"]
name = "prometheus"
port = "prometheus"
address_mode = "alloc"
connect {
sidecar_service {}
}
check {
type = "http"
name = "prometheus_health"
port = "prometheus"
path = "/-/healthy"
interval = "20s"
timeout = "30s"
}
}
task "prometheus" {
driver = "docker"
volume_mount {
volume = "prometheus"
destination = "/prometheus"
read_only = false
}
config {
image = "git.astron.nl:5000/lofar2.0/tango/prometheus:[[.image_tag]]"
ports = ["prometheus"]
mount {
type = "bind"
source = "local/prometheus.yaml"
target = "/etc/prometheus/prometheus.yml"
}
}
template {
data = <<EOH
global:
evaluation_interval: 10s
scrape_interval: 10s
scrape_timeout: 10s
scrape_configs:
- job_name: tango
static_configs:
- targets: ["tango-prometheus-exporter:8000"]
labels:
"host": "localhost"
- job_name: tango-fast
scrape_interval: 1s
static_configs:
- targets: ["tango-prometheus-fast-exporter:8000"]
labels:
"host": "localhost"
- job_name: host
scrape_interval: 60s
static_configs:
- targets: ["host.docker.internal:9100"]
labels:
"host": "localhost"
- job_name: logstash
static_configs:
- targets: ["logstash-exporter:9198"]
labels:
"host": "localhost"
- job_name: 'consul-server'
metrics_path: '/v1/agent/metrics'
params:
format: ['prometheus']
static_configs:
- targets: ['{{ with node }}{{ .Node.Address }}:8500{{ end }}']
- job_name: 'nomad_metrics'
consul_sd_configs:
- server: '{{ with node }}{{ .Node.Address }}:8500{{ end }}'
services: ['nomad-client', 'nomad']
relabel_configs:
- source_labels: ['__meta_consul_tags']
regex: '(.*)http(.*)'
action: keep
scrape_interval: 5s
metrics_path: /v1/metrics
params:
format: ['prometheus']
{{range services}}{{if in .Tags "scrape"}}{{ if .Name | regexMatch "(.+)-sidecar-proxy$" }}{{ else }}
- job_name: {{.Name}}
consul_sd_configs:
- server: '{{ with node }}{{ .Node.Address }}:8500{{ end }}'
services:
- '{{.Name}}'
{{end}}{{end}}{{end}}
EOH
destination = "local/prometheus.yaml"
}
resources {
cpu = 250
memory = 768
}
}
}
group "loki" {
network {
mode = "bridge"
port "loki" {
to = 3100
# should be activated once this fully replaces the docker-compose setup
#static = 3100
}
}
volume "loki" {
type = "host"
read_only = false
source = "monitoring-loki-data"
}
service {
tags = ["haproxy", "scrape"]
name = "loki"
port = "loki"
address_mode = "alloc"
connect {
sidecar_service {}
}
}
task "loki" {
driver = "docker"
volume_mount {
volume = "loki"
destination = "/loki"
read_only = false
}
config {
image = "git.astron.nl:5000/lofar2.0/tango/loki:[[.image_tag]]"
ports = ["prometheus"]
}
resources {
cpu = 250
memory = 768
}
}
}
}
job "monitoring" {
datacenters = ["stat"]
type = "service"
group "grafana" {
network {
mode = "bridge"
port "http" {
# should be migrated to port 3000 once this fully replaces the docker-compose setup
static = 3001
to = 3000
}
}
service {
tags = ["haproxy", "scrape"]
name = "grafana"
port = "http"
connect {
sidecar_service {
proxy {
upstreams {
destination_name = "postgres"
local_bind_port = 5432
}
upstreams {
destination_name = "prometheus"
local_bind_port = 9090
}
upstreams {
destination_name = "loki"
local_bind_port = 3100
}
}
}
}
}
task "grafana" {
driver = "docker"
config {
image = "git.astron.nl:5000/lofar2.0/tango/grafana:[[.image_tag]]"
ports = ["http"]
mount {
type = "bind"
source = "local/datasource-prometheus.yaml"
target = "/etc/grafana/provisioning/datasources/prometheus.yaml"
}
mount {
type = "bind"
source = "local/datasource-loki.yaml"
target = "/etc/grafana/provisioning/datasources/loki.yaml"
}
}
env {
GF_SERVER_DOMAIN = "[[.station]]c.control.lofar"
GF_DATABASE_TYPE = "postgres"
GF_DATABASE_HOST = "localhost:5432"
GF_DATABASE_NAME = "grafana"
GF_DATABASE_USER = "postgres"
GF_DATABASE_PASSWORD = "password"
}
template {
data = <<EOH
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
uid: prometheus
url: http://localhost:9090
EOH
destination = "local/datasource-prometheus.yaml"
}
template {
data = <<EOH
apiVersion: 1
datasources:
- name: Loki
type: loki
access: proxy
url: http://localhost:3100
jsonData:
esVersion: 7.10.0
includeFrozen: false
logLevelField:
logMessageField:
maxConcurrentShardRequests: 5
timeField: "@timestamp"
EOH
destination = "local/datasource-loki.yaml"
}
resources {
cpu = 250
memory = 256
}
}
}
group "postgres" {
count = 1
network {
mode = "bridge"
port "postgres" {
to = 5432
}
}
volume "postgresql" {
type = "host"
read_only = false
source = "monitoring-postgresql-data"
}
service {
name = "postgres"
port = "postgres"
task = "postgres"
address_mode = "alloc"
connect {
sidecar_service {}
}
}
task "postgres" {
driver = "docker"
volume_mount {
volume = "postgresql"
destination = "/var/lib/postgresql/data"
read_only = false
}
config {
image = "postgres:[[.monitoring.db.version]]"
ports = ["postgres"]
}
env {
POSTGRES_DB = "grafana"
POSTGRES_USER = "postgres"
POSTGRES_PASSWORD = "password"
}
resources {
cpu = 250
memory = 512
}
}
}
group "prometheus" {
network {
mode = "bridge"
port "prometheus" {
to = 9090
# should be activated once this fully replaces the docker-compose setup
#static = 9090
}
}
volume "prometheus" {
type = "host"
read_only = false
source = "monitoring-prometheus-data"
}
service {
tags = ["haproxy", "scrape"]
name = "prometheus"
port = "prometheus"
address_mode = "alloc"
connect {
sidecar_service {}
}
check {
type = "http"
name = "prometheus_health"
port = "prometheus"
path = "/-/healthy"
interval = "20s"
timeout = "30s"
}
}
task "prometheus" {
driver = "docker"
volume_mount {
volume = "prometheus"
destination = "/prometheus"
read_only = false
}
config {
image = "git.astron.nl:5000/lofar2.0/tango/prometheus:[[.image_tag]]"
ports = ["prometheus"]
args = [
"--config.file=/etc/prometheus/prometheus.yml",
"--web.enable-remote-write-receiver"
]
mount {
type = "bind"
source = "local/prometheus.yaml"
target = "/etc/prometheus/prometheus.yml"
}
}
template {
data = <<EOH
global:
evaluation_interval: 10s
scrape_interval: 10s
scrape_timeout: 10s
scrape_configs:
- job_name: tango
static_configs:
- targets: ["tango-prometheus-exporter:8000"]
labels:
"host": "localhost"
- job_name: tango-fast
scrape_interval: 1s
static_configs:
- targets: ["tango-prometheus-fast-exporter:8000"]
labels:
"host": "localhost"
- job_name: host
scrape_interval: 60s
static_configs:
- targets: ["host.docker.internal:9100"]
labels:
"host": "localhost"
- job_name: logstash
static_configs:
- targets: ["logstash-exporter:9198"]
labels:
"host": "localhost"
- job_name: 'consul-server'
metrics_path: '/v1/agent/metrics'
params:
format: ['prometheus']
static_configs:
- targets: ['{{ with node }}{{ .Node.Address }}:8500{{ end }}']
- job_name: 'nomad_metrics'
consul_sd_configs:
- server: '{{ with node }}{{ .Node.Address }}:8500{{ end }}'
services: ['nomad-client', 'nomad']
relabel_configs:
- source_labels: ['__meta_consul_tags']
regex: '(.*)http(.*)'
action: keep
scrape_interval: 5s
metrics_path: /v1/metrics
params:
format: ['prometheus']
{{range services}}{{if in .Tags "scrape"}}{{ if .Name | regexMatch "(.+)-sidecar-proxy$" }}{{ else }}
- job_name: {{.Name}}
consul_sd_configs:
- server: '{{ with node }}{{ .Node.Address }}:8500{{ end }}'
services:
- '{{.Name}}'
{{end}}{{end}}{{end}}
EOH
destination = "local/prometheus.yaml"
}
resources {
cpu = 250
memory = 768
}
}
}
group "loki" {
network {
mode = "bridge"
port "loki" {
to = 3100
# should be activated once this fully replaces the docker-compose setup
#static = 3100
}
}
volume "loki" {
type = "host"
read_only = false
source = "monitoring-loki-data"
}
service {
tags = ["haproxy", "scrape"]
name = "loki"
port = "loki"
address_mode = "alloc"
connect {
sidecar_service {}
}
}
task "loki" {
driver = "docker"
volume_mount {
volume = "loki"
destination = "/loki"
read_only = false
}
config {
image = "git.astron.nl:5000/lofar2.0/tango/loki:[[.image_tag]]"
ports = ["prometheus"]
}
resources {
cpu = 250
memory = 768
}
}
}
}
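A note on the "scrape" tag carried by the grafana, prometheus and loki services: the templated prometheus.yaml above ranges over all Consul services and emits a consul_sd_configs scrape job for every service tagged "scrape", skipping the Connect sidecar proxies. Any Nomad service can therefore opt into scraping with just a tag; an illustrative sketch with a placeholder name and port label:

service {
  # Picked up by the templated prometheus.yaml, which generates a
  # Consul service-discovery scrape job for every "scrape"-tagged service.
  name = "my-exporter"   # placeholder
  port = "metrics"       # placeholder
  tags = ["scrape"]
}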
@@ -25,6 +25,12 @@ fi
 if [ "$(docker volume list | grep -c dev_nomad_station)" = "0" ]; then
   docker volume create "dev_nomad_station"
-  docker pull -q bash
-  docker run -i --rm -v dev_nomad_station:/mnt bash bash -c 'mkdir -p /mnt/volumes/tango-database'
 fi
+docker pull -q bash
+echo | docker run --rm -i -v dev_nomad_station:/mnt bash bash <<- EOM
+mkdir -p /mnt/volumes/tango-database
+mkdir -p /mnt/volumes/monitoring-postgresql-data
+mkdir -p /mnt/volumes/monitoring-loki-data
+mkdir -p /mnt/volumes/monitoring-prometheus-data
+EOM