diff --git a/.gitignore b/.gitignore index 00941bb5af067040269d23ab91d781bfb6cd5bc7..cfd4dc461a50e0a01b60ca0f88152e9ca9a2d787 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ tangostationcontrol/docs/build **/pending_log_messages.db **/.eggs +docker-compose/alerta-web/alerta-secrets.json diff --git a/CDB/stations/DTS_ConfigDb.json b/CDB/stations/DTS_ConfigDb.json index 7017e28e75f1883d61c89038dbb6f95892995b90..398ef7d63577ce62f61c2374b9335a905ebce566 100644 --- a/CDB/stations/DTS_ConfigDb.json +++ b/CDB/stations/DTS_ConfigDb.json @@ -357,6 +357,24 @@ "902", "902", "902" + ], + "TR_fpga_mask_RW_default": [ + "True", + "True", + "True", + "True", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False", + "False" ] } } diff --git a/docker-compose/alerta-web/Dockerfile b/docker-compose/alerta-web/Dockerfile index 8a1845a7a3b9aed0a20cf30505be1cd3218bf729..80431da39da9ddb7ff0c28997660163234eb6d57 100644 --- a/docker-compose/alerta-web/Dockerfile +++ b/docker-compose/alerta-web/Dockerfile @@ -1,3 +1,14 @@ FROM alerta/alerta-web -RUN pip install git+https://github.com/alerta/alerta-contrib.git#subdirectory=plugins/slack +RUN bash -c 'source /venv/bin/activate; pip install git+https://github.com/alerta/alerta-contrib.git#subdirectory=plugins/slack' +RUN bash -c 'source /venv/bin/activate; pip install git+https://github.com/alerta/alerta-contrib.git#subdirectory=plugins/jira' + +COPY grafana-plugin /tmp/grafana-plugin +RUN bash -c 'source /venv/bin/activate; pip install /tmp/grafana-plugin' + +COPY lofar-plugin /tmp/lofar-plugin +RUN bash -c 'source /venv/bin/activate; pip install /tmp/lofar-plugin' + +COPY alertad.conf /app/alertad.conf +COPY alerta.conf /app/alerta.conf +COPY config.json /web/config.json diff --git a/docker-compose/alerta-web/README.md b/docker-compose/alerta-web/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8900026226cb6e3ee9c987792f24b44d8beff374 
--- /dev/null +++ b/docker-compose/alerta-web/README.md @@ -0,0 +1,23 @@ +You need: + +* Your own Slack App: + * Give it channel write rights + * Get the OAuth token + * Install it in your slack + * Invite the app into your channel + * Feed the OAuth token to the config + * Add it to alerta-secrets.json +* Grafana: + * By default, grafana resends alarms every 4h, configure this in the notification settings to faster resend deleted alarms for testing + * Add alerts by hand + * add "Summary" as alert text + * add label "severity": "major"/"minor"/etc (see https://docs.alerta.io/webui/configuration.html#severity-colors) + +* Create alerta-secrets.json in this directory: + +Example alerta-secrets.json: + +{ + "SLACK_TOKEN": "xoxb-...", + "SLACK_CHANNEL": "#lofar20-alerta" +} diff --git a/docker-compose/alerta-web/alerta-secrets.json b/docker-compose/alerta-web/alerta-secrets.json new file mode 100644 index 0000000000000000000000000000000000000000..8fb44d7b830c3090408fb3bd576fa297e0e2dcc9 --- /dev/null +++ b/docker-compose/alerta-web/alerta-secrets.json @@ -0,0 +1,4 @@ +{ + "SLACK_TOKEN": "xoxb-get-this-from-your-slack-app", + "SLACK_CHANNEL": "#your-channel" +} diff --git a/docker-compose/alerta-web/alerta.conf b/docker-compose/alerta-web/alerta.conf new file mode 100644 index 0000000000000000000000000000000000000000..64c8ec7019847aff146f166699aef5fc933c7560 --- /dev/null +++ b/docker-compose/alerta-web/alerta.conf @@ -0,0 +1,7 @@ +[DEFAULT] +sslverify = no +output = presto +endpoint = http://localhost:8080/api +timezone = Europe/London +key = NpzX0z_fX8TVKZtXpzop-pi2MhaGnLawKVqbJBoA +debug = yes diff --git a/docker-compose/alerta-web/alertad.conf b/docker-compose/alerta-web/alertad.conf new file mode 100644 index 0000000000000000000000000000000000000000..dc7b6c2e295ae4230a9373ed26f148d6aad59cd0 --- /dev/null +++ b/docker-compose/alerta-web/alertad.conf @@ -0,0 +1,59 @@ +DEBUG = True +SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i" + +# Allow non-admin views 
+CUSTOMER_VIEWS = True + +# Never timeout alerts +ALERT_TIMEOUT = 0 +# Auto unack after a day +ACK_TIMEOUT = 24 * 3600 +# Auto unshelve after 2 hours +SHELVE_TIMEOUT = 2 * 3600 + +# Use custom date formats +DATE_FORMAT_MEDIUM_DATE = "dd DD/MM HH:mm" +DATE_FORMAT_LONG_DATE = "yyyy-MM-DD HH:mm:ss.sss" + +# Default overview settings +COLUMNS = ['severity', 'status', 'createTime', 'lastReceiveTime', 'resource', 'grafanaDashboardHtml', 'grafanaPanelHtml', 'event', 'text'] +DEFAULT_FILTER = {'status': ['open']} +SORT_LIST_BY = "createTime" +AUTO_REFRESH_INTERVAL = 5000 # ms + +# ------------------------------------ +# Plugin configuration +# ------------------------------------ + +PLUGINS = ['reject', 'blackout', 'acked_by', 'enhance', 'grafana', 'lofar', 'slack'] + +# Slack plugin settings, see https://github.com/alerta/alerta-contrib/tree/master/plugins/slack +import os, json + +with open("/run/secrets/alerta-secrets") as secrets_file: + secrets = json.load(secrets_file) + +SLACK_WEBHOOK_URL = 'https://slack.com/api/chat.postMessage' +SLACK_TOKEN = secrets["SLACK_TOKEN"] +SLACK_CHANNEL = secrets["SLACK_CHANNEL"] +SLACK_ATTACHMENTS = True +BASE_URL = os.environ.get("BASE_URL", "") + +# for the Slack message configuration syntax, see https://api.slack.com/methods/chat.postMessage +# and https://app.slack.com/block-kit-builder +SLACK_PAYLOAD = { + "channel": "{{ channel }}", + "emoji": ":fire:", + "text": "*{{ alert.severity|capitalize }}* :: *{{ alert.resource }}* :: _{{ alert.event }}_\n\n```{{ alert.text }}```", + "attachments": [{ + "color": "{{ color }}", + "fields": [ + {"title": "Device", "value": "{{ alert.attributes.lofarDevice }}", "short": True }, + {"title": "Attribute", "value": "{{ alert.attributes.lofarAttribute }}", "short": True }, + {"title": "Resource", "value": "{{ alert.resource }}", "short": True }, + {"title": "Status", "value": "{{ status|capitalize }}", "short": True }, + {"title": "Dashboards", "value": "<{{ config.BASE_URL }}/#/alert/{{ alert.id 
}}|Alerta>\nGrafana <{{ alert.attributes.grafanaDashboardUrl }}|Dashboard> <{{ alert.attributes.grafanaPanelUrl }}|Panel>", "short": True }, + {"title": "Configure", "value": "Grafana <{{ alert.attributes.grafanaAlertUrl }}|View> <{{ alert.attributes.grafanaSilenceUrl }}|Silence>", "short": True }, + ], + }] +} diff --git a/docker-compose/alerta-web/config.json b/docker-compose/alerta-web/config.json new file mode 100644 index 0000000000000000000000000000000000000000..004ae8e0a0ef78ef99f8fd1b0ea68d851624f84d --- /dev/null +++ b/docker-compose/alerta-web/config.json @@ -0,0 +1 @@ +{"endpoint": "/api"} diff --git a/docker-compose/alerta-web/grafana-plugin/alerta_grafana.py b/docker-compose/alerta-web/grafana-plugin/alerta_grafana.py new file mode 100644 index 0000000000000000000000000000000000000000..7f6b840a4e6517bd5be2afa083ee317196725e0e --- /dev/null +++ b/docker-compose/alerta-web/grafana-plugin/alerta_grafana.py @@ -0,0 +1,62 @@ +import html +import os +import json +import logging + +from alerta.plugins import PluginBase + +LOG = logging.getLogger() + + +class EnhanceGrafana(PluginBase): + """ + Plugin for parsing alerts coming from Grafana + """ + + def pre_receive(self, alert, **kwargs): + # Parse Grafana-specific fields + alert.attributes['grafanaStatus'] = alert.raw_data.get('status', '') + + def htmlify(link: str, desc: str) -> str: + """Return an HTML anchor for link labelled desc. Both values are HTML-escaped, as link originates from alert.raw_data.""" + return f'<a href="{html.escape(link, quote=True)}" target="_blank">{html.escape(desc)}</a>' + + # User-specified "Panel ID" annotation + panelURL = alert.raw_data.get('panelURL', '') + if panelURL: + alert.attributes['grafanaPanelUrl'] = panelURL + alert.attributes['grafanaPanelHtml'] = htmlify(panelURL, "Grafana Panel") + + # User-specified "Dashboard UID" annotation + dashboardURL = alert.raw_data.get('dashboardURL', '') + if dashboardURL: + alert.attributes['grafanaDashboardUrl'] = dashboardURL + alert.attributes['grafanaDashboardHtml'] = htmlify(dashboardURL, "Grafana Dashboard") + + alertURL = alert.raw_data.get('generatorURL', '') + if alertURL: + # expose alert view
URL, as user may not have edit rights + # Convert from + # http://host:3000/alerting/kujybCynk/edit + # to + # http://host:3000/alerting/grafana/kujybCynk/view + alertURL = alertURL.replace("/alerting/", "/alerting/grafana/").replace("/edit", "/view") + + alert.attributes['grafanaAlertUrl'] = alertURL + alert.attributes['grafanaAlertHtml'] = htmlify(alertURL, "Grafana Alert") + + silenceURL = alert.raw_data.get('silenceURL', '') + if silenceURL: + alert.attributes['grafanaSilenceUrl'] = silenceURL + alert.attributes['grafanaSilenceHtml'] = htmlify(silenceURL, "Grafana Silence Alert") + + return alert + + def post_receive(self, alert, **kwargs): + return + + def status_change(self, alert, status, text, **kwargs): + return + + def take_action(self, alert, action, text, **kwargs): + raise NotImplementedError diff --git a/docker-compose/alerta-web/grafana-plugin/setup.py b/docker-compose/alerta-web/grafana-plugin/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..cb06d95919fde788f299e8318bfc23ef01dbfb79 --- /dev/null +++ b/docker-compose/alerta-web/grafana-plugin/setup.py @@ -0,0 +1,24 @@ + +from setuptools import setup, find_packages + +version = '1.0.0' + +setup( + name="alerta-grafana", + version=version, + description='Alerta plugin for enhancing Grafana alerts', + url='https://git.astron.nl/lofar2.0/tango', + license='Apache License 2.0', + author='Jan David Mol', + author_email='mol@astron.nl', + packages=find_packages(), + py_modules=['alerta_grafana'], + include_package_data=True, + zip_safe=True, + entry_points={ + 'alerta.plugins': [ + 'grafana = alerta_grafana:EnhanceGrafana' + ] + }, + python_requires='>=3.6' +) diff --git a/docker-compose/alerta-web/lofar-plugin/alerta_lofar.py b/docker-compose/alerta-web/lofar-plugin/alerta_lofar.py new file mode 100644 index 0000000000000000000000000000000000000000..c4f618d2d6675feab78fce49cedc9f8030766c97 --- /dev/null +++ b/docker-compose/alerta-web/lofar-plugin/alerta_lofar.py @@ -0,0 +1,41
@@ +import os +import json +import logging + +from alerta.plugins import PluginBase + +LOG = logging.getLogger() + + +class EnhanceLOFAR(PluginBase): + """ + Plugin for enhancing alerts with LOFAR-specific information, taken from the alert's "device", "name" and "station" tags + """ + + def pre_receive(self, alert, **kwargs): + # Parse LOFAR-specific fields from "key=value" tags; tags without "=" are skipped + for tag in alert.tags: + try: + key, value = tag.split("=", 1) + except ValueError: + continue + + if key == "device": + alert.attributes['lofarDevice'] = value + + if key == "name": + alert.attributes['lofarAttribute'] = value + + if key == "station": + alert.resource = value + + return alert + + def post_receive(self, alert, **kwargs): + return + + def status_change(self, alert, status, text, **kwargs): + return + + def take_action(self, alert, action, text, **kwargs): + raise NotImplementedError diff --git a/docker-compose/alerta-web/lofar-plugin/setup.py b/docker-compose/alerta-web/lofar-plugin/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..70ab552180a5ad10a978fb10f1deeb0d87319bb7 --- /dev/null +++ b/docker-compose/alerta-web/lofar-plugin/setup.py @@ -0,0 +1,24 @@ + +from setuptools import setup, find_packages + +version = '1.0.0' + +setup( + name="alerta-lofar", + version=version, + description='Alerta plugin for enhancing LOFAR alerts', + url='https://git.astron.nl/lofar2.0/tango', + license='Apache License 2.0', + author='Jan David Mol', + author_email='mol@astron.nl', + packages=find_packages(), + py_modules=['alerta_lofar'], + include_package_data=True, + zip_safe=True, + entry_points={ + 'alerta.plugins': [ + 'lofar = alerta_lofar:EnhanceLOFAR' + ] + }, + python_requires='>=3.5' +) diff --git a/docker-compose/alerta-web/rules.json b/docker-compose/alerta-web/rules.json new file mode 100644 index 0000000000000000000000000000000000000000..ca8df8cf7b01a4bd014387e045a2492d35292300 --- /dev/null +++ b/docker-compose/alerta-web/rules.json @@ -0,0 +1 @@
+{"test":[{"name":"test2","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"My test alert"},"grafana_alert":{"id":3,"orgId":1,"title":"FPGA processing error 2","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:18:48Z","intervalSeconds":10,"version":1,"uid":"waXdSCynk","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test2","no_data_state":"OK","exec_err_state":"Error"}}]},{"name":"test","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":
"9","summary":"My test alert"},"grafana_alert":{"id":2,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:16:22Z","intervalSeconds":10,"version":1,"uid":"MIt4Ijs7k","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test","no_data_state":"OK","exec_err_state":"Error"}}]}]} \ No newline at end of file diff --git a/docker-compose/alerta.yml b/docker-compose/alerta.yml index 5465fe19fe10a563df8527bc8cc64c4d93ee2895..2ae3be42c17e450007914facd2a686c7cce1d63e 100644 --- a/docker-compose/alerta.yml +++ b/docker-compose/alerta.yml @@ -3,6 +3,10 @@ version: '2.1' volumes: alerta-postgres-data: 
{} +secrets: + alerta-secrets: + file: alerta-web/alerta-secrets.json + services: alerta-web: build: alerta-web @@ -13,13 +17,16 @@ services: - "8081:8080" depends_on: - alerta-db + secrets: + - alerta-secrets environment: - DEBUG=1 # remove this line to turn DEBUG off - DATABASE_URL=postgres://postgres:postgres@alerta-db:5432/monitoring + - BASE_URL=http://${HOSTNAME}:8081 + - DASHBOARD_URL=http://${HOSTNAME}:8081 - AUTH_REQUIRED=True - ADMIN_USERS=admin #default password: alerta - ADMIN_KEY=demo-key - - PLUGINS=reject,blackout,normalise,enhance restart: always alerta-db: diff --git a/docker-compose/grafana.yml b/docker-compose/grafana.yml index f298db2746961b7d30d2e147192d0dfc58530725..73c508440cd63ad201b0b4199c2443b18be804a5 100644 --- a/docker-compose/grafana.yml +++ b/docker-compose/grafana.yml @@ -24,6 +24,8 @@ services: # - grafana-configs:/etc/grafana ports: - "3000:3000" + environment: + - GF_SERVER_DOMAIN=${HOSTNAME} logging: driver: syslog options: diff --git a/docker-compose/grafana/Dockerfile b/docker-compose/grafana/Dockerfile index e51cce5eeaa0310c1ecd698d8d797e3163ce4457..7eceb9c154c654da53eb0a4b060df945013bf766 100644 --- a/docker-compose/grafana/Dockerfile +++ b/docker-compose/grafana/Dockerfile @@ -3,6 +3,7 @@ FROM grafana/grafana # Install some plugins RUN grafana-cli plugins install briangann-datatable-panel RUN grafana-cli plugins install ae3e-plotly-panel +RUN grafana-cli plugins install yesoreyeram-infinity-datasource COPY grafana.ini /etc/grafana/ diff --git a/docker-compose/grafana/README.md b/docker-compose/grafana/README.md new file mode 100644 index 0000000000000000000000000000000000000000..754c00a75abde5600ee65088d057558eabe02352 --- /dev/null +++ b/docker-compose/grafana/README.md @@ -0,0 +1,21 @@ +# Post configuration + +Alert rules can be exported from a running Grafana instance and imported into a fresh one. + +To export all current alert rules, use: + + * Obtain an 'editor' API key through the Grafana GUI (cogwheel -> API keys), + * Run: + + curl
http://localhost:3000/api/alertmanager/grafana/config/api/v1/alerts -H 'Authorization: Bearer (api key)' > alerting.json + curl localhost:3000/api/ruler/grafana/api/v1/rules > rules.json + + * Delete the UIDs in alerting.json + +To import rules into a fresh Grafana instance: + + * Obtain an 'editor' API key through the Grafana GUI (cogwheel -> API keys), + * Run (first without piping to bash): + + python3 import-rules.py -c alerting.json -r rules.json -B key | bash + diff --git a/docker-compose/grafana/alerting.json b/docker-compose/grafana/alerting.json index 1a08e2cebfe5ebb77b22afbca6a0f70dd86ff4e5..d5193964ae1127c0f76cc60a05dfc8f0dd4e1bf4 100644 --- a/docker-compose/grafana/alerting.json +++ b/docker-compose/grafana/alerting.json @@ -2,7 +2,8 @@ "template_files": {}, "alertmanager_config": { "route": { - "receiver": "Alerta" + "receiver": "Alerta", + "repeat_interval": "10m" }, "templates": null, "receivers": [ @@ -10,7 +11,6 @@ "name": "Alerta", "grafana_managed_receiver_configs": [ { - "uid": "ROaAvQEnz", "name": "Alerta", "type": "webhook", "disableResolveMessage": false, diff --git a/docker-compose/grafana/dashboards/home.json b/docker-compose/grafana/dashboards/home.json index 98250c378ec60c9a79205cbb5afc3e125f75e31c..7aa5d7aad44152e32732a4b6c37f165694066f91 100644 --- a/docker-compose/grafana/dashboards/home.json +++ b/docker-compose/grafana/dashboards/home.json @@ -20,20 +20,313 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "gnetId": null, "graphTooltip": 0, - "id": 5, + "id": 8, "links": [], "liveNow": false, "panels": [ + { + "alignNumbersToRightEnabled": true, + "columnAliases": [], + "columnFiltersEnabled": false, + "columnWidthHints": [], + "columns": [], + "compactRowsEnabled": true, + "datasource": { + "type": "yesoreyeram-infinity-datasource", + "uid": "alertaui" + }, + "datatablePagingType": "simple_numbers", + "datatableTheme": "basic_theme", + "emptyData": false, + "fontSize": "100%", + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0
+ }, + "hoverEnabled": true, + "id": 58, + "infoEnabled": false, + "lengthChangeEnabled": true, + "orderColumnEnabled": true, + "pagingTypes": [ + { + "$$hashKey": "object:142", + "text": "Page number buttons only", + "value": "numbers" + }, + { + "$$hashKey": "object:143", + "text": "'Previous' and 'Next' buttons only", + "value": "simple" + }, + { + "$$hashKey": "object:144", + "text": "'Previous' and 'Next' buttons, plus page numbers", + "value": "simple_numbers" + }, + { + "$$hashKey": "object:145", + "text": "'First', 'Previous', 'Next' and 'Last' buttons", + "value": "full" + }, + { + "$$hashKey": "object:146", + "text": "'First', 'Previous', 'Next' and 'Last' buttons, plus page numbers", + "value": "full_numbers" + }, + { + "$$hashKey": "object:147", + "text": "'First' and 'Last' buttons, plus page numbers", + "value": "first_last_numbers" + } + ], + "panelHeight": 130, + "pluginVersion": "8.4.5", + "rowNumbersEnabled": false, + "rowsPerPage": 5, + "scroll": false, + "scrollHeight": "default", + "searchEnabled": true, + "searchHighlightingEnabled": false, + "showCellBorders": false, + "showHeader": true, + "showRowBorders": true, + "sort": { + "col": 0, + "desc": true + }, + "sortByColumns": [ + { + "$$hashKey": "object:17", + "columnData": 0, + "sortMethod": "desc" + } + ], + "sortByColumnsData": [ + [ + 0, + "desc" + ] + ], + "stripedRowsEnabled": true, + "styles": [ + { + "$$hashKey": "object:19", + "dateFormat": "dd DD/MM HH:mm", + "pattern": "Time", + "type": "date" + }, + { + "$$hashKey": "object:45", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Panel|Dashboard|Alert", + "sanitize": true, + "splitPattern": "/ /", + "thresholds": [], + "type": "string", + "unit": "short", + "valueMaps": [] + }, + { + "$$hashKey": "object:76", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "mappingType": 1, + "pattern": "Alerta Link", + "splitPattern": "/ /", + "thresholds": [], + "type": "string", + "unit": "short", + "valueMaps": [] + }, + { + "$$hashKey": "object:867", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Since", + "splitPattern": "/ /", + "thresholds": [], + "type": "date", + "unit": "short" + } + ], + "targets": [ + { + "columns": [ + { + "selector": "createTime", + "text": "Since", + "type": "string" + }, + { + "selector": "severity", + "text": "Severity", + "type": "string" + }, + { + "selector": "status", + "text": "Status", + "type": "string" + }, + { + "selector": "event", + "text": "Name", + "type": "string" + }, + { + "selector": "text", + "text": "Description", + "type": "string" + }, + { + "selector": "attributes.lofarDevice", + "text": "Device", + "type": "string" + }, + { + "selector": "attributes.lofarAttribute", + "text": "Attribute", + "type": "string" + }, + { + "selector": "attributes.grafanaDashboardHtml", + "text": "Dashboard", + "type": "string" + }, + { + "selector": "attributes.grafanaPanelHtml", + "text": "Panel", + "type": "string" + }, + { + "selector": "attributes.grafanaAlertHtml", + "text": "Alert", + "type": "string" + }, + { + "selector": "href", + "text": "Alerta Link", + "type": "string" + } + ], + "datasource": { + "type": "yesoreyeram-infinity-datasource", + "uid": "alertaui" + }, + "filters": [], + "format": "table", + "global_query_id": "", + "hide": false, + "refId": "A", + "root_selector": "", + "source": "url", + "type": "json", + "url": "http://alerta-web:8080/api/alerts", + "url_options": { + "data": "", + "method": "GET" + } + } + ], + "themeOptions": { + "dark": "./styles/dark.scss", + "light": "./styles/light.scss" + }, + "themes": [ + { + 
"$$hashKey": "object:117", + "disabled": false, + "text": "Basic", + "value": "basic_theme" + }, + { + "$$hashKey": "object:118", + "disabled": true, + "text": "Bootstrap", + "value": "bootstrap_theme" + }, + { + "$$hashKey": "object:119", + "disabled": true, + "text": "Foundation", + "value": "foundation_theme" + }, + { + "$$hashKey": "object:120", + "disabled": true, + "text": "ThemeRoller", + "value": "themeroller_theme" + } + ], + "title": "Alerta Alerts", + "transform": "table", + "type": "briangann-datatable-panel" + }, + { + "description": "", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 60, + "links": [ + { + "targetBlank": true, + "title": "Details", + "url": "/alerting/list" + } + ], + "options": { + "alertInstanceLabelFilter": "", + "alertName": "", + "dashboardAlerts": false, + "groupBy": [], + "groupMode": "default", + "maxItems": 20, + "sortOrder": 1, + "stateFilter": { + "error": true, + "firing": true, + "inactive": false, + "noData": false, + "normal": false, + "pending": true + } + }, + "title": "Firing Alerts", + "type": "alertlist" + }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 0 + "y": 11 }, "id": 15, "panels": [], @@ -41,7 +334,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "Progress of station initialisation", "fieldConfig": { "defaults": { @@ -76,7 +372,7 @@ "h": 6, "w": 4, "x": 0, - "y": 1 + "y": 12 }, "id": 43, "options": { @@ -92,7 +388,7 @@ "showThresholdMarkers": false, "text": {} }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -106,7 +402,10 @@ "type": "gauge" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "fieldConfig": { "defaults": { "color": { @@ -166,7 +465,7 @@ "h": 9, "w": 6, "x": 4, - "y": 1 + "y": 12 }, "id": 4, "options": { @@ -187,7 +486,7 @@ }, "textMode": 
"value_and_name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -211,7 +510,10 @@ "type": "stat" }, { - "datasource": "ELK logs", + "datasource": { + "type": "elasticsearch", + "uid": "RuQjz8V7z" + }, "fieldConfig": { "defaults": { "color": { @@ -266,7 +568,7 @@ "h": 9, "w": 10, "x": 10, - "y": 1 + "y": 12 }, "id": 32, "options": { @@ -276,7 +578,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ @@ -329,13 +632,12 @@ "type": "timeseries" }, { - "datasource": null, "description": "Links to other dashboards", "gridPos": { "h": 9, "w": 4, "x": 20, - "y": 1 + "y": 12 }, "id": 47, "options": { @@ -348,7 +650,7 @@ "showStarred": false, "tags": [] }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "format": "time_series", @@ -383,7 +685,10 @@ "type": "dashlist" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -415,13 +720,20 @@ "h": 3, "w": 4, "x": 0, - "y": 7 + "y": 18 }, "id": 44, "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": false }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -485,7 +797,10 @@ "type": "table" }, { - "datasource": "ELK logs", + "datasource": { + "type": "elasticsearch", + "uid": "RuQjz8V7z" + }, "description": "List of the errors in the selected timespan", "fieldConfig": { "defaults": { @@ -579,14 +894,21 @@ "h": 5, "w": 24, "x": 0, - "y": 10 + "y": 21 }, "id": 56, "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, "sortBy": [] }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "alias": "", @@ -676,12 +998,11 @@ }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 15 + "y": 26 }, "id": 
49, "panels": [], @@ -689,7 +1010,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "State of APSCT", "fieldConfig": { "defaults": { @@ -717,7 +1041,7 @@ "h": 3, "w": 21, "x": 0, - "y": 16 + "y": 27 }, "id": 24, "options": { @@ -735,7 +1059,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -797,7 +1121,10 @@ "type": "stat" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "State of APSPU", "fieldConfig": { "defaults": { @@ -825,7 +1152,7 @@ "h": 3, "w": 3, "x": 21, - "y": 16 + "y": 27 }, "id": 50, "options": { @@ -843,7 +1170,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -859,12 +1186,11 @@ }, { "collapsed": true, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 19 + "y": 30 }, "id": 53, "panels": [], @@ -872,7 +1198,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "State of Unboard 2 I2C Bus", "fieldConfig": { "defaults": { @@ -904,7 +1233,7 @@ "h": 3, "w": 24, "x": 0, - "y": 20 + "y": 31 }, "id": 54, "options": { @@ -922,7 +1251,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -970,12 +1299,11 @@ }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 23 + "y": 34 }, "id": 17, "panels": [], @@ -983,7 +1311,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "fieldConfig": { "defaults": { "color": { @@ -1014,7 +1345,7 @@ "h": 8, "w": 6, "x": 0, - "y": 24 + "y": 35 }, "id": 21, "options": { @@ -1032,7 +1363,7 @@ "text": {}, "textMode": "name" }, - 
"pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1047,7 +1378,10 @@ "type": "stat" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "fieldConfig": { "defaults": { "color": { @@ -1078,7 +1412,7 @@ "h": 8, "w": 6, "x": 6, - "y": 24 + "y": 35 }, "id": 25, "options": { @@ -1096,7 +1430,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1112,7 +1446,10 @@ "type": "stat" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1144,7 +1481,7 @@ "h": 8, "w": 6, "x": 12, - "y": 24 + "y": 35 }, "id": 51, "options": { @@ -1162,7 +1499,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1179,12 +1516,11 @@ }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 43 }, "id": 19, "panels": [], @@ -1192,7 +1528,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1224,7 +1563,7 @@ "h": 8, "w": 5, "x": 0, - "y": 33 + "y": 44 }, "id": 11, "options": { @@ -1242,7 +1581,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1260,7 +1599,10 @@ "type": "stat" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1292,7 +1634,7 @@ "h": 8, "w": 5, "x": 5, - "y": 33 + "y": 44 }, "id": 9, "options": { @@ -1310,7 +1652,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1328,7 +1670,10 @@ "type": "stat" }, { - 
"datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "Number of inputs that are fed from the SDP wave-form generator", "fieldConfig": { "defaults": { @@ -1366,7 +1711,7 @@ "h": 4, "w": 3, "x": 10, - "y": 33 + "y": 44 }, "id": 12, "options": { @@ -1384,7 +1729,7 @@ "text": {}, "textMode": "value" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1403,12 +1748,11 @@ }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 52 }, "id": 27, "panels": [], @@ -1416,7 +1760,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1448,7 +1795,7 @@ "h": 8, "w": 5, "x": 0, - "y": 42 + "y": 53 }, "id": 28, "options": { @@ -1466,7 +1813,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1484,7 +1831,10 @@ "type": "stat" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1546,7 +1896,7 @@ "h": 8, "w": 5, "x": 5, - "y": 42 + "y": 53 }, "id": 29, "options": { @@ -1556,7 +1906,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -1593,7 +1944,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1655,7 +2009,7 @@ "h": 8, "w": 5, "x": 10, - "y": 42 + "y": 53 }, "id": 30, "options": { @@ -1665,7 +2019,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -1686,7 +2041,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + "datasource": { + 
"type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "Rate of SSTs replicated to connected clients.", "fieldConfig": { "defaults": { @@ -1748,7 +2106,7 @@ "h": 8, "w": 5, "x": 15, - "y": 42 + "y": 53 }, "id": 33, "options": { @@ -1758,7 +2116,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -1779,7 +2138,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "Load of TCPReplicator class, which sends statistics packets to connected clients.", "fieldConfig": { "defaults": { @@ -1843,7 +2205,7 @@ "h": 8, "w": 3, "x": 20, - "y": 42 + "y": 53 }, "id": 34, "options": { @@ -1853,7 +2215,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -1875,12 +2238,11 @@ }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 61 }, "id": 36, "panels": [], @@ -1888,7 +2250,10 @@ "type": "row" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -1920,7 +2285,7 @@ "h": 4, "w": 5, "x": 0, - "y": 51 + "y": 62 }, "id": 37, "options": { @@ -1938,7 +2303,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -1956,7 +2321,10 @@ "type": "stat" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -2018,7 +2386,7 @@ "h": 8, "w": 5, "x": 5, - "y": 51 + "y": 62 }, "id": 38, "options": { @@ -2028,7 +2396,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -2065,7 +2434,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + 
"datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -2127,7 +2499,7 @@ "h": 8, "w": 5, "x": 10, - "y": 51 + "y": 62 }, "id": 39, "options": { @@ -2137,7 +2509,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -2158,7 +2531,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "Rate of XSTs replicated to connected clients.", "fieldConfig": { "defaults": { @@ -2220,7 +2596,7 @@ "h": 8, "w": 5, "x": 15, - "y": 51 + "y": 62 }, "id": 40, "options": { @@ -2230,7 +2606,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -2251,7 +2628,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "Load of TCPReplicator class, which sends statistics packets to connected clients.", "fieldConfig": { "defaults": { @@ -2315,7 +2695,7 @@ "h": 8, "w": 3, "x": 20, - "y": 51 + "y": 62 }, "id": 41, "options": { @@ -2325,7 +2705,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.1.2", @@ -2346,7 +2727,10 @@ "type": "timeseries" }, { - "datasource": "Prometheus", + "datasource": { + "type": "prometheus", + "uid": "6W2nM-Vnz" + }, "description": "", "fieldConfig": { "defaults": { @@ -2378,7 +2762,7 @@ "h": 4, "w": 5, "x": 0, - "y": 55 + "y": 66 }, "id": 45, "options": { @@ -2396,7 +2780,7 @@ "text": {}, "textMode": "name" }, - "pluginVersion": "8.2.1", + "pluginVersion": "8.4.5", "targets": [ { "exemplar": true, @@ -2415,7 +2799,7 @@ } ], "refresh": false, - "schemaVersion": 31, + "schemaVersion": 35, "style": "dark", "tags": [], "templating": { @@ -2429,5 +2813,6 @@ "timezone": "", "title": "Home", "uid": 
"nC8N_kO7k", - "version": 6 + "version": 1, + "weekStart": "" } diff --git a/docker-compose/grafana/datasources/alertaui.yaml b/docker-compose/grafana/datasources/alertaui.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fa7ddcfe36d5b1fcaf04a79a7defe166c26bcf8 --- /dev/null +++ b/docker-compose/grafana/datasources/alertaui.yaml @@ -0,0 +1,41 @@ +apiVersion: 1 + +datasources: + # <string, required> name of the datasource. Required + - name: Alerta UI + # <string, required> datasource type. Required + type: yesoreyeram-infinity-datasource + # <string, required> access mode. proxy or direct (Server or Browser in the UI). Required + access: proxy + # <int> org id. will default to orgId 1 if not specified + orgId: 1 + # <string> custom UID which can be used to reference this datasource in other parts of the configuration, if not specified will be generated automatically + uid: alertaui + # <string> url + url: http://alerta-web:8080/api + # <string> Deprecated, use secureJsonData.password + password: + # <string> database user, if used + user: postgres + # <string> database name, if used + database: hdb + # <bool> enable/disable basic auth + basicAuth: false + # <string> basic auth username + basicAuthUser: + # <string> Deprecated, use secureJsonData.basicAuthPassword + basicAuthPassword: + # <bool> enable/disable with credentials headers + withCredentials: + # <bool> mark as default datasource. Max one per org + isDefault: false + # <map> fields that will be converted to json and stored in jsonData + jsonData: + secureQueryName1: "api-key" + # <string> json object of data that will be encrypted. + secureJsonData: + secureQueryValue1: "demo-key" + version: 1 + # <bool> allow users to edit datasources from the UI. 
+ editable: false + diff --git a/docker-compose/grafana/grafana.ini b/docker-compose/grafana/grafana.ini index 82f1f4bb004e5ba3c1078226e96decf09cdca4f5..acfabe0f10190c2b07ae579d21bd1abfc1891ff3 100644 --- a/docker-compose/grafana/grafana.ini +++ b/docker-compose/grafana/grafana.ini @@ -58,7 +58,7 @@ ;static_root_path = public # enable gzip -;enable_gzip = false +enable_gzip = true # https certs & key file ;cert_file = @@ -867,7 +867,9 @@ enabled = true [panels] # If set to true Grafana will allow script tags in text panels. Not recommended as it enable XSS vulnerabilities. -;disable_sanitize_html = false + +# enable this to allow us to create mash ups with other pages +disable_sanitize_html = true [plugins] ;enable_alpha = false diff --git a/docker-compose/grafana/import-rules.py b/docker-compose/grafana/import-rules.py new file mode 100755 index 0000000000000000000000000000000000000000..340215ce1e53744aef3a2722f69c3ecdfd28ca82 --- /dev/null +++ b/docker-compose/grafana/import-rules.py @@ -0,0 +1,74 @@ +#!/usr/bin/python3 +import json +import os +import argparse + +parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description= +""" +Generate rule import files and script for Grafana. + +This script expands a given rules.json file into individual rules and +prints the bash commands to import them in Grafana. 
+ +To export rules from Grafana, use + curl <grafana>/api/ruler/grafana/api/v1/rules > rules.json +""") +parser.add_argument( + '-c', '--alert-config-file', type=str, required=False, help="Input alertmanager configuration JSON to parse, output of 'curl <grafana>/api/ruler/grafana/api/v1/rules' [%(default)s]") +parser.add_argument( + '-r', '--rules-file', type=str, required=True, help="Input rules JSON to parse, output of 'curl <grafana>/api/ruler/grafana/api/v1/rules' [%(default)s]") +parser.add_argument( + '-o', '--output-dir', type=str, default="rules", help="Directory to store the output [%(default)s]") +parser.add_argument( + '-B', '--authorization-bearer', type=str, default="abcdefghijklmnopqrstuvwxyz", help="Authorization bearer from the Grafana 'editor' API key [%(default)s]") +parser.add_argument( + '-g', '--grafana_url', type=str, default="http://localhost:3000", help="Base URL of Grafana [%(default)s]") +parser.add_argument( + '-u', '--update', default=False, action='store_true', help="Update existing alerts, instead of creating new ones [%(default)s]") + +args = parser.parse_args() + +if args.alert_config_file: + print(f"echo Importing alert configuration file {args.alert_config_file}") + print(f"curl -X POST {args.grafana_url}/api/alertmanager/grafana/config/api/v1/alerts -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'Authorization: Bearer {args.authorization_bearer}' -d '@{args.alert_config_file}'") + print(f"echo ''") + +with open(args.rules_file) as f: + data=json.load(f) + + try: + os.mkdir(args.output_dir) + except FileExistsError as e: + pass + + # the rules are of format {"folder": [{alert}, {alert}] } + for folder, rules in data.items(): + try: + os.mkdir(f"{args.output_dir}/{folder}") + except FileExistsError as e: + pass + + # print command to create folder + payload = json.dumps({"title": folder}) + print(f"echo Creating folder {folder}") + print(f"curl -X POST {args.grafana_url}/api/folders -H 'Content-Type: 
application/json' -H 'Accept: application/json' -H 'Authorization: Bearer {args.authorization_bearer}' -d '{payload}'") + print(f"echo ''") + + for rule in rules: + rule_filename = f"{args.output_dir}/{folder}/{rule['name']}.json" + + if not args.update: + # strip rule UIDs + for subrule in rule["rules"]: + del subrule["grafana_alert"]["uid"] + + # dump this rule + with open(rule_filename, "w") as rule_file: + json.dump(rule, rule_file) + + # print import statement for this rule + print(f"echo Processing rule {folder}/{rule['name']}") + print(f"curl -X POST {args.grafana_url}/api/ruler/grafana/api/v1/rules/{folder} -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'Authorization: Bearer {args.authorization_bearer}' -d '@{rule_filename}'") + print(f"echo ''") diff --git a/docker-compose/grafana/rules.json b/docker-compose/grafana/rules.json new file mode 100644 index 0000000000000000000000000000000000000000..81467dd918dd0be60e7bde30dcce798d8f209892 --- /dev/null +++ b/docker-compose/grafana/rules.json @@ -0,0 +1 @@ +{"station":[{"name":"FPGA processing error","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"One or more FPGAs are unusable."},"grafana_alert":{"id":1,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"format":"time_series","group":[],"hide":false,"intervalMs":1000,"maxDataPoints":43200,"metricColumn":"none","rawQuery":true,"rawSql":"SELECT\n $__timeGroup(data_time, $__interval),\n x::text,\n device,\n name,\n value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 
1,2","refId":"A","select":[[{"params":["value_r"],"type":"column"}]],"table":"att_scalar_devdouble","timeColumn":"data_time","timeColumnType":"timestamp","where":[{"name":"$__timeFilter","params":[],"type":"macro"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"params":[],"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"-100"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T18:01:53Z","intervalSeconds":10,"version":3,"uid":"kujybCynk","namespace_uid":"R_jsbCynz","namespace_id":6,"rule_group":"FPGA processing error","no_data_state":"NoData","exec_err_state":"Alerting"}}]}]} \ No newline at end of file diff --git a/docker-compose/timescaledb/resources/13_lofar_views.sql b/docker-compose/timescaledb/resources/13_lofar_views.sql index 9df29a278d2c12b3a058e3973f79007a95f3f379..21448342df3ff5bf2fe0fa5991309cf2bcdb676a 100644 --- a/docker-compose/timescaledb/resources/13_lofar_views.sql +++ b/docker-compose/timescaledb/resources/13_lofar_views.sql @@ -40,7 +40,7 @@ CREATE OR REPLACE VIEW lofar_array_double AS CONCAT_WS('/', domain, family, member) AS device, ac.name AS name, array_element.idx - 1 AS x, - array_element.val as value + CASE WHEN array_element.val THEN 1 ELSE 0 END AS value FROM att_array_devboolean att -- add array values, and their index JOIN LATERAL UNNEST(att.value_r) WITH ORDINALITY AS array_element(val,idx) ON TRUE @@ -53,7 +53,7 @@ CREATE OR REPLACE VIEW lofar_array_double AS att.data_time AS data_time, CONCAT_WS('/', domain, family, member) AS device, ac.name AS name, - value_r as value + CASE WHEN value_r THEN 1 ELSE 0 END AS value FROM att_scalar_devboolean att -- add the device information JOIN att_conf ac ON att.att_conf_id = 
ac.att_conf_id @@ -374,4 +374,4 @@ CREATE OR REPLACE VIEW lofar_scalar_enum AS - \ No newline at end of file + diff --git a/tangostationcontrol/docs/source/alerting.rst b/tangostationcontrol/docs/source/alerting.rst new file mode 100644 index 0000000000000000000000000000000000000000..032bcd379f68d3fa719dc8956334a910bf6227ee --- /dev/null +++ b/tangostationcontrol/docs/source/alerting.rst @@ -0,0 +1,152 @@ +Alerting +================== + +We use the following setup to forward alarms: + +- The Tango Controls `hdbpp subsystem <https://tango-controls.readthedocs.io/en/latest/administration/services/hdbpp/hdb++-design-guidelines.html>`_ archives data-value changes into a `TimescaleDB <http://timescale.com>`_ database, +- Grafana allows `Alert rules <https://grafana.com/docs/grafana/latest/alerting/>`_ to be configured, which poll TimescaleDB and generate an *alert* when the configured condition is met. It also maintains a list of currently firing alerts, +- `Alerta <https://alerta.io/>`_ is the *alert manager*: it receives these alerts, manages duplicates, and maintains alerts until the operator explicitly acknowledges them. It thus also has a list of alerts that fired in the past. + +Archiving attributes +``````````````````````` + +The attributes of interest will have to be *archived* periodically to be able to see them in Grafana, and thus to be able to define alerts for them. In Tango Controls, there is a *configuration manager* that provides an interface to manage what is archived, and one or more *event subscribers* to subscribe to event changes and forward them to the archive database. + +The ``tangostationcontrol.toolkit.archiver.Archiver`` class provides an easy interface to the archiver. It uses the ``device/attribute`` notation for attributes, f.e. ``STAT/SDP/1/FPGA_error_R``. Some of the functions it provides: + +:add_attribute_to_archiver(attribute, polling_period, event_period): Register the given attribute every ``polling_period`` ms.
Also archive on changes with a maximum rate of ``event_period`` ms. + +:remove_attribute_from_archiver(attribute): Unregister the given attribute. + +:start_archiving_attribute(attribute): Start archiving the given attribute. + +:stop_archiving_attribute(attribute): Stop archiving the given attribute. + +:get_attribute_errors(attribute): Return any errors detected while trying to archive the attribute. + +:get_subscriber_errors(): Return any errors detected by the subscribers. + +So a useful idiom to archive an individual attribute is:: + + from tangostationcontrol.toolkit.archiver import Archiver + + archiver = Archiver() + attribute = "STAT/SDP/1/FPGA_error_R" + archiver.add_attribute_to_archiver(attribute, 1000, 1000) + archiver.start_archiving_attribute(attribute) + +.. note:: The archive subscriber gets confused if attributes it archives disappear from the monitoring database. This can cause an archive subscriber to stall. To fix this, get a proxy to the event subscriber, f.e. ``DeviceProxy("archiving/hdbppts/eventsubscriber01")``, and remove the offending attribute(s) from the ``ArchivingList`` property using ``proxy.get_property("ArchivingList")`` and ``proxy.put_property({"ArchivingList": [...]})``. + +Inspecting the database +````````````````````````` + +The archived attributes end up in a `TimescaleDB <http://timescale.com>`_ database, exposed on port 5432, with credentials ``postgres/password``. Key tables are: + +:att_conf: Describes which attributes are registered. Note that any device and attribute names are in lower case. + +:att_scalar_devXXX: Contains the attribute history for scalar attributes of type XXX. + +:att_array_devXXX: Contains the attribute history for 1D array attributes of type XXX. + +:att_image_devXXX: Contains the attribute history for 2D array attributes of type XXX.
+ +Each of the attribute history tables contains entries for any recorded value changes, but also for changes in ``quality`` (0=ok, >0=issues), and any error ``att_error_desc_id``. Furthermore, we provide specialised views which combine tables into more readable information: + +:lofar_scalar_XXX: View on the attribute history for scalar attributes of type XXX. + +:lofar_array_XXX: View on the attribute history for 1D array attributes of type XXX. Each array element is returned in its own row, with ``x`` denoting the index. + +:lofar_image_XXX: View on the attribute history for 2D array attributes of type XXX. Each array element is returned in its own row, with ``x`` and ``y`` denoting the indices. + +A typical selection could thus look like:: + + SELECT + data_time AS time, device, name, x, value + FROM lofar_array_boolean + WHERE device = 'stat/sdp/1' AND name = 'fpga_error_r' + ORDER BY time DESC + LIMIT 16 + +Attributes in Grafana +```````````````````````` + +The Grafana instance (http://localhost:3000) is linked to TimescaleDB by default. The query for plotting an attribute requires some Grafana-specific macros to select the exact data points Grafana requires:: + + SELECT + $__timeGroup(data_time, $__interval), + x::text, device, name, + value + FROM lofar_array_boolean + WHERE + $__timeFilter(data_time) AND name = 'fpga_error_r' + ORDER BY 1,2 + +The fields ``x``, ``device``, and ``name`` are retrieved as *string*, as that makes them labels to the query, which Grafana then uses to identify the different metrics for each array element. + +.. hint:: Grafana orders labels alphabetically. To order the ``x`` element properly, one could use the ``TO_CHAR(x, '00')`` function instead of ``x::text`` to prepend values with 0. + +Setting up alerts +``````````````````` + +We use the `Grafana 8+ alerts <https://grafana.com/docs/grafana/latest/alerting/>`_ to monitor our system, and the alerts are to be forwarded to our Alerta instance.
Both our default set of alerts and this forwarding have to be post-configured after installation: + +- Go to Grafana (http://localhost:3000) and sign in with an administration account (default: admin/admin), +- Go to ``(cogwheel) -> API keys`` and create an ``editor`` API key. Copy the resulting hash, +- Go to the ``docker-compose/grafana/`` source directory, and run:: + + ./import-rules.py -c alerting.json -r rules.json -B <apikey> | bash + +.. hint:: Whether Grafana can send alerts to Alerta can be tested by sending a `test alert <http://localhost:3000/alerting/notifications/receivers/Alerta/edit?alertmanager=grafana>`_. + +The following enhancements are useful to configure for the alerts: + +- You'll want to alert on a query, followed by a ``Reduce`` step with Function ``Last`` and Mode ``Drop Non-numeric Value``. This triggers the alert on the latest value(s), but keeps the individual array elements separated, +- In ``Add details``, the ``Dashboard UID`` and ``Panel ID`` annotations are useful to configure to where you want the user to go, as Grafana will generate hyperlinks from them. To obtain a dashboard uid, go to ``Dashboards -> Browse`` and check out its URL. For the panel id, view a panel and check the URL, +- In ``Add details``, the ``Summary`` annotation will be used as the alert description, +- In ``Custom labels``, add ``severity = major`` to raise the severity of the alert (default: warning). See also the `supported values <https://docs.alerta.io/webui/configuration.html#severity-colors>`_.
+ +Alerta dashboard +`````````````````` + +The Alerta dashboard (http://localhost:8081) provides an overview of received alerts, which stay in the list until the alert condition disappears, and the alert is explicitly acknowledged or deleted: + +- *Acknowledging* an alert silences it for a day, +- *Shelving* an alert silences it for 2 hours, and removes it from more overviews, +- *Watching* an alert means receiving browser notifications on changes, +- *Deleting* an alert removes it until Grafana sends it again (default: 10 minutes). + +See ``docker-compose/alerta-web/alertad.conf`` for these settings. + +Several installed plugins enhance the received events: + +- ``slack`` plugin forwards alerts to Slack (see below), +- Our own ``grafana`` plugin parses Grafana-specific fields and adds them to the alert, +- Our own ``lofar`` plugin parses and generates LOFAR-specific fields. + +Slack integration +``````````````````` + +Our Alerta setup is configured to send alerts to Slack. To set this up, you need to: + +- Create a Slack App: https://api.slack.com/apps?new_app=1 +- Under ``OAuth & Permissions``, add the following ``OAuth Scope``: ``chat:write``, +- Install the App in your Workspace, +- Copy the ``OAuth Token``. + +.. hint:: To obtain the ``OAuth Token`` later on, go to https://api.slack.com/apps, click on your App, and look under ``Install App``. + +Now, edit ``docker-compose/alerta-web/alerta-secrets.json``: + +.. literalinclude:: ../../../docker-compose/alerta-web/alerta-secrets.json + +The ``SLACK_TOKEN`` is the ``OAuth Token``, and the ``SLACK_CHANNEL`` is the channel in which to post the alerts. + +Any further tweaking can be done by modifying ``docker-compose/alerta-web/alertad.conf``. + +Debugging hints +```````````````````````` + +- Grafana sends alerts to Alerta using the *Prometheus AlertManager* format, and thus uses the Prometheus webhook to do so. 
To see what Grafana emits, configure it to send to your custom https://hookbin.com/ endpoint, +- Grafana by default resends firing alerts every 4 hours, and we set this to 10 minutes. This means that if an alert was successfully sent but lost (or deleted), it takes that long to get it back. For debugging, you may want to lower this to f.e. 10 seconds in the ``Alerting -> Notification policies`` settings of Grafana, +- Alerta has a plugin system which allows easily modifying the attributes of an alert (see ``docker-compose/alerta-web`` and https://github.com/alerta/alerta-contrib). To see which attributes an alert has, simply go to the alert in the web GUI, press *Copy*, and paste in your editor, +- Alerta allows a ``DEBUG=True`` parameter in ``docker-compose/alerta-web/alertad.conf`` to generate debug output. diff --git a/tangostationcontrol/docs/source/index.rst b/tangostationcontrol/docs/source/index.rst index c0b64b2a83975abf0636157347354506c0532d9d..d89a0cda0bef89798497ae94263cd0204c4dfe3a 100644 --- a/tangostationcontrol/docs/source/index.rst +++ b/tangostationcontrol/docs/source/index.rst @@ -30,6 +30,7 @@ Even without having access to any LOFAR2.0 hardware, you can install the full st devices/sst-xst devices/configure configure_station + alerting signal_chain beam_tracking developer diff --git a/tangostationcontrol/docs/source/installation.rst b/tangostationcontrol/docs/source/installation.rst index fd01fe45e0b27de2170c23341ab04e1c6b97f900..09877ef26a8c5f2ea71822338910b884e9bd7a3b 100644 --- a/tangostationcontrol/docs/source/installation.rst +++ b/tangostationcontrol/docs/source/installation.rst @@ -78,13 +78,3 @@ Configuration These sections are optional, to configure specific functionality you may or may not want to use. -Alerta -```````` - -If you want Grafana alerts to appear in Alerta, you need to manually configure Grafana to forward them.
Import the alert settings manually: - -- Go to Grafana (http://localhost:3000) and sign in with an administration account (default: admin/admin), -- Go to ``Alerting`` and select ``Admin`` in the left menu bar, -- Copy/paste the following information, and press ``Save``: - -.. literalinclude:: ../../../docker-compose/grafana/alerting.json diff --git a/tangostationcontrol/tangostationcontrol/devices/sdp/sdp.py b/tangostationcontrol/tangostationcontrol/devices/sdp/sdp.py index f356c0dfc0e70632d8f57d40dbf32b31d90a49c4..d551c0104eee3a13fcb792e168f2dce97ac3a84b 100644 --- a/tangostationcontrol/tangostationcontrol/devices/sdp/sdp.py +++ b/tangostationcontrol/tangostationcontrol/devices/sdp/sdp.py @@ -180,8 +180,9 @@ class SDP(opcua_device): def read_FPGA_error_R(self): return self.read_attribute("TR_fpga_mask_R") & ( self.read_attribute("TR_fpga_communication_error_R") - | (self.read_attribute("FPGA_firmware_version_R") != "") - | (self.read_attribute("FPGA_jesd204b_csr_dev_syncn_R") == 0).any(axis=1) + | (self.read_attribute("FPGA_firmware_version_R") == "") + # we cannot assume all inputs of an FPGA are working until we have a mask for it + #| (self.read_attribute("FPGA_jesd204b_csr_dev_syncn_R") == 0).any(axis=1) ) def read_FPGA_processing_error_R(self): diff --git a/tangostationcontrol/tangostationcontrol/integration_test/default/toolkit/test_archiver.py b/tangostationcontrol/tangostationcontrol/integration_test/default/toolkit/test_archiver.py index ebe7a59e57632b107ef1eaa61fb9e36de56b168b..091db2c253e5e163167afc204170d89cf087c61a 100644 --- a/tangostationcontrol/tangostationcontrol/integration_test/default/toolkit/test_archiver.py +++ b/tangostationcontrol/tangostationcontrol/integration_test/default/toolkit/test_archiver.py @@ -63,7 +63,7 @@ class TestArchiver(BaseIntegrationTestCase): """ polling_period=1000 archive_event_period=3000 - attr_fullname = 'stat/recv/1/recvtr_translator_busy_r' # boolean + attr_fullname = 'stat/recv/1/recvtr_translator_busy_r' # 
boolean, but lofar view returns int self.archiver.add_attribute_to_archiver(attr_fullname, polling_period, archive_event_period) time.sleep(3) # Test if the attribute has been correctly added to event subscriber @@ -78,7 +78,7 @@ class TestArchiver(BaseIntegrationTestCase): self.assertEqual('stat/recv/1',item.device) # column device self.assertEqual('recvtr_translator_busy_r',item.name) # column attribute self.assertEqual(datetime,type(item.data_time)) # column datetime - self.assertEqual(bool,type(item.value)) # column value + self.assertEqual(int,type(item.value)) # column value """ # Remove attribute at the end of the test