From f31787e30e8d69ea998476507dbb0efda033eb67 Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Mon, 18 Apr 2022 15:58:03 +0200 Subject: [PATCH] L2SS-766: First stab at moving to the ISA 18.2 alert model, and building a custom version of the alerta-webui to fix some UI issues with it --- .gitmodules | 4 ++ .../{alerta-web => alerta-server}/Dockerfile | 0 .../{alerta-web => alerta-server}/README.md | 0 .../alerta-secrets.json | 0 .../{alerta-web => alerta-server}/alerta.conf | 0 .../alertad.conf | 34 ++++++++- .../{alerta-web => alerta-server}/config.json | 0 .../grafana-plugin/alerta_grafana.py | 0 .../grafana-plugin/setup.py | 0 .../lofar-plugin/alerta_lofar.py | 69 +++++++++++++++++++ .../lofar-plugin/setup.py | 0 docker-compose/alerta-web | 1 + .../alerta-web/lofar-plugin/alerta_lofar.py | 41 ----------- docker-compose/alerta-web/rules.json | 1 - docker-compose/alerta.yml | 18 ++++- docker-compose/grafana/alerting.json | 2 +- 16 files changed, 122 insertions(+), 48 deletions(-) rename docker-compose/{alerta-web => alerta-server}/Dockerfile (100%) rename docker-compose/{alerta-web => alerta-server}/README.md (100%) rename docker-compose/{alerta-web => alerta-server}/alerta-secrets.json (100%) rename docker-compose/{alerta-web => alerta-server}/alerta.conf (100%) rename docker-compose/{alerta-web => alerta-server}/alertad.conf (75%) rename docker-compose/{alerta-web => alerta-server}/config.json (100%) rename docker-compose/{alerta-web => alerta-server}/grafana-plugin/alerta_grafana.py (100%) rename docker-compose/{alerta-web => alerta-server}/grafana-plugin/setup.py (100%) create mode 100644 docker-compose/alerta-server/lofar-plugin/alerta_lofar.py rename docker-compose/{alerta-web => alerta-server}/lofar-plugin/setup.py (100%) create mode 160000 docker-compose/alerta-web delete mode 100644 docker-compose/alerta-web/lofar-plugin/alerta_lofar.py delete mode 100644 docker-compose/alerta-web/rules.json diff --git a/.gitmodules b/.gitmodules index 
1c9e69fc5..f1248450a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python"] path = tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python url = https://gitlab.com/tango-controls/hdbpp/libhdbpp-python.git +[submodule "docker-compose/alerta-web"] + path = docker-compose/alerta-web + url = https://github.com/jjdmol/alerta-webui + branch = add-isa-18-2-states diff --git a/docker-compose/alerta-web/Dockerfile b/docker-compose/alerta-server/Dockerfile similarity index 100% rename from docker-compose/alerta-web/Dockerfile rename to docker-compose/alerta-server/Dockerfile diff --git a/docker-compose/alerta-web/README.md b/docker-compose/alerta-server/README.md similarity index 100% rename from docker-compose/alerta-web/README.md rename to docker-compose/alerta-server/README.md diff --git a/docker-compose/alerta-web/alerta-secrets.json b/docker-compose/alerta-server/alerta-secrets.json similarity index 100% rename from docker-compose/alerta-web/alerta-secrets.json rename to docker-compose/alerta-server/alerta-secrets.json diff --git a/docker-compose/alerta-web/alerta.conf b/docker-compose/alerta-server/alerta.conf similarity index 100% rename from docker-compose/alerta-web/alerta.conf rename to docker-compose/alerta-server/alerta.conf diff --git a/docker-compose/alerta-web/alertad.conf b/docker-compose/alerta-server/alertad.conf similarity index 75% rename from docker-compose/alerta-web/alertad.conf rename to docker-compose/alerta-server/alertad.conf index dc7b6c2e2..b0088c6c2 100644 --- a/docker-compose/alerta-web/alertad.conf +++ b/docker-compose/alerta-server/alertad.conf @@ -1,15 +1,22 @@ +import os + DEBUG = True SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i" # Allow non-admin views CUSTOMER_VIEWS = True +# Use more advanced ANSI/ISA 18.2 alarm model, +# which does not auto-close alarms and thus +# allows for tracking alarms that came and went. 
+ALARM_MODEL = "ISA_18_2" + # Never timeout alerts ALERT_TIMEOUT = 0 # Auto unack after a day ACK_TIMEOUT = 24 * 3600 -# Auto unshelve after 2 hours -SHELVE_TIMEOUT = 2 * 3600 +# Auto unshelve after 7 days +SHELVE_TIMEOUT = 7 * 24 * 3600 # Use custom date formats DATE_FORMAT_MEDIUM_DATE = "dd DD/MM HH:mm" @@ -17,10 +24,31 @@ DATE_FORMAT_LONG_DATE = "yyyy-MM-DD HH:mm:ss.sss" # Default overview settings COLUMNS = ['severity', 'status', 'createTime', 'lastReceiveTime', 'resource', 'grafanaDashboardHtml', 'grafanaPanelHtml', 'event', 'text'] -DEFAULT_FILTER = {'status': ['open']} +DEFAULT_FILTER = {'status': ['UNACK', 'RTNUN']} SORT_LIST_BY = "createTime" AUTO_REFRESH_INTERVAL = 5000 # ms +COLOR_MAP = { + 'severity': { + 'Critical': 'red', + 'High': 'orange', + 'Medium': '#FFF380', # corn yellow + 'Low': 'dodgerblue', + 'Advisory': 'lightblue', + 'OK': '#00CC00', # lime green + 'Unknown': 'silver' + }, + 'text': 'black' +} + +# Allow alerta-web to refer to alerta-server for the client +CORS_ORIGINS = [ + 'http://localhost:8081', + 'http://localhost:8082', + os.environ.get("BASE_URL", ""), + os.environ.get("DASHBOARD_URL", ""), +] + # ------------------------------------ # Plugin configuration # ------------------------------------ @@ -28,7 +56,7 @@ AUTO_REFRESH_INTERVAL = 5000 # ms PLUGINS = ['reject', 'blackout', 'acked_by', 'enhance', 'grafana', 'lofar', 'slack'] # Slack plugin settings, see https://github.com/alerta/alerta-contrib/tree/master/plugins/slack -import os, json +import json with open("/run/secrets/alerta-secrets") as secrets_file: secrets = json.load(secrets_file) diff --git a/docker-compose/alerta-web/config.json b/docker-compose/alerta-server/config.json similarity index 100% rename from docker-compose/alerta-web/config.json rename to docker-compose/alerta-server/config.json diff --git a/docker-compose/alerta-web/grafana-plugin/alerta_grafana.py b/docker-compose/alerta-server/grafana-plugin/alerta_grafana.py similarity index 100% rename from 
docker-compose/alerta-web/grafana-plugin/alerta_grafana.py rename to docker-compose/alerta-server/grafana-plugin/alerta_grafana.py diff --git a/docker-compose/alerta-web/grafana-plugin/setup.py b/docker-compose/alerta-server/grafana-plugin/setup.py similarity index 100% rename from docker-compose/alerta-web/grafana-plugin/setup.py rename to docker-compose/alerta-server/grafana-plugin/setup.py diff --git a/docker-compose/alerta-server/lofar-plugin/alerta_lofar.py b/docker-compose/alerta-server/lofar-plugin/alerta_lofar.py new file mode 100644 index 000000000..b227069c8 --- /dev/null +++ b/docker-compose/alerta-server/lofar-plugin/alerta_lofar.py @@ -0,0 +1,69 @@ +import os +import json +import logging + +from alerta.plugins import PluginBase +import alerta.models.alarms.isa_18_2 as isa_18_2 + +LOG = logging.getLogger() + + +class EnhanceLOFAR(PluginBase): + """ + Plugin for enhancing alerts with LOFAR-specific information + """ + + @staticmethod + def _fix_severity(alert): + """ + Force conversion of severity to ISA 18.2 model, to allow Alerta to parse the alert. + + For example, the 'prometheus' webhook uses the 'warning' severity by default, + and users might also specify a nonexistent severity level. 
+ """ + + if alert.severity not in isa_18_2.SEVERITY_MAP: + # Save original severity + alert.attributes['unparsableSeverity'] = alert.severity + + translation = { + "normal": isa_18_2.OK, + "ok": isa_18_2.OK, + "cleared": isa_18_2.OK, + "warning": isa_18_2.LOW, + "minor": isa_18_2.MEDIUM, + "major": isa_18_2.HIGH, + "critical": isa_18_2.CRITICAL, + } + + alert.severity = translation.get(alert.severity.lower(), isa_18_2.MEDIUM) + + def pre_receive(self, alert, **kwargs): + self._fix_severity(alert) + + # Parse LOFAR-specific fields + for tag in alert.tags: + try: + key, value = tag.split("=", 1) + except ValueError: + continue + + if key == "device": + alert.attributes['lofarDevice'] = value + + if key == "name": + alert.attributes['lofarAttribute'] = value + + if key == "station": + alert.resource = value + + return alert + + def post_receive(self, alert, **kwargs): + return + + def status_change(self, alert, status, text, **kwargs): + return + + def take_action(self, alert, action, text, **kwargs): + raise NotImplementedError diff --git a/docker-compose/alerta-web/lofar-plugin/setup.py b/docker-compose/alerta-server/lofar-plugin/setup.py similarity index 100% rename from docker-compose/alerta-web/lofar-plugin/setup.py rename to docker-compose/alerta-server/lofar-plugin/setup.py diff --git a/docker-compose/alerta-web b/docker-compose/alerta-web new file mode 160000 index 000000000..9ee69dfbd --- /dev/null +++ b/docker-compose/alerta-web @@ -0,0 +1 @@ +Subproject commit 9ee69dfbd0e33604169604b5a5cc506d560cb60b diff --git a/docker-compose/alerta-web/lofar-plugin/alerta_lofar.py b/docker-compose/alerta-web/lofar-plugin/alerta_lofar.py deleted file mode 100644 index c4f618d2d..000000000 --- a/docker-compose/alerta-web/lofar-plugin/alerta_lofar.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -import json -import logging - -from alerta.plugins import PluginBase - -LOG = logging.getLogger() - - -class EnhanceLOFAR(PluginBase): - """ - Plugin for enhancing alerts with 
LOFAR-specific information - """ - - def pre_receive(self, alert, **kwargs): - # Parse LOFAR-specific fields - for tag in alert.tags: - try: - key, value = tag.split("=", 1) - except ValueError: - continue - - if key == "device": - alert.attributes['lofarDevice'] = value - - if key == "name": - alert.attributes['lofarAttribute'] = value - - if key == "station": - alert.resource = value - - return alert - - def post_receive(self, alert, **kwargs): - return - - def status_change(self, alert, status, text, **kwargs): - return - - def take_action(self, alert, action, text, **kwargs): - raise NotImplementedError diff --git a/docker-compose/alerta-web/rules.json b/docker-compose/alerta-web/rules.json deleted file mode 100644 index ca8df8cf7..000000000 --- a/docker-compose/alerta-web/rules.json +++ /dev/null @@ -1 +0,0 @@ -{"test":[{"name":"test2","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"My test alert"},"grafana_alert":{"id":3,"orgId":1,"title":"FPGA processing error 2","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 
1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:18:48Z","intervalSeconds":10,"version":1,"uid":"waXdSCynk","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test2","no_data_state":"OK","exec_err_state":"Error"}}]},{"name":"test","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"My test alert"},"grafana_alert":{"id":2,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 
1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:16:22Z","intervalSeconds":10,"version":1,"uid":"MIt4Ijs7k","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test","no_data_state":"OK","exec_err_state":"Error"}}]}]} \ No newline at end of file diff --git a/docker-compose/alerta.yml b/docker-compose/alerta.yml index 2ae3be42c..f828f1413 100644 --- a/docker-compose/alerta.yml +++ b/docker-compose/alerta.yml @@ -5,7 +5,7 @@ volumes: secrets: alerta-secrets: - file: alerta-web/alerta-secrets.json + file: alerta-server/alerta-secrets.json services: alerta-web: @@ -14,7 +14,21 @@ services: networks: - control ports: - - "8081:8080" + - 8081:80 + depends_on: + - alerta-server + command: > + sh -c 'echo {\"endpoint\": \"http://\${HOSTNAME}:8082/api\"} > /usr/share/nginx/html/config.json && + nginx -g "daemon off;"' + restart: always + + alerta-server: + build: alerta-server + container_name: alerta-server + networks: + - control + ports: + - 8082:8080 # NOTE: This exposes an API and a web UI. 
Ignore the web UI as we replaced it with alerta-web depends_on: - alerta-db secrets: diff --git a/docker-compose/grafana/alerting.json b/docker-compose/grafana/alerting.json index d5193964a..bc5c76e7f 100644 --- a/docker-compose/grafana/alerting.json +++ b/docker-compose/grafana/alerting.json @@ -15,7 +15,7 @@ "type": "webhook", "disableResolveMessage": false, "settings": { - "url": "http://alerta-web:8080/api/webhooks/prometheus?api-key=demo-key" + "url": "http://alerta-server:8080/api/webhooks/prometheus?api-key=demo-key" }, "secureFields": {} } -- GitLab