Skip to content
Snippets Groups Projects
Commit f31787e3 authored by Jan David Mol's avatar Jan David Mol
Browse files

L2SS-766: First stab at moving to the ISA 18.2 alert model, and building a...

L2SS-766: First stab at moving to the ISA 18.2 alert model, and building a custom version of the alert-webui to fix some UI issues with it
parent fbcd5d4f
No related branches found
No related tags found
1 merge request: !313 L2SS-766: First stab at moving to the ISA 18.2 alert model
Showing
with 122 additions and 7 deletions
[submodule "tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python"]
path = tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python
url = https://gitlab.com/tango-controls/hdbpp/libhdbpp-python.git
[submodule "docker-compose/alerta-web"]
path = docker-compose/alerta-web
url = https://github.com/jjdmol/alerta-webui
branch = add-isa-18-2-states
import os
# NOTE(review): debug mode is enabled unconditionally — confirm this
# configuration is only used for development/test deployments.
DEBUG = True
# NOTE(review): this secret is hard-coded in the file. Alerta's documented
# setting is presumably SECRET_KEY — confirm that the name SECRET is actually
# read, and consider loading the value from the environment or a secrets file.
SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i"
# Allow non-admin views
CUSTOMER_VIEWS = True
# Use more advanced ANSI/ISA 18.2 alarm model,
# which does not auto-close alarms and thus
# allows for tracking alarms that came and went.
ALARM_MODEL = "ISA_18_2"
# Never timeout alerts
ALERT_TIMEOUT = 0
# Auto unack after a day
ACK_TIMEOUT = 24 * 3600
# Auto unshelve after 2 hours
# NOTE(review): SHELVE_TIMEOUT is assigned twice below; the later assignment
# wins, so the effective value as written is 7 days (7 * 24 * 3600 seconds),
# not the 2 hours stated above. Confirm which value is intended and drop the other.
SHELVE_TIMEOUT = 2 * 3600
SHELVE_TIMEOUT = 7 * 24 * 3600
# Use custom date formats
DATE_FORMAT_MEDIUM_DATE = "dd DD/MM HH:mm"
......@@ -17,10 +24,31 @@ DATE_FORMAT_LONG_DATE = "yyyy-MM-DD HH:mm:ss.sss"
# Default overview settings
COLUMNS = ['severity', 'status', 'createTime', 'lastReceiveTime', 'resource', 'grafanaDashboardHtml', 'grafanaPanelHtml', 'event', 'text']
# NOTE(review): DEFAULT_FILTER is assigned twice; the later assignment wins,
# so the effective default filter as written selects the 'UNACK' and 'RTNUN'
# statuses. Confirm the 'open' variant is obsolete and remove it.
DEFAULT_FILTER = {'status': ['open']}
DEFAULT_FILTER = {'status': ['UNACK', 'RTNUN']}
SORT_LIST_BY = "createTime"
AUTO_REFRESH_INTERVAL = 5000 # ms
# Display colour per severity name, plus the colour used for text.
COLOR_MAP = {
'severity': {
'Critical': 'red',
'High': 'orange',
'Medium': '#FFF380', # corn yellow
'Low': 'dodgerblue',
'Advisory': 'lightblue',
'OK': '#00CC00', # lime green
'Unknown': 'silver'
},
'text': 'black'
}
# Allow alerta-web to refer to alerta-server for the client
# NOTE(review): if BASE_URL or DASHBOARD_URL is not set in the environment,
# an empty string is added to this list — confirm that is harmless.
CORS_ORIGINS = [
'http://localhost:8081',
'http://localhost:8082',
os.environ.get("BASE_URL", ""),
os.environ.get("DASHBOARD_URL", ""),
]
# ------------------------------------
# Plugin configuration
# ------------------------------------
......@@ -28,7 +56,7 @@ AUTO_REFRESH_INTERVAL = 5000 # ms
PLUGINS = ['reject', 'blackout', 'acked_by', 'enhance', 'grafana', 'lofar', 'slack']
# Slack plugin settings, see https://github.com/alerta/alerta-contrib/tree/master/plugins/slack
import os, json
import json
with open("/run/secrets/alerta-secrets") as secrets_file:
secrets = json.load(secrets_file)
......
......@@ -3,6 +3,7 @@ import json
import logging
from alerta.plugins import PluginBase
import alerta.models.alarms.isa_18_2 as isa_18_2
LOG = logging.getLogger()
......@@ -12,7 +13,34 @@ class EnhanceLOFAR(PluginBase):
Plugin for enhancing alerts with LOFAR-specific information
"""
@staticmethod
def _fix_severity(alert):
    """
    Force conversion of severity to ISA 18.2 model, to allow Alerta to parse the alert.

    For example, the 'prometheus' webhook by default uses the 'warning' severity,
    but also users might specify a non-existing severity level.

    A severity that is already present in isa_18_2.SEVERITY_MAP is left untouched.
    Any other value is preserved in alert.attributes['unparsableSeverity'] and then
    replaced: known legacy names are translated case-insensitively, and everything
    else (including non-string values) falls back to isa_18_2.MEDIUM.

    :param alert: the alert to fix; its 'severity' and 'attributes' are
                  modified in place. Nothing is returned.
    """

    original = alert.severity

    if original in isa_18_2.SEVERITY_MAP:
        # Already a valid ISA 18.2 severity, nothing to do
        return

    # Save original severity
    alert.attributes['unparsableSeverity'] = original

    translation = {
        "normal":   isa_18_2.OK,
        "ok":       isa_18_2.OK,
        "cleared":  isa_18_2.OK,
        "warning":  isa_18_2.LOW,
        "minor":    isa_18_2.MEDIUM,
        "major":    isa_18_2.HIGH,
        "critical": isa_18_2.CRITICAL,
    }

    # A non-string severity (f.e. a number in a hand-crafted payload) has no
    # .lower(); treat it like any other unknown level instead of crashing.
    key = original.lower() if isinstance(original, str) else None

    alert.severity = translation.get(key, isa_18_2.MEDIUM)
def pre_receive(self, alert, **kwargs):
self._fix_severity(alert)
# Parse LOFAR-specific fields
for tag in alert.tags:
try:
......
Subproject commit 9ee69dfbd0e33604169604b5a5cc506d560cb60b
{"test":[{"name":"test2","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"My test alert"},"grafana_alert":{"id":3,"orgId":1,"title":"FPGA processing error 2","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:18:48Z","intervalSeconds":10,"version":1,"uid":"waXdSCynk","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test2","no_data_state":"OK","exec_err_state":"Error"}}]},{"name":"test","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"
9","summary":"My test alert"},"grafana_alert":{"id":2,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:16:22Z","intervalSeconds":10,"version":1,"uid":"MIt4Ijs7k","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test","no_data_state":"OK","exec_err_state":"Error"}}]}]}
\ No newline at end of file
......@@ -5,7 +5,7 @@ volumes:
secrets:
alerta-secrets:
file: alerta-web/alerta-secrets.json
file: alerta-server/alerta-secrets.json
services:
alerta-web:
......@@ -14,7 +14,21 @@ services:
networks:
- control
ports:
- "8081:8080"
- 8081:80
depends_on:
- alerta-server
command: >
sh -c 'echo {\"endpoint\": \"http://\${HOSTNAME}:8082/api\"} > /usr/share/nginx/html/config.json &&
nginx -g "daemon off;"'
restart: always
alerta-server:
build: alerta-server
container_name: alerta-server
networks:
- control
ports:
- 8082:8080 # NOTE: This exposes an API and a web UI. Ignore the web UI as we replaced it with alerta-web
depends_on:
- alerta-db
secrets:
......
......@@ -15,7 +15,7 @@
"type": "webhook",
"disableResolveMessage": false,
"settings": {
"url": "http://alerta-web:8080/api/webhooks/prometheus?api-key=demo-key"
"url": "http://alerta-server:8080/api/webhooks/prometheus?api-key=demo-key"
},
"secureFields": {}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment