Skip to content
Snippets Groups Projects
Commit f31787e3 authored by Jan David Mol's avatar Jan David Mol
Browse files

L2SS-766: First stab at moving to the ISA 18.2 alert model, and building a...

L2SS-766: First stab at moving to the ISA 18.2 alert model, and building a custom version of the alert-webui to fix some UI issues with it
parent fbcd5d4f
No related branches found
No related tags found
1 merge request: !313 L2SS-766: First stab at moving to the ISA 18.2 alert model
Showing
with 122 additions and 7 deletions
[submodule "tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python"] [submodule "tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python"]
path = tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python path = tangostationcontrol/tangostationcontrol/toolkit/libhdbpp-python
url = https://gitlab.com/tango-controls/hdbpp/libhdbpp-python.git url = https://gitlab.com/tango-controls/hdbpp/libhdbpp-python.git
[submodule "docker-compose/alerta-web"]
path = docker-compose/alerta-web
url = https://github.com/jjdmol/alerta-webui
branch = add-isa-18-2-states
import os
DEBUG = True DEBUG = True
SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i" SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i"
# Allow non-admin views # Allow non-admin views
CUSTOMER_VIEWS = True CUSTOMER_VIEWS = True
# Use more advanced ANSI/ISA 18.2 alarm model,
# which does not auto-close alarms and thus
# allows for tracking alarms that came and went.
ALARM_MODEL = "ISA_18_2"
# Never timeout alerts # Never timeout alerts
ALERT_TIMEOUT = 0 ALERT_TIMEOUT = 0
# Auto unack after a day # Auto unack after a day
ACK_TIMEOUT = 24 * 3600 ACK_TIMEOUT = 24 * 3600
# Auto unshelve after 2 hours # Auto unshelve after a week
SHELVE_TIMEOUT = 2 * 3600 SHELVE_TIMEOUT = 7 * 24 * 3600
# Use custom date formats # Use custom date formats
DATE_FORMAT_MEDIUM_DATE = "dd DD/MM HH:mm" DATE_FORMAT_MEDIUM_DATE = "dd DD/MM HH:mm"
...@@ -17,10 +24,31 @@ DATE_FORMAT_LONG_DATE = "yyyy-MM-DD HH:mm:ss.sss" ...@@ -17,10 +24,31 @@ DATE_FORMAT_LONG_DATE = "yyyy-MM-DD HH:mm:ss.sss"
# Default overview settings # Default overview settings
COLUMNS = ['severity', 'status', 'createTime', 'lastReceiveTime', 'resource', 'grafanaDashboardHtml', 'grafanaPanelHtml', 'event', 'text'] COLUMNS = ['severity', 'status', 'createTime', 'lastReceiveTime', 'resource', 'grafanaDashboardHtml', 'grafanaPanelHtml', 'event', 'text']
DEFAULT_FILTER = {'status': ['open']} DEFAULT_FILTER = {'status': ['UNACK', 'RTNUN']}
SORT_LIST_BY = "createTime" SORT_LIST_BY = "createTime"
AUTO_REFRESH_INTERVAL = 5000 # ms AUTO_REFRESH_INTERVAL = 5000 # ms
# Colour overrides: one colour per severity name, plus the colour of the text.
COLOR_MAP = {
    'severity': {
        'Critical': 'red',
        'High': 'orange',
        'Medium': '#FFF380',  # corn yellow
        'Low': 'dodgerblue',
        'Advisory': 'lightblue',
        'OK': '#00CC00',  # lime green
        'Unknown': 'silver',
    },
    'text': 'black',
}
# Allow alerta-web to refer to alerta-server for the client.
# BASE_URL and DASHBOARD_URL are optional: when they are unset (or empty)
# they are left out, instead of adding an empty-string origin to the list.
CORS_ORIGINS = [
    origin
    for origin in (
        'http://localhost:8081',
        'http://localhost:8082',
        os.environ.get("BASE_URL", ""),
        os.environ.get("DASHBOARD_URL", ""),
    )
    if origin
]
# ------------------------------------ # ------------------------------------
# Plugin configuration # Plugin configuration
# ------------------------------------ # ------------------------------------
...@@ -28,7 +56,7 @@ AUTO_REFRESH_INTERVAL = 5000 # ms ...@@ -28,7 +56,7 @@ AUTO_REFRESH_INTERVAL = 5000 # ms
PLUGINS = ['reject', 'blackout', 'acked_by', 'enhance', 'grafana', 'lofar', 'slack'] PLUGINS = ['reject', 'blackout', 'acked_by', 'enhance', 'grafana', 'lofar', 'slack']
# Slack plugin settings, see https://github.com/alerta/alerta-contrib/tree/master/plugins/slack # Slack plugin settings, see https://github.com/alerta/alerta-contrib/tree/master/plugins/slack
import os, json import json
with open("/run/secrets/alerta-secrets") as secrets_file: with open("/run/secrets/alerta-secrets") as secrets_file:
secrets = json.load(secrets_file) secrets = json.load(secrets_file)
......
...@@ -3,6 +3,7 @@ import json ...@@ -3,6 +3,7 @@ import json
import logging import logging
from alerta.plugins import PluginBase from alerta.plugins import PluginBase
import alerta.models.alarms.isa_18_2 as isa_18_2
LOG = logging.getLogger() LOG = logging.getLogger()
...@@ -12,7 +13,34 @@ class EnhanceLOFAR(PluginBase): ...@@ -12,7 +13,34 @@ class EnhanceLOFAR(PluginBase):
Plugin for enhancing alerts with LOFAR-specific information Plugin for enhancing alerts with LOFAR-specific information
""" """
@staticmethod
def _fix_severity(alert):
    """
    Force conversion of severity to ISA 18.2 model, to allow Alerta to parse the alert.

    For example, the 'prometheus' webhook by default uses the 'warning' severity,
    but also users might specify a non-existing severity level.

    The alert is modified in place. If its severity is already a key of
    isa_18_2.SEVERITY_MAP, nothing changes. Otherwise:

    * the original value is kept in alert.attributes['unparsableSeverity'];
    * the severity is looked up case-insensitively, first among the known
      foreign names ('warning', 'minor', 'major', ...), then among the
      ISA 18.2 names themselves (so 'high' becomes isa_18_2.HIGH rather
      than falling through to the default);
    * anything still unrecognised becomes isa_18_2.MEDIUM.
    """
    if alert.severity in isa_18_2.SEVERITY_MAP:
        return

    # Save original severity
    alert.attributes['unparsableSeverity'] = alert.severity

    # ISA 18.2 names written in a different case than the model expects
    translation = {name.lower(): name for name in isa_18_2.SEVERITY_MAP}

    # Names from other severity models; these take precedence over the
    # case-insensitive matches above.
    translation.update({
        "normal": isa_18_2.OK,
        "ok": isa_18_2.OK,
        "cleared": isa_18_2.OK,
        "warning": isa_18_2.LOW,
        "minor": isa_18_2.MEDIUM,
        "major": isa_18_2.HIGH,
        "critical": isa_18_2.CRITICAL,
    })

    alert.severity = translation.get(alert.severity.lower(), isa_18_2.MEDIUM)
def pre_receive(self, alert, **kwargs): def pre_receive(self, alert, **kwargs):
self._fix_severity(alert)
# Parse LOFAR-specific fields # Parse LOFAR-specific fields
for tag in alert.tags: for tag in alert.tags:
try: try:
......
Subproject commit 9ee69dfbd0e33604169604b5a5cc506d560cb60b
{"test":[{"name":"test2","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"My test alert"},"grafana_alert":{"id":3,"orgId":1,"title":"FPGA processing error 2","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:18:48Z","intervalSeconds":10,"version":1,"uid":"waXdSCynk","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test2","no_data_state":"OK","exec_err_state":"Error"}}]},{"name":"test","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"
9","summary":"My test alert"},"grafana_alert":{"id":2,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:16:22Z","intervalSeconds":10,"version":1,"uid":"MIt4Ijs7k","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test","no_data_state":"OK","exec_err_state":"Error"}}]}]}
\ No newline at end of file
...@@ -5,7 +5,7 @@ volumes: ...@@ -5,7 +5,7 @@ volumes:
secrets: secrets:
alerta-secrets: alerta-secrets:
file: alerta-web/alerta-secrets.json file: alerta-server/alerta-secrets.json
services: services:
alerta-web: alerta-web:
...@@ -14,7 +14,21 @@ services: ...@@ -14,7 +14,21 @@ services:
networks: networks:
- control - control
ports: ports:
- "8081:8080" - 8081:80
depends_on:
- alerta-server
command: >
sh -c 'echo {\"endpoint\": \"http://\${HOSTNAME}:8082/api\"} > /usr/share/nginx/html/config.json &&
nginx -g "daemon off;"'
restart: always
alerta-server:
build: alerta-server
container_name: alerta-server
networks:
- control
ports:
- 8082:8080 # NOTE: This exposes an API and a web UI. Ignore the web UI as we replaced it with alerta-web
depends_on: depends_on:
- alerta-db - alerta-db
secrets: secrets:
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
"type": "webhook", "type": "webhook",
"disableResolveMessage": false, "disableResolveMessage": false,
"settings": { "settings": {
"url": "http://alerta-web:8080/api/webhooks/prometheus?api-key=demo-key" "url": "http://alerta-server:8080/api/webhooks/prometheus?api-key=demo-key"
}, },
"secureFields": {} "secureFields": {}
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment