Skip to content
Snippets Groups Projects
Commit 30bdfa01 authored by Jan David Mol's avatar Jan David Mol
Browse files

L2SS-685: Add and configure Alerta SLACK & Grafana & LOFAR plugins

parent eb3bc285
No related branches found
No related tags found
1 merge request!290Resolve L2SS-685 "Add alerta docker"
Showing
with 328 additions and 15 deletions
...@@ -27,3 +27,4 @@ tangostationcontrol/docs/build ...@@ -27,3 +27,4 @@ tangostationcontrol/docs/build
**/pending_log_messages.db **/pending_log_messages.db
**/.eggs **/.eggs
docker-compose/alerta-web/alerta-secrets.json
FROM alerta/alerta-web FROM alerta/alerta-web
# Install the Slack and Jira plugins from alerta-contrib into the virtualenv
# at /venv, which has to be activated first.
# NOTE(review): 'jira' is installed here but is not listed in PLUGINS in
# alertad.conf -- confirm whether it is meant to be enabled.
RUN bash -c 'source /venv/bin/activate; pip install git+https://github.com/alerta/alerta-contrib.git#subdirectory=plugins/slack'
RUN bash -c 'source /venv/bin/activate; pip install git+https://github.com/alerta/alerta-contrib.git#subdirectory=plugins/jira'
# Copy the Grafana plugin from the build context and install it into the
# same virtualenv.
COPY grafana-plugin /tmp/grafana-plugin
RUN bash -c 'source /venv/bin/activate; pip install /tmp/grafana-plugin'
# Same for the LOFAR plugin.
COPY lofar-plugin /tmp/lofar-plugin
RUN bash -c 'source /venv/bin/activate; pip install /tmp/lofar-plugin'
COPY alertad.conf /app/alertad.conf COPY alertad.conf /app/alertad.conf
COPY alerta.conf /app/alerta.conf COPY alerta.conf /app/alerta.conf
COPY config.json /web/config.json COPY config.json /web/config.json
RUN pip install git+https://github.com/alerta/alerta-contrib.git#subdirectory=plugins/slack
You need:
* Your own Slack App:
* Give it channel write rights
* Get the OAuth token
* Install it in your slack
* Invite the app into your channel
* Feed the OAuth token to the config
* Add it to alerta-secrets.json
* Grafana:
* By default, Grafana resends alarms every 4h. Lower this interval in the notification settings so that deleted alarms are resent sooner while testing
* Add alerts by hand
* add "Summary" as alert text
* add label "severity": "major"/"minor"/etc (see https://docs.alerta.io/webui/configuration.html#severity-colors)
* Create alerta-secrets.json in this directory:
Example alerta-secrets.json:
{
"SLACK_TOKEN": "xoxb-...",
"SLACK_CHANNEL": "#lofar20-alerta"
}
{
"SLACK_TOKEN": "xoxb-get-this-from-your-slack-app",
"SLACK_CHANNEL": "#your-channel"
}
DEBUG = True DEBUG = True
SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i" SECRET = "T=&7xvF2S&x7w_JAcq$h1x5ocfA)8H2i"
PLUGINS = ['reject', 'blackout', 'normalise', 'enhance', 'slack'] # Allow non-admin views
# Switch on customer views
# NOTE(review): exact semantics are defined by Alerta, see its configuration reference
CUSTOMER_VIEWS = True
# Never timeout alerts
ALERT_TIMEOUT = 0
# Auto unack after a day (value in seconds)
ACK_TIMEOUT = 24 * 3600
# Auto unshelve after 2 hours (value in seconds)
SHELVE_TIMEOUT = 2 * 3600
# Use custom date formats
# NOTE(review): the pattern syntax is whatever the Alerta web UI expects -- confirm
# that mixing "yyyy" with "DD" renders as intended
DATE_FORMAT_MEDIUM_DATE = "dd DD/MM HH:mm"
DATE_FORMAT_LONG_DATE = "yyyy-MM-DD HH:mm:ss.sss"
# Default columns to list; lofarDevice and lofarAttribute are the alert
# attributes filled in by the 'lofar' plugin
COLUMNS = ['severity', 'status', 'lastReceiveTime', 'environment', 'resource', 'lofarDevice', 'lofarAttribute', 'event', 'text']
# ------------------------------------
# Plugin configuration
# ------------------------------------

# Plugins enabled for this Alerta instance
PLUGINS = [
    'reject',
    'blackout',
    'acked_by',
    'enhance',
    'grafana',
    'lofar',
    'slack',
]

# Slack plugin settings, see https://github.com/alerta/alerta-contrib/tree/master/plugins/slack
import json
import os

# The Slack credentials are kept out of the repository: they are read from
# the JSON file mounted at /run/secrets/alerta-secrets.
with open("/run/secrets/alerta-secrets") as secrets_file:
    secrets = json.load(secrets_file)

SLACK_WEBHOOK_URL = 'https://slack.com/api/chat.postMessage'
SLACK_TOKEN = secrets["SLACK_TOKEN"]
SLACK_CHANNEL = secrets["SLACK_CHANNEL"]
SLACK_ATTACHMENTS = True

# Base URL used for links back to Alerta; empty when the environment
# variable is not set.
DASHBOARD_URL = os.getenv("DASHBOARD_URL", "")
# for the Slack message configuration syntax, see https://api.slack.com/methods/chat.postMessage
# and https://app.slack.com/block-kit-builder
#
# The {{ ... }} placeholders are presumably expanded by the Slack plugin
# (Jinja2-style templates) before the message is posted -- see the
# alerta-contrib Slack plugin README to confirm the available variables.
SLACK_PAYLOAD = {
"channel": "{{ channel }}",
# NOTE(review): "emoji" is not among the documented chat.postMessage
# arguments (the message icon is set with "icon_emoji") -- confirm this key
# has the intended effect.
"emoji": ":fire:",
"text": "*{{ alert.severity|capitalize }}* :: _{{ alert.event }}_\n\n```{{ alert.text }}```",
"attachments": [{
"color": "{{ color }}",
"fields": [
# lofarDevice / lofarAttribute are set by the 'lofar' plugin
{"title": "Device", "value": "{{ alert.attributes.lofarDevice }}", "short": True },
{"title": "Attribute", "value": "{{ alert.attributes.lofarAttribute }}", "short": True },
{"title": "Environment", "value": "{{ alert.environment }}", "short": True },
{"title": "Status", "value": "{{ status|capitalize }}", "short": True },
# the grafana*Url attributes are set by the 'grafana' plugin; DASHBOARD_URL
# is the setting defined in this configuration file
{"title": "Dashboards", "value": "<{{ config.DASHBOARD_URL }}/#/alert/{{ alert.id }}|Alerta>\nGrafana <{{ alert.attributes.grafanaDashboardUrl }}|Dashboard> <{{ alert.attributes.grafanaPanelUrl }}|Panel>", "short": True },
{"title": "Configure", "value": "Grafana <{{ alert.attributes.grafanaAlertUrl }}|Edit> <{{ alert.attributes.grafanaSilenceUrl }}|Silence>", "short": True },
],
}]
}
import os
import json
import logging
from alerta.plugins import PluginBase
LOG = logging.getLogger()
class EnhanceGrafana(PluginBase):
    """
    Plugin for parsing alerts coming from Grafana.

    Copies a fixed set of Grafana fields from the raw alert payload
    (``alert.raw_data``) into ``alert.attributes`` so that they can be
    referenced elsewhere, e.g. in notification templates.
    """

    # alert attribute name -> key in the raw Grafana payload
    _RAW_DATA_FIELDS = {
        'grafanaStatus': 'status',
        'grafanaPanelUrl': 'panelURL',
        'grafanaDashboardUrl': 'dashboardURL',
        'grafanaAlertUrl': 'generatorURL',
        'grafanaSilenceUrl': 'silenceURL',
    }

    def pre_receive(self, alert, **kwargs):
        """
        Fill in the Grafana-specific attributes of ``alert`` and return it.

        Every attribute in ``_RAW_DATA_FIELDS`` is always set; a field that
        is absent from the payload results in an empty string.
        """
        # This plugin sees every incoming alert, not only those from Grafana.
        # Alerts without a dict payload (raw_data missing, or plain text)
        # must not make the plugin fail, so treat them as an empty payload.
        raw_data = alert.raw_data if isinstance(alert.raw_data, dict) else {}

        # Parse Grafana-specific fields
        for attribute, key in self._RAW_DATA_FIELDS.items():
            alert.attributes[attribute] = raw_data.get(key, '')

        return alert

    def post_receive(self, alert, **kwargs):
        """Nothing to do after an alert has been received."""
        return

    def status_change(self, alert, status, text, **kwargs):
        """Nothing to do when the status of an alert changes."""
        return

    def take_action(self, alert, action, text, **kwargs):
        """Actions are not implemented by this plugin."""
        raise NotImplementedError
from setuptools import setup, find_packages
version = '1.0.0'

# Descriptive metadata of the distribution.
metadata = dict(
    name="alerta-grafana",
    version=version,
    description='Alerta plugin for enhancing Grafana alerts',
    url='https://git.astron.nl/lofar2.0/tango',
    license='Apache License 2.0',
    author='Jan David Mol',
    author_email='mol@astron.nl',
)

# What gets installed: the single module alerta_grafana (plus any packages
# found next to it).
contents = dict(
    packages=find_packages(),
    py_modules=['alerta_grafana'],
    include_package_data=True,
    zip_safe=True,
    python_requires='>=3.5',
)

# Registers the plugin class in the 'alerta.plugins' entry point group under
# the name "grafana".
plugin_entry_points = {
    'alerta.plugins': [
        'grafana = alerta_grafana:EnhanceGrafana'
    ]
}

setup(entry_points=plugin_entry_points, **metadata, **contents)
import os
import json
import logging
from alerta.plugins import PluginBase
LOG = logging.getLogger()
class EnhanceLOFAR(PluginBase):
    """
    Plugin for enhancing alerts with LOFAR-specific information.

    Tags of the form ``key=value`` are inspected, and the values of the
    ``device`` and ``name`` tags are exposed as alert attributes.
    """

    def pre_receive(self, alert, **kwargs):
        """Copy the LOFAR-specific tags of ``alert`` into its attributes."""
        # tag key -> alert attribute that receives the tag's value
        attribute_for_key = {
            "device": 'lofarDevice',
            "name": 'lofarAttribute',
        }

        for tag in alert.tags:
            key, separator, value = tag.partition("=")

            # tags without a "=" carry no key/value pair
            if not separator:
                continue

            if key in attribute_for_key:
                alert.attributes[attribute_for_key[key]] = value

        return alert

    def post_receive(self, alert, **kwargs):
        """Nothing to do after an alert has been received."""
        return

    def status_change(self, alert, status, text, **kwargs):
        """Nothing to do when the status of an alert changes."""
        return

    def take_action(self, alert, action, text, **kwargs):
        """Actions are not implemented by this plugin."""
        raise NotImplementedError
from setuptools import setup, find_packages
version = '1.0.0'

# Descriptive metadata of the distribution.
metadata = dict(
    name="alerta-lofar",
    version=version,
    description='Alerta plugin for enhancing LOFAR alerts',
    url='https://git.astron.nl/lofar2.0/tango',
    license='Apache License 2.0',
    author='Jan David Mol',
    author_email='mol@astron.nl',
)

# What gets installed: the single module alerta_lofar (plus any packages
# found next to it).
contents = dict(
    packages=find_packages(),
    py_modules=['alerta_lofar'],
    include_package_data=True,
    zip_safe=True,
    python_requires='>=3.5',
)

# Registers the plugin class in the 'alerta.plugins' entry point group under
# the name "lofar".
plugin_entry_points = {
    'alerta.plugins': [
        'lofar = alerta_lofar:EnhanceLOFAR'
    ]
}

setup(entry_points=plugin_entry_points, **metadata, **contents)
{"test":[{"name":"test2","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"My test alert"},"grafana_alert":{"id":3,"orgId":1,"title":"FPGA processing error 2","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:18:48Z","intervalSeconds":10,"version":1,"uid":"waXdSCynk","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test2","no_data_state":"OK","exec_err_state":"Error"}}]},{"name":"test","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"
9","summary":"My test alert"},"grafana_alert":{"id":2,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"exemplar":false,"expr":"device_attribute{device=\"stat/sdp/1\",name=\"FPGA_error_R\"}","format":"time_series","group":[],"hide":false,"interval":"","intervalMs":1000,"legendFormat":"","maxDataPoints":43200,"metricColumn":"name","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["x"],"type":"column"}],[{"params":["value"],"type":"column"}]],"table":"lofar_array_boolean","timeColumn":"data_time","timeColumnType":"timestamptz","where":[{"name":"$__timeFilter","params":[],"type":"macro"},{"datatype":"text","name":"","params":["name","=","'fpga_error_r'"],"type":"expression"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0,0],"type":"gt"},"operator":{"type":"and"},"query":{"params":[]},"reducer":{"params":[],"type":"avg"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:16:22Z","intervalSeconds":10,"version":1,"uid":"MIt4Ijs7k","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"test","no_data_state":"OK","exec_err_state":"Error"}}]}]}
\ No newline at end of file
...@@ -3,6 +3,10 @@ version: '2.1' ...@@ -3,6 +3,10 @@ version: '2.1'
volumes: volumes:
alerta-postgres-data: {} alerta-postgres-data: {}
secrets:
alerta-secrets:
file: alerta-web/alerta-secrets.json
services: services:
alerta-web: alerta-web:
build: alerta-web build: alerta-web
...@@ -13,9 +17,12 @@ services: ...@@ -13,9 +17,12 @@ services:
- "8081:8080" - "8081:8080"
depends_on: depends_on:
- alerta-db - alerta-db
secrets:
- alerta-secrets
environment: environment:
- DEBUG=1 # remove this line to turn DEBUG off - DEBUG=1 # remove this line to turn DEBUG off
- DATABASE_URL=postgres://postgres:postgres@alerta-db:5432/monitoring - DATABASE_URL=postgres://postgres:postgres@alerta-db:5432/monitoring
- DASHBOARD_URL=http://${HOSTNAME}:8081
- AUTH_REQUIRED=True - AUTH_REQUIRED=True
- ADMIN_USERS=admin #default password: alerta - ADMIN_USERS=admin #default password: alerta
- ADMIN_KEY=demo-key - ADMIN_KEY=demo-key
......
...@@ -3,6 +3,7 @@ FROM grafana/grafana ...@@ -3,6 +3,7 @@ FROM grafana/grafana
# Install some plugins # Install some plugins
RUN grafana-cli plugins install briangann-datatable-panel RUN grafana-cli plugins install briangann-datatable-panel
RUN grafana-cli plugins install ae3e-plotly-panel RUN grafana-cli plugins install ae3e-plotly-panel
RUN grafana-cli plugins install yesoreyeram-infinity-datasource
COPY grafana.ini /etc/grafana/ COPY grafana.ini /etc/grafana/
......
# Post configuration
To export all current alert rules, use:
To import rules into a fresh Grafana instance:
* Obtain an 'editor' API key through the Grafana GUI (cogwheel -> API keys),
* Run:
curl http://localhost:3000/api/alertmanager/grafana/config/api/v1/alerts -H 'Authorization: Bearer (api key)' > alerting.json
curl localhost:3000/api/ruler/grafana/api/v1/rules > rules.json
* Delete the UIDs in alerting.json
To import rules into a fresh Grafana instance:
* Obtain an 'editor' API key through the Grafana GUI (cogwheel -> API keys),
* Run (first without piping to bash):
python3 import-rules.py -c alerting.json -r rules.json -B key | bash
...@@ -867,7 +867,9 @@ enabled = true ...@@ -867,7 +867,9 @@ enabled = true
[panels] [panels]
# If set to true Grafana will allow script tags in text panels. Not recommended as it enable XSS vulnerabilities. # If set to true Grafana will allow script tags in text panels. Not recommended as it enable XSS vulnerabilities.
;disable_sanitize_html = false
# Enabled so that text panels can embed other pages (mash-ups). Note that this switches off HTML sanitization, so script tags in text panels are allowed.
disable_sanitize_html = true
[plugins] [plugins]
;enable_alpha = false ;enable_alpha = false
......
#!/usr/bin/python3
import argparse
import json
import os
import shlex
import urllib.parse
DESCRIPTION = """
Generate rule import files and script for Grafana.
This script expands a given rules.json file into individual rules and
prints the bash commands to import them in Grafana.
To export rules from Grafana, use
curl <grafana>/api/ruler/grafana/api/v1/rules > rules.json
"""


def build_parser():
    """Return the command-line parser of this script."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument(
        '-c', '--alert-config-file', type=str, required=False, help="Input alertmanager configuration JSON to parse, output of 'curl <grafana>/api/alertmanager/grafana/config/api/v1/alerts' [%(default)s]")
    parser.add_argument(
        '-r', '--rules-file', type=str, required=True, help="Input rules JSON to parse, output of 'curl <grafana>/api/ruler/grafana/api/v1/rules' [%(default)s]")
    parser.add_argument(
        '-o', '--output-dir', type=str, default="rules", help="Directory to store the output [%(default)s]")
    parser.add_argument(
        '-B', '--authorization-bearer', type=str, default="abcdefghijklmnopqrstuvwxyz", help="Authorization bearer from the Grafana 'editor' API key [%(default)s]")
    parser.add_argument(
        '-g', '--grafana_url', type=str, default="http://localhost:3000", help="Base URL of Grafana [%(default)s]")
    parser.add_argument(
        '-u', '--update', default=False, action='store_true', help="Update existing alerts, instead of creating new ones [%(default)s]")
    return parser


def echo(message):
    """Return a shell command that prints ``message`` verbatim."""
    return f"echo {shlex.quote(message)}"


def curl_post(url, bearer, data):
    """
    Return a shell command that POSTs ``data`` as JSON to ``url``.

    ``data`` is either the JSON payload itself or ``@<filename>``. All
    variable parts are quoted, since the output is meant to be piped to bash.
    """
    return " ".join([
        "curl -X POST", shlex.quote(url),
        "-H 'Content-Type: application/json'",
        "-H 'Accept: application/json'",
        "-H", shlex.quote(f"Authorization: Bearer {bearer}"),
        "-d", shlex.quote(data),
    ])


def generate_commands(args):
    """
    Yield the shell commands that import the alerting setup into Grafana.

    As a side effect, every rule group in ``args.rules_file`` is written to
    ``<output_dir>/<folder>/<name>.json``, which is the file the yielded
    curl command for that rule uploads.

    ``args`` is the namespace produced by ``build_parser().parse_args()``.
    """
    if args.alert_config_file:
        yield echo(f"Importing alert configuration file {args.alert_config_file}")
        yield curl_post(
            f"{args.grafana_url}/api/alertmanager/grafana/config/api/v1/alerts",
            args.authorization_bearer,
            f"@{args.alert_config_file}")
        yield "echo ''"

    with open(args.rules_file, encoding="utf-8") as f:
        data = json.load(f)

    os.makedirs(args.output_dir, exist_ok=True)

    # the rules are of format {"folder": [{alert}, {alert}] }
    for folder, rules in data.items():
        os.makedirs(f"{args.output_dir}/{folder}", exist_ok=True)

        # command to create folder
        yield echo(f"Creating folder {folder}")
        yield curl_post(
            f"{args.grafana_url}/api/folders",
            args.authorization_bearer,
            json.dumps({"title": folder}))
        yield "echo ''"

        # the folder name is part of the URL path, so it must be
        # percent-encoded (names can contain spaces or slashes)
        rules_url = f"{args.grafana_url}/api/ruler/grafana/api/v1/rules/{urllib.parse.quote(folder, safe='')}"

        for rule in rules:
            rule_filename = f"{args.output_dir}/{folder}/{rule['name']}.json"

            if not args.update:
                # strip rule UIDs, if any, so new rules are created
                for subrule in rule["rules"]:
                    subrule["grafana_alert"].pop("uid", None)

            # dump this rule
            with open(rule_filename, "w", encoding="utf-8") as rule_file:
                json.dump(rule, rule_file)

            # import statement for this rule
            yield echo(f"Processing rule {folder}/{rule['name']}")
            yield curl_post(rules_url, args.authorization_bearer, f"@{rule_filename}")
            yield "echo ''"


def main():
    """Parse the command line and print the import script to stdout."""
    args = build_parser().parse_args()

    for command in generate_commands(args):
        print(command)


if __name__ == "__main__":
    main()
{"station":[{"name":"FPGA processing error","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"One or more FPGAs are unusable."},"grafana_alert":{"id":4,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"ZqArtG97z","model":{"format":"time_series","group":[],"hide":false,"intervalMs":1000,"maxDataPoints":43200,"metricColumn":"none","rawQuery":true,"rawSql":"SELECT\n data_time AS \"time\",\n x::text,\n device,\n name,\n case when value then 1 else 0 end AS value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["value_r"],"type":"column"}]],"table":"att_scalar_devdouble","timeColumn":"data_time","timeColumnType":"timestamp","where":[{"name":"$__timeFilter","params":[],"type":"macro"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"params":[],"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"-100"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T14:29:45Z","intervalSeconds":10,"version":1,"uid":"tzj3Ijynk","namespace_uid":"9DkbdYy7z","namespace_id":6,"rule_group":"FPGA processing error","no_data_state":"NoData","exec_err_state":"Alerting"}}]}]}
Alerting
==================
To set up alerting, you first need to post-configure Grafana to populate it with alerting rules, and a policy to forward alerts to Alerta:
- Go to Grafana (http://localhost:3000) and sign in with an administration account (default: admin/admin),
- Go to ``(cogwheel) -> API keys`` and create an ``editor`` API key. Copy the resulting hash,
- Go to the ``docker-compose/grafana/`` source directory, and run::
./import-rules.py -c alerting.json -r rules.json -B <apikey> | bash
...@@ -30,6 +30,7 @@ Even without having access to any LOFAR2.0 hardware, you can install the full st ...@@ -30,6 +30,7 @@ Even without having access to any LOFAR2.0 hardware, you can install the full st
devices/sst-xst devices/sst-xst
devices/configure devices/configure
configure_station configure_station
alerting
signal_chain signal_chain
beam_tracking beam_tracking
developer developer
......
...@@ -78,13 +78,3 @@ Configuration ...@@ -78,13 +78,3 @@ Configuration
These sections are optional, to configure specific functionality you may or may not want to use. These sections are optional, to configure specific functionality you may or may not want to use.
Alerta
````````
If you want Grafana alerts to appear in Alerta, you need to manually configure Grafana to forward them. Import the alert settings manually:
- Go to Grafana (http://localhost:3000) and sign in with an administration account (default: admin/admin),
- Go to ``Alerting`` and select ``Admin`` in the left menu bar,
- Copy/paste the following information, and press ``Save``:
.. literalinclude:: ../../../docker-compose/grafana/alerting.json
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment