Skip to content
Snippets Groups Projects
Commit f019f1ab authored by Corné Lukken's avatar Corné Lukken
Browse files

L2SS-1849: Provision alerting setup

parent 87f8333e
No related branches found
No related tags found
1 merge request!22L2SS-1849: Provision alerting setup
.idea
......@@ -5,6 +5,8 @@ USER root
RUN apk --no-cache add curl
RUN apk --no-cache add jq
RUN wget https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64.tar.gz -O - |\
tar xz && mv yq_linux_amd64 /usr/bin/yq && chmod +x /usr/bin/yq
USER grafana
......@@ -21,15 +23,16 @@ RUN grafana cli plugins install grafana-oncall-app
RUN wget https://algenty.github.io/flowcharting-repository/archives/agenty-flowcharting-panel-1.0.0b-SNAPSHOT.zip -O /tmp/agenty-flowcharting-panel.zip
RUN cd /var/lib/grafana/plugins/ && unzip /tmp/agenty-flowcharting-panel.zip && mv grafana-flowcharting agenty-flowcharting-panel
COPY grafana.ini /etc/grafana/
COPY alerting.json /opt/grafana-import/
COPY rules.json /opt/grafana-import/
COPY import-rules.sh /opt/grafana-import/
COPY grafana.ini /etc/grafana/grafana.ini
COPY imports/populate-tokens.sh /opt/grafana-import/populate-tokens.sh
# Add default configuration through provisioning (see https://grafana.com/docs/grafana/latest/administration/provisioning)
# https://grafana.com/docs/grafana/latest/alerting/set-up/provision-alerting-resources/file-provisioning/
COPY alerting /etc/grafana/provisioning/alerting/
COPY datasources /etc/grafana/provisioning/datasources/
COPY dashboards /var/lib/grafana/dashboards/
COPY panels /var/lib/grafana/panels/
COPY station-dashboards.yaml /etc/grafana/provisioning/dashboards/
COPY dashboards /var/lib/grafana/dashboards/station/station/
COPY panels /var/lib/grafana/panels/panels/
COPY run-wrapper.sh /run-wrapper.sh
ENTRYPOINT ["/run-wrapper.sh"]
......@@ -2,12 +2,19 @@
These dashboards show the state of a single station. They are tailored to be installed:
* Locally on a station, through https://git.astron.nl/lofar2.0/tango/-/tree/master/docker-compose/grafana
* Locally on a station, through https://git.astron.nl/lofar2.0/tango/-/tree/master/docker/grafana
* Centrally to monitor a group of stations, through https://git.astron.nl/lofar2.0/operations-central-management/-/tree/main/grafana-central/dashboards
## Environment variables
This container reads the following environment variables at startup for its configuration:
1. `SLACK_TOKEN`: Authentication token used for Slack alarm notifications
## Datasources
The Grafana installation in this repo exposes and uses the following data sources, as configured in the `dashboards/` directory:
The Grafana installation in this repo exposes and uses the following data sources,
as configured in the `dashboards/` directory:
* Prometheus, at `http://prometheus:9090`, serving metrics,
* Loki, at `http://loki:3100`, serving logs,
......@@ -25,6 +32,6 @@ To cover both these use cases, the designer must consider:
To deploy changes, they must be:
1. Committed to this repository,
1. Commit to this repository,
2. The submodules in the tango and operations-central-management repositories must link to the new commit,
3. Those repositories need to be redeployed on the stations and centrally, respectively.
{
"template_files": {},
"alertmanager_config": {
"route": {
"receiver": "Alerta",
"repeat_interval": "10m"
},
"templates": null,
"receivers": [
{
"name": "Alerta",
"grafana_managed_receiver_configs": [
{
"name": "Alerta",
"type": "webhook",
"disableResolveMessage": false,
"settings": {
"url": "http://alerta-server:8080/api/webhooks/prometheus?api-key=demo-key"
},
"secureFields": {}
}
]
}
]
}
}
This diff is collapsed.
---
# Alerting contact points.
# NOTE(review): nesting below is reconstructed following the Grafana alerting
# file-provisioning layout (receiver-level disableResolveMessage) - confirm.
apiVersion: 1

contactPoints:
  - orgId: 1
    name: Corne Alarms
    receivers:
      - uid: slackalarms
        type: slack
        settings:
          # Corne slack user id
          recipient: C04411Y8EAU
          # Dummy token replaced by scripts
          token: "xoxb-noop"
        # Only firing alerts are sent; no message when an alert resolves.
        disableResolveMessage: true
---
# Alerting notification policies.
apiVersion: 1

policies:
  - orgId: 1
    # Contact point that receives the notifications; referenced by name.
    receiver: Corne Alarms
    # Alerts are grouped per Grafana folder.
    group_by:
      - grafana_folder
    group_interval: 1d
    repeat_interval: 2d
This diff is collapsed.
---
# Datasource exposing Grafana's own HTTP API through the Infinity plugin.
#
# Only the fields this datasource actually uses are listed. Database
# credentials (user/database) and the empty, deprecated password fields were
# template leftovers and are omitted, matching the other datasource files.
apiVersion: 1

datasources:
  # <string, required> name of the datasource. Required
  - name: Grafana API
    # <string, required> datasource type. Required
    type: yesoreyeram-infinity-datasource
    # <string, required> access mode. proxy or direct (Server or Browser in the UI). Required
    access: proxy
    # <int> org id. will default to orgId 1 if not specified
    orgId: 1
    # <string> custom UID which can be used to reference this datasource in
    # other parts of the configuration
    uid: grafanaapi
    # <string> url
    url: http://localhost:3000/api
    # <bool> enable/disable basic auth
    basicAuth: false
    # <bool> mark as default datasource. Max one per org
    isDefault: false
    # <int> version of this provisioned datasource definition
    version: 1
    # <bool> allow users to edit datasources from the UI.
    editable: false
......@@ -13,21 +13,6 @@ datasources:
uid: loki
# <string> url
url: http://loki:3100
# <string> Deprecated, use secureJsonData.password
password:
# <string> database user, if used
user:
# <string> database name, if used
database:
# <bool> enable/disable basic auth
basicAuth: false
# <string> basic auth username
basicAuthUser:
# <string> Deprecated, use secureJsonData.basicAuthPassword
basicAuthPassword:
# <bool> enable/disable with credentials headers
withCredentials:
# <bool> mark as default datasource. Max one per org
isDefault: false
# <map> fields that will be converted to json and stored in jsonData
jsonData:
......@@ -37,9 +22,3 @@ datasources:
logMessageField:
maxConcurrentShardRequests: 5
timeField: "@timestamp"
# <string> json object of data that will be encrypted.
secureJsonData:
version: 1
# <bool> allow users to edit datasources from the UI.
editable: false
......@@ -13,27 +13,4 @@ datasources:
uid: prometheus
# <string> url
url: http://prometheus:9090
# <string> Deprecated, use secureJsonData.password
password:
# <string> database user, if used
user:
# <string> database name, if used
database:
# <bool> enable/disable basic auth
basicAuth: false
# <string> basic auth username
basicAuthUser:
# <string> Deprecated, use secureJsonData.basicAuthPassword
basicAuthPassword:
# <bool> enable/disable with credentials headers
withCredentials:
# <bool> mark as default datasource. Max one per org
isDefault: true
# <map> fields that will be converted to json and stored in jsonData
jsonData:
httpMethod: POST
# <string> json object of data that will be encrypted.
secureJsonData:
version: 1
# <bool> allow users to edit datasources from the UI.
editable: false
#! /bin/bash
# Import the Alertmanager configuration and the station alert rules into a
# locally running Grafana through its HTTP API.
#
# Reads alerting.json and rules.json by relative path. A temporary Admin API
# key is created for the import and deleted again once the import is done.

# The JSON payloads are referenced relative to this script, so work from the
# script's own directory instead of whatever directory the caller was in.
cd "$(dirname "$0")" || exit 1

until curl -s -X GET "http://localhost:3000" -o /dev/null
do
  echo "Wait until grafana is ready..."
  sleep 5
done

# Create API key
API_RESPONSE=$(curl -s -X POST -H "Content-Type: application/json" -d '{"name":"apikeycurl", "role": "Admin"}' http://admin:admin@localhost:3000/api/auth/keys)
API_KEY=$(printf '%s' "$API_RESPONSE" | jq -j '.key')
API_KEY_ID=$(printf '%s' "$API_RESPONSE" | jq -j '.id')

# jq yields "null" for a missing field and nothing for unparsable input; in
# both cases every request below would be sent with a useless bearer token.
if [[ -z "$API_KEY" || "$API_KEY" == "null" ]]; then
  echo "Could not create a Grafana API key: $API_RESPONSE" >&2
  exit 1
fi

# Import alerts
curl -X POST http://localhost:3000/api/alertmanager/grafana/config/api/v1/alerts -H 'Content-Type: application/json' -H 'Accept: application/json' -H "Authorization: Bearer $API_KEY" -d '@alerting.json'
curl -X POST http://localhost:3000/api/folders -H 'Content-Type: application/json' -H 'Accept: application/json' -H "Authorization: Bearer $API_KEY" -d '{"title": "station"}'

# Import station rules
# `IFS= read -r` and the quoted printf keep every rule byte-for-byte intact:
# a plain `read` strips backslashes (turning the "\n" escapes inside rawSql
# into "n") and an unquoted expansion collapses whitespace and expands globs.
jq -c '.station[]' rules.json | while IFS= read -r rule; do
  printf '%s' "$rule" | curl -X POST http://localhost:3000/api/ruler/grafana/api/v1/rules/station -H 'Content-Type: application/json' -H 'Accept: application/json' -H "Authorization: Bearer $API_KEY" -d '@-'
done

# Cleanup api key
curl -s -X DELETE "http://localhost:3000/api/auth/keys/$API_KEY_ID" -H 'Content-Type: application/json' -H 'Accept: application/json' -H "Authorization: Bearer $API_KEY"
\ No newline at end of file
# Imports
These files are copied into the image manually by the Dockerfile. They are intended
for alerting, but are not compatible with alert provisioning (which disables the admin API).
\ No newline at end of file
#! /bin/bash
# Populate tokens from environment
#
# Writes the value of SLACK_TOKEN into the `settings.token` field of every
# slack receiver in the provisioned contact points file. When SLACK_TOKEN is
# unset or empty the file is left untouched and a warning goes to stderr.

ALERT_DIR="/etc/grafana/provisioning/alerting"
CONTACT_POINTS="${ALERT_DIR}/cp.yaml"

if [[ -n "${SLACK_TOKEN}" ]]; then
  # The yq program is single-quoted on purpose: the shell must not expand
  # ${SLACK_TOKEN} here, yq's envsubst resolves it from the environment.
  if ! yq -i '(.contactPoints[].receivers[] | select(has("type")) | select(.type == "slack")) .settings.token = ("${SLACK_TOKEN}" | envsubst)' "${CONTACT_POINTS}"; then
    # Without this the dummy token would silently stay in place.
    echo "Failed to write SLACK_TOKEN into ${CONTACT_POINTS}" >&2
    exit 1
  fi
else
  echo "SLACK_TOKEN not set alarm notifications for slack will not work!" >&2
fi
{"station":[{"name":"FPGA processing error","interval":"10s","rules":[{"expr":"","for":"20s","labels":{"severity":"major"},"annotations":{"__dashboardUid__":"nC8N_kO7k","__panelId__":"9","summary":"One or more FPGAs are unusable."},"grafana_alert":{"id":1,"orgId":1,"title":"FPGA processing error","condition":"B","data":[{"refId":"A","queryType":"","relativeTimeRange":{"from":600,"to":0},"datasourceUid":"timescaledb","model":{"format":"time_series","group":[],"hide":false,"intervalMs":1000,"maxDataPoints":43200,"metricColumn":"none","rawQuery":true,"rawSql":"SELECT\n $__timeGroup(data_time, $__interval),\n x::text,\n device,\n name,\n value\nFROM lofar_array_boolean\nWHERE\n $__timeFilter(data_time) AND\n name = 'fpga_error_r'\nORDER BY 1,2","refId":"A","select":[[{"params":["value_r"],"type":"column"}]],"table":"att_scalar_devdouble","timeColumn":"data_time","timeColumnType":"timestamp","where":[{"name":"$__timeFilter","params":[],"type":"macro"}]}},{"refId":"B","queryType":"","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"-100","model":{"conditions":[{"evaluator":{"params":[0],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"params":[],"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"-100"},"expression":"A","hide":false,"intervalMs":1000,"maxDataPoints":43200,"reducer":"last","refId":"B","settings":{"mode":"dropNN"},"type":"reduce"}}],"updated":"2022-04-04T18:01:53Z","intervalSeconds":10,"version":3,"uid":"kujybCynk","namespace_uid":"R_jsbCynz","namespace_id":6,"rule_group":"FPGA processing error","no_data_state":"NoData","exec_err_state":"Alerting"}}]}]}
\ No newline at end of file
#! /bin/bash
/opt/grafana-import/import-rules.sh &
echo "Starting grafana, with provisioned alarms"
#/opt/grafana-import/import-rules.sh & # disabled due to incompatibility with alert provisioning
# Fill tokens from the environment into the provisioned alerting files before
# Grafana is started.
/opt/grafana-import/populate-tokens.sh
# Replace this wrapper shell with /run.sh instead of running it as a child, so
# the Grafana process tree receives the container's stop signals directly.
exec /run.sh
---
# Dashboard providers: one for the panels and one for the station dashboards.
# Both are file based and differ only in name and path.
apiVersion: 1

providers:
  # Unique provider name (required).
  - name: 'StationControl (panels)'
    # Organisation id.
    orgId: 1
    # Dashboard folder name; left empty.
    folder: ''
    # Folder UID; left empty.
    folderUid: ''
    # Provider type.
    type: file
    # Disable dashboard deletion.
    disableDeletion: true
    # Seconds between scans for changed dashboards.
    updateIntervalSeconds: 60
    # Provisioned dashboards may be updated from the UI.
    allowUiUpdates: true
    options:
      # Directory on disk holding the dashboard files (required for 'file').
      path: /var/lib/grafana/panels
      # Create Grafana folders from the directory names on disk.
      foldersFromFilesStructure: true

  # Unique provider name (required).
  - name: 'StationControl (dashboards)'
    # Organisation id.
    orgId: 1
    # Dashboard folder name; left empty.
    folder: ''
    # Folder UID; left empty.
    folderUid: ''
    # Provider type.
    type: file
    # Disable dashboard deletion.
    disableDeletion: true
    # Seconds between scans for changed dashboards.
    updateIntervalSeconds: 60
    # Provisioned dashboards may be updated from the UI.
    allowUiUpdates: true
    options:
      # Directory on disk holding the dashboard files (required for 'file').
      path: /var/lib/grafana/dashboards/station
      # Create Grafana folders from the directory names on disk.
      foldersFromFilesStructure: true
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment