diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8b1a90b70cc2a34955ca4d2c0588ec323da8a9af..82cbc3f552989949dbe2d5ac79cdd47b83152971 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -97,6 +97,8 @@ docker_build_image_all:
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh prometheus latest
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh itango latest
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh grafana latest
+    - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh loki latest
+    - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh logstash latest
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh jupyter latest
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh apsct-sim latest
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh ccd-sim latest
@@ -187,6 +189,28 @@ docker_build_image_grafana:
   script:
 # Do not remove 'bash' or statement will be ignored by primitive docker shell
     - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh grafana $tag
+docker_build_image_loki:
+  extends: .base_docker_images_except
+  only:
+    refs:
+      - merge_requests
+    changes:
+      - docker-compose/loki.yml
+      - docker-compose/loki/*
+  script:
+# Do not remove 'bash' or statement will be ignored by primitive docker shell
+    - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh loki $tag
+docker_build_image_logstash:
+  extends: .base_docker_images_except
+  only:
+    refs:
+      - merge_requests
+    changes:
+      - docker-compose/logstash.yml
+      - docker-compose/logstash/*
+  script:
+# Do not remove 'bash' or statement will be ignored by primitive docker shell
+    - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh logstash $tag
 docker_build_image_jupyter:
   extends: .base_docker_images_except
   only:
diff --git a/docker-compose/elk.yml b/docker-compose/elk.yml
index 786e843ce85c16e7604341a7138c5030f1356fed..d671ba0c7708d3ae9cb37956f9bc7884462f7389 100644
--- a/docker-compose/elk.yml
+++ b/docker-compose/elk.yml
@@ -44,10 +44,10 @@ services:
     ports:
       - "5601:5601" # kibana
       - "9200:9200" # elasticsearch
-      - "5044:5044" # logstash beats input
-      - "1514:1514/tcp" # logstash syslog input
-      - "1514:1514/udp" # logstash syslog input
-      - "5959:5959" # logstash tcp json input
+      # - "5044:5044" # logstash beats input
+      # - "1514:1514/tcp" # logstash syslog input
+      # - "1514:1514/udp" # logstash syslog input
+      # - "5959:5959" # logstash tcp json input
     depends_on:
       - elk-configure-host
     restart: unless-stopped
diff --git a/docker-compose/grafana/datasources/loki.yaml b/docker-compose/grafana/datasources/loki.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f9108f15f3791de72fb8c80cc24ae156e0bfea73
--- /dev/null
+++ b/docker-compose/grafana/datasources/loki.yaml
@@ -0,0 +1,45 @@
+apiVersion: 1
+
+datasources:
+  # <string, required> name of the datasource. Required
+  - name: Loki
+    # <string, required> datasource type. Required
+    type: loki
+    # <string, required> access mode. proxy or direct (Server or Browser in the UI). Required
+    access: proxy
+    # <int> org id. will default to orgId 1 if not specified
+    orgId: 1
+    # <string> custom UID which can be used to reference this datasource in other parts of the configuration, if not specified will be generated automatically
+    uid: loki
+    # <string> url
+    url: http://loki:3100
+    # <string> Deprecated, use secureJsonData.password
+    password:
+    # <string> database user, if used
+    user:
+    # <string> database name, if used
+    database:
+    # <bool> enable/disable basic auth
+    basicAuth: false
+    # <string> basic auth username
+    basicAuthUser:
+    # <string> Deprecated, use secureJsonData.basicAuthPassword
+    basicAuthPassword:
+    # <bool> enable/disable with credentials headers
+    withCredentials:
+    # <bool> mark as default datasource. Max one per org
+    isDefault: false
+    # <map> fields that will be converted to json and stored in jsonData
+    jsonData:
+      esVersion: 7.10.0
+      includeFrozen: false
+      logLevelField:
+      logMessageField:
+      maxConcurrentShardRequests: 5
+      timeField: "@timestamp"
+    # <string> json object of data that will be encrypted.
+    secureJsonData:
+    version: 1
+    # <bool> allow users to edit datasources from the UI.
+    editable: false
+
diff --git a/docker-compose/logstash.yml b/docker-compose/logstash.yml
new file mode 100644
index 0000000000000000000000000000000000000000..73a13e346433a3a337bf66383132d8c1e24e0352
--- /dev/null
+++ b/docker-compose/logstash.yml
@@ -0,0 +1,29 @@
+#
+# Docker compose file that launches Logstash-output-loki
+#
+#
+
+version: '2.1'
+
+services:
+  logstash:
+    image: logstash
+    build:
+      context: logstash
+      args:
+        SOURCE_IMAGE: grafana/logstash-output-loki:main
+    container_name: ${CONTAINER_NAME_PREFIX}logstash
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "100m"
+        max-file: "10"
+    networks:
+      - control
+    ports:
+      - "5044:5044" # logstash beats input
+      - "1514:1514/tcp" # logstash syslog input
+      - "1514:1514/udp" # logstash syslog input
+      - "5959:5959" # logstash tcp json input
+      - "9600:9600"
+    restart: unless-stopped
diff --git a/docker-compose/logstash/Dockerfile b/docker-compose/logstash/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..94fa5ab4bfe9a6d5946da6dda9a018c8385ef592
--- /dev/null
+++ b/docker-compose/logstash/Dockerfile
@@ -0,0 +1,10 @@
+ARG SOURCE_IMAGE
+FROM ${SOURCE_IMAGE}
+
+# Disable Elastic Search connection
+ENV ELASTIC_CONTAINER=false
+
+# Provide our logstash config
+COPY loki.conf /home/logstash/
+COPY logstash.yml /usr/share/logstash/config/logstash.yml
+COPY loki.conf /usr/share/logstash/pipeline/logstash.conf
diff --git a/docker-compose/logstash/README.md b/docker-compose/logstash/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..156aaae85a210ee963a83baadc39ccdadbbef756
--- /dev/null
+++ b/docker-compose/logstash/README.md
@@ -0,0 +1,41 @@
+# Logstash
+
+Grafana Loki has a Logstash output plugin called logstash-output-loki that enables shipping logs to a Loki instance.
+
+## Usage and configuration
+
+To configure Logstash to forward logs to Loki, simply add the loki output to your Logstash configuration file as documented below:
+
+    output {
+      loki {
+        [url => "" | default = none | required=true]
+
+        [tenant_id => string | default = nil | required=false]
+
+        [message_field => string | default = "message" | required=false]
+
+        [include_fields => array | default = [] | required=false]
+
+        [batch_wait => number | default = 1(s) | required=false]
+
+        [batch_size => number | default = 102400(bytes) | required=false]
+
+        [min_delay => number | default = 1(s) | required=false]
+
+        [max_delay => number | default = 300(s) | required=false]
+
+        [retries => number | default = 10 | required=false]
+
+        [username => string | default = nil | required=false]
+
+        [password => secret | default = nil | required=false]
+
+        [cert => path | default = nil | required=false]
+
+        [key => path | default = nil| required=false]
+
+        [ca_cert => path | default = nil | required=false]
+
+        [insecure_skip_verify => boolean | default = false | required=false]
+      }
+    }
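
The README block above is a parameter reference; only url is actually required. As a concrete illustration, a minimal sketch of such an output section is shown below. It mirrors the output block used in docker-compose/logstash/loki.conf later in this change and assumes the Loki endpoint published by docker-compose/loki.yml (http://loki:3100); every omitted parameter keeps the default listed in the reference.

    output {
      loki {
        # Push all events to the Loki instance started by docker-compose/loki.yml
        url => "http://loki:3100/loki/api/v1/push"
      }
    }
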
diff --git a/docker-compose/logstash/logstash.yml b/docker-compose/logstash/logstash.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5f80650fe6fc635570fd9f7e4888da17eddf4e70
--- /dev/null
+++ b/docker-compose/logstash/logstash.yml
@@ -0,0 +1,2 @@
+http.host: "0.0.0.0"
+#xpack.monitoring.elasticsearch.hosts: [ "http://loki:3100" ]
diff --git a/docker-compose/logstash/loki.conf b/docker-compose/logstash/loki.conf
new file mode 100644
index 0000000000000000000000000000000000000000..e22f53dbee2bac168d7e4d3bc572c854f6e81663
--- /dev/null
+++ b/docker-compose/logstash/loki.conf
@@ -0,0 +1,152 @@
+input {
+  beats {
+    port => 5044
+    # ssl => true
+    # ssl_certificate => "/etc/pki/tls/certs/logstash-beats.crt"
+    # ssl_key => "/etc/pki/tls/private/logstash-beats.key"
+  }
+}
+
+input {
+  syslog {
+    port => 1514
+  }
+}
+
+input {
+  tcp {
+    port => 5959
+    codec => json
+  }
+}
+
+filter {
+  if [type] == "syslog" {
+    grok {
+      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
+      add_field => [ "received_at", "%{@timestamp}" ]
+      add_field => [ "received_from", "%{host}" ]
+    }
+    syslog_pri { }
+    date {
+      match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
+    }
+  }
+}
+
+# filter {
+#   if [type] == "nginx-access" {
+#     grok {
+#       match => { "message" => "%{NGINXACCESS}" }
+#     }
+#   }
+# }
+
+filter {
+  if [program] == "grafana" {
+    kv { }
+    mutate {
+      rename => {
+        "t" => "timestamp"
+        "lvl" => "level"
+        "msg" => "message"
+      }
+      uppercase => [ "level" ]
+    }
+    date {
+      match => [ "timestamp", "ISO8601" ]
+    }
+  }
+}
+
+filter {
+  if [program] == "prometheus" {
+    kv { }
+    mutate {
+      rename => {
+        "ts" => "timestamp"
+        "msg" => "message"
+      }
+      uppercase => [ "level" ]
+    }
+    date {
+      match => [ "timestamp", "ISO8601" ]
+    }
+  }
+}
+
+filter {
+  if [program] == "prometheus" {
+    kv { }
+    mutate {
+      rename => {
+        "ts" => "timestamp"
+        "msg" => "message"
+      }
+      uppercase => [ "level" ]
+    }
+    date {
+      match => [ "timestamp", "ISO8601" ]
+    }
+  }
+}
+
+filter {
+  if [program] == "tango-rest" {
+    grok {
+      match => {
+        "message" => "%{TIMESTAMP_ISO8601:timestamp} %{WORD:level} %{GREEDYDATA:message}"
+      }
+      "overwrite" => [ "timestamp", "level", "message" ]
+    }
+    date {
+      match => [ "timestamp", "YYYY-MM-dd HH:mm:ss,SSS" ]
+      timezone => "UTC"
+    }
+  }
+}
+
+filter {
+  # mark all our mariadb instances
+  grok {
+    match => {
+      "program" => [ "archiver-maria-db", "tangodb" ]
+    }
+    add_tag => [ "mariadb" ]
+  }
+
+  # parse mariadb output
+  if "mariadb" in [tags] {
+    grok {
+      match => {
+        "message" => [
+          "%{TIMESTAMP_ISO8601:timestamp} .%{WORD:level}. %{GREEDYDATA:message}",
+          "%{TIMESTAMP_ISO8601:timestamp} 0 .%{WORD:level}. %{GREEDYDATA:message}"
+        ]
+      }
+      "overwrite" => [ "timestamp", "level", "message" ]
+    }
+    mutate {
+      gsub => [
+        "level", "Note", "Info"
+      ]
+      uppercase => [ "level" ]
+    }
+    date {
+      match => [ "timestamp", "YYYY-MM-dd HH:mm:ssZZ", "YYYY-MM-dd HH:mm:ss", "YYYY-MM-dd H:mm:ss" ]
+      timezone => "UTC"
+    }
+  }
+}
+
+output {
+  # elasticsearch {
+  #   hosts => ["localhost"]
+  #   manage_template => false
+  #   index => "logstash-%{+YYYY.MM.dd}"
+  # }
+  loki {
+    url => "http://loki:3100/loki/api/v1/push"
+  }
+}
+
diff --git a/docker-compose/loki.yml b/docker-compose/loki.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2007e16aa7e06726cc5b8be75553e4ef2df56475
--- /dev/null
+++ b/docker-compose/loki.yml
@@ -0,0 +1,40 @@
+#
+# Docker compose file that launches a LOKI instance.
+# See https://grafana.com/docs/loki/latest/installation/docker/
+#
+#
+
+version: "3"
+
+services:
+  loki:
+    image: grafana/loki:2.6.0
+    container_name: ${CONTAINER_NAME_PREFIX}loki
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "100m"
+        max-file: "10"
+    networks:
+      - control
+    ports:
+      - "3100:3100"
+    command: -config.file=/etc/loki/local-config.yaml
+    restart: unless-stopped
+
+  promtail:
+    image: grafana/promtail:2.6.0
+    container_name: ${CONTAINER_NAME_PREFIX}promtail
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "100m"
+        max-file: "10"
+    volumes:
+      - /var/log:/var/log
+    command: -config.file=/etc/promtail/config.yml
+    networks:
+      - control
+    ports:
+      - "9080:9080"
+    restart: unless-stopped
diff --git a/sbin/run_integration_test.sh b/sbin/run_integration_test.sh
index 57e4fff1458ead7733250ad130da5ff1602a1bdb..563b2c7fbf793bcbd9db84a45635433c0b742c26 100755
--- a/sbin/run_integration_test.sh
+++ b/sbin/run_integration_test.sh
@@ -86,14 +86,16 @@ SIMULATORS="sdptr-sim recv-sim unb2-sim apsct-sim apspu-sim ccd-sim"
 
 # shellcheck disable=SC2086
 make build $DEVICES $SIMULATORS
-make build elk integration-test
+# make build elk integration-test # L2SS-970: elk temporarily disabled
+make build logstash integration-test
 make build archiver-timescale hdbppts-cm hdbppts-es
 
 # Start and stop sequence
 # shellcheck disable=SC2086
 make stop $DEVICES $SIMULATORS hdbppts-es hdbppts-cm archiver-timescale
 make stop device-docker # this one does not test well in docker-in-docker
-make stop elk
+# make stop elk # L2SS-970: elk temporarily disabled
+make stop logstash
 
 # Run dummy integration test to install pytango in tox virtualenv without
 # the memory pressure of the ELK stack.
@@ -103,7 +105,8 @@ make stop elk
 # TODO(L2SS-992): Remove me and above documentation
 integration_test dummy
 
-make start elk
+# make start elk # L2SS-970: elk temporarily disabled
+make start logstash
 
 # Give elk time to start
 # TODO(L2SS-988): Use a nicer more reliable mechanism
diff --git a/sbin/tag_and_push_docker_image.sh b/sbin/tag_and_push_docker_image.sh
index 3e10110ed1ac4dad5e0f6fb0521a314c0ae06fbe..d3b5ea894e0b13eb3bcb7755656381235d664a5a 100755
--- a/sbin/tag_and_push_docker_image.sh
+++ b/sbin/tag_and_push_docker_image.sh
@@ -61,6 +61,7 @@ REMOTE_IMAGES=(
 # TODO(Corne): Have this list generated from the .yml files
 LOCAL_IMAGES=(
   "elk elk y" "elk-configure-host elk-configure-host y"
+  "logstash logstash y"
   "lofar-device-base lofar-device-base y"
   "apsct-sim docker-compose_apsct-sim y"
   "apspu-sim docker-compose_apspu-sim y"
diff --git a/tangostationcontrol/tangostationcontrol/common/lofar_logging.py b/tangostationcontrol/tangostationcontrol/common/lofar_logging.py
index 89ab11c0db3b31af805b5cda78ce21a77ee9318a..13d20551b0def7a72cd75ff750232b861ebd9b0e 100644
--- a/tangostationcontrol/tangostationcontrol/common/lofar_logging.py
+++ b/tangostationcontrol/tangostationcontrol/common/lofar_logging.py
@@ -56,7 +56,7 @@ class LogSuppressErrorSpam(logging.Formatter):
         self.error_suppress_interval = error_suppress_interval
 
     def is_error_to_suppress(self, record):
-        # Errors occuring by not being able to connect to the ELK stack, f.e. because it is down.
+        # Errors occurring when we cannot connect to the log processing container, e.g. because it is down.
         return record.name == "LogProcessingWorker" and record.msg == "An error occurred while sending events: %s"
 
     def filter(self, record):
@@ -105,7 +105,7 @@ class LogAnnotator(logging.Formatter):
 def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False):
     """
     Configure the given logger (or root if None) to:
-      - send logs to the ELK stack
+      - send logs to Loki through Logstash
       - send logs to Tango
      - send logs to stdout
     """
@@ -128,7 +128,7 @@ def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False):
     # don't spam debug messages when fetching URLs
     logging.getLogger("urllib3").setLevel(logging.INFO)
 
-    # don't spam error messages when having trouble connecting to ELK
+    # don't spam error messages when having connection troubles
     logging.getLogger("LogProcessingWorker").setLevel(logging.CRITICAL)
 
     # for now, also log to stderr
@@ -151,12 +151,12 @@ def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False):
     if debug:
         return logger
 
-    # Log to ELK stack
+    # Log to Logstash-Loki
     try:
         from logstash_async.handler import AsynchronousLogstashHandler, LogstashFormatter
 
-        # log to the tcp_input of logstash in our ELK stack
-        handler = AsynchronousLogstashHandler("elk", 5959, database_path='/tmp/lofar_pending_log_messages.db')
+        # log to the tcp_input of logstash in our logstash-loki container
+        handler = AsynchronousLogstashHandler("logstash", 5959, database_path='/tmp/lofar_pending_log_messages.db')
 
         # configure log messages
         formatter = LogstashFormatter(extra=log_extra, tags=["python", "lofar"])
@@ -167,9 +167,9 @@ def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False):
         # install the handler
         logger.addHandler(handler)
     except ImportError:
-        logger.exception("Cannot forward logs to ELK: logstash_async module not found.")
+        logger.exception("Cannot forward logs to Logstash-Loki: logstash_async module not found.")
     except Exception:
-        logger.exception("Cannot forward logs to ELK.")
+        logger.exception("Cannot forward logs to Logstash-Loki.")
 
     # Don't log to Tango to reduce log spam
     """
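
For context, a process in this repository would pick up the Logstash-Loki pipeline simply by calling the helper changed above. A minimal sketch, assuming the code runs inside the docker-compose control network where the logstash hostname resolves; the log_extra value is a made-up example, not an existing identifier:

    import logging

    from tangostationcontrol.common.lofar_logging import configure_logger

    # Attach the stdout and Logstash handlers to the root logger; records
    # then flow via the logstash container (port 5959, json codec) to Loki.
    configure_logger(log_extra={"station": "DevStation"})  # "DevStation" is a hypothetical label

    logger = logging.getLogger(__name__)
    logger.info("This record is shipped through the Logstash-Loki pipeline.")
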
ELK.") + logger.exception("Cannot forward logs to Logstash-Loki.") # Don't log to Tango to reduce log spam """ diff --git a/tangostationcontrol/tangostationcontrol/devices/docker_device.py b/tangostationcontrol/tangostationcontrol/devices/docker_device.py index 8ee1301c6eeb89d91fa166bc4691a9d84b0bb77a..71e13119de523f829092d040ab62bc11330ac4ad 100644 --- a/tangostationcontrol/tangostationcontrol/devices/docker_device.py +++ b/tangostationcontrol/tangostationcontrol/devices/docker_device.py @@ -103,6 +103,10 @@ class Docker(lofar_device): elk_RW = attribute_wrapper(comms_annotation={"container": "elk"}, datatype=bool, access=AttrWriteType.READ_WRITE) grafana_R = attribute_wrapper(comms_annotation={"container": "grafana"}, datatype=bool) grafana_RW = attribute_wrapper(comms_annotation={"container": "grafana"}, datatype=bool, access=AttrWriteType.READ_WRITE) + logstash_R = attribute_wrapper(comms_annotation={"container": "logstash"}, datatype=bool) + logstash_RW = attribute_wrapper(comms_annotation={"container": "logstash"}, datatype=bool, access=AttrWriteType.READ_WRITE) + loki_R = attribute_wrapper(comms_annotation={"container": "loki"}, datatype=bool) + loki_RW = attribute_wrapper(comms_annotation={"container": "loki"}, datatype=bool, access=AttrWriteType.READ_WRITE) hdbppts_cm_R = attribute_wrapper(comms_annotation={"container": "hdbppts-cm"}, datatype=bool) hdbppts_cm_RW = attribute_wrapper(comms_annotation={"container": "hdbppts-cm"}, datatype=bool, access=AttrWriteType.READ_WRITE) hdbppts_es_R = attribute_wrapper(comms_annotation={"container": "hdbppts-es"}, datatype=bool)