diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8b1a90b70cc2a34955ca4d2c0588ec323da8a9af..82cbc3f552989949dbe2d5ac79cdd47b83152971 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -97,6 +97,8 @@ docker_build_image_all: - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh prometheus latest - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh itango latest - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh grafana latest + - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh loki latest + - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh logstash latest - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh jupyter latest - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh apsct-sim latest - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh ccd-sim latest @@ -187,6 +189,28 @@ docker_build_image_grafana: script: # Do not remove 'bash' or statement will be ignored by primitive docker shell - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh grafana $tag +docker_build_image_loki: + extends: .base_docker_images_except + only: + refs: + - merge_requests + changes: + - docker-compose/loki.yml + - docker-compose/loki/* + script: +# Do not remove 'bash' or statement will be ignored by primitive docker shell + - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh loki $tag +docker_build_image_logstash: + extends: .base_docker_images_except + only: + refs: + - merge_requests + changes: + - docker-compose/logstash.yml + - docker-compose/logstash/* + script: +# Do not remove 'bash' or statement will be ignored by primitive docker shell + - bash $CI_PROJECT_DIR/sbin/tag_and_push_docker_image.sh logstash $tag docker_build_image_jupyter: extends: .base_docker_images_except only: diff --git a/CDB/stations/CS001_ConfigDb.json b/CDB/stations/CS001_ConfigDb.json index 8036e8444a02d04d2d9059ae09c9c826cf0b044f..e078b8c7a9daf205a064a665a934bfb15671225b 100644 --- a/CDB/stations/CS001_ConfigDb.json +++ b/CDB/stations/CS001_ConfigDb.json @@ -274,8 +274,8 @@ ], "Antenna_to_SDP_Mapping": [ "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", - "0", "-1", "0", "-1", "3", "0", "3", "1", "3", "2", "0", "-1", - "0", "-1", "0", "-1", "3", "3", "3", "4", "3", "5", "0", "-1", + "0", "-1", "0", "-1", "2", "0", "2", "1", "2", "2", "0", "-1", + "0", "-1", "0", "-1", "2", "3", "2", "4", "2", "5", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", @@ -446,9 +446,9 @@ "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "4", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "5", - "0", "-1", "0", "-1", "0", "-1", "0", "-1", "1", "6", "1", "7", + "0", "-1", "0", "-1", "0", "-1", "0", "-1", "1", "0", "1", "1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", "0", "-1", - "0", "-1", "0", "-1", "0", "-1", "0", "-1", "1", "8", "0", "-1" + "0", "-1", "0", "-1", "0", "-1", "0", "-1", "1", "2", "0", "-1" ], "Antenna_Field_Reference_ETRS": [ "3826923.942", "460915.117", "5064643.229" diff --git a/README.md b/README.md index c72d120f54c46d5bf9eba273863b4572de9595ff..cb44d8f3aebe5a1c2a47239eda784959da4ca0c7 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,16 @@ Station Control software related to Tango devices. # Index +* [Installation](#installation) + * [Prerequisites](#prerequisites) + * [Bootstrap](#bootstrap) +* [User documentation (ReadTheDocs (Sphinx / ReStructuredText))](tangostationcontrol/docs/README.md) * [Docker compose documentation](docker-compose/README.md) * [Timescaledb](docker-compose/timescaledb/README.md) * [Jupyter startup files](docker-compose/jupyter/ipython-profiles/stationcontrol-jupyter/startup/README.md) * [Tango Prometheus exporter](https://git.astron.nl/lofar2.0/ska-tango-grafana-exporter) -* [ReadTheDocs (Sphinx / ReStructuredText) documentation](tangostationcontrol/docs/README.md) +* [Developer Documentation](#development) + * [Versioning](#versioning) * Source code documentation * [Attribute wrapper documentation](tangostationcontrol/tangostationcontrol/clients/README.md) * [Archiver documentation](tangostationcontrol/tangostationcontrol/toolkit/README.md) @@ -20,6 +25,7 @@ Station Control software related to Tango devices. * [HDF5 statistics](tangostationcontrol/tangostationcontrol/statistics/README.md) * [Unit tests](tangostationcontrol/tangostationcontrol/test/README.md) * [Integration tests](tangostationcontrol/tangostationcontrol/integration_test/README.md) +* [Release Notes](#release-notes) # Installation @@ -41,20 +47,16 @@ You will also need: ## Bootstrap -The bootstrap procedure is needed only once. First we build all docker containers, and load the initial configuration. This may take a while: +The bootstrap procedure is needed only once. First we build all docker +containers, and load the initial configuration. This may take a while: ``` cd docker-compose make bootstrap ``` -If you lack access to LOFAR station hardware, configure the devices to use their simulators instead: - -``` -for sim in ../CDB/*-sim-config.json; do - ../sbin/update_ConfigDb.sh ../CDB${sim}-config.json -done -``` +By default bootstrap will configure the station to use simulators. You can +lookup alternative configurations in the CDB directory. Now we can start all containers, and make sure everything is up: @@ -63,6 +65,43 @@ make start make status ``` -If not, you can inspect why with `docker logs <container>`. The containers will automatically be restarted on reboot or failure. Stop them explicitly to bring them down (`make stop <container>`). +If not, you can inspect why with `docker logs <container>`. The containers will +automatically be restarted on reboot or failure. Stop them explicitly to bring +them down (`make stop <container>`). + +Most notably, you will have web interfaces available at: + - http://localhost:8888 (Jupyter Notebook) + - http://localhost:3000 (Grafana). + +# Development + +For development you will need several dependencies including: + +``` +git g++ gcc make docker docker-compose shellcheck graphviz python3-dev \ +python3-pip python3-tox libboost-python-dev libtango-cpp pkg-config +``` + +Of these docker-compose must be at least 2.0 and Python 3.7 or higher. +Alternatively, tox can be installed through pip using `pip install tox`. + +Finally, running unit tests relies on availability of casacore data see: +[lofar-device-base Dockerfile](docker-compose/lofar-device-base/Dockerfile) +for details. + +## Versioning + +When changing behavior a new version for Lofar Station Control should be +reserved. To do this please follow [semantic versioning](https://semver.org/). + +Next change the version in the following places: + +1. The [VERSION](VERSION) file. +2. In [test_writer_sst.py](tangostationcontrol/tangostationcontrol/integration_test/default/statistics/test_writer_sst.py) + for the `test_header_info` test. +3. Add a [Release note](#release-notes) for the given version. +3. Once the merge requests is merged to master, add a tag with the version (just x.x.x not Vx.x.x) + +# Release Notes -Most notably, you will have web interfaces available at http://localhost:8888 (Jupyter Notebook), and http://localhost:3000 (Grafana). +* 0.1.2 Fix `StatisticsClient` accessing `last_invalid_packet_exception` parameter diff --git a/docker-compose/elk.yml b/docker-compose/elk.yml index 786e843ce85c16e7604341a7138c5030f1356fed..d671ba0c7708d3ae9cb37956f9bc7884462f7389 100644 --- a/docker-compose/elk.yml +++ b/docker-compose/elk.yml @@ -44,10 +44,10 @@ services: ports: - "5601:5601" # kibana - "9200:9200" # elasticsearch - - "5044:5044" # logstash beats input - - "1514:1514/tcp" # logstash syslog input - - "1514:1514/udp" # logstash syslog input - - "5959:5959" # logstash tcp json input + # - "5044:5044" # logstash beats input + # - "1514:1514/tcp" # logstash syslog input + # - "1514:1514/udp" # logstash syslog input + # - "5959:5959" # logstash tcp json input depends_on: - elk-configure-host restart: unless-stopped diff --git a/docker-compose/grafana/datasources/loki.yaml b/docker-compose/grafana/datasources/loki.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9108f15f3791de72fb8c80cc24ae156e0bfea73 --- /dev/null +++ b/docker-compose/grafana/datasources/loki.yaml @@ -0,0 +1,45 @@ +apiVersion: 1 + +datasources: + # <string, required> name of the datasource. Required + - name: Loki + # <string, required> datasource type. Required + type: loki + # <string, required> access mode. proxy or direct (Server or Browser in the UI). Required + access: proxy + # <int> org id. will default to orgId 1 if not specified + orgId: 1 + # <string> custom UID which can be used to reference this datasource in other parts of the configuration, if not specified will be generated automatically + uid: loki + # <string> url + url: http://loki:3100 + # <string> Deprecated, use secureJsonData.password + password: + # <string> database user, if used + user: + # <string> database name, if used + database: + # <bool> enable/disable basic auth + basicAuth: false + # <string> basic auth username + basicAuthUser: + # <string> Deprecated, use secureJsonData.basicAuthPassword + basicAuthPassword: + # <bool> enable/disable with credentials headers + withCredentials: + # <bool> mark as default datasource. Max one per org + isDefault: false + # <map> fields that will be converted to json and stored in jsonData + jsonData: + esVersion: 7.10.0 + includeFrozen: false + logLevelField: + logMessageField: + maxConcurrentShardRequests: 5 + timeField: "@timestamp" + # <string> json object of data that will be encrypted. + secureJsonData: + version: 1 + # <bool> allow users to edit datasources from the UI. + editable: false + diff --git a/docker-compose/logstash.yml b/docker-compose/logstash.yml new file mode 100644 index 0000000000000000000000000000000000000000..73a13e346433a3a337bf66383132d8c1e24e0352 --- /dev/null +++ b/docker-compose/logstash.yml @@ -0,0 +1,29 @@ +# +# Docker compose file that launches Logstash-output-loki +# +# + +version: '2.1' + +services: + logstash: + image: logstash + build: + context: logstash + args: + SOURCE_IMAGE: grafana/logstash-output-loki:main + container_name: ${CONTAINER_NAME_PREFIX}logstash + logging: + driver: "json-file" + options: + max-size: "100m" + max-file: "10" + networks: + - control + ports: + - "5044:5044" # logstash beats input + - "1514:1514/tcp" # logstash syslog input + - "1514:1514/udp" # logstash syslog input + - "5959:5959" # logstash tcp json input + - "9600:9600" + restart: unless-stopped diff --git a/docker-compose/logstash/Dockerfile b/docker-compose/logstash/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..94fa5ab4bfe9a6d5946da6dda9a018c8385ef592 --- /dev/null +++ b/docker-compose/logstash/Dockerfile @@ -0,0 +1,10 @@ +ARG SOURCE_IMAGE +FROM ${SOURCE_IMAGE} + +# Disable Elastic Search connection +ENV ELASTIC_CONTAINER=false + +# Provide our logstash config +COPY loki.conf /home/logstash/ +COPY logstash.yml /usr/share/logstash/config/logstash.yml +COPY loki.conf /usr/share/logstash/pipeline/logstash.conf diff --git a/docker-compose/logstash/README.md b/docker-compose/logstash/README.md new file mode 100644 index 0000000000000000000000000000000000000000..156aaae85a210ee963a83baadc39ccdadbbef756 --- /dev/null +++ b/docker-compose/logstash/README.md @@ -0,0 +1,41 @@ +# Logstash + +Grafana Loki has a Logstash output plugin called logstash-output-loki that enables shipping logs to a Loki instance + +## Usage and configuration + +To configure Logstash to forward logs to Loki, simply add the loki output to your Logstash configuration file as documented below: + + output { + loki { + [url => "" | default = none | required=true] + + [tenant_id => string | default = nil | required=false] + + [message_field => string | default = "message" | required=false] + + [include_fields => array | default = [] | required=false] + + [batch_wait => number | default = 1(s) | required=false] + + [batch_size => number | default = 102400(bytes) | required=false] + + [min_delay => number | default = 1(s) | required=false] + + [max_delay => number | default = 300(s) | required=false] + + [retries => number | default = 10 | required=false] + + [username => string | default = nil | required=false] + + [password => secret | default = nil | required=false] + + [cert => path | default = nil | required=false] + + [key => path | default = nil| required=false] + + [ca_cert => path | default = nil | required=false] + + [insecure_skip_verify => boolean | default = false | required=false] + } + } diff --git a/docker-compose/logstash/logstash.yml b/docker-compose/logstash/logstash.yml new file mode 100644 index 0000000000000000000000000000000000000000..5f80650fe6fc635570fd9f7e4888da17eddf4e70 --- /dev/null +++ b/docker-compose/logstash/logstash.yml @@ -0,0 +1,2 @@ +http.host: "0.0.0.0" +#xpack.monitoring.elasticsearch.hosts: [ "http://loki:3100" ] diff --git a/docker-compose/logstash/loki.conf b/docker-compose/logstash/loki.conf new file mode 100644 index 0000000000000000000000000000000000000000..e22f53dbee2bac168d7e4d3bc572c854f6e81663 --- /dev/null +++ b/docker-compose/logstash/loki.conf @@ -0,0 +1,152 @@ +input { + beats { + port => 5044 + # ssl => true + # ssl_certificate => "/etc/pki/tls/certs/logstash-beats.crt" + # ssl_key => "/etc/pki/tls/private/logstash-beats.key" + } +} + +input { + syslog { + port => 1514 + } +} + +input { + tcp { + port => 5959 + codec => json + } +} + +filter { + if [type] == "syslog" { + grok { + match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" } + add_field => [ "received_at", "%{@timestamp}" ] + add_field => [ "received_from", "%{host}" ] + } + syslog_pri { } + date { + match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ] + } + } +} + +# filter { +# if [type] == "nginx-access" { +# grok { +# match => { "message" => "%{NGINXACCESS}" } +# } +# } +# } + +filter { + if [program] == "grafana" { + kv { } + mutate { + rename => { + "t" => "timestamp" + "lvl" => "level" + "msg" => "message" + } + uppercase => [ "level" ] + } + date { + match => [ "timestamp", "ISO8601" ] + } + } +} + +filter { + if [program] == "prometheus" { + kv { } + mutate { + rename => { + "ts" => "timestamp" + "msg" => "message" + } + uppercase => [ "level" ] + } + date { + match => [ "timestamp", "ISO8601" ] + } + } +} + +filter { + if [program] == "prometheus" { + kv { } + mutate { + rename => { + "ts" => "timestamp" + "msg" => "message" + } + uppercase => [ "level" ] + } + date { + match => [ "timestamp", "ISO8601" ] + } + } +} + +filter { + if [program] == "tango-rest" { + grok { + match => { + "message" => "%{TIMESTAMP_ISO8601:timestamp} %{WORD:level} %{GREEDYDATA:message}" + } + "overwrite" => [ "timestamp", "level", "message" ] + } + date { + match => [ "timestamp", "YYYY-MM-dd HH:mm:ss,SSS" ] + timezone => "UTC" + } + } +} + +filter { + # mark all our mariadb instances + grok { + match => { + "program" => [ "archiver-maria-db", "tangodb" ] + } + add_tag => [ "mariadb" ] + } + + # parse mariadb output + if "mariadb" in [tags] { + grok { + match => { + "message" => [ + "%{TIMESTAMP_ISO8601:timestamp} .%{WORD:level}. %{GREEDYDATA:message}", + "%{TIMESTAMP_ISO8601:timestamp} 0 .%{WORD:level}. %{GREEDYDATA:message}" + ] + } + "overwrite" => [ "timestamp", "level", "message" ] + } + mutate { + gsub => [ + "level", "Note", "Info" + ] + uppercase => [ "level" ] + } + date { + match => [ "timestamp", "YYYY-MM-dd HH:mm:ssZZ", "YYYY-MM-dd HH:mm:ss", "YYYY-MM-dd H:mm:ss" ] + timezone => "UTC" + } + } +} + +output { + # elasticsearch { + # hosts => ["localhost"] + # manage_template => false + # index => "logstash-%{+YYYY.MM.dd}" + # } + loki { + url => "http://loki:3100/loki/api/v1/push" + } +} + diff --git a/docker-compose/loki.yml b/docker-compose/loki.yml new file mode 100644 index 0000000000000000000000000000000000000000..2007e16aa7e06726cc5b8be75553e4ef2df56475 --- /dev/null +++ b/docker-compose/loki.yml @@ -0,0 +1,40 @@ +# +# Docker compose file that launches a LOKI instance. +# See https://grafana.com/docs/loki/latest/installation/docker/ +# +# + +version: "3" + +services: + loki: + image: grafana/loki:2.6.0 + container_name: ${CONTAINER_NAME_PREFIX}loki + logging: + driver: "json-file" + options: + max-size: "100m" + max-file: "10" + networks: + - control + ports: + - "3100:3100" + command: -config.file=/etc/loki/local-config.yaml + restart: unless-stopped + + promtail: + image: grafana/promtail:2.6.0 + container_name: ${CONTAINER_NAME_PREFIX}promtail + logging: + driver: "json-file" + options: + max-size: "100m" + max-file: "10" + volumes: + - /var/log:/var/log + command: -config.file=/etc/promtail/config.yml + networks: + - control + ports: + - "9080:9080" + restart: unless-stopped diff --git a/sbin/run_integration_test.sh b/sbin/run_integration_test.sh index 57e4fff1458ead7733250ad130da5ff1602a1bdb..563b2c7fbf793bcbd9db84a45635433c0b742c26 100755 --- a/sbin/run_integration_test.sh +++ b/sbin/run_integration_test.sh @@ -86,14 +86,16 @@ SIMULATORS="sdptr-sim recv-sim unb2-sim apsct-sim apspu-sim ccd-sim" # shellcheck disable=SC2086 make build $DEVICES $SIMULATORS -make build elk integration-test +# make build elk integration-test # L2SS-970: elk temporarily disabled +make build logstash integration-test make build archiver-timescale hdbppts-cm hdbppts-es # Start and stop sequence # shellcheck disable=SC2086 make stop $DEVICES $SIMULATORS hdbppts-es hdbppts-cm archiver-timescale make stop device-docker # this one does not test well in docker-in-docker -make stop elk +# make stop elk # L2SS-970: elk temporarily disabled +make stop logstash # Run dummy integration test to install pytango in tox virtualenv without # the memory pressure of the ELK stack. @@ -103,7 +105,8 @@ make stop elk # TODO(L2SS-992): Remove me and above documentation integration_test dummy -make start elk +# make start elk # L2SS-970: elk temporarily disabled +make start logstash # Give elk time to start # TODO(L2SS-988): Use a nicer more reliable mechanism diff --git a/sbin/tag_and_push_docker_image.sh b/sbin/tag_and_push_docker_image.sh index 3e10110ed1ac4dad5e0f6fb0521a314c0ae06fbe..d3b5ea894e0b13eb3bcb7755656381235d664a5a 100755 --- a/sbin/tag_and_push_docker_image.sh +++ b/sbin/tag_and_push_docker_image.sh @@ -61,6 +61,7 @@ REMOTE_IMAGES=( # TODO(Corne): Have this list generated from the .yml files LOCAL_IMAGES=( "elk elk y" "elk-configure-host elk-configure-host y" + "logstash logstash y" "lofar-device-base lofar-device-base y" "apsct-sim docker-compose_apsct-sim y" "apspu-sim docker-compose_apspu-sim y" diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION index 6da28dde76d6550e3d398a70a9a8231256774669..d917d3e26adc9854b4569871e20111c38de2606f 100644 --- a/tangostationcontrol/VERSION +++ b/tangostationcontrol/VERSION @@ -1 +1 @@ -0.1.1 \ No newline at end of file +0.1.2 diff --git a/tangostationcontrol/requirements.txt b/tangostationcontrol/requirements.txt index 1101bd06624daa6fb01aaaa1a6aab0e5c8a82ddc..a6662e4b263ca26aa93ad216c9b104ffd0e8e735 100644 --- a/tangostationcontrol/requirements.txt +++ b/tangostationcontrol/requirements.txt @@ -3,7 +3,7 @@ # integration process, which may cause wedges in the gate later. importlib-metadata<2.0.0,>=0.12;python_version<"3.8" -lofar-station-client@git+https://git.astron.nl/lofar2.0/lofar-station-client@0.6.0 +lofar-station-client@git+https://git.astron.nl/lofar2.0/lofar-station-client@0.9.1 numpy mock asyncua >= 0.9.90 # LGPLv3 diff --git a/tangostationcontrol/setup.cfg b/tangostationcontrol/setup.cfg index 510034a343b89b2bc2a24bd212bafe77a4565a67..f42357ef73a2f50734d03f26444b32b905ac0042 100644 --- a/tangostationcontrol/setup.cfg +++ b/tangostationcontrol/setup.cfg @@ -26,7 +26,7 @@ package_dir= packages=find: python_requires => 3.7 install_requires = - importlib-metadata>=0.12;python_version<"3.8" + importlib-metadata>=0.12, <5.0;python_version<"3.8" pip>=1.5 [options.packages.find] diff --git a/tangostationcontrol/tangostationcontrol/clients/statistics/consumer.py b/tangostationcontrol/tangostationcontrol/clients/statistics/consumer.py index 8930cdb371cd4ac23b299d479e19dcd28dd08d38..497c7eb763c362ee3ba3c8c7854be8d03664efda 100644 --- a/tangostationcontrol/tangostationcontrol/clients/statistics/consumer.py +++ b/tangostationcontrol/tangostationcontrol/clients/statistics/consumer.py @@ -10,6 +10,7 @@ import logging from threading import Thread from queue import Queue +import time from lofar_station_client.statistics.collector import StatisticsCollector @@ -24,6 +25,9 @@ class StatisticsConsumer(Thread, StatisticsClientThread): # Maximum time to wait for the Thread to get unstuck, if we want to stop DISCONNECT_TIMEOUT = 10.0 + # Minimum time between packet exception logging + LOGGING_TIME = 30 + # No default options required, for now? _DEFAULT_OPTIONS = {} @@ -35,10 +39,30 @@ class StatisticsConsumer(Thread, StatisticsClientThread): super().__init__() self.start() + self.last_exception_time = time.time() + self.exception_counter = 0 + @property def _options(self) -> dict: return StatisticsConsumer._DEFAULT_OPTIONS + def _exception_logging(self, err): + # get the time since we last logged a message + time_since_log = time.time() - self.last_exception_time + self.exception_counter += 1 + + # if the time since we last logged an exeption is greater than LOGGING TIME + if time_since_log < self.LOGGING_TIME: + return + + if self.exception_counter == 1: + logger.exception(f"Could not parse statistics packet") + else: + logger.exception(f"Could not parse {self.exception_counter} statistics packets in the last {int(time_since_log)} seconds") + + self.last_exception_time = time.time() + self.exception_counter = 0 + def run(self): logger.info("Starting statistics thread") @@ -53,8 +77,7 @@ class StatisticsConsumer(Thread, StatisticsClientThread): try: self.collector.process_packet(self.last_packet) except ValueError as e: - logger.exception("Could not parse statistics packet") - + self._exception_logging() # continue processing logger.info("Stopped statistics thread") diff --git a/tangostationcontrol/tangostationcontrol/common/lofar_logging.py b/tangostationcontrol/tangostationcontrol/common/lofar_logging.py index 89ab11c0db3b31af805b5cda78ce21a77ee9318a..13d20551b0def7a72cd75ff750232b861ebd9b0e 100644 --- a/tangostationcontrol/tangostationcontrol/common/lofar_logging.py +++ b/tangostationcontrol/tangostationcontrol/common/lofar_logging.py @@ -56,7 +56,7 @@ class LogSuppressErrorSpam(logging.Formatter): self.error_suppress_interval = error_suppress_interval def is_error_to_suppress(self, record): - # Errors occuring by not being able to connect to the ELK stack, f.e. because it is down. + # Errors occuring by not being able to connect to the log processing container, f.e. because it is down. return record.name == "LogProcessingWorker" and record.msg == "An error occurred while sending events: %s" def filter(self, record): @@ -105,7 +105,7 @@ class LogAnnotator(logging.Formatter): def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False): """ Configure the given logger (or root if None) to: - - send logs to the ELK stack + - send logs to Loki through Logstash - send logs to Tango - send logs to stdout """ @@ -128,7 +128,7 @@ def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False): # don't spam debug messages when fetching URLs logging.getLogger("urllib3").setLevel(logging.INFO) - # don't spam error messages when having trouble connecting to ELK + # don't spam error messages when having connection troubles logging.getLogger("LogProcessingWorker").setLevel(logging.CRITICAL) # for now, also log to stderr @@ -151,12 +151,12 @@ def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False): if debug: return logger - # Log to ELK stack + # Log to Logstash-Loki try: from logstash_async.handler import AsynchronousLogstashHandler, LogstashFormatter - # log to the tcp_input of logstash in our ELK stack - handler = AsynchronousLogstashHandler("elk", 5959, database_path='/tmp/lofar_pending_log_messages.db') + # log to the tcp_input of logstash in our logstash-loki container + handler = AsynchronousLogstashHandler("logstash", 5959, database_path='/tmp/lofar_pending_log_messages.db') # configure log messages formatter = LogstashFormatter(extra=log_extra, tags=["python", "lofar"]) @@ -167,9 +167,9 @@ def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False): # install the handler logger.addHandler(handler) except ImportError: - logger.exception("Cannot forward logs to ELK: logstash_async module not found.") + logger.exception("Cannot forward logs to Logstash-Loki: logstash_async module not found.") except Exception: - logger.exception("Cannot forward logs to ELK.") + logger.exception("Cannot forward logs to Logstash-Loki.") # Don't log to Tango to reduce log spam """ diff --git a/tangostationcontrol/tangostationcontrol/devices/docker_device.py b/tangostationcontrol/tangostationcontrol/devices/docker_device.py index 8ee1301c6eeb89d91fa166bc4691a9d84b0bb77a..71e13119de523f829092d040ab62bc11330ac4ad 100644 --- a/tangostationcontrol/tangostationcontrol/devices/docker_device.py +++ b/tangostationcontrol/tangostationcontrol/devices/docker_device.py @@ -103,6 +103,10 @@ class Docker(lofar_device): elk_RW = attribute_wrapper(comms_annotation={"container": "elk"}, datatype=bool, access=AttrWriteType.READ_WRITE) grafana_R = attribute_wrapper(comms_annotation={"container": "grafana"}, datatype=bool) grafana_RW = attribute_wrapper(comms_annotation={"container": "grafana"}, datatype=bool, access=AttrWriteType.READ_WRITE) + logstash_R = attribute_wrapper(comms_annotation={"container": "logstash"}, datatype=bool) + logstash_RW = attribute_wrapper(comms_annotation={"container": "logstash"}, datatype=bool, access=AttrWriteType.READ_WRITE) + loki_R = attribute_wrapper(comms_annotation={"container": "loki"}, datatype=bool) + loki_RW = attribute_wrapper(comms_annotation={"container": "loki"}, datatype=bool, access=AttrWriteType.READ_WRITE) hdbppts_cm_R = attribute_wrapper(comms_annotation={"container": "hdbppts-cm"}, datatype=bool) hdbppts_cm_RW = attribute_wrapper(comms_annotation={"container": "hdbppts-cm"}, datatype=bool, access=AttrWriteType.READ_WRITE) hdbppts_es_R = attribute_wrapper(comms_annotation={"container": "hdbppts-es"}, datatype=bool) diff --git a/tangostationcontrol/tangostationcontrol/devices/recv.py b/tangostationcontrol/tangostationcontrol/devices/recv.py index fc1b8fcb110dc99fcfd80538707d45ef6b6534de..8a339543c62ab6f629e9353e14871ea0a10f9dc3 100644 --- a/tangostationcontrol/tangostationcontrol/devices/recv.py +++ b/tangostationcontrol/tangostationcontrol/devices/recv.py @@ -139,14 +139,14 @@ class RECV(opcua_device): # The HBAT beamformer delays represent 32 delays for each of the 96 inputs. # The 32 delays deconstruct as delays[polarisation][dipole], and each delay is the number of 'delay steps' to apply (0.5ns for HBAT1). - HBAT_BF_delay_steps_R = attribute_wrapper(comms_annotation=["HBAT_BF_delay_steps_R" ],datatype=numpy.int64 , dims=(96,2,16)) - HBAT_BF_delay_steps_RW = attribute_wrapper(comms_annotation=["HBAT_BF_delay_steps_RW" ],datatype=numpy.int64 , dims=(96,2,16), access=AttrWriteType.READ_WRITE) - HBAT_LED_on_R = attribute_wrapper(comms_annotation=["HBAT_LED_on_R" ],datatype=bool , dims=(96,2,16)) - HBAT_LED_on_RW = attribute_wrapper(comms_annotation=["HBAT_LED_on_RW" ],datatype=bool , dims=(96,2,16), access=AttrWriteType.READ_WRITE) - HBAT_PWR_LNA_on_R = attribute_wrapper(comms_annotation=["HBAT_PWR_LNA_on_R" ],datatype=bool , dims=(96,2,16)) - HBAT_PWR_LNA_on_RW = attribute_wrapper(comms_annotation=["HBAT_PWR_LNA_on_RW" ],datatype=bool , dims=(96,2,16), access=AttrWriteType.READ_WRITE) - HBAT_PWR_on_R = attribute_wrapper(comms_annotation=["HBAT_PWR_on_R" ],datatype=bool , dims=(96,2,16)) - HBAT_PWR_on_RW = attribute_wrapper(comms_annotation=["HBAT_PWR_on_RW" ],datatype=bool , dims=(96,2,16), access=AttrWriteType.READ_WRITE) + HBAT_BF_delay_steps_R = attribute_wrapper(comms_annotation=["HBAT_BF_delay_steps_R" ],datatype=numpy.int64 , dims=(96,16,2)) + HBAT_BF_delay_steps_RW = attribute_wrapper(comms_annotation=["HBAT_BF_delay_steps_RW" ],datatype=numpy.int64 , dims=(96,16,2), access=AttrWriteType.READ_WRITE) + HBAT_LED_on_R = attribute_wrapper(comms_annotation=["HBAT_LED_on_R" ],datatype=bool , dims=(96,16,2)) + HBAT_LED_on_RW = attribute_wrapper(comms_annotation=["HBAT_LED_on_RW" ],datatype=bool , dims=(96,16,2), access=AttrWriteType.READ_WRITE) + HBAT_PWR_LNA_on_R = attribute_wrapper(comms_annotation=["HBAT_PWR_LNA_on_R" ],datatype=bool , dims=(96,16,2)) + HBAT_PWR_LNA_on_RW = attribute_wrapper(comms_annotation=["HBAT_PWR_LNA_on_RW" ],datatype=bool , dims=(96,16,2), access=AttrWriteType.READ_WRITE) + HBAT_PWR_on_R = attribute_wrapper(comms_annotation=["HBAT_PWR_on_R" ],datatype=bool , dims=(96,16,2)) + HBAT_PWR_on_RW = attribute_wrapper(comms_annotation=["HBAT_PWR_on_RW" ],datatype=bool , dims=(96,16,2), access=AttrWriteType.READ_WRITE) RCU_ADC_locked_R = attribute_wrapper(comms_annotation=["RCU_ADC_locked_R" ],datatype=bool , dims=(96,)) RCU_attenuator_dB_R = attribute_wrapper(comms_annotation=["RCU_attenuator_dB_R" ],datatype=numpy.int64 , dims=(96,)) RCU_attenuator_dB_RW = attribute_wrapper(comms_annotation=["RCU_attenuator_dB_RW" ],datatype=numpy.int64 , dims=(96,), access=AttrWriteType.READ_WRITE) @@ -274,7 +274,7 @@ class RECV(opcua_device): which is a value per tile per dipole per polarisation. """ # Duplicate delay values per polarisation - polarised_delays = numpy.tile(delays, 2) # output dims -> 96x32 + polarised_delays = numpy.repeat(delays, 2, axis=1) # output dims -> 96x32 # Add signal input delay calibrated_delays = numpy.add(polarised_delays, self.HBAT_signal_input_delays) diff --git a/tangostationcontrol/tangostationcontrol/devices/sdp/statistics.py b/tangostationcontrol/tangostationcontrol/devices/sdp/statistics.py index c9c3d76338d1b8b7da039a4571355c975dbc6f1e..1581b695bf35e1f5ead27b292d134f29da62a5be 100644 --- a/tangostationcontrol/tangostationcontrol/devices/sdp/statistics.py +++ b/tangostationcontrol/tangostationcontrol/devices/sdp/statistics.py @@ -67,6 +67,7 @@ class Statistics(opcua_device): # when last packet was received last_packet_timestamp_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "udp", "parameter": "last_packet_timestamp"}, datatype=numpy.uint64) + # queue fill percentage, as reported by the consumer queue_collector_fill_percentage_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "queue", "parameter": "collector_fill_percentage"}, datatype=numpy.uint64) queue_replicator_fill_percentage_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "queue", "parameter": "replicator_fill_percentage"}, datatype=numpy.uint64) @@ -83,6 +84,8 @@ class Statistics(opcua_device): nof_invalid_packets_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "nof_invalid_packets"}, datatype=numpy.uint64) # last packet that could not be parsed last_invalid_packet_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "last_invalid_packet"}, dims=(9000,), datatype=numpy.uint8) + # what the last exception was + last_invalid_packet_exception_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "last_invalid_packet_exception"}, datatype=str) # -------- # Overloaded functions diff --git a/tangostationcontrol/tangostationcontrol/integration_test/default/devices/test_device_tilebeam.py b/tangostationcontrol/tangostationcontrol/integration_test/default/devices/test_device_tilebeam.py index 541e04bc4e878043dfa80569b4342712be22cf0e..c474e6628d695d4fd29eba7e0fed04e296477a8a 100644 --- a/tangostationcontrol/tangostationcontrol/integration_test/default/devices/test_device_tilebeam.py +++ b/tangostationcontrol/tangostationcontrol/integration_test/default/devices/test_device_tilebeam.py @@ -122,7 +122,7 @@ class TestDeviceTileBeam(AbstractTestBases.TestDeviceBase): self.proxy.set_pointing(["AZELGEO","0deg","0deg"] * self.NR_TILES) # obtain delays of the X polarisation of all the elements of the first tile - north_beam_delay_steps = antennafield_proxy.HBAT_BF_delay_steps_RW[0].reshape(2,4,4)[0] + north_beam_delay_steps = antennafield_proxy.HBAT_BF_delay_steps_RW[0].reshape(4,4,2)[:,:,0] # delays must differ under rotation, or our test will give a false positive self.assertNotEqual(north_beam_delay_steps.tolist(), numpy.rot90(north_beam_delay_steps).tolist()) @@ -132,7 +132,7 @@ class TestDeviceTileBeam(AbstractTestBases.TestDeviceBase): self.proxy.set_pointing(["AZELGEO",f"{angle}deg","0deg"] * self.NR_TILES) # obtain delays of the X polarisation of all the elements of the first tile - angled_beam_delay_steps = antennafield_proxy.HBAT_BF_delay_steps_RW[0].reshape(2,4,4)[0] + angled_beam_delay_steps = antennafield_proxy.HBAT_BF_delay_steps_RW[0].reshape(4,4,2)[:,:,0] expected_delay_steps = numpy.rot90(north_beam_delay_steps, k=-(angle/90)) @@ -166,7 +166,7 @@ class TestDeviceTileBeam(AbstractTestBases.TestDeviceBase): # The [3] = 28 diff is explained that we match the closest delay step and LOFAR 1 wants the one with # in 0.2ns but if it can't it will do a int(delay / 0.5ns) so we get slightly different results but # they can be explained. - expected_HBAT_delay_steps = numpy.array([24, 25, 27, 29, 17, 18, 20, 21, 10, 11, 13, 14, 3, 4, 5, 7] * 2, dtype=numpy.int64) + expected_HBAT_delay_steps = numpy.repeat(numpy.array([24, 25, 27, 29, 17, 18, 20, 21, 10, 11, 13, 14, 3, 4, 5, 7], dtype=numpy.int64), 2) numpy.testing.assert_equal(calculated_HBAT_delay_steps[0], expected_HBAT_delay_steps) numpy.testing.assert_equal(calculated_HBAT_delay_steps[self.NR_TILES - 1], expected_HBAT_delay_steps) diff --git a/tangostationcontrol/tangostationcontrol/integration_test/default/statistics/test_writer_sst.py b/tangostationcontrol/tangostationcontrol/integration_test/default/statistics/test_writer_sst.py index cf730e6461dfc4e438fc77ea8d8150cb59dfa90f..504ac85566148762867c73e01b634afb4820e850 100644 --- a/tangostationcontrol/tangostationcontrol/integration_test/default/statistics/test_writer_sst.py +++ b/tangostationcontrol/tangostationcontrol/integration_test/default/statistics/test_writer_sst.py @@ -82,14 +82,14 @@ class TestStatisticsWriterSST(BaseIntegrationTestCase): '2021-09-20T12:17:40.000+00:00' ) self.assertIsNotNone(stat) - self.assertEqual("0.1.1", stat.station_version_id) + self.assertEqual("0.1.2", stat.station_version_id) self.assertEqual("0.1", stat.writer_version_id) def test_insert_tango_SST_statistics(self): self.assertEqual(DevState.ON, self.recv_proxy.state()) collector = StationSSTCollector(device=self.recv_proxy) - # Test attribute values retrieval + # Test attribute values retrieval collector.parse_device_attributes() numpy.testing.assert_equal( collector.parameters["rcu_attenuator_dB"].flatten(), @@ -186,7 +186,7 @@ class TestStatisticsWriterSST(BaseIntegrationTestCase): self.assertEqual(stat.rcu_band_select, None) self.assertEqual(stat.rcu_dth_on, None) - def test_SST_statistics_with_device_in_off(self): + def test_SST_statistics_with_device_in_off(self): self.setup_recv_proxy() self.recv_proxy.Off() self.assertEqual(DevState.OFF, self.recv_proxy.state()) @@ -221,7 +221,7 @@ class TestStatisticsWriterSST(BaseIntegrationTestCase): stat = stat_parser.get_statistic('2021-09-20T12:17:40.000+00:00') # same as stat_parser.statistics[0] self.assertIsNotNone(stat) self.assertEqual(121, stat.data_id_signal_input_index) - # Test RECV attributes + # Test RECV attributes self.assertEqual(stat.rcu_attenuator_dB, None) self.assertEqual(stat.rcu_band_select, None) self.assertEqual(stat.rcu_dth_on, None)