diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ca3a617c1b052564c46e2a5e426fe9a1e86787d6..350b264d8891a58f8b268c36b7d0315962ff382e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -33,7 +33,6 @@ bandit: - tox -e bandit shellcheck: stage: static-analysis - allow_failure: true before_script: - sudo apt-get update - sudo apt-get install -y shellcheck diff --git a/bin/dump_ConfigDb.sh b/bin/dump_ConfigDb.sh index bbd97a2208381c2fcf39710b9f908814313bdd7b..7745c18482000fe2e7a726e27b6fa5eeae57e88e 100755 --- a/bin/dump_ConfigDb.sh +++ b/bin/dump_ConfigDb.sh @@ -1,6 +1,4 @@ -if [ ${#} -ne 1 ]; then - echo "You must provide a file name for the TANGO_HOST DB dump!" - exit -1 -fi +#!/bin/bash -docker exec -it dsconfig python -m dsconfig.dump > ${1} +# writes the JSON dump to stdout +docker exec -it dsconfig python -m dsconfig.dump diff --git a/bin/start-DS.sh b/bin/start-DS.sh index a9c9765d52db4fecd744117ef64938f20288511d..83a6eec6dd30f2e496fa03ffc6f7351d8e9a664d 100755 --- a/bin/start-DS.sh +++ b/bin/start-DS.sh @@ -1,7 +1,8 @@ +#!/bin/bash function help() { why="${1}" - echo -e "*** Cannot start the Python device server.\n${why}\n\n* The Python file for the device server must be the 1st parameter that is provided.\n* The instance of this device server must be the 2nd parameter that is provided." + echo -e "*** Cannot start the Python device server.\\n${why}\\n\\n* The Python file for the device server must be the 1st parameter that is provided.\\n* The instance of this device server must be the 2nd parameter that is provided." exit -1 } @@ -29,14 +30,14 @@ esac # ATTENTION # This is assuming that the device server's Python file exists # on the Docker's host in the user's ${HOME} directory. -runThis=$(basename ${deviceServer}) +runThis=$(basename "${deviceServer}") runThis=${runThis//.sh/.py} -if [ -f ${runThis} ]; then +if [ -f "${runThis}" ]; then myDir=${PWD} else - myDir=${PWD}/$(dirname ${deviceServer}) + myDir=${PWD}/$(dirname "${deviceServer}") fi deviceServerPath=${myDir/${HOME}/\/hosthome} # Tango log lines start with a UNIX timestamp. Replace them with the UTC time. -docker exec -it itango python3 ${deviceServerPath}/${runThis} ${instance} ${@} | perl -ne 'use Time::Piece; s/^([0-9]+)/gmtime($1)->strftime("%F %T")/e; print;' +docker exec -it itango python3 "${deviceServerPath}/${runThis}" "${instance}" "${@}" | perl -ne 'use Time::Piece; s/^([0-9]+)/gmtime($1)->strftime("%F %T")/e; print;' diff --git a/bin/start-jive.sh b/bin/start-jive.sh index fcbb9f8b5e95a4bbbfb6b2895c30d4d2a1914340..38e04ce1837f2351a46f0f5f3c55936825cf5d7b 100755 --- a/bin/start-jive.sh +++ b/bin/start-jive.sh @@ -1,9 +1,10 @@ +#!/bin/bash OS=$(uname) case ${OS} in Linux) display="" - XTRA_OPTIONS="-u $(id -u ${USER}):$(id -g ${USER}) -v /etc/passwd:/etc/passwd:ro -v /etc/groups:/etc/groups:ro" + XTRA_OPTIONS="-u $(id -u "${USER}"):$(id -g "${USER}") -v /etc/passwd:/etc/passwd:ro -v /etc/groups:/etc/groups:ro" ;; Darwin) @@ -28,10 +29,10 @@ else fi #docker run --rm -it --network host ${OPTIONS} nexus.engageska-portugal.pt/ska-docker/tango-java:latest ${command} ${@} -container_name=artefact.skatelescope.org/ska-tango-images/tango-java:9.3.3.2 -container=$(docker ps | egrep ${container_name} | cut -d' ' -f1) -if [ ! -z ${container} ]; then - docker exec -it ${container} ${command} ${@} +container_name=artefact.skao.int/ska-tango-images-tango-java:9.3.4 +container=$(docker ps | grep -E ${container_name} | cut -d' ' -f1) +if [ ! 
-z "${container}" ]; then + docker exec -it "${container}" ${command} "${@}" else echo "Container \"${container_name}\" is not running." fi diff --git a/devices/common/lofar_logging.py b/devices/common/lofar_logging.py index c59979636f718d233f293f1b87139c0115f9ab3d..4a9f67ca2be587530bc3ff7805fb99e2801f96d5 100644 --- a/devices/common/lofar_logging.py +++ b/devices/common/lofar_logging.py @@ -100,7 +100,7 @@ class LogAnnotator(logging.Formatter): # we just annotate, we don't filter return True -def configure_logger(logger: logging.Logger=None, log_extra=None): +def configure_logger(logger: logging.Logger=None, log_extra=None, debug=False): """ Configure the given logger (or root if None) to: - send logs to the ELK stack @@ -120,6 +120,26 @@ def configure_logger(logger: logging.Logger=None, log_extra=None): # remove spam from the OPC-UA client connection logging.getLogger("asyncua").setLevel(logging.WARN) + # for now, also log to stderr + # Set up logging in a way that it can be understood by a human reader, be + # easily grep'ed, be parsed with a couple of shell commands and + # easily fed into an Kibana/Elastic search system. + handler = logging.StreamHandler() + + # Always also log the hostname because it makes the origin of the log clear. + hostname = socket.gethostname() + + formatter = logging.Formatter(fmt = '%(asctime)s.%(msecs)d %(levelname)s - HOST="{}" DEVICE="%(tango_device)s" PID="%(process)d" TNAME="%(threadName)s" FILE="%(pathname)s" LINE="%(lineno)d" FUNC="%(funcName)s" MSG="%(message)s"'.format(hostname), datefmt = '%Y-%m-%dT%H:%M:%S') + handler.setFormatter(formatter) + handler.addFilter(LogSuppressErrorSpam()) + handler.addFilter(LogAnnotator()) + + logger.addHandler(handler) + + # If configuring for debug; exit early + if debug: + return logger + # Log to ELK stack try: from logstash_async.handler import AsynchronousLogstashHandler, LogstashFormatter @@ -149,23 +169,6 @@ def configure_logger(logger: logging.Logger=None, log_extra=None): except Exception: logger.exception("Cannot forward logs to Tango.") - - # for now, also log to stderr - # Set up logging in a way that it can be understood by a human reader, be - # easily grep'ed, be parsed with a couple of shell commands and - # easily fed into an Kibana/Elastic search system. - handler = logging.StreamHandler() - - # Always also log the hostname because it makes the origin of the log clear. - hostname = socket.gethostname() - - formatter = logging.Formatter(fmt = '%(asctime)s.%(msecs)d %(levelname)s - HOST="{}" DEVICE="%(tango_device)s" PID="%(process)d" TNAME="%(threadName)s" FILE="%(pathname)s" LINE="%(lineno)d" FUNC="%(funcName)s" MSG="%(message)s"'.format(hostname), datefmt = '%Y-%m-%dT%H:%M:%S') - handler.setFormatter(formatter) - handler.addFilter(LogSuppressErrorSpam()) - handler.addFilter(LogAnnotator()) - - logger.addHandler(handler) - return logger def device_logging_to_python(): diff --git a/devices/devices/sdp/sdp.py b/devices/devices/sdp/sdp.py index 693fab3a9b912a3e8e3e30d58dc237be6149df56..1575aaa6b74c373fd952820365d6790450491d36 100644 --- a/devices/devices/sdp/sdp.py +++ b/devices/devices/sdp/sdp.py @@ -56,6 +56,27 @@ class SDP(opcua_device): mandatory=False, default_value=[[False] * 12] * 16 ) + + # If we enable the waveform generator, we want some sane defaults. 
+ + FPGA_wg_amplitude_RW = device_property( + dtype='DevVarDoubleArray', + mandatory=False, + default_value=[[0.1] * 12] * 16 + ) + + FPGA_wg_frequency_RW = device_property( + dtype='DevVarDoubleArray', + mandatory=False, + # Emit a signal on subband 102 + default_value=[[102 * 200e6/1024] * 12] * 16 + ) + + FPGA_wg_phase_RW = device_property( + dtype='DevVarDoubleArray', + mandatory=False, + default_value=[[0.0] * 12] * 16 + ) FPGA_sdp_info_station_id_RW_default = device_property( dtype='DevVarULongArray', diff --git a/devices/devices/sdp/statistics_collector.py b/devices/devices/sdp/statistics_collector.py index 1bd8f3c12135a818526c48ecbff80408f290b7c9..d9e5668b7e9b3db288a4b2360f4fa298594bbc1c 100644 --- a/devices/devices/sdp/statistics_collector.py +++ b/devices/devices/sdp/statistics_collector.py @@ -131,6 +131,8 @@ class XSTCollector(StatisticsCollector): # Last value array we've constructed out of the packets "xst_blocks": numpy.zeros((self.MAX_BLOCKS, self.BLOCK_LENGTH * self.BLOCK_LENGTH * self.VALUES_PER_COMPLEX), dtype=numpy.int64), + # Whether the values are actually conjugated and transposed + "xst_conjugated": numpy.zeros((self.MAX_BLOCKS,), dtype=numpy.bool_), "xst_timestamps": numpy.zeros((self.MAX_BLOCKS,), dtype=numpy.float64), "xst_subbands": numpy.zeros((self.MAX_BLOCKS,), dtype=numpy.uint16), "integration_intervals": numpy.zeros((self.MAX_BLOCKS,), dtype=numpy.float32), @@ -162,20 +164,29 @@ class XSTCollector(StatisticsCollector): if fields.first_baseline[antenna] % self.BLOCK_LENGTH != 0: raise ValueError("Packet describes baselines starting at %s, but we require a multiple of BLOCK_LENGTH=%d" % (fields.first_baseline, self.MAX_INPUTS)) + # Make sure we always have a baseline (a,b) with a>=b. If not, we swap the indices and mark that the data must be conjugated and transposed when processed. + first_baseline = fields.first_baseline + if first_baseline[0] < first_baseline[1]: + conjugated = True + first_baseline = (first_baseline[1], first_baseline[0]) + else: + conjugated = False + # the payload contains complex values for the block of baselines of size BLOCK_LENGTH x BLOCK_LENGTH # starting at baseline first_baseline. # # we honour this format, as we want to keep the metadata together with these blocks. we do need to put the blocks in a linear # and tight order, however, so we calculate a block index. - block_index = baseline_index(fields.first_baseline[0] // self.BLOCK_LENGTH, fields.first_baseline[1] // self.BLOCK_LENGTH) + block_index = baseline_index(first_baseline[0] // self.BLOCK_LENGTH, first_baseline[1] // self.BLOCK_LENGTH) + + # We did enough checks on first_baseline for this to be a logic error in our code + assert 0 <= block_index < self.MAX_BLOCKS, f"Received block {block_index}, but have only room for {self.MAX_BLOCKS}. Block starts at baseline {first_baseline}." 
# process the packet self.parameters["nof_valid_payloads"][fields.gn_index] += numpy.uint64(1) - - block_index = baseline_index(fields.first_baseline[0], fields.first_baseline[1]) - self.parameters["xst_blocks"][block_index][:fields.nof_statistics_per_packet] = fields.payload self.parameters["xst_timestamps"][block_index] = numpy.float64(fields.timestamp().timestamp()) + self.parameters["xst_conjugated"][block_index] = conjugated self.parameters["xst_subbands"][block_index] = numpy.uint16(fields.subband_index) self.parameters["integration_intervals"][block_index] = fields.integration_interval() @@ -184,11 +195,16 @@ class XSTCollector(StatisticsCollector): matrix = numpy.zeros((self.MAX_INPUTS, self.MAX_INPUTS), dtype=numpy.complex64) xst_blocks = self.parameters["xst_blocks"] + xst_conjugated = self.parameters["xst_conjugated"] for block_index in range(self.MAX_BLOCKS): # convert real/imag int to complex float values. this works as real/imag come in pairs block = xst_blocks[block_index].astype(numpy.float32).view(numpy.complex64) + if xst_conjugated[block_index]: + # block is conjugated and transposed. process. + block = block.conjugate().transpose() + # reshape into [a][b] block = block.reshape(self.BLOCK_LENGTH, self.BLOCK_LENGTH) diff --git a/devices/devices/sdp/xst.py b/devices/devices/sdp/xst.py index 928637ce5e548dcc2c418a2874dcaed9abd662b4..c9883303b80425f0c142181994d43e477ec5431c 100644 --- a/devices/devices/sdp/xst.py +++ b/devices/devices/sdp/xst.py @@ -72,6 +72,12 @@ class XST(Statistics): default_value=[[0,102,0,0,0,0,0,0]] * 16 ) + FPGA_xst_integration_interval_RW_default = device_property( + dtype='DevVarDoubleArray', + mandatory=False, + default_value=[1.0] * 16 + ) + FPGA_xst_offload_enable_RW_default = device_property( dtype='DevVarBooleanArray', mandatory=False, @@ -84,6 +90,7 @@ class XST(Statistics): 'FPGA_xst_offload_hdr_udp_destination_port_RW', 'FPGA_xst_subband_select_RW', + 'FPGA_xst_integration_interval_RW', # enable only after the offloading is configured correctly 'FPGA_xst_offload_enable_RW' @@ -94,8 +101,8 @@ class XST(Statistics): # ---------- # FPGA control points for XSTs - FPGA_xst_integration_interval_RW = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_integration_interval_RW"], datatype=numpy.double, dims=(8,16), access=AttrWriteType.READ_WRITE) - FPGA_xst_integration_interval_R = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_integration_interval_R"], datatype=numpy.double, dims=(8,16)) + FPGA_xst_integration_interval_RW = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_integration_interval_RW"], datatype=numpy.double, dims=(16,), access=AttrWriteType.READ_WRITE) + FPGA_xst_integration_interval_R = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_integration_interval_R"], datatype=numpy.double, dims=(16,)) FPGA_xst_offload_enable_RW = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_offload_enable_RW"], datatype=numpy.bool_, dims=(16,), access=AttrWriteType.READ_WRITE) FPGA_xst_offload_enable_R = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_offload_enable_R"], datatype=numpy.bool_, dims=(16,)) FPGA_xst_offload_hdr_eth_destination_mac_RW = attribute_wrapper(comms_id=OPCUAConnection, comms_annotation=["2:FPGA_xst_offload_hdr_eth_destination_mac_RW"], datatype=numpy.str, dims=(16,), access=AttrWriteType.READ_WRITE) @@ -115,6 +122,8 @@ class XST(Statistics): nof_payload_errors_R = 
attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "nof_payload_errors"}, dims=(XSTCollector.MAX_FPGAS,), datatype=numpy.uint64) # latest XSTs xst_blocks_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "xst_blocks"}, dims=(XSTCollector.BLOCK_LENGTH * XSTCollector.BLOCK_LENGTH * XSTCollector.VALUES_PER_COMPLEX, XSTCollector.MAX_BLOCKS), datatype=numpy.int64) + # whether the values in the block are conjugated and transposed + xst_conjugated_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "xst_conjugated"}, dims=(XSTCollector.MAX_BLOCKS,), datatype=numpy.bool_) # reported timestamp for each row in the latest XSTs xst_timestamp_R = attribute_wrapper(comms_id=StatisticsClient, comms_annotation={"type": "statistics", "parameter": "xst_timestamps"}, dims=(XSTCollector.MAX_BLOCKS,), datatype=numpy.uint64) # which subband the XSTs describe diff --git a/devices/devices/unb2.py b/devices/devices/unb2.py index e2f781a24e5e59c52591f0826e36000a38687aa1..4b071950bb68c52a41758a5eedba32605c0214cf 100644 --- a/devices/devices/unb2.py +++ b/devices/devices/unb2.py @@ -38,6 +38,12 @@ class UNB2(opcua_device): # Device Properties # ----------------- + UNB2_mask_RW_default = device_property( + dtype='DevVarBooleanArray', + mandatory=False, + default_value=[True] * 2 + ) + # ---------- # Attributes # ---------- diff --git a/devices/integration_test/base.py b/devices/integration_test/base.py index 3583d1901a3ae7cecfefee1ac6ad698f0c098456..241f0ecd409fd16484d81e31f1e1f83dc1b9d81b 100644 --- a/devices/integration_test/base.py +++ b/devices/integration_test/base.py @@ -7,10 +7,15 @@ # Distributed under the terms of the APACHE license. # See LICENSE.txt for more info. +from common.lofar_logging import configure_logger + import unittest import asynctest import testscenarios +"""Setup logging for integration tests""" +configure_logger(debug=True) + class BaseIntegrationTestCase(testscenarios.WithScenarios, unittest.TestCase): """Integration test base class.""" diff --git a/devices/statistics_writer/README.md b/devices/statistics_writer/README.md index e2111f3d203158706f96a3eaee6004f3121f00ea..9c3e24a6ed360701778e023a9cc42d46b4b5dc8e 100644 --- a/devices/statistics_writer/README.md +++ b/devices/statistics_writer/README.md @@ -44,13 +44,20 @@ File ... ``` -###explorer -There is an hdf5 explorer that will walk through specified hdf5 files. -Its called `hdf5_explorer.py` and can be called with a `--file` argument -ex: `python3 hdf5_explorer.py --file data/SST_1970-01-01-00-00-00.h5` This allows for easy manual checking -of the structure and content of hdf5 files. useful for testing and debugging. -Can also be used as example of how to read the HDF5 statistics data files. -Provides a number of example functions inside that go through the file in various ways. +###reader +There is a statistics reader that is capable of parsing multiple HDF5 statistics files in to +a more easily usable format. It also allows for filtering between certain timestamps. 
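+
+Besides the command line usage described below, the reader can also be used programmatically.
+A minimal sketch (assuming it is run from this directory so `statistics_reader` is importable,
+and using the sample file and illustrative timestamps from the example below):
+
+```
+from statistics_reader import statistics_parser
+
+reader = statistics_parser()
+
+# optionally restrict the range of timestamps to parse (isoformat)
+reader.set_end_time("2021-10-04T07:50:08.937+00:00")
+
+# parse one or more files and sort the statistics by timestamp
+reader.parse_file(["SST_2021-10-04-07-36-52.h5"])
+reader.sort_by_timestamp()
+
+# combine all parsed statistics values into a single numpy array
+values = reader.collect_values()
+print(reader.get_statistics_count(), values.shape)
+```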
+`statistics_reader.py` takes the following arguments: +`--files list of files to parse` +`--end_time highest timestamp to process in isoformat` +`--start_time lowest timestamp to process in isoformat` + +ex: `python3 statistics_reader.py --files SST_2021-10-04-07-36-52.h5 --end_time 2021-10-04#07:50:08.937+00:00` +This will parse all the statistics in the file `SST_2021-10-04-07-36-52.h5` up to the timestamp `2021-10-04#07:50:08.937+00:00` + +This file can be used as both a testing tool and an example for dealing with HDF5 statistics. +The code serves can serve as a starting point for further development. To help with these purposes a bunch of simple +helper functions are provided. ###test server There is a test server that will continuously send out the same statistics packet. diff --git a/devices/statistics_writer/SST_2021-10-04-07-36-52.h5 b/devices/statistics_writer/SST_2021-10-04-07-36-52.h5 new file mode 100644 index 0000000000000000000000000000000000000000..26179fc59a2fb032bb35d779676befd4ebe26356 Binary files /dev/null and b/devices/statistics_writer/SST_2021-10-04-07-36-52.h5 differ diff --git a/devices/statistics_writer/hdf5_writer.py b/devices/statistics_writer/hdf5_writer.py index 197c3242fe48a8f99d4d1e79eb5412a6b8d90e2a..6715dd870608a0202610ea52c417695844f0d1c9 100644 --- a/devices/statistics_writer/hdf5_writer.py +++ b/devices/statistics_writer/hdf5_writer.py @@ -133,7 +133,7 @@ class hdf5_writer: """ # create the new hdf5 group based on the timestamp of packets - current_group = self.file.create_group("{}_{}".format(self.mode, self.current_timestamp.strftime("%Y-%m-%d-%H-%M-%S-%f")[:-3])) + current_group = self.file.create_group("{}_{}".format(self.mode, self.current_timestamp.isoformat(timespec="milliseconds"))) # store the statistics values for the current group self.store_function(current_group) @@ -158,11 +158,11 @@ class hdf5_writer: def write_sst_matrix(self, current_group): # store the SST values - current_group.create_dataset(name="sst_values", data=self.current_matrix.parameters["sst_values"].astype(numpy.float32), compression="gzip") + current_group.create_dataset(name="values", data=self.current_matrix.parameters["sst_values"].astype(numpy.float32), compression="gzip") def write_xst_matrix(self, current_group): # requires a function call to transform the xst_blocks in to the right structure - current_group.create_dataset(name="xst_values", data=self.current_matrix.xst_values().astype(numpy.cfloat), compression="gzip") + current_group.create_dataset(name="values", data=self.current_matrix.xst_values().astype(numpy.cfloat), compression="gzip") def write_bst_matrix(self, current_group): raise NotImplementedError("BST values not implemented") diff --git a/devices/statistics_writer/statistics_reader.py b/devices/statistics_writer/statistics_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..f0906e7d4122b2f1d0d8d864d8c6a47ad793c0f4 --- /dev/null +++ b/devices/statistics_writer/statistics_reader.py @@ -0,0 +1,246 @@ +import h5py +import numpy +import datetime +import argparse +import os +import psutil +import pytz +import time + +process = psutil.Process(os.getpid()) + +parser = argparse.ArgumentParser(description='Select a file to explore') +parser.add_argument('--files', type=str, nargs="+", help='the name and path of the files, takes one or more files') +parser.add_argument('--start_time', type=str, help='lowest timestamp to process (uses isoformat, ex: 2021-10-04T07:50:08.937+00:00)') +parser.add_argument('--end_time', type=str, 
help='highest timestamp to process (uses isoformat, ex: 2021-10-04T07:50:08.937+00:00)')
+
+
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("hdf5_explorer")
+logger.setLevel(logging.DEBUG)
+
+
+def timeit(method):
+    """
+    Simple decorator function to log time, function and process memory usage
+    """
+
+    def timed(*args, **kw):
+        global RESULT
+        s = datetime.datetime.now()
+        RESULT = method(*args, **kw)
+        e = datetime.datetime.now()
+
+        sizeMb = process.memory_info().rss / 1024 / 1024
+        sizeMbStr = "{0:,}".format(round(sizeMb, 2))
+
+        logger.debug('Time taken = %s, %s, size = %s MB' % (e - s, method.__name__, sizeMbStr))
+        return RESULT
+    return timed
+
+
+class statistics_parser:
+    """
+    This class goes through the file and creates a list of all statistics in the file it is given
+    """
+
+    def __init__(self):
+
+        # list of all statistics
+        self.statistics = []
+
+        # dict of all statistics, allows for easier access.
+        self.statistics_dict = {}
+
+        # for setting the range of times to parse. Initialise with the built-in minimum and maximum values
+        self.start_time = datetime.datetime.min.replace(tzinfo=pytz.UTC)
+        self.end_time = datetime.datetime.max.replace(tzinfo=pytz.UTC)
+
+    def set_start_time(self, start_time):
+        """
+        set the lowest statistics timestamp to store
+        """
+        self.start_time = datetime.datetime.fromisoformat(start_time)
+
+    def set_end_time(self, end_time):
+        """
+        set the highest statistics timestamp to store
+        """
+        self.end_time = datetime.datetime.fromisoformat(end_time)
+
+    @timeit
+    def parse_file(self, files):
+        """
+        This function opens and parses the statistics HDF5 file and adds it to self.statistics.
+        """
+
+        # if it's just a single file, the type could be a string
+        if type(files) is str:
+            files = [files]
+
+        for file in files:
+            hdf5_file = h5py.File(file, 'r')
+
+            # go through all the groups
+            logger.debug(f"Parsing HDF5 statistics file {file}")
+
+            for group_key in hdf5_file.keys():
+                try:
+                    # first get the statistic
+                    statistic = statistics_data(hdf5_file, group_key)
+
+                    # extract the timestamp and convert to datetime
+                    statistic_time = statistic.timestamp
+
+                    # check if the timestamp is before the start time
+                    if statistic_time < self.start_time:
+                        continue
+
+                    # check if the timestamp is after the end time
+                    if statistic_time > self.end_time:
+                        # Exit, we're done
+                        logger.debug(f"Parsed {len(self.statistics)} statistics")
+                        return
+
+                    # append to the statistics list
+                    self.statistics.append(statistic)
+                    self.statistics_dict[statistic.timestamp.isoformat(timespec="milliseconds")] = statistic
+
+                except Exception:
+                    logger.warning(f"Encountered an error while parsing statistic. Skipped: {group_key}")
+
+        logger.debug(f"Parsed {len(self.statistics)} statistics")
+
+    @timeit
+    def collect_values(self):
+        """
+        Collects all of the statistics values into a single giant numpy array.
+        Uses a lot more memory (basically double, since the values make up the bulk of the memory).
+        """
+        lst = [i.values for i in self.statistics]
+        value_array = numpy.stack(lst)
+        return value_array
+
+    def sort_by_timestamp(self):
+        """
+        Ensures the statistics are correctly sorted,
+        in case files aren't given in sequential order.
+        """
+        self.statistics.sort(key=lambda r: r.timestamp)
+
+    def get_statistic(self, timestamp):
+        """
+        Returns a statistic object based on the timestamp given.
+ """ + for i in self.statistics: + if i.timestamp == datetime.datetime.fromisoformat(timestamp): + return i + + raise ValueError(f"No statistic with timestamp {timestamp} found, make sure to use the isoformat") + + def list_statistics(self): + """ + Returns a list of all statistics + """ + return self.statistics_dict.keys() + + def get_statistics_count(self): + """ + Simply returns the amount of statistics + """ + return len(self.statistics) + + +class statistics_data: + """ + This class takes the file and the statistics name as its __init__ arguments and then stores the + the datasets in them. + """ + + # we will be creating potentially tens of thousands of these object. Using __slots__ makes them faster and uses less memory. At the cost of + # having to list all self attributes here. + __slots__ = ("version_id", "timestamp", "station_id", "source_info_t_adc", "source_info_subband_calibrated_flag", "source_info_payload_error", + "source_info_payload_error", "source_info_payload_error", "source_info_nyquist_zone_index", "source_info_gn_index", + "source_info_fsub_type", "source_info_beam_repositioning_flag", "source_info_antenna_band_index", "source_info__raw", + "observation_id", "nof_statistics_per_packet", "nof_signal_inputs", "nof_bytes_per_statistic", "marker", "integration_interval_raw", + "integration_interval", "data_id__raw", "block_serial_number", "block_period_raw", "block_period", "data_id_signal_input_index", + "data_id_subband_index", "data_id_first_baseline", "data_id_beamlet_index", "nof_valid_payloads", "nof_payload_errors", "values", ) + + + def __init__(self, file, group_key): + + # get all the general header info + self.version_id = file[group_key].attrs["version_id"] + self.station_id = file[group_key].attrs["station_id"] + + # convert string timestamp to datetime object + self.timestamp = datetime.datetime.fromisoformat(file[group_key].attrs["timestamp"]) + + self.source_info_t_adc = file[group_key].attrs["source_info_t_adc"] + self.source_info_subband_calibrated_flag = file[group_key].attrs["source_info_subband_calibrated_flag"] + self.source_info_payload_error = file[group_key].attrs["source_info_payload_error"] + self.source_info_nyquist_zone_index = file[group_key].attrs["source_info_payload_error"] + self.source_info_gn_index = file[group_key].attrs["source_info_gn_index"] + self.source_info_fsub_type = file[group_key].attrs["source_info_fsub_type"] + self.source_info_beam_repositioning_flag = file[group_key].attrs["source_info_beam_repositioning_flag"] + self.source_info_antenna_band_index = file[group_key].attrs["source_info_antenna_band_index"] + self.source_info__raw = file[group_key].attrs["source_info__raw"] + + self.observation_id = file[group_key].attrs["observation_id"] + self.nof_statistics_per_packet = file[group_key].attrs["nof_statistics_per_packet"] + self.nof_signal_inputs = file[group_key].attrs["nof_signal_inputs"] + self.nof_bytes_per_statistic = file[group_key].attrs["nof_bytes_per_statistic"] + self.marker = file[group_key].attrs["marker"] + self.integration_interval_raw = file[group_key].attrs["integration_interval_raw"] + self.integration_interval = file[group_key].attrs["integration_interval"] + self.data_id__raw = file[group_key].attrs["data_id__raw"] + + self.block_serial_number = file[group_key].attrs["block_serial_number"] + self.block_period_raw = file[group_key].attrs["block_period_raw"] + self.block_period = file[group_key].attrs["block_period"] + + # get SST specific stuff + if self.marker == "S": + self.data_id_signal_input_index 
= file[group_key].attrs["data_id_signal_input_index"] + + # get XST specific stuff + if self.marker == "X": + self.data_id_subband_index = file[group_key].attrs["data_id_subband_index"] + self.data_id_first_baseline = file[group_key].attrs["data_id_first_baseline"] + + # get BST specific stuff + if self.marker == "B": + self.data_id_beamlet_index = file[group_key].attrs["data_id_beamlet_index"] + + # get the datasets + self.nof_valid_payloads = numpy.array(file.get(f"{group_key}/nof_valid_payloads")) + self.nof_payload_errors = numpy.array(file.get(f"{group_key}/nof_payload_errors")) + self.values = numpy.array(file.get(f"{group_key}/values")) + + +if __name__ == "__main__": + args = parser.parse_args() + files = args.files + end_time = args.end_time + start_time = args.start_time + + # create the parser + parser = statistics_parser() + + # set the correct time ranges + if end_time is not None: + parser.set_end_time(end_time) + if start_time is not None: + parser.set_start_time(start_time) + + # parse all the files + parser.parse_file(files) + + # for good measure sort all the statistics by timestamp. Useful when multiple files are given out of order + parser.sort_by_timestamp() + + # get a single numpy array of all the statistics stored. + array = parser.collect_values() + + logger.debug(f"Collected the statistics values of {parser.get_statistics_count()} statistics in to one gaint array of shape: {array.shape} and type: {array.dtype}") diff --git a/devices/statistics_writer/statistics_writer.py b/devices/statistics_writer/statistics_writer.py index e2d4666fd581b01cdb99e9ad717fbccd32cfa33c..594e261c6d1e00e0ea7882c595449813c305c8ce 100644 --- a/devices/statistics_writer/statistics_writer.py +++ b/devices/statistics_writer/statistics_writer.py @@ -70,5 +70,3 @@ if __name__ == "__main__": logger.info("End of input.") finally: writer.close_writer() - - diff --git a/devices/statistics_writer/test/SST_10m_test_1.h5 b/devices/statistics_writer/test/SST_10m_test_1.h5 new file mode 100644 index 0000000000000000000000000000000000000000..2d04a526e1ef73d7bd636e3b564192d95e49cef5 Binary files /dev/null and b/devices/statistics_writer/test/SST_10m_test_1.h5 differ diff --git a/devices/statistics_writer/test/SST_10m_test_2.h5 b/devices/statistics_writer/test/SST_10m_test_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..45fd32d831508f8d632c6f1778d4d9bb73059294 Binary files /dev/null and b/devices/statistics_writer/test/SST_10m_test_2.h5 differ diff --git a/devices/statistics_writer/test/SST_10m_test_3.h5 b/devices/statistics_writer/test/SST_10m_test_3.h5 new file mode 100644 index 0000000000000000000000000000000000000000..5c971e8e2cea131d6c9ba8b7e6b1d645f205f276 Binary files /dev/null and b/devices/statistics_writer/test/SST_10m_test_3.h5 differ diff --git a/devices/statistics_writer/test/hdf5_explorer.py b/devices/statistics_writer/test/hdf5_explorer.py deleted file mode 100644 index 102c36b79f7beeb6a34ffba9b95a495a85a76f6e..0000000000000000000000000000000000000000 --- a/devices/statistics_writer/test/hdf5_explorer.py +++ /dev/null @@ -1,95 +0,0 @@ -import h5py -import numpy - -import argparse - -parser = argparse.ArgumentParser(description='Select a file to explore') -parser.add_argument('--file', type=str, help='the name and path of the file') - -import logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger("hdf5_explorer") -logger.setLevel(logging.DEBUG) - - -class statistics_data: - """ - Example class not used by anything - This class takes the file 
and the statistics name as its __init__ arguments and then stores the - the datasets in them. - """ - -class explorer: - """ - This class serves both as a tool to test and verify the content of HDF5 files as well as provide an example - of how you can go through HDF5 files. - """ - - - def __init__(self, filename): - self.file = h5py.File(filename, 'r') - - def print_all_statistics_full(self): - """ - Explores the file with knowledge of the file structure. assumes all top level groups are statistics - and that all statistics groups are made up of datasets. - Prints the groups, the datasets and the content of the datasets. - - Can easily be modified to instead of just logging all the data, store it in whatever structure is needed. - """ - - for group_key in self.file.keys(): - dataset = list(self.file[group_key]) - - #print group name - logger.debug(f" \n\ngroup: {group_key}") - - # Go through all the datasets - for i in dataset: - data = self.file.get(f"{group_key}/{i}") - logger.debug(f" dataset: {i}") - logger.debug(f" Data: {numpy.array(data)}") - - # go through all the attributes in the group (This is the header info) - attr_keys = self.file[group_key].attrs.keys() - for i in attr_keys: - attr = self.file[group_key].attrs[i] - - logger.debug(f" {i}: {attr}") - - def print_all_statistics_top_level(self): - """ - Explores the file with knowledge of the file structure. assumes all top level groups are statistics - and that all statistics groups are made up of datasets. - This function prints only the top level groups, AKA all the statistics collected. Useful when dealing with - potentially hundreds of statistics. - """ - # List all groups - logger.debug("Listing all statistics stored in this file:") - - for group_key in self.file.keys(): - logger.debug(group_key) - - -# create a data dumper that creates a new file every 10s (for testing) -if __name__ == "__main__": - args = parser.parse_args() - Explorer = explorer(args.file) - - """ - Print the entire files content - """ - Explorer.print_all_statistics_full() - - """ - Print only the names of all the statistics in this file - """ - logger.debug("--------------Top level groups--------------") - Explorer.print_all_statistics_top_level() - - - - - - - diff --git a/devices/test/base.py b/devices/test/base.py index 81a76c46e843dd7af91f19e1c142f612916157c7..66e64ea9a8669713f672db2088344d96a17f6e7c 100644 --- a/devices/test/base.py +++ b/devices/test/base.py @@ -7,10 +7,15 @@ # Distributed under the terms of the APACHE license. # See LICENSE.txt for more info. 
+from common.lofar_logging import configure_logger + import unittest import testscenarios import asynctest +"""Setup logging for unit tests""" +configure_logger(debug=True) + class BaseTestCase(testscenarios.WithScenarios, unittest.TestCase): """Test base class.""" diff --git a/devices/test/devices/test_statistics_collector.py b/devices/test/devices/test_statistics_collector.py index a3568b8e56452259b8754be3a76e862a20845fcb..5fe4e24dabbf169664b19250cba13f19b8020327 100644 --- a/devices/test/devices/test_statistics_collector.py +++ b/devices/test/devices/test_statistics_collector.py @@ -7,13 +7,16 @@ class TestXSTCollector(base.TestCase): def test_valid_packet(self): collector = XSTCollector() - # a valid packet as obtained from SDP, with 64-bit BE 1+1j as payload - packet = b'X\x05\x00\x00\x00\x00\x00\x00\x10\x08\x00\x02\xfa\xef\x00f\x00\x00\x0c\x08\x01 \x14\x00\x00\x01!\xd9&z\x1b\xb3' + 288 * b'\x00\x00\x00\x00\x00\x00\x00\x01' + # a valid packet as obtained from SDP, with 64-bit BE 1+1j as payload at (12,0) + packet = b'X\x05\x00\x00\x00\x00\x00\x00\x10\x08\x00\x02\xfa\xef\x00f\x0c\x00\x0c\x08\x01 \x14\x00\x00\x01!\xd9&z\x1b\xb3' + 288 * b'\x00\x00\x00\x00\x00\x00\x00\x01' # parse it ourselves to extract info nicely fields = XSTPacket(packet) fpga_index = fields.gn_index + # baseline indeed should be (12,0) + self.assertEqual((12,0), fields.first_baseline) + # this should not throw collector.process_packet(packet) @@ -41,10 +44,51 @@ class TestXSTCollector(base.TestCase): else: self.assertEqual(0+0j, xst_values[baseline_a][baseline_b], msg=f'element [{baseline_a}][{baseline_b}] was not in packet, but was written to the XST matrix.') + def test_conjugated_packet(self): + """ Test whether a packet with a baseline (a,b) with a<b will get its payload conjugated. """ + + collector = XSTCollector() + + # a valid packet as obtained from SDP, with 64-bit BE 1+1j as payload, at baseline (0,12) + # VV VV + packet = b'X\x05\x00\x00\x00\x00\x00\x00\x10\x08\x00\x02\xfa\xef\x00f\x00\x0c\x0c\x08\x01 \x14\x00\x00\x01!\xd9&z\x1b\xb3' + 288 * b'\x00\x00\x00\x00\x00\x00\x00\x01' + + # parse it ourselves to extract info nicely + fields = XSTPacket(packet) + + # baseline indeed should be (0,12) + self.assertEqual((0,12), fields.first_baseline) + + # this should not throw + collector.process_packet(packet) + + # counters should now be updated + self.assertEqual(1, collector.parameters["nof_packets"]) + self.assertEqual(0, collector.parameters["nof_invalid_packets"]) + + # check whether the data ended up in the right block, and the rest is still zero + xst_values = collector.xst_values() + + for baseline_a in range(collector.MAX_INPUTS): + for baseline_b in range(collector.MAX_INPUTS): + if baseline_b > baseline_a: + # only scan top-left triangle + continue + + # use swapped indices! 
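+                    # The packet in this test is for first_baseline (0,12). Since 0 < 12, the collector
+                    # swaps it to (12,0) and marks the block as conjugated, so the payload of 1+1j is
+                    # expected as 1-1j in the transposed (lower-triangle) block checked below.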
+ baseline_a_was_in_packet = (fields.first_baseline[1] <= baseline_a < fields.first_baseline[1] + fields.nof_signal_inputs) + baseline_b_was_in_packet = (fields.first_baseline[0] <= baseline_b < fields.first_baseline[0] + fields.nof_signal_inputs) + + if baseline_a_was_in_packet and baseline_b_was_in_packet: + self.assertEqual(1-1j, xst_values[baseline_a][baseline_b], msg=f'element [{baseline_a}][{baseline_b}] did not end up conjugated in XST matrix.') + else: + self.assertEqual(0+0j, xst_values[baseline_a][baseline_b], msg=f'element [{baseline_a}][{baseline_b}] was not in packet, but was written to the XST matrix.') + def test_invalid_packet(self): collector = XSTCollector() # an invalid packet + # V packet = b'S\x05\x00\x00\x00\x00\x00\x00\x10\x08\x00\x02\xfa\xef\x00f\x00\x00\x0c\x08\x01 \x14\x00\x00\x01!\xd9&z\x1b\xb3' + 288 * b'\x00\x00\x00\x00\x00\x00\x00\x01' # this should throw @@ -62,6 +106,7 @@ class TestXSTCollector(base.TestCase): collector = XSTCollector() # an valid packet with a payload error + # V packet = b'X\x05\x00\x00\x00\x00\x00\x00\x14\x08\x00\x02\xfa\xef\x00f\x00\x00\x0c\x08\x01 \x14\x00\x00\x01!\xd9&z\x1b\xb3' + 288 * b'\x00\x00\x00\x00\x00\x00\x00\x01' # parse it ourselves to extract info nicely diff --git a/docker-compose/jupyter.yml b/docker-compose/jupyter.yml index e7bbd5d00a3813dc0ce9562d64de77683f1eeaee..1e1deea6f0e22299544f988602efc676bbe6200c 100644 --- a/docker-compose/jupyter.yml +++ b/docker-compose/jupyter.yml @@ -25,8 +25,6 @@ services: - ${HOME}:/hosthome environment: - TANGO_HOST=${TANGO_HOST} - - XAUTHORITY=${XAUTHORITY} - - DISPLAY=${DISPLAY} ports: - "8888:8888" user: ${CONTAINER_EXECUTION_UID} diff --git a/docker-compose/jupyter/Dockerfile b/docker-compose/jupyter/Dockerfile index b69ddfa7e5b6d6eaeab11b25f99258d0f0743daa..8be3e9f3900b01e80893d38aedcb4f6397aa8fd0 100644 --- a/docker-compose/jupyter/Dockerfile +++ b/docker-compose/jupyter/Dockerfile @@ -10,6 +10,9 @@ ENV HOME=/home/user RUN sudo mkdir -p ${HOME} RUN sudo chown ${CONTAINER_EXECUTION_UID} -R ${HOME} +# ipython 7.28 is broken in combination with Jupyter, it causes connection errors with notebooks +RUN sudo pip3 install ipython==7.27.0 + RUN sudo pip3 install jupyter RUN sudo pip3 install ipykernel RUN sudo pip3 install jupyter_bokeh @@ -46,7 +49,7 @@ COPY jupyter-notebook /usr/local/bin/jupyter-notebook RUN sudo pip3 install PyMySQL[rsa] sqlalchemy # Packages to interface with testing hardware directly -RUN sudo pip3 install pyvisa pyvisa-py +RUN sudo pip3 install pyvisa pyvisa-py opcua # Add Tini. Tini operates as a process subreaper for jupyter. This prevents kernel crashes. ENV TINI_VERSION v0.6.0 diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d0c3cbf1020d5c292abdedf27627c6abe25e2293 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d49db9ded07b3dbeb1087b90b99367a465d169fe --- /dev/null +++ b/docs/README.md @@ -0,0 +1,9 @@ +To build the sphinx documentation, run: + +``` +pip3 install sphinx sphinx-rtd-theme + +make html +``` + +After which the documentation will be available in html format in the `build/html` directory. diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..cf6f1dea2270d3d372ae1fa1d7a5abc136d6d343 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,52 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +#import os +#import sys +#sys.path.insert(0, os.path.abspath('../../devices')) + + +# -- Project information ----------------------------------------------------- + +project = 'LOFAR2.0 Station Control' +copyright = '2021, Stichting ASTRON' +author = 'Stichting ASTRON' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/source/configure_station.rst b/docs/source/configure_station.rst new file mode 100644 index 0000000000000000000000000000000000000000..412795ff05d649ab57d255f566178a50614091bb --- /dev/null +++ b/docs/source/configure_station.rst @@ -0,0 +1,70 @@ +Enter your LOFAR2.0 Hardware Configuration +=========================================== + +The software will need to be told various aspects of your station configuration, for example, the hostnames of the station hardware to control. The following settings are installation specific, and are stored as *properties* in the :ref:`tangodb`. The format used here is ``device.property``: + +Mandatory settings +------------------- + +Without these settings, you will not obtain the associated functionality: + +:RECV.OPC_Server_Name: Hostname of RECVTR. + + :type: ``string`` + +:UNB2.OPC_Server_Name: Hostname of UNB2TR. 
+
+    :type: ``string``
+
+:SDP.OPC_Server_Name: Hostname of SDPTR.
+
+    :type: ``string``
+
+:SST.OPC_Server_Name: Hostname of SDPTR.
+
+    :type: ``string``
+
+:SST.FPGA_sst_offload_hdr_eth_destination_mac_RW_default: MAC address of the network interface on the host running this software stack, on which the SSTs are to be received. This network interface must be capable of receiving Jumbo (MTU=9000) frames.
+
+    :type: ``string[N_fpgas]``
+
+:SST.FPGA_sst_offload_hdr_ip_destination_address_RW_default: IP address of the network interface on the host running this software stack, on which the SSTs are to be received.
+
+    :type: ``string[N_fpgas]``
+
+:XST.OPC_Server_Name: Hostname of SDPTR.
+
+    :type: ``string``
+
+:XST.FPGA_xst_offload_hdr_eth_destination_mac_RW_default: MAC address of the network interface on the host running this software stack, on which the XSTs are to be received. This network interface must be capable of receiving Jumbo (MTU=9000) frames.
+
+    :type: ``string[N_fpgas]``
+
+:XST.FPGA_xst_offload_hdr_ip_destination_address_RW_default: IP address of the network interface on the host running this software stack, on which the XSTs are to be received.
+
+    :type: ``string[N_fpgas]``
+
+Optional settings
+-------------------
+
+These settings make life nicer, but are not strictly necessary to get your software up and running:
+
+:RECV.Ant_mask_RW_default: Which antennas are installed.
+
+    :type: ``bool[N_RCUs][N_antennas_per_RCU]``
+
+:SDP.RCU_mask_RW_default: Which RCUs are installed.
+
+    :type: ``bool[N_RCUs]``
+
+:UNB2.UNB2_mask_RW_default: Which Uniboard2s are installed in SDP.
+
+    :type: ``bool[N_unb]``
+
+:SDP.TR_fpga_mask_RW_default: Which FPGAs are installed in SDP.
+
+    :type: ``bool[N_fpgas]``
+
+:SDP.FPGA_sdp_info_station_id_RW_default: Numeric identifier for this station.
+
+    :type: ``uint32[N_fpgas]``
diff --git a/docs/source/developer.rst b/docs/source/developer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..517dfa324298e9451bfa5f9b25eef9726476686e
--- /dev/null
+++ b/docs/source/developer.rst
@@ -0,0 +1,61 @@
+Developer information
+=========================
+
+This chapter describes key areas useful for developers.
+
+Docker compose
+-------------------------
+
+The docker setup is managed using ``make`` in the ``docker-compose`` directory. Key commands are:
+
+- ``make status`` to check which containers are running,
+- ``make build <container>`` to rebuild the image for the container,
+- ``make build-nocache <container>`` to rebuild the image for the container from scratch,
+- ``make restart <container>`` to restart a specific container, for example to effectuate a code change.
+- ``make clean`` to remove all images and containers, and the ``tangodb`` volume. To do a deeper clean, we need to remove all volumes and rebuild all containers from scratch::
+
+    make clean
+    docker volume prune
+    make build-nocache
+
+Since the *Python code is taken from the host when the container starts*, restarting is enough to use the code you have in your local git repo. Rebuilding is unnecessary.
+
+Docker networking
+-------------------------
+
+The Docker containers use a *virtual network* to communicate with each other. This means that:
+
+- Containers address each other by a host name equal to the container name (e.g. ``elk`` for the ELK stack, and ``databaseds`` for the TANGO_HOST),
+- ``localhost`` cannot be used within the containers to access ports of other containers.
+- ``host.docker.internal`` resolves to the actual host running the containers, +- All ports used by external parties need to be exposed explicitly in the docker-compose files. The container must open the same port as is thus exposed, or the port will not be reachable. + +The networks are defined in ``docker-compose/networks.yml``: + +.. literalinclude:: ../../docker-compose/networks.yml + +The ``$NETWORK_MODE`` defaults to ``tangonet`` in the ``docker-compose/Makefile``. + +.. _corba: + +CORBA +```````````````````` + +Tango devices use CORBA, which require all servers to be able to reach each other directly. Each CORBA device opens a port and advertises its address to the CORBA broker. The broker then forwards this address to any interested clients. A device within a docker container cannot know under which name it can be reached, however, and any port opened needs to be exposed explicitly in the docker-compose file for the device. To solve all this, we *assign a unique port to each device*, and explictly tell CORBA to use that port, and what the hostname is under which others can reach it. Each device thus has these lines in their compose file:: + + ports: + - "5701:5701" # unique port for this DS + entrypoint: + # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA + # can't know about our Docker port forwarding + - python3 -u /opt/lofar/tango/devices/devices/sdp/sdp.py LTS -v -ORBendPoint giop:tcp:0:5701 -ORBendPointPublish giop:tcp:${HOSTNAME}:5701 + +Specifying the wrong ``$HOSTNAME`` or port can make your device unreachable, even if it is running. Note that ``$HOSTNAME`` is advertised as is, that is, it is resolved to an IP address by any client that wants to connect. This means the ``$HOSTNAME`` needs to be correct for both the other containers, and external clients. + +The ``docker-compose/Makefile`` tries to set a good default for ``$HOSTNAME``, but you can override it by exporting the environment variable yourself (and run ``make restart <container>`` to effectuate the change). + +For more information, see: + +- https://huihoo.org/ace_tao/ACE-5.2+TAO-1.2/TAO/docs/ORBEndpoint.html +- http://omniorb.sourceforge.net/omni42/omniNames.html +- https://sourceforge.net/p/omniorb/svn/HEAD/tree/trunk/omniORB/src/lib/omniORB/orbcore/tcp/tcpEndpoint.cc diff --git a/docs/source/devices/configure.rst b/docs/source/devices/configure.rst new file mode 100644 index 0000000000000000000000000000000000000000..aa96966d2ee9d383c60e6a1651d0064bb8b914d2 --- /dev/null +++ b/docs/source/devices/configure.rst @@ -0,0 +1,63 @@ +Device Configuration +========================= + +The devices receive their configuration from two sources: + +- The TangoDB database, for static *properties*, +- Externally, from the user, or a control system, that set *control attributes* (see :doc:`devices` for what to set, and :ref:`attributes` for how to set them). + +.. _tangodb: + +TangoDB +------------------------- + +The TangoDB database is a persistent store for the properties of each device. The properties encode static settings, such as the hardware addresses, and default values for control attributes. + +Each device queries the TangoDB for the value of its properties during the ``initialise()`` call. Default values for control attributes can then be applied by explicitly calling ``set_defaults()``. The ``boot`` device also calls ``set_defaults()`` when initialising the station. 
The rationale is that the defaults can be applied at boot, but shouldn't be applied automatically during operations, so as not to disturb running hardware.
+
+Device interaction
+````````````````````````````
+
+The properties of a device can be queried from the device directly::
+
+    # get a list of all the properties
+    property_names = device.get_property_list("*")
+
+    # fetch the values of the given properties. returns a {property: value} dict.
+    property_dict = device.get_property(property_names)
+
+Properties can also be changed::
+
+    changeset = { "property": "new value" }
+
+    device.put_property(changeset)
+
+Note that new values for properties will only be picked up by the device during ``initialise()``, so you will have to turn the device off and on.
+
+Command-line interaction
+``````````````````````````
+
+The content of the TangoDB can be dumped from the command line using::
+
+    bin/dump_ConfigDb.sh > tangodb-dump.json
+
+and changes can be applied using::
+
+    bin/update_ConfigDb.sh changeset.json
+
+.. note:: The ``dsconfig`` docker container needs to be running for these commands to work.
+
+Jive
+``````````````````````````
+
+The TangoDB can also be interactively queried and modified using Jive. Jive is an X11 application provided by the ``jive`` image as part of the software stack of the station. It must however be started on-demand, with a correctly configured ``$DISPLAY``::
+
+    cd docker-compose
+    make start jive
+
+If Jive does not appear, check ``docker logs jive`` to see what went wrong.
+
+For information on how to use Jive, see https://tango-controls.readthedocs.io/en/latest/tools-and-extensions/built-in/jive/.
+
+.. note:: If you need an X11 server on Windows, see :ref:`x11_on_windows`.
+
diff --git a/docs/source/devices/devices.rst b/docs/source/devices/devices.rst
new file mode 100644
index 0000000000000000000000000000000000000000..1c6090bef3066def70a032b191688d8d0444cb03
--- /dev/null
+++ b/docs/source/devices/devices.rst
@@ -0,0 +1,179 @@
+Devices
+============
+
+.. _boot:
+
+Boot
+---------
+
+The ``boot == DeviceProxy("LTS/Boot/1")`` device is responsible for (re)starting and initialising the other devices. Devices which are not reachable, for example because their docker container is explicitly stopped, are skipped during initialisation. This device provides the following commands:
+
+:initialise_station(): Stop and start the other devices in the correct order, set their default values, and command them to initialise their hardware. This procedure runs asynchronously, causing this command to return immediately. Initialisation is aborted if an error is encountered.
+
+    :returns: ``None``
+
+The initialisation process can subsequently be followed through monitoring the following attributes:
+
+:initialising_R: Whether the initialisation procedure is still ongoing.
+
+    :type: ``bool``
+
+:initialisation_progress_R: Percentage completeness of the initialisation procedure. Each successfully configured device increments progress.
+
+    :type: ``int``
+
+:initialisation_status_R: A description of what the device is currently trying to do. If an error occurs, this will hint towards the cause.
+
+    :type: ``str``
+
+A useful pattern is thus to call ``initialise_station()``, wait for ``initialising_R == False``, and then check whether the initialisation was successful, i.e. whether ``initialisation_progress_R == 100``. If a device fails to initialise, most likely the :doc:`../interfaces/logs` will need to be consulted.
+
+..
_docker: + +Docker +--------- + +The ``docker == DeviceProxy("LTS/Docker/1")`` device controls the docker containers. It allows starting and stopping them, and querying whether they are running. Each container is represented by two attributes: + +:<container>_R: Returns whether the container is running. + + :type: ``bool`` + +:<container>_RW: Set to ``True`` to start the container, and to ``False`` to stop it. + + :type: ``bool`` + +.. warning:: Do *not* stop the ``tango`` container, as doing so cripples the Tango infrastructure, leaving the station inoperable. It is also not wise to stop the ``device_docker`` container, as doing so would render this device unreachable. + + +RECV +---------- + +The ``recv == DeviceProxy("LTS/RECV/1")`` device controls the RCUs, the LBA antennas, and HBA tiles. Central to its operation are the masks (see also :ref:`attribute-masks`): + +:RCU_mask_RW: Controls which RCUs will actually be configured when attributes referring to RCUs are written. + + :type: ``bool[N_RCUs]`` + +:Ant_mask_RW: Controls which antennas will actually be configured when attributes referring to antennas are written. + + :type: ``bool[N_RCUs][N_antennas_per_RCU]`` + +Typically, ``N_RCUs == 32``, and ``N_antennas_per_RCU == 3``. + +SDP +----------- + +The ``sdp == DeviceProxy("LTS/SDP/1")``` device controls the digital signal processing in SDP, performed by the firmware on the FPGAs on the Uniboards. Central to its operation is the mask (see also :ref:`attribute-masks`): + +:TR_fpga_mask_RW: Controls which FPGAs will actually be configured when attributes referring to FPGAs are written. + + :type: ``bool[N_fpgas]`` + +Typically, ``N_fpgas == 16``. + +SST and XST +----------- + +The ``sst == DeviceProxy("LTS/SST/1")`` and ``xst == DeviceProxy("LTS/XST/1")`` devices manages the SSTs (subband statistics) and XSTs (crosslet statistics), respectively. The statistics are emitted piece-wise through UDP packets by the FPGAs on the Uniboards in SDP. By default, each device configures the statistics to be streamed to itself (the device), from where the user can obtain them. + +The statistics are exposed in two ways, as: + +- *Attributes*, representing the most recently received values, +- *TCP stream*, to allow the capture and recording of the statistics over any period of time. + +SST Statistics attributes +````````````````````````` + +The SSTs represent the amplitude of the signal in each subband, for each antenna, as an integer value. They are exposed through the following attributes: + +:sst_R: Amplitude of each subband, from each antenna. + + :type: ``uint64[N_ant][N_subbands]`` + +:sst_timestamp_R: Timestamp of the data, per antenna. + + :type: ``uint64[N_ant]`` + +:integration_interval_R: Timespan over which the SSTs were integrated, per antenna. + + :type: ``float32[N_ant]`` + +:subbands_calibrated_R: Whether the subband data was calibrated using the subband weights. + + :type: ``bool[N_ant]`` + +Typically, ``N_ant == 192``, and ``N_subbands == 512``. + +XST Statistics attributes +````````````````````````` + +The XSTs represent the cross-correlations between each pair of antennas, as complex values. The phases and amplitudes of the XSTs represent the phase and amplitude difference between the antennas, respectively. They are exposed as a matrix ``xst[a][b]``, of which only the triangle ``a<=b`` is filled, as the cross-correlation between antenna pairs ``(b,a)`` is equal to the complex conjugate of the cross-correlation of ``(a,b)``. 
The other triangle contains incidental values, but will be mostly 0. + +Complex values which cannot be represented in Tango attributes. Instead, the XST matrix is exposed as both their carthesian and polar parts: + +:xst_power_R, xst_phase_R: Amplitude and phase of the crosslet statistics. + + :type: ``float32[N_ant][N_ant]`` + +:xst_real_R, xst_imag_R: Real and imaginary parts of the crosslet statistics. + + :type: ``float32[N_ant][N_ant]`` + +:xst_timestamp_R: Timestamp of each block. + + :type: ``int64[N_blocks]`` + +:integration_interval_R: Timespan over which the XSTs were integrated, for each block. + + :type: ``float32[N_blocks]`` + +Typically, ``N_ant == 192``, and ``N_blocks == 136``. + +The metadata refers to the *blocks*, which are emitted by the FPGAs to represent the XSTs between 12 x 12 consecutive antennas. The following code converts block numbers to the indices of the first antenna pair in a block:: + + from common.baselines import baseline_from_index + + def first_antenna_pair(block_nr: int) -> int: + coarse_a, coarse_b = baseline_from_index(block_nr) + return (coarse_a * 12, coarse_b * 12) + +Conversely, to calculate the block index for an antenna pair ``(a,b)``, use:: + + from common.baselines import baseline_index + + def block_nr(a: int, b: int) -> int: + return baseline_index(a // 12, b // 12) + +TCP stream +`````````` + +The TCP stream interface allows a user to subscribe to the statistics packet streams, combined into a single TCP stream. The statistics will be streamed until the user disconnects, or the device is turned off. Any number of subscribers is supported, as bandwidth allows. Simply connect to the following port: + ++----------+----------------+ +| Device | TCP end point | ++==========+================+ +| SST | localhost:5101 | ++----------+----------------+ +| XST | localhost:5102 | ++----------+----------------+ + +The easiest way to capture this stream is to use our ``statistics_writer``, which will capture the statistics and store them in HDF5 file(s). The writer: + +- computes packet boundaries, +- processes the data of each packet, and stores their values into the matrix relevant for the mode, +- stores a matrix per timestamp, +- stores packet header information per timestamp, as HDF5 attributes, +- writes to a new file at a configurable interval. + +To run the writer:: + + cd devices/statistics_writer + python3 statistics_writer.py --mode SST --host localhost + +The correct port will automatically be chosen, depending on the given mode. See also ``statistics_writer.py -h`` for more information. + +The writer can also parse a statistics stream stored in a file. This allows the stream to be captured and processed independently. Capturing the stream can for example be done using ``netcat``:: + + nc localhost 5101 > SST-packets.bin + diff --git a/docs/source/devices/using.rst b/docs/source/devices/using.rst new file mode 100644 index 0000000000000000000000000000000000000000..8c2a58ca814fdea541e8e5dbcbe5b9ae189b5e84 --- /dev/null +++ b/docs/source/devices/using.rst @@ -0,0 +1,143 @@ +Using Devices +============= + +The station exposes *devices*, each of which is a remote software object that manages part of the station. 
Each device has the following properties: + +- It has a *state*, +- Many devices manage and represent hardware in the station, +- It exposes *read-only attributes*, which expose values from within the device or from the hardware it represents, +- It exposes *read-write attributes*, which allow controlling the functionality of the device, or the hardware it represents, +- It exposes *properties*, which are fixed configuration parameters (such as port numbers and timeouts), +- It exposes *commands*, which request the execution of a procedure in the device or in the hardware it manages. + +The devices are accessed remotely using ``DeviceProxy`` objects. See :doc:`../interfaces/control` on how to do this. + +States +------------ + +The state of a device is queried with ``device.state()``. Each device can be in one of the following states: + +- ``DevState.OFF``: The device is not operating, +- ``DevState.INIT``: The device is being initialised, +- ``DevState.STANDBY``: The device is initialised and ready to be configured further, +- ``DevState.ON``: The device is operational. +- ``DevState.FAULT``: The device is malfunctioning. Functionality cannot be counted on. +- The ``device.state()`` function can throw an error if the device cannot be reached at all, for example because its docker container is not running. See the :ref:`docker` device on how to start it. + +Each device provides the following commands to change the state: + +:off(): Turn the device ``OFF`` from any state. + +:initialise(): Initialise the device from the ``OFF`` state, to bring it to the ``STANDBY`` state. + +:on(): Mark the device as operational, from the ``STANDBY`` state, bringing it to ``ON``. + +The following procedure is a good way to bring a device to ``ON`` from any state:: + + def force_start(device): + if device.state() == DevState.FAULT: + device.off() + if device.state() == DevState.OFF: + device.initialise() + if device.state() == DevState.STANDBY: + device.on() + + return device.state() + +.. hint:: If a command gives you a timeout, the command will still be running until it finishes. You just won't know when it finishes, or what its result is. In order to increase the timeout, use ``device.set_timeout_millis(timeout * 1000)``. + +FAULT +`````````` + +If a device enters the ``FAULT`` state, it means an error occurred that is fundamental to the operation of the software device. For example, the connection +to the hardware was lost. + +Interaction with the device in the ``FAULT`` state is undefined, and attributes cannot be read or written. The device needs to be reinitialised, which +typically involves the following sequence of commands:: + + # turn the device off completely first. + device.off() + + # setup any connections and threads + device.initialise() + + # turn on the device + device.on() + +Of course, the device could go into ``FAULT`` again, even during the ``initialise()`` command, for example because the hardware it manages is unreachable. To debug the fault condition, check the :doc:`../interfaces/logs` of the device in question. + +Initialise hardware +```````````````````` + +Most devices provide the following commands, in order to configure the hardware with base settings: + +:set_defaults(): Upload default attribute settings from the TangoDB to the hardware. + +:initialise_hardware(): For devices that control hardware, this command runs the hardware initialisation procedure. + +Typically, ``set_defaults()`` and ``initialise_hardware()`` are called in that order in the ``STANDBY`` state.
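+
+For example, for the ``recv`` device this amounts to (a minimal sketch, assuming the device offers both commands)::
+
+    recv = DeviceProxy("LTS/RECV/1")
+
+    # upload the default attribute settings from the TangoDB to the hardware
+    recv.set_defaults()
+
+    # run the hardware initialisation procedure
+    recv.initialise_hardware()
+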
+The :ref:`boot` device runs these commands as part of its station initialisation sequence. + +.. _attributes: + +Attributes +------------ + +The device can be operated in the ``ON`` state, where it exposes *attributes* and *commands*. The attributes can be accessed as Python properties, for example:: + + recv = DeviceProxy("LTS/RECV/1") + + # turn on all LED0s + recv.RCU_LED0_RW = [True] * 32 + + # retrieve the status of all LED0s + print(recv.RCU_LED0_R) + +The attributes with an: + +- ``_R`` suffix are monitoring points, reflecting the state of the hardware, and are thus read-only. +- ``_RW`` suffix are control points, reflecting the desired state of the hardware. They are read-write, where writing requests the hardware to set the specified value. Reading them returns the last requested value. + +Meta data +````````````` + +A description of the attribute can be retrieved using:: + + print(recv.get_attribute_config("RCU_LED0_R").description) + +.. _attribute-masks: + +Attribute masks +--------------------- + +Several devices employ *attribute masks* in order to toggle which elements in their hardware array are actually to be controlled. This construct is necessary as most control points consist of arrays of values that cover all hardware elements. These array control points are always fully sent: it is not possible to update only a single element without uploading the rest. Without a mask, it is impossible to control a subset of the hardware. + +The masks only affect *writing* to attributes. Reading attributes (monitoring points) always results in data for all elements in the array. + +For example, the ``RCU_mask_RW`` array is the RCU mask in the ``recv`` device. It behaves as follows when we interact with the ``RCU_LED0_R(W)`` attributes:: + + recv = DeviceProxy("LTS/RECV/1") + + # set mask to control all RCUs + recv.RCU_mask_RW = [True] * 32 + + # request to turn off LED0 for all RCUs + recv.RCU_LED0_RW = [False] * 32 + + # <--- all LED0s are now off + # recv.RCU_LED0_R should show this, + # if you have the RCU hardware installed. + + # set mask to only control RCU 3 + mask = [False] * 32 + mask[3] = True + recv.RCU_mask_RW = mask + + # request to turn on LED0, for all RCUs + # due to the mask, only LED0 on RCU 3 + # will be set. + recv.RCU_LED0_RW = [True] * 32 + + # <--- only LED0 on RCU3 is now on + # recv.RCU_LED0_R should show this, + # if you have the RCU hardware installed. + diff --git a/docs/source/faq.rst b/docs/source/faq.rst new file mode 100644 index 0000000000000000000000000000000000000000..367492e002e5d0d4bf20442c6e5e596ef78b852f --- /dev/null +++ b/docs/source/faq.rst @@ -0,0 +1,145 @@ +FAQ +=================================== + +Connecting to devices +-------------------------------------------------------------------------------------------------------------- + +My device is unreachable, but the device logs say it's running fine? +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +The ``$HOSTNAME`` may have been incorrectly guessed by ``docker-compose/Makefile``, or you accidentally set it to an incorrect value. See :ref:`corba`. + +I get "API_CorbaException: TRANSIENT CORBA system exception: TRANSIENT_NoUsableProfile" when trying to connect to a device? +```````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +The ``$HOSTNAME`` may have been incorrectly guessed by ``docker-compose/Makefile``, or you accidentally set it to an incorrect value.
See :ref:`corba`. + +Docker +-------------------------------------------------------------------------------------------------------------- + +How do I prevent my containers from starting when I boot my computer? +```````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +You have to explicitly stop a container to prevent it from restarting. Use:: + + cd docker-compose + make stop <container> + +or plain ``make stop`` to stop all of them. + +Windows +-------------------------------------------------------------------------------------------------------------- + +How do I develop from Windows? +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +Our setup is Linux-based, so the easiest way to develop is by using WSL2, which lets you run a Linux distro under Windows. You'll need to: + +- Install WSL2. See f.e. https://www.omgubuntu.co.uk/how-to-install-wsl2-on-windows-10 +- Install `Docker Desktop <https://hub.docker.com/editions/community/docker-ce-desktop-windows/>`_ +- Enable the WSL2 backend in Docker Desktop +- We also recommend installing `Windows Terminal <https://www.microsoft.com/en-us/p/windows-terminal/9n0dx20hk701>`_ + +.. _x11_on_windows: + +How do I run X11 applications on Windows? +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` +If you need an X11 server on Windows: + +- Install `VcXsrv <https://sourceforge.net/projects/vcxsrv/>`_ +- Disable access control during its startup, +- Use ``export DISPLAY=host.docker.internal:0`` in WSL. + +You should now be able to run X11 applications from WSL and Docker. Try running ``xterm`` or ``xeyes`` to test. + + +SSTs/XSTs +-------------------------------------------------------------------------------------------------------------- + +Some SSTs/XSTs packets do arrive, but not all, and/or the matrices remain zero? +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +So ``sst.nof_packets_received`` / ``xst.nof_packets_received`` is increasing, telling you packets are arriving. But they're apparently dropped or contain zeroes. First, check the following settings: + +- ``sdp.TR_fpga_mask_RW[x] == True``, to make sure we're actually configuring the FPGAs, +- ``sdp.FPGA_wg_enable_RW[x] == False``, or the Waveform Generator might be replacing the antenna data with zeroes, +- ``sdp.FPGA_processing_enabled_R[x] == True``, to verify that the FPGAs are processing, or the values and timestamps will be zero, +- For XSTs, ``xst.FPGA_xst_processing_enabled_R[x] == True``, to verify that the FPGAs are computing XSTs, or the values will be zero. + +Furthermore, the ``sst`` and ``xst`` devices expose several packet counters to indicate where incoming packets were dropped before or during processing: + +- ``nof_invalid_packets_R`` increases if packets arrive with an invalid header, or with the wrong statistic for this device, +- ``nof_packets_dropped_R`` increases if packets could not be processed because the processing queue is full, so the CPU cannot keep up with the flow, +- ``nof_payload_errors_R`` increases if the packet was marked by the FPGA to have an invalid payload, which causes the device to discard the packet.
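+
+These counters can be read like any other attribute; a minimal sketch from the Jupyter/PyTango prompt, using the ``sst`` device as an example::
+
+    sst = DeviceProxy("LTS/SST/1")
+
+    # packets that arrived at all
+    print(sst.nof_packets_received)
+
+    # packets that arrived, but were rejected or dropped
+    print(sst.nof_invalid_packets_R)
+    print(sst.nof_packets_dropped_R)
+    print(sst.nof_payload_errors_R)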
+ +I am not receiving any XSTs and/or SSTs packets from SDP! +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +Are you sure? If ``sst.nof_packets_received`` / ``xst.nof_packets_received`` is actually increasing, the packets are arriving, but are not parsable by the SST/XST device. If so, see the previous question. + +Many settings need to be correct for the statistics emitted by the SDP FPGAs to reach our devices correctly. Here is a brief overview: + +- ``sdp.TR_fpga_mask_RW[x] == True``, to make sure we're actually configuring the FPGAs, +- ``sdp.FPGA_communication_error_R[x] == False``, to verify the FPGAs can be reached by SDP, +- SSTs: + + - ``sst.FPGA_sst_offload_enable_RW[x] == True``, to verify that the FPGAs are actually emitting the SSTs, + - ``sst.FPGA_sst_offload_hdr_eth_destination_mac_R[x] == <MAC of your machine's mtu=9000 interface>``, or the FPGAs will not send them to your machine. Use f.e. ``ip addr`` on the host to find the MAC address of your interface, and verify that its MTU is 9000, + - ``sst.FPGA_sst_offload_hdr_ip_destination_address_R[x] == <IP of your machine's mtu=9000 interface>``, or the packets will be dropped by the network or the kernel of your machine, + - ``sst.FPGA_sst_offload_hdr_udp_destination_port_R[x] == 5001``, or the packets will not be sent to a port that the SST device listens on. + +- XSTs: + + - ``xst.FPGA_xst_offload_enable_RW[x] == True``, to verify that the FPGAs are actually emitting the XSTs, + - ``xst.FPGA_xst_offload_hdr_eth_destination_mac_R[x] == <MAC of your machine's mtu=9000 interface>``, or the FPGAs will not send them to your machine. Use f.e. ``ip addr`` on the host to find the MAC address of your interface, and verify that its MTU is 9000, + - ``xst.FPGA_xst_offload_hdr_ip_destination_address_R[x] == <IP of your machine's mtu=9000 interface>``, or the packets will be dropped by the network or the kernel of your machine, + - ``xst.FPGA_xst_offload_hdr_udp_destination_port_R[x] == 5002``, or the packets will not be sent to a port that the XST device listens on. + +If this fails, see the next question. + +I am still not receiving XSTs and/or SSTs, even though the settings appear correct! +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +Let's see where the packets get stuck. Let us assume your MTU=9000 network interface is called ``em2`` (see ``ip addr`` to check): + +- Check whether the data arrives on ``em2``. Run ``tcpdump -i em2 udp -nn -vvv -c 10`` to capture the first 10 packets. Verify: + + - The destination MAC must match that of ``em2``, + - The destination IP must match that of ``em2``, + - The destination port is correct (5001 for SST, 5002 for XST), + - The source IP falls within the netmask of ``em2`` (unless ``net.ipv4.conf.em2.rp_filter=0`` is configured), + - TTL >= 2, + +- If you see no data at all, the network will have swallowed it. Try to use a direct network connection, or a hub (which broadcasts all packets, unlike a switch), to see what is being emitted by the FPGAs. +- Check whether the data reaches user space on the host: + + - Turn off the ``sst`` or ``xst`` device. This will not stop the FPGAs from sending. + - Run ``nc -u -l -p 5001 -vv`` (or port 5002 for XSTs). You should see raw packets being printed. + - If not, the Linux kernel is swallowing the packets, even before they can be sent to our docker container.
+ +- Check whether the data reaches kernel space in the container: + + - Enter the docker container by running ``docker exec -it device-sst bash``. + - Run ``sudo bash`` to become root, + - Run ``apt-get install -y tcpdump`` to install tcpdump, + - Check whether packets arrive using ``tcpdump -i eth0 udp -c 10 -nn``, + - If not, Linux is not routing the packets to the docker container. + +- Check whether the data reaches user space in the container: + + - Turn off the ``sst`` or ``xst`` device. This will not stop the FPGAs from sending. + - Enter the docker container by running ``docker exec -it device-sst bash``. + - Run ``sudo bash`` to become root, + - Run ``apt-get install -y netcat`` to install netcat, + - Check whether packets arrive using ``nc -u -l -p 5001 -vv`` (or port 5002 for XSTs), + - If not, Linux is not routing the packets to the docker container correctly. + +- If still no error was found, you've likely hit a bug in our software. + +Other containers +-------------------------------------------------------------------------------------------------------------- + +The ELK container won't start, saying "max virtual memory areas vm.max_map_count [65530] is too low"? +`````````````````````````````````````````````````````````````````````````````````````````````````````````````` + +The ELK stack needs the ``vm.max_map_count`` sysctl kernel parameter to be at least 262144 to run. See :ref:`elk-kernel-settings`. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..524d21369c9e0ded662f12a365d479ce3dc39abc --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,34 @@ +.. LOFAR2.0 Station Control documentation master file, created by + sphinx-quickstart on Wed Oct 6 13:31:53 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to LOFAR2.0 Station Control's documentation! +==================================================== + +LOFAR2.0 Station Control is a software stack aimed at monitoring, controlling, and managing a LOFAR2.0 station. In order to do so, it whips up a series of Docker containers, and combines the power of `Tango Controls <https://www.tango-controls.org/>`_, `PyTango <https://pytango.readthedocs.io/en/stable/>`_, `Docker <https://www.docker.com/>`_, `Grafana <https://grafana.com/>`_, `ELK <https://www.elastic.co/what-is/elk-stack>`_, `Jupyter Notebook <https://jupyter.org/>`_, and many others to provide a rich and powerful experience in using the station. + +Full monitoring and control access to the LOFAR2.0 station hardware is provided by marshalling its rich `OPC-UA <https://opcfoundation.org/about/opc-technologies/opc-ua/>`_ interfaces. Higher-level logic makes it possible to easily configure and obtain the LOFAR station data products (beamlets, XSTs, SSTs, BSTs) from your local machine using Python, or through one of our provided web interfaces. + +Even without having access to any LOFAR2.0 hardware, you can install the full stack on your laptop, and experiment with the software interfaces. + +..
toctree:: + :maxdepth: 2 + :caption: Contents: + + installation + interfaces/overview + devices/using + devices/devices + devices/configure + configure_station + developer + faq + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000000000000000000000000000000000000..cb0122ae95cc01de7f55e333345a6ec4d41bc369 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,89 @@ +Installation +================== + +You will need the following dependencies installed: + +- docker +- docker-compose +- git +- make + +Start by checking out the source code, f.e. the master branch, as well as the git submodules we use:: + + git clone https://git.astron.nl/lofar2.0/tango.git + cd tango + git submodule init + git submodule update + +Next, we bootstrap the system. This will build our docker images, start key ones, and load the base configuration. This may take a while:: + + cd docker-compose + make bootstrap + +If you lack access to LOFAR station hardware, load additional configurations to use the simulators instead:: + + for sim in ../CDB/*-sim-config.json; do + ../sbin/update_ConfigDb.sh "${sim}" + done + +If you do have access to LOFAR station hardware, you will have to :doc:`configure_station`. + +Now we are ready to start the other containers:: + + make start + +and make sure they are all up and running:: + + make status + +You should see the following state: + +- Containers ``astor``, ``hdbpp-viewer``, ``jive``, ``log-viewer`` and ``pogo`` will have State ``Exit 1``. These are containers that are interactive X11 tools, and not needed for now, +- Other containers have either State ``Up`` or ``Exit 0``. + +If not, you can inspect why with ``docker logs <container>``. Note that the containers will automatically be restarted on failure, and also if you reboot. Stop them explicitly to bring them down (``make stop <container>``). + +Post-boot Initialisation +--------------------------- + +After bootstrapping, and after a reboot, the software and hardware of the station need to be explicitly initialised. Note that the docker containers do restart automatically at system boot. + +The following commands start all the software devices to control the station hardware, and initialise the hardware with the configured default settings. Go to http://localhost:8888, start a new *Station Control* notebook, and initiate the software boot sequence:: + + # reset our boot device + boot.off() + assert boot.state() == DevState.OFF + boot.initialise() + assert boot.state() == DevState.STANDBY + boot.on() + assert boot.state() == DevState.ON + + # start and initialise the other devices + boot.initialise_station() + + # wait for the devices to be initialised + import time + + while boot.initialising_station_R: + print(f"Still initialising station. {boot.initialisation_progress_R}% complete. State: {boot.initialisation_status_R}") + time.sleep(1) + + # print conclusion + if boot.initialisation_progress_R == 100: + print("Done initialising station.") + else: + print(f"Failed to initialise station: {boot.initialisation_status_R}") + +See :ref:`boot` for more information on the ``boot`` device. + +.. _elk-kernel-settings: + +ELK +```` + +The ELK stack requires some kernel settings to be tuned before it will start. Although ``make bootstrap`` configures the kernel, these settings will not stick after a reboot.
You will need to run either:: + + make start elk-configure-host + make restart elk + +after reboot, or configure your system to set ``sysctl -w vm.max_map_count=262144`` (or higher) as root during boot. diff --git a/docs/source/interfaces/control.rst b/docs/source/interfaces/control.rst new file mode 100644 index 0000000000000000000000000000000000000000..3c514f11d7a3e5a4bbc1c7339bac3bed0820d70f --- /dev/null +++ b/docs/source/interfaces/control.rst @@ -0,0 +1,84 @@ +Monitoring & Control +======================== + +The main API to control the station is through the `Tango Controls <https://tango-controls.readthedocs.io/en/latest/>`_ API we expose on port 10000, which is most easily accessed using a `PyTango <https://pytango.readthedocs.io/en/stable/client_api/index.html>`_ client. The Jupyter Notebook installation we provide is such a client. + +.. _jupyter: + +Jupyter Notebooks +------------------------ + +The station offers Jupyter notebooks on http://localhost:8888, which allow one to interact with the station, for example to set control points, access monitoring points, or to graph their values. + +The notebooks provide some predefined variables, so you don't have to look them up: + +.. literalinclude:: ../../../docker-compose/jupyter/ipython-profiles/stationcontrol-jupyter/startup/01-devices.py + +Note: the Jupyter notebooks use enhancements from the ``itango`` suite, which provide tab completions, but also the ``Device`` alias for the ``DeviceProxy`` class used in the Python examples in the next section. + +For example, you can start a new *Station Control* notebook (File->New Notebook->StationControl), and access these devices: + +.. image:: jupyter_basic_example.png + +.. _pytango-section: + +PyTango +------------------------ + +To access a station from scratch using Python, we need to install some dependencies:: + + pip3 install pytango + +Then, if we know what devices are available on the station, we can access them directly:: + + import tango + import os + + # Tango needs to know where our Tango API is running. + os.environ["TANGO_HOST"] = "localhost:10000" + + # Construct a remote reference to a specific device. + # One can also use "tango://localhost:10000/LTS/Boot/1" if TANGO_HOST is not set + boot_device = tango.DeviceProxy("LTS/Boot/1") + + # Print the device's state. + print(boot_device.state()) + +To obtain a list of all devices, we need to access the database:: + + import tango + + # Tango needs to know where our Tango API is running. + import os + os.environ["TANGO_HOST"] = "localhost:10000" + + # Connect to the database. + db = tango.Database() + + # Retrieve the available devices, excluding any Tango-internal ones. + # This returns for example: ['LTS/Boot/1', 'LTS/Docker/1', ...] + devices = list(db.get_device_exported("LTS/*")) + + # Connect to any of them. + any_device = tango.DeviceProxy(devices[0]) + + # Print the device's state. + print(any_device.state()) + +.. _rest-api: + +ReST API +------------------------ + +We also provide a ReST API to allow the station to be controlled without needing to use the Tango API. The root access point is http://localhost:8080/tango/rest/v10/hosts/databaseds;port=10000/ (credentials: tango-cs/tango). This API allows for: + +- getting and setting attribute values, +- calling commands, +- retrieving the device state, +- and more.
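+
+Such requests can also be scripted, f.e. with Python's ``requests`` package. A minimal sketch, using the root access point above and assuming HTTP basic authentication with the default credentials::
+
+    import requests
+
+    BASE = "http://localhost:8080/tango/rest/v10/hosts/databaseds;port=10000"
+
+    # retrieve the state of the SDP device (see also the example below)
+    response = requests.get(f"{BASE}/devices/LTS/SDP/1/state", auth=("tango-cs", "tango"))
+    print(response.json())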
+ +For example, retrieving http://localhost:8080/tango/rest/v10/hosts/databaseds;port=10000/devices/LTS/SDP/1/state returns the following JSON document:: + + {"state":"ON","status":"The device is in ON state."} + +For a full description of this API, see https://tango-rest-api.readthedocs.io/en/latest/. diff --git a/docs/source/interfaces/elk_last_hour.png b/docs/source/interfaces/elk_last_hour.png new file mode 100644 index 0000000000000000000000000000000000000000..d6f2a73c9ba754a5a6d5aeece1382906040acb15 Binary files /dev/null and b/docs/source/interfaces/elk_last_hour.png differ diff --git a/docs/source/interfaces/elk_log_fields.png b/docs/source/interfaces/elk_log_fields.png new file mode 100644 index 0000000000000000000000000000000000000000..c5774931f23933be6033e396220b2459409b1def Binary files /dev/null and b/docs/source/interfaces/elk_log_fields.png differ diff --git a/docs/source/interfaces/grafana_dashboard_1.png b/docs/source/interfaces/grafana_dashboard_1.png new file mode 100644 index 0000000000000000000000000000000000000000..448a9bd993b264cf35e98229f12829256f775029 Binary files /dev/null and b/docs/source/interfaces/grafana_dashboard_1.png differ diff --git a/docs/source/interfaces/grafana_dashboard_2.png b/docs/source/interfaces/grafana_dashboard_2.png new file mode 100644 index 0000000000000000000000000000000000000000..d7c34991d97cd22a209d1f02502afa1f439acf4e Binary files /dev/null and b/docs/source/interfaces/grafana_dashboard_2.png differ diff --git a/docs/source/interfaces/jupyter_basic_example.png b/docs/source/interfaces/jupyter_basic_example.png new file mode 100644 index 0000000000000000000000000000000000000000..c7e35204cc72b63e8ea2d81c2bdad337d3ce72a1 Binary files /dev/null and b/docs/source/interfaces/jupyter_basic_example.png differ diff --git a/docs/source/interfaces/logs.rst b/docs/source/interfaces/logs.rst new file mode 100644 index 0000000000000000000000000000000000000000..2b5c605ec5e47cf8b98b09dba47f6e6954f468ba --- /dev/null +++ b/docs/source/interfaces/logs.rst @@ -0,0 +1,44 @@ +Logs +================== + +The devices, and the docker containers in general, produce logging output. The easiest way to access the logs of a specific container is to ask docker directly. For example, to access and follow the most recent logs of the ``device-sdp`` container, execute on the host:: + + docker logs -n 100 -f device-sdp + +This is mostly useful for interactive use. + +.. _elk: + +ELK +------------------ + +To monitor the logs remotely, or to browse older logs, use the *ELK stack* that is included on the station, and served on http://localhost:5601. ELK, or ElasticSearch + Logstash + Kibana, is a popular log collection and querying system. Currently, the following logs are collected in our ELK installation: + +- Logs of all devices, +- Logs of the Jupyter notebook server. + +If you browse to the ELK stack (actually, it is Kibana providing the GUI), your go-to is the *Discover* view at http://localhost:5601/app/discover. There, you can construct (and save, load) a dashboard that provides a custom view of the logs, based on the *index pattern* ``logstash-*``. There is a lot to take in, and there are excellent Kibana tutorials on the web. 
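+
+The underlying ElasticSearch database can also be queried directly, bypassing Kibana. A minimal sketch in Python, assuming the ``requests`` package, the default ``logstash-*`` index pattern, and the Log Database port listed in the interfaces overview::
+
+    import requests
+
+    # count the ERROR-level log entries of the last hour
+    query = {"query": {"bool": {"must": [
+        {"match": {"level": "ERROR"}},
+        {"range": {"@timestamp": {"gte": "now-1h"}}},
+    ]}}}
+
+    print(requests.get("http://localhost:9200/logstash-*/_count", json=query).json())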
+ +To get going, use for example `this dashboard <http://localhost:5601/app/discover#/?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-60m,to:now))&_a=(columns:!(extra.tango_device,level,message),filters:!(),index:'1e8ca200-1be0-11ec-a85f-b97e4206c18b',interval:auto,query:(language:kuery,query:''),sort:!())>`_, which shows the logs of the last hour, with some useful columns added to the default timestamp and message columns. Expand the time range if no logs appear, to look further back. You should see something like: + +.. image:: elk_last_hour.png + +ELK allows you to filter, edit the columns, and a lot more. We enrich the log entries with several extra fields, for example the device that generated them, and stack traces if available. Click on the ``>`` before a log entry and the information expands, showing for example: + +.. image:: elk_log_fields.png + +Furthermore, statistics from the ELK stack, such as the number of ERROR log messages, are made available as a data source in :doc:`monitoring`. + +LogViewer +------------------ + +For each device, Tango collects the logs as well. These can be viewed with the LogViewer X11 application. Make sure ``$DISPLAY`` is set, and run:: + + cd docker-compose + make start logviewer + +If LogViewer does not appear, check ``docker logs logviewer`` to see what went wrong. + +For information on how to use the LogViewer, see https://tango-controls.readthedocs.io/en/latest/tools-and-extensions/built-in/logviewer/logviewer.html. + +.. note:: If you need an X11 server on Windows, see :ref:`x11_on_windows`. diff --git a/docs/source/interfaces/monitoring.rst b/docs/source/interfaces/monitoring.rst new file mode 100644 index 0000000000000000000000000000000000000000..7d8a85fdf5bd7c103119a89a8dbae127040a5240 --- /dev/null +++ b/docs/source/interfaces/monitoring.rst @@ -0,0 +1,51 @@ +Monitoring GUIs +======================== + +Each device exposes a list of monitoring points as attributes with the ``_R`` suffix. These can be accessed interactively from a control console (such as Jupyter), but that will not scale. + +Grafana +------------------------ + +We offer `Grafana <https://grafana.com/>`_ dashboards on http://localhost:3000 that provide a quick overview of the station's status, including temperatures and settings. Several dashboards are included. An example: + +.. image:: grafana_dashboard_1.png +.. image:: grafana_dashboard_2.png + +NOTE: These dashboards are highly subject to change. The above examples provide an impression of a possible overview of the station state. + +You are encouraged to inspect each panel (graph) to see the underlying database query and settings. Use the small arrow in the panel's title to get a drop-down menu of options, and select *inspect*. See the Grafana documentation for further information. + +The Grafana dashboards are configured with the following data sources: + +- :ref:`prometheus-section`, the time-series database that caches the latest values of all monitoring points (see next section), +- *Archiver DB*, the database that provides a long-term cache of attributes, +- :ref:`tangodb`, providing access to device properties (fixed settings), +- :ref:`elk`, the log output of the devices. + +..
_prometheus-section: + +Prometheus +------------------------- + +`Prometheus <https://prometheus.io/docs/introduction/overview/>`_ is a low-level monitoring system that allows us to periodically retrieve the values of all the attributes of all our devices, and cache them to be used in Grafana: + +- Every several seconds, Prometheus scrapes our `TANGO-Grafana Exporter <https://git.astron.nl/lofar2.0/ska-tango-grafana-exporter>`_ (our fork of https://gitlab.com/ska-telescope/TANGO-grafana.git), collecting all values of all the device attributes (except the large ones, for performance reasons). +- Prometheus can be queried directly on http://localhost:9090, +- The TANGO-Grafana Exporter can be queried directly on http://localhost:8000, +- The query language is `PromQL <https://prometheus.io/docs/prometheus/latest/querying/basics/>`_, which is also used in Grafana to query Prometheus. + +Prometheus stores attributes in the following format:: + + device_attribute{device="lts/recv/1", + dim_x="32", dim_y="0", + instance="tango-prometheus-exporter:8000", + job="tango", + label="RCU_temperature_R", + name="RCU_temperature_R", + type="float", + x="00", y="0"} + +The above describes a single data point and its labels. The primary identifying labels are ``device`` and ``name``. Each point furthermore has a (numeric) value and a timestamp. The following transformations take place: + +- For 1D and 2D attributes, each array element is its own monitoring point, with ``x`` and ``y`` labels describing the indices. The labels ``dim_x`` and ``dim_y`` describe the array dimensionality, +- Attributes with string values get a ``str_value`` label describing their value. diff --git a/docs/source/interfaces/overview.rst b/docs/source/interfaces/overview.rst new file mode 100644 index 0000000000000000000000000000000000000000..a00ab5710ad863b4f10d1bb0ee93ab3f547826d5 --- /dev/null +++ b/docs/source/interfaces/overview.rst @@ -0,0 +1,41 @@ +Interfaces +====================== + +The station provides the following interfaces accessible through your browser (assuming you run on `localhost`): + ++---------------------+---------+----------------------+-------------------+ +|Interface |Subsystem|URL |Default credentials| ++=====================+=========+======================+===================+ +| :ref:`jupyter` |Jupyter |http://localhost:8888 | | ++---------------------+---------+----------------------+-------------------+ +| :doc:`monitoring` |Grafana |http://localhost:3000 |admin/admin | ++---------------------+---------+----------------------+-------------------+ +| :doc:`logs` |Kibana |http://localhost:5601 | | ++---------------------+---------+----------------------+-------------------+ + +Furthermore, there are some low-level interfaces: + ++---------------------------+------------------+-----------------------+-------------------+ +|Interface |Subsystem |URL |Default credentials| ++===========================+==================+=======================+===================+ +| :ref:`pytango-section` |Tango |tango://localhost:10000| | ++---------------------------+------------------+-----------------------+-------------------+ +| :ref:`prometheus-section` |Prometheus |http://localhost:9090 | | ++---------------------------+------------------+-----------------------+-------------------+ +| TANGO-Grafana Exporter |Python HTTPServer |http://localhost:8000 | | ++---------------------------+------------------+-----------------------+-------------------+ +| :ref:`rest-api` |tango-rest |http://localhost:8080
|tango-cs/tango | ++---------------------------+------------------+-----------------------+-------------------+ +| :ref:`tangodb` |MariaDB |http://localhost:3306 |tango/tango | ++---------------------------+------------------+-----------------------+-------------------+ +|Archive Database |MariaDB |http://localhost:3307 |tango/tango | ++---------------------------+------------------+-----------------------+-------------------+ +|Log Database |ElasticSearch |http://localhost:9200 | | ++---------------------------+------------------+-----------------------+-------------------+ + +.. toctree:: + :hidden: + + control + monitoring + logs diff --git a/sbin/run_integration_test.sh b/sbin/run_integration_test.sh index 9d9ec12ae79a2336d5bfd88191930f8c6fa9db36..e0b87940b9ac9776a812b2e7cda5d9fa851e9200 100755 --- a/sbin/run_integration_test.sh +++ b/sbin/run_integration_test.sh @@ -1,12 +1,12 @@ #!/bin/bash -e -if [ -z "$LOFA20_DIR"]; then +if [ -z "$LOFAR20_DIR" ]; then # We assume we aren't in the PATH, so we can derive our path. # We need our parent directory. - LOFAR20_DIR_RELATIVE=`dirname "$0"`/.. + LOFAR20_DIR_RELATIVE=$(dirname "$0")/.. # As an absolute path - LOFAR20_DIR=`readlink -f "${LOFAR20_DIR_RELATIVE}"` + LOFAR20_DIR=$(readlink -f "${LOFAR20_DIR_RELATIVE}") fi # Start and stop sequence @@ -18,7 +18,7 @@ make start databaseds dsconfig jupyter elk sleep 15 # Update the dsconfig -${LOFAR20_DIR}/sbin/update_ConfigDb.sh ${LOFAR20_DIR}/CDB/integration_ConfigDb.json +"${LOFAR20_DIR}"/sbin/update_ConfigDb.sh "${LOFAR20_DIR}"/CDB/integration_ConfigDb.json cd "$LOFAR20_DIR/docker-compose" || exit 1 make start sdptr-sim recv-sim unb2-sim diff --git a/sbin/tag_and_push_docker_image.sh b/sbin/tag_and_push_docker_image.sh index ad94ae4b2ca6418e0d89347d4b37b47ef1a16a5a..799ab1cd779bb5caf840685f339080b57916063b 100755 --- a/sbin/tag_and_push_docker_image.sh +++ b/sbin/tag_and_push_docker_image.sh @@ -1,4 +1,4 @@ -#! /usr/bin/env bash -e +#!/bin/bash -e # Tag and push which image version? DOCKER_TAG=latest @@ -10,16 +10,16 @@ SKA_REPO="nexus.engageska-portugal.pt/ska-docker" LOFAR_REPO="git.astron.nl:5000/lofar2.0/tango" # Compile a list of the SKA images -SKA_IMAGES=$(for i in $(docker images | egrep ${DOCKER_TAG} | egrep ${SKA_REPO} | cut -d' ' -f1); do printf "%s " ${i}; done) +SKA_IMAGES=$(for i in $(docker images | grep -E ${DOCKER_TAG} | grep -E ${SKA_REPO} | cut -d' ' -f1); do printf "%s " "${i}"; done) # Compile a list of LOFAR2.0 images -LOFAR_IMAGES=$(for i in $(docker images | egrep ${DOCKER_TAG} | egrep -v "${SKA_REPO}|${LOFAR_REPO}" | cut -d' ' -f1); do printf "%s " ${i}; done) +LOFAR_IMAGES=$(for i in $(docker images | grep -E ${DOCKER_TAG} | grep -E -v "${SKA_REPO}|${LOFAR_REPO}" | cut -d' ' -f1); do printf "%s " "${i}"; done) function tag_and_push() { ( - docker tag ${1} ${2} - docker push ${2} + docker tag "${1}" "${2}" + docker push "${2}" ) & } @@ -27,14 +27,14 @@ function tag_and_push() # and push them to the LOFAR2.0 repo for IMAGE in ${SKA_IMAGES}; do PUSH_IMAGE=${IMAGE//${SKA_REPO}/${LOFAR_REPO}}:${VERSION} - tag_and_push ${IMAGE} ${PUSH_IMAGE} + tag_and_push "${IMAGE}" "${PUSH_IMAGE}" done # Rename the LOFAR2.0 images for the LOFAR2.0 repo # and push them to the LOFAR2.0 repo for IMAGE in ${LOFAR_IMAGES}; do - PUSH_IMAGES=${LOFAR_REPO}/${IMAGE}:${VERSIN} - tag_and_push ${IMAGE} ${PUSH_IMAGE} + PUSH_IMAGE=${LOFAR_REPO}/${IMAGE}:${VERSION} + tag_and_push "${IMAGE}" "${PUSH_IMAGE}" done wait