diff --git a/devices/statistics_writer/README.md b/devices/statistics_writer/README.md index 62e940bacb6512eb702cc4fdd816b8ba61153958..e2111f3d203158706f96a3eaee6004f3121f00ea 100644 --- a/devices/statistics_writer/README.md +++ b/devices/statistics_writer/README.md @@ -12,16 +12,16 @@ and writing those matrices (as well as a bunch of metadata) to hdf5. The TCP statistics writer can be called with the `tcp_hdf5_writer.py` script. This script can be called with the following arguments: ``` - --host the address to connect to - --port the port to use - --file file to read from (as opposed to host and port) - --interval The time between creating new files in hours - --output_dir specifies the folder to write all the files - --mode sets the statistics type to be decoded options: "SST", "XST", "BST" - --debug takes no arguments, when used prints a lot of extra data to help with debugging + -a --host the address to connect to + -p --port the port to use + -f --file file to read from (as opposed to host and port) + -i --interval The time between creating new files in seconds + -o --output_dir specifies the folder to write all the files + -m --mode sets the statistics type to be decoded options: "SST", "XST", "BST" + -v --debug takes no arguments, when used prints a lot of extra data to help with debugging + -d --decimation Configure the writer to only store one every n samples. Saves storage space ``` - ##HFD5 structure Statistics packets are collected by the StatisticsCollector in to a matrix. Once the matrix is done or a newer timestamp arrives this matrix along with the header of first packet header, nof_payload_errors and nof_valid_payloads. 
diff --git a/devices/statistics_writer/hdf5_writer.py b/devices/statistics_writer/hdf5_writer.py index 5a5eb5ee3fe4d5d05094874d65293ea7fbc68314..197c3242fe48a8f99d4d1e79eb5412a6b8d90e2a 100644 --- a/devices/statistics_writer/hdf5_writer.py +++ b/devices/statistics_writer/hdf5_writer.py @@ -15,8 +15,7 @@ from devices.sdp.statistics_packet import SSTPacket, XSTPacket, BSTPacket, Stati import devices.sdp.statistics_collector as statistics_collector -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger("hdf5_writer") +logger = logging.getLogger("statistics_writer") __all__ = ["hdf5_writer"] @@ -26,19 +25,22 @@ class hdf5_writer: XST_MODE = "XST" BST_MODE = "BST" - - def __init__(self, new_file_time_interval, file_location, statistics_mode): + def __init__(self, new_file_time_interval, file_location, statistics_mode, decimation_factor): # all variables that deal with the matrix that's currently being decoded self.current_matrix = None self.current_timestamp = datetime.min.replace(tzinfo=pytz.UTC) + # counter that tracks how many statistics have been received + self.statistics_counter = 0 + # the header of the first packet of a new matrix is written as metadata. # Assumes all subsequent headers of the same matrix are identical (minus index) self.statistics_header = None # file handing self.file_location = file_location + self.decimation_factor = decimation_factor self.new_file_time_interval = timedelta(seconds=new_file_time_interval) self.last_file_time = datetime.min.replace(tzinfo=pytz.UTC) self.file = None @@ -88,7 +90,6 @@ class hdf5_writer: self.process_packet(packet) def start_new_matrix(self, timestamp): - logger.info(f"starting new matrix with timestamp: {timestamp}") """ is called when a statistics packet with a newer timestamp is received. 
Writes the matrix to the hdf5 file @@ -96,6 +97,19 @@ class hdf5_writer: updates current timestamp and statistics matrix collector """ + # only write the specified fraction of statistics, skip the rest + if self.statistics_counter % self.decimation_factor != 0: + logger.debug(f"Skipping statistic with timestamp: {timestamp}. Only writing 1/{self.decimation_factor} statistics") + + # increment even though its skipped + self.statistics_counter += 1 + return + + # received new statistic, so increment counter + self.statistics_counter += 1 + + logger.debug(f"starting new matrix with timestamp: {timestamp}") + # write the finished (and checks if its the first matrix) if self.current_matrix is not None: try: @@ -113,7 +127,7 @@ class hdf5_writer: self.statistics_header = None def write_matrix(self): - logger.info("writing matrix to file") + logger.debug("writing matrix to file") """ Writes the finished matrix to the hdf5 file """ @@ -155,10 +169,13 @@ class hdf5_writer: def process_packet(self, packet): - logger.debug(f"Processing packet") """ Adds the newly received statistics packet to the statistics matrix """ + # only process the packets of the wanted fraction + if self.statistics_counter % self.decimation_factor != 0: + return + self.current_matrix.process_packet(packet) def start_new_hdf5(self, timestamp): @@ -219,5 +236,7 @@ class hdf5_writer: try: self.write_matrix() finally: + filename = str(self.file) self.file.close() - logger.debug(f"{self.file} closed") + logger.debug(f"{filename} closed") + logger.debug(f"Received a total of {self.statistics_counter} statistics while running. 
With {int(self.statistics_counter/self.decimation_factor)} written to disk ") diff --git a/devices/statistics_writer/statistics_writer.py b/devices/statistics_writer/statistics_writer.py index 444ee2323e950a0428513cb4506d8b2b2376fc27..e2d4666fd581b01cdb99e9ad717fbccd32cfa33c 100644 --- a/devices/statistics_writer/statistics_writer.py +++ b/devices/statistics_writer/statistics_writer.py @@ -10,17 +10,17 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger("statistics_writer") parser = argparse.ArgumentParser(description='Converts a stream of statistics packets into HDF5 files.') -parser.add_argument('--host', type=str, help='the host to connect to') -parser.add_argument('--port', type=int, default=0, help='the port to connect to, or 0 to use default port for the selected mode (default: %(default)s)') -parser.add_argument('--file', type=str, help='the file to read from') +parser.add_argument('-a', '--host', type=str, help='the host to connect to') +parser.add_argument('-p', '--port', type=int, default=0, help='the port to connect to, or 0 to use default port for the selected mode (default: %(default)s)') +parser.add_argument('-f', '--file', type=str, help='the file to read from') -parser.add_argument('--mode', type=str, choices=['SST', 'XST', 'BST'], default='SST', help='sets the statistics type to be decoded options (default: %(default)s)') -parser.add_argument('--interval', type=float, default=3600, nargs="?", help='The time between creating new files in seconds (default: %(default)s)') -parser.add_argument('--output_dir', type=str, default=".", nargs="?", help='specifies the folder to write all the files (default: %(default)s)') -parser.add_argument('--debug', dest='debug', action='store_true', default=False, help='increase log output') +parser.add_argument('-m', '--mode', type=str, choices=['SST', 'XST', 'BST'], default='SST', help='sets the statistics type to be decoded options (default: %(default)s)') +parser.add_argument('-i', '--interval', 
type=float, default=3600, nargs="?", help='The time between creating new files in seconds (default: %(default)s)') +parser.add_argument('-o', '--output_dir', type=str, default=".", nargs="?", help='specifies the folder to write all the files (default: %(default)s)') +parser.add_argument('-v', '--debug', dest='debug', action='store_true', default=False, help='increase log output') +parser.add_argument('-d', '--decimation', type=int, default=1, help='Configure the writer to only store one every n samples. Saves storage space') -# create a data dumper that creates a new file every 10s (for testing) if __name__ == "__main__": args = parser.parse_args() @@ -32,6 +32,10 @@ if __name__ == "__main__": interval = args.interval mode = args.mode debug = args.debug + decimation = args.decimation + + if decimation < 1: + raise ValueError("Please use an integer --Decimation value 1 or higher to only store one every n statistics' ") if port == 0: default_ports = { "SST": 5101, "XST": 5102, "BST": 5103 } @@ -51,7 +55,7 @@ if __name__ == "__main__": sys.exit(1) # create the writer - writer = hdf5_writer(new_file_time_interval=interval, file_location=output_dir, statistics_mode=mode) + writer = hdf5_writer(new_file_time_interval=interval, file_location=output_dir, statistics_mode=mode, decimation_factor=decimation) # start looping try: