From 6362e3bb5b049b7cd6086faeb958c4afdcb0f5ab Mon Sep 17 00:00:00 2001 From: Mattia Mancini <mancini@astron.nl> Date: Wed, 8 May 2019 14:27:20 +0000 Subject: [PATCH] SSB-44: added convenience stuff to access the datatable --- .gitattributes | 4 + CAL/CalibrationCommon/lib/CMakeLists.txt | 2 + .../lib/datacontainers/holography_dataset.py | 20 +- .../datacontainers/holography_datatable.py | 196 ++++++++++++++++++ .../datacontainers/holography_observation.py | 14 +- CAL/CalibrationCommon/test/CMakeLists.txt | 1 + .../test/t_holography_datatable_class.py | 145 +++++++++++++ .../test/t_holography_datatable_class.run | 22 ++ .../test/t_holography_datatable_class.sh | 20 ++ 9 files changed, 407 insertions(+), 17 deletions(-) create mode 100644 CAL/CalibrationCommon/lib/datacontainers/holography_datatable.py create mode 100755 CAL/CalibrationCommon/test/t_holography_datatable_class.py create mode 100755 CAL/CalibrationCommon/test/t_holography_datatable_class.run create mode 100755 CAL/CalibrationCommon/test/t_holography_datatable_class.sh diff --git a/.gitattributes b/.gitattributes index de65e4ae336..469eee059fc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -13,6 +13,7 @@ CAL/CalibrationCommon/lib/coordinates.py -text CAL/CalibrationCommon/lib/datacontainers/__init__.py -text CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py -text CAL/CalibrationCommon/lib/datacontainers/holography_dataset_definitions.py -text +CAL/CalibrationCommon/lib/datacontainers/holography_datatable.py -text CAL/CalibrationCommon/lib/datacontainers/holography_measurementset.py -text CAL/CalibrationCommon/lib/datacontainers/holography_observation.py -text CAL/CalibrationCommon/lib/datacontainers/holography_specification.py -text @@ -28,6 +29,9 @@ CAL/CalibrationCommon/test/t_holography_dataset_class.sh -text CAL/CalibrationCommon/test/t_holography_dataset_class2.py -text CAL/CalibrationCommon/test/t_holography_dataset_class2.run -text CAL/CalibrationCommon/test/t_holography_dataset_class2.sh -text +CAL/CalibrationCommon/test/t_holography_datatable_class.py -text +CAL/CalibrationCommon/test/t_holography_datatable_class.run -text +CAL/CalibrationCommon/test/t_holography_datatable_class.sh -text CAL/CalibrationCommon/test/t_holography_ms_class.py -text CAL/CalibrationCommon/test/t_holography_ms_class.run -text CAL/CalibrationCommon/test/t_holography_ms_class.sh -text diff --git a/CAL/CalibrationCommon/lib/CMakeLists.txt b/CAL/CalibrationCommon/lib/CMakeLists.txt index 9cdf3ccdd2b..649177b5079 100644 --- a/CAL/CalibrationCommon/lib/CMakeLists.txt +++ b/CAL/CalibrationCommon/lib/CMakeLists.txt @@ -21,6 +21,8 @@ python_install( __init__.py datacontainers/holography_dataset_definitions.py datacontainers/holography_dataset.py + datacontainers/holography_datatable.py + datacontainers/holography_specification.py datacontainers/holography_observation.py datacontainers/holography_measurementset.py diff --git a/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py b/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py index b74bc1ea74d..9881f1d0dd0 100644 --- a/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py +++ b/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py @@ -72,7 +72,7 @@ class HolographyDataset(): self.antenna_field_position = [] # list of reference station names self.reference_stations = list() - # list of frequencies + # list of _frequencies self.frequencies = list() # dict(frequency: # array that contains the ra_dec of which a beam @@ -180,7 +180,7 @@ class HolographyDataset(): @param ra_dec: A dict[frequency][beamlet] that contains the right ascension (ra_dec[frequency][beamlet][0]) and declination (ra_dec[frequency][beamlet][1]) for every beamlet. - @param frequencies: A dict that contains the frequencies at which the + @param frequencies: A dict that contains the _frequencies at which the holography observation was performed. @param beamlets: A list of beamlet numbers. ''' @@ -189,7 +189,7 @@ class HolographyDataset(): source_dec = source["DEC"] # Store each pseudo distance in a dict of dicts: pd[frequency][beamlet] pseudo_distance = dict() - # Iterate over all frequencies... + # Iterate over all _frequencies... for frequency in frequencies: frequency_string = str(frequency) pseudo_distance[frequency_string] = dict() @@ -210,7 +210,7 @@ class HolographyDataset(): pseudo_distance[frequency_string][beamlet_string] = abs(ra - source_ra) + abs(dec - source_dec) # OK. Done with all the iteration business. Now check if across all - # frequencies the same beamlet is the central one. It is allowed that + # _frequencies the same beamlet is the central one. It is allowed that # the central beamlets are not always the same but it would be better # and let the astronomers sleep better if it would always be the same. # And also check if there is actually only one central beamlet per @@ -231,13 +231,13 @@ class HolographyDataset(): logger.error(text) raise ValueError(text) - # Now check if the central beamlet is the same across all frequencies. + # Now check if the central beamlet is the same across all _frequencies. # I do that by creating a set from the central beamlets of all stations. - # If there is only one central beamlet for all frequencies, the size of + # If there is only one central beamlet for all _frequencies, the size of # the set will be 1. central_beamlet_set = set(central_beamlet.values()) if len(central_beamlet_set) == 1: - logger.debug("All is good, unicorns everywhere, there is only one central beamlet \"%s\" for all frequencies.", central_beamlet_set) + logger.debug("All is good, unicorns everywhere, there is only one central beamlet \"%s\" for all _frequencies.", central_beamlet_set) else: logger.warning("Multiple central beamlets have been identified: ", central_beamlet) return central_beamlet @@ -303,7 +303,7 @@ class HolographyDataset(): observation to gather the list of rcus, the mode, the target station name and position, the source name and position, the start and the end time, the rotation matrix to convert from ra and dec to l and m, the antenna field positions, the list of the reference - stations, the frequencies, the ra and dec at which the beams point at. + stations, the _frequencies, the ra and dec at which the beams point at. All this data is essential to interpret the recorded holography beams cross correlations :param list_of_hbs_ho_tuples: a list containing (hbs, ho) per frequency @@ -627,12 +627,12 @@ class HolographyDataset(): f.attrs[HDS_ROTATION_MATRIX] = self.rotation_matrix f.attrs[HDS_ANTENNA_FIELD_POSITION] = self.antenna_field_position f.attrs[HDS_BEAMLETS] = self.beamlets - # Store the list of reference stations and frequencies. We just + # Store the list of reference stations and _frequencies. We just # want to keep 'em around for quick reference. f[HDS_REFERENCE_STATION] = to_numpy_array_string(self.reference_stations) f[HDS_FREQUENCY] = self.frequencies - # We create groups for the reference stations and the frequencies. + # We create groups for the reference stations and the _frequencies. # Then we store the data samples [XX, YY, XY, YX, t, l, m, flag] # in an array. The reference station name, the frequency and the # beamlet number (index of the data sample array) allow random diff --git a/CAL/CalibrationCommon/lib/datacontainers/holography_datatable.py b/CAL/CalibrationCommon/lib/datacontainers/holography_datatable.py new file mode 100644 index 00000000000..7146ce82874 --- /dev/null +++ b/CAL/CalibrationCommon/lib/datacontainers/holography_datatable.py @@ -0,0 +1,196 @@ +import logging + +import numpy + +logger = logging.getLogger(__name__) + + +def _check_index_type_or_raise_index_error(key): + """ + + :param key: the key to be checked + :type key; tuple(str) + :return: None + """ + if not isinstance(key, tuple) and not isinstance(key, str): + raise IndexError("please specify a string or a tuple. Key is " + type(key)) + + +class LazyH5Table(): + def __init__(self, uri, h5_file): + """ + Constructs the LazyH5Table + :param uri: location of the data in the H5 hierarchy + :type uri: str + :param h5_file: file descriptor of the h5 file + :type h5_file: h5py.File + """ + self.uri = uri + self.h5_file = h5_file + + def __get_data_set(self, key, if_not_present_create=False): + """ + Get the item at the specified index + :param key: index key + :type key: tuple(str) + :param if_not_present_create: if not present create subgroup or dataset + :return: h5py.Dataset + """ + if if_not_present_create and self.uri not in self.h5_file: + self.h5_file.create_group(self.uri) + + data_set = self.h5_file[self.uri] + _check_index_type_or_raise_index_error(key) + + for index in key: + if if_not_present_create and index not in data_set: + data_set.create_group(index) + + data_set = data_set[index] + + return data_set + + def __getitem__(self, key): + """ + Get the item at the specified index + :param key: index key + :type key: tuple(str) + :return: + """ + data_item = self.__get_data_set(key) + value = numpy.array(data_item) + return value + + def __setitem__(self, key, value): + """ + Set the item at the specified index + :param key: index key + :param value: value to be set + :return: + """ + data_item = self.__get_data_set(key[:-1], if_not_present_create=True) + data_item[key[-1]] = value + + +class HolographyDataTable(LazyH5Table): + def __init__(self, uri, h5_file): + super(HolographyDataTable, self).__init__(uri, h5_file) + self._frequencies = set() + self._reference_stations = set() + self._beam_numbers = set() + + self.__try_construct_index_array() + + def __try_construct_index_array(self): + """ + Try constructing the indexes if they exists + :return: + """ + try: + self.__get_index_arrays() + except KeyError: + logger.debug('uri missing creating datable at: ' + self.uri) + + def __get_index_arrays(self): + """ + Get the a list of indices for each datatable axis. + Note. it is assumed that the @see Holography Data Set is respected. + Hence, the first axis is the frequency + the second axis is the reference station + the third axis is the beam number + :return: None + """ + reference_stations = [station_name + for station_name in self.h5_file[self.uri]] + # I select the first frequency just for convenience to iterate on the second level + first_reference_station = reference_stations[0] + frequencies_string = [frequency + for frequency in self.h5_file[self.uri][first_reference_station]] + # I select the first reference station just for convenience to iterate on the third level + first_frequency = frequencies_string[0] + beam_numbers = {beam_number for beam_number in self.h5_file[self.uri] + [first_frequency] + [first_reference_station]} + + self._frequencies = set(sorted(map(float, frequencies_string))) + self._reference_stations = reference_stations + self._beam_numbers = set(sorted(map(int, beam_numbers))) + + @property + def frequencies(self): + return self._frequencies + + @property + def reference_stations(self): + return self._reference_stations + + @property + def beam_numbers(self): + return self._beam_numbers + + def __getitem__(self, key): + """ + Get the value in the specified key + :param key: (frequency, reference station name, beam number) to get + :type key: (float, str, int) + :rtype: numpy.ndarray + """ + HolographyDataTable.__raise_if_index_is_invalid(key) + + reference_station, frequency, beam_number = key + + return super().__getitem__((reference_station, + str(frequency), + str(beam_number))) + + @staticmethod + def __raise_if_index_is_invalid(index): + try: + reference_station, frequency, beam_number = index + except ValueError: + raise IndexError('Index error (station_name, frequency, beamlet) required got ' + + repr(index)) + if not isinstance(reference_station, str): + raise IndexError('Reference station is expected to be a string. Got : ' + + str(type(reference_station))) + if not isinstance(frequency, float): + raise IndexError('Frequency is expected to be a float. Got : ' + + str(type(frequency))) + if not isinstance(beam_number, int): + raise IndexError('Beam number is expected to be a int. Got : ' + + str(type(beam_number))) + + def __setitem__(self, key, value): + """ + Set the value in the specified key + :param key: (frequency, reference station name, beam number) to set + :type key: (float, str, int) + :param value: value to set + :return: None + """ + HolographyDataTable.__raise_if_index_is_invalid(key) + + reference_station, frequency, beam_number = key + + self._frequencies.add(float(frequency)) + self._reference_stations.add(frequency) + self._beam_numbers.add(int(beam_number)) + + super().__setitem__((reference_station, + str(frequency), + str(beam_number)), value) + + def iter_items(self): + for frequency in self._frequencies: + for station in self._reference_stations: + for beam in self._beam_numbers: + yield (float(frequency), station, int(beam)), \ + self.__getitem__((station, frequency, beam)) + + def keys(self): + key_set = set() + for frequency in self._frequencies: + for station in self._reference_stations: + for beam in self._beam_numbers: + key_set.add((frequency, station, beam)) + return key_set diff --git a/CAL/CalibrationCommon/lib/datacontainers/holography_observation.py b/CAL/CalibrationCommon/lib/datacontainers/holography_observation.py index d49b2f7cc98..81a2d333efc 100644 --- a/CAL/CalibrationCommon/lib/datacontainers/holography_observation.py +++ b/CAL/CalibrationCommon/lib/datacontainers/holography_observation.py @@ -68,9 +68,9 @@ class HolographyObservation(): def extract_unique_source_names_from_ms_list(ms_list): """ Returns a set of unique source names given a list of measurement sets - :param ms_list: a list of measurement set where to extract the reference frequencies + :param ms_list: a list of measurement set where to extract the reference _frequencies :type ms_list: list[HolographyMeasurementSet] - :return: a set of frequencies + :return: a set of _frequencies :rtype: set[str] """ return {ms.get_source_name() for ms in ms_list} @@ -79,7 +79,7 @@ class HolographyObservation(): def extract_unique_subband_from_ms_list(ms_list): """ Returns a set of unique rcu modes given a list of measurement sets - :param ms_list: a list of measurement set where to extract the reference frequencies + :param ms_list: a list of measurement set where to extract the reference _frequencies :type ms_list: list[HolographyMeasurementSet] :return: a set of rcu modes :rtype: set[int] @@ -121,7 +121,7 @@ class HolographyObservation(): frequency = unique_frequencies.pop() else: raise ValueError( - 'Multiple reference frequencies per observation are not supported') + 'Multiple reference _frequencies per observation are not supported') unique_source_names = HolographyObservation.\ extract_unique_source_names_from_ms_list( @@ -154,10 +154,10 @@ class HolographyObservation(): @staticmethod def extract_unique_reference_frequencies_from_ms_list(ms_list): """ - Returns a set of reference frequencies given a list of measurement setss - :param ms_list: a list of measurement set where to extract the reference frequencies + Returns a set of reference _frequencies given a list of measurement setss + :param ms_list: a list of measurement set where to extract the reference _frequencies :type ms_list: list[HolographyMeasurementSet] - :return: returns a set of frequencies + :return: returns a set of _frequencies :rtype: set[float] """ return {ms.get_frequency() for ms in ms_list} diff --git a/CAL/CalibrationCommon/test/CMakeLists.txt b/CAL/CalibrationCommon/test/CMakeLists.txt index 02ef455d948..17249cdfb5a 100644 --- a/CAL/CalibrationCommon/test/CMakeLists.txt +++ b/CAL/CalibrationCommon/test/CMakeLists.txt @@ -21,6 +21,7 @@ include(LofarCTest) lofar_add_test(t_holography_ms_class) lofar_add_test(t_holography_beam_specification_class) +lofar_add_test(t_holography_datatable_class) lofar_add_test(t_holography_dataset_class) lofar_add_test(t_holography_dataset_class2) lofar_add_test(t_holography_observation) diff --git a/CAL/CalibrationCommon/test/t_holography_datatable_class.py b/CAL/CalibrationCommon/test/t_holography_datatable_class.py new file mode 100755 index 00000000000..d4bc32f7a29 --- /dev/null +++ b/CAL/CalibrationCommon/test/t_holography_datatable_class.py @@ -0,0 +1,145 @@ +import logging +import unittest +import numpy +import tempfile +import h5py + +from lofar.calibration.common.datacontainers.holography_datatable import LazyH5Table,\ + HolographyDataTable + +logger = logging.getLogger('t_holography_datatable_class') + + +def test_data_dict_string_indexes(): + frequencies = [str(i) for i in numpy.linspace(0, 1000, 100, dtype=float)] + stations = ['CS001', 'CS002'] + beams = [str(int(i)) for i in range(100)] + + dset = numpy.random.rand(10) + data_dict = {} + for fr in frequencies: + for station in stations: + for beam in beams: + data_dict[(fr, station, beam)] = dset + return data_dict + + +class TestLazyH5Table(unittest.TestCase): + def test_create_table(self): + test_data = test_data_dict_string_indexes() + test_uri = '/mytest_uri' + temp_file_name = 'tempFile.h5' + with tempfile.TemporaryDirectory() as temporary_dir: + full_path = temporary_dir + '/' + temp_file_name + logger.info('creating temp file %s', full_path) + h5file = h5py.File(full_path) + data_table = LazyH5Table(uri=test_uri, h5_file=h5file) + + for key, value in test_data.items(): + + data_table[key] = value + + h5file.close() + + h5file = h5py.File(full_path) + + for key, value in test_data.items(): + + data_uri = '/'.join(key) + self.assertIn(test_uri + '/' + data_uri, h5file) + + +def test_data_dict_holography_index_types(): + frequencies = [i for i in numpy.linspace(0, 1000, 100, dtype=float)] + stations = ['CS001', 'CS002'] + beams = [int(i) for i in range(100)] + + dset = numpy.random.rand(10) + data_dict = {} + for station in stations: + for fr in frequencies: + for beam in beams: + data_dict[(station, fr, beam)] = dset + return data_dict + + +class TestHolographyDatasetClass(unittest.TestCase): + + def test_create_read_dataset(self): + test_data = test_data_dict_holography_index_types() + + test_uri = '/mytest_uri' + temp_file_name = 'tempFile.h5' + with tempfile.TemporaryDirectory() as temporary_dir: + full_path = temporary_dir + '/' + temp_file_name + logger.info('creating temp file %s', full_path) + h5file = h5py.File(full_path) + data_table = HolographyDataTable(uri=test_uri, h5_file=h5file) + + for key, value in test_data.items(): + data_table[key] = value + + h5file.close() + + h5file = h5py.File(full_path, 'r') + data_table = HolographyDataTable(uri=test_uri, h5_file=h5file) + + stored_keys = data_table.keys() + + self.assertEqual(stored_keys.difference(test_data.keys()), set()) + + for key, value in data_table.iter_items(): + self.assertIn(key, test_data) + frequency, station_name, beam = key + numpy.testing.assert_array_equal(data_table[station_name, frequency, beam], test_data[key]) + + for frequency in data_table.frequencies: + for reference_station in data_table.reference_stations: + for beam in data_table.beam_numbers: + self.assertIn((reference_station, frequency, beam), stored_keys) + + def test_wrong_index_types(self): + test_uri = '/mytest_uri' + temp_file_name = 'tempFile.h5' + with tempfile.TemporaryDirectory() as temporary_dir: + full_path = temporary_dir + '/' + temp_file_name + logger.info('creating temp file %s', full_path) + h5file = h5py.File(full_path) + data_table = HolographyDataTable(uri=test_uri, h5_file=h5file) + with self.assertRaises(IndexError): + data_table['ref', 1, 1] = 1 + with self.assertRaises(IndexError): + data_table['ref', 1., ""] = 1 + with self.assertRaises(IndexError): + data_table[1 , 1., 1] = 1 + + def test_wrong_index_number(self): + test_uri = '/mytest_uri' + temp_file_name = 'tempFile.h5' + with tempfile.TemporaryDirectory() as temporary_dir: + full_path = temporary_dir + '/' + temp_file_name + logger.info('creating temp file %s', full_path) + h5file = h5py.File(full_path) + data_table = HolographyDataTable(uri=test_uri, h5_file=h5file) + + with self.assertRaises(IndexError): + data_table[1 , 1.] = 1 + with self.assertRaises(IndexError): + data_table[1 , 1., 1, 1] = 1 + + def test_error_getting_non_existing_data(self): + test_uri = '/mytest_uri' + temp_file_name = 'tempFile.h5' + with tempfile.TemporaryDirectory() as temporary_dir: + full_path = temporary_dir + '/' + temp_file_name + logger.info('creating temp file %s', full_path) + h5file = h5py.File(full_path) + data_table = HolographyDataTable(uri=test_uri, h5_file=h5file) + + with self.assertRaises(KeyError): + _ = data_table['stat', 1., 1] + + +if __name__ == '__main__': + logging.basicConfig(format='%(name)s : %(message)s') + unittest.main() diff --git a/CAL/CalibrationCommon/test/t_holography_datatable_class.run b/CAL/CalibrationCommon/test/t_holography_datatable_class.run new file mode 100755 index 00000000000..196521525ce --- /dev/null +++ b/CAL/CalibrationCommon/test/t_holography_datatable_class.run @@ -0,0 +1,22 @@ +#!/bin/bash + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +# Run the unit test +source python-coverage.sh +python_coverage_test "*holography_datatable*" t_holography_datatable_class.py \ No newline at end of file diff --git a/CAL/CalibrationCommon/test/t_holography_datatable_class.sh b/CAL/CalibrationCommon/test/t_holography_datatable_class.sh new file mode 100755 index 00000000000..7969ebdd383 --- /dev/null +++ b/CAL/CalibrationCommon/test/t_holography_datatable_class.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +# Copyright (C) 2018 ASTRON (Netherlands Institute for Radio Astronomy) +# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. + +./runctest.sh t_holography_datatable_class \ No newline at end of file -- GitLab