From 281c23b82ce8081c74addbd4a4c53daa6d7fef12 Mon Sep 17 00:00:00 2001 From: Mattia Mancini <mancini@astron.nl> Date: Wed, 3 Oct 2018 09:46:01 +0000 Subject: [PATCH] SW-43: Implementing routine to read and store the derived data attribute --- .../lib/datacontainers/holography_dataset.py | 132 ++++++++++++++---- .../test/t_holography_dataset_class.py | 19 ++- 2 files changed, 125 insertions(+), 26 deletions(-) diff --git a/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py b/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py index 1574638bda7..43d442171d3 100644 --- a/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py +++ b/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py @@ -33,36 +33,53 @@ class HolographyDataset(): # list of 3 floats self.target_station_position = None - self.source_name = None # string - self.source_position = None # list of 3 floats - self.start_time = None # date time when the observation started in MJD in seconds (float) - self.end_time = None # date time when the observation started in MJD in seconds (float) - self.rotation_matrix = None # array(3,3), translation matrix for + # name of the source (str) + self.source_name = None + # position of the source array[ (RA, numpy.float64), (DEC, numpy.float64), (EPOCH, 'S10')] + self.source_position = None + # date time when the observation started in MJD in seconds (float) + self.start_time = None + # date time when the observation ended in MJD in seconds (float) + self.end_time = None + # array(3,3), translation matrix for # (RA, DEC) <-> (l, m) conversion + self.rotation_matrix = None - self.beamlets = list() # list of beamlet numbers + # list of beamlet numbers + self.beamlets = list() # coordinates of the antenna position in the target self.antenna_field_position = [] - # station - self.reference_stations = list() # list of reference station names - self.frequencies = list() # list of frequencies - self.ra_dec = dict() # array(Nfrequency, 
Nbeamlets) contains the ra_dec of which a beam - # points at given a frequency and a beamlet number - # numpy.dtype([('RA', numpy.float64), - # ('DEC',numpy.float64), - # ('EPOCH', 'S10')]) - self.data = dict() # array(NreferenceStations, Nfrequencies, Nbeamlets) that contains the - # 4 polarization crosscorrelation for the 4 polarizations, the l and m coordinates, and - # the timestamp in mjd of the sample, and whether or not the data has been flagged - # numpy.dtype([('XX', numpy.float), - # ('YY', numpy.float), - # ('XY', numpy.float), - # ('YX', numpy.float), - # ('l', numpy.float), - # ('m', numpy.float), - # ('t', numpy.float), + # list of reference station names + self.reference_stations = list() + # list of frequencies + self.frequencies = list() + # dict(reference_station_name: + # dict(frequency: + # array that contains the ra_dec at which a beam + # points, given a frequency and a beamlet number + # numpy.dtype([('RA', numpy.float64), + # ('DEC',numpy.float64), + # ('EPOCH', 'S10')]) + self.ra_dec = dict() + # dict(reference_station_name: + # dict(frequency: + # dict(beamlet_number: + # array that contains the 4 polarization crosscorrelation for + # the 4 polarizations, the l and m coordinates, and the timestamp + # in mjd of the sample, and whether or not the data has been flagged + # numpy.dtype([('XX', numpy.float64), + # ('YY', numpy.float64), + # ('XY', numpy.float64), + # ('YX', numpy.float64), + # ('l', numpy.float64), + # ('m', numpy.float64), + # ('t', numpy.float64), # ('flag', numpy.bool)] # ) + self.data = dict() + # a nested dict of dicts whose leaf values are str, ndarray, or anything that can be converted + # to an ndarray by calling numpy.array() + self.derived_data = None @staticmethod def compare_dicts(dict1, dict2): @@ -331,6 +348,64 @@ class HolographyDataset(): else: logger.warn("The object passed is not a HolographyDataset instance. 
Cannot print any data.") + @staticmethod + def _read_grouped_data(h5file, uri): + """ + Read the data in a nested hierarchy starting from the address uri + into a python dict + :param h5file: input HDF5 file + :type h5file: h5py.File + :param uri: starting point address + :type uri: str + :return: a dict encoding the structure + """ + starting_leaf = h5file[uri] + result = dict() + for key, value in starting_leaf.items(): + + if isinstance(value, h5py.Group) is True: + result[key] = HolographyDataset._read_grouped_data(h5file, value.name) + else: + try: + result[key] = numpy.array(value) + except ValueError as e: + logger.exception('Cannot interpret %s a a numpy array: %s', type(value), e) + raise e + return result + + @staticmethod + def _store_grouped_data(h5file, uri, data_to_store): + """ + Store the data in a nested hierarchy starting from the address uri + into the HDS + :param h5file: input HDF5 file + :type h5file: h5py.File + :param uri: starting point address + :type uri: str + :param data_to_store: dict that contains the data to store + :type data_to_store: dict + :return: None, the data is written into h5file + """ + if uri not in h5file: + starting_leaf = h5file.create_group(uri) + else: + starting_leaf = h5file[uri] + + for key, value in data_to_store.items(): + + if isinstance(value, dict) is True: + HolographyDataset._store_grouped_data(h5file, '/'.join([uri, key]), value) + else: + try: + if isinstance(value, str): + starting_leaf[key] = numpy.string_(value) + else: + starting_leaf[key] = value + + except ValueError as e: + logger.exception('Cannot interpret %s a a numpy array: %s', type(value), e) + raise e + @staticmethod def load_from_file(path): """ @@ -381,6 +456,9 @@ class HolographyDataset(): f["CROSSCORRELATION"][reference_station][frequency][beamlet]) result.beamlets = list(beamlets) + + if '/DERIVED_DATA' in f: + result.derived_data = HolographyDataset._read_grouped_data(f, '/DERIVED_DATA') except Exception as e: logger.exception( "Cannot read 
the Holography Data Set data from the HDF5 file \"%s\". This is the exception that was thrown: %s", @@ -450,6 +528,12 @@ class HolographyDataset(): for beamlet in self.data[reference_station][frequency].keys(): f["CROSSCORRELATION"][reference_station][frequency][beamlet] = \ self.data[reference_station][frequency][beamlet] + + if self.derived_data: + self._store_grouped_data(h5file=f, + uri='/DERIVED_DATA', + data_to_store=self.derived_data) + except Exception as e: logger.exception( "Cannot write the Holography Data Set data to the HDF5 file \"%s\". This is the exception that was thrown: %s", diff --git a/CAL/CalibrationCommon/test/t_holography_dataset_class.py b/CAL/CalibrationCommon/test/t_holography_dataset_class.py index 0a8207e89cd..2701ada53d5 100755 --- a/CAL/CalibrationCommon/test/t_holography_dataset_class.py +++ b/CAL/CalibrationCommon/test/t_holography_dataset_class.py @@ -1,6 +1,8 @@ import logging import unittest import tempfile +import h5py +import numpy import os from lofar.calibration.common.datacontainers import HolographyDataset, HolographySpecification from lofar.calibration.common.datacontainers import HolographyObservation @@ -9,8 +11,8 @@ from lofar.calibration.common.datacontainers import HolographyObservation logger = logging.getLogger('t_holography_dataset_class') # READ doc/Holography_Data_Set.md! It contains the location from which the # test data must be downloaded. -path_to_test_data = '/var/tmp/holography' - +path_to_test_data = '/data/test/HolographyObservation' +path_to_test_dataset = path_to_test_data + '/CS001HBA0.hdf5' class TestHolographyDatasetClass(unittest.TestCase): def test_create_hds(self): @@ -32,6 +34,19 @@ class TestHolographyDatasetClass(unittest.TestCase): # Make sure the data in memory is OK. 
self.assertEqual(holography_dataset.source_name, '3C 147') + def test_store_and_read_from_hdf(self): + test_dict = dict(CS001=dict(BEAM0=1, BEAM1=2, BEAM3=dict(FR1='str2')), CS003='str') + + with tempfile.NamedTemporaryFile(suffix='.hdf5') as tfile: + tfile.close() + + h5file = h5py.File(tfile.name) + HolographyDataset._store_grouped_data(h5file, '/test', test_dict) + h5file.close() + + h5file = h5py.File(tfile.name) + read_dict = HolographyDataset._read_grouped_data(h5file, '/test') + self.assertDictEqual(test_dict, read_dict) if __name__ == '__main__': logging.basicConfig(format='%(name)s : %(message)s') -- GitLab