From 281c23b82ce8081c74addbd4a4c53daa6d7fef12 Mon Sep 17 00:00:00 2001
From: Mattia Mancini <mancini@astron.nl>
Date: Wed, 3 Oct 2018 09:46:01 +0000
Subject: [PATCH] SW-43: Implementing routine to read and store the derived
 data attribute

---
 .../lib/datacontainers/holography_dataset.py  | 132 ++++++++++++++----
 .../test/t_holography_dataset_class.py        |  19 ++-
 2 files changed, 125 insertions(+), 26 deletions(-)

diff --git a/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py b/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py
index 1574638bda7..43d442171d3 100644
--- a/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py
+++ b/CAL/CalibrationCommon/lib/datacontainers/holography_dataset.py
@@ -33,36 +33,53 @@ class HolographyDataset():
 
         # list of 3 floats
         self.target_station_position = None
-        self.source_name = None  # string
-        self.source_position = None  # list of 3 floats
-        self.start_time = None  # date time when the observation started in MJD in seconds (float)
-        self.end_time = None  # date time when the observation started in MJD in seconds (float)
-        self.rotation_matrix = None  # array(3,3), translation matrix for
+        # name of the source (str)
+        self.source_name = None
+        # position of the source: array [('RA', numpy.float64), ('DEC', numpy.float64), ('EPOCH', 'S10')]
+        self.source_position = None
+        # date time when the observation started in MJD in seconds (float)
+        self.start_time = None
+        # date time when the observation ended in MJD in seconds (float)
+        self.end_time = None
+        # array(3,3), translation matrix for
         # (RA, DEC) <-> (l, m) conversion
+        self.rotation_matrix = None
 
-        self.beamlets = list()  # list of beamlet numbers
+        # list of beamlet numbers
+        self.beamlets = list()
         # coordinates of the antenna position in the target
         self.antenna_field_position = []
-        # station
-        self.reference_stations = list()  # list of reference station names
-        self.frequencies = list()  # list of frequencies
-        self.ra_dec = dict()  # array(Nfrequency, Nbeamlets) contains the ra_dec of which a beam
-        # points at given a frequency and a beamlet number
-        # numpy.dtype([('RA', numpy.float64),
-        #              ('DEC',numpy.float64),
-        #              ('EPOCH', 'S10')]) 
-        self.data = dict()  # array(NreferenceStations, Nfrequencies, Nbeamlets) that contains the
-        # 4 polarization crosscorrelation for the 4 polarizations, the l and m coordinates, and
-        # the timestamp in mjd of the sample, and whether or not the data has been flagged
-        # numpy.dtype([('XX', numpy.float),
-        #              ('YY', numpy.float),
-        #              ('XY', numpy.float),
-        #              ('YX', numpy.float),
-        #              ('l', numpy.float),
-        #              ('m', numpy.float),
-        #              ('t', numpy.float),
+        # list of reference station names
+        self.reference_stations = list()
+        # list of frequencies
+        self.frequencies = list()
+        # dict(reference_station_name:
+        #      dict(frequency:
+        #                array containing the (RA, DEC) at which a beam
+        #                points, for a given frequency and beamlet number
+        #                numpy.dtype([('RA', numpy.float64),
+        #                             ('DEC',numpy.float64),
+        #                             ('EPOCH', 'S10')])
+        self.ra_dec = dict()
+        # dict(reference_station_name:
+        #      dict(frequency:
+        #           dict(beamlet_number:
+        #                array that contains the crosscorrelations for the
+        #                4 polarizations, the l and m coordinates, the timestamp
+        #                in MJD of the sample, and whether the data has been flagged
+        # numpy.dtype([('XX', numpy.float64),
+        #              ('YY', numpy.float64),
+        #              ('XY', numpy.float64),
+        #              ('YX', numpy.float64),
+        #              ('l', numpy.float64),
+        #              ('m', numpy.float64),
+        #              ('t', numpy.float64),
         #              ('flag', numpy.bool)]
         #              )
+        self.data = dict()
+        # a dict of dicts whose leaf values are str, ndarray, or anything that can be
+        # converted into an ndarray by calling numpy.array()
+        self.derived_data = None
 
     @staticmethod
     def compare_dicts(dict1, dict2):
@@ -331,6 +348,64 @@ class HolographyDataset():
         else:
             logger.warn("The object passed is not a HolographyDataset instance.  Cannot print any data.")
 
+    @staticmethod
+    def _read_grouped_data(h5file, uri):
+        """
+        Read the data in a nested hierarchy starting from the address uri
+        into a python dict
+        :param h5file: input HDF5 file
+        :type h5file:  h5py.File
+        :param uri: starting point address
+        :type uri: str
+        :return: a dict encoding the structure
+        """
+        starting_leaf = h5file[uri]
+        result = dict()
+        for key, value in starting_leaf.items():
+
+            if isinstance(value, h5py.Group) is True:
+                result[key] = HolographyDataset._read_grouped_data(h5file, value.name)
+            else:
+                try:
+                    result[key] = numpy.array(value)
+                except ValueError as e:
+                    logger.exception('Cannot interpret %s as a numpy array: %s', type(value), e)
+                    raise e
+        return result
+
+    @staticmethod
+    def _store_grouped_data(h5file, uri, data_to_store):
+        """
+        Store the data in a nested hierarchy starting from the address uri
+        into the HDS
+        :param h5file: input HDF5 file
+        :type h5file:  h5py.File
+        :param uri: starting point address
+        :type uri: str
+        :param data_to_store: dict that contains the data to store
+        :type data_to_store: dict
+        :return: None
+        """
+        if uri not in h5file:
+            starting_leaf = h5file.create_group(uri)
+        else:
+            starting_leaf = h5file[uri]
+
+        for key, value in data_to_store.items():
+
+            if isinstance(value, dict) is True:
+                HolographyDataset._store_grouped_data(h5file, '/'.join([uri, key]), value)
+            else:
+                try:
+                    if isinstance(value, str):
+                        starting_leaf[key] = numpy.string_(value)
+                    else:
+                        starting_leaf[key] = value
+
+                except ValueError as e:
+                    logger.exception('Cannot interpret %s as a numpy array: %s', type(value), e)
+                    raise e
+
     @staticmethod
     def load_from_file(path):
         """
@@ -381,6 +456,9 @@ class HolographyDataset():
                             f["CROSSCORRELATION"][reference_station][frequency][beamlet])
 
             result.beamlets = list(beamlets)
+
+            if '/DERIVED_DATA' in f:
+                result.derived_data = HolographyDataset._read_grouped_data(f, '/DERIVED_DATA')
         except Exception as e:
             logger.exception(
                 "Cannot read the Holography Data Set data from the HDF5 file \"%s\".  This is the exception that was thrown:  %s",
@@ -450,6 +528,12 @@ class HolographyDataset():
                     for beamlet in self.data[reference_station][frequency].keys():
                         f["CROSSCORRELATION"][reference_station][frequency][beamlet] = \
                             self.data[reference_station][frequency][beamlet]
+
+            if self.derived_data:
+                self._store_grouped_data(h5file=f,
+                                         uri='/DERIVED_DATA',
+                                         data_to_store=self.derived_data)
+
         except Exception as e:
             logger.exception(
                 "Cannot write the Holography Data Set data to the HDF5 file \"%s\".  This is the exception that was thrown:  %s",
diff --git a/CAL/CalibrationCommon/test/t_holography_dataset_class.py b/CAL/CalibrationCommon/test/t_holography_dataset_class.py
index 0a8207e89cd..2701ada53d5 100755
--- a/CAL/CalibrationCommon/test/t_holography_dataset_class.py
+++ b/CAL/CalibrationCommon/test/t_holography_dataset_class.py
@@ -1,6 +1,8 @@
 import logging
 import unittest
 import tempfile
+import h5py
+import numpy
 import os
 from lofar.calibration.common.datacontainers import HolographyDataset, HolographySpecification
 from lofar.calibration.common.datacontainers import HolographyObservation
@@ -9,8 +11,8 @@ from lofar.calibration.common.datacontainers import HolographyObservation
 logger = logging.getLogger('t_holography_dataset_class')
 # READ doc/Holography_Data_Set.md!  It contains the location from which the
 # test data must be downloaded.
-path_to_test_data = '/var/tmp/holography'
-
+path_to_test_data = '/data/test/HolographyObservation'
+path_to_test_dataset = path_to_test_data + '/CS001HBA0.hdf5'
 
 class TestHolographyDatasetClass(unittest.TestCase):
     def test_create_hds(self):
@@ -32,6 +34,19 @@ class TestHolographyDatasetClass(unittest.TestCase):
         # Make sure the data in memory is OK.
         self.assertEqual(holography_dataset.source_name, '3C 147')
 
+    def test_store_and_read_from_hdf(self):
+        test_dict = dict(CS001=dict(BEAM0=1, BEAM1=2, BEAM3=dict(FR1='str2')), CS003='str')
+
+        with tempfile.NamedTemporaryFile(suffix='.hdf5') as tfile:
+            tfile.close()
+
+            h5file = h5py.File(tfile.name)
+            HolographyDataset._store_grouped_data(h5file, '/test', test_dict)
+            h5file.close()
+
+            h5file = h5py.File(tfile.name)
+            read_dict = HolographyDataset._read_grouped_data(h5file, '/test')
+            self.assertDictEqual(test_dict, read_dict)
 
 if __name__ == '__main__':
     logging.basicConfig(format='%(name)s : %(message)s')
-- 
GitLab