added logger to the explorer, cleaned up code and added lots of comments

28fbe68f · Taya Snijder · f9a9acc2 · 28fbe68f
Commit 28fbe68f authored Sep 22, 2021 by Taya Snijder
--- a/devices/statistics_writer/test/hdf5_explorer.py
+++ b/devices/statistics_writer/test/hdf5_explorer.py
@@ -8,10 +8,18 @@ parser.add_argument('--filename', type=str, help='the name and path of the file'
 import logging
 logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("statistics_writer")
+logger = logging.getLogger("hdf5_explorer")
+logger.setLevel(logging.DEBUG)
 class statistics_data:
+    """
+    Example class not used by anything
+    This class takes the file and the statistics name as its __init__ arguments and then stores
+    the datasets of that statistic as attributes.
+    """
    NOF_PAYLOAD_ERRORS = "nof_payload_errors"
    NOF_VALID_PAYLOADS = "nof_valid_payloads"
    FIRST_PACKET_HEADER = "first_packet_header"
@@ -23,52 +31,98 @@ class statistics_data:
        self.first_packet_header = file.get(f"{statistics_name}/{statistics_data.FIRST_PACKET_HEADER}")
        self.statistics_values = file.get(f"{statistics_name}/{statistics_data.STATISTICS_VALUES}")
-    def printout(self):
-        print()
 class explorer:
+    """
+    This class serves both as a tool to test and verify the content of HDF5 files and as an example
+    of how you can go through HDF5 files.
+    The first 2 functions, print_high_level and print_full, both call the hdf5 file.visititems function. This function
+    takes another function as argument and then calls that function for each and every group and dataset in the file.
+    The last 2 functions do this without the file.visititems function and instead rely on knowledge of how we structure the
+    statistics data.
+    """
    def __init__(self, filename):
        self.file = h5py.File(filename, 'r')
-    def explore(self):
+    def print_high_level(self):
-        self.file.visititems(self.tree_explore)
+        """Goes through all groups and datasets in the file with file.visititems and lists each of them without its content"""
+        self.file.visititems(self._high_level_explorer)
+    def print_full(self):
+        """Goes through all groups and datasets in the file with file.visititems and prints each of them including its content"""
+        self.file.visititems(self._full_explorer)
+    def _full_explorer(self, name, obj):
+        """
+        Called by the file.visititems(func) function. Gets called for each and every group and dataset.
+        Prints all groups and datasets including their content.
+        """
-    def tree_explore(self, name, obj):
        shift = name.count('/') * '    '
        data = self.file.get(name)
-        print(f"{shift}{name}: {data}")
+        logger.debug(f"{shift}{name}: {data}")
-        print(numpy.array(data))
+        logger.debug(numpy.array(data))
+    def _high_level_explorer(self, name, obj):
+        """
+        Called by the file.visititems(func) function. Gets called for each and every group and dataset.
+        Only lists the groups and datasets without the actual content.
+        """
+        shift = name.count('/') * '    '
+        data = self.file.get(name)
+        logger.debug(f"{shift}{name}: {data}")
-    def get_all_statistics(self):
+    def print_all_statistics_full(self):
        """
-        On the surface does a similar thing as tree_explore, but instead of exploring everything, has knowledge about the structure of the
+        Explores the file with knowledge of the file structure. Assumes all top level groups are statistics
-        statistics hdf5 files. Namely it assumes that all
+        and that all statistics groups are made up of datasets.
+        Prints the groups, the datasets and the content of the datasets.
        """
        # List all groups
-        print("Keys: %s" % self.file.keys())
+        logger.debug("Keys: %s" % self.file.keys())
        for group_key in self.file.keys():
            dataset = list(self.file[group_key])
            for i in dataset:
                data = self.file.get(f"{group_key}/{i}")
-                print(numpy.array(data))
+                logger.debug(group_key)
+                logger.debug(numpy.array(data))
+    def print_all_statistics_top_level(self):
+        """
+        Explores the file with knowledge of the file structure. Assumes all top level groups are statistics
+        and that all statistics groups are made up of datasets.
+        This function prints only the top level groups, AKA all the statistics collected. Useful when dealing with
+        potentially hundreds of statistics.
+        """
+        # List all groups
+        logger.debug("Listing all statistics stored in this file:")
+        for group_key in self.file.keys():
+            logger.debug(group_key)
 # create a data dumper that creates a new file every 10s (for testing)
 if __name__ == "__main__":
    args = parser.parse_args()
-    example = explorer(args.filename)
+    Explorer = explorer(args.filename)
-    example.get_all_statistics()
-    #example.explore()
+    """
+    Print the entire file's content
+    """
+    Explorer.print_all_statistics_full()
+    """
+    Print only the names of all the statistics in this file
+    """
+    Explorer.print_all_statistics_top_level()