From b5d56a7460823e3ccda59018403bb408dbfc1f10 Mon Sep 17 00:00:00 2001
From: Jorrit Schaap <schaap@astron.nl>
Date: Tue, 26 Jun 2018 07:18:48 +0000
Subject: [PATCH] SW-378: processed review comment: added clarification
 comments

---
 QA/QA_Common/bin/find_hdf5 | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/QA/QA_Common/bin/find_hdf5 b/QA/QA_Common/bin/find_hdf5
index 78694e0ece9..e5fe0540bda 100755
--- a/QA/QA_Common/bin/find_hdf5
+++ b/QA/QA_Common/bin/find_hdf5
@@ -86,10 +86,12 @@ if __name__ == '__main__':
     logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                         level=logging.DEBUG if options.verbose else logging.WARN)
 
+    # parse the options; if no specific filter for either obs or pipeline was given, then search for both.
     if not options.observation and not options.pipeline:
         options.observation = True
         options.pipeline = True
 
+    # if a specific filter for lba or hba was given, then search for observations only (and not for pipelines).
     if options.lba or options.hba:
         options.observation = True
         options.pipeline = False
@@ -105,6 +107,7 @@ if __name__ == '__main__':
 
     info_dicts = {}
 
+    # gather all info_dicts for all files...
     for file in files:
         try:
             info_dict = read_info_dict(file)
@@ -113,52 +116,64 @@ if __name__ == '__main__':
         except:
             pass
 
+    # ...and filter out the files that do not match the search filters
+
     if not (options.observation and options.pipeline):
         if options.observation:
+            # keep observations
             files = [f for f in files
                      if f in info_dicts and 'observation' in info_dicts[f].get('type', '').lower()]
 
         if options.pipeline:
+            # keep pipelines
             files = [f for f in files
                      if f in info_dicts and 'pipeline' in info_dicts[f].get('type', '').lower()]
 
 
     if not (options.lba and options.hba):
         if options.lba:
+            # keep lba
             files = [f for f in files
                      if f in info_dicts and 'lba' in info_dicts[f].get('antenna_array', '').lower()]
 
         if options.hba:
+            # keep hba
             files = [f for f in files
                      if f in info_dicts and 'hba' in info_dicts[f].get('antenna_array', '').lower()]
 
     if options.name:
+        # keep matching task names
         files = [f for f in files if f in info_dicts and
                  fnmatch.fnmatch(info_dicts[f].get('name', '').lower(), options.name.lower())]
 
     if options.project:
+        # keep matching project names
         files = [f for f in files if f in info_dicts and
                  (fnmatch.fnmatch(info_dicts[f].get('project', '').lower(), options.project.lower()) or
                   fnmatch.fnmatch(info_dicts[f].get('project_description', '').lower(), options.project.lower()))]
 
     if options.date:
+        # keep matching date
         options.date = datetime.strptime(options.date, '%Y-%m-%d').date()
         files = [f for f in files if f in info_dicts and
                  'start_time' in info_dicts[f] and info_dicts[f]['start_time'].date() == options.date]
 
     if options.min_duration:
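+        # keep matching duration. NOTE(review): the filter below compares the stop_time date with options.date and never uses min_duration -- confirm.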
         hours, sep, minutes = options.min_duration.partition(':')
         options.min_duration = timedelta(hours=int(hours), minutes=int(minutes))
         files = [f for f in files if f in info_dicts and
                  'stop_time' in info_dicts[f] and info_dicts[f]['stop_time'].date() == options.date]
 
     if options.max_duration:
+        # keep the files with a duration of at most max_duration
         hours, sep, minutes = options.max_duration.partition(':')
         options.max_duration = timedelta(hours=int(hours), minutes=int(minutes))
         files = [f for f in files
                  if f in info_dicts and info_dicts[f].get('duration', timedelta()) <= options.max_duration]
 
     if options.clusters or options.no_clusters:
+        # keep the files that have (options.clusters) or do not have (options.no_clusters) clusters
         def has_clusters(h5_path):
             with h5py.File(h5_path, "r+") as file:
                 return len(file.get('clustering',{}))
@@ -169,10 +184,15 @@ if __name__ == '__main__':
         if options.no_clusters:
             files = [f for f in files if not has_clusters(f)]
 
+    # the final files list now contains only the files matching all given filters.
+
+    # lastly, print the results...
     if options.info:
+        # print the full file info
         for file in files:
             print read_info_from_hdf5(file, read_data_info=False)
     else:
+        # just print the filtered filenames
         print '\n'.join(files)
 
 
-- 
GitLab