From c69c67e5aebe4a3e47e63af665cb41bd197a4c71 Mon Sep 17 00:00:00 2001
From: Alexander van Amesfoort <amesfoort@astron.nl>
Date: Mon, 24 Apr 2017 11:26:09 +0000
Subject: [PATCH] Task #9939: RA: translator.py: fix uv pipeline
 location/filename output for SAP nrs > 0. Also clarify log messages in the calibration and pulsar pipeline estimators.

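CreateCorrelated() indexes obs_locations/obs_filenames by global subband
number, so a count derived from obs_next_sb_nrs_per_sap[-1] only matches the
filled-in entries when the uv subbands form one contiguous range starting at
subband 0. For output that starts in a SAP with nr > 0 the leading slots stay
None, and the old check then rejected the parset. Count the files as they are
added instead, and strip the unused None init values afterwards.

Below is a minimal sketch of that counting idea, with hypothetical paths and
SAP/subband numbers (not the actual translator code):

    SLOTS = 1024  # same generous upper bound as in translator.py

    def collect(saps):
        """saps: list of (start_sb_nr, nr_of_uv_files) pairs."""
        locations = [None] * SLOTS
        total_nr_files = 0
        for start_sb_nr, nr_files in saps:
            for sb_nr in range(start_sb_nr, start_sb_nr + nr_files):
                locations[sb_nr] = '/hypothetical/path/uv'  # placeholder value
                total_nr_files += 1
        # strip the unused None init values and cross-check the explicit count
        locations = [loc for loc in locations if loc is not None]
        assert len(locations) == total_nr_files
        return locations, total_nr_files

    # e.g. only SAP 1 produces uv output, starting at subband 20:
    locs, n = collect([(20, 10)])
    print(n)  # 10; deriving the count from the last sb_nr + 1 would give 30
              # and leave leading None entries in the truncated lists

The estimator log messages now also print pred_est['resource_count'], since
the per-estimate nr_of_*_files counts apply that many times in total.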
---
 .../lib/translator.py                         | 25 +++++++++++--------
 .../calibration_pipeline.py                   |  4 +--
 .../resource_estimators/pulsar_pipeline.py    |  2 +-
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py
index 3e70917d144..10f9c3b0d04 100755
--- a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py
+++ b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py
@@ -98,6 +98,7 @@ class RAtoOTDBTranslator():
         result = {}
 
         # Split storage_property_list in items wrt observation and wrt pipeline.
+        total_nr_files = 0
         obs_max_uv_sap_nr = -1
         obs_next_sb_nrs_per_sap = []
         obs_locations = [None] * 1024  # the easy way and 1024 is enough even in 4 bit mode
@@ -131,20 +132,22 @@ class RAtoOTDBTranslator():
                                                              prop['resource_name']) + '/uv'
                     obs_filenames[sb_nr] = "L%d_SAP%03d_SB%03d_uv.MS" % (otdb_id, sap_nr, sb_nr)
                     obs_skip.append("0")  # what's this for?
+                    total_nr_files += 1
                 obs_next_sb_nrs_per_sap[sap_nr] = sb_nr + 1
 
         if obs_max_uv_sap_nr >= 0:
-            total_nr_files = obs_next_sb_nrs_per_sap[-1]
-            if total_nr_files < 1:
+            if total_nr_files == 0:
                 raise Exception('CreateCorrelated: skipping obs parset filenames: total_nr_files = %d' % total_nr_files)
             logger.info('CreateCorrelated: total_nr_files = %d', total_nr_files)
 
-            obs_locations = obs_locations[ : total_nr_files]  # truncate trailing None init values
-            obs_filenames = obs_filenames[ : total_nr_files]  # idem
-            if None in obs_locations or None in obs_filenames:
-                logger.error('CreateCorrelated: skipping obs parset filenames: None in locations = %s', obs_locations)
-                logger.error('CreateCorrelated: skipping obs parset filenames: None in filenames = %s', obs_filenames)
-                raise Exception('CreateCorrelated: skipping obs parset filenames: None in locations and/or filenames')
+            obs_locations = [loc for loc in obs_locations if loc is not None]  # strip unused init values
+            obs_filenames = [fn  for fn  in obs_filenames if fn  is not None]  # idem
+            if len(obs_locations) != total_nr_files or len(obs_filenames) != total_nr_files:
+                # If the total nr_of_uv_files in a SAP does not correspond to the start_sb_nr
+                # props of this and the next SAP, entries have been overwritten. Bail in that case.
+                logger.error('CreateCorrelated: skipping obs parset filenames: len(obs_locations) = %d and/or len(obs_filenames) = %d not equal to total_nr_files = %d',
+                             len(obs_locations), len(obs_filenames), total_nr_files)
+                raise Exception('CreateCorrelated: skipping obs parset filenames: unexpected nr of locations and/or filenames vs total_nr_files')
 
             result[PREFIX + 'DataProducts.%s_Correlated.locations' % (io_type)] = '[' + to_csv_string(obs_locations) + ']'
             result[PREFIX + 'DataProducts.%s_Correlated.filenames' % (io_type)] = '[' + to_csv_string(obs_filenames) + ']'
@@ -369,7 +372,7 @@ class RAtoOTDBTranslator():
             if 'start_sb_nr' in prop:
                 sb_nr = prop['start_sb_nr']
 
-            otdb_id = sap['properties']['im_otdb_id']
+            otdb_id = prop['im_otdb_id']
             for _ in xrange(prop['nr_of_im_files']):
                 locations.append(self.locationPath(cluster, project_name, otdb_id,
                                                    prop['resource_name']) + '/im')
@@ -396,7 +399,7 @@ class RAtoOTDBTranslator():
             if 'nr_of_img_files' not in prop:
                 continue
 
-            otdb_id = sap['properties']['img_otdb_id']
+            otdb_id = prop['img_otdb_id']
             for _ in xrange(prop['nr_of_img_files']):
                 locations.append(self.locationPath(cluster, project_name, otdb_id,
                                                    prop['resource_name']) + '/img')
@@ -423,7 +426,7 @@ class RAtoOTDBTranslator():
             if 'nr_of_pulp_files' not in prop:
                 continue
 
-            otdb_id = sap['properties']['pulp_otdb_id']
+            otdb_id = prop['pulp_otdb_id']
             for _ in xrange(prop['nr_of_pulp_files']):
                 locations.append(self.locationPath(cluster, project_name, otdb_id,
                                                    prop['resource_name']) + '/pulp')
diff --git a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py
index 1fd5610b3c1..ca2d4c243e5 100644
--- a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py
+++ b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py
@@ -162,7 +162,7 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
             uv_output_file_size = int(new_size + new_size / 64.0 * (1.0 + reduction_factor) + new_size / 2.0)
 
             nr_output_files = nr_input_files  # pure 'map' (bijective) operation, no split or reduce
-            logger.info("correlated_uv: {} files {} bytes each".format(nr_output_files, uv_output_file_size))
+            logger.info("correlated_uv: {}x {} files of {} bytes each".format(pred_est['resource_count'], nr_output_files, uv_output_file_size))
             estimate['output_files'] = {'uv': {'identification': output_ident_uv,
                     'properties': {'nr_of_uv_files': nr_output_files, 'uv_file_size': uv_output_file_size, 'start_sb_nr': start_sb_nr}}}
             data_size = uv_output_file_size
@@ -173,7 +173,7 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
                 logger.info("calculate instrument-model data size")
                 im_file_size = 1000  # TODO: 1 kB was hardcoded in the Scheduler
 
-                logger.info("correlated_im: {} files {} bytes each".format(nr_output_files, im_file_size))
+                logger.info("correlated_im: {}x {} files {} bytes each".format(pred_est['resource_count'], nr_output_files, im_file_size))
                 estimate['output_files']['im'] = {'identification': output_ident_im,
                         'properties': {'nr_of_im_files': nr_output_files, 'im_file_size': im_file_size, 'start_sb_nr': start_sb_nr}}
                 # FIXME I don't think this is technically correct, as the IM files are sometimes created and used, just not a required export?
diff --git a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py
index 82dd7e52515..4a8b6b6a903 100644
--- a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py
+++ b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py
@@ -116,7 +116,7 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
             logger.debug("calculate pulp data size")
             pulp_file_size = 1000  # TODO: 1 kB was hardcoded in the Scheduler
 
-            logger.info("pulsar_pipeline pulp: {} files {} bytes each".format(nr_output_files, pulp_file_size))
+            logger.info("pulsar_pipeline pulp: {}x {} files {} bytes each".format(pred_est['resource_count'], nr_output_files, pulp_file_size))
             estimate['output_files'] = {'pulp': {'identification': output_ident_pulp,
                                                  'properties': {'nr_of_pulp_files': nr_output_files,  # times pred_est['resource_count'] in total
                                                                 'pulp_file_size': pulp_file_size}}}
-- 
GitLab