Commit 82a89225 authored by Alexander van Amesfoort

Task #9939: RA estimator: address Adriaan's review comments + a few small cleanups of my own

parent 8862c83f
Showing 188 additions and 162 deletions
@@ -46,7 +46,7 @@ class BasePipelineResourceEstimator(BaseResourceEstimator):
         except Exception as e:
             logger.error(e)
             logger.info("Could not get duration from parset, returning default pipeline duration of 1 hour")
-            return 3600
+            return 3600.0

     def _getOutputIdentification(self, identifications):
         """ For pipeline output, there must be exactly 1 (non-duplicate) identification string per
@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)

 class BaseResourceEstimator(object):
-    """ Base class for all other resource estimater classes
+    """ Base class for all other resource estimator classes
     """
     def __init__(self, name):
         self.name = name
@@ -51,11 +51,14 @@ class BaseResourceEstimator(object):
         return True

     def _getDuration(self, start, end):
+        """ Returns the number of fractional seconds as a float (as totalSeconds())
+            between start and end.
+        """
         startTime = parseDatetime(start)
         endTime = parseDatetime(end)
         if startTime >= endTime:
             logger.warning("startTime is not before endTime")
-            return 1 ##TODO To prevent divide by zero later
+            return 1.0 ##TODO To prevent divide by zero later
         return totalSeconds(endTime - startTime)
         #TODO check if this makes duration = int(parset.get('duration', 0)) as a key redundant?
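For reference, a minimal sketch of what _getDuration computes, assuming parseDatetime and totalSeconds are thin wrappers around the standard datetime API (the helper name and the parset time format below are assumptions for illustration, not the module's actual imports):

from datetime import datetime

def duration_in_seconds(start, end):
    """Fractional seconds between two parset time strings, as a float."""
    fmt = '%Y-%m-%d %H:%M:%S'  # assumed parset datetime format
    start_time = datetime.strptime(start, fmt)
    end_time = datetime.strptime(end, fmt)
    if start_time >= end_time:
        return 1.0  # mirrors the divide-by-zero guard above
    return (end_time - start_time).total_seconds()

print(duration_in_seconds('2017-01-01 12:00:00', '2017-01-01 13:00:00'))  # 3600.0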
@@ -81,7 +84,7 @@ class BaseResourceEstimator(object):
             input_files[dptype].append(copy.deepcopy(dt_values))

             # Observation estimates have resource_count > 1 to be able to assign each output to another resource,
-            # but that is not supported atm for pipelines. We only use input params to produce parset filenames etc,
+            # but that is currently not supported for pipelines. We only use input parameters to produce parset filenames etc,
             # but not to reserve resources (not covered by resource count). Collapse to implied resource_count of 1.
             input_files[dptype][-1]['properties']['nr_of_' + dptype + '_files'] *= predecessor_estimate['resource_count']

         return True
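A worked example of that collapse, with values shaped like the docstring samples further down (illustrative only): a predecessor observation estimate with resource_count 20 and 1 uv file per resource unit becomes a single pipeline input entry holding all 20 files.

# Hypothetical predecessor estimate and input_files, for illustration.
predecessor_estimate = {'resource_count': 20}
input_files = {'uv': [{'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1}}]}

# Collapse to an implied resource_count of 1, as the code above does.
dptype = 'uv'
input_files[dptype][-1]['properties']['nr_of_' + dptype + '_files'] *= predecessor_estimate['resource_count']
print(input_files[dptype][-1]['properties']['nr_of_uv_files'])  # 20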
@@ -90,7 +93,7 @@ class BaseResourceEstimator(object):

     def get_inputs_from_predecessors(self, predecessor_estimates, identifications, dptype):
         """ Return copy of parts with dptype in predecessor_estimates matching identifications
-            If any of any of identifications could not be found, the empty dict is returned.
+            If any of the identifications could not be found, an empty dict is returned.
             dptype is one of the observation/pipeline data product types, e.g. 'uv', 'cs', 'pulp', ...
             No duplicates in the identifications iterable!
@@ -63,8 +63,15 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
           'resource_count': 20, 'root_resource_group': 'CEP4',
           'output_files': {
             'uv': [{'sap_nr': 2, 'identification': 'mom.G777955.B2.1.C.SAP002.uv.dps',
                     'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1, 'start_sb_nr': 0}},
-                   {'sap_nr': 3, 'identification': 'mom.G777955.B2.1.C.SAP003.uv.dps',
-                    'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1, 'start_sb_nr': 0}}
             ]
           }
+        },
+        {
+          'resource_types': {'bandwidth': 286331153, 'storage': 1073741824},  # per 'uv' dict
+          'resource_count': 20, 'root_resource_group': 'CEP4',
+          'output_files': {
+            'uv': [{'sap_nr': 3, 'identification': 'mom.G777955.B2.1.C.SAP003.uv.dps',
+                    'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1, 'start_sb_nr': 20}}
+            ]
+          }
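To make the resource_count semantics in these docstring examples concrete: the total claim for an estimate is resource_count times each resource_types value. A quick check with the sample numbers above:

estimate = {'resource_types': {'bandwidth': 286331153, 'storage': 1073741824},
            'resource_count': 20}

# Total claim across all 20 resource units (one 'uv' file each).
totals = {rtype: value * estimate['resource_count']
          for rtype, value in estimate['resource_types'].items()}
print(totals)  # {'bandwidth': 5726623060, 'storage': 21474836480}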
@@ -81,7 +88,8 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
          'resource_types': {'bandwidth': 2236995 * 20, 'storage': 67109864 * 20},
          'resource_count': 1, 'root_resource_group': 'CEP4',
-         # input resources not (yet) allocated: bandwidth only, but coupled to specific storage resource
+         # Note that the 2 predecessor estimates have been converted into an input 'uv' list. This works,
+         # as long as input resources are not (yet) scheduled. Currently, resource_* values apply to output_files only.
          'input_files': {
            'uv': [{'sap_nr': 2, 'identification': 'mom.G777955.B2.1.C.SAP002.uv.dps',  # w/ sap only if predecessor is an observation
                    'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 20, 'start_sb_nr': 0}},
@@ -110,19 +118,19 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
         For each estimate, the total output_files resources to be claimed is resource_count * resource_types.
         Thus resource_types is a total across all output_files content. The idea is to keep this
-        singular per data product type (inner list len 1), but for pipelines this is not possible atm.
+        singular per data product type (inner list size 1), but for pipelines this is currently not possible.

-        Note that atm input_files resources are not included or claimed.
-        However, input_files properties must be added to resource claims to later generate the parset.
+        Note that input_files resources are currently not included or claimed.
+        However, input_files properties must be added to resource claims to later generate parset values.
         This caveat must be fixed at some point, but until then, we cannot have input_files-only estimates.
-        (After it is fixed, we should not have that either; it makes no sense.)
+        (After it is fixed, we should not have input_files-only estimates either; it makes no sense.)

-        For pipelines we don't support output to multiple storage areas atm, so resource_count is 1.
+        For pipelines we currently do not support output to multiple storage areas, so resource_count is 1.
         We still have to deal with input_files from an observation with >1 SAP (used for the pulsar pipeline).
         For this case, we generate 1 estimate, but use a list per data product type (e.g. 'uv': [...]).
         Also, we may need multiple data product types in one pipeline estimate, but there the reason
-        is that e.g. 'uv' and 'im' file(s) belong together, so we must produce one estimate per pair,
-        (but again, it's a pipeline so atm it is collapsed to a single estimate, i.e. resource_count 1).
+        is that e.g. 'uv' and 'im' files belong together, so we produce one estimate per pair
+        (but again, it is a pipeline, so currently it is collapsed to a single estimate, thus resource_count 1).
         The inner data product type list can be removed once pipelines also use resource_count > 1.

         Some RA_Services design aspects work well. Others fail to capture the underlying concepts close enough, hence inelegance.
@@ -147,10 +155,11 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
                                      parset.getString('Observation.stopTime'))

         input_idents_uv = parset.getStringVector(DATAPRODUCTS + 'Input_Correlated.identifications')
-        input_files = self.get_inputs_from_predecessors(predecessor_estimates, input_idents_uv, 'uv')
-        if not input_files:
+        input_files_uv = self.get_inputs_from_predecessors(predecessor_estimates, input_idents_uv, 'uv')
+        if not input_files_uv:
             logger.error('Missing uv dataproducts in predecessor output_files')
             result['errors'].append('Missing uv dataproducts in predecessor output_files')
+        input_files = input_files_uv

         have_im_input = parset.getBool(DATAPRODUCTS + 'Input_InstrumentModel.enabled')
         if have_im_input:
@@ -166,8 +175,8 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
-        # Proper input bandwidth estimation has limited use atm and is tricky, because of pipeline duration est, tmp files,
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
+        # Proper input bandwidth estimation currently has limited use and is tricky because of pipeline duration estimation, tmp files,
         # multiple passes, nr nodes and caching, but for sure also because bandwidth must be tied to *predecessor* storage!
         #input_cluster_uv = parset.getString(DATAPRODUCTS + 'Input_Correlated.storageClusterName')
@@ -179,19 +188,20 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
             output_cluster_im = parset.getString(DATAPRODUCTS + 'Output_InstrumentModel.storageClusterName')
             if output_cluster_uv != output_cluster_im:
-                logger.warn('storageClusterName differs between uv: \'%s\' and im: \'%s\': to be packed in 1 estimate, so ignoring \'im\' storageClusterName',
-                            output_cluster_uv, output_cluster_im)
+                logger.error('Output_InstrumentModel is enabled, but its storageClusterName \'%s\' differs from Output_Correlated.storageClusterName \'%s\'',
+                             output_cluster_im, output_cluster_uv)
+                result['errors'].append('Output_InstrumentModel is enabled, but its storageClusterName \'%s\' differs from Output_Correlated.storageClusterName \'%s\'' % (output_cluster_im, output_cluster_uv))

-        # Observations can have multiple output estimates, but atm pipelines do not.
-        # (Reason: incomplete info avail and effective assigner claim merging is harder)
-        # As long as this is the case, try to do a best effort to map any predecessor (obs or pipeline) to single estimate output.
+        # Observations can have multiple output estimates, but currently pipelines do not.
+        # (Reason: incomplete info available and effective assigner claim merging is harder.)
+        # As long as this is the case, make a best effort to map any predecessor (observation or pipeline) to a single estimate output.
         nr_input_files = sum([uv_dict['properties']['nr_of_uv_files'] for uv_dict in input_files['uv']])
         # Assume all uv file sizes are the same size as in dict 0. For uv data, we never had pipelines with >1 dict,
         # but this could be meaningful when averaging multiple SAPs in 1 go (and no further processing steps).
         # (Never done, since subsequent pipeline steps must then also work on all SAPs. But averaging could be the last step.)
-        # The potential other case is >1 dict from different obs with different file sizes.
-        # In general, this requires >1 output est dict, which the estimate fmt allows, but atm is only used for observations.
+        # The other potential case is >1 dict from different observations with different file sizes.
+        # In general, this requires >1 output estimate dict, which the estimate format allows, but is currently only used for observations.
         uv_input_file_size = input_files['uv'][0]['properties']['uv_file_size']

         # For start_sb_nr, take the minimum of all start_sb_nr values.
@@ -201,8 +211,8 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
         start_sb_nr = min([uv_dict['properties']['start_sb_nr'] for uv_dict in input_files['uv']])

         # TODO: This output file size calculation comes from the (old) Scheduler without explaining comments.
-        # The reason why it isn't a simple div, is that parts of the metadata are not reduced in size (and casacore storage mgrs).
-        # With reduction_factor 1, computed output size increases by 53%. Casacore storage mgrs may change size, but that much?!?
+        # The reason why it isn't a simple division is that parts of the metadata are not reduced in size (and casacore storage managers).
+        # With reduction_factor 1, the computed output size increases by 53%. Casacore storage managers may change the size, but that much?!?
         # If you can figure out what/how, please fix this calculation. Avoid unnamed magic values and document why!
         logger.debug("calculate correlated data size")
         new_size = uv_input_file_size / float(reduction_factor)
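A worked illustration of the reduction step above, with made-up numbers (the subsequent metadata size correction that produces the ~53% overhead lies outside this hunk and is not reproduced here):

uv_input_file_size = 1073741824  # 1 GiB, as in the docstring examples
reduction_factor = 4             # illustrative averaging factor

new_size = uv_input_file_size / float(reduction_factor)
print(int(new_size))  # 268435456 bytes, before the metadata size correction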
@@ -227,15 +237,15 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
             # Need to split averaging pipeline and calibration pipeline
             data_size += im_file_size

-        data_size *= nr_output_files # bytes
-        if data_size:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = data_size * nr_output_files # bytes
+        if total_data_size:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_uv
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
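The bandwidth claim follows directly from the total output size and the pipeline duration: 8 bits per byte, rounded up to whole bits per second. The same pattern recurs in the image, long-baseline, and pulsar estimators below. A worked example with round, illustrative values:

from math import ceil

total_data_size = 20 * 1073741824  # 20 uv files of 1 GiB each, in bytes
duration = 3600.0                  # pipeline duration in fractional seconds

bandwidth = int(ceil(8 * total_data_size / duration))  # bits/second
print(bandwidth)  # 47721859 bits/second, i.e. ~47.7 Mbit/s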
@@ -89,14 +89,14 @@ class ImagePipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
         # Proper input bandwidth est has limited use and is tricky, because of pipeline duration est, tmp files, multiple passes, nr nodes and caching, ...
         #input_cluster_uv = parset.getString(DATAPRODUCTS + 'Input_Correlated.storageClusterName')

         output_ident_img = self._getOutputIdentification( parset.getStringVector(DATAPRODUCTS + 'Output_SkyImage.identifications') )
         output_cluster_img = parset.getString(DATAPRODUCTS + 'Output_SkyImage.storageClusterName')

-        # See the calibration pipeline estimator for why this is done in this way atm.
+        # See the calibration pipeline estimator for why this is currently done this way.
         nr_input_subbands = sum([uv_dict['properties']['nr_of_uv_files'] for uv_dict in input_files['uv']])
         uv_file_size = input_files['uv'][0]['properties']['uv_file_size']
         if nr_input_subbands % (subbands_per_image * slices_per_image) > 0:
@@ -112,16 +112,15 @@ class ImagePipelineResourceEstimator(BasePipelineResourceEstimator):
                                          'properties': {'nr_of_img_files': nr_images,
                                                         'img_file_size': img_file_size}}]}

-        # count total data size
-        data_size = nr_images * img_file_size # bytes
-        if data_size:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = nr_images * img_file_size # bytes
+        if total_data_size:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_img
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
@@ -89,14 +89,14 @@ class LongBaselinePipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
         # Proper input bandwidth est has limited use and is tricky, because of pipeline duration est, tmp files, multiple passes, nr nodes and caching, ...
         #input_cluster_uv = parset.getString(DATAPRODUCTS + 'Input_Correlated.storageClusterName')

         output_ident_uv = self._getOutputIdentification( parset.getStringVector(DATAPRODUCTS + 'Output_Correlated.identifications') )
         output_cluster_uv = parset.getString(DATAPRODUCTS + 'Output_Correlated.storageClusterName')

-        # See the calibration pipeline estimator for why this is done in this way atm.
+        # See the calibration pipeline estimator for why this is currently done this way.
         nr_input_files = sum([uv_dict['properties']['nr_of_uv_files'] for uv_dict in input_files['uv']])
         uv_input_file_size = input_files['uv'][0]['properties']['uv_file_size']
         start_sb_nr = min([uv_dict['properties']['start_sb_nr'] for uv_dict in input_files['uv']])
@@ -119,16 +119,15 @@ class LongBaselinePipelineResourceEstimator(BasePipelineResourceEstimator):
                                         'uv_file_size': uv_output_file_size,
                                         'start_sbg_nr': start_sbg_nr}}]}

-        # count total data size
-        data_size = nr_output_files * uv_output_file_size # bytes
-        if data_size:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = nr_output_files * uv_output_file_size # bytes
+        if total_data_size:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_uv
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
@@ -96,7 +96,7 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
         # Proper input bandwidth est has limited use and is tricky, because of pipeline duration est, tmp files, multiple passes, nr nodes and caching, ...
         #input_cluster_cs = parset.getString(DATAPRODUCTS + 'Input_CoherentStokes.storageClusterName')
         #input_cluster_is = parset.getString(DATAPRODUCTS + 'Input_IncoherentStokes.storageClusterName')
@@ -105,7 +105,7 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
         output_cluster_pulp = parset.getString(DATAPRODUCTS + 'Output_Pulsar.storageClusterName')

         # The pulsar pipeline ('pulp') produces 1 data product per tied-array beam, it seems also for complex voltages (XXYY) and stokes IQUV(?).
-        # For XXYY it really needs all 4 components at once. For IQUV this is less important, but atm we treat it the same (1 obs output estimate).
+        # For XXYY it really needs all 4 components at once. For IQUV this is less important, but currently we treat it the same (1 obs output estimate).
         # Note that it also produces 1 additional "summary" data product per data product *type* (i.e. 1 for 'cs' and/or 1 for 'is'),
         # but the RA_Services sub-system does not know about it. Adding support may be a waste of time(?).
         # Currently, RO controlled pulp grabs all inputs given some project name/id(?) and obs id, not from rotspservice generated parset parts.
@@ -126,15 +126,15 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
                                                       'pulp_file_size': pulp_file_size}}]}

         # count total data size
-        data_size = nr_input_files * pulp_file_size
-        if data_size > 0:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = nr_input_files * pulp_file_size
+        if total_data_size > 0:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_pulp
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
@@ -50,7 +50,8 @@ class ReservationResourceEstimator(BaseResourceEstimator):
         logger.info("start estimate '{}'".format(self.name))
         logger.info('parset: %s ' % parset)

         # NOTE: Observation.stopTime may differ from real stop time, because of Cobalt block size not being exactly 1.0 s.
-        duration = self._getDuration(parset.getString('Observation.startTime'), parset.getString('Observation.stopTime'))
+        duration = self._getDuration(parset.getString('Observation.startTime'),
+                                     parset.getString('Observation.stopTime'))

         errors = []
         estimates = []
@@ -85,7 +86,7 @@ class ReservationResourceEstimator(BaseResourceEstimator):
             rsps, channelbits = self._max_rsps(station)

-            bitfield = len(rculists[station])*'1' # claim all RCUs irrespective of use in the given antennaset; we actually only need the AntennasetsParser to obatin the numbe rof RCUs
+            bitfield = len(rculists[station])*'1' # claim all RCUs irrespective of use in the given antennaset; we actually only need the AntennasetsParser to obtain the number of RCUs
             est = {'resource_types': {'rcu': bitfield},
                    'resource_count': 1,
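The RCU claim is a bitmask string with one character per RCU; claiming all of them is simply a run of '1's as long as the station's RCU list. A minimal sketch (the rculists content is a made-up stand-in for what the AntennasetsParser provides):

rculists = {'CS001': ['RCU%d' % i for i in range(96)]}  # assumed: a station with 96 RCUs
station = 'CS001'

bitfield = len(rculists[station]) * '1'  # claim every RCU
print(len(bitfield), bitfield[:8])       # 96 11111111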
@@ -89,13 +89,22 @@ class ResourceEstimatorHandler(MessageHandlerInterface):
             predecessor_estimates = []
             for branch_otdb_id, branch_estimate in branch_estimates.items():
                 logger.info('Looking at predecessor %s' % branch_otdb_id)
                 estimates = branch_estimate.values()[0]['estimates']
-                if any(['uv' in est['output_files'] and 'im' not in est['output_files'] for est in estimates if 'output_files' in est]): # Not a calibrator pipeline
-                    logger.info('found %s as the target of pipeline %s' % (branch_otdb_id, otdb_id))
-                    predecessor_estimates.extend(estimates)
-                elif any(['im' in est['output_files'] for est in estimates if 'output_files' in est]):
-                    logger.info('found %s as the calibrator of pipeline %s' % (branch_otdb_id, otdb_id))
-                    predecessor_estimates.extend(estimates)
+                for est in estimates:
+                    if 'output_files' not in est:
+                        continue
+                    has_uv = 'uv' in est['output_files']
+                    has_im = 'im' in est['output_files']
+                    if has_uv and not has_im: # Not a calibrator pipeline
+                        logger.info('found %s as the target of pipeline %s' % (branch_otdb_id, otdb_id))
+                        predecessor_estimates.extend(estimates)
+                        break
+                    elif has_im:
+                        logger.info('found %s as the calibrator of pipeline %s' % (branch_otdb_id, otdb_id))
+                        predecessor_estimates.extend(estimates)
+                        break

             return {str(otdb_id): self.add_id(self.calibration_pipeline.verify_and_estimate(parset, predecessor_estimates), otdb_id)}

         if len(branch_estimates) > 1:
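The refactored loop classifies each predecessor by the data product types in its output_files: 'uv' without 'im' marks the target, any 'im' marks the calibrator. A standalone sketch of that decision, using hypothetical estimate dicts rather than the handler's real input:

def classify_predecessor(estimates):
    """Return 'target', 'calibrator', or None, mirroring the loop above."""
    for est in estimates:
        if 'output_files' not in est:
            continue
        has_uv = 'uv' in est['output_files']
        has_im = 'im' in est['output_files']
        if has_uv and not has_im:
            return 'target'       # not a calibrator pipeline
        elif has_im:
            return 'calibrator'
    return None

print(classify_predecessor([{'output_files': {'uv': []}}]))            # target
print(classify_predecessor([{'output_files': {'uv': [], 'im': []}}]))  # calibrator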