Commit 82a89225 authored by Alexander van Amesfoort

Task #9939: RA estimator: address Adriaan's review comments + a few small cleanups of my own

parent 8862c83f
Showing 188 additions and 162 deletions
@@ -46,7 +46,7 @@ class BasePipelineResourceEstimator(BaseResourceEstimator):
         except Exception as e:
             logger.error(e)
             logger.info("Could not get duration from parset, returning default pipeline duration of 1 hour")
-            return 3600
+            return 3600.0

     def _getOutputIdentification(self, identifications):
         """ For pipeline output, there must be exactly 1 (non-duplicate) identification string per
@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)

 class BaseResourceEstimator(object):
-    """ Base class for all other resource estimater classes
+    """ Base class for all other resource estimator classes
     """
     def __init__(self, name):
         self.name = name
@@ -51,11 +51,14 @@ class BaseResourceEstimator(object):
         return True

     def _getDuration(self, start, end):
+        """ Returns the number of fractional seconds as a float (as totalSeconds())
+            between start and end.
+        """
         startTime = parseDatetime(start)
         endTime = parseDatetime(end)
         if startTime >= endTime:
             logger.warning("startTime is not before endTime")
-            return 1 ##TODO To prevent divide by zero later
+            return 1.0 ##TODO To prevent divide by zero later
         return totalSeconds(endTime - startTime)
         #TODO check if this makes duration = int(parset.get('duration', 0)) as a key redundant?
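For reference, a minimal sketch of what _getDuration computes, assuming parseDatetime and totalSeconds are thin wrappers around the standard datetime API (the helper name and the parset time format below are assumptions for illustration, not the module's actual imports):

from datetime import datetime

def duration_in_seconds(start, end):
    """Fractional seconds between two parset time strings, as a float."""
    fmt = '%Y-%m-%d %H:%M:%S'  # assumed parset datetime format
    start_time = datetime.strptime(start, fmt)
    end_time = datetime.strptime(end, fmt)
    if start_time >= end_time:
        return 1.0  # mirrors the divide-by-zero guard above
    return (end_time - start_time).total_seconds()

print(duration_in_seconds('2017-01-01 12:00:00', '2017-01-01 13:00:00'))  # 3600.0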
@@ -81,7 +84,7 @@ class BaseResourceEstimator(object):
             input_files[dptype].append(copy.deepcopy(dt_values))

             # Observation estimates have resource_count > 1 to be able to assign each output to another resource,
-            # but that is not supported atm for pipelines. We only use input params to produce parset filenames etc,
+            # but that is currently not supported for pipelines. We only use input parameters to produce parset filenames etc,
             # but not to reserve resources (not covered by resource count). Collapse to implied resource_count of 1.
             input_files[dptype][-1]['properties']['nr_of_' + dptype + '_files'] *= predecessor_estimate['resource_count']

         return True
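A worked example of that collapse, with values shaped like the docstring samples further down (illustrative only): a predecessor observation estimate with resource_count 20 and 1 uv file per resource unit becomes a single pipeline input entry holding all 20 files.

# Hypothetical predecessor estimate and input_files, for illustration.
predecessor_estimate = {'resource_count': 20}
input_files = {'uv': [{'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1}}]}

# Collapse to an implied resource_count of 1, as the code above does.
dptype = 'uv'
input_files[dptype][-1]['properties']['nr_of_' + dptype + '_files'] *= predecessor_estimate['resource_count']
print(input_files[dptype][-1]['properties']['nr_of_uv_files'])  # 20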
@@ -90,7 +93,7 @@ class BaseResourceEstimator(object):

     def get_inputs_from_predecessors(self, predecessor_estimates, identifications, dptype):
         """ Return copy of parts with dptype in predecessor_estimates matching identifications
-            If any of any of identifications could not be found, the empty dict is returned.
+            If any of the identifications could not be found, an empty dict is returned.
             dptype is one of the observation/pipeline data product types, e.g. 'uv', 'cs', 'pulp', ...
             No duplicates in the identifications iterable!
@@ -63,8 +63,15 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
           'resource_count': 20, 'root_resource_group': 'CEP4',
           'output_files': {
             'uv': [{'sap_nr': 2, 'identification': 'mom.G777955.B2.1.C.SAP002.uv.dps',
                     'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1, 'start_sb_nr': 0}},
-                   {'sap_nr': 3, 'identification': 'mom.G777955.B2.1.C.SAP003.uv.dps',
-                    'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1, 'start_sb_nr': 0}}
             ]
           }
+        },
+        {
+          'resource_types': {'bandwidth': 286331153, 'storage': 1073741824},  # per 'uv' dict
+          'resource_count': 20, 'root_resource_group': 'CEP4',
+          'output_files': {
+            'uv': [{'sap_nr': 3, 'identification': 'mom.G777955.B2.1.C.SAP003.uv.dps',
+                    'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 1, 'start_sb_nr': 20}}
+            ]
+          }
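To make the resource_count semantics in these docstring examples concrete: the total claim for an estimate is resource_count times each resource_types value. A quick check with the sample numbers above:

estimate = {'resource_types': {'bandwidth': 286331153, 'storage': 1073741824},
            'resource_count': 20}

# Total claim across all 20 resource units (one 'uv' file each).
totals = {rtype: value * estimate['resource_count']
          for rtype, value in estimate['resource_types'].items()}
print(totals)  # {'bandwidth': 5726623060, 'storage': 21474836480}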
@@ -81,7 +88,8 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
          'resource_types': {'bandwidth': 2236995 * 20, 'storage': 67109864 * 20},
          'resource_count': 1, 'root_resource_group': 'CEP4',
-         # input resources not (yet) allocated: bandwidth only, but coupled to specific storage resource
+         # Note that the 2 predecessor estimates have been converted into an input 'uv' list. This works,
+         # as long as input resources are not (yet) scheduled. Currently, resource_* values apply to output_files only.
          'input_files': {
            'uv': [{'sap_nr': 2, 'identification': 'mom.G777955.B2.1.C.SAP002.uv.dps',  # w/ sap only if predecessor is an observation
                    'properties': {'uv_file_size': 1073741824, 'nr_of_uv_files': 20, 'start_sb_nr': 0}},
@@ -110,19 +118,19 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
         For each estimate, the total output_files resources to be claimed is resource_count * resource_types.
         Thus resource_types is a total across all output_files content. The idea is to keep this
-        singular per data product type (inner list len 1), but for pipelines this is not possible atm.
+        singular per data product type (inner list size 1), but for pipelines this is currently not possible.

-        Note that atm input_files resources are not included or claimed.
-        However, input_files properties must be added to resource claims to later generate the parset.
+        Note that input_files resources are currently not included or claimed.
+        However, input_files properties must be added to resource claims to later generate parset values.
         This caveat must be fixed at some point, but until then, we cannot have input_files-only estimates.
-        (After it is fixed, we should not have that either; it makes no sense.)
+        (After it is fixed, we should not have input_files-only estimates either; it makes no sense.)

-        For pipelines we don't support output to multiple storage areas atm, so resource_count is 1.
+        For pipelines we currently do not support output to multiple storage areas, so resource_count is 1.
         We still have to deal with input_files from an observation with >1 SAP (used for the pulsar pipeline).
         For this case, we generate 1 estimate, but use a list per data product type (e.g. 'uv': [...]).
         Also, we may need multiple data product types in one pipeline estimate, but there the reason
-        is that e.g. 'uv' and 'im' file(s) belong together, so we must produce one estimate per pair,
-        (but again, it's a pipeline so atm it is collapsed to a single estimate, i.e. resource_count 1).
+        is that e.g. 'uv' and 'im' files belong together, so we produce one estimate per pair
+        (but again, it is a pipeline, so currently it is collapsed to a single estimate, thus resource_count 1).
         The inner data product type list can be removed once pipelines also use resource_count > 1.

         Some RA_Services design aspects work well. Others fail to capture the underlying concepts close enough, hence inelegance.
@@ -147,10 +155,11 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
                                      parset.getString('Observation.stopTime'))

         input_idents_uv = parset.getStringVector(DATAPRODUCTS + 'Input_Correlated.identifications')
-        input_files = self.get_inputs_from_predecessors(predecessor_estimates, input_idents_uv, 'uv')
-        if not input_files:
+        input_files_uv = self.get_inputs_from_predecessors(predecessor_estimates, input_idents_uv, 'uv')
+        if not input_files_uv:
             logger.error('Missing uv dataproducts in predecessor output_files')
             result['errors'].append('Missing uv dataproducts in predecessor output_files')
+        input_files = input_files_uv

         have_im_input = parset.getBool(DATAPRODUCTS + 'Input_InstrumentModel.enabled')
         if have_im_input:
@@ -166,8 +175,8 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
-        # Proper input bandwidth estimation has limited use atm and is tricky, because of pipeline duration est, tmp files,
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
+        # Proper input bandwidth estimation currently has limited use and is tricky because of pipeline duration estimation, tmp files,
         # multiple passes, nr nodes and caching, but for sure also because bandwidth must be tied to *predecessor* storage!
         #input_cluster_uv = parset.getString(DATAPRODUCTS + 'Input_Correlated.storageClusterName')
@@ -179,19 +188,20 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
             output_cluster_im = parset.getString(DATAPRODUCTS + 'Output_InstrumentModel.storageClusterName')
             if output_cluster_uv != output_cluster_im:
-                logger.warn('storageClusterName differs between uv: \'%s\' and im: \'%s\': to be packed in 1 estimate, so ignoring \'im\' storageClusterName',
-                            output_cluster_uv, output_cluster_im)
+                logger.error('Output_InstrumentModel is enabled, but its storageClusterName \'%s\' differs from Output_Correlated.storageClusterName \'%s\'',
+                             output_cluster_im, output_cluster_uv)
+                result['errors'].append('Output_InstrumentModel is enabled, but its storageClusterName \'%s\' differs from Output_Correlated.storageClusterName \'%s\'' % (output_cluster_im, output_cluster_uv))

-        # Observations can have multiple output estimates, but atm pipelines do not.
-        # (Reason: incomplete info avail and effective assigner claim merging is harder)
-        # As long as this is the case, try to do a best effort to map any predecessor (obs or pipeline) to single estimate output.
+        # Observations can have multiple output estimates, but currently pipelines do not.
+        # (Reason: incomplete info available and effective assigner claim merging is harder.)
+        # As long as this is the case, make a best effort to map any predecessor (observation or pipeline) to a single estimate output.
         nr_input_files = sum([uv_dict['properties']['nr_of_uv_files'] for uv_dict in input_files['uv']])
         # Assume all uv file sizes are the same size as in dict 0. For uv data, we never had pipelines with >1 dict,
         # but this could be meaningful when averaging multiple SAPs in 1 go (and no further processing steps).
         # (Never done, since subsequent pipeline steps must then also work on all SAPs. But averaging could be the last step.)
-        # The potential other case is >1 dict from different obs with different file sizes.
-        # In general, this requires >1 output est dict, which the estimate fmt allows, but atm is only used for observations.
+        # The other potential case is >1 dict from different observations with different file sizes.
+        # In general, this requires >1 output estimate dict, which the estimate format allows, but is currently only used for observations.
         uv_input_file_size = input_files['uv'][0]['properties']['uv_file_size']

         # For start_sb_nr, take the minimum of all start_sb_nr values.
@@ -201,8 +211,8 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
         start_sb_nr = min([uv_dict['properties']['start_sb_nr'] for uv_dict in input_files['uv']])

         # TODO: This output file size calculation comes from the (old) Scheduler without explaining comments.
-        # The reason why it isn't a simple div, is that parts of the metadata are not reduced in size (and casacore storage mgrs).
-        # With reduction_factor 1, computed output size increases by 53%. Casacore storage mgrs may change size, but that much?!?
+        # The reason why it isn't a simple division is that parts of the metadata are not reduced in size (and casacore storage managers).
+        # With reduction_factor 1, the computed output size increases by 53%. Casacore storage managers may change the size, but that much?!?
         # If you can figure out what/how, please fix this calculation. Avoid unnamed magic values and document why!
         logger.debug("calculate correlated data size")
         new_size = uv_input_file_size / float(reduction_factor)
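A worked illustration of the reduction step above, with made-up numbers (the subsequent metadata size correction that produces the ~53% overhead lies outside this hunk and is not reproduced here):

uv_input_file_size = 1073741824  # 1 GiB, as in the docstring examples
reduction_factor = 4             # illustrative averaging factor

new_size = uv_input_file_size / float(reduction_factor)
print(int(new_size))  # 268435456 bytes, before the metadata size correction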
@@ -227,15 +237,15 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator):
             # Need to split averaging pipeline and calibration pipeline
             data_size += im_file_size

-        data_size *= nr_output_files # bytes
-        if data_size:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = data_size * nr_output_files # bytes
+        if total_data_size:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_uv
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
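The bandwidth claim follows directly from the total output size and the pipeline duration: 8 bits per byte, rounded up to whole bits per second. The same pattern recurs in the image, long-baseline, and pulsar estimators below. A worked example with round, illustrative values:

from math import ceil

total_data_size = 20 * 1073741824  # 20 uv files of 1 GiB each, in bytes
duration = 3600.0                  # pipeline duration in fractional seconds

bandwidth = int(ceil(8 * total_data_size / duration))  # bits/second
print(bandwidth)  # 47721859 bits/second, i.e. ~47.7 Mbit/s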
@@ -89,14 +89,14 @@ class ImagePipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
         # Proper input bandwidth est has limited use and is tricky, because of pipeline duration est, tmp files, multiple passes, nr nodes and caching, ...
         #input_cluster_uv = parset.getString(DATAPRODUCTS + 'Input_Correlated.storageClusterName')

         output_ident_img = self._getOutputIdentification( parset.getStringVector(DATAPRODUCTS + 'Output_SkyImage.identifications') )
         output_cluster_img = parset.getString(DATAPRODUCTS + 'Output_SkyImage.storageClusterName')

-        # See the calibration pipeline estimator for why this is done in this way atm.
+        # See the calibration pipeline estimator for why this is currently done this way.
         nr_input_subbands = sum([uv_dict['properties']['nr_of_uv_files'] for uv_dict in input_files['uv']])
         uv_file_size = input_files['uv'][0]['properties']['uv_file_size']
         if nr_input_subbands % (subbands_per_image * slices_per_image) > 0:
@@ -112,16 +112,15 @@ class ImagePipelineResourceEstimator(BasePipelineResourceEstimator):
                                          'properties': {'nr_of_img_files': nr_images,
                                                         'img_file_size': img_file_size}}]}

-        # count total data size
-        data_size = nr_images * img_file_size # bytes
-        if data_size:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = nr_images * img_file_size # bytes
+        if total_data_size:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_img
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
@@ -89,14 +89,14 @@ class LongBaselinePipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
         # Proper input bandwidth est has limited use and is tricky, because of pipeline duration est, tmp files, multiple passes, nr nodes and caching, ...
         #input_cluster_uv = parset.getString(DATAPRODUCTS + 'Input_Correlated.storageClusterName')

         output_ident_uv = self._getOutputIdentification( parset.getStringVector(DATAPRODUCTS + 'Output_Correlated.identifications') )
         output_cluster_uv = parset.getString(DATAPRODUCTS + 'Output_Correlated.storageClusterName')

-        # See the calibration pipeline estimator for why this is done in this way atm.
+        # See the calibration pipeline estimator for why this is currently done this way.
         nr_input_files = sum([uv_dict['properties']['nr_of_uv_files'] for uv_dict in input_files['uv']])
         uv_input_file_size = input_files['uv'][0]['properties']['uv_file_size']
         start_sb_nr = min([uv_dict['properties']['start_sb_nr'] for uv_dict in input_files['uv']])
@@ -119,16 +119,15 @@ class LongBaselinePipelineResourceEstimator(BasePipelineResourceEstimator):
                                         'uv_file_size': uv_output_file_size,
                                         'start_sbg_nr': start_sbg_nr}}]}

-        # count total data size
-        data_size = nr_output_files * uv_output_file_size # bytes
-        if data_size:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = nr_output_files * uv_output_file_size # bytes
+        if total_data_size:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_uv
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
@@ -96,7 +96,7 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
         estimate = {'input_files': input_files}

-        # NOTE: input bandwidth is not included in the resulting estimate atm.
+        # NOTE: input bandwidth is currently not included in the resulting estimate.
         # Proper input bandwidth est has limited use and is tricky, because of pipeline duration est, tmp files, multiple passes, nr nodes and caching, ...
         #input_cluster_cs = parset.getString(DATAPRODUCTS + 'Input_CoherentStokes.storageClusterName')
         #input_cluster_is = parset.getString(DATAPRODUCTS + 'Input_IncoherentStokes.storageClusterName')
@@ -105,7 +105,7 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
         output_cluster_pulp = parset.getString(DATAPRODUCTS + 'Output_Pulsar.storageClusterName')

         # The pulsar pipeline ('pulp') produces 1 data product per tied-array beam, it seems also for complex voltages (XXYY) and stokes IQUV(?).
-        # For XXYY it really needs all 4 components at once. For IQUV this is less important, but atm we treat it the same (1 obs output estimate).
+        # For XXYY it really needs all 4 components at once. For IQUV this is less important, but currently we treat it the same (1 obs output estimate).
         # Note that it also produces 1 additional "summary" data product per data product *type* (i.e. 1 for 'cs' and/or 1 for 'is'),
         # but the RA_Services sub-system does not know about it. Adding support may be a waste of time(?).
         # Currently, RO controlled pulp grabs all inputs given some project name/id(?) and obs id, not from rotspservice generated parset parts.
@@ -126,15 +126,15 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator):
                                                       'pulp_file_size': pulp_file_size}}]}

         # count total data size
-        data_size = nr_input_files * pulp_file_size
-        if data_size > 0:
-            bandwidth = int(ceil(8 * data_size / duration)) # bits/second
-            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': data_size}
+        total_data_size = nr_input_files * pulp_file_size
+        if total_data_size > 0:
+            bandwidth = int(ceil(8 * total_data_size / duration)) # bits/second
+            estimate['resource_types'] = {'bandwidth': bandwidth, 'storage': total_data_size}
             estimate['resource_count'] = 1
             estimate['root_resource_group'] = output_cluster_pulp
         else:
-            logger.error('An estimate of zero was calculated!')
-            result['errors'].append('An estimate of zero was calculated!')
+            logger.error('Estimated total data size is zero!')
+            result['errors'].append('Estimated total data size is zero!')

         result['estimates'].append(estimate)
@@ -50,7 +50,8 @@ class ReservationResourceEstimator(BaseResourceEstimator):
         logger.info("start estimate '{}'".format(self.name))
         logger.info('parset: %s ' % parset)

         # NOTE: Observation.stopTime may differ from real stop time, because of Cobalt block size not being exactly 1.0 s.
-        duration = self._getDuration(parset.getString('Observation.startTime'), parset.getString('Observation.stopTime'))
+        duration = self._getDuration(parset.getString('Observation.startTime'),
+                                     parset.getString('Observation.stopTime'))

         errors = []
         estimates = []
@@ -85,7 +86,7 @@ class ReservationResourceEstimator(BaseResourceEstimator):
             rsps, channelbits = self._max_rsps(station)

-            bitfield = len(rculists[station])*'1' # claim all RCUs irrespective of use in the given antennaset; we actually only need the AntennasetsParser to obatin the numbe rof RCUs
+            bitfield = len(rculists[station])*'1' # claim all RCUs irrespective of use in the given antennaset; we actually only need the AntennasetsParser to obtain the number of RCUs
             est = {'resource_types': {'rcu': bitfield},
                    'resource_count': 1,
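The RCU claim is a bitmask string with one character per RCU; claiming all of them is simply a run of '1's as long as the station's RCU list. A minimal sketch (the rculists content is a made-up stand-in for what the AntennasetsParser provides):

rculists = {'CS001': ['RCU%d' % i for i in range(96)]}  # assumed: a station with 96 RCUs
station = 'CS001'

bitfield = len(rculists[station]) * '1'  # claim every RCU
print(len(bitfield), bitfield[:8])       # 96 11111111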
@@ -89,13 +89,22 @@ class ResourceEstimatorHandler(MessageHandlerInterface):
             predecessor_estimates = []
             for branch_otdb_id, branch_estimate in branch_estimates.items():
                 logger.info('Looking at predecessor %s' % branch_otdb_id)
                 estimates = branch_estimate.values()[0]['estimates']
-                if any(['uv' in est['output_files'] and 'im' not in est['output_files'] for est in estimates if 'output_files' in est]): # Not a calibrator pipeline
-                    logger.info('found %s as the target of pipeline %s' % (branch_otdb_id, otdb_id))
-                    predecessor_estimates.extend(estimates)
-                elif any(['im' in est['output_files'] for est in estimates if 'output_files' in est]):
-                    logger.info('found %s as the calibrator of pipeline %s' % (branch_otdb_id, otdb_id))
-                    predecessor_estimates.extend(estimates)
+                for est in estimates:
+                    if 'output_files' not in est:
+                        continue
+                    has_uv = 'uv' in est['output_files']
+                    has_im = 'im' in est['output_files']
+                    if has_uv and not has_im: # Not a calibrator pipeline
+                        logger.info('found %s as the target of pipeline %s' % (branch_otdb_id, otdb_id))
+                        predecessor_estimates.extend(estimates)
+                        break
+                    elif has_im:
+                        logger.info('found %s as the calibrator of pipeline %s' % (branch_otdb_id, otdb_id))
+                        predecessor_estimates.extend(estimates)
+                        break

             return {str(otdb_id): self.add_id(self.calibration_pipeline.verify_and_estimate(parset, predecessor_estimates), otdb_id)}

         if len(branch_estimates) > 1:
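The refactored loop classifies each predecessor by the data product types in its output_files: 'uv' without 'im' marks the target, any 'im' marks the calibrator. A standalone sketch of that decision, using hypothetical estimate dicts rather than the handler's real input:

def classify_predecessor(estimates):
    """Return 'target', 'calibrator', or None, mirroring the loop above."""
    for est in estimates:
        if 'output_files' not in est:
            continue
        has_uv = 'uv' in est['output_files']
        has_im = 'im' in est['output_files']
        if has_uv and not has_im:
            return 'target'       # not a calibrator pipeline
        elif has_im:
            return 'calibrator'
    return None

print(classify_predecessor([{'output_files': {'uv': []}}]))            # target
print(classify_predecessor([{'output_files': {'uv': [], 'im': []}}]))  # calibrator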