diff --git a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py index 3e70917d14489880d1c847322ef6badc0f2560e1..10f9c3b0d044b51e9a954da6f249741826f7d87c 100755 --- a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py +++ b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py @@ -98,6 +98,7 @@ class RAtoOTDBTranslator(): result = {} # Split storage_property_list in items wrt observation and wrt pipeline. + total_nr_files = 0 obs_max_uv_sap_nr = -1 obs_next_sb_nrs_per_sap = [] obs_locations = [None] * 1024 # the easy way and 1024 is enough even in 4 bit mode @@ -131,20 +132,22 @@ class RAtoOTDBTranslator(): prop['resource_name']) + '/uv' obs_filenames[sb_nr] = "L%d_SAP%03d_SB%03d_uv.MS" % (otdb_id, sap_nr, sb_nr) obs_skip.append("0") # what's this for? + total_nr_files += 1 obs_next_sb_nrs_per_sap[sap_nr] = sb_nr + 1 if obs_max_uv_sap_nr >= 0: - total_nr_files = obs_next_sb_nrs_per_sap[-1] - if total_nr_files < 1: + if total_nr_files == 0: raise Exception('CreateCorrelated: skipping obs parset filenames: total_nr_files = %d' % total_nr_files) logger.info('CreateCorrelated: total_nr_files = %d', total_nr_files) - obs_locations = obs_locations[ : total_nr_files] # truncate trailing None init values - obs_filenames = obs_filenames[ : total_nr_files] # idem - if None in obs_locations or None in obs_filenames: - logger.error('CreateCorrelated: skipping obs parset filenames: None in locations = %s', obs_locations) - logger.error('CreateCorrelated: skipping obs parset filenames: None in filenames = %s', obs_filenames) - raise Exception('CreateCorrelated: skipping obs parset filenames: None in locations and/or filenames') + obs_locations = [loc for loc in obs_locations if loc is not None] # strip unused init values + obs_filenames = [fn for fn in obs_filenames if fn is not None] # idem + if len(obs_locations) != total_nr_files or len(obs_filenames) != total_nr_files: + # If the total nr_of_uv_files in a SAP does not correspond to the start_sb_nr + # props of this and the next SAP, entries have been overwritten. Bail in that case. + logger.error('CreateCorrelated: skipping obs parset filenames: len(obs_locations) = %d and/or len(obs_filenames) = %d not equal to total_nr_files = %d', + len(obs_locations), len(obs_filenames), total_nr_files) + raise Exception('CreateCorrelated: skipping obs parset filenames: unexpected nr of locations and/or filenames vs total_nr_files') result[PREFIX + 'DataProducts.%s_Correlated.locations' % (io_type)] = '[' + to_csv_string(obs_locations) + ']' result[PREFIX + 'DataProducts.%s_Correlated.filenames' % (io_type)] = '[' + to_csv_string(obs_filenames) + ']' @@ -369,7 +372,7 @@ class RAtoOTDBTranslator(): if 'start_sb_nr' in prop: sb_nr = prop['start_sb_nr'] - otdb_id = sap['properties']['im_otdb_id'] + otdb_id = prop['im_otdb_id'] for _ in xrange(prop['nr_of_im_files']): locations.append(self.locationPath(cluster, project_name, otdb_id, prop['resource_name']) + '/im') @@ -396,7 +399,7 @@ class RAtoOTDBTranslator(): if 'nr_of_img_files' not in prop: continue - otdb_id = sap['properties']['img_otdb_id'] + otdb_id = prop['img_otdb_id'] for _ in xrange(prop['nr_of_img_files']): locations.append(self.locationPath(cluster, project_name, otdb_id, prop['resource_name']) + '/img') @@ -423,7 +426,7 @@ class RAtoOTDBTranslator(): if 'nr_of_pulp_files' not in prop: continue - otdb_id = sap['properties']['pulp_otdb_id'] + otdb_id = prop['pulp_otdb_id'] for _ in xrange(prop['nr_of_pulp_files']): locations.append(self.locationPath(cluster, project_name, otdb_id, prop['resource_name']) + '/pulp') diff --git a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py index 1fd5610b3c1a07a1d2bd5bba806f2c08a6dc2fca..ca2d4c243e560195d3c5871015d5813c80e6c1ec 100644 --- a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py +++ b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/calibration_pipeline.py @@ -162,7 +162,7 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator): uv_output_file_size = int(new_size + new_size / 64.0 * (1.0 + reduction_factor) + new_size / 2.0) nr_output_files = nr_input_files # pure 'map' (bijective) operation, no split or reduce - logger.info("correlated_uv: {} files {} bytes each".format(nr_output_files, uv_output_file_size)) + logger.info("correlated_uv: {}x {} files of {} bytes each".format(pred_est['resource_count'], nr_output_files, uv_output_file_size)) estimate['output_files'] = {'uv': {'identification': output_ident_uv, 'properties': {'nr_of_uv_files': nr_output_files, 'uv_file_size': uv_output_file_size, 'start_sb_nr': start_sb_nr}}} data_size = uv_output_file_size @@ -173,7 +173,7 @@ class CalibrationPipelineResourceEstimator(BasePipelineResourceEstimator): logger.info("calculate instrument-model data size") im_file_size = 1000 # TODO: 1 kB was hardcoded in the Scheduler - logger.info("correlated_im: {} files {} bytes each".format(nr_output_files, im_file_size)) + logger.info("correlated_im: {}x {} files {} bytes each".format(pred_est['resource_count'], nr_output_files, im_file_size)) estimate['output_files']['im'] = {'identification': output_ident_im, 'properties': {'nr_of_im_files': nr_output_files, 'im_file_size': im_file_size, 'start_sb_nr': start_sb_nr}} # FIXME I don't think this is technically correct, as the IM files are sometimes created and used, just not a required export? diff --git a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py index 82dd7e52515d81710cb4d21914f002bc618f592a..4a8b6b6a9031b8fd5e1063acc589b97ed19e2651 100644 --- a/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py +++ b/SAS/ResourceAssignment/ResourceAssignmentEstimator/resource_estimators/pulsar_pipeline.py @@ -116,7 +116,7 @@ class PulsarPipelineResourceEstimator(BasePipelineResourceEstimator): logger.debug("calculate pulp data size") pulp_file_size = 1000 # TODO: 1 kB was hardcoded in the Scheduler - logger.info("pulsar_pipeline pulp: {} files {} bytes each".format(nr_output_files, pulp_file_size)) + logger.info("pulsar_pipeline pulp: {}x {} files {} bytes each".format(pred_est['resource_count'], nr_output_files, pulp_file_size)) estimate['output_files'] = {'pulp': {'identification': output_ident_pulp, 'properties': {'nr_of_pulp_files': nr_output_files, # times pred_est['resource_count'] in total 'pulp_file_size': pulp_file_size}}}