diff --git a/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt b/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt index 0ac8b2d2d22d7404b12dad8944a9382d026129c7..2992d61320795c34393a5316c85250c2c2774049 100644 --- a/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt +++ b/CEP/Pipeline/framework/lofarpipe/CMakeLists.txt @@ -18,6 +18,7 @@ python_install( support/baserecipe.py support/clusterdesc.py support/control.py + support/data_map.py support/group_data.py support/jobserver.py support/lofarexceptions.py diff --git a/CEP/Pipeline/framework/lofarpipe/cuisine/cook.py b/CEP/Pipeline/framework/lofarpipe/cuisine/cook.py index de2bece9051b7924b4d8ae7a96018b5aa636d8d8..070d3a1085a2c045af142baade8fd9d868326851 100644 --- a/CEP/Pipeline/framework/lofarpipe/cuisine/cook.py +++ b/CEP/Pipeline/framework/lofarpipe/cuisine/cook.py @@ -1,5 +1,5 @@ #from message import ErrorLevel, NotifyLevel, VerboseLevel, DebugLevel -import time, os, select, pty, fcntl, sys, logging, imp +import time, os, select, sys, logging, imp from lofarpipe.support.pipelinelogging import getSearchingLogger class CookError(Exception): @@ -95,6 +95,7 @@ class SystemCook(WSRTCook): def spawn(self, env=None): """Try to start the task.""" + import pty try: (self._pid, self._child_fd) = pty.fork() except OSError, e: @@ -120,6 +121,7 @@ class SystemCook(WSRTCook): sys.stderr.write('Process could not be started: ' + self.task) os._exit(1) else: ## the parent + import fcntl ## self.poll.register(self._child_fd) ## self.poll.register(self._errorpipe_end) os.close(self._errorpipe_front) ## close what we don't need diff --git a/CEP/Pipeline/framework/lofarpipe/support/baserecipe.py b/CEP/Pipeline/framework/lofarpipe/support/baserecipe.py index 3b43a90c4864da9ab9b3f5cc6885d8723b31fab8..7753efa750678998e3ae3f8ee4915a02444aedc3 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/baserecipe.py +++ b/CEP/Pipeline/framework/lofarpipe/support/baserecipe.py @@ -18,7 +18,7 @@ import lofarpipe.support.utilities as utilities from lofarpipe.support.lofarexceptions import PipelineException, PipelineRecipeFailed from lofarpipe.cuisine.WSRTrecipe import WSRTrecipe from lofarpipe.support.lofaringredient import RecipeIngredients, LOFARinput, LOFARoutput -from lofarpipe.support.group_data import store_data_map +from lofarpipe.support.data_map import DataMap class BaseRecipe(RecipeIngredients, WSRTrecipe): """ @@ -40,7 +40,7 @@ class BaseRecipe(RecipeIngredients, WSRTrecipe): self.error.clear() # Environment variables we like to pass on to the node script. 
self.environment = dict( - (k,v) for (k,v) in os.environ.iteritems() + (k, v) for (k, v) in os.environ.iteritems() if k.endswith('PATH') or k.endswith('ROOT') ) @@ -245,11 +245,11 @@ class BaseRecipe(RecipeIngredients, WSRTrecipe): self.logger.debug("Pipeline start time: %s" % self.inputs['start_time']) - def _store_data_map(self, path, mapfile, message=""): + def _store_data_map(self, path, data_map, message=""): """ - Write mapfile to path, display debug error message on the logger + Write data_map to path, display debug error message on the logger """ - store_data_map(path, mapfile) - self.logger.debug("Wrote mapfile <{0}>: {1}".format( + data_map.save(path) + self.logger.debug("Wrote data_map <{0}>: {1}".format( path, message)) diff --git a/CEP/Pipeline/framework/lofarpipe/support/control.py b/CEP/Pipeline/framework/lofarpipe/support/control.py index eabd0069f78f3db53aabf91f76a038ed609ede17..87fd76794a3e6a19c9ab3efa4e5eba4ef1660a48 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/control.py +++ b/CEP/Pipeline/framework/lofarpipe/support/control.py @@ -43,9 +43,11 @@ class control(StatefulRecipe): self.logger.error("*******************************************") self.logger.error("Failed pipeline run: {0}".format( self.inputs['job_name'])) + self.logger.error(message) #message does not contain the original exception thrown in recipe - self.logger.error("\n" + - get_active_stack(self).toprettyxml(encoding='ascii')) + if get_active_stack(self) != None: + self.logger.error("\n" + + get_active_stack(self).toprettyxml(encoding='ascii')) self.logger.error("*******************************************") return 1 diff --git a/CEP/Pipeline/framework/lofarpipe/support/data_map.py b/CEP/Pipeline/framework/lofarpipe/support/data_map.py new file mode 100644 index 0000000000000000000000000000000000000000..d95ef2be7209f2e78b70cd62ebe22d10de8d63a8 --- /dev/null +++ b/CEP/Pipeline/framework/lofarpipe/support/data_map.py @@ -0,0 +1,312 @@ +# LOFAR PIPELINE FRAMEWORK +# +# Handle data-map file containing Data Product descriptions +# Marcel Loose, 2012 +# loose@astron.nl +# ------------------------------------------------------------------------------ + +""" +This module contains methods to load and store so-called data-map file and +to iterate over these maps. Data-map file contain a description of the +different input- and output- data products in a human readable form. +""" + +from lofarpipe.support.lofarexceptions import DataMapError +from lofarpipe.support.utilities import deprecated + +class DataProduct(object): + """ + Class representing a single data product. + """ + def __init__(self, host, file, skip=True): + + self.host = str(host) + self.file = str(file) + self.skip = bool(skip) + + def __repr__(self): + """Represent an instance as a Python dict""" + return ( + "{'host': '%s', 'file': '%s', 'skip': %s}" % + (self.host, self.file, self.skip) + ) + + def __str__(self): + """Print an instance as 'host:file'""" + return ':'.join((self.host, self.file)) + + def __eq__(self, other): + """Compare for equality""" + return ( + self.host == other.host and + self.file == other.file and + self.skip == other.skip + ) + + def __ne__(self, other): + """Compare for non-equality""" + return not self.__eq__(other) + + +class DataMap(object): + """ + Class representing a data-map, which basically is a collection of data + products. + """ + class TupleIterator(object): + """ + Iterator returning data-map entries as tuple (host, file). Use this + iterator for backward compatibility. 
+ """ + def __init__(self, data): + self.data = data + self.index = 0 + + def __iter__(self): + return self + + def next(self): + try: + value = self.data[self.index] + except IndexError: + raise StopIteration + self.index += 1 + return (value.host, value.file) + + class SkipIterator(object): + """ + Iterator returning only data-map entries whose `skip` attribute is + False. + """ + def __init__(self, data): + self.data = data + self.index = 0 + + def __iter__(self): + return self + + def next(self): + while(True): + try: + value = self.data[self.index] + except IndexError: + raise StopIteration + self.index += 1 + if not value.skip: + return value + + def __init__(self, data=list(), iterator=iter): + + self._data = list() + self.data = data + self.iterator = iterator + + def __repr__(self): + return repr(self.data) + + def __iter__(self): + return self.iterator(self.data) + + def __len__(self): + return len(self.data) + + def __getitem__(self, index): + return self.data[index] + + def __eq__(self, other): + if type(other) != type(self): + # comparison of DataMap and MultiDataMap should always fail. + # ( We do engineering and not set logic) + return False + return all(x == y for (x, y) in zip(self.data, other.data)) + + def __ne__(self, other): + return not self.__eq__(other) + + @classmethod + def load(cls, filename): + """Load a data map from file `filename`. Return a DataMap instance.""" + with open(filename) as f: + return cls(eval(f.read())) + + def save(self, filename): + """Save a data map to file `filename` in human-readable form.""" + with open(filename, 'w') as f: + f.write(repr(self.data)) + + @property + def data(self): + """Property to get self.data""" + return self._data + + @data.setter + def data(self, data): + """Property to set self.data, allows do input validation.""" + self._set_data(data, dtype=DataProduct) + + def _set_data(self, data, dtype=DataProduct): + try: + if all(isinstance(item, dtype) for item in data): + self._data = data + elif all(isinstance(item, dict) for item in data): + self._data = [dtype(**item) for item in data] + elif all(isinstance(item, tuple) for item in data): + self._data = [dtype(*item) for item in data] + else: + raise TypeError + except TypeError: + raise DataMapError("Failed to validate data map: %s" % repr(data)) + +class MultiDataMap(DataMap): + """ + Class representing a specialization of data-map, a collection of data + products located on the same node, skippable as a set and individually + """ + @DataMap.data.setter + def data(self, data): + self._set_data(data, dtype=MultiDataProduct) + + + +class MultiDataProduct(object): + """ + Class representing a single data product. 
+ """ + def __init__(self, host, file, skip=True, file_skip=None): + self.host = str(host) + try: + # Try parsing as a list + file_list = eval(file) + if isinstance(file_list, list): + self.file = file_list + except: + self.file = file + + # Create a default skip field with the same truth value is the 'parent' + if file_skip == None: + self.file_skip = [bool(skip)] * len(self.file) + else: + # All entries in the boollist should be a bool + for entry in file_skip: + if not isinstance(entry, bool): + raise TypeError + # The length of the boollist should + if len(file_skip) != len(self.file): + raise TypeError + self.file_skip = file_skip + + self.skip = bool(skip) + + def __repr__(self): + """Represent an instance as a Python dict""" + return ( + "{'host': '%s', 'skip': %s, 'file': \"%s\", 'file_skip' : %s}" % + (self.host, self.skip, self.file, self.file_skip) + ) + + def __str__(self): + """Print an instance as 'host:file'""" + return ':'.join((self.host, self.file)) + + def __eq__(self, other): + """Compare for equality""" + return ( + self.host == other.host and + self.file == other.file and + self.file_skip == other.file_skip and + self.skip == other.skip + ) + + def __ne__(self, other): + """Compare for non-equality""" + return not self.__eq__(other) + +@deprecated +def load_data_map(filename): + """ + Load a data map from file `filename` and return it as a DataMap object. + The file should either contain a list of dict (new-style), containing items + host, file, and skip; or a list of tuple (old-style), containing host and + file. + This method is for backward compatibility. New code should use + `DataMap.load` instead. The iterator of the returned DataMap object is set + to TupleIterator, so that existing code that expects tuples of (host,file) + when iterating over a data map's contents won't break. + """ + data_map = DataMap.load(filename) + data_map.iterator = DataMap.TupleIterator + return data_map + + +@deprecated +def store_data_map(filename, data): + """ + Store a data map in map-file `filename`. Assume the argument is a new-style + DataMap object. If that fails, assume data is old-style list of tuples of + (host, filepath). In either case, the data is written in the new format: + a list of dict containing items host, file, and skip. + This method is for backward compatibility. New code should use the method + `DataMap.save` instead. + """ + try: + # Assume `data` is a DataMap object. + data.save(filename) + except AttributeError: + # Assume `data` can be used to construct a DataMap object. + DataMap(data).save(filename) + + +def validate_data_maps(*args): + """ + Validate the IO product specifications in the data maps `args`. + + Requirements imposed on product specifications: + - Length of all product lists must be equal. + - All data-products must reside on the same host. + + Return True if all requirements are met, otherwise return False. + """ + # Check if all data maps have equal length. We do this by creating a set + # from a tuple of lenghts of `args`. The set must have length 1. + if len(set(len(arg) for arg in args)) != 1: + return False + + # Next, check if the data products in `args`, when matched by index, + # reside on the same host. We can use the same trick as before, by + # checking the size of a set created from a tuple of hostnames. 
+ for i in xrange(len(args[0])): + if len(set(arg[i].host for arg in args)) != 1: + return False + + return True + + +@deprecated +def tally_data_map(data, glob, logger=None): + """ + Verify that the file specified in the data map `data` exist on the cluster. + The glob pattern `glob` should contain the pattern to be used in the search. + This function will return a list of booleans: True for each item in `data` + that is present on the cluster; False otherwise. + This method is deprecated, because the new data-map file keep track of the + `skip` attribute of each data product in the data-map. + """ + import os + from lofar.mstools import findFiles + + # Determine the directories to search. Get unique directory names from + # `data` by creating a set first. + dirs = list(set(os.path.dirname(d.file) for d in data)) + + # Compose the filename glob-pattern. + glob = ' '.join(os.path.join(d, glob) for d in dirs) + + # Search the file on the cluster using the glob-pattern; turn them into a + # list of tuples. + if logger: + logger.debug("Searching for file: %s" % glob) + found = zip(*findFiles(glob, '-1d')) + + # Return a mask containing True if file exists, False otherwise + return [(f.host, f.file) in found for f in data] + diff --git a/CEP/Pipeline/framework/lofarpipe/support/group_data.py b/CEP/Pipeline/framework/lofarpipe/support/group_data.py index 06af8ce7366463fd679beaa694d76a6be3a4540f..89b1c92fb2ea7c40cb378a0b62054c478ac9ffdb 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/group_data.py +++ b/CEP/Pipeline/framework/lofarpipe/support/group_data.py @@ -3,18 +3,24 @@ # Group data into appropriate chunks for processing # John Swinbank, 2009-10 # swinbank@transientskp.org +# Marcel Loose, 2011-12 +# loose@astron.nl # ------------------------------------------------------------------------------ from collections import defaultdict -import os import subprocess -from lofar.mstools import findFiles import lofarpipe.support.utilities as utilities from lofarpipe.support.clusterdesc import get_compute_nodes from lofarpipe.support.parset import Parset +# Data map methods were moved to a separate module. +# Importing them for backward compatibility. +from lofarpipe.support.data_map import ( + load_data_map, store_data_map, validate_data_maps, tally_data_map +) + def group_files(logger, clusterdesc, node_directory, group_size, filenames): """ Group a list of files into blocks suitable for simultaneous @@ -91,110 +97,3 @@ def gvds_iterator(gvds_file, nproc=4): else: yield yieldable -#def load_data_map(filename): - #""" - #Load a mapping of filename <-> compute node from a parset on disk. - #""" - #datamap = Parset(filename) - #data = [] - #for host in datamap: - #for filename in datamap.getStringVector(host): - #data.append((host, filename)) - #return data - -#def store_data_map(filename, data): - #""" - #Store a mapping of filename <-> compute node as a parset on disk. - #""" - #datamap = defaultdict(list) - #for (host,file) in data: - #datamap[host].append(file) - #outfile = open(filename, 'w') - #for key in sorted(datamap): - #outfile.write('%s = %s\n' % (key, datamap[key])) - #outfile.close() - - -def validate_data_maps(*args): - """ - Validate the IO product specifications in the data maps `args`. Each data - map must be a list of tuples (hostname, filepath). - - Requirements imposed on product specifiations: - - Length of all product lists must be equal. - - All data-products must reside on the same node. - - Return True if all requirements are met, otherwise return False. 
- """ - # Precondition check on `args`. All arguments must be lists; and all - # lists must contains tuples of length 2. - for arg in args: - assert( - isinstance(arg, list) and - all(isinstance(item, tuple) and len(item) == 2 for item in arg) - ) - - # Check if all lists have equal length. We do this by creating a set - # from a tuple of lenghts of `args`. The set must have length 1. - if len(set(len(arg) for arg in args)) != 1: - return False - - # Next, check if the data products in `args`, when matched by index, - # reside on the same node. We can use the same trick as before, by - # checking the size of a set created from a tuple of hostnames. - for i in xrange(len(args[0])): - if len(set(arg[i][0] for arg in args)) != 1: - return False - - return True - - -def load_data_map(filename): - """ - Load map-file `filename` containing tuples of (host,filepath) - """ - file = open(filename) - data = eval(file.read()) - file.close() - if not validate_data_maps(data): - raise TypeError("Map-file data validation failed") - return data - - -def store_data_map(filename, data): - """ - Store tuples of (host,filepath) in a map-file `filename`. - """ - if not validate_data_maps(data): - raise TypeError("Map-file data validation failed") - file = open(filename, 'w') - file.write(repr(data)) - file.close() - - -def tally_data_map(data, glob, logger=None): - """ - Verify that the files specified in the data map `data` exist on the cluster. - The glob pattern `glob` should contain the pattern to be used in the search. - This function will return a list of booleans: True for each item in `data` - that is present on the cluster; False otherwise. - """ - # Check that `data` is in the correct format - validate_data_maps(data) - - # Determine the directories to search. Get unique directory names from - # `data` by creating a set first. - dirs = list(set(os.path.dirname(d[1]) for d in data)) - - # Compose the filename glob-pattern. - glob = ' '.join(os.path.join(d, glob) for d in dirs) - - # Search the files on the cluster using the glob-pattern; turn them into a - # list of tuples. 
- if logger: - logger.debug("Searching for files: %s" % glob) - found = zip(*findFiles(glob, '-1d')) - - # Return a mask containing True if file exists, False otherwise - return [f in found for f in data] - diff --git a/CEP/Pipeline/framework/lofarpipe/support/lofarexceptions.py b/CEP/Pipeline/framework/lofarpipe/support/lofarexceptions.py index 2df127fe0d975f153d026726d486ac82f7656c44..64969513c2da6e4e8c6145d7f2096dcbfba6b72b 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/lofarexceptions.py +++ b/CEP/Pipeline/framework/lofarpipe/support/lofarexceptions.py @@ -26,3 +26,7 @@ class PipelineQuit(PipelineException): class ClusterError(PipelineException): pass + +class DataMapError(PipelineException): + pass + diff --git a/CEP/Pipeline/framework/lofarpipe/support/loggingdecorators.py b/CEP/Pipeline/framework/lofarpipe/support/loggingdecorators.py index 66e3c27a787e50d85b24a16060f3d53b0b2b9cf4..981f49be9ae297aa6c30959e3884763f0a223984 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/loggingdecorators.py +++ b/CEP/Pipeline/framework/lofarpipe/support/loggingdecorators.py @@ -9,7 +9,6 @@ import time from lofarpipe.support.xmllogging import enter_active_stack, \ exit_active_stack, get_active_stack - def xml_node(target): """ function decorator to be used on member functions of (pipeline) @@ -71,23 +70,27 @@ def mail_log_on_exception(target): if return_value != 0: raise Exception("Non zero pipeline output") # Mail main dev on succesfull run - msg_string = get_active_stack( - calling_object).toprettyxml(encoding='ascii') + stack = get_active_stack(calling_object) + if stack != None: + msg_string = stack.toprettyxml(encoding='ascii') + else: + msg_string = "No additional pipeline data available" + _mail_msg_to("pipeline_finished", "klijn@astron.nl", "pipeline finished", msg_string) - except Exception, message: # Static list of mail to be send (could be made configurable, # but yeah temp mail functionality so...) mail_list = ["klijn@astron.nl", - "pizzo@astron.nl", "orru@astron.nl" + pizzo@astron.nl", "orru@astron.nl" ] # get the active stack - active_stack_data = get_active_stack( - calling_object).toprettyxml(encoding='ascii') - + stack = get_active_stack(calling_object) + active_stack_data = "" + if stack != None: + active_stack_data = stack.toprettyxml(encoding='ascii') # get the Obsid etc for subject subject = "Failed pipeline run: {0}".format( calling_object.inputs['job_name']) @@ -101,6 +104,8 @@ def mail_log_on_exception(target): _mail_msg_to("pipeline_error", entry, subject, msg) + raise + calling_object.logger.info("pipeline_finished" + " xml summary:") calling_object.logger.info(msg_string) diff --git a/CEP/Pipeline/framework/lofarpipe/support/pipelinelogging.py b/CEP/Pipeline/framework/lofarpipe/support/pipelinelogging.py index 5e70d5ab32feca7c23ac6a8dbed7fab1f970dcf3..fd839afa082a46e36d216688cdf3e4e3937a3886 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/pipelinelogging.py +++ b/CEP/Pipeline/framework/lofarpipe/support/pipelinelogging.py @@ -12,7 +12,6 @@ from string import Template import os import time -import resource import threading import logging import re @@ -213,6 +212,7 @@ def log_time(logger): :param logger: logger to which timing information should be sent. 
""" + import resource def get_rusage(): return [ x + y for x, y in zip( diff --git a/CEP/Pipeline/recipes/sip/bin/msss_calibrator_pipeline.py b/CEP/Pipeline/recipes/sip/bin/msss_calibrator_pipeline.py index 2eb6640974898cf6b8be36cb5c99f1f40f9c2d29..364237d89377cf6936db96e564b1e4beea3ce779 100755 --- a/CEP/Pipeline/recipes/sip/bin/msss_calibrator_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/msss_calibrator_pipeline.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -# LOFAR CALIBRATION PIPELINE +# STANDARD IMAGING PIPELINE # -# Calibrator Pipeline recipe +# MSSS Calibrator Pipeline # Marcel Loose, 2011-2012 # loose@astron.nl # ------------------------------------------------------------------------------ @@ -11,8 +11,8 @@ import sys from lofarpipe.support.control import control from lofarpipe.support.lofarexceptions import PipelineException -from lofarpipe.support.group_data import validate_data_maps -from lofarpipe.support.group_data import tally_data_map +from lofarpipe.support.data_map import DataMap +from lofarpipe.support.data_map import validate_data_maps#, tally_data_map from lofarpipe.support.utilities import create_directory from lofar.parameterset import parameterset @@ -67,29 +67,25 @@ class msss_calibrator_pipeline(control): Get input- and output-data product specifications from the parset-file, and do some sanity checks. """ - dataproducts = self.parset.makeSubset( + dps = self.parset.makeSubset( self.parset.fullModuleName('DataProducts') + '.' ) - self.input_data = [ - tuple(os.path.join(location, filename).split(':')) + self.input_data = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) for location, filename, skip in zip( - dataproducts.getStringVector('Input_Correlated.locations'), - dataproducts.getStringVector('Input_Correlated.filenames'), - dataproducts.getBoolVector('Input_Correlated.skip')) - if not skip - ] + dps.getStringVector('Input_Correlated.locations'), + dps.getStringVector('Input_Correlated.filenames'), + dps.getBoolVector('Input_Correlated.skip')) + ]) self.logger.debug("%d Input_Correlated data products specified" % len(self.input_data)) - self.output_data = [ - tuple(os.path.join(location, filename).split(':')) + self.output_data = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) for location, filename, skip in zip( - dataproducts.getStringVector( - 'Output_InstrumentModel.locations'), - dataproducts.getStringVector( - 'Output_InstrumentModel.filenames'), - dataproducts.getBoolVector('Output_InstrumentModel.skip')) - if not skip - ] + dps.getStringVector('Output_InstrumentModel.locations'), + dps.getStringVector('Output_InstrumentModel.filenames'), + dps.getBoolVector('Output_InstrumentModel.skip')) + ]) self.logger.debug("%d Output_InstrumentModel data products specified" % len(self.output_data)) # Sanity checks on input- and output data product specifications @@ -97,38 +93,38 @@ class msss_calibrator_pipeline(control): raise PipelineException( "Validation of input/output data product specification failed!" 
) - # Validate input data, by searching the cluster for files - self._validate_input_data() - # Update input- and output-data product specifications if needed - if not all(self.io_data_mask): - self.logger.info("Updating input/output product specifications") - self.input_data = [ - f for (f, m) in zip(self.input_data, self.io_data_mask) if m - ] - self.output_data = [ - f for (f, m) in zip(self.output_data, self.io_data_mask) if m - ] - - - def _validate_input_data(self): - """ - Search for the requested input files and mask the files in - `self.input_data[]` that could not be found on the system. - """ - # Use filename glob-pattern as defined in LOFAR-USG-ICD-005. - self.io_data_mask = tally_data_map( - self.input_data, 'L*_SB???_uv.MS', self.logger - ) - # Log a warning if not all input data files were found. - if not all(self.io_data_mask): - self.logger.warn( - "The following input data files were not found: %s" % - ', '.join( - ':'.join(f) for (f, m) in zip( - self.input_data, self.io_data_mask - ) if not m - ) - ) +# # Validate input data, by searching the cluster for files +# self._validate_input_data() +# # Update input- and output-data product specifications if needed +# if not all(self.io_data_mask): +# self.logger.info("Updating input/output product specifications") +# for (f, m) in zip(self.input_data, self.io_data_mask): +# if not m: +# f.skip = True +# for (f, m) in zip(self.output_data, self.io_data_mask): +# if not m: +# f.skip = True + + +# def _validate_input_data(self): +# """ +# Search for the requested input files and mask the files in +# `self.input_data[]` that could not be found on the system. +# """ +# # Use filename glob-pattern as defined in LOFAR-USG-ICD-005. +# self.io_data_mask = tally_data_map( +# self.input_data, 'L*_SB???_uv.MS', self.logger +# ) +# # Log a warning if not all input data files were found. +# if not all(self.io_data_mask): +# self.logger.warn( +# "The following input data files were not found: %s" % +# ', '.join( +# str(f) for (f, m) in zip( +# self.input_data, self.io_data_mask +# ) if not m +# ) +# ) def go(self): @@ -180,16 +176,18 @@ class msss_calibrator_pipeline(control): # Write input- and output data map-files data_mapfile = os.path.join(mapfile_dir, "data.mapfile") - self._store_data_map(data_mapfile, self.input_data, "inputs") + self.input_data.save(data_mapfile) +# self._store_data_map(data_mapfile, self.input_data, "inputs") instrument_mapfile = os.path.join(mapfile_dir, "instrument.mapfile") - self._store_data_map(instrument_mapfile, self.output_data, "output") +# self._store_data_map(instrument_mapfile, self.output_data, "output") + self.output_data.save(instrument_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % - ', '.join(':'.join(f) for f in self.input_data)) + ', '.join(str(f) for f in self.input_data)) # ********************************************************************* # 2. 
Create database needed for performing work: # Vds, descibing data on the nodes diff --git a/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py b/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py index ee991ad270745081098768ea644aeb6c9dd46e24..ee685d6f24957e1bfb4a66898e45b7bb2c7fe789 100755 --- a/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py @@ -9,12 +9,12 @@ # ----------------------------------------------------------------------------- import os import sys +import copy from lofarpipe.support.control import control from lofarpipe.support.utilities import create_directory from lofarpipe.support.lofarexceptions import PipelineException -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.group_data import validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps, MultiDataMap from lofarpipe.support.utilities import patch_parset, get_parset from lofarpipe.support.loggingdecorators import xml_node, mail_log_on_exception @@ -94,9 +94,9 @@ class msss_imager_pipeline(control): """ control.__init__(self) self.parset = parameterset() - self.input_data = [] - self.target_data = [] - self.output_data = [] + self.input_data = DataMap() + self.target_data = DataMap() + self.output_data = DataMap() self.scratch_directory = None self.parset_feedback_file = None self.parset_dir = None @@ -160,19 +160,20 @@ class msss_imager_pipeline(control): # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") - store_data_map(input_mapfile, self.input_data) + self.input_data.save(input_mapfile) + #storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) - # TODO: What is the difference between target and output??? - # output datafiles + # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") - store_data_map(target_mapfile, self.target_data) + self.target_data.save(target_mapfile) self.logger.debug( "Wrote target mapfile: {0}".format(target_mapfile)) + # images datafiles output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile") - store_data_map(output_image_mapfile, self.output_data) + self.output_data.save(output_image_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_image_mapfile)) @@ -180,7 +181,7 @@ class msss_imager_pipeline(control): # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, raw_ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, - target_mapfile, skip=False) + target_mapfile) #We start with an empty source_list source_list = "" # path to local sky model (list of 'found' sources) @@ -199,6 +200,9 @@ class msss_imager_pipeline(control): bbs_output = self._bbs(timeslice_map_path, parmdbs_path, sourcedb_map_path, skip=False) + # TODO: Extra recipe: concat timeslices using pyrap.concatms + # (see prepare) + # ***************************************************************** # (4) Get parameters awimager from the prepare_parset and inputs aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path, @@ -239,33 +243,43 @@ class msss_imager_pipeline(control): Get input- and output-data product specifications from the parset-file, and do some sanity checks. 
""" - odp = self.parset.makeSubset( + dps = self.parset.makeSubset( self.parset.fullModuleName('DataProducts') + '.' ) - self.input_data = [tuple(os.path.join(*x).split(':')) for x in zip( - odp.getStringVector('Input_Correlated.locations', []), - odp.getStringVector('Input_Correlated.filenames', [])) - ] + # convert input dataproducts from parset value to DataMap + self.input_data = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) + for location, filename, skip in zip( + dps.getStringVector('Input_Correlated.locations'), + dps.getStringVector('Input_Correlated.filenames'), + dps.getBoolVector('Input_Correlated.skip')) + ]) self.logger.debug("%d Input_Correlated data products specified" % len(self.input_data)) - self.output_data = [tuple(os.path.join(*x).split(':')) for x in zip( - odp.getStringVector('Output_SkyImage.locations', []), - odp.getStringVector('Output_SkyImage.filenames', [])) - ] + + self.output_data = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) + for location, filename, skip in zip( + dps.getStringVector('Output_SkyImage.locations'), + dps.getStringVector('Output_SkyImage.filenames'), + dps.getBoolVector('Output_SkyImage.skip')) + ]) self.logger.debug("%d Output_SkyImage data products specified" % len(self.output_data)) - # Sanity checks on input- and output data product specifications - if not(validate_data_maps(self.input_data) and - validate_data_maps(self.output_data)): - raise PipelineException( - "Validation of input/output data product specification failed!" - ) + + ## Sanity checks on input- and output data product specifications + #if not validate_data_maps(self.input_data, self.output_data): + # raise PipelineException( + # "Validation of input/output data product specification failed!" + # )#Turned off untill DataMap is extended.. + # Target data is basically scratch data, consisting of one concatenated # MS per image. It must be stored on the same host as the final image. 
- for host, path in self.output_data: - self.target_data.append( - (host, os.path.join(self.scratch_directory, 'concat.ms')) - ) + self.target_data = copy.deepcopy(self.output_data) + + for item in self.target_data: + item.file = os.path.join(self.scratch_directory, 'concat.ms') + @xml_node def _finalize(self, awimager_output_map, processed_ms_dir, @@ -357,7 +371,7 @@ class msss_imager_pipeline(control): #create parset for bbs run parset = self.parset.makeSubset("BBS.") parset_path = self._write_parset_to_file(parset, "bbs", - "Parset for calibration on local sky model") + "Parset for calibration with a local sky model") # create the output file path output_mapfile = self._write_datamap_to_file(None, "bbs_output", @@ -374,33 +388,23 @@ class msss_imager_pipeline(control): # sourcelist location: This allows validation of maps in combination # get the original map data - sourcedb_map = load_data_map(sourcedb_map_path) - parmdbs_map = load_data_map(parmdbs_map_path) + sourcedb_map = DataMap.load(sourcedb_map_path) + parmdbs_map = MultiDataMap.load(parmdbs_map_path) converted_sourcedb_map = [] - # walk the two maps in pairs - for (source_db_pair, parmdbs) in zip(sourcedb_map, parmdbs_map): - (host_sourcedb, sourcedb_path) = source_db_pair - (host_parmdbs, parmdbs_entries) = parmdbs - # sanity check: host should be the same - if host_parmdbs != host_sourcedb: - self.logger.error("The input files for bbs do not contain " - "matching host names for each entry") - self.logger.error(repr(sourcedb_map)) - self.logger.error(repr(parmdbs_map_path)) - - #add the entries but with skymap multiplied with len (parmds list) - converted_sourcedb_map.append((host_sourcedb, - [sourcedb_path] * len(parmdbs_entries))) - #save the new mapfile - store_data_map(converted_sourcedb_map_path, converted_sourcedb_map) - self.logger.error("Wrote converted sourcedb datamap: {0}".format( - converted_sourcedb_map_path)) + + # sanity check for correcy output from previous recipes + if not validate_data_maps(sourcedb_map, parmdbs_map): + self.logger.error("The input files for bbs do not contain " + "matching host names for each entry content:") + self.logger.error(repr(sourcedb_map)) + self.logger.error(repr(parmdbs_map_path)) + raise PipelineException("Invalid input data for imager_bbs recipe") self.run_task("imager_bbs", timeslice_map_path, parset=parset_path, instrument_mapfile=parmdbs_map_path, - sourcedb_mapfile=converted_sourcedb_map_path, + sourcedb_mapfile=sourcedb_map_path, mapfile=output_mapfile, working_directory=self.scratch_directory) @@ -452,8 +456,7 @@ class msss_imager_pipeline(control): return output_mapfile, max_baseline @xml_node - def _prepare_phase(self, input_ms_map_path, target_mapfile, - skip=False): + def _prepare_phase(self, input_ms_map_path, target_mapfile): """ Copy ms to correct location, combine the ms in slices and combine the time slices into a large virtual measurement set @@ -480,42 +483,39 @@ class msss_imager_pipeline(control): slices_per_image = self.parset.getInt("Imaging.slices_per_image") subbands_per_image = self.parset.getInt("Imaging.subbands_per_image") - if skip: - pass - else: - outputs = self.run_task("imager_prepare", input_ms_map_path, - parset=ndppp_parset_path, - target_mapfile=target_mapfile, - slices_per_image=slices_per_image, - subbands_per_image=subbands_per_image, - mapfile=output_mapfile, - slices_mapfile=time_slices_mapfile, - raw_ms_per_image_mapfile=raw_ms_per_image_mapfile, - working_directory=self.scratch_directory, - processed_ms_dir=processed_ms_dir) - - 
#validate that the prepare phase produced the correct data - output_keys = outputs.keys() - if not ('mapfile' in output_keys): - error_msg = "The imager_prepare master script did not"\ - "return correct data. missing: {0}".format('mapfile') - self.logger.error(error_msg) - raise PipelineException(error_msg) - if not ('slices_mapfile' in output_keys): - error_msg = "The imager_prepare master script did not"\ - "return correct data. missing: {0}".format( - 'slices_mapfile') - self.logger.error(error_msg) - raise PipelineException(error_msg) - if not ('raw_ms_per_image_mapfile' in output_keys): - error_msg = "The imager_prepare master script did not"\ - "return correct data. missing: {0}".format( - 'raw_ms_per_image_mapfile') - self.logger.error(error_msg) - raise PipelineException(error_msg) + outputs = self.run_task("imager_prepare", input_ms_map_path, + parset=ndppp_parset_path, + target_mapfile=target_mapfile, + slices_per_image=slices_per_image, + subbands_per_image=subbands_per_image, + mapfile=output_mapfile, + slices_mapfile=time_slices_mapfile, + raw_ms_per_image_mapfile=raw_ms_per_image_mapfile, + working_directory=self.scratch_directory, + processed_ms_dir=processed_ms_dir) + + #validate that the prepare phase produced the correct data + output_keys = outputs.keys() + if not ('mapfile' in output_keys): + error_msg = "The imager_prepare master script did not"\ + "return correct data. missing: {0}".format('mapfile') + self.logger.error(error_msg) + raise PipelineException(error_msg) + if not ('slices_mapfile' in output_keys): + error_msg = "The imager_prepare master script did not"\ + "return correct data. missing: {0}".format( + 'slices_mapfile') + self.logger.error(error_msg) + raise PipelineException(error_msg) + if not ('raw_ms_per_image_mapfile' in output_keys): + error_msg = "The imager_prepare master script did not"\ + "return correct data. 
missing: {0}".format( + 'raw_ms_per_image_mapfile') + self.logger.error(error_msg) + raise PipelineException(error_msg) # Return the mapfiles paths with processed data - return output_mapfile, time_slices_mapfile, raw_ms_per_image_mapfile, \ + return output_mapfile, outputs["slices_mapfile"], raw_ms_per_image_mapfile, \ processed_ms_dir @xml_node @@ -573,7 +573,7 @@ class msss_imager_pipeline(control): parset.writeFile(parset_path) #display a debug log entrie with path and message - self.logger.debug("Wrote parset to path <{0}> : ".format( + self.logger.debug("Wrote parset to path <{0}> : {1}".format( parset_path, message)) return parset_path @@ -598,14 +598,16 @@ class msss_imager_pipeline(control): #display a debug log entrie with path and message if datamap != None: - store_data_map(mapfile_path, datamap) + datamap.save(mapfile_path) + self.logger.debug( "Wrote mapfile <{0}>: {1}".format(mapfile_path, message)) else: if not os.path.exists(mapfile_path): - store_data_map(mapfile_path, []) + DataMap().save(mapfile_path) + self.logger.debug( - "Touched mapfile <{0}>: {1}".format(mapfile_path, message)) + "Touched mapfile <{0}>: {1}".format(mapfile_path, message)) return mapfile_path diff --git a/CEP/Pipeline/recipes/sip/bin/msss_target_pipeline.py b/CEP/Pipeline/recipes/sip/bin/msss_target_pipeline.py index a952274df85b62eef3714571e774933a25bceba8..b3ebbaebbc36c3d6f42e023e22b6c79d2b01d042 100755 --- a/CEP/Pipeline/recipes/sip/bin/msss_target_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/msss_target_pipeline.py @@ -6,14 +6,14 @@ # loose@astron.nl # ------------------------------------------------------------------------------ +import copy import os import sys from lofarpipe.support.control import control from lofarpipe.support.lofarexceptions import PipelineException -from lofarpipe.support.group_data import store_data_map, validate_data_maps, \ - load_data_map -from lofarpipe.support.group_data import tally_data_map +from lofarpipe.support.data_map import DataMap, validate_data_maps +#from lofarpipe.support.group_data import tally_data_map from lofarpipe.support.utilities import create_directory from lofar.parameterset import parameterset @@ -65,34 +65,31 @@ class msss_target_pipeline(control): parset-file, and do some sanity checks. 
""" odp = self.parset.makeSubset('ObsSW.Observation.DataProducts.') - self.input_data['data'] = [ - tuple(os.path.join(location, filename).split(':')) + self.input_data['data'] = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) for location, filename, skip in zip( odp.getStringVector('Input_Correlated.locations'), odp.getStringVector('Input_Correlated.filenames'), odp.getBoolVector('Input_Correlated.skip')) - if not skip - ] + ]) self.logger.debug("%d Input_Correlated data products specified" % len(self.input_data['data'])) - self.input_data['instrument'] = [ - tuple(os.path.join(location, filename).split(':')) + self.input_data['instrument'] = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) for location, filename, skip in zip( odp.getStringVector('Input_InstrumentModel.locations'), odp.getStringVector('Input_InstrumentModel.filenames'), odp.getBoolVector('Input_InstrumentModel.skip')) - if not skip - ] + ]) self.logger.debug("%d Input_InstrumentModel data products specified" % len(self.input_data['instrument'])) - self.output_data['data'] = [ - tuple(os.path.join(location, filename).split(':')) + self.output_data['data'] = DataMap([ + tuple(os.path.join(location, filename).split(':')) + (skip,) for location, filename, skip in zip( odp.getStringVector('Output_Correlated.locations'), odp.getStringVector('Output_Correlated.filenames'), odp.getBoolVector('Output_Correlated.skip')) - if not skip - ] + ]) self.logger.debug("%d Output_Correlated data products specified" % len(self.output_data['data'])) @@ -108,58 +105,58 @@ class msss_target_pipeline(control): ): raise PipelineException( "Validation of input/output data product specification failed!" ) - # Validate input data, by searching the cluster for files - self._validate_input_data() - # Update input- and output-data product specifications if needed. - if not all(self.io_data_mask): - self.logger.info("Updating input/output product specifications") - self.input_data['data'] = [f for (f, m) - in zip(self.input_data['data'], self.io_data_mask) if m - ] - self.input_data['instrument'] = [f for (f, m) - in zip(self.input_data['instrument'], self.io_data_mask) if m - ] - self.output_data['data'] = [f for (f, m) - in zip(self.output_data['data'], self.io_data_mask) if m - ] - - - def _validate_input_data(self): - """ - Search for the requested input files and mask the files in - `self.input_data{}` that could not be found on the system. - """ - # Use filename glob-pattern as defined in LOFAR-USG-ICD-005. - data_mask = tally_data_map( - self.input_data['data'], 'L*_SB???_uv.MS', self.logger - ) - # Log a warning if not all input data files were found. - if not all(data_mask): - self.logger.warn( - "The following input data files were not found: %s" % - ', '.join( - ':'.join(f) for (f, m) in zip( - self.input_data['data'], data_mask - ) if not m - ) - ) - # Use filename glob-pattern as defined in LOFAR-USG-ICD-005. - inst_mask = tally_data_map( - self.input_data['instrument'], 'L*_SB???_inst.INST', self.logger - ) - # Log a warning if not all input instrument files were found. 
- if not all(inst_mask): - self.logger.warn( - "The following input instrument files were not found: %s" % - ', '.join( - ':'.join(f) for (f, m) in zip( - self.input_data['instrument'], inst_mask - ) if not m - ) - ) - - # Set the IO data mask - self.io_data_mask = [x and y for (x, y) in zip(data_mask, inst_mask)] +# # Validate input data, by searching the cluster for files +# self._validate_input_data() +# # Update input- and output-data product specifications if needed. +# if not all(self.io_data_mask): +# self.logger.info("Updating input/output product specifications") +# self.input_data['data'] = [f for (f, m) +# in zip(self.input_data['data'], self.io_data_mask) if m +# ] +# self.input_data['instrument'] = [f for (f, m) +# in zip(self.input_data['instrument'], self.io_data_mask) if m +# ] +# self.output_data['data'] = [f for (f, m) +# in zip(self.output_data['data'], self.io_data_mask) if m +# ] + + +# def _validate_input_data(self): +# """ +# Search for the requested input files and mask the files in +# `self.input_data{}` that could not be found on the system. +# """ +# # Use filename glob-pattern as defined in LOFAR-USG-ICD-005. +# data_mask = tally_data_map( +# self.input_data['data'], 'L*_SB???_uv.MS', self.logger +# ) +# # Log a warning if not all input data files were found. +# if not all(data_mask): +# self.logger.warn( +# "The following input data files were not found: %s" % +# ', '.join( +# ':'.join(f) for (f, m) in zip( +# self.input_data['data'], data_mask +# ) if not m +# ) +# ) +# # Use filename glob-pattern as defined in LOFAR-USG-ICD-005. +# inst_mask = tally_data_map( +# self.input_data['instrument'], 'L*_SB???_inst.INST', self.logger +# ) +# # Log a warning if not all input instrument files were found. +# if not all(inst_mask): +# self.logger.warn( +# "The following input instrument files were not found: %s" % +# ', '.join( +# ':'.join(f) for (f, m) in zip( +# self.input_data['instrument'], inst_mask +# ) if not m +# ) +# ) + +# # Set the IO data mask +# self.io_data_mask = [x and y for (x, y) in zip(data_mask, inst_mask)] def _create_target_map_for_instruments(self): @@ -171,14 +168,12 @@ class msss_target_pipeline(control): scratch_dir = os.path.join( self.inputs['working_directory'], self.inputs['job_name']) - target_locations = [] - for instrument_pair, data_pair \ - in zip(self.input_data['instrument'], self.input_data['data']): - path_instr = instrument_pair[1] - host_data = data_pair[0] - # target location == working dir instrument file name - target_path = os.path.join(scratch_dir, os.path.basename(path_instr)) - target_locations.append((host_data, target_path)) + target_locations = copy.deepcopy(self.input_data['instrument']) + for data, target in zip(self.input_data['data'], target_locations): + target.host = data.host + target.file = os.path.join( + scratch_dir, os.path.basename(target.file) + ) return target_locations @@ -193,10 +188,10 @@ class msss_target_pipeline(control): #Write the two needed maps to file source_path = os.path.join(copier_map_path, "source_instruments.map") - store_data_map(source_path, self.input_data['instrument']) + self.input_data['instrument'].save(source_path) target_path = os.path.join(copier_map_path, "target_instruments.map") - store_data_map(target_path, target_map) + target_map.save(target_path) copied_files_path = os.path.join(copier_map_path, "copied_instruments.map") @@ -208,23 +203,17 @@ class msss_target_pipeline(control): mapfiles_dir=copier_map_path, mapfile=copied_files_path)['mapfile_target_copied'] - # Some copy 
action might fail, these files need to be removed from - # both the data and the instrument file!! - copied_instruments_map = load_data_map(copied_instruments_mapfile) - new_instrument_map = [] - new_input_data_map = [] - new_output_data_map = [] - for instrument_pair, input_data_pair, output_data_pair in \ - zip(target_map, self.input_data['data'], self.output_data['data']): - if instrument_pair in copied_instruments_map: - new_instrument_map.append(instrument_pair) - new_input_data_map.append(input_data_pair) - new_output_data_map.append(output_data_pair) - # else: Do not process further in the recipe - - self.input_data['instrument'] = new_instrument_map - self.input_data['data'] = new_input_data_map - self.output_data['data'] = new_output_data_map + # Some copy action might fail; the skip fields in the other map-files + # need to be updated these to reflect this. + self.input_data['instrument'] = DataMap.load(copied_instruments_mapfile) + for data, inst, outp in zip( + self.input_data['data'], + self.input_data['instrument'], + self.output_data['data'] + ): + data.skip = inst.skip = outp.skip = ( + data.skip or inst.skip or outp.skip + ) def go(self): @@ -278,20 +267,18 @@ class msss_target_pipeline(control): # update both intrument and datamap to contain only successes! self._copy_instrument_files(mapfile_dir) - # Write input- and output data map-files. data_mapfile = os.path.join(mapfile_dir, "data.mapfile") - store_data_map(data_mapfile, self.input_data['data']) + self.input_data['data'].save(data_mapfile) copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile") - store_data_map(copied_instrument_mapfile, - self.input_data['instrument']) + self.input_data['instrument'].save(copied_instrument_mapfile) self.logger.debug( "Wrote input data mapfile: %s" % data_mapfile ) # Save copied files to a new mapfile corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile") - store_data_map(corrected_mapfile, self.output_data['data']) + self.output_data['data'].save(corrected_mapfile) self.logger.debug( "Wrote output corrected data mapfile: %s" % corrected_mapfile ) @@ -302,7 +289,7 @@ class msss_target_pipeline(control): return 0 self.logger.debug("Processing: %s" % - ', '.join(':'.join(f) for f in self.input_data['data']) + ', '.join(str(f) for f in self.input_data['data']) ) # ********************************************************************* @@ -367,7 +354,7 @@ class msss_target_pipeline(control): parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile - )['mapfile'] + )['data_mapfile'] # ********************************************************************* # 6. Second dppp run for flaging NaN's in the MS. 
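
The new `lofarpipe.support.data_map` module added in this patch replaces the old list-of-tuples map files with `DataMap`/`DataProduct` objects and the `skip`-flag bookkeeping used throughout the refactored recipes above. The following is an illustrative sketch of that API, not part of the patch itself: host names and file paths are invented, and it assumes the `lofarpipe` package is importable on the Python path.

# Minimal usage sketch of the DataMap API introduced by this patch.
# Host names and paths below are examples only.
from lofarpipe.support.data_map import DataMap, validate_data_maps

# A map can be built from (host, file[, skip]) tuples; DataProduct's skip
# argument defaults to True, so it is given explicitly here.
data_map = DataMap([
    ('locus001', '/data/L12345/L12345_SAP000_SB000_uv.MS', False),
    ('locus002', '/data/L12345/L12345_SAP000_SB001_uv.MS', True),
])
inst_map = DataMap([
    ('locus001', '/data/scratch/L12345_SB000_uv.MS.instrument', False),
    ('locus002', '/data/scratch/L12345_SB001_uv.MS.instrument', False),
])

# Maps processed together must have equal length and matching hosts per index.
assert validate_data_maps(data_map, inst_map)

# The skip-propagation pattern used in the recipes: an entry skipped in one
# map is skipped in all related maps.
for data, inst in zip(data_map, inst_map):
    data.skip = inst.skip = (data.skip or inst.skip)

# SkipIterator yields only unskipped DataProduct entries; TupleIterator yields
# (host, file) tuples for backward-compatible code.
data_map.iterator = DataMap.SkipIterator
for item in data_map:
    print item.host, item.file

# Round-trip through the human-readable map-file format.
data_map.save('/tmp/data.mapfile')
assert DataMap.load('/tmp/data.mapfile') == data_map
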
diff --git a/CEP/Pipeline/recipes/sip/master/bbs_reducer.py b/CEP/Pipeline/recipes/sip/master/bbs_reducer.py index d59a92eb4d936ac00adf6cd1806e3f675f85f95d..5fda848e56d55e3bc278e8898c1125beef19e4c3 100644 --- a/CEP/Pipeline/recipes/sip/master/bbs_reducer.py +++ b/CEP/Pipeline/recipes/sip/master/bbs_reducer.py @@ -9,8 +9,7 @@ import sys import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.group_data import validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps from lofarpipe.support.remotecommand import ComputeJob from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn @@ -44,13 +43,16 @@ class bbs_reducer(BaseRecipe, RemoteCommandRecipeMixIn): 'data_mapfile': ingredient.StringField( '--data-mapfile', help="Full path to the mapfile that will contain the names of the " - "data files that were successfully processed by BBS" + "data files that were processed by BBS" ), } - outputs = { - 'mapfile': ingredient.FileField( + 'data_mapfile': ingredient.FileField( help="Full path to a mapfile describing the processed data" + ), + 'instrument_mapfile': ingredient.FileField( + help="Full path to the (updated) mapfile containing the names of " + "the instrument model files that were processed by BBS" ) } @@ -62,47 +64,79 @@ class bbs_reducer(BaseRecipe, RemoteCommandRecipeMixIn): super(bbs_reducer, self).__init__() self.bbs_map = list() self.jobs = list() + self.data_map = DataMap() + self.inst_map = DataMap() + self.sky_map = DataMap() - def _make_bbs_map(self): + def _load_mapfiles(self): """ - This method bundles the contents of three different map-files. - All three map-files contain a list of tuples of hostname and filename. - The contents of these files are related by index in the list. They - form triplets of MS-file, its associated instrument model and its - associated sky model. - - The data structure `self.bbs_map` is a list of tuples, where each - tuple is a pair of hostname and the aforementioned triplet. - - For example: - bbs_map[0] = ('locus001', - ('/data/L29697/L29697_SAP000_SB000_uv.MS', - '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.instrument', - '/data/scratch/loose/L29697/L29697_SAP000_SB000_uv.MS.sky') - ) - - Returns `False` if validation of the three map-files fails, otherwise - returns `True`. + Load data map file, instrument map file, and sky map file. + Update the 'skip' fields in these map files: if 'skip' is True in any + of the maps, then 'skip' must be set to True in all maps. 
""" - self.logger.debug("Creating BBS map-file using: %s, %s, %s" % - (self.inputs['args'][0], - self.inputs['instrument_mapfile'], - self.inputs['sky_mapfile'])) - data_map = load_data_map(self.inputs['args'][0]) - instrument_map = load_data_map(self.inputs['instrument_mapfile']) - sky_map = load_data_map(self.inputs['sky_mapfile']) - - if not validate_data_maps(data_map, instrument_map, sky_map): + self.logger.debug("Loading map files:" + "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % ( + self.inputs['args'][0], + self.inputs['instrument_mapfile'], + self.inputs['sky_mapfile'] + ) + ) + self.data_map = DataMap.load(self.inputs['args'][0]) + self.inst_map = DataMap.load(self.inputs['instrument_mapfile']) + self.sky_map = DataMap.load(self.inputs['sky_mapfile']) + + if not validate_data_maps(self.data_map, self.inst_map, self.sky_map): self.logger.error("Validation of input data mapfiles failed") return False - self.bbs_map = [ - (dat[0], (dat[1], ins[1], sky[1])) - for dat, ins, sky in zip(data_map, instrument_map, sky_map) - ] + # Update the skip fields of the three maps. If 'skip' is True in any of + # these maps, then 'skip' must be set to True in all maps. + for x, y, z in zip(self.data_map, self.inst_map, self.sky_map): + x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip) return True + + + def _run_jobs(self): + """ + Create and schedule the compute jobs + """ + command = "python %s" % (self.__file__.replace('master', 'nodes')) + self.data_map.iterator = DataMap.SkipIterator + self.inst_map.iterator = DataMap.SkipIterator + self.sky_map.iterator = DataMap.SkipIterator + for data, inst, sky in zip(self.data_map, self.inst_map, self.sky_map): + self.jobs.append( + ComputeJob( + data.host, command, + arguments=[ + (data.file, inst.file, sky.file), + self.inputs['executable'], + self.inputs['parset'], + self.environment + ] + ) + ) + self._schedule_jobs(self.jobs) + + + def _update_mapfiles(self): + """ + Update the data- and instrument- map files, taking into account any + failed runs. + """ + self.logger.debug("Updating map files:" + "\n\tdata map: %s\n\tinstrument map: %s" % + (self.inputs['args'][0], self.inputs['instrument_mapfile']) + ) + for job, data, inst in zip(self.jobs, self.data_map, self.inst_map): + if job.results['returncode'] != 0: + data.skip = inst.skip = True + self.data_map.save(self.inputs['data_mapfile']) + self.inst_map.save(self.inputs['instrument_mapfile']) + self.outputs['data_mapfile'] = self.inputs['args'][0] + self.outputs['instrument_mapfile'] = self.inputs['instrument_mapfile'] def _handle_errors(self): @@ -125,26 +159,6 @@ class bbs_reducer(BaseRecipe, RemoteCommandRecipeMixIn): return 0 - def _write_data_mapfile(self): - """ - Write a new data map-file containing only the successful runs. - """ - outdata = [] - for job in self.jobs: - if job.results['returncode'] == 0: - # The first item in job.arguments is a tuple of file names, - # whose first item is the name of the MS-file - # (see `_make_bbs_map` for details). - outdata.append((job.host, job.arguments[0][0])) - - # Write output data-mapfile - self.logger.debug( - "Writing data map file: %s" % self.inputs['data_mapfile'] - ) - store_data_map(self.inputs['data_mapfile'], outdata) - self.outputs['mapfile'] = self.inputs['data_mapfile'] - - def go(self): """ This it the actual workhorse. It is called by the framework. 
We pass @@ -155,28 +169,15 @@ class bbs_reducer(BaseRecipe, RemoteCommandRecipeMixIn): self.logger.info("Starting BBS-reducer run") super(bbs_reducer, self).go() - # Create a bbs_map describing the file mapping on disk - if not self._make_bbs_map(): + # Load the required map-files. + if not self._load_mapfiles(): return 1 # Create and schedule the compute jobs - command = "python %s" % (self.__file__.replace('master', 'nodes')) - for host, files in self.bbs_map: - self.jobs.append( - ComputeJob( - host, command, - arguments=[ - files, - self.inputs['executable'], - self.inputs['parset'], - self.environment - ] - ) - ) - self._schedule_jobs(self.jobs) + self._run_jobs() - # Write output data map-file - self._write_data_mapfile() + # Update the instrument map file, taking failed runs into account. + self._update_mapfiles() # Handle errors, if any. return self._handle_errors() diff --git a/CEP/Pipeline/recipes/sip/master/copier.py b/CEP/Pipeline/recipes/sip/master/copier.py index d45421a537f38960fb5de092cca4e972d4a30c7b..a5a404905aa55e78500b14ac70a8755b81517893 100644 --- a/CEP/Pipeline/recipes/sip/master/copier.py +++ b/CEP/Pipeline/recipes/sip/master/copier.py @@ -13,45 +13,36 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.lofarexceptions import PipelineException +from lofarpipe.support.data_map import DataMap class MasterNodeInterface(BaseRecipe, RemoteCommandRecipeMixIn): """ - Abstract class for master script collecting functionality regarding - master node communication in a single interface - - The abstract part of this class definition indicates that this class is - intended only to be a base class of other classes: It contains basic - functionality with a number of stubs to be implemented in the inheriting - class: - on_error(this) : Called when a node recipe returned with an invalid return - code - on_succes(this): Called when all node recipes returned with a correct - return code + Base class for master script collecting functionality regarding master node + communication in a single interface. + + This class contains basic functionality with a number of methods that are + meant to be reimplemented in the derived class: + on_failure(this) : Called when all node recipes returned with a non-zero + exit status. + on_error(this) : Called when some node recipes returned with a non-zero + exit status. + on_success(this) : Called when all node recipes returned with a zero + exit status. TODO: Suggested improvements on_partial_succes(this): To prepare for rerun of partial runs on_warn(this): To distinguish between """ - def __init__(self, command=None): + def __init__(self, command): """ constructor, expects a string command used for calling the node script This class cannot be created with the base constructor. Inheriting should call this constructor with an command string """ self.logger = None - if not isinstance(command, basestring): - # Pipeline logger NOT called: This is an 'language' type error and - # has nothing to do with the pipelines - raise NotImplementedError("MasterNodeInterface not constructed" - "with a command string. 
This is an abstract class, inheriting" - "class should implement an constructor calling this function" - "with an command string") - - # call possible baseclass constructors - super(MasterNodeInterface, self).__init__() self._command = command self._jobs = [] + # call possible baseclass constructors + super(MasterNodeInterface, self).__init__() def append_job(self, host, arguments): """ @@ -63,15 +54,15 @@ class MasterNodeInterface(BaseRecipe, RemoteCommandRecipeMixIn): def run_jobs(self): """ - Starts the set of tasks in the job lists. Call the on_error function if - errors occured. On finish it will call the _on_succes function - finishing the output of the recipe. + Starts the set of tasks in the job lists. If all jobs succeed, + on_success() will be called. If some jobs fail, on_error() will be + called. If all jobs fail, on_failure() will be called. An log message is displayed on the stdout or in a logger if the object contains one. """ log_message = "Start scheduling jobs with command {0}".format( self._command) - if hasattr(self, 'logger'): + if self.logger: self.logger.info(log_message) else: print log_message @@ -79,30 +70,40 @@ class MasterNodeInterface(BaseRecipe, RemoteCommandRecipeMixIn): self._schedule_jobs(self._jobs) if self.error.isSet(): - return self.on_error() + if all(job.results['returncode'] != 0 for job in self._jobs): + return self.on_failure() + else: + return self.on_error() else: return self.on_succes() - def on_error(self): + def on_failure(self): """ - on_error should be implemented by the inheriting class. This function - is called when the node script returned with a invalid return value + This method is called when all node recipes return with a non-zero + exit status. It should return a value that can be cast to an integer, + where 0 indicates success. The default behaviour is to return -1. + This method can be overridden in the derived class. """ - raise NotImplementedError("on_error called on abstract class" - " MasterNodeInterface.\n Inheriting classes should implement an " - "on_error function") + return -1 + def on_error(self): + """ + This method is called when some node recipes return with a non-zero + exit status. It should return a value that can be cast to an integer, + where 0 indicates success. The default behaviour is to return 1. + This method can be overridden in the derived class. + """ + return 1 + def on_succes(self): """ - on_succes should be implemented by the inheriting class. This function - is called when the node script return with a valid return value == 0 - Typical usage would be the construction of the return dictionary - containing processed data. + This method is called when all node recipes return with a zero exit + status. It should return a value that can be cast to an integer, + where 0 indicates success. The default behaviour is to return 0. + This method can be overridden in the derived class. 
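
As an illustration of the revised contract, and not part of the patch, a derived recipe only needs to queue jobs and override the hooks it cares about. The class name and input names below are hypothetical, the sketch assumes it lives in the same module as MasterNodeInterface, and note that the success hook is spelled on_succes in the code.

    from lofarpipe.support.data_map import DataMap

    class example_copier(MasterNodeInterface):
        """Hypothetical recipe built on the revised MasterNodeInterface."""
        def __init__(self):
            super(example_copier, self).__init__(
                "python %s" % self.__file__.replace('master', 'nodes'))

        def go(self):
            super(example_copier, self).go()
            # Queue one job per map entry; run_jobs() dispatches to
            # on_succes / on_error / on_failure based on the return codes.
            for item in DataMap.load(self.inputs['args'][0]):
                self.append_job(item.host, [item.file])
            return self.run_jobs()

        def on_error(self):
            self.logger.warn("Some jobs failed; continuing with succeeded runs")
            return 0
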
""" - raise NotImplementedError("on_succes called on abstract class" - " MasterNodeInterface.\n Inheriting classes should implement an " - "on_succes function") - + return 0 + class copier(MasterNodeInterface): """ @@ -164,111 +165,92 @@ class copier(MasterNodeInterface): """ super(copier, self).__init__( "python {0}".format(self.__file__.replace('master', 'nodes'))) + self.source_map = DataMap() + self.target_map = DataMap() - def go(self): - # TODO: Remove dependency on mapfile_dir - self.logger.info("Starting copier run") - super(copier, self).go() - - # Load data from mapfiles - source_map = load_data_map(self.inputs['mapfile_source']) - target_map = load_data_map(self.inputs['mapfile_target']) - - # validate data in mapfiles - if not self._validate_source_target_mapfile(source_map, target_map, - self.inputs['allow_rename']): - return 1 + def _validate_mapfiles(self, allow_rename=False): + """ + Validation of input source and target map files. They must have equal + length. Furthermore, if rename is not allowed, test that 'file names' + are the same. + """ + # Same length? If not, then fail + if len(self.source_map) != len(self.target_map): + self.logger.error("Number of entries in the source and target map" + "Is not the same: \n target \n {0}\n source \n {1}".format( + self.target_map, self.source_map)) + return False - # Run the compute nodes with the node specific mapfiles - for source_pair, target_pair in zip(source_map, target_map): - source_node, source_path = source_pair - target_node, target_path = target_pair - args = [source_node, source_path, target_path] - self.append_job(target_node, args) + for source, target in zip(self.source_map, self.target_map): + # skip strict checking of basename equality if rename is allowed + if not allow_rename: + target_name = os.path.basename(target.file) + source_name = os.path.basename(source.file) + if not (target_name == source_name): + self.logger.error("One of the suplied source target pairs" + "contains a different 'filename': {0} != {1}\n" + " aborting".format(target_name, source_name)) + return False - # start the jobs - exit_value_jobs = self.run_jobs() - return exit_value_jobs + return True - ## Function expected by MasterNodeInterface + def _write_mapfile(self): + """ + Write an (updated) mapfile. + """ + self.logger.debug("Writing mapfile: %s" % self.inputs['mapfile']) + self.target_map.save(self.inputs['mapfile']) + self.outputs['mapfile_target_copied'] = self.inputs['mapfile'] + + def on_failure(self): + """ + All copier jobs failed. Bailing out. + """ + self.logger.error("All copier jobs failed. Bailing out!") + return 1 + def on_error(self): """ - Return the succesfull copied files or fail the current run if - none of the copy actions succeeded + Some copier jobs failed. Update the target map, setting 'skip' to True + for failed runs, and save it. 
""" - # Scan the job outputs for success full runs - successfull_copies = [] - for job in self._jobs: - if 'target' in job.results: - successfull_copies.append((job.host, job.results['target' ])) - - if len(successfull_copies) == 0: - self.logger.error("None of the copy action succeeded!") - self.logger.error("Exiting with a failure state") - return 1 - - - store_data_map(self.inputs['mapfile'], successfull_copies) - - self.logger.warn("Not all instrument we copied") self.logger.warn( - "wrote mapfile with successful copied files: {0}".format( - self.inputs['mapfile'])) - self.logger.info("copier exiting with Partial succesfull run") - self.outputs['mapfile_target_copied'] = self.inputs['mapfile'] + "Some copier jobs failed, continuing with succeeded runs" + ) + for job, target in zip(self._jobs, self.target_map): + if job.results['returncode'] != 0: + target.skip = True + self._write_mapfile() return 0 - - ## Function expected by MasterNodeInterface + def on_succes(self): """ - on_succes is called after all jobs have finished and - no errors have been encountered. - It constructs the output to be generated from this recipe based on - results from the node script + All copier jobs succeeded. Save an updated mapfile. """ - successfull_copies = [] - for job in self._jobs: - if 'target' in job.results: - successfull_copies.append((job.host, job.results['target' ])) - - store_data_map(self.inputs['mapfile'], successfull_copies) - self.logger.debug( - "wrote mapfile with (to be copied) files: {0}".format( - self.inputs['mapfile'])) - self.logger.info("copier exiting with succesfull run") - self.outputs['mapfile_target_copied'] = self.inputs['mapfile'] + self.logger.info("All copier jobs succeeded") + self._write_mapfile() return 0 + def go(self): + # TODO: Remove dependency on mapfile_dir + self.logger.info("Starting copier run") + super(copier, self).go() - def _validate_source_target_mapfile(self, source_map, target_map, - allow_rename=False): - """ - Validation the input source and target files, are they the same size - And if rename is not allowed, test if the 'file names' are the same - """ - # Same length? Of no then fail - if len(source_map) != len(target_map): - self.logger.error("Number of entries in the source and target map" - "Is not the same: \n target \n {0}\n source \n {1}".format( - target_map, source_map)) - return False + # Load data from mapfiles + self.source_map = DataMap.load(self.inputs['mapfile_source']) + self.target_map = DataMap.load(self.inputs['mapfile_target']) - # Construct mapfiles for each node containing the source and target - for source_pair, target_pair in zip(source_map, target_map): - target_node, target_path = target_pair - source_node, source_path = source_pair + # validate data in mapfiles + if not self._validate_mapfiles(self.inputs['allow_rename']): + return 1 - # skip strict checking of basename equality of rename is allowed - if not allow_rename: - target_name = os.path.basename(target_path) - source_name = os.path.basename(source_path) - if not (target_name == source_name): - self.logger.error("One of the suplied source target pairs" - "contains a different 'filename': {0} != {1}\n" - " aborting".format(target_name, source_name)) - return False + # Run the compute nodes with the node specific mapfiles + for source, target in zip(self.source_map, self.target_map): + args = [source.host, source.file, target.file] + self.append_job(target.host, args) - return True + # start the jobs, return the exit status. 
+ return self.run_jobs() if __name__ == '__main__': diff --git a/CEP/Pipeline/recipes/sip/master/dppp.py b/CEP/Pipeline/recipes/sip/master/dppp.py index eed0d24f72c02a84ecd3f6860bb9ca52ae23640d..f8a4467eff36a6e8bc880671c658be2748904662 100644 --- a/CEP/Pipeline/recipes/sip/master/dppp.py +++ b/CEP/Pipeline/recipes/sip/master/dppp.py @@ -5,8 +5,7 @@ # swinbank@transientskp.org # ------------------------------------------------------------------------------ -from collections import defaultdict - +import copy import sys import os @@ -15,26 +14,22 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.group_data import validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps class dppp(BaseRecipe, RemoteCommandRecipeMixIn): """ - Runs DPPP (either ``NDPPP`` or -- in the unlikely event it's required -- - ``IDPPP``) on a number of MeasurementSets. This is used for compressing - and/or flagging data + Runs ``NDPPP`` on a number of MeasurementSets. This is used for averaging, + and/or flagging, and/or demixing of data. 1. Load input data files 2. Load parmdb and sourcedb 3. Call the node side of the recipe - 4. Parse logfile for fully flagged baselines - 5. Create mapfile with successful noderecipe runs + 4. Create mapfile with successful noderecipe runs **Command line arguments** 1. A mapfile describing the data to be processed. - 2. Mapfile with target output locations <if procided input and output - mapfiles are validated> + 2. Optionally, a mapfile with target output locations. """ inputs = { @@ -142,10 +137,10 @@ class dppp(BaseRecipe, RemoteCommandRecipeMixIn): outputs = { 'mapfile': ingredient.FileField( help="The full path to a mapfile describing the processed data" - ), - 'fullyflagged': ingredient.ListField( - help="A list of all baselines which were completely flagged in any " - "of the input MeasurementSets" +# ), +# 'fullyflagged': ingredient.ListField( +# help="A list of all baselines which were completely flagged in any " +# "of the input MeasurementSets" ) } @@ -154,33 +149,27 @@ class dppp(BaseRecipe, RemoteCommandRecipeMixIn): self.logger.info("Starting DPPP run") super(dppp, self).go() - # Keep track of "Total flagged" messages in the DPPP logs - # ---------------------------------------------------------------------- - self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines" +# # Keep track of "Total flagged" messages in the DPPP logs +# # ---------------------------------------------------------------------- +# self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines" # ********************************************************************* # 1. 
load input data file, validate output vs the input location if # output locations are provided args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) - indata = load_data_map(args[0]) + indata = DataMap.load(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) - outdata = load_data_map(args[1]) - if not validate_data_maps(indata, outdata): - self.logger.error( - "Validation of input/output data mapfiles failed" - ) - return 1 + outdata = DataMap.load(args[1]) else: - outdata = [ - (host, - os.path.join( + outdata = copy.deepcopy(indata) + for item in outdata: + item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], - os.path.basename(infile) + self.inputs['suffix']) - ) for host, infile in indata - ] + os.path.basename(item.file) + self.inputs['suffix'] + ) # ******************************************************************** # 2. Load parmdb and sourcedb @@ -189,34 +178,55 @@ class dppp(BaseRecipe, RemoteCommandRecipeMixIn): self.logger.debug( "Loading parmdb mapfile: %s" % self.inputs['parmdb_mapfile'] ) - parmdbdata = load_data_map(self.inputs['parmdb_mapfile']) + parmdbdata = DataMap.load(self.inputs['parmdb_mapfile']) else: - parmdbdata = [(None, None)] * len(indata) + parmdbdata = copy.deepcopy(indata) + for item in parmdbdata: + item.file = '' # Load sourcedb-mapfile, if one was given. if self.inputs.has_key('sourcedb_mapfile'): self.logger.debug( "Loading sourcedb mapfile: %s" % self.inputs['sourcedb_mapfile'] ) - sourcedbdata = load_data_map(self.inputs['sourcedb_mapfile']) + sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile']) else: - sourcedbdata = [(None, None)] * len(indata) + sourcedbdata = copy.deepcopy(indata) + for item in sourcedbdata: + item.file = '' + + # Validate all the data maps. + if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata): + self.logger.error( + "Validation of data mapfiles failed!" + ) + return 1 + + # Update the skip fields of the four maps. If 'skip' is True in any of + # these maps, then 'skip' must be set to True in all maps. + for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata): + w.skip = x.skip = y.skip = z.skip = ( + w.skip or x.skip or y.skip or z.skip + ) # ******************************************************************** # 3. Call the node side of the recipe # Create and schedule the compute jobs command = "python %s" % (self.__file__.replace('master', 'nodes')) + indata.iterator = outdata.iterator = DataMap.SkipIterator + parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator jobs = [] - for host, infile, outfile, parmdb, sourcedb in (w + (x[1], y[1], z[1]) - for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata)): + for inp, outp, pdb, sdb in zip( + indata, outdata, parmdbdata, sourcedbdata + ): jobs.append( ComputeJob( - host, command, + inp.host, command, arguments=[ - infile, - outfile, - parmdb, - sourcedb, + inp.file, + outp.file, + pdb.file, + sdb.file, self.inputs['parset'], self.inputs['executable'], self.environment, @@ -230,57 +240,36 @@ class dppp(BaseRecipe, RemoteCommandRecipeMixIn): ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) + for job, outp in zip(jobs, outdata): + if job.results['returncode'] != 0: + outp.skip = True - # ********************************************************************* - # 4. 
parse logfile for fully flagged baselines - matches = self.logger.searchpatterns["fullyflagged"].results - self.logger.searchpatterns.clear() # finished searching - stripchars = "".join(set("Fully flagged baselines: ")) - baselinecounter = defaultdict(lambda: 0) - for match in matches: - for pair in ( - pair.strip(stripchars) for pair in match.getMessage().split(";") - ): - baselinecounter[pair] += 1 - self.outputs['fullyflagged'] = baselinecounter.keys() +# # ********************************************************************* +# # 4. parse logfile for fully flagged baselines +# matches = self.logger.searchpatterns["fullyflagged"].results +# self.logger.searchpatterns.clear() # finished searching +# stripchars = "".join(set("Fully flagged baselines: ")) +# baselinecounter = defaultdict(lambda: 0) +# for match in matches: +# for pair in ( +# pair.strip(stripchars) for pair in match.getMessage().split(";") +# ): +# baselinecounter[pair] += 1 +# self.outputs['fullyflagged'] = baselinecounter.keys() # ********************************************************************* - # 5. Create mapfile with successful noderecipe runs - # fail if no runs succeeded + # 4. Check job results, and create output data map file if self.error.isSet(): - # dppp needs to continue on partial succes. - # Get the status of the jobs - node_status = {} - ok_counter = 0 - for job in jobs: - if job.results.has_key("ok"): - node_status[job.host] = True - ok_counter += 1 - else: - node_status[job.host] = False - - # if all nodes failed abort - if ok_counter == 0: - self.logger.error("None of the dppp runs finished with an ok status") - self.logger.error("Exiting recipe with fail status") + # Abort if all jobs failed + if all(job.results['returncode'] != 0 for job in jobs): + self.logger.error("All jobs failed. Bailing out!") return 1 - - # Create new mapfile with only the successful runs - new_outdata = [] - for host, path in outdata: - if node_status[host]: - new_outdata.append((host, path)) - # else do not put in the outdata list - #swap the outputfiles - outdata = new_outdata - - self.logger.warn("Failed DPPP process detected," - "continue with succeeded runs") - - # Write output data return ok status - self.logger.debug("Writing data map file: %s" % - self.inputs['mapfile']) - store_data_map(self.inputs['mapfile'], outdata) + else: + self.logger.warn( + "Some jobs failed, continuing with succeeded runs" + ) + self.logger.debug("Writing data map file: %s" % self.inputs['mapfile']) + outdata.save(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0 diff --git a/CEP/Pipeline/recipes/sip/master/gainoutliercorrection.py b/CEP/Pipeline/recipes/sip/master/gainoutliercorrection.py index 708ba258f4b1d4744196c6376b9f5b74ea074091..fdf431ead01e0ec4fc1bd094afcfa8b9440b57c0 100644 --- a/CEP/Pipeline/recipes/sip/master/gainoutliercorrection.py +++ b/CEP/Pipeline/recipes/sip/master/gainoutliercorrection.py @@ -13,8 +13,7 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.group_data import validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps class gainoutliercorrection(BaseRecipe, RemoteCommandRecipeMixIn): @@ -92,39 +91,43 @@ class gainoutliercorrection(BaseRecipe, RemoteCommandRecipeMixIn): # 2. 
load mapfiles, validate if a target output location is provided args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) - indata = load_data_map(args[0]) + indata = DataMap.load(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) - outdata = load_data_map(args[1]) + outdata = DataMap.load(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed" ) return 1 else: - outdata = [ - (host, - os.path.join( + outdata = copy.deepcopy(indata) + for item in outdata: + item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], (os.path.splitext(os.path.basename(infile))[0] + - self.inputs['suffix'])) - ) for host, infile in indata - ] + self.inputs['suffix']) + ) + # Update the skip fields of the two maps. If 'skip' is True in any of + # these maps, then 'skip' must be set to True in all maps. + for x, y in zip(indata, outdata): + x.skip = y.skip = (x.skip or y.skip) + # ******************************************************************** # 3. Call node side of the recipe command = "python %s" % (self.__file__.replace('master', 'nodes')) + indata.iterator = outdata.iterator = DataMap.SkipIterator jobs = [] - for host, infile, outfile in (x + (y[1],) - for x, y in zip(indata, outdata)): + for inp, outp in zip(indata, outdata): jobs.append( ComputeJob( - host, + outp.host, command, arguments=[ - infile, - outfile, + inp.file, + outp.file, self.inputs['executable'], self.environment, self.inputs['sigma'] @@ -132,6 +135,9 @@ class gainoutliercorrection(BaseRecipe, RemoteCommandRecipeMixIn): ) ) self._schedule_jobs(jobs) + for job, outp in zip(jobs, outdata): + if job.results['returncode'] != 0: + outp.skip = True # ******************************************************************** # 4. validate performance, return corrected files @@ -141,7 +147,7 @@ class gainoutliercorrection(BaseRecipe, RemoteCommandRecipeMixIn): else: self.logger.debug("Writing instrument map file: %s" % self.inputs['mapfile']) - store_data_map(self.inputs['mapfile'], outdata) + outdata.save(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0 diff --git a/CEP/Pipeline/recipes/sip/master/get_metadata.py b/CEP/Pipeline/recipes/sip/master/get_metadata.py index 43c174be838773b2f6a7eb72a9e3be6b736ec14e..a438da42f8317c0fe11a7af6dfc2767edd2e5973 100644 --- a/CEP/Pipeline/recipes/sip/master/get_metadata.py +++ b/CEP/Pipeline/recipes/sip/master/get_metadata.py @@ -12,7 +12,7 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map +from lofarpipe.support.data_map import DataMap from lofarpipe.recipes.helpers import metadata from lofar.parameterset import parameterset from lofarpipe.support.utilities import create_directory @@ -73,29 +73,42 @@ class get_metadata(BaseRecipe, RemoteCommandRecipeMixIn): # ******************************************************************** # 2. Load mapfiles self.logger.debug("Loading input-data mapfile: %s" % args[0]) - data = load_data_map(args[0]) + data = DataMap.load(args[0]) # ******************************************************************** # 3. 
call node side of the recipe command = "python %s" % (self.__file__.replace('master', 'nodes')) + data.iterator = DataMap.SkipIterator jobs = [] - for host, infile in data: + for inp in data: jobs.append( ComputeJob( - host, command, + inp.host, command, arguments=[ - infile, + inp.file, self.inputs['product_type'] ] ) ) self._schedule_jobs(jobs) + for job, inp in zip(jobs, data): + if job.results['returncode'] != 0: + inp.skip = True # ******************************************************************** # 4. validate performance + # 4. Check job results, and create output data map file if self.error.isSet(): - self.logger.warn("Failed get_metadata process detected") - return 1 + # Abort if all jobs failed + if all(job.results['returncode'] != 0 for job in jobs): + self.logger.error("All jobs failed. Bailing out!") + return 1 + else: + self.logger.warn( + "Some jobs failed, continuing with succeeded runs" + ) + self.logger.debug("Updating data map file: %s" % args[0]) + data.save(args[0]) # ******************************************************************** # 5. Create the parset-file and write it to disk. diff --git a/CEP/Pipeline/recipes/sip/master/imager_awimager.py b/CEP/Pipeline/recipes/sip/master/imager_awimager.py index 1394e2f58d4cd4bac69aba9d46282b1cc2e0934a..b657042b7d3b151f0f573e6982bd81d183fc58b5 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_awimager.py +++ b/CEP/Pipeline/recipes/sip/master/imager_awimager.py @@ -9,7 +9,7 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): """ @@ -74,8 +74,8 @@ class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): # ********************************************************************* # 1. 
collect the inputs and validate - input_map = load_data_map(self.inputs['args'][0]) - sourcedb_map = load_data_map(self.inputs['sourcedb_path']) + input_map = DataMap.load(self.inputs['args'][0]) + sourcedb_map = DataMap.load(self.inputs['sourcedb_path']) if not validate_data_maps(input_map, sourcedb_map): self.logger.error( @@ -90,11 +90,11 @@ class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): # Compile the command to be executed on the remote machine node_command = "python %s" % (self.__file__.replace("master", "nodes")) jobs = [] - for measurement_set, source in zip(input_map, sourcedb_map): + for measurement_item, source_item in zip(input_map, sourcedb_map): # both the sourcedb and the measurement are in a map # unpack both - host , measurement_set = measurement_set - host2 , sourcedb_path = source + host , measurement_path = measurement_item.host, measurement_item.file + host2 , sourcedb_path = source_item.host, source_item.file #construct and save the output name arguments = [self.inputs['executable'], @@ -102,7 +102,7 @@ class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): self.inputs['parset'], self.inputs['working_directory'], self.inputs['output_image'], - measurement_set, + measurement_path, sourcedb_path, self.inputs['mask_patch_size']] @@ -114,7 +114,10 @@ class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): created_awimages = [] for job in jobs: if job.results.has_key("image"): - created_awimages.append((job.host, job.results["image"])) + created_awimages.append(tuple([job.host, job.results["image"], False])) + else: + created_awimages.append(tuple([job.host, "failed", True])) + # If not succesfull runs abort if len(created_awimages) == 0: @@ -128,8 +131,8 @@ class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): if self.error.isSet(): self.logger.error("Failed awimager node run detected. continue with" "successful tasks.") - - self._store_data_map(self.inputs['mapfile'], created_awimages, + datamap_of_created_im = DataMap(created_awimages) + self._store_data_map(self.inputs['mapfile'], datamap_of_created_im, "mapfile containing produces awimages") self.outputs["mapfile"] = self.inputs['mapfile'] diff --git a/CEP/Pipeline/recipes/sip/master/imager_bbs.py b/CEP/Pipeline/recipes/sip/master/imager_bbs.py index ea1fc9cc04c6222f07549d951e60c58d0cd5796d..ecdef9a3b6363cd6c5369bcf5af4a91afe41a3f4 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_bbs.py +++ b/CEP/Pipeline/recipes/sip/master/imager_bbs.py @@ -9,7 +9,7 @@ import os from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.baserecipe import BaseRecipe -from lofarpipe.support.group_data import load_data_map, validate_data_maps +from lofarpipe.support.data_map import DataMap, MultiDataMap import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.remotecommand import ComputeJob @@ -76,17 +76,18 @@ class imager_bbs(BaseRecipe, RemoteCommandRecipeMixIn): # ******************************************************************** # 1. 
Load the and validate the data - ms_map = load_data_map(self.inputs['args'][0]) - parmdb_map = load_data_map(self.inputs['instrument_mapfile']) - sourcedb_map = load_data_map(self.inputs['sourcedb_mapfile']) - - #Check if the input has equal length and on the same nodes - if not validate_data_maps(ms_map, parmdb_map, sourcedb_map): - self.logger.error("The combination of mapfiles failed validation:") - self.logger.error("ms_map: \n{0}".format(ms_map)) - self.logger.error("parmdb_map: \n{0}".format(parmdb_map)) - self.logger.error("sourcedb_map: \n{0}".format(sourcedb_map)) - return 1 + + ms_map = MultiDataMap.load(self.inputs['args'][0]) + parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile']) + sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile']) + + # TODO: DataMap extention +# #Check if the input has equal length and on the same nodes +# if not validate_data_maps(ms_map, parmdb_map): +# self.logger.error("The combination of mapfiles failed validation:") +# self.logger.error("ms_map: \n{0}".format(ms_map)) +# self.logger.error("parmdb_map: \n{0}".format(parmdb_map)) +# return 1 # ********************************************************************* # 2. Start the node scripts @@ -95,26 +96,24 @@ class imager_bbs(BaseRecipe, RemoteCommandRecipeMixIn): map_dir = os.path.join( self.config.get("layout", "job_directory"), "mapfiles") run_id = str(self.inputs.get("id")) - for (ms, parmdb, sourcedb) in zip(ms_map, parmdb_map, sourcedb_map): #host is same for each entry (validate_data_maps) - (host, ms_list) = ms + host, ms_list = ms.host, ms.file - # Write data maps to mapfiles: The (node, data) pairs are inserted - # into an array to allow writing of the mapfiles using the default - # functions + # Write data maps to MultaDataMaps ms_list_path = os.path.join( map_dir, host + "_ms_" + run_id + ".map") - self._store_data_map( - ms_list_path, [ms], "mapfile with ms") + MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path) + parmdb_list_path = os.path.join( map_dir, host + "_parmdb_" + run_id + ".map") - self._store_data_map( - parmdb_list_path, [parmdb], "mapfile with parmdb") + MultiDataMap( + [tuple([host, parmdb.file, False])]).save(parmdb_list_path) + sourcedb_list_path = os.path.join( map_dir, host + "_sky_" + run_id + ".map") - self._store_data_map( - sourcedb_list_path, [sourcedb], "mapfile with sourcedbs") + MultiDataMap( + [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path) arguments = [self.inputs['bbs_executable'], self.inputs['parset'], @@ -128,13 +127,14 @@ class imager_bbs(BaseRecipe, RemoteCommandRecipeMixIn): # 3. validate the node output and construct the output mapfile. if self.error.isSet(): #if one of the nodes failed self.logger.error("One of the nodes failed while performing" - "a BBS run. Aborting") + "a BBS run. 
Aborting: concat.ms corruption") return 1 # return the output: The measurement set that are calibrated: # calibrated data is placed in the ms sets - self._store_data_map( - self.inputs['mapfile'], ms_map, "datamap with calibrated data") + MultiDataMap(ms_map).save(self.inputs['mapfile']) + self.logger.info("Wrote file with calibrated data") + self.outputs['mapfile'] = self.inputs['mapfile'] return 0 diff --git a/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py b/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py index b5afaec1b66f0b139d36ccd76a168198c3d6f76b..44ceeb5e871c064190822b79ff12f4b09ad026bc 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py +++ b/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py @@ -10,8 +10,7 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, store_data_map, \ - validate_data_maps +from lofarpipe.support.data_map import DataMap, MultiDataMap, validate_data_maps class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): @@ -119,9 +118,11 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): assoc_theta = None # Load mapfile data from files - slice_paths_map = load_data_map(self.inputs["slice_paths_mapfile"]) - input_map = load_data_map(self.inputs['args'][0]) - if self._validate_input_data(slice_paths_map, input_map): + self.logger.error(self.inputs["slice_paths_mapfile"]) + slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"]) + input_map = DataMap.load(self.inputs['args'][0]) + + if self._validate_input_data(input_map, slice_paths_map): return 1 # Run the nodes with now collected inputs @@ -170,16 +171,18 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): node_command = " python %s" % (self.__file__.replace("master", "nodes")) # create jobs jobs = [] - for (input_ms, slice_paths) in zip(input_map, slice_paths_map): - host_ms, concatenated_measurement_set = input_ms - host_slice, slice_paths = slice_paths + for (input_item, slice_item) in zip(input_map, slice_paths_map): + if input_item.skip and slice_item.skip: + continue + host_ms, concat_ms = input_item.host, input_item.file + host_slice, slice_paths = slice_item.host, slice_item.file # Create the parameters depending on the input_map sourcedb_target_path = os.path.join( - concatenated_measurement_set + self.inputs["sourcedb_suffix"]) + concat_ms + self.inputs["sourcedb_suffix"]) # The actual call for the node script - arguments = [concatenated_measurement_set, + arguments = [concat_ms, sourcedb_target_path, self.inputs["monetdb_hostname"], self.inputs["monetdb_port"], @@ -197,7 +200,8 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): jobs.append(ComputeJob(host_ms, node_command, arguments)) # Wait the nodes to finish - self._schedule_jobs(jobs) + if len(jobs) > 0: + self._schedule_jobs(jobs) return jobs @@ -214,14 +218,14 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): for job in jobs: host = job.host if job.results.has_key("sourcedb"): - sourcedb_files.append((host, job.results["sourcedb"])) + sourcedb_files.append(tuple([host, job.results["sourcedb"], False])) else: self.logger.warn("Warning failed ImagerCreateDBs run " "detected: No sourcedb file created, {0} continue".format( host)) if job.results.has_key("parmdbms"): - parmdbs.append((host, 
job.results["parmdbms"])) + parmdbs.append(tuple([host, job.results["parmdbms"], False])) else: self.logger.warn("Failed ImagerCreateDBs run detected: No " "parmdbms created{0} continue".format(host)) @@ -230,14 +234,14 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): if len(sourcedb_files) == 0 or len(parmdbs) == 0: self.logger.error("The creation of dbs on the nodes failed:") self.logger.error("Not a single node produces all needed data") - self.logger.error("products. sourcedb_files: {0} ".format( - sourcedb_files)) + self.logger.error( + "products. sourcedb_files: {0}".format(sourcedb_files)) self.logger.error("parameter dbs: {0}".format(parmdbs)) return 1 # write the mapfiles - store_data_map(self.inputs["sourcedb_map_path"], sourcedb_files) - store_data_map(self.inputs["parmdbs_map_path"], parmdbs) + DataMap(sourcedb_files).save(self.inputs["sourcedb_map_path"]) + MultiDataMap(parmdbs).save(self.inputs["parmdbs_map_path"]) self.logger.debug("Wrote sourcedb dataproducts: {0} \n {1}".format( self.inputs["sourcedb_map_path"], self.inputs["parmdbs_map_path"])) diff --git a/CEP/Pipeline/recipes/sip/master/imager_finalize.py b/CEP/Pipeline/recipes/sip/master/imager_finalize.py index 72ce770a928510077b828774fd95775360a77b5a..a2409902505dada9138427a19b1f20ebd55cd196 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_finalize.py +++ b/CEP/Pipeline/recipes/sip/master/imager_finalize.py @@ -5,8 +5,7 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, validate_data_maps, \ - store_data_map +from lofarpipe.support.data_map import DataMap, validate_data_maps class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): """ @@ -87,66 +86,56 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): super(imager_finalize, self).go() # ********************************************************************* # 1. Load the datamaps - awimager_output_map = load_data_map( + awimager_output_map = DataMap.load( self.inputs["awimager_output_map"]) - raw_ms_per_image_map = load_data_map( + raw_ms_per_image_map = DataMap.load( self.inputs["raw_ms_per_image_map"]) - sourcelist_map = load_data_map(self.inputs["sourcelist_map"]) - sourcedb_map = load_data_map(self.inputs["sourcedb_map"]) - target_mapfile = load_data_map(self.inputs["target_mapfile"]) - output_image_mapfile = load_data_map( + sourcelist_map = DataMap.load(self.inputs["sourcelist_map"]) + sourcedb_map = DataMap.load(self.inputs["sourcedb_map"]) + target_mapfile = DataMap.load(self.inputs["target_mapfile"]) + output_image_mapfile = DataMap.load( self.inputs["output_image_mapfile"]) processed_ms_dir = self.inputs["processed_ms_dir"] fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"] - # The input mapfiles might nog be of the same length: + # The input mapfiles might not be of the same length: # host_source are unique and can be used to match the entries! 
# Final step is the source_finder: use this mapfile as 'source' awimager_output_map_new = [] raw_ms_per_image_map_new = [] target_map_new = [] output_image_map_new = [] - for host_source, path_source in sourcelist_map: - for host_comp, path_comp in awimager_output_map: - if host_comp == host_source: - awimager_output_map_new.append((host_comp, path_comp)) - - for host_comp, path_comp in raw_ms_per_image_map: - if host_comp == host_source: - raw_ms_per_image_map_new.append((host_comp, path_comp)) - - for host_comp, path_comp in target_mapfile: - if host_comp == host_source: - target_map_new.append((host_comp, path_comp)) - - for host_comp, path_comp in output_image_mapfile: - if host_comp == host_source: - output_image_map_new.append((host_comp, path_comp)) - # The input mapfiles might nog be of the same length: - # host_source are unique and can be used to match the entries! - # Final step is the source_finder: use this mapfile as 'source' - awimager_output_map_new = [] - raw_ms_per_image_map_new = [] - target_map_new = [] - output_image_map_new = [] - for host_source, path_source in sourcelist_map: - for host_comp, path_comp in awimager_output_map: + for item in sourcelist_map: + host_source, path_source = item.host, item.file + for item in awimager_output_map: + host_comp, path_comp = item.host, item.file if host_comp == host_source: - awimager_output_map_new.append((host_comp, path_comp)) + awimager_output_map_new.append( + tuple([host_comp, path_comp, False])) - for host_comp, path_comp in raw_ms_per_image_map: + for item in raw_ms_per_image_map: + host_comp, path_comp = item.host, item.file if host_comp == host_source: - raw_ms_per_image_map_new.append((host_comp, path_comp)) + raw_ms_per_image_map_new.append( + tuple([host_comp, path_comp, False])) - for host_comp, path_comp in target_mapfile: + for item in target_mapfile: + host_comp, path_comp = item.host, item.file if host_comp == host_source: - target_map_new.append((host_comp, path_comp)) + target_map_new.append( + tuple([host_comp, path_comp, False])) - for host_comp, path_comp in output_image_mapfile: + for item in output_image_mapfile: + host_comp, path_comp = item.host, item.file if host_comp == host_source: - output_image_map_new.append((host_comp, path_comp)) + output_image_map_new.append( + tuple([host_comp, path_comp, False])) + awimager_output_map_new = DataMap(awimager_output_map_new) + raw_ms_per_image_map_new = DataMap(raw_ms_per_image_map_new) + target_map_new = DataMap(target_map_new) + output_image_map_new = DataMap(output_image_map_new) # chech validity of the maps: all on same node with the same length if not validate_data_maps(awimager_output_map_new, raw_ms_per_image_map_new, sourcelist_map, target_map_new, output_image_map_new): @@ -168,18 +157,18 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): # 2. 
Run the node side of the recupe command = " python %s" % (self.__file__.replace("master", "nodes")) jobs = [] - for (awimager_output_pair, raw_ms_per_image_pair, sourcelist_pair, - target_pair, output_image_pair, sourcedb_pair) in zip( + for (awimager_output_item, raw_ms_per_image_item, sourcelist_item, + target_item, output_image_item, sourcedb_item) in zip( awimager_output_map_new, raw_ms_per_image_map_new, sourcelist_map, target_map_new, output_image_map_new, sourcedb_map): # collect the data for the current node from the indexes in the # mapfiles - (host, awimager_output) = awimager_output_pair - (host, raw_ms_per_image) = raw_ms_per_image_pair - (host, sourcelist) = sourcelist_pair - (host, target) = target_pair - (host, output_image) = output_image_pair - (host, sourcedb) = sourcedb_pair + host, target = target_item.host, target_item.file + awimager_output = awimager_output_item.file + raw_ms_per_image = raw_ms_per_image_item.file + sourcelist = sourcelist_item.file + output_image = output_image_item.file + sourcedb = sourcedb_item.file arguments = [awimager_output, raw_ms_per_image, sourcelist, target, output_image, self.inputs["minbaseline"], @@ -194,13 +183,15 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): placed_images = [] for job in jobs: if job.results.has_key("hdf5"): - placed_images.append((job.host, job.results["image"])) + placed_images.append(tuple([job.host, job.results["image"], False])) + else: + placed_images.append(tuple([job.host, job.results["image"], True])) - if self.error.isSet(): + if len(placed_images) == 0: self.logger.warn("Failed finalizer node run detected") return 1 - store_data_map(self.inputs['placed_image_mapfile'], placed_images) + DataMap(placed_images).save(self.inputs['placed_image_mapfile']) self.logger.debug( "Wrote mapfile containing placed hdf5 images: {0}".format( self.inputs['placed_image_mapfile'])) diff --git a/CEP/Pipeline/recipes/sip/master/imager_prepare.py b/CEP/Pipeline/recipes/sip/master/imager_prepare.py index b157da011a2007b13876ce3c30c114515f6da5fd..7929e8fe181b07a1c7bfa2d46d3f9411035faa2e 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_prepare.py +++ b/CEP/Pipeline/recipes/sip/master/imager_prepare.py @@ -13,16 +13,17 @@ from __future__ import with_statement import os import sys +import copy import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import store_data_map, load_data_map +from lofarpipe.support.data_map import DataMap, MultiDataMap class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): """ Prepare phase master: - + 1. Validate input 2. Create mapfiles with input for work to be perform on the individual nodes based on the structured input mapfile. 
The input mapfile contains a list @@ -126,12 +127,12 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): self.logger.info("Starting imager_prepare run") # ********************************************************************* # input data - input_map = load_data_map(self.inputs['args'][0]) - output_map = load_data_map(self.inputs['target_mapfile']) + input_map = DataMap.load(self.inputs['args'][0]) + output_map = DataMap.load(self.inputs['target_mapfile']) slices_per_image = self.inputs['slices_per_image'] subbands_per_image = self.inputs['subbands_per_image'] # Validate input - if self._validate_input_map(input_map, output_map, slices_per_image, + if not self._validate_input_map(input_map, output_map, slices_per_image, subbands_per_image): return 1 @@ -146,12 +147,11 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): jobs = [] paths_to_image_mapfiles = [] - n_subband_groups = len(output_map) - for idx_sb_group, (host, output_measurement_set) in enumerate( - output_map): + n_subband_groups = len(output_map) + for idx_sb_group, item in enumerate(output_map): #create the input files for this node self.logger.debug("Creating input data subset for processing" - "on: {0}".format(host)) + "on: {0}".format(item.host)) inputs_for_image_map = \ self._create_input_map_for_sbgroup( slices_per_image, n_subband_groups, @@ -167,15 +167,15 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): inputs_for_image_mapfile_path, "inputmap for location") #save the (input) ms, as a list of mapfiles - paths_to_image_mapfiles.append((host, - inputs_for_image_mapfile_path)) + paths_to_image_mapfiles.append( + tuple([item.host, inputs_for_image_mapfile_path, False])) arguments = [self.environment, self.inputs['parset'], self.inputs['working_directory'], self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'], - output_measurement_set, + item.file, slices_per_image, subbands_per_image, inputs_for_image_mapfile_path, @@ -184,56 +184,60 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): self.inputs['msselect_executable'], self.inputs['rficonsole_executable']] - jobs.append(ComputeJob(host, node_command, arguments)) + jobs.append(ComputeJob(item.host, node_command, arguments)) # Hand over the job(s) to the pipeline scheduler self._schedule_jobs(jobs) # ********************************************************************* # validate the output, cleanup, return output - slices = [] if self.error.isSet(): #if one of the nodes failed self.logger.warn("Failed prepare_imager run detected: Generating " "new output_ms_mapfile_path without failed runs:" " {0}".format(output_ms_mapfile_path)) - concatenated_timeslices = [] - #scan the return dict for completed key - for ((host, output_measurement_set), job) in zip(output_map, jobs): - if job.results.has_key("completed"): - concatenated_timeslices.append( - (host, output_measurement_set)) - - #only save the slices if the node has completed succesfull - if job.results.has_key("time_slices"): - slices.append((host, job.results["time_slices"])) - else: - self.logger.warn( - "Failed run on {0}. 
NOT Created: {1} ".format( - host, output_measurement_set)) - if len(concatenated_timeslices) == 0: - self.logger.error("None of the started compute node finished:" - "The current recipe produced no output, aborting") - return 1 - - self._store_data_map(output_ms_mapfile_path, - concatenated_timeslices, - "mapfile with concatenated timeslace == ms with all data") - - else: #Copy output map from input output_ms_mapfile_path and return - store_data_map(output_ms_mapfile_path, output_map) - for ((host, output_measurement_set), job) in zip(output_map, jobs): - if job.results.has_key("time_slices"): - slices.append((host, job.results["time_slices"])) - - self._store_data_map(self.inputs['slices_mapfile'], slices, - "mapfile with Time_slice") + + concat_ms = copy.deepcopy(output_map) + slices = [] + finished_runs = 0 + #scan the return dict for completed key + for (item, job) in zip(concat_ms, jobs): + # only save the slices if the node has completed succesfull + if job.results["returncode"] == 0: + finished_runs += 1 + slices.append(tuple([item.host, + job.results["time_slices"], False])) + else: + # Set the dataproduct to skipped!! + item.skip = True + slices.append(tuple([item.host, "/Failed", True])) + msg = "Failed run on {0}. NOT Created: {1} ".format( + item.host, item.file) + self.logger.warn(msg) + + if finished_runs == 0: + self.logger.error("None of the started compute node finished:" + "The current recipe produced no output, aborting") + return 1 + + # Write the output mapfiles: + # concat.ms paths: + self._store_data_map(output_ms_mapfile_path, concat_ms, + "mapfile with concat.ms") + + # timeslices + MultiDataMap(slices).save(self.inputs['slices_mapfile']) + self.logger.info( + "Wrote MultiMapfile with produces timeslice: {0}".format( + self.inputs['slices_mapfile'])) + + #map with actual input mss. self._store_data_map(self.inputs["raw_ms_per_image_mapfile"], - paths_to_image_mapfiles, - " mapfile containing (raw) input ms:") + DataMap(paths_to_image_mapfiles), + "mapfile containing (raw) input ms per image:") - # Set the outputs + # Set the return values self.outputs['mapfile'] = output_ms_mapfile_path - self.outputs['slices_mapfile'] = self.inputs["slices_mapfile"] + self.outputs['slices_mapfile'] = self.inputs['slices_mapfile'] self.outputs['raw_ms_per_image_mapfile'] = \ self.inputs["raw_ms_per_image_mapfile"] return 0 @@ -260,14 +264,15 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): #extend inputs with the files for the current time slice inputs_for_image.extend(input_mapfile[line_idx_start: line_idx_end]) - return inputs_for_image + return DataMap(inputs_for_image) def _validate_input_map(self, input_map, output_map, slices_per_image, subbands_per_image): """ - Return 1 if the inputs supplied are incorrect, the number if inputs and - output does not match. Return 0 if correct. + Return False if the inputs supplied are incorrect: + the number if inputs and output does not match. + Return True if correct. The number of inputs is correct iff. 
len(input_map) == len(output_map) * slices_per_image * subbands_per_image @@ -289,9 +294,9 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): len(output_map) * slices_per_image * subbands_per_image ) ) - return 1 + return False - return 0 + return True if __name__ == "__main__": diff --git a/CEP/Pipeline/recipes/sip/master/imager_source_finding.py b/CEP/Pipeline/recipes/sip/master/imager_source_finding.py index f7883836b802253dfd97ddd6b61fbc3db41b8324..9b3be83a608881fece24398dfdb19936c57cc5f7 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_source_finding.py +++ b/CEP/Pipeline/recipes/sip/master/imager_source_finding.py @@ -6,7 +6,7 @@ from lofarpipe.support.baserecipe import BaseRecipe import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.remotecommand import ComputeJob from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn -from lofarpipe.support.group_data import load_data_map +from lofarpipe.support.data_map import DataMap class imager_source_finding(BaseRecipe, RemoteCommandRecipeMixIn): """ @@ -81,17 +81,16 @@ class imager_source_finding(BaseRecipe, RemoteCommandRecipeMixIn): # ******************************************************************** # 1. load mapfiles with input images and collect some parameters from # The input ingredients - input_map = load_data_map(self.inputs['args'][0]) + input_map = DataMap.load(self.inputs['args'][0]) catalog_output_path = self.inputs["catalog_output_path"] # ******************************************************************** # 2. Start the node script node_command = " python %s" % (self.__file__.replace("master", "nodes")) jobs = [] - created_sourcelists = [] - created_sourcedbs = [] - for host, data in input_map: - arguments = [data, + + for item in input_map: + arguments = [item.file, self.inputs["bdsm_parset_file_run1"], self.inputs["bdsm_parset_file_run2x"], catalog_output_path, @@ -103,10 +102,8 @@ class imager_source_finding(BaseRecipe, RemoteCommandRecipeMixIn): self.inputs['working_directory'], self.inputs['makesourcedb_path'] ] - created_sourcelists.append((host, catalog_output_path)) - created_sourcedbs.append((host, - self.inputs['sourcedb_target_path'])) - jobs.append(ComputeJob(host, node_command, arguments)) + + jobs.append(ComputeJob(item.host, node_command, arguments)) # Hand over the job(s) to the pipeline scheduler self._schedule_jobs(jobs) @@ -121,11 +118,17 @@ class imager_source_finding(BaseRecipe, RemoteCommandRecipeMixIn): catalog_output_path_from_nodes = [] for job in jobs: if "source_db" in job.results: - source_dbs_from_nodes.append(( - job.host, job.results["source_db"])) + source_dbs_from_nodes.append(tuple([ + job.host, job.results["source_db"], False])) + # We now also have catalog path + catalog_output_path_from_nodes.append(tuple([ + job.host, job.results["catalog_output_path"], False])) + else: + source_dbs_from_nodes.append(tuple([ + job.host, "/failed", True])) # We now also have catalog path - catalog_output_path_from_nodes.append(( - job.host, job.results["catalog_output_path"])) + catalog_output_path_from_nodes.append(tuple([ + job.host, "/failed", True])) # Abort if none of the recipes succeeded if len(source_dbs_from_nodes) == 0: @@ -133,11 +136,12 @@ class imager_source_finding(BaseRecipe, RemoteCommandRecipeMixIn): self.logger.error("Exiting with a failure status") return 1 - self.logger.info(created_sourcelists) - self._store_data_map(self.inputs['mapfile'], created_sourcelists, + self._store_data_map(self.inputs['mapfile'], + 
DataMap(catalog_output_path_from_nodes), "datamap with created sourcelists") self._store_data_map(self.inputs['sourcedb_map_path'], - created_sourcedbs, " datamap with created sourcedbs") + DataMap(source_dbs_from_nodes), + " datamap with created sourcedbs") self.outputs["mapfile"] = self.inputs['mapfile'] self.outputs["sourcedb_map_path"] = self.inputs['sourcedb_map_path'] diff --git a/CEP/Pipeline/recipes/sip/master/setupparmdb.py b/CEP/Pipeline/recipes/sip/master/setupparmdb.py index 98de085b74c28a152f4ba74faf0cc42e416e183b..0931ec90e35776e10dece8d66690790fe92cbf6c 100644 --- a/CEP/Pipeline/recipes/sip/master/setupparmdb.py +++ b/CEP/Pipeline/recipes/sip/master/setupparmdb.py @@ -7,6 +7,7 @@ # loose@astron.nl # ------------------------------------------------------------------------------ +import copy import os import sys import subprocess @@ -16,8 +17,7 @@ import tempfile from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.group_data import validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps from lofarpipe.support.pipelinelogging import log_process_output import lofarpipe.support.lofaringredient as ingredient @@ -116,11 +116,11 @@ class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn): # ------------------------------------------------------------------ args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) - indata = load_data_map(args[0]) + indata = DataMap.load(args[0]) if len(args) > 1: # If output location provide validate the input and outputmap self.logger.debug("Loading output-data mapfile: %s" % args[1]) - outdata = load_data_map(args[1]) + outdata = DataMap.load(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed" @@ -128,29 +128,32 @@ class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn): return 1 # else output location is inputlocation+suffix else: - outdata = [ - (host, - os.path.join( + outdata = copy.deepcopy(indata) + for item in outdata: + item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], - os.path.basename(infile) + self.inputs['suffix']) - ) for host, infile in indata - ] + os.path.basename(item.file) + self.inputs['suffix'] + ) # Call the node side command = "python %s" % (self.__file__.replace('master', 'nodes')) + outdata.iterator = DataMap.SkipIterator jobs = [] - for host, outfile in outdata: + for outp in outdata: jobs.append( ComputeJob( - host, + outp.host, command, arguments=[ pdbfile, - outfile + outp.file ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) + for job, outp in zip(jobs, outdata): + if job.results['returncode'] != 0: + outp.skip = True # ********************************************************************* # 3. 
validate performance, cleanup of temp files, construct output @@ -159,14 +162,20 @@ class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn): shutil.rmtree(pdbdir, ignore_errors=True) if self.error.isSet(): - self.logger.warn("Detected failed parmdb job") - return 1 - else: - self.logger.debug("Writing parmdb map file: %s" % - self.inputs['mapfile']) - store_data_map(self.inputs['mapfile'], outdata) - self.outputs['mapfile'] = self.inputs['mapfile'] - return 0 + # Abort if all jobs failed + if all(job.results['returncode'] != 0 for job in jobs): + self.logger.error("All jobs failed. Bailing out!") + return 1 + else: + self.logger.warn( + "Some jobs failed, continuing with succeeded runs" + ) + self.logger.debug( + "Writing parmdb map file: %s" % self.inputs['mapfile'] + ) + outdata.save(self.inputs['mapfile']) + self.outputs['mapfile'] = self.inputs['mapfile'] + return 0 if __name__ == '__main__': diff --git a/CEP/Pipeline/recipes/sip/master/setupsourcedb.py b/CEP/Pipeline/recipes/sip/master/setupsourcedb.py index 086134921fcae7ea6dc3570bc8eb4e9ef7ec90bf..7a6d121e4be4c077401e2578b90b231c5111dc02 100644 --- a/CEP/Pipeline/recipes/sip/master/setupsourcedb.py +++ b/CEP/Pipeline/recipes/sip/master/setupsourcedb.py @@ -7,15 +7,14 @@ # loose@astron.nl # ------------------------------------------------------------------------------ -from __future__ import with_statement +import copy import os import sys import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn -from lofarpipe.support.group_data import load_data_map, store_data_map -from lofarpipe.support.group_data import validate_data_maps +from lofarpipe.support.data_map import DataMap, validate_data_maps from lofarpipe.support.remotecommand import ComputeJob class setupsourcedb(BaseRecipe, RemoteCommandRecipeMixIn): @@ -72,8 +71,9 @@ class setupsourcedb(BaseRecipe, RemoteCommandRecipeMixIn): } outputs = { - 'mapfile': ingredient.FileField(help="mapfile with created sourcedb" - "paths") + 'mapfile': ingredient.FileField( + help="mapfile with created sourcedb paths" + ) } @@ -86,24 +86,23 @@ class setupsourcedb(BaseRecipe, RemoteCommandRecipeMixIn): args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) - indata = load_data_map(args[0]) + indata = DataMap.load(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) - outdata = load_data_map(args[1]) + outdata = DataMap.load(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed" ) return 1 else: - outdata = [ - (host, - os.path.join( + outdata = copy.deepcopy(indata) + for item in outdata: + item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], - os.path.basename(infile) + self.inputs['suffix']) - ) for host, infile in indata - ] + os.path.basename(item.file) + self.inputs['suffix'] + ) # ********************************************************************* # 2. Check if input skymodel file exists. If not, make filename empty. @@ -116,32 +115,41 @@ class setupsourcedb(BaseRecipe, RemoteCommandRecipeMixIn): # ******************************************************************** # 3. 
Call node side of script command = "python %s" % (self.__file__.replace('master', 'nodes')) + outdata.iterator = DataMap.SkipIterator jobs = [] - for host, outfile in outdata: + for outp in outdata: jobs.append( ComputeJob( - host, + outp.host, command, arguments=[ self.inputs['executable'], skymodel, - outfile, + outp.file, self.inputs['type'] ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) + for job, outp in zip(jobs, outdata): + if job.results['returncode'] != 0: + outp.skip = True # ********************************************************************* - # 4. check performance and create output data + # 4. Check job results, and create output data map file if self.error.isSet(): - return 1 - else: - self.logger.debug("Writing sky map file: %s" % - self.inputs['mapfile']) - store_data_map(self.inputs['mapfile'], outdata) - self.outputs['mapfile'] = self.inputs['mapfile'] - return 0 + # Abort if all jobs failed + if all(job.results['returncode'] != 0 for job in jobs): + self.logger.error("All jobs failed. Bailing out!") + return 1 + else: + self.logger.warn( + "Some jobs failed, continuing with succeeded runs" + ) + self.logger.debug("Writing sky map file: %s" % self.inputs['mapfile']) + outdata.save(self.inputs['mapfile']) + self.outputs['mapfile'] = self.inputs['mapfile'] + return 0 if __name__ == '__main__': diff --git a/CEP/Pipeline/recipes/sip/master/vdsmaker.py b/CEP/Pipeline/recipes/sip/master/vdsmaker.py index 355f76d347f4d9e1fea113e806fd8f106eb35429..6d000ba0f915871174867358b0f6a4799a8f521e 100644 --- a/CEP/Pipeline/recipes/sip/master/vdsmaker.py +++ b/CEP/Pipeline/recipes/sip/master/vdsmaker.py @@ -15,7 +15,7 @@ from lofarpipe.support.utilities import create_directory from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.group_data import load_data_map +from lofarpipe.support.data_map import DataMap from lofarpipe.support.pipelinelogging import log_process_output class vdsmaker(BaseRecipe, RemoteCommandRecipeMixIn): @@ -74,36 +74,38 @@ class vdsmaker(BaseRecipe, RemoteCommandRecipeMixIn): # 1. Load data from disk create output files args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) - data = load_data_map(args[0]) + data = DataMap.load(args[0]) + + # Skip items in `data` that have 'skip' set to True + data.iterator = DataMap.SkipIterator # Create output vds names vdsnames = [ os.path.join( - self.inputs['directory'], os.path.basename(x[1]) + '.vds' - ) for x in data + self.inputs['directory'], os.path.basename(item.file) + '.vds' + ) for item in data ] # ********************************************************************* # 2. Call vdsmaker command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] - for host, infile, outfile in (x + (y,) for x, y in zip(data, vdsnames)): + for inp, vdsfile in zip(data, vdsnames): jobs.append( ComputeJob( - host, command, + inp.host, command, arguments=[ - infile, + inp.file, self.config.get('cluster', 'clusterdesc'), - outfile, + vdsfile, self.inputs['makevds'] ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) - - if self.error.isSet(): - self.logger.warn("Failed vdsmaker process detected") - return 1 + for idx, job in enumerate(jobs): + if job.results['returncode'] != 0: + del vdsnames[idx] # ********************************************************************* # 3. 
Combine VDS files to produce GDS diff --git a/CEP/Pipeline/recipes/sip/nodes/gainoutliercorrection.py b/CEP/Pipeline/recipes/sip/nodes/gainoutliercorrection.py index 17ea475c2f8cf25177be1189c37c2cacb5ddefef..d5c0d0efaf88858ed6c8c0fa0697d5bef03f4dd5 100644 --- a/CEP/Pipeline/recipes/sip/nodes/gainoutliercorrection.py +++ b/CEP/Pipeline/recipes/sip/nodes/gainoutliercorrection.py @@ -135,11 +135,11 @@ class gainoutliercorrection(LOFARnodeTCP): polarization_data, type_pair = \ self._read_polarisation_data_and_type_from_db(parmdb, station) - corected_data = self._swap_outliers_with_median(polarization_data, + corrected_data = self._swap_outliers_with_median(polarization_data, type_pair, sigma) #print polarization_data self._write_corrected_data(parmdb, station, - polarization_data, corected_data) + polarization_data, corrected_data) def _read_polarisation_data_and_type_from_db(self, parmdb, station): """ @@ -203,7 +203,7 @@ class gainoutliercorrection(LOFARnodeTCP): median, amplitudes ) - # assign the corect data back to the complex_array + # assign the correct data back to the complex_array complex_array.amp = numpy.concatenate((corrected, complex_array.amp[-1:])) # collect all corrected data corrected_polarization_data[pol] = complex_array @@ -228,18 +228,18 @@ class gainoutliercorrection(LOFARnodeTCP): return complex_array def _write_corrected_data(self, parmdb, station, polarization_data, - corected_data): + corrected_data): """ Use pyparmdb to write (now corrected) data to the parmdb """ for pol, data in polarization_data.iteritems(): - if not pol in corected_data: + if not pol in corrected_data: error_message = "Requested polarisation type is unknown:" \ - "{0} \n valid polarisations: {1}".format(pol, corected_data.keys()) + "{0} \n valid polarisations: {1}".format(pol, corrected_data.keys()) self.logger.error(error_message) raise PipelineRecipeFailed(error_message) - corrected_data_pol = corected_data[pol] + corrected_data_pol = corrected_data[pol] #get the "complex" converted data from the complex array for component, value in corrected_data_pol.writeable.iteritems(): #Collect all the data needed to write an array diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py b/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py index dc2172498222520f5946afdc10078fee14f791c9..995ca98c6eaa4ff57610771023a7cb4589533c18 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py @@ -9,6 +9,7 @@ import sys from lofarpipe.support.lofarnode import LOFARnodeTCP from lofarpipe.support.group_data import load_data_map from lofarpipe.support.subprocessgroup import SubProcessGroup +from lofarpipe.support.data_map import MultiDataMap class imager_bbs(LOFARnodeTCP): """ @@ -32,24 +33,27 @@ class imager_bbs(LOFARnodeTCP): # read in the mapfiles to data maps: The master recipe added the single # path to a mapfilem which allows usage of default data methods # (load_data_map) - node, ms_list = load_data_map(ms_list_path)[0] - node, parmdb_list = load_data_map(parmdb_list_path)[0] - node, sky_list = load_data_map(sky_list_path)[0] + # TODO: Datamap + ms_map = MultiDataMap.load(ms_list_path) + parmdb_map = MultiDataMap.load(parmdb_list_path) + sky_list = MultiDataMap.load(sky_list_path) + source_db = sky_list[0].file[0] # the sourcedb is the first file entry try: bbs_process_group = SubProcessGroup(self.logger) # ***************************************************************** # 2. 
start the bbs executable with data - for (measurement_set, parmdm, sky) in zip( - ms_list, parmdb_list, sky_list): + for (measurement_set, parmdm) in zip(ms_map[0].file, + parmdb_map[0].file): command = [ bbs_executable, - "--sourcedb={0}".format(sky), + "--sourcedb={0}".format(source_db), "--parmdb={0}".format(parmdm) , measurement_set, parset] self.logger.info("Executing bbs command: {0}".format(" ".join( command))) + bbs_process_group.run(command) # ***************************************************************** diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py b/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py index e126cbb57e3908e3b5477d2a986a09fb63655409..123fc9d8327b84f7c38b616af1d2db9b34678b3c 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py @@ -16,7 +16,7 @@ from lofarpipe.support.utilities import log_time, create_directory import lofar.addImagingInfo as addimg import pyrap.images as pim from lofarpipe.support.utilities import catch_segfaults -from lofarpipe.support.group_data import load_data_map +from lofarpipe.support.data_map import DataMap from lofarpipe.support.pipelinelogging import CatchLog4CPlus import urllib2 @@ -54,7 +54,7 @@ class imager_finalize(LOFARnodeTCP): :rtype: self.outputs['image'] path to the produced hdf5 image """ with log_time(self.logger): - raw_ms_per_image_map = load_data_map(raw_ms_per_image) + raw_ms_per_image_map = DataMap.load(raw_ms_per_image) # ***************************************************************** # 1. add image info @@ -63,7 +63,8 @@ class imager_finalize(LOFARnodeTCP): # TODO: BUG!! the meta data might contain files that were copied # but failed in imager_bbs processed_ms_paths = [] - for (node, path) in raw_ms_per_image_map: + for item in raw_ms_per_image_map: + path = item.file raw_ms_file_name = os.path.split(path)[1] #if the raw ms is in the processed dir (additional check) if (raw_ms_file_name in file_list): diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py b/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py index 8c4d1c73ac386153dcd2f19addcb5940fe594555..a7d6327235c1b9b1593ec18660d129087936fe98 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py @@ -9,14 +9,14 @@ import sys import shutil import os import subprocess - +import copy from lofarpipe.support.pipelinelogging import CatchLog4CPlus from lofarpipe.support.pipelinelogging import log_time from lofarpipe.support.utilities import patch_parset from lofarpipe.support.utilities import catch_segfaults from lofarpipe.support.lofarnode import LOFARnodeTCP from lofarpipe.support.utilities import create_directory -from lofarpipe.support.group_data import load_data_map +from lofarpipe.support.data_map import DataMap from lofarpipe.support.subprocessgroup import SubProcessGroup import pyrap.tables as pt @@ -30,7 +30,7 @@ class imager_prepare(LOFARnodeTCP): Steps perform on the node: 0. Create directories and assure that they are empty. - 1. Collect the Measurement Sets (MSs): copy to the current node. + 1. Collect the Measurement Sets (MSs): copy to the current node. 2. Start dppp: Combines the data from subgroups into single timeslice. 3. Flag rfi. 4. Add addImagingColumns to the casa ms. 
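The hunks around this point (imager_bbs, imager_finalize, and the imager_prepare changes that follow) all switch from unpacking (host, file) tuples produced by load_data_map to iterating DataProduct items from a DataMap. Below is a minimal sketch of that read pattern outside any particular node script; the helper name unskipped_files and its argument are invented for illustration, while DataMap.load, DataMap.SkipIterator and the host/file/skip attributes are the API this patch adds in lofarpipe.support.data_map.

# Hedged sketch of the new node-side read pattern (names of the helper and
# the mapfile argument are hypothetical; the DataMap API is from this patch).
from lofarpipe.support.data_map import DataMap

def unskipped_files(mapfile_path):
    """Return the file paths of all map entries whose skip flag is False."""
    data_map = DataMap.load(mapfile_path)      # parse the data-map file from disk
    data_map.iterator = DataMap.SkipIterator   # yield only entries with skip == False
    # Each item is a DataProduct with .host, .file and .skip, instead of the
    # old-style (host, file) tuple returned by group_data.load_data_map.
    return [item.file for item in data_map]

The TupleIterator remains available for backward compatibility, which is why recipes that still need (host, file) pairs can set data_map.iterator = DataMap.TupleIterator instead.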
@@ -50,7 +50,7 @@ class imager_prepare(LOFARnodeTCP): """ self.environment.update(environment) with log_time(self.logger): - input_map = load_data_map(raw_ms_mapfile) + input_map = DataMap.load(raw_ms_mapfile) #****************************************************************** # I. Create the directories used in this recipe @@ -69,86 +69,57 @@ class imager_prepare(LOFARnodeTCP): self.logger.debug("and assured it is empty") #****************************************************************** - # 1. Copy the input files (caching included for testing purpose) - missing_files = self._cached_copy_input_files( - processed_ms_dir, input_map, - skip_copy=False) - if len(missing_files) != 0: - self.logger.warn("A number of measurement sets could not be" - "copied: {0}".format(missing_files)) + # 1. Copy the input files + copied_ms_map = self._copy_input_files( + processed_ms_dir, input_map) #****************************************************************** # 2. run dppp: collect frequencies into larger group - time_slices = \ + time_slices_path_list = \ self._run_dppp(working_dir, time_slice_dir, - time_slices_per_image, input_map, subbands_per_group, + time_slices_per_image, copied_ms_map, subbands_per_group, processed_ms_dir, parset, ndppp_executable) # If no timeslices were created, bail out with exit status 1 - if len(time_slices) == 0: + if len(time_slices_path_list) == 0: self.logger.error("No timeslices were created.") self.logger.error("Exiting with error state 1") return 1 - self.logger.debug("Produced time slices: {0}".format(time_slices)) + self.logger.debug( + "Produced time slices: {0}".format(time_slices_path_list)) #*********************************************************** # 3. run rfi_concole: flag datapoints which are corrupted self._run_rficonsole(rficonsole_executable, time_slice_dir, - time_slices) + time_slices_path_list) #****************************************************************** # 4. Add imaging columns to each timeslice # ndppp_executable fails if not present - for ms in time_slices: - pt.addImagingColumns(ms) + for time_slice_path in time_slices_path_list: + pt.addImagingColumns(time_slice_path) self.logger.debug( - "Added imaging columns to ms: {0}".format(ms)) + "Added imaging columns to time_slice: {0}".format( + time_slice_path)) #***************************************************************** # 5. Filter bad stations - group_measurement_filtered = self._filter_bad_stations( - time_slices, asciistat_executable, + time_slice_filtered_path_list = self._filter_bad_stations( + time_slices_path_list, asciistat_executable, statplot_executable, msselect_executable) #****************************************************************** # 6. 
Perform the (virtual) concatenation of the timeslices - self._concat_timeslices(group_measurement_filtered, + self._concat_timeslices(time_slice_filtered_path_list, output_measurement_set) #****************************************************************** # return - self.outputs["time_slices"] = group_measurement_filtered - self.outputs["completed"] = "true" + self.outputs["time_slices"] = \ + time_slice_filtered_path_list - return 0 - - def _cached_copy_input_files(self, processed_ms_dir, - input_map, skip_copy=False): - """ - Perform a optionalskip_copy copy of the input ms: - For testing purpose the output, the missing_files can be saved - allowing the skip of this step - """ - missing_files = [] - temp_missing = os.path.join(processed_ms_dir, "temp_missing") - - if not skip_copy: - #Collect all files and copy to current node - missing_files = self._copy_input_files(processed_ms_dir, - input_map) - - file_pointer = open(temp_missing, 'w') - file_pointer.write(repr(missing_files)) - self.logger.debug( - "Wrote file with missing measurement sets: {0}".format( - temp_missing)) - file_pointer.close() - else: - file_pointer = open(temp_missing) - missing_files = eval(file_pointer.read()) - file_pointer.close() - return missing_files + return 0 def _copy_input_files(self, processed_ms_dir, input_map): """ @@ -157,18 +128,26 @@ class imager_prepare(LOFARnodeTCP): This function collects all the file in the input map in the processed_ms_dir Return value is a set of missing files """ - missing_files = [] - + copied_ms_map = copy.deepcopy(input_map) #loop all measurement sets - for node, path in input_map: + for input_item, copied_item in zip(input_map, copied_ms_map): + # fill the copied item with the correct data + copied_item.host = self.host + copied_item.file = os.path.join( + processed_ms_dir, os.path.basename(input_item.file)) + + # If we have to skip this ms + if input_item.skip == True: + exit_status = 1 # + # construct copy command - command = ["rsync", "-r", "{0}:{1}".format(node, path), + command = ["rsync", "-r", "{0}:{1}".format( + input_item.host, input_item.file), "{0}".format(processed_ms_dir)] self.logger.debug("executing: " + " ".join(command)) # Spawn a subprocess and connect the pipes - # DO NOT USE SUBPROCESSGROUP # The copy step is performed 720 at once in that case which might # saturate the cluster. copy_process = subprocess.Popen( @@ -183,15 +162,18 @@ class imager_prepare(LOFARnodeTCP): exit_status = copy_process.returncode - #if copy failed log the missing file + #if copy failed log the missing file and update the skip fields if exit_status != 0: - missing_files.append(path) - self.logger.warning("Failed loading file: {0}".format(path)) + input_item.skip = True + copied_item.skip = True + self.logger.warning( + "Failed loading file: {0}".format(input_item.file)) self.logger.warning(stderrdata) + self.logger.debug(stdoutdata) - # return the missing files (for 'logging') - return set(missing_files) + return copied_ms_map + def _dppp_call(self, working_dir, ndppp, cmd, environment): """ @@ -205,7 +187,7 @@ class imager_prepare(LOFARnodeTCP): logger, cleanup=None) def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image, - input_map, subbands_per_image, collected_ms_dir_name, parset, + copied_ms_map, subbands_per_image, collected_ms_dir_name, parset, ndppp): """ Run NDPPP: @@ -213,21 +195,14 @@ class imager_prepare(LOFARnodeTCP): Call with log for cplus and catch segfaults. 
Pparameters are supplied in parset """ - time_slice_path_collected = [] + time_slice_path_list = [] for idx_time_slice in range(slices_per_image): - # Get the subset of ms that are part of the current timeslice - input_map_subgroup = \ - input_map[(idx_time_slice * subbands_per_image): \ - ((idx_time_slice + 1) * subbands_per_image)] - - # get the filenames - input_subgroups = map(lambda x: x.split("/")[-1], - list(zip(*input_map_subgroup)[1])) - - # join with the group_measurement_directory to get the locations - # on the local node - ndppp_input_ms = map(lambda x: os.path.join( - collected_ms_dir_name, x), input_subgroups) + start_slice_range = idx_time_slice * subbands_per_image + end_slice_range = (idx_time_slice + 1) * subbands_per_image + # Get the subset of ms that are part of the current timeslice, + # cast to datamap + input_map_subgroup = DataMap( + copied_ms_map[start_slice_range:end_slice_range]) output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice) @@ -235,6 +210,11 @@ class imager_prepare(LOFARnodeTCP): time_slice_path = os.path.join(time_slice_dir_path, output_ms_name) + # convert the datamap to a file list: Do not remove skipped files: + # ndppp needs the incorrect files there to allow filling with zeros + ndppp_input_ms = [item.file for item in input_map_subgroup] + + # Join into a single list of paths. msin = "['{0}']".format("', '".join(ndppp_input_ms)) # Update the parset with computed parameters patch_dictionary = {'uselogger': 'True', # enables log4cplus @@ -271,7 +251,7 @@ class imager_prepare(LOFARnodeTCP): # Actual dppp call to externals (allows mucking) self._dppp_call(working_dir, ndppp, cmd, self.environment) # append the created timeslice on succesfull run - time_slice_path_collected.append(time_slice_path) + time_slice_path_list.append(time_slice_path) # On error the current timeslice should be skipped except subprocess.CalledProcessError, exception: @@ -282,7 +262,7 @@ class imager_prepare(LOFARnodeTCP): self.logger.warning(str(exception)) continue - return time_slice_path_collected + return time_slice_path_list def _concat_timeslices(self, group_measurements_collected, output_file_path): @@ -333,7 +313,7 @@ class imager_prepare(LOFARnodeTCP): finally: shutil.rmtree(rfi_temp_dir) - def _filter_bad_stations(self, group_measurements_collected, + def _filter_bad_stations(self, time_slice_path_list, asciistat_executable, statplot_executable, msselect_executable): """ A Collection of scripts for finding and filtering of bad stations: @@ -352,7 +332,7 @@ class imager_prepare(LOFARnodeTCP): self.logger.debug("Collecting statistical properties of input data") asciistat_output = [] asciistat_proc_group = SubProcessGroup(self.logger) - for ms in group_measurements_collected: + for ms in time_slice_path_list: output_dir = ms + ".filter_temp" create_directory(output_dir) asciistat_output.append((ms, output_dir)) @@ -422,10 +402,9 @@ class imager_prepare(LOFARnodeTCP): filtered_list_of_ms = [] # The order of the inputs needs to be preserved when producing the # filtered output! 
- for input_ms in group_measurements_collected: + for input_ms in time_slice_path_list: filtered_list_of_ms.append(msselect_output[input_ms]) - self.logger.info(repr(filtered_list_of_ms)) return filtered_list_of_ms diff --git a/CEP/Pipeline/test/recipes/master/copier_test.py b/CEP/Pipeline/test/recipes/master/copier_test.py index d0d4bd4800961cc24b0c370b9cc4e6ba6cb9a754..518445c1a1ea456b6b4c612b63352c2d8fc409d4 100644 --- a/CEP/Pipeline/test/recipes/master/copier_test.py +++ b/CEP/Pipeline/test/recipes/master/copier_test.py @@ -10,6 +10,7 @@ import tempfile from logger import logger from lofarpipe.support.utilities import create_directory #@UnresolvedImport +from lofarpipe.support.data_map import DataMap from lofarpipe.recipes.master.copier import copier #@UnresolvedImport @@ -38,72 +39,29 @@ class copierTest(unittest.TestCase): #shutil.rmtree(self.test_path) pass - def test_validate_source_target_mapfile(self): - source_map = [("node1", "path1"), ("node2", "path2"), ("node2", "path3")] - target_map = [("node3", "path1"), ("node4", "path2"), ("node4", "path3")] - + def test_validate_mapfiles_norename(self): sut = copierWrapper() - self.assertTrue(sut._validate_source_target_mapfile(source_map, target_map)) - -# def test_construct_node_specific_mapfiles(self): -# temp_path = self.test_path -# -# source_map = [('node1', 'path1'), ('node2', 'path2')] -# target_map = [('node3', 'path3'), ('node4', 'path4')] -# -# # Targets on node 3 and 4: mapfiles named after them -# mapfile1 = os.path.join(temp_path, "copier_source_node3.map") -# mapfile2 = os.path.join(temp_path, "copier_target_node3.map") -# mapfile3 = os.path.join(temp_path, "copier_source_node4.map") -# mapfile4 = os.path.join(temp_path, "copier_target_node4.map") -# sut = copierWrapper() -# mapfile_dict = sut._construct_node_specific_mapfiles(source_map, -# target_map, temp_path) -# -# expected_output = {'node3':(mapfile1, mapfile2), -# 'node4':(mapfile3, mapfile4)} -# -# self.assertTrue(repr(expected_output) == repr(mapfile_dict), -# "Output of function incorrect. 
dict with mapfile pairs" -# "output: \n{0} \n expected: \n{1}".format( -# repr(mapfile_dict), repr(expected_output))) -# -# # validation -# #files exist -# self.assertTrue(os.path.exists(mapfile1), -# "mapfile for first node not created properly") -# # content -# fp = open(mapfile1) -# content = fp.read() -# fp.close() -# expected_content = "[('node1', 'path1')]" -# self.assertTrue(content == expected_content, "source mapfile content incorrect") -# #now for the target mapfile -# self.assertTrue(os.path.exists(mapfile2), -# "mapfile for second node not created properly") -# -# fp = open(mapfile2) -# content = fp.read() -# fp.close() -# expected_content = "[('node3', 'path3')]" -# self.assertTrue(content == expected_content, -# "target mapfile content incorrect, expected, output \n{0}\n{1}".format( -# expected_content, content)) - - -# def test_copier_create_correct_mapfile(self): -# sut = copierWrapper() -# -# instr = [('node1', '/path1/1'), ('node1', '/path1/2')] -# data = [('node2', '/path2/3'), ('node2', '/path2/4')] -# -# -# expected_result = [('node2', '/path2/1'), ('node2', '/path2/2')] -# target_map = sut._create_target_map_for_instruments(instr, data) -# -# self.assertTrue(expected_result == target_map, target_map) - - + sut.source_map = DataMap( + [("node1", "path1"), ("node2", "path2"), ("node2", "path3")] + ) + sut.target_map = DataMap( + [("node3", "path1"), ("node4", "path2"), ("node4", "path3")] + ) + self.assertTrue(sut._validate_mapfiles()) + self.assertTrue(sut._validate_mapfiles(allow_rename=True)) + + def test_validate_mapfiles_rename(self): + sut = copierWrapper() + sut.source_map = DataMap( + [("node1", "path1"), ("node2", "path2"), ("node2", "path3")] + ) + sut.target_map = DataMap( + [("node3", "path4"), ("node4", "path5"), ("node4", "path6")] + ) + self.assertFalse(sut._validate_mapfiles()) + self.assertTrue(sut._validate_mapfiles(allow_rename=True)) + + from logger import logger from lofarpipe.recipes.master.copier import MasterNodeInterface #@UnresolvedImport from lofarpipe.support.remotecommand import ComputeJob @@ -122,18 +80,33 @@ class MasterNodeInterfaceWrapper(MasterNodeInterface): self._function_calls = [] class Error(): - self._return_value = True + def __init__(self): + self._return_value = 0 def isSet(self): return self._return_value self.error = Error() + + class Job(): + def __init__(self): + self.results = {'returncode': 0} + + self._jobs = [Job(), Job()] def _schedule_jobs(self, *args): self._function_calls.append(('_schedule_jobs', args)) - if self._command == "fail": - self.error._return_value = True + if self._command == "failure": + self.error._return_value = -1 + for job in self._jobs: + job.results['returncode'] = 1 + elif self._command == "error": + self.error._return_value = 1 + self._jobs[0].results['returncode'] = 1 elif self._command == "succes": - self.error._return_value = False + self.error._return_value = 0 + + def on_failure(self, *args): + self._function_calls.append(('on_failure', args)) def on_error(self, *args): self._function_calls.append(('on_error', args)) @@ -155,11 +128,11 @@ class MasterNodeInterfaceTest(unittest.TestCase): def test__init__raise_exception(self): """ - Test if MasterNodeInterface constructor raises a notimplemented error + Test if MasterNodeInterface constructor raises a TypeError if called without an string (ideally containing the command to run on the node """ - self.assertRaises(NotImplementedError, MasterNodeInterface) + self.assertRaises(TypeError, MasterNodeInterface) def 
test__init__raise_called_with_command(self): @@ -173,20 +146,25 @@ class MasterNodeInterfaceTest(unittest.TestCase): "The constructor did create a list data member called _list") - def test_on_error_raise_exception(self): + def test_on_failure_return_value(self): command = "a string" sut = MasterNodeInterface(command) - # on error on the superclass cannot be called: throws an error for it - # needs an implementation in the inheriting class - self.assertRaises(NotImplementedError, sut.on_error) + # on_failure by default returns 1, check return value. + self.assertEqual(-1, sut.on_failure()) + + + def test_on_error_return_value(self): + command = "a string" + sut = MasterNodeInterface(command) + # on_error by default returns 0, check return value. + self.assertEqual(1, sut.on_error()) def test_on_succes_raise_exception(self): command = "a string" sut = MasterNodeInterface(command) - # on error on the superclass cannot be called: throws an error for it - # needs an implementation in the inheriting class - self.assertRaises(NotImplementedError, sut.on_succes) + # on_error by default returns 0, check return value. + self.assertEqual(0, sut.on_succes()) def test_append_job(self): @@ -202,12 +180,12 @@ class MasterNodeInterfaceTest(unittest.TestCase): "append_job did not add an object with the type ComputeJob to" " the job list") - def test_run_jobs_error(self): - command = "fail" + def test_run_jobs_fail(self): + command = "failure" sut = MasterNodeInterfaceWrapper(command) - # command fail will result in any calls to the run_jobs to 'fail' - # error.isSet will return true (used internaly) resulting in a call to - # on_error + # command failure will result in all calls to the run_jobs to 'fail' + # error.isSet will return True (used internaly) resulting in a call to + # on_fail sut.run_jobs() self.assertTrue(len(sut._function_calls) == 2, @@ -216,16 +194,35 @@ class MasterNodeInterfaceTest(unittest.TestCase): self.assertTrue(sut._function_calls[0][0] == '_schedule_jobs' , "the name of the first called function in a fail state should be _schedule_jobs") - self.assertTrue(sut._function_calls[1][0] == 'on_error' , + self.assertTrue(sut._function_calls[1][0] == 'on_failure' , "the name of the second called function in a fail state should be on_error") + def test_run_jobs_error(self): + command = "error" + sut = MasterNodeInterfaceWrapper(command) + # command error will result in any calls to the run_jobs to 'fail' + # error.isSet will return True (used internaly) resulting in a call to + # on_error + sut.run_jobs() + + self.assertTrue(len(sut._function_calls) == 2, + "run_jobs in a error state should return in two function calls") + + self.assertTrue(sut._function_calls[0][0] == '_schedule_jobs' , + "the name of the first called function in an error state should be _schedule_jobs") + + self.assertTrue(sut._function_calls[1][0] == 'on_error' , + sut._function_calls#"the name of the second called function in an error state should be on_error" + ) + + def test_run_jobs_succes(self): command = "succes" sut = MasterNodeInterfaceWrapper(command) - # command fail will result in any calls to the run_jobs to 'fail' - # error.isSet will return true (used internaly) resulting in a call to - # on_error + # command success will result in all calls to the run_jobs to 'succeed' + # error.isSet will return False (used internaly) resulting in a call to + # on_success sut.run_jobs() self.assertTrue(len(sut._function_calls) == 2, diff --git a/CEP/Pipeline/test/recipes/master/imager_prepare_test.py 
b/CEP/Pipeline/test/recipes/master/imager_prepare_test.py index ccb51a7d82f0d8b482204d5dfce60d5c589b3933..a4504406a2d623a0294d0f62cc036bc78aed9c76 100644 --- a/CEP/Pipeline/test/recipes/master/imager_prepare_test.py +++ b/CEP/Pipeline/test/recipes/master/imager_prepare_test.py @@ -6,6 +6,8 @@ import shutil import numpy import tempfile +from lofarpipe.support.data_map import DataMap + #imports from fixture: from logger import logger @@ -48,20 +50,28 @@ class imager_prepareTest(unittest.TestCase): sut = ImagerPrepareWrapper() output = sut._create_input_map_for_sbgroup(slices_per_image, n_subband_groups, subbands_per_image, idx_sb_group, input_mapfile) - self.assertTrue(output == input_mapfile) + target = DataMap(input_mapfile) - def test_create_input_map_for_sbgroup_2slice(self): + self.assertTrue(output == target, "Actual output = {0}".format(output)) + def test_create_input_map_for_sbgroup_2slice(self): + """ + Test correct collection of the subbands for the first subband group + with two timeslices and one subband per image + """ slices_per_image = 2 n_subband_groups = 1 subbands_per_image = 1 - idx_sb_group = 0 # get the 2nd sb group + idx_sb_group = 0 # get the 1st input_mapfile = [('host', "path"), ('host2', "path2"), ('host3', "path3")] sut = ImagerPrepareWrapper() output = sut._create_input_map_for_sbgroup(slices_per_image, n_subband_groups, subbands_per_image, idx_sb_group, input_mapfile) - self.assertTrue(output == [('host', "path"), ('host2', "path2")]) + target = DataMap([('host', "path"), ('host2', "path2")]) + self.assertTrue(target == output, + + "Actual output = {0}".format(output)) def test_create_input_map_for_sbgroup_2slice_2ndgroup(self): @@ -77,8 +87,9 @@ class imager_prepareTest(unittest.TestCase): sut = ImagerPrepareWrapper() output = sut._create_input_map_for_sbgroup(slices_per_image, n_subband_groups, subbands_per_image, idx_sb_group, input_mapfile) - self.assertTrue(output == [('host3', "path3"), ('host4', "path4"), - ('host7', "path7"), ('host8', "path8")], output) + target = DataMap([('host3', "path3"), ('host4', "path4"), + ('host7', "path7"), ('host8', "path8")]) + self.assertTrue(output == target, "Actual output = {0}".format(output)) def test_validate_input_map_succes(self): input_map = [(1), (1), (1), (1)] @@ -90,9 +101,9 @@ class imager_prepareTest(unittest.TestCase): output = sut._validate_input_map(input_map, output_map, slices_per_image, subbands_per_image) - self.assertTrue(output == 0, "validating input map failed: incorrect output") + self.assertTrue(output == True, "validating input map failed: incorrect output") - def test_validate_input_map_succes(self): + def test_validate_input_map_incorrect(self): input_map = [(1), (1), (1)] output_map = [(1)] slices_per_image = 2 @@ -102,8 +113,8 @@ class imager_prepareTest(unittest.TestCase): output = sut._validate_input_map(input_map, output_map, slices_per_image, subbands_per_image) - self.assertTrue(output == 1, - "validating input map failed: incorrect output") + self.assertTrue(output == False, + "Actual output = {0}".format(output)) self.assertTrue(sut.logger.last() == ('error', 'Incorrect number of input ms for supplied parameters:\n\tlen(input_map) = 3\n\tlen(output_map) * slices_per_image * subbands_per_image = 1 * 2 * 2 = 4'), "incorrect logger message retrieved") diff --git a/CEP/Pipeline/test/support/data_map_test.py b/CEP/Pipeline/test/support/data_map_test.py new file mode 100644 index 0000000000000000000000000000000000000000..71f28f5e0dc2f728d790af8b186c7a8a13143d37 --- /dev/null +++ 
b/CEP/Pipeline/test/support/data_map_test.py @@ -0,0 +1,260 @@ +import os +import shutil +import tempfile +import unittest + +from lofarpipe.support.data_map import ( + DataMap, MultiDataMap, DataProduct, MultiDataProduct, + DataMapError, load_data_map, store_data_map +) + +class DataMapTest(unittest.TestCase): + """ + Test class for the DataMap class in lofarpipe.support.data_map + """ + def __init__(self, arg): + super(DataMapTest, self).__init__(arg) + self.old_style_map = [ + ('locus001', 'L12345_SB101.MS'), + ('locus002', 'L12345_SB102.MS'), + ('locus003', 'L12345_SB103.MS'), + ('locus004', 'L12345_SB104.MS') + ] + self.new_style_map = [ + {'host': 'locus001', 'file': 'L12345_SB101.MS', 'skip': True}, + {'host': 'locus002', 'file': 'L12345_SB102.MS', 'skip': False}, + {'host': 'locus003', 'file': 'L12345_SB103.MS', 'skip': True}, + {'host': 'locus004', 'file': 'L12345_SB104.MS', 'skip': False} + ] + + def setUp(self): + """ + Create scratch directory and create required input files in there. + """ + self.tmpdir = tempfile.mkdtemp() + self.old_style_map_file = self._create_old_style_map_file() + self.new_style_map_file = self._create_new_style_map_file() + self.syntax_error_map_file = self._create_syntax_error_map_file() + + def tearDown(self): + """ + Cleanup all the files that were produced by this test + """ + shutil.rmtree(self.tmpdir, ignore_errors=True) + + def _create_old_style_map_file(self): + f = open(os.path.join(self.tmpdir, 'old_style.map'), 'w') + f.write(repr(self.old_style_map)) + f.close() + return f.name + + def _create_new_style_map_file(self): + f = open(os.path.join(self.tmpdir, 'new_style.map'), 'w') + f.write(repr(self.new_style_map)) + f.close() + return f.name + + def _create_syntax_error_map_file(self): + f = open(os.path.join(self.tmpdir, 'syntax_error.map'), 'w') + f.write("[{'host': 'locus001']") + f.close() + return f.name + + def test_old_style_map(self): + data_map = DataMap(self.old_style_map) + self.assertEqual(len(data_map), 4) + self.assertEqual(data_map[0].host, 'locus001') + self.assertEqual(data_map[1].file, 'L12345_SB102.MS') + self.assertTrue(all(item.skip for item in data_map)) + + def test_old_style_load_store(self): + tmp_file = self.old_style_map_file + '.tmp' + data_map = DataMap(self.old_style_map) + store_data_map(tmp_file, self.old_style_map) + reloaded_data_map = load_data_map(tmp_file) + self.assertEqual(data_map, reloaded_data_map) + self.assertTrue(reloaded_data_map.iterator is DataMap.TupleIterator) + + def test_new_style_map(self): + data_map = DataMap(self.new_style_map) + self.assertEqual(len(data_map), 4) + self.assertEqual(data_map[0].host, 'locus001') + self.assertEqual(data_map[1].file, 'L12345_SB102.MS') + self.assertTrue(data_map[2].skip) + + def test_new_style_load_store(self): + tmp_file = self.new_style_map_file + '.tmp' + data_map = DataMap(self.new_style_map) + data_map.save(tmp_file) + reloaded_data_map = DataMap.load(tmp_file) + self.assertEqual(data_map, reloaded_data_map) + + def test_tuple_iterator(self): + data_map = DataMap(self.new_style_map) + data_map.iterator = DataMap.TupleIterator + tuples = [item for item in data_map] + self.assertEqual(len(tuples), 4) + self.assertTrue(all(isinstance(item, tuple) for item in tuples)) + self.assertTrue(all(len(item) == 2 for item in tuples)) + self.assertEqual(tuples[0], ('locus001', 'L12345_SB101.MS')) + + def test_skip_iterator(self): + data_map = DataMap(self.new_style_map) + data_map.iterator = DataMap.SkipIterator + unskipped = [item for item in data_map] + 
self.assertEqual(len(unskipped), 2) + self.assertTrue(all(isinstance(item, DataProduct) for item in unskipped)) + self.assertEqual(unskipped[0].host, 'locus002') + self.assertEqual(unskipped[0].file, 'L12345_SB102.MS') + + def test_syntax_error_map_file(self): + self.assertRaises(SyntaxError, DataMap.load, self.syntax_error_map_file) + + def test_data_map_errors(self): + error_maps = [ + 42, # integer + [1, 2, 3], # list of integer + 'foo', # string + ('foo', 'bar', 'baz'), # tuple of string + [{'file': 'L12345_SB101.MS', 'skip': True}], # missing key + [{'host': 'locus001', 'file': 'L12345_SB101.MS', + 'slip': True}], # misspelled key + [{'host': 'locus001', 'file': 'L12345_SB101.MS', + 'skip': True, 'spurious':'Oops'}] # spurious key + ] + for data_map in error_maps: + self.assertRaises(DataMapError, DataMap, data_map) + + +class MultiDataMapTest(unittest.TestCase): + """ + Test class for the MultiDataMap class in lofarpipe.support.data_map + """ + def __init__(self, arg): + super(MultiDataMapTest, self).__init__(arg) + self.old_style_map = [ + ('locus001', ['L12345_SB101.MS']), + ('locus002', ['L12345_SB102.MS']), + ('locus003', ['L12345_SB103.MS']), + ('locus004', ['L12345_SB104.MS']) + ] + self.new_style_map = [ + {'host': 'locus001', 'file': ['L12345_SB101.MS'], + 'file_skip':[True], 'skip': True}, + {'host': 'locus002', 'file': ['L12345_SB102.MS'], + 'file_skip':[False], 'skip': False}, + {'host': 'locus003', 'file': ['L12345_SB103.MS'], + 'file_skip':[True], 'skip': True}, + {'host': 'locus004', 'file': ['L12345_SB104.MS'], + 'file_skip':[False], 'skip': False} + ] + + def setUp(self): + """ + Create scratch directory and create required input files in there. + """ + self.tmpdir = tempfile.mkdtemp() + self.old_style_map_file = self._create_old_style_map_file() + self.new_style_map_file = self._create_new_style_map_file() + self.syntax_error_map_file = self._create_syntax_error_map_file() + + def tearDown(self): + """ + Cleanup all the files that were produced by this test + """ + shutil.rmtree(self.tmpdir, ignore_errors=True) + + def _create_old_style_map_file(self): + f = open(os.path.join(self.tmpdir, 'old_style.map'), 'w') + f.write(repr(self.old_style_map)) + f.close() + return f.name + + def _create_new_style_map_file(self): + f = open(os.path.join(self.tmpdir, 'new_style.map'), 'w') + f.write(repr(self.new_style_map)) + f.close() + return f.name + + def _create_syntax_error_map_file(self): + f = open(os.path.join(self.tmpdir, 'syntax_error.map'), 'w') + f.write("[{'host': 'locus001']") + f.close() + return f.name + + def test_old_style_map(self): + data_map = MultiDataMap(self.old_style_map) + self.assertEqual(len(data_map), 4) + self.assertEqual(data_map[0].host, 'locus001') + self.assertEqual(data_map[1].file, ['L12345_SB102.MS']) + self.assertEqual(data_map[2].file_skip, [True]) + self.assertEqual(data_map[2].skip, True) + self.assertTrue(all(item.skip for item in data_map)) + + def test_new_style_map(self): + data_map = MultiDataMap(self.new_style_map) + self.assertEqual(len(data_map), 4) + self.assertEqual(data_map[0].host, 'locus001') + self.assertEqual(data_map[1].file, ['L12345_SB102.MS']) + self.assertEqual(data_map[1].file_skip, [False]) + self.assertTrue(data_map[2].skip) + + def test_new_style_load_store(self): + tmp_file = self.new_style_map_file + '.tmp' + data_map = MultiDataMap(self.new_style_map) + data_map.save(tmp_file) + reloaded_data_map = MultiDataMap.load(tmp_file) + self.assertEqual(data_map, reloaded_data_map) + + def test_tuple_iterator(self): + 
data_map = MultiDataMap(self.new_style_map) + data_map.iterator = MultiDataMap.TupleIterator + tuples = [item for item in data_map] + self.assertEqual(len(tuples), 4) + self.assertTrue(all(isinstance(item, tuple) for item in tuples)) + self.assertTrue(all(len(item) == 2 for item in tuples)) + self.assertEqual(tuples[0], ('locus001', ['L12345_SB101.MS'])) + + def test_skip_iterator(self): + data_map = MultiDataMap(self.new_style_map) + data_map.iterator = MultiDataMap.SkipIterator + unskipped = [item for item in data_map] + self.assertEqual(len(unskipped), 2) + self.assertTrue(all(isinstance(item, MultiDataProduct) for item in unskipped)) + self.assertEqual(unskipped[0].host, 'locus002') + self.assertEqual(unskipped[0].file, ['L12345_SB102.MS']) + + + def test_syntax_error_map_file(self): + self.assertRaises(SyntaxError, MultiDataMap.load, self.syntax_error_map_file) + + def test_data_map_errors(self): + error_maps = [ + 42, # integer + [1, 2, 3], # list of integer + 'foo', # string + ('foo', 'bar', 'baz'), # tuple of string + [{'file': 'L12345_SB101.MS', 'skip': True}], # missing key + [{'host': 'locus001', 'file': 'L12345_SB101.MS', + 'slip': True}], # misspelled key + [{'host': 'locus001', 'file': 'L12345_SB101.MS', + 'skip': True, 'spurious':'Oops'}], # spurious key + [{'host': 'locus001', 'file_skip':["dsf"], + 'file': ['L12345_SB101.MS'], 'skip': True}], # incorrect boollist + [{'host': 'locus001', 'file_skip':[True, False], + 'file': ['L12345_SB101.MS'], 'skip': True}], #len != len + ] + for data_map in error_maps: + self.assertRaises(DataMapError, MultiDataMap, data_map) + + def test_compare_DataMap_and_MultiDataMap(self): + data_map = DataMap([]) + multi_data_map = MultiDataMap([]) + # Empty maps should be unequal also + self.assertNotEqual(data_map, multi_data_map) + +if __name__ == '__main__': + import xmlrunner + unittest.main(testRunner=xmlrunner.XMLTestRunner(output='result.xml')) + +
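The master-recipe hunks in this patch (setupparmdb, setupsourcedb, vdsmaker and the imager recipes) converge on one skip-aware scheduling pattern: derive the output map from the input map, run one compute job per unskipped entry, flag entries whose job failed, and bail out only when every job failed. The sketch below restates that pattern in isolation; the function names, the command string and the schedule_jobs callable are invented stand-ins (a real recipe uses self._schedule_jobs), while DataMap, ComputeJob and the results['returncode'] convention are taken from the code above.

# Hedged sketch of the skip-aware master-recipe pattern; helper names are
# hypothetical, the DataMap/ComputeJob usage mirrors the recipes in this patch.
import copy
import os

from lofarpipe.support.data_map import DataMap
from lofarpipe.support.remotecommand import ComputeJob

def build_output_map(indata, working_dir, suffix):
    """Derive an output DataMap from the input map, as setupparmdb and
    setupsourcedb do (those recipes also insert a job_name subdirectory)."""
    outdata = copy.deepcopy(indata)
    for item in outdata:
        item.file = os.path.join(working_dir,
                                 os.path.basename(item.file) + suffix)
    return outdata

def run_jobs_and_flag_failures(outdata, command, schedule_jobs):
    """Schedule one job per unskipped entry, mark failed entries as skipped,
    and report whether at least one job succeeded (recipes return 1 otherwise)."""
    outdata.iterator = DataMap.SkipIterator
    jobs = [
        ComputeJob(item.host, command, arguments=[item.file])
        for item in outdata
    ]
    schedule_jobs(jobs)  # stands in for self._schedule_jobs(jobs, max_per_node=...)
    for job, item in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            item.skip = True  # keep going with the runs that succeeded
    return not all(job.results['returncode'] != 0 for job in jobs)

A recipe would then call outdata.save(mapfile) and publish that mapfile as its output, so the skip flags recorded here are what downstream steps iterate over with DataMap.SkipIterator.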