diff --git a/.gitattributes b/.gitattributes index 40e41ca529ada75eb522eb3ca25c8ee0d09f711a..b422a200c855ea481d07f1b5499693b9e50addbd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1411,6 +1411,10 @@ CEP/Pipeline/framework/lofarpipe/support/usagestats.py eol=lf CEP/Pipeline/framework/lofarpipe/support/utilities.py eol=lf CEP/Pipeline/framework/lofarpipe/support/xmllogging.py -text CEP/Pipeline/framework/setup.py eol=lf +CEP/Pipeline/helper_scripts/aggregate_stats.py eol=lf +CEP/Pipeline/helper_scripts/create_selfcal_parset.py eol=lf +CEP/Pipeline/helper_scripts/selfcal.settings -text +CEP/Pipeline/helper_scripts/state_to_stats.py eol=lf CEP/Pipeline/mac/CMakeLists.txt eol=lf CEP/Pipeline/mac/README eol=lf CEP/Pipeline/mac/ep/CMakeLists.txt eol=lf @@ -1445,6 +1449,7 @@ CEP/Pipeline/recipes/sip/bin/msss_calibrator_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/msss_target_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/pulsar_pipeline.py -text +CEP/Pipeline/recipes/sip/bin/selfcal_imager_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/startPython.sh eol=lf CEP/Pipeline/recipes/sip/bin/startPythonVersion.sh -text CEP/Pipeline/recipes/sip/demixing/bbs_CasA.parset eol=lf @@ -1463,6 +1468,7 @@ CEP/Pipeline/recipes/sip/helpers/ComplexArray.py -text CEP/Pipeline/recipes/sip/helpers/MultipartPostHandler.py -text CEP/Pipeline/recipes/sip/helpers/WritableParmDB.py -text CEP/Pipeline/recipes/sip/helpers/__init__.py eol=lf +CEP/Pipeline/recipes/sip/helpers/data_quality.py eol=lf CEP/Pipeline/recipes/sip/helpers/metadata.py eol=lf CEP/Pipeline/recipes/sip/master/__init__.py eol=lf CEP/Pipeline/recipes/sip/master/copier.py -text @@ -1489,6 +1495,9 @@ CEP/Pipeline/recipes/sip/master/imager_source_finding.py eol=lf CEP/Pipeline/recipes/sip/master/long_baseline.py eol=lf CEP/Pipeline/recipes/sip/master/new_bbs.py eol=lf CEP/Pipeline/recipes/sip/master/rficonsole.py eol=lf +CEP/Pipeline/recipes/sip/master/selfcal_awimager.py eol=lf +CEP/Pipeline/recipes/sip/master/selfcal_bbs.py eol=lf +CEP/Pipeline/recipes/sip/master/selfcal_finalize.py eol=lf CEP/Pipeline/recipes/sip/master/setupparmdb.py eol=lf CEP/Pipeline/recipes/sip/master/setupsourcedb.py eol=lf CEP/Pipeline/recipes/sip/master/vdsmaker.py eol=lf @@ -1519,6 +1528,9 @@ CEP/Pipeline/recipes/sip/nodes/imager_source_finding.py eol=lf CEP/Pipeline/recipes/sip/nodes/long_baseline.py eol=lf CEP/Pipeline/recipes/sip/nodes/new_bbs.py eol=lf CEP/Pipeline/recipes/sip/nodes/rficonsole.py eol=lf +CEP/Pipeline/recipes/sip/nodes/selfcal_awimager.py eol=lf +CEP/Pipeline/recipes/sip/nodes/selfcal_bbs.py eol=lf +CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py eol=lf CEP/Pipeline/recipes/sip/nodes/setupparmdb.py eol=lf CEP/Pipeline/recipes/sip/nodes/setupsourcedb.py eol=lf CEP/Pipeline/recipes/sip/nodes/vdsmaker.py eol=lf @@ -1575,10 +1587,12 @@ CEP/Pipeline/test/regression_tests/regression_test_runner.py -text CEP/Pipeline/test/regression_tests/regression_test_runner.sh eol=lf CEP/Pipeline/test/regression_tests/replace_config_values.cfg -text CEP/Pipeline/test/regression_tests/replace_parset_values.cfg -text +CEP/Pipeline/test/regression_tests/selfcal_imager_pipeline_test.py eol=lf CEP/Pipeline/test/regression_tests/target_pipeline.py -text CEP/Pipeline/test/regression_tests/trunk_imaging_regression.parset -text CEP/Pipeline/test/support/__init__.py eol=lf CEP/Pipeline/test/support/loggingdecorators_test.py -text +CEP/Pipeline/test/support/subprocessgroup_test.py eol=lf 
CEP/Pipeline/test/support/xmllogging_test.py -text CEP/Pipeline/test/test_framework/CMakeLists.txt eol=lf CEP/Pipeline/test/test_framework/__init__.py eol=lf diff --git a/CEP/Pipeline/framework/lofarpipe/support/data_map.py b/CEP/Pipeline/framework/lofarpipe/support/data_map.py index 24e2b67f2a72b2082382da36500a396693319fd2..878cc01968bae877c19b91bf5c90f40b5f14d59c 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/data_map.py +++ b/CEP/Pipeline/framework/lofarpipe/support/data_map.py @@ -1,10 +1,11 @@ -# LOFAR PIPELINE FRAMEWORK +# LOFAR PIPELINE FRAMEWORK # # Handle data-map file containing Data Product descriptions -# Marcel Loose, 2012 -# loose@astron.nl -# ------------------------------------------------------------------------------ - +# Marcel Loose, 2012 +# loose@astron.nl +# Wouter Klijn, 2015 +# klijn@astron.nl +# ----------------------------------------------------------------------------- """ This module contains methods to load and store so-called data-map file and to iterate over these maps. Data-map file contain a description of the @@ -96,7 +97,6 @@ class DataMap(object): return value def __init__(self, data=list(), iterator=iter): - self._data = list() self.data = data self.iterator = iterator @@ -157,20 +157,40 @@ class DataMap(object): except TypeError: raise DataMapError("Failed to validate data map: %s" % repr(data)) -class MultiDataMap(DataMap): - """ - Class representing a specialization of data-map, a collection of data - products located on the same node, skippable as a set and individually - """ - @DataMap.data.setter - def data(self, data): - self._set_data(data, dtype=MultiDataProduct) - - + def append(self, data, dtype=DataProduct): + """ + Append an item to the end of the internal storage, allows appending + of DataProduct or tuple. Default skip=False when only (host, file) is + supplied + """ + try: + if isinstance(data, dtype): + self._data.append(data) + + # tuple + elif isinstance(data, tuple): + item = None + if len(data) == 3: + # use the DataProduct validation to assure correct types + item = DataProduct(data[0], data[1], data[2]) + elif len(data) == 2: + item = DataProduct(data[0], data[1], False) + else: + raise TypeError( + "DataMap, Suplied tuple is not valid: {0}".format( + repr(tuple))) + self._data.append(item) + else: + raise TypeError + except TypeError: + raise DataMapError("Failed to append item: %s" % repr(data)) + class MultiDataProduct(object): """ - Class representing a single data product. + Class representing a multi data product. A structure respresenting a list + of files on a node with both the node and each file a skip field. + 'file' skip fields are based on node skip value """ def __init__(self, host, file, skip=True, file_skip=None): self.host = str(host) @@ -221,6 +241,51 @@ class MultiDataProduct(object): """Compare for non-equality""" return not self.__eq__(other) + +class MultiDataMap(DataMap): + """ + Class representing a specialization of data-map, a collection of data + products located on the same node, skippable as a set and individually + """ + @DataMap.data.setter + def data(self, data): + self._set_data(data, dtype=MultiDataProduct) + + def append(self, data, dtype=MultiDataProduct): + """ + Append an item to the end of the internal storage, allows appending + of DataProduct or tuple. 
Default skip=True + """ + try: + if isinstance(data, dtype): + self._data.append(data) + + # tuple or argument input + elif isinstance(data, tuple): + # use the DataProduct validation to assure correct types + item = None + if len(data) < 2: + raise TypeError( + "MultiDataMap: Incorrect tuple size (< 2): {0}".format( + repr(tuple))) + elif len(data) == 2: + item = MultiDataProduct(data[0], data[1]) + elif len(data) == 3: + item = MultiDataProduct(data[0], data[1], data[2]) + elif len(data) == 4: + item = MultiDataProduct(data[0], data[1], data[2], data[3]) + else: + raise TypeError( + "MultiDataMap: Incorrect tuple size (> 4): {0}".format( + repr(tuple))) + + self._data.append(item) + else: + raise TypeError + except TypeError: + raise DataMapError("Failed to append item: %s" % repr(data)) + + @deprecated def load_data_map(filename): """ @@ -291,7 +356,8 @@ def align_data_maps(*args): alignment is not possible. """ if len(args) < 2: - raise DataMapError("At least two datamaps are needed to perform align.") + raise DataMapError( + "At least two datamaps are needed to perform align.") if not validate_data_maps(*args): raise DataMapError( @@ -309,7 +375,6 @@ def align_data_maps(*args): entrie.skip = skip - @deprecated def tally_data_map(data, glob, logger=None): """ diff --git a/CEP/Pipeline/framework/lofarpipe/support/lofaringredient.py b/CEP/Pipeline/framework/lofarpipe/support/lofaringredient.py index 0651ce9bb48a089e86ca53d696473f61c228acfe..df9ca7e230b961d5bc195211f8613c8106c06771 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/lofaringredient.py +++ b/CEP/Pipeline/framework/lofarpipe/support/lofaringredient.py @@ -11,7 +11,6 @@ from UserDict import DictMixin from lofarpipe.cuisine.ingredient import WSRTingredient from lofarpipe.support.utilities import string_to_list, is_iterable -from lofar.parameterset import parameterset # These are currently only used by lofarrecipe.run_task to provide default # input and output dicts based on copying metadata from the parent. 
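The new append() methods above accept either ready-made data products or plain tuples, which are validated and converted internally. A minimal usage sketch, not part of the patch; the host names and paths are hypothetical:

```python
from lofarpipe.support.data_map import DataMap, DataProduct, MultiDataMap

# Plain tuples are converted to DataProduct instances; a (host, file)
# tuple defaults to skip=False, a third element sets skip explicitly.
data_map = DataMap()
data_map.append(("locus001", "/data/scratch/L12345_SB000.MS"))
data_map.append(("locus002", "/data/scratch/L12345_SB001.MS", True))
data_map.append(DataProduct("locus003", "/data/scratch/L12345_SB002.MS", False))

# MultiDataMap entries hold a list of files per node; the optional third
# and fourth tuple elements map to skip and file_skip.
multi_map = MultiDataMap()
multi_map.append(("locus001",
                  ["/data/scratch/L12345_SB000.MS",
                   "/data/scratch/L12345_SB001.MS"],
                  False))

for item in data_map.data:
    print("%s %s skip=%s" % (item.host, item.file, item.skip))
```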
diff --git a/CEP/Pipeline/framework/lofarpipe/support/lofarnode.py b/CEP/Pipeline/framework/lofarpipe/support/lofarnode.py index bd7ee143d349e8c8337bc93d00e579ba208448e0..ab028f856f45ca341402ac206534c878cb0e9fcd 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/lofarnode.py +++ b/CEP/Pipeline/framework/lofarpipe/support/lofarnode.py @@ -48,7 +48,7 @@ class LOFARnode(object): self.logport = int(logport) self.outputs = {} self.environment = os.environ - self.resourceMonitor = UsageStats(self.logger) + self.resourceMonitor = UsageStats(self.logger, 10.0 * 60.0) # collect stats each 10 minutes def run_with_logging(self, *args): """ diff --git a/CEP/Pipeline/framework/lofarpipe/support/mac.py b/CEP/Pipeline/framework/lofarpipe/support/mac.py index bf550cdf0e6187cbd33f34f6ebe6cbbb43c4037e..7954e7287602b7019d80ab8453bb2b02fd462ac9 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/mac.py +++ b/CEP/Pipeline/framework/lofarpipe/support/mac.py @@ -65,7 +65,7 @@ class MAC_control(control): try: super(MAC_control, self).run_task(configblock, datafiles) except PipelineException, message: - self.logger.error(message) + self.logger.warn(message) # raise PipelineQuit def go(self): diff --git a/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py b/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py index 963ee22660ab255a8d1badfc0f5f6df68a733b1f..a6c0439fb94c8305365f66982b14b9865b7eb3f1 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py +++ b/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py @@ -222,7 +222,9 @@ class ComputeJob(object): self.arguments = arguments self.results = {} self.results['returncode'] = 123456 # Default to obscure code to allow - # test of failing ssh connections + # test of failing ssh connections + # TODO: This could be done nicer!!! THis error is shown in the logfile + # and tends to confuse users def dispatch(self, logger, config, limiter, id, jobhost, jobport, error, killswitch): diff --git a/CEP/Pipeline/framework/lofarpipe/support/stateful.py b/CEP/Pipeline/framework/lofarpipe/support/stateful.py index 5bf344f215a7ce686369329700ca99e72dfc022c..e33df7f102630ae4a08fe154d4a2091463243c6c 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/stateful.py +++ b/CEP/Pipeline/framework/lofarpipe/support/stateful.py @@ -1,9 +1,9 @@ -# LOFAR PIPELINE FRAMEWORK +# LOFAR PIPELINE FRAMEWORK # -# Stateful LOFAR Recipe -# John Swinbank, 2010 -# swinbank@transientskp.org -# ------------------------------------------------------------------------------ +# Stateful LOFAR Recipe +# Wouter Klijn, 2015 +# klijn@astron.nl +# ----------------------------------------------------------------------------- from functools import wraps @@ -13,6 +13,9 @@ import cPickle from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.lofarexceptions import PipelineException +from lofarpipe.support.xmllogging import add_child_to_active_stack_head +import xml.dom.minidom as xml + def stateful(run_task): @wraps(run_task) def wrapper(self, configblock, datafiles = [], **kwargs): @@ -26,11 +29,24 @@ def stateful(run_task): # We have already run this task and stored its state, or... 
self.logger.info("Task %s already exists in saved state; skipping" % configblock) + + # Get the (optional) node xml information, normally added + # in remotecommand.py + if "return_xml" in my_state[1]: + return_node = xml.parseString( + my_state[1]['return_xml']).documentElement + + add_child_to_active_stack_head(self, return_node) + # If no active stack, do nothing + return my_state[1] + elif my_state[0] != '': # There is a stored task, but it doesn't match this one, or... - self.logger.error("Stored state does not match pipeline definition; bailing out") - raise PipelineException("Stored state does not match pipeline definition") + self.logger.error( + "Stored state does not match pipeline definition; bailing out") + raise PipelineException( + "Stored state does not match pipeline definition") else: # We need to run this task now. outputs = run_task(self, configblock, datafiles, **kwargs) @@ -39,6 +55,7 @@ def stateful(run_task): return outputs return wrapper + class StatefulRecipe(BaseRecipe): """ Enables recipes to save and restore state. diff --git a/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py b/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py index b160197b80597b76138dcdd7414e89d1916cac2b..5eb731e7c416d983eb32488535b2756e73f7bedd 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py +++ b/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py @@ -1,4 +1,5 @@ import subprocess +import time from lofarpipe.support.lofarexceptions import PipelineException @@ -7,27 +8,30 @@ class SubProcessGroup(object): A wrapper class for the subprocess module: allows fire and forget insertion of commands with a an optional sync/ barrier/ return """ - def __init__(self, logger=None, usageStats = None): + def __init__(self, logger=None, + usageStats = None, + # Default CEP2 is max 8 cpu used + max_concurrent_processes = 8, + # poll each 10 seconds: we have a mix of short and long + # running processes + polling_interval = 10): self.process_group = [] self.logger = logger self.usageStats = usageStats + self.running_process_count = 0 + self.max_concurrent_processes = max_concurrent_processes + # list of command vdw pairs not started because the maximum + # number of processes was reached + self.processes_waiting_for_execution = [] + self.polling_interval = polling_interval - def run(self, cmd_in, unsave=False, cwd=None): + def _start_process(self, cmd, cwd): """ - Add the cmd as a subprocess to the current group: The process is - started! 
- cmd can be suplied as a single string (white space seperated) - or as a list of strings + Helper function collection all the coded needed to start a process """ - - if type(cmd_in) == type(""): #todo ugly - cmd = cmd_in.split() - elif type(cmd_in) == type([]): - cmd = cmd_in - else: - raise Exception("SubProcessGroup.run() expects a string or" + - "list[string] as arguments suplied: {0}".format(type(cmd))) + # About to start a process, increase the counter + self.running_process_count += 1 # Run subprocess process = subprocess.Popen( @@ -38,60 +42,111 @@ class SubProcessGroup(object): stderr=subprocess.PIPE) # save the process - self.process_group.append((cmd, process)) + self.process_group.append((False, (cmd, process))) # add to resource monitor if available if self.usageStats: self.usageStats.addPID(process.pid) - - # TODO: SubProcessGroup could saturate a system with to much - # concurent calss: artifical limit to 20 subprocesses - if not unsave and (len(self.process_group) > 20): - self.logger.error("Subprocessgroup could hang with more" - "then 20 concurent calls, call with unsave = True to run" - "with more than 20 subprocesses") - raise PipelineException("Subprocessgroup could hang with more" - "then 20 concurent calls. Aborting") - if self.logger == None: print "Subprocess started: {0}".format(cmd) else: self.logger.info("Subprocess started: {0}".format(cmd)) + def run(self, cmd_in, unsave=False, cwd=None): + """ + Add the cmd as a subprocess to the current group: The process is + started! + cmd can be suplied as a single string (white space seperated) + or as a list of strings + """ + + if isinstance(cmd_in, str): + cmd = cmd_in.split() + elif isinstance(cmd_in, list): + cmd = cmd_in + else: + raise Exception("SubProcessGroup.run() expects a string or" + + "list[string] as arguments suplied: {0}".format(type(cmd))) + + # We need to be able to limit the maximum number of processes + if self.running_process_count >= self.max_concurrent_processes: + # Save the command string and cwd + self.processes_waiting_for_execution.append((cmd, cwd)) + else: + self._start_process(cmd, cwd) + + def wait_for_finish(self): """ Wait for all the processes started in the current group to end. Return the return status of a processes in an dict (None of no processes failed - This is a Pipeline component: Of an logger is supplied the + This is a Pipeline component: If an logger is supplied the std out and error will be suplied to the logger """ collected_exit_status = [] - for cmd, process in self.process_group: - # communicate with the process - # TODO: This would be the best place to create a - # non mem caching interaction with the processes! - # TODO: should a timeout be introduced here to prevent never ending - # runs? 
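The reworked run()/wait_for_finish() pair throttles execution: commands beyond max_concurrent_processes are queued and started as running processes complete, with completion detected by polling. A minimal usage sketch under those assumptions; the sleep/echo commands are placeholders only:

```python
from lofarpipe.support.subprocessgroup import SubProcessGroup

# With max_concurrent_processes=2, at most two of the queued commands run
# at any one time; the rest wait in processes_waiting_for_execution.
group = SubProcessGroup(max_concurrent_processes=2, polling_interval=1)
for n in range(5):
    group.run("sleep {0}".format(n + 1))   # a string is split on whitespace
group.run(["/bin/echo", "done"])           # a list is passed through as-is

# Blocks until all commands (running and queued) have finished; returns
# None on success or a list of (cmd, exit_status) tuples for failures.
failed = group.wait_for_finish()
if failed is not None:
    print("some commands failed: %s" % repr(failed))
```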
- (stdoutdata, stderrdata) = process.communicate() - exit_status = process.returncode - - # get the exit status - if exit_status != 0: - collected_exit_status.append((cmd, exit_status)) - - # log the std out and err - if self.logger != None: - self.logger.info(cmd) - self.logger.debug(stdoutdata) - self.logger.warn(stderrdata) - else: - print cmd - print stdoutdata - print stderrdata + while (True): + # The exit test + if (self.running_process_count == 0 and + len(self.processes_waiting_for_execution) == 0): + break # the while loop + + # start with waiting + time.sleep(self.polling_interval) + + # check all the running processes for completion + for idx, (completed, (cmd, process)) in \ + enumerate(self.process_group): + if completed: + continue + + # poll returns null if the process is not completed + if process.poll() == None: + continue + + # We have a completed process + # communicate with the process + # TODO: This would be the best place to create a + # non mem caching interaction with the processes! + (stdoutdata, stderrdata) = process.communicate() + exit_status = process.returncode + + # get the exit status + if exit_status != 0: + collected_exit_status.append((cmd, exit_status)) + + # log the std out and err + if self.logger != None: + self.logger.info( + "Subprocesses group, completed command: ") + self.logger.info(cmd) + self.logger.debug(stdoutdata) + self.logger.warn(stderrdata) + else: + print "Subprocesses group, completed command: " + print cmd + print stdoutdata + print stderrdata + + # Now update the state of the internal state + self.process_group[idx] = (True, (cmd, process)) + self.running_process_count -= 1 + + + # if there are less then the allowed processes running and + # we have waiting processes start another on + while (self.running_process_count < self.max_concurrent_processes + and + len(self.processes_waiting_for_execution) != 0): + # Get the last process + cmd , cwd = self.processes_waiting_for_execution.pop() + # start it + self._start_process(cmd, cwd) + + # If none of the processes return with error status if len(collected_exit_status) == 0: collected_exit_status = None return collected_exit_status diff --git a/CEP/Pipeline/framework/lofarpipe/support/usagestats.py b/CEP/Pipeline/framework/lofarpipe/support/usagestats.py index d2d1ed2ebe5ebb5a8abe85deaa6e5b38b51081fd..c943f516673346d77d4860cb706a1455cfa7d087 100644 --- a/CEP/Pipeline/framework/lofarpipe/support/usagestats.py +++ b/CEP/Pipeline/framework/lofarpipe/support/usagestats.py @@ -3,6 +3,34 @@ # UsageStat class # Wouter Klijn, 2014 # klijn@astron.nl +# +# TODO: +# 1. The stats are currently collecting using os calls and subprocess. +# A light weight version might use /proc/pid directly +# 2. Large scale refactor: +# Enforce the reading and writing of each recipe to a specific directory +# In combination with du we could then exactly measure how much file data is +# written. +# An idea is to make this part of the recipe. Although I like the idea of a +# decorator: this makes is explicit +# 3. Creating a decorator version of this stat collected might be a plan anyways +# Marcel suggested a with-block syntax. When lots of decorated functionality +# becomes available this might lead to boilerplate eg: +# +# @logging("scriptname") +# @usage_stat("scriptname") +# def recipename(bla): +# +# vs +# def recipename(bla): +# with logging("striptname"): +# with usage_stat("scriptname"): +# with xml_loggin("scriptname"): +# +# 4. I dont like that xml magix is needed to fill the nodes. 
+# A number of helper function on the xmllogging might be a good idea. +# Supporting direct xml should still be possible for lo-tech / api acces +# should be supported also. # ------------------------------------------------------------------------------ import threading import time @@ -18,7 +46,7 @@ poller_string = """ #!/bin/bash -e PID=$1 -exe=$(readlink /proc/${PID}/exe) +exe=$(readlink /proc/${PID}/exe) || exit 1 rb=$(grep ^read_bytes /proc/${PID}/io | awk '{print $2}') wb=$(grep ^write_bytes /proc/${PID}/io | awk '{print $2}') cwb=$(grep ^cancelled_write_bytes /proc/${PID}/io | awk '{print $2}') @@ -60,6 +88,21 @@ class UsageStats(threading.Thread): self.pid_tracked = [] self.pid_stats = {} self.poll_interval = poll_interval + self.poll_counter = poll_interval + self.temp_path = None + + # Write a bash file which will retrieve the needed info from proc + (temp_file, self.temp_path) = tempfile.mkstemp() + temp_file = open(self.temp_path, "w") + temp_file.write(poller_string) + temp_file.close() + + def __del__(self): + """ + Clean up the temp file after the file is not in use anymore + """ + os.remove(self.temp_path) + def run(self): """ @@ -73,6 +116,17 @@ class UsageStats(threading.Thread): sleep for poll_interval """ while not self.stopFlag.isSet(): + # If the timer waits for the full 10 minutes using scripts + # appear halting for lnog durations after ending + # poll in a tight wait loop to allow quick stop + if self.poll_counter < self.poll_interval: + self.poll_counter += 1 + time.sleep(1) + continue + + # reset the counter to zero + self.poll_counter = 0 + # ************************************* # first add new to track pids to the active list # in a lock to assure correct functioning @@ -87,29 +141,31 @@ class UsageStats(threading.Thread): self.pid_stats[pid] = [] self.pid_in = [] - self.lock.release() - - (temp_file, temp_path) = tempfile.mkstemp() - temp_file = open(temp_path, "w") - temp_file.write(poller_string) - temp_file.close() - - # now get stats for each tracked pid - try: - for pid in self.pid_tracked: - pps = subprocess.Popen(["bash", temp_path, str(pid)], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = pps.communicate() - # TODO: check return value of bash script using pps.returncode - parset_output = eval(out.rstrip()) # remove trailing white space - self.pid_stats[pid].append(parset_output) - finally: - os.remove(temp_path) - - time.sleep(self.poll_interval) - + + # now get stats for each tracked pid + pid_out = [] + for pid in self.pid_tracked: + pps = subprocess.Popen(["bash", self.temp_path, str(pid)], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = pps.communicate() + # Store pids that are not active anymore (process has closed) + if not pps.returncode == 0: + pid_out.append(pid) + continue # nothing to store continue + + # remove trailing white space + parset_output = eval(out.rstrip()) + self.pid_stats[pid].append(parset_output) + + # in the previous loop we stored al the pid that are invalid and + # can be cleared + for pid in pid_out: + try: + self.pid_tracked.remove(pid) + except ValueError: # key does not exist (should not happen) + pass def addPID(self, pid): """ @@ -144,26 +200,29 @@ class UsageStats(threading.Thread): # TODO: The returned values are not in order and the owner PID # might not be printed with idx 0. 
Maybee print seperately for idx,(key,value) in enumerate(self.pid_stats.iteritems()): - #if there are entries + # if there are entries if value: child_pid = add_child(resource_stat_xml, "process") child_pid.setAttribute("idx", str(idx)) - #The first entry should contain the exec name. only needed once + # The first entry should contain the exec name. + # only needed once child_pid.setAttribute("executable", str(value[0][0])) child_pid.setAttribute("pid", str(key)) for entry in value: - # TODO: probably no longer needed with updated bash script + # TODO: probably no longer needed with updated bash + # script if "MEM" in str(entry[6]): # this is the default value continue data_point = add_child(child_pid, "data_point") data_point.setAttribute("timestamp", str(entry[1])) data_point.setAttribute("read_bytes", str(entry[2])) data_point.setAttribute("write_bytes", str(entry[3])) - data_point.setAttribute("cancelled_bytes", str(entry[4])) + data_point.setAttribute("cancelled_bytes", + str(entry[4])) data_point.setAttribute("cpu", str(entry[5])) data_point.setAttribute("mem", str(entry[6])) except: - self.logger.error("monitoring statistic recording failed") + self.logger.warn("monitoring statistic recording failed") resource_stat_xml.setAttribute("noStatsRecorded", "Exception") # TODO: coalesce these two returns in one "finally:" return resource_stat_xml.toxml(encoding = "ascii") diff --git a/CEP/Pipeline/helper_scripts/aggregate_stats.py b/CEP/Pipeline/helper_scripts/aggregate_stats.py new file mode 100644 index 0000000000000000000000000000000000000000..e4c87b257b0a6055a8c716b08a22dca077cdeb91 --- /dev/null +++ b/CEP/Pipeline/helper_scripts/aggregate_stats.py @@ -0,0 +1,874 @@ +# LOFAR PIPELINE FRAMEWORK +# +# aggregate stats +# Wouter Klijn, 2014 +# klijn@astron.nl +# ------------------------------------------------------------------------------ +import os +import sys +import xml.dom.minidom as xml + +from matplotlib import pyplot as plt +import numpy as np + + +def usage(): + usage_string = """ + usage: python aggregate_stats.py <pipeline_xml_stats_file> + + This program parses and aggregates pipelines statistics from a + xml stat file generate by the Lofar automatic pipeline framework. + + It creates the following aggregate statistics, in both visual format + and as csv file for further visualization/processing (optional): + + 1. For the whole pipeline: + - Running time (total duration) + - CPU seconds (of all the node processing) (calculated) + TODO: For each stat the min, max, median, mean and std + 2. Per data product sets + - Running time ( max duration for this data product set) + - CPU seconds + - Max memory usage + - Bytes read and write + 3. Per recipe for each data product + - Running time + - CPU second + - memory usage + - disk usage + """ + + print usage_string + + +def open_file_and_parse(xml_stats_path): + """ + Performs the interaction with the xml_stats file. + Opens it from disk, throwing an ImportError if it fails + converts to an instantiated minidom xml three. Throwing a ImportError if + it fails. + """ + # Open the file raise on error + raw_file_string = "" + + try: + raw_file_string = open(xml_stats_path, 'r').read() + except: + # TODO: Failing to open a file is expected faulty behaviour, + # should we do something else than an exception here? 
+ print "could not open file: {0}".format(xml_stats_path) + raise ImportError("Could not open supplied file for parsing") + + # Parse to xml + stats_xml = None + + try: #, encoding='ascii' + stats_xml = xml.parseString(raw_file_string).documentElement # only need the data not the xml fluff + except: + # Parsing of xml should succeed if written by the pipeline framework + # In this case an exception should be allowed + + print "Attempted to parse '{0}' as an xml file. This failed".format(xml_stats_path) + + # return as xml_minidom object + return stats_xml + + +def collect_information_for_a_recipe(recipe_xml): + """ + Collects all the information available in a recipe xml tree + Again, this is tightly coupled with the implementation in the pipeline + """ + + +def convert_xml_attributes_to_dict(attributes, clean_empty_values=True): + """ + helper function converting the xml.attributes return list + to a ascii dict, where possible entries are cast to their + python representation + """ + # put all the intries into a dict + attribute_dict = {} + for attribute in attributes.items(): + attribute_dict[attribute[0].encode("ascii", 'ignore')] = \ + attribute[1].encode("ascii", 'ignore') + + for key, value in attribute_dict.items(): + try: + casted_value = eval(value) + + attribute_dict[key] = casted_value + except: + # eval is expected to fail swallow all exceptions + pass + return attribute_dict + + +def collect_toplevel_information(stats_xml): + """ + Collects the information found at the top level of xml + Is simply converts the xml element information into python + dicts for ease of access + """ + toplevel_dict = convert_xml_attributes_to_dict(stats_xml.attributes, + clean_empty_values=False) + return toplevel_dict + + +def create_timeline_dict(data_points_list): + """ + receives a list of xml containing the actual resource traces + + Converts the data into python variables + Attempts to clean this data of empty datapoints + Then creates a dict of metric to list with the actual time trace + """ + # the list which will filled with the trance for the respective stat + timestamp = [] + mem = [] + cpu = [] + read_bytes = [] + write_bytes = [] + cancelled_bytes = [] + + for data_point in data_points_list: + # convert to python variables + single_data_point_dict = convert_xml_attributes_to_dict(data_point.attributes) + + # if there is no valid data in the correct point + if ((single_data_point_dict['mem'] == "") or + (single_data_point_dict['cpu'] == "") or + (single_data_point_dict['read_bytes'] == "")): + continue + + # assume that the entries are ordered when creating the time line + # TODO: this assumption might be broken when other ppl add info to the xml stats + timestamp.append(single_data_point_dict['timestamp']) + mem.append(single_data_point_dict['mem']) + cpu.append(single_data_point_dict['cpu']) + read_bytes.append(single_data_point_dict['read_bytes']) + write_bytes.append(single_data_point_dict['write_bytes']) + cancelled_bytes.append(single_data_point_dict['cancelled_bytes']) + + # add the collected traces to a nice dict + resource_traces_dict = {} + resource_traces_dict['timestamp'] = timestamp + resource_traces_dict['mem'] = mem + resource_traces_dict['cpu'] = cpu + resource_traces_dict['read_bytes'] = read_bytes + resource_traces_dict['write_bytes'] = write_bytes + resource_traces_dict['cancelled_bytes'] = cancelled_bytes + + return resource_traces_dict + + +def collect_job_information(job_xml): + """ + Collects all the information for an indivual jb run. 
Including + possible executable information added + """ + #first get the attribute information + jobs_dict = convert_xml_attributes_to_dict(job_xml.attributes) + + # now collect the actual 'statistics' + # statistics are burried one node deeper + resource_xml = job_xml.getElementsByTagName('resource_usage') + if len(resource_xml) != 1: + print "Encountered an error while parsing resource node" + print "Continue parsing with other available nodes." + print "information might be corrupted or incomplete" + + # get the attributes mainly needed for the pid of the job recipe + resource_dict = convert_xml_attributes_to_dict(resource_xml[0].attributes) + + # get the children, this is a list proces statistics + traces_dict = {} + processes_xml = job_xml.getElementsByTagName('process') + for idx, single_process_xml in enumerate(processes_xml): + # First grab information about the executable + single_process_dict = convert_xml_attributes_to_dict(single_process_xml.attributes) + + # Then create the time line + data_points_list = single_process_xml.getElementsByTagName('data_point') + resource_traces_dict = create_timeline_dict(data_points_list) + single_process_dict['trace'] = resource_traces_dict + + # we have no idea what is the order of the indiv jobs, store them with an idx + traces_dict[idx] = single_process_dict + + jobs_dict['traces'] = traces_dict + jobs_dict['number_of_jobs'] = len(processes_xml) + return jobs_dict + + +def collect_recipe_information(node_xml): + """ + Parses and collects information of a recipe step + """ + # get the name and duration for this specific step + node_dict = convert_xml_attributes_to_dict(node_xml.attributes) + node_dict['node_name'] = node_xml.nodeName.encode("ascii", 'ignore') + + # The actual node run information is stored 2 nodes deeper + nodes = node_xml.getElementsByTagName('nodes') + if len(nodes) == 0: # if zero this node did not run on the compute nodes + node_dict['info'] = "No node level information" + return node_dict + + if len(nodes) > 1: # there should only be a single node entry failure state otherwise + print "Encountered an error while parsing node {0}".format(node_dict['node_name']) + print "Continue parsing with other available nodes." + print "information might be corrupted or incomplete" + return node_dict + + # we have a single node as expected + # grab the job (node level information) + jobs = nodes[0].getElementsByTagName('job') + if len(jobs) == 0: + print "Encountered an error while parsing node {0}".format(node_dict['node_name']) + print "No job / node level information was found" + print "Continue parsing with other available nodes." + print "information might be corrupted or incomplete" + return node_dict + + # now parse the individual nodes + jobs_dict = {} + for job in jobs: + single_job_dict = collect_job_information(job) + jobs_dict[single_job_dict['job_id']] = single_job_dict + + # save the parsed information + node_dict['jobs'] = jobs_dict + + return node_dict + + +def get_pipeline_information(stats_xml): + """ + Collects all the information needed to create toplevel information about + the pipeline. + + The information produced by this function should be ready for plotting. + TODO: + This function in development. 
Needs a proper info string + """ + # The pipeline_information dictionary will contain all the parsed information + pipeline_information = {} + + # Get the toplevel pipeline information + # TODO: It might be easier to first collect all stats and then calculate + # attempt to extract as much as possible for now to get a proof of concept + # I do not realy like the dict keys. It would be better to use the index? + # works when itterating tru the information + pipeline_information[0] = collect_toplevel_information(stats_xml) + + for idx, dom in enumerate(stats_xml.childNodes): + node_name = dom.nodeName.encode("ascii", 'ignore') + + # First check if we are in the active_stack node. + # this node should be empty. print warning if not! + if (node_name == 'active_stack'): + if (len(dom.childNodes) != 0): + print "The active stack contained leftover nodes" + print "This probably means that the pipeline failed in a step" + + # TODO: The mem size could be of interest: might point into + # the direction of an issue with the config. + continue # do not parse information in this node + + # We have a pipeline node. Send it to function to get parsed + recipe_dict = collect_recipe_information(dom) + + pipeline_information[idx] = recipe_dict + + return pipeline_information + + +def create_recipe_duration_lists(pipeline_information): + """ + Collects the duration and the name of the steps in the pipeline. + """ + duration_list = [] + step_name_list = [] + for idx in range(1, len(pipeline_information.items())): + duration_list.append(pipeline_information[idx]["duration"]) + step_name_list.append( pipeline_information[idx]["node_name"]) + + return duration_list, step_name_list + + +def create_and_display_pie_chart(fracs, labels, title_string): + """ + Make a pie chart - see + http://matplotlib.sf.net/matplotlib.pylab.html#-pie for the docstring. + + This example shows a basic pie chart with labels optional features, + like autolabeling the percentage, offsetting a slice with "explode", + adding a shadow, and changing the starting angle. + + The version of pylab installed is old. Most advanced functionality is not + working: it is not possible to use startangle=90, or explode + + """ + # make a square figure and axes + plt.figure(1, figsize=(6,6)) + ax = plt.axes([0.1, 0.1, 0.8, 0.8]) + + # The slices will be ordered and plotted counter-clockwise. 
+ plt.pie(fracs, labels=labels, autopct='%1.1f%%', shadow=True) + + plt.title(title_string, bbox={'facecolor':'0.8', 'pad':5}) + + plt.show() + + +def create_and_display_horizontal_bar(title_string, + dataset, data_orders, colors=["r","g","b","y"]): + """ + Create a horizontal bar chart + https://stackoverflow.com/questions/11273196/stacked-bar-chart-with-differently-ordered-colors-using-matplotlib + """ + #dataset = [{'A':19, 'B':39, 'C':61, 'D':70}, + # {'A':34, 'B':68, 'C':32, 'D':38}, + # {'A':35, 'B':45, 'C':66, 'D':50}, + # {'A':23, 'B':23, 'C':21, 'D':16}, + # {'A':35, 'B':45, 'C':66, 'D':50}] + #data_orders = [['A', 'B', 'C', 'D'], + # ['B', 'A', 'C', 'D'], + # ['A', 'B', 'D', 'C'], + # ['B', 'A', 'C', 'D'], + # ['A', 'B', 'C', 'D']] + + names = sorted(dataset[0].keys()) + values = np.array([[data[name] for name in order] for data,order in zip(dataset, data_orders)]) + lefts = np.insert(np.cumsum(values, axis=1),0,0, axis=1)[:, :-1] + orders = np.array(data_orders) + bottoms = np.arange(len(data_orders)) + + for name, color in zip(names, colors): + idx = np.where(orders == name) + value = values[idx] + left = lefts[idx] + plt.bar(left=left, height=0.8, width=value, bottom=bottoms, + color=color, orientation="horizontal", label=name) + + plt.yticks(bottoms+0.4, ["data %d" % (t+1) for t in bottoms]) + plt.legend(loc="best", bbox_to_anchor=(1.0, 1.00)) + plt.subplots_adjust(right=0.75) + plt.title(title_string, bbox={'facecolor':'0.8', 'pad':5}) + plt.show() + +def create_toplevel_pipeline_visualizations(duration_list, step_name_list): + """ + Some examples of visualizations possible using the stats collected from the + pipeline + """ + + # Create a label list for plotting: + formatted_step_name_list = [] + for duration, step_name in zip(duration_list, step_name_list): + formatted_step_name_list.append(step_name[1:] + + "\n({0}sec.)".format(format(duration, '.3g'))) + + if False: + create_and_display_pie_chart(duration_list[::-1], # reverse the list to get + formatted_step_name_list[::-1], # clockwise charts + "Pipeline steps") + + # The barchart function has been pulled from the internet + # the interface is fragile + data_dict = {} + formatted_step_name_list = [] + for duration, step_name in zip(duration_list, step_name_list): + formatted_step_name = step_name[1:] + data_dict[formatted_step_name]=float(format(duration, '.3g')) + formatted_step_name_list.append(formatted_step_name) + + dataset = [data_dict] + data_orders = [formatted_step_name_list] + colors = ["r","g","b","y", "c", "m"] + if False: + create_and_display_horizontal_bar( "Pipeline steps", + dataset, data_orders, colors) + + +def create_trace_plot_information(step_dict, plot_debug): + """ + Creates statistics timelines for all executables on all jobs + For this the min and max timestep are found + THen all the traces are binnen to 10 second bins. + Missing traces are padded in with zero untill all traces have the same lenght + This allows for a dull scatter plot of all the executables + + next all the traces for indiv nodes are added to get the 'total' load for + a dataproduct + + A lot of information is lost in this parse step. 
+ This is an example what you could do with the data + """ + poll_step_ms = 10000 * 60 + # structures to be filled in this function + aggregate_traces = {} + aggregate_traces['cpu'] = [] + aggregate_traces['mem'] = [] + aggregate_traces['read_bytes'] = [] + aggregate_traces['write_bytes'] = [] + aggregate_traces['cancelled_bytes'] = [] + all_traces = {} + all_traces['cpu'] = [] + all_traces['mem'] = [] + all_traces['read_bytes'] = [] + all_traces['write_bytes'] = [] + all_traces['cancelled_bytes'] = [] + + # We first need to get the min and max timestamp + max_time_stamp = 0 + min_time_stamp = 9999999999999 # insanely large timestamp know larger then in the stats (somewhere aound 300 years in the future) + + if not step_dict.has_key('jobs'): + time_stamps = [] + return time_stamps, all_traces, aggregate_traces + + for id, node_dict in step_dict['jobs'].items(): # the node level information + for id, pid_dict in node_dict['traces'].items(): # traces of the actual executables + if len(pid_dict['trace']['timestamp']) == 0: + continue + + # Timestamp is in milli seconds + if pid_dict['trace']['timestamp'][-1] > max_time_stamp: # check last item in the array + max_time_stamp = pid_dict['trace']['timestamp'][-1] + + if pid_dict['trace']['timestamp'][0] < min_time_stamp: + min_time_stamp = pid_dict['trace']['timestamp'][0] + + + # floor all the timestamps to the nearest whole 10 min mark + #( the exact time does not matter for the stats) + # this allows us to create additional step inserts if needed + # first the min and max to allow us to pad + min_time_stamp = (min_time_stamp / poll_step_ms) * poll_step_ms + max_time_stamp = (max_time_stamp / poll_step_ms) * poll_step_ms + + # the time line for all data traces returned by this function + time_stamps = [x for x in range(min_time_stamp, max_time_stamp + poll_step_ms, poll_step_ms)] # we also need the last bin range() is exclusive + + # loop the data, clean and pad. + for id, node_dict in step_dict['jobs'].items(): # the nodes + # list needed to calculate the total load on a node: aggregate + cpu_job = [0] * len(time_stamps) + mem_job = [0] * len(time_stamps) + read_bytes_job = [0] * len(time_stamps) + write_bytes_job = [0] * len(time_stamps) + cancelled_bytes_job = [0] * len(time_stamps) + + for id2, pid_dict in node_dict['traces'].items(): # traces of the actual executables running on the node + # 'rounding' errors might cause the binning to be non continues + # therefore floor the first entry in the timestamp list and complete + # the array + binned_time_stamps = [] + # Not all traces contain entries for the whole traced timje + n_missing_stamps_start = 0 + n_missing_stamps_end = 0 + + # A executable call might be that short that not a single trace was recorded + if len(pid_dict['trace']['timestamp']) == 0: + n_missing_stamps_start = (max_time_stamp - min_time_stamp ) / poll_step_ms + 1 + else: + # only take the first timestamp and expand after. 
Rounding might + # put neighbouring samples with an additional bin between them + first_timestamp_floored = (pid_dict['trace']['timestamp'][0] / poll_step_ms) * poll_step_ms + binned_time_stamps = [first_timestamp_floored + idx * poll_step_ms for idx + in range(len(pid_dict['trace']['timestamp']))] + + n_missing_stamps_start = (binned_time_stamps[0] - min_time_stamp) / poll_step_ms + n_missing_stamps_end = (max_time_stamp - binned_time_stamps[-1]) / poll_step_ms + + # Now we create the time lines with padding + cpu = [] + mem = [] + read_bytes = [] + write_bytes = [] + cancelled_bytes = [] + + # add missing entries at begin + for idx in range(n_missing_stamps_start): + cpu.append(0) + mem.append(0) + read_bytes.append(0) + write_bytes.append(0) + cancelled_bytes.append(0) + + # add the recorded timelines + for cpu_value in pid_dict['trace']['cpu']: + if cpu_value > 10000: # TODO: Why this if statement? + print pid_dict['trace']['cpu'] + raise Exception + + cpu = cpu + pid_dict['trace']['cpu'] + mem = mem + pid_dict['trace']['mem'] + + # Convert the read and write to deltas, remember that the delta + # means the length is minus 1 so add extra 0 at the start + temp_list = pid_dict['trace']['read_bytes'] # use temp to prevent double + # dict access + if len(temp_list) >= 2: # if there are enough entries to de delta ( 2 or more) + read_bytes = read_bytes + [0] + [y-x for x, y in zip(temp_list[:-1], temp_list[1:])] + temp_list = pid_dict['trace']['write_bytes'] # use temp to prevent double + write_bytes = write_bytes + [0] + [y-x for x, y in zip(temp_list[:-1], temp_list[1:])] + temp_list = pid_dict['trace']['cancelled_bytes'] # use temp to prevent double + cancelled_bytes = cancelled_bytes + [0] + [y-x for x, y in zip(temp_list[:-1], temp_list[1:])] + + else: # just add the recorded data point (its 1 or np point) + # TODO: it might be best to add these as deltas!!! 
+ read_bytes = read_bytes + pid_dict['trace']['read_bytes'] + write_bytes = write_bytes + pid_dict['trace']['write_bytes'] + cancelled_bytes = cancelled_bytes + pid_dict['trace']['cancelled_bytes'] + + # add the missing bins at the end + for idx in range(n_missing_stamps_end): + cpu.append(0) + mem.append(0) + # use the last value for the read and write bites + read_bytes.append(0) + write_bytes.append(0) + cancelled_bytes.append(cancelled_bytes[-1]) + + # save a unique trace + all_traces['cpu'].append(cpu) + all_traces['mem'].append(mem) + all_traces['read_bytes'].append(read_bytes) + all_traces['write_bytes'].append(write_bytes) + all_traces['cancelled_bytes'].append(cancelled_bytes) + + #check if the data parced is complete + + if len(cpu) != len(time_stamps): + + raise Exception("Length of the traces is incorrect, error parsing") + + + # Now we aggregate all the executables on a node + for idx,(cpu_entrie, mem_entrie, read_entrie, write_entrie, cancelled_entrie) in enumerate(zip(cpu, mem,read_bytes, write_bytes, cancelled_bytes)): + cpu_job[idx] += cpu_entrie + mem_job[idx] += mem_entrie + + try: + read_bytes_job[idx] += read_entrie + write_bytes_job[idx] += write_entrie + cancelled_bytes_job[idx] += cancelled_entrie + except: + print pid_dict + raise BaseException + + + # save the aggregate for the job trace + aggregate_traces['cpu'].append(cpu_job) + aggregate_traces['mem'].append(mem_job) + aggregate_traces['read_bytes'].append(read_bytes_job) + aggregate_traces['write_bytes'].append(write_bytes_job) + aggregate_traces['cancelled_bytes'].append(cancelled_bytes_job) + + return time_stamps, all_traces, aggregate_traces + +def create_plots_of_traces(time_stamps, all_traces, aggregate_traces, + super_title): + + # Clean the time stamps: start at zero and convert to seconds + first_time_stamp = time_stamps[0] + time_stamps = [(x - first_time_stamp) / 1000 for x in time_stamps] + plt.suptitle(super_title, fontsize=20) + plt.subplot(2,5,1) + + plt.title("CPU (% of core)") + plt.ylabel('Traces of all processes') + + + for trace in all_traces['cpu']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,2) + plt.title("MEM (% total)") + for trace in all_traces['mem']: + plt.plot(time_stamps, trace) + + + plt.subplot(2,5,3) + plt.title("Read (bytes)") + for trace in all_traces['read_bytes']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,4) + plt.title("Write (bytes)") + for trace in all_traces['write_bytes']: + plt.plot(time_stamps, trace) + + + plt.subplot(2,5,5) + plt.title("cancelled (bytes)") + for trace in all_traces['cancelled_bytes']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,6) + plt.ylabel('Traces of aggregate per node ') + for trace in aggregate_traces['cpu']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,7) + for trace in aggregate_traces['mem']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,8) + for trace in aggregate_traces['read_bytes']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,9) + for trace in aggregate_traces['write_bytes']: + plt.plot(time_stamps, trace) + + plt.subplot(2,5,10) + for trace in aggregate_traces['cancelled_bytes']: + plt.plot(time_stamps, trace) + plt.show() + +def create_pipeline_traces_and_stat(pipeline_information): + """ + + """ + stats = {'cpu':{'max_max':0.0}, + 'mem':{'max_max':0.0}, + 'read_bytes':{'max_max':0.0}, + 'write_bytes':{'max_max':0.0}, + 'cancelled_bytes':{'max_max':0.0}} + traces = {} + idx = 0 + for key, entrie in pipeline_information.items()[1:]: # skip first entry not a step + # First create the traces + + 
if idx == 2: + plot_debug = True + else: + plot_debug = False + + idx = idx + 1 + + + time_stamps, all_traces, aggregate_traces = create_trace_plot_information(entrie, plot_debug) + traces[key] = {'time_stamps':time_stamps, + 'all_traces':all_traces, + 'aggregate_traces':aggregate_traces} + + statistical_traces = {} + # use numpy to calculate some statistics + for metric_key, node_traces in aggregate_traces.items(): + statistical_traces[metric_key] = {} + # TODO: The current statistical properties have a problem: + # They are calculated on all traces, they might start delayed, due to node congestion + # only none zero instances should be take in account. + # possible solution is a search and replace of zero to Nan. and use the Nan save + # statistical method for calculating the stats. + median = np.median(node_traces, axis=0) + statistical_traces[metric_key]['median'] = \ + median.tolist() + + min = np.amin(node_traces, axis=0) + statistical_traces[metric_key]['min'] = \ + min.tolist() + + max = np.amax(node_traces, axis=0) + max_max = float(np.amax(max)) + if max_max > stats[metric_key]['max_max']: + stats[metric_key]['max_max']= max_max + + statistical_traces[metric_key]['max'] = \ + max.tolist() + + mean = np.mean(node_traces, axis=0) + statistical_traces[metric_key]['mean'] = \ + mean.tolist() + + # The std is not that interesting on it self, we need it to be + # as traces around the mean + std = np.std(node_traces, axis=0) + statistical_traces[metric_key]['std_top'] = \ + np.add(mean, std).tolist() + + statistical_traces[metric_key]['std_bot'] = \ + np.subtract(mean, std).tolist() + + + traces[key]['statistical_traces'] = statistical_traces + + return traces, stats + + +def create_plot_of_full_pipeline(pipeline_information, stats, + step_name_list, label_list): + + + first_loop = True + first_time_stamp = 0 + create_legend = True + # when printing the labels for steps, they need to be ofset in hight to each other + # to prevent that they are plotted on each other when running a short stop + # this bool flips each step + label_flip_flop = True + + f = plt.figure() + + # step 1, add all the information to the plots + for (key, entrie), step_name in zip(pipeline_information.items(), step_name_list): + if first_loop: + first_time_stamp = entrie['time_stamps'][0] + first_loop = False + + # Clean the time stamps: take the first timestamp as the zero point + time_stamps = [(x - first_time_stamp) / 1000 for x in entrie['time_stamps']] + + aggregate_traces = entrie['aggregate_traces'] + statistical_traces = entrie['statistical_traces'] + + stat_list = ['cpu', 'mem', 'read_bytes', 'write_bytes', 'cancelled_bytes'] + unit_list = [' (% core)', ' (% total)', ' (bytes)', ' (bytes_)', ' (bytes)'] + for idx, (key, unit) in enumerate(zip(stat_list, + unit_list)): + aggregate = aggregate_traces[key] + statistical = statistical_traces[key] + max_max = stats[key]['max_max'] + + # plot all the node traces + ax = plt.subplot(5,1,idx + 1) # subplot index starts at 1 + + plt.ylabel(key + unit) + + # Plit the traces for each each node (not individual pids) + for trace in aggregate: + line1 = ax.plot(time_stamps, trace, color='y') + + #plot the max in red, the mean in black and the std in black dotted + #line2 = ax.plot(time_stamps, statistical['max'], color='r') + #line3 = ax.plot(time_stamps, statistical['min'], color='g') + line4 = ax.plot(time_stamps, statistical['mean'], color='k', linewidth=2.0) + #line5 = ax.plot(time_stamps, statistical['std_top'], color='k', linestyle='--' ) + #line5 = 
ax.plot(time_stamps, statistical['std_bot'], color='k', linestyle='--' ) + + + # plot lines demarking the start of the individual step names + plt.plot([time_stamps[0], time_stamps[0]],[0, max_max], color='k', linestyle='--') + if idx == 0: # only print labels above the top trace + if step_name in label_list: # Only print names in the list + if label_flip_flop: + ax.text(time_stamps[0], max_max, '\n\n' + step_name) + label_flip_flop = not label_flip_flop + else: + ax.text(time_stamps[0], max_max, step_name + '\n\n') + label_flip_flop = not label_flip_flop + + # print the legend only once + if create_legend and idx == 4: # 4 is the botom plot. Used for cancelled bytes. Should not happen that often. Is the least important plot + line1[0].set_label('Node traces') + # line2[0].set_label('Max utilization') + # line3[0].set_label('Min utilization') + line4[0].set_label('Mean utilization') + #line5[0].set_label('Mean +- 1 std') + create_legend = False + + # make up the plot + plt.suptitle("Aggregated performance plots for a full pipeline", fontsize=20) + plt.subplots_adjust(hspace=0.001) # Set the spacing between the plot to almost zero + ax1 = plt.subplot(5,1,1) + ax1.yaxis.get_major_formatter().set_powerlimits((1, 4)) + ax2 = plt.subplot(5,1,2) + ax2.yaxis.tick_right() + ax2.yaxis.get_major_formatter().set_powerlimits((1, 4)) + ax3 = plt.subplot(5,1,3) + ax3.yaxis.get_major_formatter().set_powerlimits((1, 4)) + ax4 = plt.subplot(5,1,4) + ax4.yaxis.get_major_formatter().set_powerlimits((1, 4)) + ax4.yaxis.tick_right() + + ax5 = plt.subplot(5,1,5) + ax5.yaxis.get_major_formatter().set_powerlimits((1, 4)) + ax5.xaxis.get_major_formatter().set_powerlimits((1, 4)) + min_max_x = ax5.xaxis.get_data_interval() + # plot ticks at hours + #print min_max_x + #print type(min_max_x) + #print min_max_x.shape + + #print min_max_x[0] + #print min_max_x[1] + #raise Exception + + # create x-axis labels: print each hours mark + tick_step = 7200.0 + hr_multi = round(7200.0 / 3600) + xtick_range = np.arange(min_max_x[0], min_max_x[1] + 1, tick_step) + xtick_labels = [] + for tick_value in xtick_range: + xtick_labels.append(str((tick_value / tick_step) * hr_multi)) + + ax5.xaxis.set_ticks(xtick_range) + ax5.xaxis.set_ticklabels(xtick_labels) + + plt.xlabel('Time (Hrs)') + #ax5.legend() # The legend is only created for the bottom plot + ax5.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), + fancybox=True, shadow=True, ncol=5) + + + + # remove xlabel of the first 4 plots + xticklabels = plt.subplot(5,1,1).get_xticklabels()+plt.subplot(5,1,2).get_xticklabels()+ \ + plt.subplot(5,1,3).get_xticklabels()+plt.subplot(5,1,4).get_xticklabels() + plt.setp(xticklabels, visible=False) + + plt.show() + +if __name__ == '__main__': + if len(sys.argv) < 2: + usage() + exit(1) + + xml_stats_path = sys.argv[1] + stats_xml = open_file_and_parse(xml_stats_path) + + # From here the information is extracted from the xml + # This implementation is tightly coupled with the way it is represented + # in the file. The filling of this xml is done in the pipeline framework + # + # It might contain a large amount of information that is not of + # interest at this level. Search for specific tree information and entries + # first step is parsing and formatting the information in the xml + pipeline_information = get_pipeline_information(stats_xml) + + # Create the roughest stat possible: A duration list. 
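create_pipeline_traces_and_stat() above relies on numpy reductions along axis=0 to turn the per-node aggregate traces into per-timestep statistics (median, mean, min, max and the overall 'max_max'). A small sketch with three hypothetical CPU traces that are already binned to a common time line:

```python
import numpy as np

# Three hypothetical per-node CPU traces (percent of a core), one value
# per 10-minute bin.
node_traces = [[10, 80, 60, 0],
               [20, 90, 40, 0],
               [30, 70, 50, 10]]

print(np.median(node_traces, axis=0))   # [ 20.  80.  50.   0.]
print(np.mean(node_traces, axis=0))     # [ 20.  80.  50.   3.33...]
print(np.amax(node_traces, axis=0))     # [ 30.  90.  60.  10.]

# The scalar maximum over the per-timestep maxima is what the code keeps
# as stats[metric_key]['max_max'].
print(float(np.amax(np.amax(node_traces, axis=0))))   # 90.0
```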
+ duration_list, step_name_list = create_recipe_duration_lists(pipeline_information) + + # Create all the traces including statistical traces of all the staps + traces, stats = create_pipeline_traces_and_stat(pipeline_information) + + + # list of labels to print: THe label is cluttered, only plot + # the interesting ones + label_list = ['selfcal_prepare', 'selfcal_prepare', 'selfcal_bbs', 'selfcal_awimager'] + # create and plot + create_plot_of_full_pipeline(traces, stats, step_name_list, label_list) + exit(0) + + + #print step specific_traces + #time_stamps, all_traces, aggregate_traces = create_trace_plot_information(pipeline_information[1]) + #create_plots_of_traces(time_stamps, all_traces, aggregate_traces, "prepare") + #time_stamps, all_traces, aggregate_traces = create_trace_plot_information(pipeline_information[2]) + #create_plots_of_traces(time_stamps, all_traces, aggregate_traces, "db create") + #time_stamps, all_traces, aggregate_traces = create_trace_plot_information(pipeline_information[3]) + #create_plots_of_traces(time_stamps, all_traces, aggregate_traces, "bbs") + #time_stamps, all_traces, aggregate_traces = create_trace_plot_information(pipeline_information[4]) + #create_plots_of_traces(time_stamps, all_traces, aggregate_traces, "awimager") + #time_stamps, all_traces, aggregate_traces = create_trace_plot_information(pipeline_information[5]) + #create_plots_of_traces(time_stamps, all_traces, aggregate_traces, "sourcefinding") + #time_stamps, all_traces, aggregate_traces = create_trace_plot_information(pipeline_information[6]) + #create_plots_of_traces(time_stamps, all_traces, aggregate_traces, "finalize") + + + \ No newline at end of file diff --git a/CEP/Pipeline/helper_scripts/create_selfcal_parset.py b/CEP/Pipeline/helper_scripts/create_selfcal_parset.py new file mode 100644 index 0000000000000000000000000000000000000000..2f97284575fe09bf0f854b6af86a373799b1d776 --- /dev/null +++ b/CEP/Pipeline/helper_scripts/create_selfcal_parset.py @@ -0,0 +1,245 @@ +import sys + +def open_and_parse_config_file(file_location): + """ + This script uses the most simple parameter parsing possible. + If you enter incorrect settings it will raise an exception, tough luck!! 
+ """ + config_dict = {} + # ********************* + # Open the file containing parameters, remove comment, empty lines + # and end of line comments + raw_lines = open(file_location, 'r').read().splitlines() + + lines = [] + for line in raw_lines: + if len(line) == 0: # empty lines + continue + + if line[0] == '#': # skip lines starting with a # + continue + + # use strip to remove oel and trailing white space + line_without_comment = line.split('#')[0].strip() + lines.append(line_without_comment) + + + # ********************* + # new_obs_name, first entry + new_obs_name = "" + try: + + new_obs_name = lines[0] + except: + print "Tried parsing:" + print lines[0] + print "this failed" + raise ImportError + + config_dict["new_obs_name"] = new_obs_name + + # ********************* + # second line, output path + output_path = "" + try: + output_path = lines[1] # just read as a python list + except: + print "Tried parsing:" + print lines[1] + print "this failed" + raise ImportError + + config_dict["output_path"] = output_path + + # ********************* + # second line, list of the nodes to run the pipeline on + node_list = [] + try: + node_list = eval(lines[2]) # just read as a python list + except: + print "Tried parsing:" + print lines[2] + print "this failed" + raise ImportError + + if len(node_list) == 0: + print "opened node list did not contain any entries!\n\n" + raise ImportError + + config_dict["node_list"] = node_list + + + + # ********************* + # number of major cycles + number_of_major_cycles = "" + try: + number_of_major_cycles = int(eval(lines[3])) + except: + print "tried parsing:" + print lines[3] + print "this failed" + raise importerror + + config_dict["number_of_major_cycles"] = number_of_major_cycles + + return config_dict + + +def open_and_convert_parset(parset_file): + """ + Open the parset file. 
Parse all the entries, clean up comments and + then file a dict with the key values pairs + """ + parset_as_dict = {} + lines = open(parset_file, 'r') + + for idx, line in enumerate(lines): + if len(line) == 0: + continue + + if line[0] == '#': # skip lines starting with a # + continue + + line_without_comment = line.split('#')[0] + key, value = line_without_comment.split("=") + parset_as_dict[key.strip()] = value.strip() # use strip to remove oel + + return parset_as_dict + + +def create_output_lists(config_dict): + """ + Based on the information in the parst config_dict use string foo to + create the correct location and filename string lists + """ + filenames_h5 = [] + filenames_ms = [] + locations = [] + for idx, node in enumerate(config_dict["node_list"]): + # LIst of file names + filename_single = (config_dict["new_obs_name"] + "_" + str(idx) + ".H5") + filenames_h5.append(filename_single) + + filename_single = (config_dict["new_obs_name"] + "_" + str(idx) + ".MS") + filenames_ms.append(filename_single) + + location_single = (node + ":" + config_dict["output_path"] + config_dict["new_obs_name"] + "/") + locations.append(location_single) + + + return filenames_h5, filenames_ms, locations + + +def add_output_lists_to_parset(parset_as_dict, filenames_h5, + filenames_ms, locations, number_of_major_cycles): + """ + Insert the newly create lists into the parset dict using correct + HARD CODED key vlues + """ + parset_as_dict["ObsSW.Observation.DataProducts.Output_SkyImage.filenames"] = repr(filenames_h5) + + parset_as_dict["ObsSW.Observation.DataProducts.Output_SkyImage.locations"] = repr(locations) + + parset_as_dict["ObsSW.Observation.DataProducts.Output_Correlated.filenames"] = repr(filenames_ms) + + parset_as_dict["ObsSW.Observation.DataProducts.Output_Correlated.locations"] = repr(locations) + + # Grab the skip list from the output skyimage and add it with the correct c + # correlated key + parset_as_dict["ObsSW.Observation.DataProducts.Output_Correlated.skip"] = parset_as_dict["ObsSW.Observation.DataProducts.Output_SkyImage.skip"] + + parset_as_dict["ObsSW.Observation.ObservationControl.PythonControl.Imaging.number_of_major_cycles"] = str( number_of_major_cycles) + + + + + + +def output_parset_to_file(parset_as_dict_of_string_to_string, + output_parset_path): + """ + Print the parset dict to file. + Use a sorted version of the keys for itterating in the dict. This results + in the same order of the entries as produced by SAS/MAC + """ + sorted_keys = sorted(parset_as_dict_of_string_to_string.keys()) + + file_ptr = open(output_parset_path, 'w') + for key in sorted_keys: + file_ptr.write(key + "=" + parset_as_dict_of_string_to_string[key] + "\n") + + file_ptr.close() + + +def basic_validity_ok(locations, parset_as_dict_of_string_to_string): + """ + Performs a very basic (set of) test (s) to check if the created output + parset would make any sense + """ + skip_list_as_string = parset_as_dict_of_string_to_string["ObsSW.Observation.DataProducts.Output_SkyImage.skip"] + skip_list = eval(skip_list_as_string) + + # now check if the lenght is the same, if not the config does not match the parset + if len(skip_list) != len(locations): + print "The length of the skip list in the provided parset" + print "is not the same as the number of dataproduct specified in the config\n" + print "aborting, NO output parset written!" 
+
+        return False
+
+    return True
+
+
+def usage():
+    print """***************************
+    usage: python create_selfcal_parset.py <config_file> <parset_file> <output_parset_path>
+
+    create_selfcal_parset is a script which creates a 'new' selfcal parset based
+    on a valid imaging pipeline parset.
+    It takes a config file and applies the information contained in it to the
+    parset_file data, writing the result to output_parset_path
+
+    The config file allows controlling the locus nodes and the output
+    observation ID. It also allows setting the number of major cycles.
+    See the config file for the syntax
+
+    output_parset_path will be overwritten without any prompt
+    ****************************************
+    """
+
+if __name__ == "__main__":
+    if len(sys.argv) < 4:
+        usage()
+        exit(1)
+
+    # Quick and dirty parsing of the parameters; could use tooling but it is a
+    # one-off script
+    config_file = sys.argv[1]
+    parset_file = sys.argv[2]
+    output_parset_path = sys.argv[3]
+
+    # Single function attempting to parse the parameters, minimal error checking
+    config_dict = open_and_parse_config_file(config_file)
+    # get the parset in a format we can easily change
+    parset_as_dict_of_string_to_string = open_and_convert_parset(parset_file)
+
+    # Create the new lists with output locations
+    filenames_h5, filenames_ms, locations = \
+         create_output_lists(config_dict)
+
+    # Very basic check if what was specified is correct
+    if not basic_validity_ok(locations, parset_as_dict_of_string_to_string):
+        exit(1)
+
+    # Add them to the parset
+    add_output_lists_to_parset(parset_as_dict_of_string_to_string, filenames_h5,
+                               filenames_ms, locations,
+                               config_dict["number_of_major_cycles"])
+
+    # print as valid parset
+    output_parset_to_file(parset_as_dict_of_string_to_string,
+                          output_parset_path)
+
+    exit(0)
diff --git a/CEP/Pipeline/helper_scripts/pipeline_commisioning.txt b/CEP/Pipeline/helper_scripts/pipeline_commisioning.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cb76e6f643e19d0b81768e442bbd62c16215cbff
--- /dev/null
+++ b/CEP/Pipeline/helper_scripts/pipeline_commisioning.txt
@@ -0,0 +1,106 @@
+#!/bin/bash -e
+# THIS SHOULD BE DONE USING YOUR OWN ACCOUNT
+
+# TODO:
+# Script to export the needed/minimal data from the xml stat file.
+# If this can be automated we could automate the actual detailed statistics
+# of a pipeline: Create pipeline reports.
+#
+
+# Steps needed to checkout, install and run pipelines
+# all work will be done in local user space without a chance of disrupting operations.
+
+# *****************************************************************************
+# 1. Prepare the directories and build for doing pipeline runs
+# a. Checkout the current branch:
+#    Username and password of the svn are: lofar-guest and lofar-guest
+mkdir ~/source/7058 -p
+cd ~/source/7058
+# this might take some time:
+svn export --ignore-externals https://svn.astron.nl/LOFAR/branches/CEP-Pipeline-Task7058 LOFAR
+
+# b. build the pipeline framework and needed executables:
+#    CMake collects information about the environment and creates a make file
+mkdir ~/build/7058/gnu_debug -p
+cd ~/build/7058/gnu_debug
+cmake -DBUILD_PACKAGES="Offline" -DBUILD_ASKAPsoft=OFF -DUSE_OPENMP=ON ~/source/7058/LOFAR
+make install -j4    # this might take some time
+
+# create the directory where logfiles and other runtime files will be stored
+mkdir ~/build/7058/gnu_debug/installed/var/run/pipeline -p
+
+# c. Create a 'local' copy of the config file and generator script
+mkdir ~/parset
+cp ~/source/7058/LOFAR/CEP/Pipeline/helper_scripts/selfcal.settings ~/parset
+cp ~/source/7058/LOFAR/CEP/Pipeline/helper_scripts/create_selfcal_parset.py ~/parset
+
+# *****************************************************************************
+# 2. Prepare the files based on a msss_imaging_pipeline parset
+#    This step must be repeated for each new observation/field to process
+# b. Schedule an imaging pipeline, write down the obsid. Wait until the parset is
+#    created on the headnode (LHN001) and kill the pipeline run
+# c. secure the generated parset; the selfcal parset will be based on it
+cp <imagingparset_path> ~/parset
+
+# ******************************************************************
+# 3. Prepare the environment.
+#    this must be done each time you log in to the headnode
+
+cd ~/build/7058/gnu_debug
+use Lofar       # always use the latest production software, if you want cutting edge: use LofIm
+use Pythonlibs  # needed for pywcs
+. lofarinit.sh  # sets the paths to use the current installation
+
+
+# **********************************************************************
+# 4. Create a parset for an individual run
+#    These steps must be taken for each new sample/run/experiment
+# a. Adapt the settings file to your wishes:
+#    - Fill in a unique run name
+#    - Fill in your username (you might need to create this dir on the locus nodes)
+#    - Create the locus list (you could copy the list in the original pipeline parset)
+#      The list should be a python style list of strings
+#    - select the number of major cycles
+cd ~/parset
+emacs -nw ./selfcal.settings   # or use vim or nano
+
+# b. Run the generator:
+#    Use a unique name for each parset: The parset name is used internally in the pipeline
+#    to generate the location of the logfiles et al.
+python ./create_selfcal_parset.py ./selfcal.settings ./selfcal_imager_pipeline.parset ./Unique_name.parset
+
+# c. Run the pipeline
+python installed/bin/selfcal_imager_pipeline.py \
+    selfcal_imager_pipeline.parset -c ~/build/7058/gnu_debug/installed/share/pipeline/pipeline.cfg -d
+
+# ***********************************************************************
+# 5. Results of the pipeline.
+
+# a. The created images can be found in the output location you specified.
+
+# b. The 'performance' of the pipeline
+#    The performance stats of the pipeline are created and collected 'on the fly'. They are
+#    stored in an xml file without a lot of structure or documentation.
+#    A python script has been created that collects the information,
+#    calculates some statistics and presents them in a plot.
+
+cd ~/source/7058/LOFAR/CEP/Pipeline/helper_scripts
+
+# now run the visualization program on the statistics file as created by the pipeline
+# The statistics file is produced next to the logfile:
+# ~/build/7058/gnu_debug/installed/var/run/<Parsetname>/logs/<timestamp>/statistics.xml
+# The program might run for a long time: It takes about 30 seconds to paint a figure on a very limited
+# dataset.
+ +python aggregate_stats.py <statistics.xml> + + + + + + + + + + + diff --git a/CEP/Pipeline/helper_scripts/selfcal.parset b/CEP/Pipeline/helper_scripts/selfcal.parset new file mode 100644 index 0000000000000000000000000000000000000000..686c52afaa20bbcf4f4fa7eb151fad8e37b93384 --- /dev/null +++ b/CEP/Pipeline/helper_scripts/selfcal.parset @@ -0,0 +1,105 @@ +ObsSW.Observation.DataProducts.Input_Correlated.filenames=['predecessor_placeholder_subarray_placeholder_subband_placeholder_target_sub.MS.dppp', 'predecessor_SAP000_SB061_target_sub.MS.dppp', 'predecessor_SAP000_SB062_target_sub.MS.dppp', 'predecessor_SAP000_SB063_target_sub.MS.dppp', 'predecessor_SAP000_SB064_target_sub.MS.dppp', 'predecessor_SAP000_SB065_target_sub.MS.dppp', 'predecessor_SAP000_SB066_target_sub.MS.dppp', 'predecessor_SAP000_SB067_target_sub.MS.dppp', 'predecessor_SAP000_SB068_target_sub.MS.dppp', 'predecessor_SAP000_SB069_target_sub.MS.dppp','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset','unexisting_dataset', 'L41969_SAP000_SB060_target_sub.MS.dppp', 'L41969_SAP000_SB061_target_sub.MS.dppp', 'L41969_SAP000_SB062_target_sub.MS.dppp', 'L41969_SAP000_SB063_target_sub.MS.dppp', 'L41969_SAP000_SB064_target_sub.MS.dppp', 'L41969_SAP000_SB065_target_sub.MS.dppp', 'L41969_SAP000_SB066_target_sub.MS.dppp', 'L41969_SAP000_SB067_target_sub.MS.dppp', 'L41969_SAP000_SB068_target_sub.MS.dppp', 'L41969_SAP000_SB069_target_sub.MS.dppp', 'L41985_SAP000_SB060_target_sub.MS.dppp', 'L41985_SAP000_SB061_target_sub.MS.dppp', 'L41985_SAP000_SB062_target_sub.MS.dppp', 'L41985_SAP000_SB063_target_sub.MS.dppp', 'L41985_SAP000_SB064_target_sub.MS.dppp', 'L41985_SAP000_SB065_target_sub.MS.dppp', 'L41985_SAP000_SB066_target_sub.MS.dppp', 'L41985_SAP000_SB067_target_sub.MS.dppp', 'L41985_SAP000_SB068_target_sub.MS.dppp', 'L41985_SAP000_SB069_target_sub.MS.dppp', 'L41993_SAP000_SB060_target_sub.MS.dppp', 'L41993_SAP000_SB061_target_sub.MS.dppp', 'L41993_SAP000_SB062_target_sub.MS.dppp', 'L41993_SAP000_SB063_target_sub.MS.dppp', 'L41993_SAP000_SB064_target_sub.MS.dppp', 'L41993_SAP000_SB065_target_sub.MS.dppp', 'L41993_SAP000_SB066_target_sub.MS.dppp', 'L41993_SAP000_SB067_target_sub.MS.dppp', 'L41993_SAP000_SB068_target_sub.MS.dppp', 'L41993_SAP000_SB069_target_sub.MS.dppp', 'L42001_SAP000_SB060_target_sub.MS.dppp', 'L42001_SAP000_SB061_target_sub.MS.dppp', 'L42001_SAP000_SB062_target_sub.MS.dppp', 'L42001_SAP000_SB063_target_sub.MS.dppp', 'L42001_SAP000_SB064_target_sub.MS.dppp', 'L42001_SAP000_SB065_target_sub.MS.dppp', 'L42001_SAP000_SB066_target_sub.MS.dppp', 'L42001_SAP000_SB067_target_sub.MS.dppp', 'L42001_SAP000_SB068_target_sub.MS.dppp', 'L42001_SAP000_SB069_target_sub.MS.dppp', 'L42009_SAP000_SB060_target_sub.MS.dppp', 'L42009_SAP000_SB061_target_sub.MS.dppp', 'L42009_SAP000_SB062_target_sub.MS.dppp', 'L42009_SAP000_SB063_target_sub.MS.dppp', 'L42009_SAP000_SB064_target_sub.MS.dppp', 'L42009_SAP000_SB065_target_sub.MS.dppp', 'L42009_SAP000_SB066_target_sub.MS.dppp', 'L42009_SAP000_SB067_target_sub.MS.dppp', 'L42009_SAP000_SB068_target_sub.MS.dppp', 'L42009_SAP000_SB069_target_sub.MS.dppp', 'L42017_SAP000_SB060_target_sub.MS.dppp', 'L42017_SAP000_SB061_target_sub.MS.dppp', 'L42017_SAP000_SB062_target_sub.MS.dppp', 'L42017_SAP000_SB063_target_sub.MS.dppp', 'L42017_SAP000_SB064_target_sub.MS.dppp', 'L42017_SAP000_SB065_target_sub.MS.dppp', 'L42017_SAP000_SB066_target_sub.MS.dppp', 
'L42017_SAP000_SB067_target_sub.MS.dppp', 'L42017_SAP000_SB068_target_sub.MS.dppp', 'L42017_SAP000_SB069_target_sub.MS.dppp', 'L42025_SAP000_SB060_target_sub.MS.dppp', 'L42025_SAP000_SB061_target_sub.MS.dppp', 'L42025_SAP000_SB062_target_sub.MS.dppp', 'L42025_SAP000_SB063_target_sub.MS.dppp', 'L42025_SAP000_SB064_target_sub.MS.dppp', 'L42025_SAP000_SB065_target_sub.MS.dppp', 'L42025_SAP000_SB066_target_sub.MS.dppp', 'L42025_SAP000_SB067_target_sub.MS.dppp', 'L42025_SAP000_SB068_target_sub.MS.dppp', 'L42025_SAP000_SB069_target_sub.MS.dppp'] +ObsSW.Observation.DataProducts.Input_Correlated.locations=['host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 
'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder', 'host1_placeholder:input_path1_placeholder'] +ObsSW.Observation.DataProducts.Input_Correlated.skip=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +ObsSW.Observation.DataProducts.Output_SkyImage.skip=[0] +ObsSW.Observation.DataProducts.Output_Correlated.skip=[0] +ObsSW.Observation.ObservationControl.PythonControl.AWimager.data=CORRECTED_DATA +ObsSW.Observation.ObservationControl.PythonControl.AWimager.maxbaseline=10000 +ObsSW.Observation.ObservationControl.PythonControl.AWimager.operation=csclean +ObsSW.Observation.ObservationControl.PythonControl.AWimager.padding=1.6 +ObsSW.Observation.ObservationControl.PythonControl.AWimager.select="sumsqr(UVW[:2])<1e8" +ObsSW.Observation.ObservationControl.PythonControl.AWimager.niter=2 +ObsSW.Observation.ObservationControl.PythonControl.AWimager.stokes='I' +ObsSW.Observation.ObservationControl.PythonControl.AWimager.npix=256 +ObsSW.Observation.ObservationControl.PythonControl.AWimager.cellsize=125arcsec +ObsSW.Observation.ObservationControl.PythonControl.AWimager.wmax=6901.06832145 +ObsSW.Observation.ObservationControl.PythonControl.AWimager.wprojplanes=43 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.correct.Model.Beam.Enable=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.correct.Model.Gain.Enable=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.correct.Model.Sources=[] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.correct.Operation=CORRECT +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.correct.Output.Column=CORRECTED_DATA +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Baselines=[CR]S*& +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Model.Beam.Enable=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Model.Beam.Mode=ARRAY_FACTOR +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Model.Cache.Enable=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Model.Gain.Enable=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Model.Phasors.Enable=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Model.Sources=[] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Operation=SOLVE 
+ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.CalibrationGroups=[] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.CellChunkSize=10 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.CellSize.Freq=0 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.CellSize.Time=1 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.ExclParms=[] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Mode=PHASE +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.BalancedEqs=F +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.ColFactor=1e-9 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.EpsDerivative=1e-9 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.EpsValue=1e-9 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.LMFactor=1.0 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.MaxIter=20 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Options.UseSVD=T +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.Parms=["Gain:0:0:Phase:*","Gain:1:1:Phase:*"] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.PropagateSolutions=F +ObsSW.Observation.ObservationControl.PythonControl.BBS.Step.solve.Solve.UVRange=[] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Strategy.Baselines=*& +ObsSW.Observation.ObservationControl.PythonControl.BBS.Strategy.ChunkSize=100 +ObsSW.Observation.ObservationControl.PythonControl.BBS.Strategy.InputColumn=DATA +ObsSW.Observation.ObservationControl.PythonControl.BBS.Strategy.Steps=[solve, correct] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Strategy.TimeRange=[] +ObsSW.Observation.ObservationControl.PythonControl.BBS.Strategy.UseSolver=F +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].advanced_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].atrous_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].clobber=True +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].flagging_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].interactive=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].mean_map='default' +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].multichan_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].output_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].polarisation_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].psf_vary_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].quiet=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].rms_box=(15.0,9.0) +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].rms_map=True +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].shapelet_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].spectralindex_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].thresh='hard' +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].thresh_isl=3.0 +ObsSW.Observation.ObservationControl.PythonControl.BDSM[0].thresh_pix=5.0 +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].advanced_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].atrous_do=False 
+ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].clobber=True +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].flagging_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].interactive=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].mean_map='default' +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].multichan_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].output_opts=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].polarisation_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].psf_vary_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].quiet=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].rms_box=None +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].rms_map=None +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].shapelet_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].spectralindex_do=False +ObsSW.Observation.ObservationControl.PythonControl.BDSM[1].thresh='None' +ObsSW.Observation.ObservationControl.PythonControl.DPPP.msin.baseline="[CR]S*&" +ObsSW.Observation.ObservationControl.PythonControl.DPPP.msin.datacolumn=CORRECTED_DATA +ObsSW.Observation.ObservationControl.PythonControl.DPPP.msin.missingdata=true +ObsSW.Observation.ObservationControl.PythonControl.DPPP.msin.orderms=false +ObsSW.Observation.ObservationControl.PythonControl.DPPP.steps=[] +ObsSW.Observation.ObservationControl.PythonControl.GSM.assoc_theta="" +ObsSW.Observation.ObservationControl.PythonControl.GSM.monetdb_hostname="ldb002" +ObsSW.Observation.ObservationControl.PythonControl.GSM.monetdb_name="gsm" +ObsSW.Observation.ObservationControl.PythonControl.GSM.monetdb_password="msss" +ObsSW.Observation.ObservationControl.PythonControl.GSM.monetdb_port=51000 +ObsSW.Observation.ObservationControl.PythonControl.GSM.monetdb_user="gsm" +ObsSW.Observation.ObservationControl.PythonControl.Imaging.mask_patch_size=1 +ObsSW.Observation.ObservationControl.PythonControl.Imaging.number_of_major_cycles=1 +ObsSW.Observation.ObservationControl.PythonControl.Imaging.slices_per_image=9 +ObsSW.Observation.ObservationControl.PythonControl.Imaging.subbands_per_image=10 +ObsSW.Observation.ObservationControl.PythonControl.Imaging.maxbaseline=10000 +ObsSW.Observation.ObservationControl.PythonControl.Imaging.auto_imaging_specs=false +ObsSW.Observation.ObservationControl.PythonControl.Imaging.specify_fov=false +ObsSW.Observation.ObservationControl.PythonControl.Imaging.fov=0.0 +prefix=LOFAR. \ No newline at end of file diff --git a/CEP/Pipeline/helper_scripts/selfcal.settings b/CEP/Pipeline/helper_scripts/selfcal.settings new file mode 100644 index 0000000000000000000000000000000000000000..10a8913dad412c75989f6c7544490daf640c76f5 --- /dev/null +++ b/CEP/Pipeline/helper_scripts/selfcal.settings @@ -0,0 +1,9 @@ +# lines starting with # are ignored. 
+# characters following a # on a line are ignored
+
+# Only the output needs to be flexible
+TObservation12345         # New name of the run
+/data/scratch/username/   # Output location of the data products
+['locus101']              # The locus nodes the pipeline should run on
+
+6                         # number of major cycles
\ No newline at end of file
diff --git a/CEP/Pipeline/helper_scripts/state_to_stats.py b/CEP/Pipeline/helper_scripts/state_to_stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8828e4ce531be83f575243084735aa678677053
--- /dev/null
+++ b/CEP/Pipeline/helper_scripts/state_to_stats.py
@@ -0,0 +1,96 @@
+# LOFAR PIPELINE FRAMEWORK
+#
+# state_to_stats
+# Wouter Klijn, 2014
+# klijn@astron.nl
+# ------------------------------------------------------------------------------
+import os
+import sys
+import xml.dom.minidom as xml
+import pickle
+
+def usage():
+    usage_string = """
+    state_to_stats converts a statefile for a partially successful run into
+    a valid resource usage stats file to be visualized
+
+    usage: python state_to_stats.py <path_of_state_file> <output_path_of_stats>
+    """
+
+    print usage_string
+
+
+def open_file_and_parse_to_python_data(path):
+    """
+    Open the state file and unpickle it into a python data structure
+    """
+    try:
+        f = open(path)
+        data = pickle.load(f)
+    except:
+        print "failed opening statefile: "
+        print path
+        exit(1)
+
+    return data
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        usage()
+        exit(1)
+
+    state_path = sys.argv[1]      # where to find the state file
+    xml_stats_path = sys.argv[2]  # where to output the created stats file
+
+    data = open_file_and_parse_to_python_data(state_path)
+
+    # mirror the normal statefile
+    local_document = xml.Document()
+    xml_node = local_document.createElement("active_stack")
+    xml_node.setAttribute("duration", "0")
+    xml_node.setAttribute("type", "Manual_Created_statistics")
+
+    # add a fake active stack (needed for later processing steps)
+    # This allows us to use the visualization script without
+    # additional arguments
+    local_document = xml.Document()
+    step_node = local_document.createElement("active_stack")
+    step_node.setAttribute("type", "active_stack")
+    xml_node.appendChild(step_node)
+
+    for entry in data[1]:
+        # parse the name and create an xml node with this name
+        step_name = entry[0]
+
+        print "processing step: {0}".format(step_name)
+        local_document = xml.Document()
+        step_node = local_document.createElement(step_name)
+        step_node.setAttribute("duration", "0")
+        step_node.setAttribute("type", "active_stack")
+
+        # collect the statistics
+        step_xml_string = xml.parseString(entry[1]['return_xml']).documentElement
+
+        # append to the newly created node
+        step_node.appendChild(step_xml_string)
+
+        # Save to the 'large' xml tree
+        xml_node.appendChild(step_node)
+        #print step_xml_string.toprettyxml()
+
+    f = open(xml_stats_path, 'w')
+    f.write(xml_node.toxml())
+    print "wrote file: "
+    print xml_stats_path
+
diff --git a/CEP/Pipeline/recipes/sip/CMakeLists.txt b/CEP/Pipeline/recipes/sip/CMakeLists.txt index b93dd298113dd18c5d97cccbdcf9c91b9c24062f..b1a0cfb4b769f9ee7f47c42e0d228d5b540e6bfc 100644 --- a/CEP/Pipeline/recipes/sip/CMakeLists.txt +++ b/CEP/Pipeline/recipes/sip/CMakeLists.txt @@ -8,6 +8,7 @@ python_install( helpers/WritableParmDB.py helpers/ComplexArray.py helpers/MultipartPostHandler.py + helpers/data_quality.py master/__init__.py master/bbs_reducer.py master/copier.py @@ -20,13 +21,16 @@ python_install( master/imager_finalize.py master/imager_prepare.py master/imager_source_finding.py
+ master/long_baseline.py master/new_bbs.py master/rficonsole.py master/setupparmdb.py master/setupsourcedb.py master/vdsmaker.py master/vdsreader.py - master/long_baseline.py + master/selfcal_awimager.py + master/selfcal_bbs.py + master/selfcal_finalize.py nodes/__init__.py nodes/bbs_reducer.py nodes/copier.py @@ -40,12 +44,15 @@ python_install( nodes/imager_finalize.py nodes/imager_prepare.py nodes/imager_source_finding.py + nodes/long_baseline.py nodes/new_bbs.py nodes/rficonsole.py nodes/setupparmdb.py nodes/setupsourcedb.py nodes/vdsmaker.py - nodes/long_baseline.py + nodes/selfcal_awimager.py + nodes/selfcal_bbs.py + nodes/selfcal_finalize.py DESTINATION lofarpipe/recipes) install(PROGRAMS @@ -57,6 +64,7 @@ install(PROGRAMS bin/imaging_pipeline.py bin/pulsar_pipeline.py bin/long_baseline_pipeline.py + bin/selfcal_imager_pipeline.py bin/startPython.sh bin/startPythonVersion.sh bin/stopPython.sh diff --git a/CEP/Pipeline/recipes/sip/bin/imaging_pipeline.py b/CEP/Pipeline/recipes/sip/bin/imaging_pipeline.py index f7f22a8a3f2943baf3aca4bfd0fb10d99a58a2ed..acac777e345f8683ab75231a7d50d1cc1abd4a55 100755 --- a/CEP/Pipeline/recipes/sip/bin/imaging_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/imaging_pipeline.py @@ -155,20 +155,28 @@ class imaging_pipeline(control): # ****************************************************************** # (1) prepare phase: copy and collect the ms - concat_ms_map_path, timeslice_map_path, raw_ms_per_image_map_path, \ + concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile) - # We start with an empty source_list - source_list = "" # path to local sky model (list of 'found' sources) number_of_major_cycles = self.parset.getInt( "Imaging.number_of_major_cycles") + + # We start with an empty source_list map. 
It should contain n_output + # entries all set to empty strings + source_list_map_path = os.path.join(self.mapfile_dir, + "initial_sourcelist.mapfile") + source_list_map = DataMap.load(target_mapfile) # copy the output map + for item in source_list_map: + item.file = "" # set all to empty string + source_list_map.save(source_list_map_path) + for idx_loop in range(number_of_major_cycles): # ***************************************************************** # (2) Create dbs and sky model parmdbs_path, sourcedb_map_path = self._create_dbs( concat_ms_map_path, timeslice_map_path, - source_list = source_list, + source_list_map_path = source_list_map_path, skip_create_dbs = False) # ***************************************************************** @@ -197,7 +205,7 @@ class imaging_pipeline(control): # ********************************************************************* # (6) Finalize: placed_data_image_map = self._finalize(aw_image_mapfile, - processed_ms_dir, raw_ms_per_image_map_path, sourcelist_map, + processed_ms_dir, ms_per_image_map_path, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, found_sourcedb_path) @@ -263,7 +271,7 @@ class imaging_pipeline(control): @xml_node def _finalize(self, awimager_output_map, processed_ms_dir, - raw_ms_per_image_map, sourcelist_map, minbaseline, + ms_per_image_map, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, sourcedb_map, skip = False): """ @@ -283,7 +291,7 @@ class imaging_pipeline(control): # run the awimager recipe placed_image_mapfile = self.run_task("imager_finalize", target_mapfile, awimager_output_map = awimager_output_map, - raw_ms_per_image_map = raw_ms_per_image_map, + ms_per_image_map = ms_per_image_map, sourcelist_map = sourcelist_map, sourcedb_map = sourcedb_map, minbaseline = minbaseline, @@ -446,7 +454,7 @@ class imaging_pipeline(control): the time slices into a large virtual measurement set """ # Create the dir where found and processed ms are placed - # raw_ms_per_image_map_path contains all the original ms locations: + # ms_per_image_map_path contains all the original ms locations: # this list contains possible missing files processed_ms_dir = os.path.join(self.scratch_directory, "subbands") @@ -460,8 +468,8 @@ class imaging_pipeline(control): output_mapfile = self._write_datamap_to_file(None, "prepare_output") time_slices_mapfile = self._write_datamap_to_file(None, "prepare_time_slices") - raw_ms_per_image_mapfile = self._write_datamap_to_file(None, - "raw_ms_per_image") + ms_per_image_mapfile = self._write_datamap_to_file(None, + "ms_per_image") # get some parameters from the imaging pipeline parset: slices_per_image = self.parset.getInt("Imaging.slices_per_image") @@ -474,7 +482,7 @@ class imaging_pipeline(control): subbands_per_image = subbands_per_image, mapfile = output_mapfile, slices_mapfile = time_slices_mapfile, - raw_ms_per_image_mapfile = raw_ms_per_image_mapfile, + ms_per_image_mapfile = ms_per_image_mapfile, working_directory = self.scratch_directory, processed_ms_dir = processed_ms_dir) @@ -491,19 +499,19 @@ class imaging_pipeline(control): 'slices_mapfile') self.logger.error(error_msg) raise PipelineException(error_msg) - if not ('raw_ms_per_image_mapfile' in output_keys): + if not ('ms_per_image_mapfile' in output_keys): error_msg = "The imager_prepare master script did not"\ "return correct data. 
missing: {0}".format( - 'raw_ms_per_image_mapfile') + 'ms_per_image_mapfile') self.logger.error(error_msg) raise PipelineException(error_msg) # Return the mapfiles paths with processed data - return output_mapfile, outputs["slices_mapfile"], raw_ms_per_image_mapfile, \ + return output_mapfile, outputs["slices_mapfile"], ms_per_image_mapfile, \ processed_ms_dir @xml_node - def _create_dbs(self, input_map_path, timeslice_map_path, source_list = "", + def _create_dbs(self, input_map_path, timeslice_map_path, source_list_map_path, skip_create_dbs = False): """ Create for each of the concatenated input measurement sets @@ -534,7 +542,7 @@ class imaging_pipeline(control): parmdb_suffix = ".parmdb", parmdbs_map_path = parmdbs_map_path, sourcedb_map_path = sourcedb_map_path, - source_list_path = source_list, + source_list_map_path = source_list_map_path, working_directory = self.scratch_directory) return parmdbs_map_path, sourcedb_map_path diff --git a/CEP/Pipeline/recipes/sip/bin/long_baseline_pipeline.py b/CEP/Pipeline/recipes/sip/bin/long_baseline_pipeline.py index e8f12b773f23393bb2854636326b26872ef82b38..3375b5ae7a54b758b86ef2469e73c2005cf4e5c2 100644 --- a/CEP/Pipeline/recipes/sip/bin/long_baseline_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/long_baseline_pipeline.py @@ -24,7 +24,7 @@ from lofarpipe.support.loggingdecorators import xml_node, mail_log_on_exception from lofar.parameterset import parameterset -class msss_imager_pipeline(control): +class longbaseline_pipeline(control): """ The Automatic MSSS long baselione pipeline is used to generate MSSS measurement sets combining information of multiple subbands and or @@ -78,7 +78,7 @@ class msss_imager_pipeline(control): Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ - self.logger.info("Starting imager pipeline") + self.logger.info("Starting longbaseline pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join( @@ -371,4 +371,4 @@ class msss_imager_pipeline(control): if __name__ == '__main__': - sys.exit(msss_imager_pipeline().main()) + sys.exit(longbaseline_pipeline().main()) diff --git a/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py b/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py index a631a61cc0d180f4088a4a309a94eb697e803ac1..8b075b094d87a9eeeb29b077a73584357959721a 100755 --- a/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py +++ b/CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py @@ -158,22 +158,28 @@ class msss_imager_pipeline(control): # ****************************************************************** # (1) prepare phase: copy and collect the ms - concat_ms_map_path, timeslice_map_path, raw_ms_per_image_map_path, \ + concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile, add_beam_tables) - # We start with an empty source_list - source_list = "" # path to local sky model (list of 'found' sources) number_of_major_cycles = self.parset.getInt( "Imaging.number_of_major_cycles") + # We start with an empty source_list map. 
It should contain n_output + # entries all set to empty strings + source_list_map_path = os.path.join(self.mapfile_dir, + "initial_sourcelist.mapfile") + source_list_map = DataMap.load(target_mapfile) # copy the output map + for item in source_list_map: + item.file = "" # set all to empty string + source_list_map.save(source_list_map_path) for idx_loop in range(number_of_major_cycles): # ***************************************************************** # (2) Create dbs and sky model parmdbs_path, sourcedb_map_path = self._create_dbs( concat_ms_map_path, timeslice_map_path, - source_list = source_list, + source_list_map_path = source_list_map_path, skip_create_dbs = False) # ***************************************************************** @@ -202,7 +208,7 @@ class msss_imager_pipeline(control): # ********************************************************************* # (6) Finalize: placed_data_image_map = self._finalize(aw_image_mapfile, - processed_ms_dir, raw_ms_per_image_map_path, sourcelist_map, + processed_ms_dir, ms_per_image_map_path, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, found_sourcedb_path) @@ -273,7 +279,7 @@ class msss_imager_pipeline(control): @xml_node def _finalize(self, awimager_output_map, processed_ms_dir, - raw_ms_per_image_map, sourcelist_map, minbaseline, + ms_per_image_map, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, sourcedb_map, skip = False): """ @@ -293,7 +299,7 @@ class msss_imager_pipeline(control): # run the awimager recipe placed_image_mapfile = self.run_task("imager_finalize", target_mapfile, awimager_output_map = awimager_output_map, - raw_ms_per_image_map = raw_ms_per_image_map, + ms_per_image_map = ms_per_image_map, sourcelist_map = sourcelist_map, sourcedb_map = sourcedb_map, minbaseline = minbaseline, @@ -457,7 +463,7 @@ class msss_imager_pipeline(control): the time slices into a large virtual measurement set """ # Create the dir where found and processed ms are placed - # raw_ms_per_image_map_path contains all the original ms locations: + # ms_per_image_map_path contains all the original ms locations: # this list contains possible missing files processed_ms_dir = os.path.join(self.scratch_directory, "subbands") @@ -471,8 +477,8 @@ class msss_imager_pipeline(control): output_mapfile = self._write_datamap_to_file(None, "prepare_output") time_slices_mapfile = self._write_datamap_to_file(None, "prepare_time_slices") - raw_ms_per_image_mapfile = self._write_datamap_to_file(None, - "raw_ms_per_image") + ms_per_image_mapfile = self._write_datamap_to_file(None, + "ms_per_image") # get some parameters from the imaging pipeline parset: slices_per_image = self.parset.getInt("Imaging.slices_per_image") @@ -485,7 +491,7 @@ class msss_imager_pipeline(control): subbands_per_image = subbands_per_image, mapfile = output_mapfile, slices_mapfile = time_slices_mapfile, - raw_ms_per_image_mapfile = raw_ms_per_image_mapfile, + ms_per_image_mapfile = ms_per_image_mapfile, working_directory = self.scratch_directory, processed_ms_dir = processed_ms_dir, add_beam_tables = add_beam_tables) @@ -503,19 +509,19 @@ class msss_imager_pipeline(control): 'slices_mapfile') self.logger.error(error_msg) raise PipelineException(error_msg) - if not ('raw_ms_per_image_mapfile' in output_keys): + if not ('ms_per_image_mapfile' in output_keys): error_msg = "The imager_prepare master script did not"\ "return correct data. 
missing: {0}".format( - 'raw_ms_per_image_mapfile') + 'ms_per_image_mapfile') self.logger.error(error_msg) raise PipelineException(error_msg) # Return the mapfiles paths with processed data - return output_mapfile, outputs["slices_mapfile"], raw_ms_per_image_mapfile, \ + return output_mapfile, outputs["slices_mapfile"], ms_per_image_mapfile, \ processed_ms_dir @xml_node - def _create_dbs(self, input_map_path, timeslice_map_path, source_list = "", + def _create_dbs(self, input_map_path, timeslice_map_path, source_list_map_path, skip_create_dbs = False): """ Create for each of the concatenated input measurement sets @@ -546,7 +552,7 @@ class msss_imager_pipeline(control): parmdb_suffix = ".parmdb", parmdbs_map_path = parmdbs_map_path, sourcedb_map_path = sourcedb_map_path, - source_list_path = source_list, + source_list_map_path = source_list_map_path, working_directory = self.scratch_directory) return parmdbs_map_path, sourcedb_map_path diff --git a/CEP/Pipeline/recipes/sip/bin/selfcal_imager_pipeline.py b/CEP/Pipeline/recipes/sip/bin/selfcal_imager_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..0d1e725ddf7cf44fffcea1dd13944c229ec07230 --- /dev/null +++ b/CEP/Pipeline/recipes/sip/bin/selfcal_imager_pipeline.py @@ -0,0 +1,870 @@ +#!/usr/bin/env python +# LOFAR IMAGING PIPELINE +# +# selfcal Pipeline recipe +# Marcel Loose, 2012 +# loose@astron.nl +# Wouter Klijn, 2012 +# klijn@astron.nl +# Nicolas Vilchez, 2014 +# vilchez@astron.nl +# ----------------------------------------------------------------------------- +import os +import sys +import copy +import shutil + +from lofarpipe.support.control import control +from lofarpipe.support.utilities import create_directory +from lofarpipe.support.lofarexceptions import PipelineException +from lofarpipe.support.data_map import DataMap, validate_data_maps,\ + MultiDataMap, align_data_maps +from lofarpipe.support.utilities import patch_parset, get_parset +from lofarpipe.support.loggingdecorators import xml_node, mail_log_on_exception + +from lofar.parameterset import parameterset + + +class selfcal_imager_pipeline(control): + """ + The self calibration pipeline is used to generate images and find + sources in the generated images. Generated images and lists of found + sources are complemented with meta data and thus ready for consumption by + the Long Term Storage (LTA) + + *subband groups* + The imager_pipeline is able to generate images on the frequency range of + LOFAR in parallel. Combining the frequency subbands together in so called + subbandgroups. Each subband group will result in an image and sourcelist, + (typically 8, because ten subband groups are combined). + + *Time Slices* + selfcal images are compiled from a number of so-called (time) slices. Each + slice comprises a short (approx. 10 min) observation of a field (an area on + the sky) containing typically 80 subbands. The number of slices will be + different for LBA observations (typically 9) and HBA observations + (typically 2), due to differences in sensitivity. + + Each image will be compiled on a different cluster node to balance the + processing load. The input- and output- files and locations are determined + by the scheduler and specified in the parset-file. + + **This pipeline performs the following operations:** + + 1. Prepare Phase. 
Copy the preprocessed MS's from the different compute
+       nodes to the nodes where the images will be compiled (the prepare phase).
+       Combine the subbands in subband groups, concatenate the time slices into
+       a single large measurement set and perform flagging, RFI and bad station
+       exclusion.
+    2. Create db. Generate a local sky model (LSM) from the global sky model
+       (GSM) for the sources that are in the field-of-view (FoV). The LSM
+       is stored as sourcedb.
+       In step 3 calibration of the measurement sets is performed on these
+       sources and in step 4 it is used to create a mask for the awimager. The
+       calibration solution will be placed in an instrument table/db also
+       created in this step.
+    3. BBS. Calibrate the measurement set with the sourcedb from the gsm.
+       In later iterations sources found in the created images will be added
+       to this list, resulting in a selfcalibration cycle.
+    4. Awimager. The combined measurement sets are now imaged. The imaging
+       is performed using a mask: the sources in the sourcedb are used to
+       create a casa image masking known sources. Together with the
+       measurement set an image is created.
+    5. Sourcefinding. The images created in step 4 are fed to pyBDSM to find
+       and describe sources. In multiple iterations, subtracting the found
+       sources, all sources are collected in a sourcelist.
+       Step I. The sources found in step 5 are fed back into step 2.
+       This allows the measurement sets to be calibrated with sources currently
+       found in the image. This loop will continue until convergence (3 times
+       for the time being).
+    6. Finalize. Meta data with regards to the input, computations performed
+       and results are collected and added to the casa image. The images
+       created are converted from casa to HDF5 and copied to the correct
+       output location.
+    7. Export meta data: An outputfile with meta data is generated ready for
+       consumption by the LTA and/or the LOFAR framework.
+
+
+    **Per subband-group, the following output products will be delivered:**
+
+    a. An image
+    b. A source list
+    c. (Calibration solutions and corrected visibilities)
+
+    """
+    def __init__(self):
+        """
+        Initialize member variables and call superclass init function
+        """
+        control.__init__(self)
+        self.parset = parameterset()
+        self.input_data = DataMap()
+        self.target_data = DataMap()
+        self.output_data = DataMap()
+        self.output_correlated_data = DataMap()
+        self.scratch_directory = None
+        self.parset_feedback_file = None
+        self.parset_dir = None
+        self.mapfile_dir = None
+
+    def usage(self):
+        """
+        Display usage information
+        """
+        print >> sys.stderr, "Usage: %s <parset-file> [options]" % sys.argv[0]
+        return 1
+
+    def go(self):
+        """
+        Read the parset-file that was given as input argument, and set the
+        jobname before calling the base-class's `go()` method.
+        """
+        try:
+            parset_file = os.path.abspath(self.inputs['args'][0])
+        except IndexError:
+            return self.usage()
+        self.parset.adoptFile(parset_file)
+        self.parset_feedback_file = parset_file + "_feedback"
+        # Set job-name to basename of parset-file w/o extension, if it's not
+        # set on the command-line with '-j' or '--job-name'
+        if not 'job_name' in self.inputs:
+            self.inputs['job_name'] = (
+                os.path.splitext(os.path.basename(parset_file))[0]
+            )
+        return super(selfcal_imager_pipeline, self).go()
+
+    @mail_log_on_exception
+    def pipeline_logic(self):
+        """
+        Define the individual tasks that comprise the current pipeline.
+        This method will be invoked by the base-class's `go()` method.
+ """ + self.logger.info("Starting imager pipeline") + + # Define scratch directory to be used by the compute nodes. + self.scratch_directory = os.path.join( + self.inputs['working_directory'], self.inputs['job_name']) + # Get input/output-data products specifications. + self._get_io_product_specs() + + # remove prepending parset identifiers, leave only pipelinecontrol + full_parset = self.parset + self.parset = self.parset.makeSubset( + self.parset.fullModuleName('PythonControl') + '.') # remove this + + # Create directories to store communication and data files + + job_dir = self.config.get("layout", "job_directory") + + self.parset_dir = os.path.join(job_dir, "parsets") + create_directory(self.parset_dir) + self.mapfile_dir = os.path.join(job_dir, "mapfiles") + create_directory(self.mapfile_dir) + + # ********************************************************************* + # (INPUT) Get the input from external sources and create pipeline types + # Input measure ment sets + input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") + self.input_data.save(input_mapfile) + # storedata_map(input_mapfile, self.input_data) + self.logger.debug( + "Wrote input UV-data mapfile: {0}".format(input_mapfile)) + + # Provides location for the scratch directory and concat.ms location + target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") + self.target_data.save(target_mapfile) + self.logger.debug( + "Wrote target mapfile: {0}".format(target_mapfile)) + + # images datafiles + output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile") + self.output_data.save(output_image_mapfile) + self.logger.debug( + "Wrote output sky-image mapfile: {0}".format(output_image_mapfile)) + + # Location of the output measurement set + output_correlated_mapfile = os.path.join(self.mapfile_dir, + "correlated.mapfile") + self.output_correlated_data.save(output_correlated_mapfile) + self.logger.debug( + "Wrote output correlated mapfile: {0}".format(output_correlated_mapfile)) + + # Get pipeline parameters from the toplevel recipe + # TODO: This is a backdoor option to manually add beamtables when these + # are missing on the provided ms. There is NO use case for users of the + # pipeline + add_beam_tables = self.parset.getBool( + "Imaging.addBeamTables", False) + + + number_of_major_cycles = self.parset.getInt( + "Imaging.number_of_major_cycles") + + # Almost always a users wants a partial succes above a failed pipeline + output_result_of_last_succesfull_cycle = self.parset.getBool( + "Imaging.output_on_error", True) + + + if number_of_major_cycles < 3: + self.logger.error( + "The number of major cycles must be 3 or higher, correct" + " the key: Imaging.number_of_major_cycles") + raise PipelineException( + "Incorrect number_of_major_cycles in the parset") + + + # ****************************************************************** + # (1) prepare phase: copy and collect the ms + concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ + processed_ms_dir = self._prepare_phase(input_mapfile, + target_mapfile, add_beam_tables) + + # We start with an empty source_list map. 
It should contain n_output + # entries all set to empty strings + source_list_map_path = os.path.join(self.mapfile_dir, + "initial_sourcelist.mapfile") + source_list_map = DataMap.load(target_mapfile) # copy the output map + for item in source_list_map: + item.file = "" # set all to empty string + source_list_map.save(source_list_map_path) + + succesfull_cycle_mapfiles_dict = None + for idx_cycle in range(number_of_major_cycles): + try: + # ***************************************************************** + # (2) Create dbs and sky model + parmdbs_path, sourcedb_map_path = self._create_dbs( + concat_ms_map_path, timeslice_map_path, idx_cycle, + source_list_map_path = source_list_map_path, + skip_create_dbs = False) + + + # ***************************************************************** + # (3) bbs_imager recipe. + bbs_output = self._bbs(concat_ms_map_path, timeslice_map_path, + parmdbs_path, sourcedb_map_path, idx_cycle, skip = False) + + + # TODO: Extra recipe: concat timeslices using pyrap.concatms + # (see prepare) redmine issue #6021 + # Done in imager_bbs.p at the node level after calibration + + # ***************************************************************** + # (4) Get parameters awimager from the prepare_parset and inputs + aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path, + idx_cycle, sourcedb_map_path, number_of_major_cycles, + skip = False) + + # ***************************************************************** + # (5) Source finding + source_list_map_path, found_sourcedb_path = self._source_finding( + aw_image_mapfile, idx_cycle, skip = False) + # should the output be a sourcedb? instead of a sourcelist + + # save the active mapfiles: locations and content + # Used to output last succesfull cycle on error + mapfiles_to_save = {'aw_image_mapfile':aw_image_mapfile, + 'source_list_map_path':source_list_map_path, + 'found_sourcedb_path':found_sourcedb_path, + 'concat_ms_map_path':concat_ms_map_path} + succesfull_cycle_mapfiles_dict = self._save_active_mapfiles(idx_cycle, + self.mapfile_dir, mapfiles_to_save) + + # On exception there is the option to output the results of the + # last cycle without errors + except KeyboardInterrupt, ex: + raise ex + + except Exception, ex: + self.logger.error("Encountered an fatal exception during self" + "calibration. Aborting processing and return" + " the last succesfull cycle results") + self.logger.error(str(ex)) + + # if we are in the first cycle always exit with exception + if idx_cycle == 0: + raise ex + + if not output_result_of_last_succesfull_cycle: + raise ex + + # restore the mapfile variables + aw_image_mapfile = succesfull_cycle_mapfiles_dict['aw_image_mapfile'] + source_list_map_path = succesfull_cycle_mapfiles_dict['source_list_map_path'] + found_sourcedb_path = succesfull_cycle_mapfiles_dict['found_sourcedb_path'] + concat_ms_map_path = succesfull_cycle_mapfiles_dict['concat_ms_map_path'] + + # set the number_of_major_cycles to the correct number + number_of_major_cycles = idx_cycle - 1 + max_cycles_reached = False + break + else: + max_cycles_reached = True + + + # TODO: minbaseline should be a parset value as is maxbaseline.. 
+ minbaseline = 0 + + # ********************************************************************* + # (6) Finalize: + placed_data_image_map, placed_correlated_map = \ + self._finalize(aw_image_mapfile, + processed_ms_dir, ms_per_image_map_path, source_list_map_path, + minbaseline, maxbaseline, target_mapfile, output_image_mapfile, + found_sourcedb_path, concat_ms_map_path, output_correlated_mapfile) + + # ********************************************************************* + # (7) Get metadata + # create a parset with information that is available on the toplevel + + self._get_meta_data(number_of_major_cycles, placed_data_image_map, + placed_correlated_map, full_parset, + max_cycles_reached) + + + return 0 + + def _save_active_mapfiles(self, cycle_idx, mapfile_dir, mapfiles = {}): + """ + receives a dict with active mapfiles, var name to path + Each mapfile is copier to a seperate directory and saved + THis allows us to exit the last succesfull run + """ + # create a directory for storing the saved mapfiles, use cycle idx + mapfile_for_cycle_dir = os.path.join(mapfile_dir, "cycle_" + str(cycle_idx)) + create_directory(mapfile_for_cycle_dir) + + saved_mapfiles = {} + for (var_name,mapfile_path) in mapfiles.items(): + shutil.copy(mapfile_path, mapfile_for_cycle_dir) + # save the newly created file, get the filename, and append it + # to the directory name + saved_mapfiles[var_name] = os.path.join(mapfile_for_cycle_dir, + os.path.basename(mapfile_path)) + + return saved_mapfiles + + + + + def _get_meta_data(self, number_of_major_cycles, placed_data_image_map, + placed_correlated_map, full_parset, max_cycles_reached): + """ + Function combining all the meta data collection steps of the processing + """ + parset_prefix = full_parset.getString('prefix') + \ + full_parset.fullModuleName('DataProducts') + + toplevel_meta_data = parameterset() + toplevel_meta_data.replace( + parset_prefix + ".numberOfMajorCycles", + str(number_of_major_cycles)) + toplevel_meta_data_path = os.path.join( + self.parset_dir, "toplevel_meta_data.parset") + + toplevel_meta_data.replace(parset_prefix + ".max_cycles_reached", + str(max_cycles_reached)) + + try: + toplevel_meta_data.writeFile(toplevel_meta_data_path) + self.logger.info("Wrote meta data to: " + + toplevel_meta_data_path) + except RuntimeError, err: + self.logger.error( + "Failed to write toplevel meta information parset: %s" % str( + toplevel_meta_data_path)) + return 1 + + skyimage_metadata = "%s_feedback_SkyImage" % (self.parset_file,) + correlated_metadata = "%s_feedback_Correlated" % (self.parset_file,) + + # Create a parset-file containing the metadata for MAC/SAS at nodes + self.run_task("get_metadata", placed_data_image_map, + parset_prefix = parset_prefix, + product_type = "SkyImage", + metadata_file = skyimage_metadata) + + self.run_task("get_metadata", placed_correlated_map, + parset_prefix = parset_prefix, + product_type = "Correlated", + metadata_file = correlated_metadata) + + self.send_feedback_processing(toplevel_meta_data) + self.send_feedback_dataproducts(parameterset(skyimage_metadata)) + self.send_feedback_dataproducts(parameterset(correlated_metadata)) + + def _get_io_product_specs(self): + """ + Get input- and output-data product specifications from the + parset-file, and do some sanity checks. + """ + dps = self.parset.makeSubset( + self.parset.fullModuleName('DataProducts') + '.' 
+        )
+        # convert input dataproducts from parset value to DataMap
+        self.input_data = DataMap([
+            tuple(os.path.join(location, filename).split(':')) + (skip,)
+                for location, filename, skip in zip(
+                    dps.getStringVector('Input_Correlated.locations'),
+                    dps.getStringVector('Input_Correlated.filenames'),
+                    dps.getBoolVector('Input_Correlated.skip'))
+        ])
+        self.logger.debug("%d Input_Correlated data products specified" %
+                          len(self.input_data))
+
+        self.output_data = DataMap([
+            tuple(os.path.join(location, filename).split(':')) + (skip,)
+                for location, filename, skip in zip(
+                    dps.getStringVector('Output_SkyImage.locations'),
+                    dps.getStringVector('Output_SkyImage.filenames'),
+                    dps.getBoolVector('Output_SkyImage.skip'))
+        ])
+        self.logger.debug("%d Output_SkyImage data products specified" %
+                          len(self.output_data))
+
+        self.output_correlated_data = DataMap([
+            tuple(os.path.join(location, filename).split(':')) + (skip,)
+                for location, filename, skip in zip(
+                    dps.getStringVector('Output_Correlated.locations'),
+                    dps.getStringVector('Output_Correlated.filenames'),
+                    dps.getBoolVector('Output_Correlated.skip'))
+        ])
+
+        # ensure that the two output maps contain the same skip fields
+        align_data_maps(self.output_data, self.output_correlated_data)
+
+        self.logger.debug("%d Output_Correlated data products specified" %
+                          len(self.output_correlated_data))
+
+        # Sanity checks on input- and output data product specifications
+        # are turned off until DataMap is extended:
+        # if not validate_data_maps(self.input_data, self.output_data):
+        #     raise PipelineException(
+        #         "Validation of input/output data product specification failed!")
+
+        # Target data is basically scratch data, consisting of one concatenated
+        # MS per image. It must be stored on the same host as the final image.
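As an aside, the tuple construction used for these maps can be checked in isolation. The example below uses hypothetical values and is not part of the patch: a parset location of the form "host:directory" is joined with the filename and split on ':' again, producing the (host, path, skip) layout that DataMap expects, and which the scratch target map built right below reuses.

import os

location = "locus001:/data/scratch/selfcal"     # hypothetical parset value
filename = "L123456_SB000_uv.MS"                # hypothetical parset value
skip = False

entry = tuple(os.path.join(location, filename).split(':')) + (skip,)
# entry == ('locus001', '/data/scratch/selfcal/L123456_SB000_uv.MS', False)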
+ self.target_data = copy.deepcopy(self.output_data) + + for item in self.target_data: + item.file = os.path.join(self.scratch_directory, 'concat.ms') + + + @xml_node + def _finalize(self, awimager_output_map, processed_ms_dir, + ms_per_image_map, sourcelist_map, minbaseline, + maxbaseline, target_mapfile, + output_image_mapfile, sourcedb_map, concat_ms_map_path, + output_correlated_mapfile, skip = False): + """ + Perform the final step of the imager: + Convert the output image to hdf5 and copy to output location + Collect meta data and add to the image + """ + + placed_image_mapfile = self._write_datamap_to_file(None, + "placed_image") + self.logger.debug("Touched mapfile for correctly placed" + " hdf images: {0}".format(placed_image_mapfile)) + + placed_correlated_mapfile = self._write_datamap_to_file(None, + "placed_correlated") + self.logger.debug("Touched mapfile for correctly placed" + " correlated datasets: {0}".format(placed_correlated_mapfile)) + + if skip: + return placed_image_mapfile, placed_correlated_mapfile + else: + # run the awimager recipe + outputs = self.run_task("selfcal_finalize", + target_mapfile, awimager_output_map = awimager_output_map, + ms_per_image_map = ms_per_image_map, + sourcelist_map = sourcelist_map, + sourcedb_map = sourcedb_map, + minbaseline = minbaseline, + maxbaseline = maxbaseline, + target_mapfile = target_mapfile, + output_image_mapfile = output_image_mapfile, + processed_ms_dir = processed_ms_dir, + placed_image_mapfile = placed_image_mapfile, + placed_correlated_mapfile = placed_correlated_mapfile, + concat_ms_map_path = concat_ms_map_path, + output_correlated_mapfile = output_correlated_mapfile + ) + + return outputs["placed_image_mapfile"], \ + outputs["placed_correlated_mapfile"] + + @xml_node + def _source_finding(self, image_map_path, major_cycle, skip = True): + """ + Perform the sourcefinding step + """ + # Create the parsets for the different sourcefinder runs + bdsm_parset_pass_1 = self.parset.makeSubset("BDSM[0].") + + self._selfcal_modify_parset(bdsm_parset_pass_1, "pybdsm_first_pass.par") + parset_path_pass_1 = self._write_parset_to_file(bdsm_parset_pass_1, + "pybdsm_first_pass.par", "Sourcefinder first pass parset.") + + bdsm_parset_pass_2 = self.parset.makeSubset("BDSM[1].") + self._selfcal_modify_parset(bdsm_parset_pass_2, "pybdsm_second_pass.par") + parset_path_pass_2 = self._write_parset_to_file(bdsm_parset_pass_2, + "pybdsm_second_pass.par", "sourcefinder second pass parset") + + # touch a mapfile to be filled with created sourcelists + source_list_map = self._write_datamap_to_file(None, + "source_finding_outputs", + "map to sourcefinding outputs (sourcelist)") + sourcedb_map_path = self._write_datamap_to_file(None, + "source_dbs_outputs", "Map to sourcedbs based in found sources") + + # construct the location to save the output products of the + # sourcefinder + cycle_path = os.path.join(self.scratch_directory, + "awimage_cycle_{0}".format(major_cycle)) + catalog_path = os.path.join(cycle_path, "bdsm_catalog") + sourcedb_path = os.path.join(cycle_path, "bdsm_sourcedb") + + # Run the sourcefinder + if skip: + return source_list_map, sourcedb_map_path + else: + self.run_task("imager_source_finding", + image_map_path, + bdsm_parset_file_run1 = parset_path_pass_1, + bdsm_parset_file_run2x = parset_path_pass_2, + working_directory = self.scratch_directory, + catalog_output_path = catalog_path, + mapfile = source_list_map, + sourcedb_target_path = sourcedb_path, + sourcedb_map_path = sourcedb_map_path + ) + + return source_list_map, 
sourcedb_map_path + + @xml_node + def _bbs(self, concat_ms_map_path, timeslice_map_path, parmdbs_map_path, sourcedb_map_path, + major_cycle, skip = False): + """ + Perform a calibration step. First with a set of sources from the + gsm and in later iterations also on the found sources + """ + # create parset for bbs run + parset = self.parset.makeSubset("BBS.") + self._selfcal_modify_parset(parset, "bbs") + parset_path = self._write_parset_to_file(parset, "bbs", + "Parset for calibration with a local sky model") + + # create the output file path + output_mapfile = self._write_datamap_to_file(None, "bbs_output", + "Mapfile with calibrated measurement sets.") + + converted_sourcedb_map_path = self._write_datamap_to_file(None, + "source_db", "correctly shaped mapfile for input sourcedbs") + + if skip: + return output_mapfile + + # The create db step produces a mapfile with a single sourcelist for + # the different timeslices. Generate a mapfile with copies of the + # sourcelist location: This allows validation of maps in combination + # get the original map data + sourcedb_map = DataMap.load(sourcedb_map_path) + parmdbs_map = MultiDataMap.load(parmdbs_map_path) + converted_sourcedb_map = [] + + # sanity check for correcy output from previous recipes + if not validate_data_maps(sourcedb_map, parmdbs_map): + self.logger.error("The input files for bbs do not contain " + "matching host names for each entry content:") + self.logger.error(repr(sourcedb_map)) + self.logger.error(repr(parmdbs_map)) + raise PipelineException("Invalid input data for imager_bbs recipe") + + self.run_task("selfcal_bbs", + timeslice_map_path, + parset = parset_path, + instrument_mapfile = parmdbs_map_path, + sourcedb_mapfile = sourcedb_map_path, + mapfile = output_mapfile, + working_directory = self.scratch_directory, + concat_ms_map_path=concat_ms_map_path, + major_cycle=major_cycle) + + return output_mapfile + + @xml_node + def _aw_imager(self, prepare_phase_output, major_cycle, sky_path, + number_of_major_cycles, skip = False): + """ + Create an image based on the calibrated, filtered and combined data. 
+ """ + # Create parset for the awimage recipe + parset = self.parset.makeSubset("AWimager.") + # Get maxbaseline from 'full' parset + max_baseline = self.parset.getInt("Imaging.maxbaseline") + patch_dictionary = {"maxbaseline": str( + max_baseline)} + try: + temp_parset_filename = patch_parset(parset, patch_dictionary) + aw_image_parset = get_parset(temp_parset_filename) + aw_image_parset_path = self._write_parset_to_file(aw_image_parset, + "awimager_cycle_{0}".format(major_cycle), + "Awimager recipe parset") + finally: + # remove tempfile + os.remove(temp_parset_filename) + + # Create path to write the awimage files + intermediate_image_path = os.path.join(self.scratch_directory, + "awimage_cycle_{0}".format(major_cycle), "image") + + output_mapfile = self._write_datamap_to_file(None, "awimager", + "output map for awimager recipe") + + mask_patch_size = self.parset.getInt("Imaging.mask_patch_size") + autogenerate_parameters = self.parset.getBool( + "Imaging.auto_imaging_specs") + specify_fov = self.parset.getBool( + "Imaging.specify_fov") + if skip: + pass + else: + # run the awimager recipe + self.run_task("selfcal_awimager", prepare_phase_output, + parset = aw_image_parset_path, + mapfile = output_mapfile, + output_image = intermediate_image_path, + mask_patch_size = mask_patch_size, + sourcedb_path = sky_path, + working_directory = self.scratch_directory, + autogenerate_parameters = autogenerate_parameters, + specify_fov = specify_fov, major_cycle = major_cycle, + nr_cycles = number_of_major_cycles, + perform_self_cal = True) + + return output_mapfile, max_baseline + + @xml_node + def _prepare_phase(self, input_ms_map_path, target_mapfile, + add_beam_tables): + """ + Copy ms to correct location, combine the ms in slices and combine + the time slices into a large virtual measurement set + """ + # Create the dir where found and processed ms are placed + # ms_per_image_map_path contains all the original ms locations: + # this list contains possible missing files + processed_ms_dir = os.path.join(self.scratch_directory, "subbands") + + # get the parameters, create a subset for ndppp, save + # Aditional parameters are added runtime on the node, based on data + ndppp_parset = self.parset.makeSubset("DPPP.") + ndppp_parset_path = self._write_parset_to_file(ndppp_parset, + "prepare_imager_ndppp", "parset for ndpp recipe") + + # create the output file paths + # [1] output -> prepare_output + output_mapfile = self._write_datamap_to_file(None, "prepare_output") + time_slices_mapfile = self._write_datamap_to_file(None, + "prepare_time_slices") + ms_per_image_mapfile = self._write_datamap_to_file(None, + "ms_per_image") + + # get some parameters from the imaging pipeline parset: + slices_per_image = self.parset.getInt("Imaging.slices_per_image") + subbands_per_image = self.parset.getInt("Imaging.subbands_per_image") + + outputs = self.run_task("imager_prepare", input_ms_map_path, + parset = ndppp_parset_path, + target_mapfile = target_mapfile, + slices_per_image = slices_per_image, + subbands_per_image = subbands_per_image, + mapfile = output_mapfile, + slices_mapfile = time_slices_mapfile, + ms_per_image_mapfile = ms_per_image_mapfile, + working_directory = self.scratch_directory, + processed_ms_dir = processed_ms_dir, + add_beam_tables = add_beam_tables, + do_rficonsole = False) + + # validate that the prepare phase produced the correct data + output_keys = outputs.keys() + if not ('mapfile' in output_keys): + error_msg = "The imager_prepare master script did not"\ + "return correct data. 
missing: {0}".format('mapfile') + self.logger.error(error_msg) + raise PipelineException(error_msg) + if not ('slices_mapfile' in output_keys): + error_msg = "The imager_prepare master script did not"\ + "return correct data. missing: {0}".format( + 'slices_mapfile') + self.logger.error(error_msg) + raise PipelineException(error_msg) + + if not ('ms_per_image_mapfile' in output_keys): + error_msg = "The imager_prepare master script did not"\ + "return correct data. missing: {0}".format( + 'ms_per_image_mapfile') + self.logger.error(error_msg) + raise PipelineException(error_msg) + + # Return the mapfiles paths with processed data + return output_mapfile, outputs["slices_mapfile"], ms_per_image_mapfile, \ + processed_ms_dir + + @xml_node + def _create_dbs(self, input_map_path, timeslice_map_path, + major_cycle, source_list_map_path , + skip_create_dbs = False): + """ + Create for each of the concatenated input measurement sets + an instrument model and parmdb + """ + # Create the parameters set + parset = self.parset.makeSubset("GSM.") + + # create the files that will contain the output of the recipe + parmdbs_map_path = self._write_datamap_to_file(None, "parmdbs", + "parmdbs output mapfile") + sourcedb_map_path = self._write_datamap_to_file(None, "sky_files", + "source db output mapfile") + + # run the master script + if skip_create_dbs: + pass + else: + self.run_task("imager_create_dbs", input_map_path, + monetdb_hostname = parset.getString("monetdb_hostname"), + monetdb_port = parset.getInt("monetdb_port"), + monetdb_name = parset.getString("monetdb_name"), + monetdb_user = parset.getString("monetdb_user"), + monetdb_password = parset.getString("monetdb_password"), + assoc_theta = parset.getString("assoc_theta"), + sourcedb_suffix = ".sourcedb", + slice_paths_mapfile = timeslice_map_path, + parmdb_suffix = ".parmdb", + parmdbs_map_path = parmdbs_map_path, + sourcedb_map_path = sourcedb_map_path, + source_list_map_path = source_list_map_path, + working_directory = self.scratch_directory, + major_cycle = major_cycle) + + return parmdbs_map_path, sourcedb_map_path + + # TODO: Move these helpers to the parent class + def _write_parset_to_file(self, parset, parset_name, message): + """ + Write the suplied the suplied parameterset to the parameter set + directory in the jobs dir with the filename suplied in parset_name. + Return the full path to the created file. + """ + parset_dir = os.path.join( + self.config.get("layout", "job_directory"), "parsets") + # create the parset dir if it does not exist + create_directory(parset_dir) + + # write the content to a new parset file + parset_path = os.path.join(parset_dir, + "{0}.parset".format(parset_name)) + parset.writeFile(parset_path) + + # display a debug log entrie with path and message + self.logger.debug("Wrote parset to path <{0}> : {1}".format( + parset_path, message)) + + return parset_path + + def _write_datamap_to_file(self, datamap, mapfile_name, message = ""): + """ + Write the suplied the suplied map to the mapfile. + directory in the jobs dir with the filename suplied in mapfile_name. + Return the full path to the created file. 
+ If suplied data is None then the file is touched if not existing, but + existing files are kept as is + """ + + mapfile_dir = os.path.join( + self.config.get("layout", "job_directory"), "mapfiles") + # create the mapfile_dir if it does not exist + create_directory(mapfile_dir) + + # write the content to a new parset file + mapfile_path = os.path.join(mapfile_dir, + "{0}.map".format(mapfile_name)) + + # display a debug log entrie with path and message + if datamap != None: + datamap.save(mapfile_path) + + self.logger.debug( + "Wrote mapfile <{0}>: {1}".format(mapfile_path, message)) + else: + if not os.path.exists(mapfile_path): + DataMap().save(mapfile_path) + + self.logger.debug( + "Touched mapfile <{0}>: {1}".format(mapfile_path, message)) + + return mapfile_path + + + # This functionality should be moved outside into MOM/ default template. + # This is now a static we should be able to control this. + def _selfcal_modify_parset(self, parset, parset_name): + """ + Modification of the BBS parset for selfcal implementation, add, + remove, modify some values in bbs parset, done by + done by Nicolas Vilchez + """ + + if parset_name == "bbs": + + parset.replace('Step.solve.Model.Beam.UseChannelFreq', 'True') + parset.replace('Step.solve.Model.Ionosphere.Enable', 'F') + parset.replace('Step.solve.Model.TEC.Enable', 'F') + parset.replace('Step.correct.Model.Beam.UseChannelFreq', 'True') + parset.replace('Step.correct.Model.TEC.Enable', 'F') + parset.replace('Step.correct.Model.Phasors.Enable', 'T') + parset.replace('Step.correct.Output.WriteCovariance', 'T') + + #must be erased, by default I replace to the default value + parset.replace('Step.solve.Baselines', '*&') + + parset.replace('Step.solve.Solve.Mode', 'COMPLEX') + parset.replace('Step.solve.Solve.CellChunkSize', '100') + parset.replace('Step.solve.Solve.PropagateSolutions', 'F') + parset.replace('Step.solve.Solve.Options.MaxIter', '100') + + + if parset_name == "pybdsm_first_pass.par": + + parset.replace('advanced_opts', 'True') + parset.replace('atrous_do', 'True') + parset.replace('rms_box', '(80.0,15.0)') + parset.replace('thresh_isl', '5') + parset.replace('thresh_pix', '5') + parset.replace('adaptive_rms_box', 'True') + parset.replace('blank_limit', '1E-4') + parset.replace('ini_method', 'curvature') + parset.replace('atrous_do', 'True') + parset.replace('thresh', 'hard') + + + if parset_name == "pybdsm_second_pass.par": + + parset.replace('advanced_opts', 'True') + parset.replace('atrous_do', 'True') + parset.replace('rms_box', '(80.0,15.0)') + parset.replace('thresh_isl', '5') + parset.replace('thresh_pix', '5') + parset.replace('adaptive_rms_box', 'True') + parset.replace('blank_limit', '1E-4') + parset.replace('ini_method', 'curvature') + parset.replace('atrous_do', 'True') + parset.replace('thresh', 'hard') + + +if __name__ == '__main__': + sys.exit(selfcal_imager_pipeline().main()) diff --git a/CEP/Pipeline/recipes/sip/helpers/data_quality.py b/CEP/Pipeline/recipes/sip/helpers/data_quality.py new file mode 100644 index 0000000000000000000000000000000000000000..26e0491ed409fa9daffcd1ff31bb890dfaec48b5 --- /dev/null +++ b/CEP/Pipeline/recipes/sip/helpers/data_quality.py @@ -0,0 +1,149 @@ +# LOFAR IMAGING PIPELINE +# +# Helper function used for ms +# quality validation and filtering +# Wouter Klijn: 2015 +# klijn@astron.nl +# ----------------------------------------------------------------------------- +import sys +import shutil +import os + +from lofarpipe.support.subprocessgroup import SubProcessGroup +from 
lofarpipe.support.utilities import create_directory + +def run_rficonsole(rficonsole_executable, temp_dir, + input_ms_list, logger, resourceMonitor): + """ + _run_rficonsole runs the rficonsole application on the supplied + timeslices in time_slices. + This functionality has also been implemented in BBS. + """ + + # loop all measurement sets + rfi_temp_dir = os.path.join(temp_dir, "rfi_temp_dir") + create_directory(rfi_temp_dir) + + try: + rfi_console_proc_group = SubProcessGroup(logger=logger, + usageStats=resourceMonitor) + for time_slice in input_ms_list: + # Each rfi console needs own working space for temp files + temp_slice_path = os.path.join(rfi_temp_dir, + os.path.basename(time_slice)) + create_directory(temp_slice_path) + + # construct copy command + logger.info(time_slice) + command = [rficonsole_executable, "-indirect-read", + time_slice] + logger.info("executing rficonsole command: {0}".format( + " ".join(command))) + + # Add the command to the process group + rfi_console_proc_group.run(command, cwd = temp_slice_path) + + + # wait for all to finish + if rfi_console_proc_group.wait_for_finish() != None: + raise Exception("an rfi_console_proc_group run failed!") + + finally: + shutil.rmtree(rfi_temp_dir) + + +def filter_bad_stations(input_ms_list, + asciistat_executable, statplot_executable, msselect_executable, + logger, resourceMonitor): + """ + A Collection of scripts for finding and filtering of bad stations: + + 1. First a number of statistics with regards to the spread of the data + is collected using the asciistat_executable. + 2. Secondly these statistics are consumed by the statplot_executable + which produces a set of bad stations. + 3. In the final step the bad stations are removed from the dataset + using ms select + + REF: http://www.lofar.org/wiki/lib/exe/fetch.php?media=msss:pandeymartinez-week9-v1p2.pdf + """ + # run asciistat to collect statistics about the ms + logger.info("Filtering bad stations") + logger.debug("Collecting statistical properties of input data") + asciistat_output = [] + asciistat_proc_group = SubProcessGroup(logger=logger, + usageStats=resourceMonitor) + for ms in input_ms_list: + output_dir = ms + ".filter_temp" + create_directory(output_dir) + asciistat_output.append((ms, output_dir)) + + cmd_string = "{0} -i {1} -r {2}".format(asciistat_executable, + ms, output_dir) + asciistat_proc_group.run(cmd_string) + + if asciistat_proc_group.wait_for_finish() != None: + raise Exception("an ASCIIStats run failed!") + + # Determine the station to remove + logger.debug("Select bad stations depending on collected stats") + asciiplot_output = [] + asciiplot_proc_group = SubProcessGroup(logger) + for (ms, output_dir) in asciistat_output: + ms_stats = os.path.join(output_dir, os.path.split(ms)[1] + ".stats") + + cmd_string = "{0} -i {1} -o {2}".format(statplot_executable, + ms_stats, ms_stats) + asciiplot_output.append((ms, ms_stats)) + asciiplot_proc_group.run(cmd_string) + + if asciiplot_proc_group.wait_for_finish() != None: + raise Exception("an ASCIIplot run failed!") + + # remove the bad stations + logger.debug("Use ms select to remove bad stations") + msselect_output = {} + msselect_proc_group = SubProcessGroup(logger) + for ms, ms_stats in asciiplot_output: + # parse the .tab file containing the bad stations + station_to_filter = [] + file_pointer = open(ms_stats + ".tab") + + for line in file_pointer.readlines(): + # skip headed line + if line[0] == "#": + continue + + entries = line.split() + # if the current station is bad (the last entry on the 
line) + if entries[-1] == "True": + # add the name of station + station_to_filter.append(entries[1]) + + # if this measurement does not contain baselines to skip do not + # filter and provide the original ms as output + if len(station_to_filter) == 0: + msselect_output[ms] = ms + continue + + ms_output_path = ms + ".filtered" + msselect_output[ms] = ms_output_path + + # use msselect to remove the stations from the ms + msselect_baseline = "!{0}".format(",".join(station_to_filter)) + cmd_string = "{0} in={1} out={2} baseline={3} deep={4}".format( + msselect_executable, ms, ms_output_path, + msselect_baseline, "False") + msselect_proc_group.run(cmd_string) + + if msselect_proc_group.wait_for_finish() != None: + raise Exception("an MSselect run failed!") + + filtered_list_of_ms = [] + # The order of the inputs needs to be preserved when producing the + # filtered output! + for input_ms in input_ms_list: + filtered_list_of_ms.append(msselect_output[input_ms]) + + return filtered_list_of_ms + diff --git a/CEP/Pipeline/recipes/sip/helpers/metadata.py b/CEP/Pipeline/recipes/sip/helpers/metadata.py index 79fb1f802753193653fdae6f1bc8118359fb164b..247761de26ea56938e31469d793e1797a7b8226b 100644 --- a/CEP/Pipeline/recipes/sip/helpers/metadata.py +++ b/CEP/Pipeline/recipes/sip/helpers/metadata.py @@ -85,7 +85,7 @@ class DataProduct(object): """ Base class for data product metadata. """ - def __init__(self): + def __init__(self, logger): self._data = { 'size' : 0, 'fileFormat' : "", @@ -93,6 +93,7 @@ class DataProduct(object): 'location' : "", 'percentageWritten' : 0 } + self.logger = logger def data(self): @@ -128,8 +129,8 @@ class Correlated(DataProduct): Class representing the metadata associated with UV-correlated data. The optional argument `filename` is the name of the Measurement Set. """ - def __init__(self, filename=None): - super(Correlated, self).__init__() + def __init__(self, logger, filename=None): + super(Correlated, self).__init__(logger) self._data.update({ 'startTime' : "not-a-datetime", 'duration' : 0.0, @@ -186,12 +187,12 @@ class InstrumentModel(DataProduct): """ Class representing the metadata associated with an instrument model. """ - def __init__(self, filename=None): + def __init__(self, logger, filename=None): """ Constructor. The optional argument `filename` is the name of the Measurement Set containing the instrument model. """ - DataProduct.__init__(self) + DataProduct.__init__(self, logger) if filename: self.collect(filename) @@ -210,12 +211,12 @@ class SkyImage(DataProduct): """ Class representing the metadata associated with a sky image. """ - def __init__(self, filename=None): + def __init__(self, logger, filename=None): """ Constructor. The optional argument `filename` is the name of the CASA Image containing the sky image. 
""" - DataProduct.__init__(self) + DataProduct.__init__(self, logger) self._data.update({ 'numberOfAxes' : 0, 'nrOfDirectionCoordinates' : 0, @@ -291,6 +292,7 @@ class SkyImage(DataProduct): self._data.update({ 'imagerIntegrationTime':imagerIntegrationTime }) + self.logger.info("Succes fully collecting meta data for skyimage") except Exception, error: print >> sys.stderr, ( "%s: %s\n\twhile processing file %s" % diff --git a/CEP/Pipeline/recipes/sip/master/get_metadata.py b/CEP/Pipeline/recipes/sip/master/get_metadata.py index 94e313e48af801e3faf02eb727bed86e836b7b82..afdb4429bba9d9625b666e4999b53fdbbd2adb3d 100644 --- a/CEP/Pipeline/recipes/sip/master/get_metadata.py +++ b/CEP/Pipeline/recipes/sip/master/get_metadata.py @@ -66,7 +66,7 @@ class get_metadata(BaseRecipe, RemoteCommandRecipeMixIn): global_prefix += '.' if not product_type in self.valid_product_types: - self.logger.error( + self.logger.warn( "Unknown product type: %s\n\tValid product types are: %s" % (product_type, ', '.join(self.valid_product_types)) ) @@ -114,7 +114,8 @@ class get_metadata(BaseRecipe, RemoteCommandRecipeMixIn): # ******************************************************************** # 5. Create the parset-file and return it to the caller parset = parameterset() - prefix = "Output_%s_" % product_type + prefix = "Output_%s_" % product_type #Underscore is needed because + # Mom / LTA cannot differentiate input and output parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs))) prefix = global_prefix + prefix @@ -124,7 +125,10 @@ class get_metadata(BaseRecipe, RemoteCommandRecipeMixIn): # the Master/node communication adds a monitor_stats entry, # this must be remove manually here meta_data_parset = metadata.to_parset(job.results) - meta_data_parset.remove("monitor_stats") + try: + meta_data_parset.remove("monitor_stats") + except: + pass parset.adoptCollection(meta_data_parset, '%s[%d].' % (prefix, idx)) diff --git a/CEP/Pipeline/recipes/sip/master/imager_awimager.py b/CEP/Pipeline/recipes/sip/master/imager_awimager.py index 106626f34b16d49803080b41d50da9a6fcf951af..1e88c5ecb27e79bbebfe9ce598fdac4caf0c3eaf 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_awimager.py +++ b/CEP/Pipeline/recipes/sip/master/imager_awimager.py @@ -175,7 +175,7 @@ class imager_awimager(BaseRecipe, RemoteCommandRecipeMixIn): # If partial succes if self.error.isSet(): - self.logger.error("Failed awimager node run detected. continue with" + self.logger.warn("Failed awimager node run detected. 
continue with" "successful tasks.") self._store_data_map(self.inputs['mapfile'], output_map, diff --git a/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py b/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py index 788005cfc7d9548902e1af14b0d54ed2952a506b..10783788c559c2a139ce6c30942b502a5f964274 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py +++ b/CEP/Pipeline/recipes/sip/master/imager_create_dbs.py @@ -11,7 +11,8 @@ import lofarpipe.support.lofaringredient as ingredient from lofarpipe.support.baserecipe import BaseRecipe from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn from lofarpipe.support.remotecommand import ComputeJob -from lofarpipe.support.data_map import DataMap, MultiDataMap, validate_data_maps +from lofarpipe.support.data_map import DataMap, MultiDataMap, \ + validate_data_maps, align_data_maps class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): @@ -83,9 +84,9 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): '--makesourcedb-path', help="Path to makesourcedb executable." ), - 'source_list_path': ingredient.StringField( - '--source-list-path', - help="Path to sourcelist from external source (eg. bdsm) "\ + 'source_list_map_path': ingredient.StringField( + '--source-list-map-path', + help="Path to sourcelist map from external source (eg. bdsm) "\ "use an empty string for gsm generated data" ), 'parmdbs_map_path': ingredient.StringField( @@ -96,6 +97,11 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): '--sourcedb-map-path', help="path to mapfile containing produced sourcedb files" ), + 'major_cycle': ingredient.IntField( + '--major_cycle', + default=0, + help = "The number of the current cycle" + ), } outputs = { @@ -119,16 +125,18 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): assoc_theta = None # Load mapfile data from files - self.logger.error(self.inputs["slice_paths_mapfile"]) + self.logger.info(self.inputs["slice_paths_mapfile"]) slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"]) input_map = DataMap.load(self.inputs['args'][0]) + source_list_map = DataMap.load(self.inputs['source_list_map_path']) if self._validate_input_data(input_map, slice_paths_map): return 1 # Run the nodes with now collected inputs jobs, output_map = self._run_create_dbs_node( - input_map, slice_paths_map, assoc_theta) + input_map, slice_paths_map, assoc_theta, + source_list_map) # Collect the output of the node scripts write to (map) files return self._collect_and_assign_outputs(jobs, output_map, @@ -163,7 +171,7 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): return 0 def _run_create_dbs_node(self, input_map, slice_paths_map, - assoc_theta): + assoc_theta, source_list_map): """ Decompose the input mapfiles into task for specific nodes and distribute these to the node recipes. Wait for the jobs to finish and @@ -177,12 +185,13 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): # Update the skip fields of the four maps. If 'skip' is True in any of # these maps, then 'skip' must be set to True in all maps. 
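The hunk below replaces the hand-written skip synchronisation with a call to align_data_maps. A simplified stand-in for what that helper does (the real implementation lives in lofarpipe.support.data_map; this sketch only illustrates the idea and is not the library code):

class _Item(object):
    def __init__(self, skip):
        self.skip = skip

def align_skip_fields(*maps):
    # if any map has skip=True at position i, set skip=True at i in every map,
    # so a SkipIterator later walks the same subset of entries in all maps
    for items in zip(*maps):
        combined = any(item.skip for item in items)
        for item in items:
            item.skip = combined

map_a = [_Item(False), _Item(True)]
map_b = [_Item(False), _Item(False)]
align_skip_fields(map_a, map_b)
assert [item.skip for item in map_b] == [False, True]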
- for w, x, y in zip(input_map, output_map, slice_paths_map): - w.skip = x.skip = y.skip = ( - w.skip or x.skip or y.skip - ) - slice_paths_map.iterator = input_map.iterator = DataMap.SkipIterator - for (input_item, slice_item) in zip(input_map, slice_paths_map): + align_data_maps(input_map, output_map, slice_paths_map, + source_list_map) + + source_list_map.iterator = slice_paths_map.iterator = \ + input_map.iterator = DataMap.SkipIterator + for (input_item, slice_item, source_list_item) in zip( + input_map, slice_paths_map,source_list_map): host_ms, concat_ms = input_item.host, input_item.file host_slice, slice_paths = slice_item.host, slice_item.file @@ -205,7 +214,8 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): self.environment, self.inputs["working_directory"], self.inputs["makesourcedb_path"], - self.inputs["source_list_path"]] + source_list_item.file, + self.inputs["major_cycle"]] jobs.append(ComputeJob(host_ms, node_command, arguments)) # Wait the nodes to finish @@ -243,12 +253,12 @@ class imager_create_dbs(BaseRecipe, RemoteCommandRecipeMixIn): # The current job has to be skipped (due to skip field) # Or if the node failed: if not node_succeeded: - self.logger.warn("Warning failed ImagerCreateDBs run " + self.logger.warn("Warning failed selfcalCreateDBs run " "detected: No sourcedb file created, {0} continue".format( host)) output_item.file = "failed" output_item.skip = True - parmdbs_item.file = ["failed"] + parmdbs_item.file = [] parmdbs_item.skip = True # Else it succeeded and we can write te results diff --git a/CEP/Pipeline/recipes/sip/master/imager_finalize.py b/CEP/Pipeline/recipes/sip/master/imager_finalize.py index 82bffb635e9776042f52fde41393f77014fce753..227e915a49dd8aeed60a6a144b34c23130af739a 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_finalize.py +++ b/CEP/Pipeline/recipes/sip/master/imager_finalize.py @@ -24,8 +24,8 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): help="""Mapfile containing (host, path) pairs of created sky images """ ), - 'raw_ms_per_image_map': ingredient.FileField( - '--raw-ms-per-image-map', + 'ms_per_image_map': ingredient.FileField( + '--ms-per-image-map', help='''Mapfile containing (host, path) pairs of mapfiles used to create image on that node''' ), @@ -89,8 +89,8 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): # 1. Load the datamaps awimager_output_map = DataMap.load( self.inputs["awimager_output_map"]) - raw_ms_per_image_map = DataMap.load( - self.inputs["raw_ms_per_image_map"]) + ms_per_image_map = DataMap.load( + self.inputs["ms_per_image_map"]) sourcelist_map = DataMap.load(self.inputs["sourcelist_map"]) sourcedb_map = DataMap.load(self.inputs["sourcedb_map"]) target_mapfile = DataMap.load(self.inputs["target_mapfile"]) @@ -100,13 +100,13 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"] # Align the skip fields - align_data_maps(awimager_output_map, raw_ms_per_image_map, + align_data_maps(awimager_output_map, ms_per_image_map, sourcelist_map, target_mapfile, output_image_mapfile, sourcedb_map) # Set the correct iterator sourcelist_map.iterator = awimager_output_map.iterator = \ - raw_ms_per_image_map.iterator = target_mapfile.iterator = \ + ms_per_image_map.iterator = target_mapfile.iterator = \ output_image_mapfile.iterator = sourcedb_map.iterator = \ DataMap.SkipIterator @@ -114,13 +114,13 @@ class imager_finalize(BaseRecipe, RemoteCommandRecipeMixIn): # 2. 
Run the node side of the recupe command = " python %s" % (self.__file__.replace("master", "nodes")) jobs = [] - for (awimager_output_item, raw_ms_per_image_item, sourcelist_item, + for (awimager_output_item, ms_per_image_item, sourcelist_item, target_item, output_image_item, sourcedb_item) in zip( - awimager_output_map, raw_ms_per_image_map, sourcelist_map, + awimager_output_map, ms_per_image_map, sourcelist_map, target_mapfile, output_image_mapfile, sourcedb_map): # collect the files as argument arguments = [awimager_output_item.file, - raw_ms_per_image_item.file, + ms_per_image_item.file, sourcelist_item.file, target_item.file, output_image_item.file, diff --git a/CEP/Pipeline/recipes/sip/master/imager_prepare.py b/CEP/Pipeline/recipes/sip/master/imager_prepare.py index 671b9041a67d62bcb280ebc5d2b67f35d1f8e7ac..3d06ab9dcd50b41a67c98e774b60389364fb2104 100644 --- a/CEP/Pipeline/recipes/sip/master/imager_prepare.py +++ b/CEP/Pipeline/recipes/sip/master/imager_prepare.py @@ -86,6 +86,11 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): '--rficonsole-executable', help="The full path to the rficonsole executable " ), + 'do_rficonsole': ingredient.BoolField( + '--do_rficonsole', + default=True, + help="toggle the rficonsole step in preprocessing (default True)" + ), 'mapfile': ingredient.StringField( '--mapfile', help="Full path of mapfile; contains a list of the " @@ -95,9 +100,9 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): '--slices-mapfile', help="Path to mapfile containing the produced subband groups" ), - 'raw_ms_per_image_mapfile': ingredient.StringField( - '--raw-ms-per-image-mapfile', - help="Path to mapfile containing the raw ms for each produced" + 'ms_per_image_mapfile': ingredient.StringField( + '--ms-per-image-mapfile', + help="Path to mapfile containing the ms for each produced" "image" ), 'processed_ms_dir': ingredient.StringField( @@ -119,8 +124,8 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): 'slices_mapfile': ingredient.FileField( help="Path to mapfile containing the produced subband groups"), - 'raw_ms_per_image_mapfile': ingredient.FileField( - help="Path to mapfile containing the raw ms for each produced" + 'ms_per_image_mapfile': ingredient.FileField( + help="Path to mapfile containing the used ms for each produced" "image") } @@ -130,6 +135,7 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): """ super(imager_prepare, self).go() self.logger.info("Starting imager_prepare run") + job_directory = self.config.get("layout", "job_directory") # ********************************************************************* # input data input_map = DataMap.load(self.inputs['args'][0]) @@ -152,7 +158,8 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): jobs = [] paths_to_image_mapfiles = [] - n_subband_groups = len(output_map) + n_subband_groups = len(output_map) # needed for subsets in sb list + for idx_sb_group, item in enumerate(output_map): #create the input files for this node self.logger.debug("Creating input data subset for processing" @@ -163,14 +170,21 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): subbands_per_image, idx_sb_group, input_map) # Save the mapfile - job_directory = self.config.get( - "layout", "job_directory") inputs_for_image_mapfile_path = os.path.join( job_directory, "mapfiles", "ms_per_image_{0}".format(idx_sb_group)) + self._store_data_map(inputs_for_image_mapfile_path, inputs_for_image_map, "inputmap for location") + # skip the current step if skip is set, cannot use skip due 
to + # the enumerate: dependency on the index in the map + if item.skip == True: + # assure that the mapfile is correct + paths_to_image_mapfiles.append( + tuple([item.host, [], True])) + continue + #save the (input) ms, as a list of mapfiles paths_to_image_mapfiles.append( tuple([item.host, inputs_for_image_mapfile_path, False])) @@ -188,6 +202,7 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): self.inputs['statplot_executable'], self.inputs['msselect_executable'], self.inputs['rficonsole_executable'], + self.inputs['do_rficonsole'], self.inputs['add_beam_tables']] jobs.append(ComputeJob(item.host, node_command, arguments)) @@ -206,7 +221,20 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): slices = [] finished_runs = 0 #scan the return dict for completed key - for (item, job) in zip(concat_ms, jobs): + # loop over the potential jobs including the skipped + # If we have a skipped item, add the item to the slices with skip set + jobs_idx = 0 + for item in concat_ms: + # If this is an item that is skipped via the skip parameter in + # the parset, append a skipped + if item.skip: + slices.append(tuple([item.host, [], True])) + continue + + # we cannot use the skip iterator so we need to manually get the + # current job from the list + job = jobs[jobs_idx] + # only save the slices if the node has completed succesfull if job.results["returncode"] == 0: finished_runs += 1 @@ -215,11 +243,14 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): else: # Set the dataproduct to skipped!! item.skip = True - slices.append(tuple([item.host, ["/Failed"], True])) + slices.append(tuple([item.host, [], True])) msg = "Failed run on {0}. NOT Created: {1} ".format( item.host, item.file) self.logger.warn(msg) + # we have a non skipped workitem, increase the job idx + jobs_idx += 1 + if finished_runs == 0: self.logger.error("None of the started compute node finished:" "The current recipe produced no output, aborting") @@ -237,15 +268,15 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): self.inputs['slices_mapfile'])) #map with actual input mss. - self._store_data_map(self.inputs["raw_ms_per_image_mapfile"], + self._store_data_map(self.inputs["ms_per_image_mapfile"], DataMap(paths_to_image_mapfiles), - "mapfile containing (raw) input ms per image:") + "mapfile containing (used) input ms per image:") # Set the return values self.outputs['mapfile'] = output_ms_mapfile_path self.outputs['slices_mapfile'] = self.inputs['slices_mapfile'] - self.outputs['raw_ms_per_image_mapfile'] = \ - self.inputs["raw_ms_per_image_mapfile"] + self.outputs['ms_per_image_mapfile'] = \ + self.inputs["ms_per_image_mapfile"] return 0 def _create_input_map_for_sbgroup(self, slices_per_image, @@ -285,7 +316,7 @@ class imager_prepare(BaseRecipe, RemoteCommandRecipeMixIn): """ # The output_map contains a number of path/node pairs. The final data # dataproduct of the prepare phase: The 'input' for each of these pairs - # is a number of raw measurement sets: The number of time slices times + # is a number of measurement sets: The number of time slices times # the number of subbands collected into each of these time slices. # The total length of the input map should match this. 
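The comment above describes the expected size of the input map; the check that follows verifies exactly that relation. A worked example with hypothetical numbers (not part of the patch):

slices_per_image = 3        # hypothetical parset value
subbands_per_image = 10     # hypothetical parset value
n_images = 2                # len(output_map), one entry per subband group

expected_input_entries = n_images * slices_per_image * subbands_per_image
assert expected_input_entries == 60   # len(input_map) must equal this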
if len(input_map) != len(output_map) * \ diff --git a/CEP/Pipeline/recipes/sip/master/selfcal_awimager.py b/CEP/Pipeline/recipes/sip/master/selfcal_awimager.py new file mode 100644 index 0000000000000000000000000000000000000000..961328db79bc90bcfc1ffc70bc2e6be617a8f380 --- /dev/null +++ b/CEP/Pipeline/recipes/sip/master/selfcal_awimager.py @@ -0,0 +1,206 @@ +# LOFAR IMAGING PIPELINE +# +# Example recipe with simple job distribution +# Wouter Klijn, 2010 +# swinbank@transientskp.org +# Nicolas Vilchez, 2014 +# vilchez@astron.nl +# ------------------------------------------------------------------------------ +import sys +import copy +import lofarpipe.support.lofaringredient as ingredient +from lofarpipe.support.baserecipe import BaseRecipe +from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn +from lofarpipe.support.remotecommand import ComputeJob +from lofarpipe.support.data_map import DataMap, validate_data_maps,\ + align_data_maps + +class selfcal_awimager(BaseRecipe, RemoteCommandRecipeMixIn): + """ + Master script for the awimager. Collects arguments from command line and + pipeline inputs. + + 1. Load mapfiles and validate these + 2. Run the awimage node scripts + 3. Retrieve output. Construct output map file succesfull runs + + Details regarding the implementation of the imaging step can be found in + the node recipe + **CommandLine Arguments** + + A mapfile containing (node, datafile) pairs. The measurements set use as + input for awimager executable + + """ + inputs = { + 'executable': ingredient.ExecField( + '--executable', + help = "The full path to the awimager executable" + ), + 'parset': ingredient.FileField( + '-p', '--parset', + help = "The full path to a awimager configuration parset." + ), + 'working_directory': ingredient.StringField( + '-w', '--working-directory', + help = "Working directory used on output nodes. Results location" + ), + 'output_image': ingredient.StringField( + '--output-image', + help = "Path of the image to be create by the awimager" + ), + 'mapfile': ingredient.StringField( + '--mapfile', + help = "Full path for output mapfile. A list of the" + "successfully generated images will be written here" + ), + 'sourcedb_path': ingredient.StringField( + '--sourcedb-path', + help = "Full path of sourcedb used to create a mask for known sources" + ), + 'mask_patch_size': ingredient.FloatField( + '--mask-patch-size', + help = "Scale factor for patches in the awimager mask" + ), + 'autogenerate_parameters': ingredient.BoolField( + '--autogenerate-parameters', + default = True, + help = "Turns on the autogeneration of: cellsize, image-size, fov." + " MSSS 'type' functionality" + ), + 'specify_fov': ingredient.BoolField( + '--specify-fov', + default = False, + help = "calculated Image parameters are relative to fov, parameter" + " is active when autogenerate_parameters is False" + ), + 'fov': ingredient.FloatField( + '--fov', + default = 0.0, + help = "calculated Image parameters are relative to this" + " Field Of View in arcSec. This parameter is obligatory when" + " specify_fov is True" + ), + 'major_cycle': ingredient.IntField( + '--major_cycle', + help = "The number of the current cycle to modify the parset." + ), + 'nr_cycles': ingredient.IntField( + '--nr-cycles', + help = "The number major cycles." 
+ ) , + 'perform_self_cal': ingredient.BoolField( + '--perform-self-cal', + default=False, + help = "Control the usage of the self callibartion functionality" + ) + } + + outputs = { + 'mapfile': ingredient.StringField(), + } + + def go(self): + """ + This member contains all the functionality of the imager_awimager. + Functionality is all located at the node side of the script. + """ + super(selfcal_awimager, self).go() + self.logger.info("Starting imager_awimager run") + + # ********************************************************************* + # 1. collect the inputs and validate + input_map = DataMap.load(self.inputs['args'][0]) + sourcedb_map = DataMap.load(self.inputs['sourcedb_path']) + + if not validate_data_maps(input_map, sourcedb_map): + self.logger.error( + "the supplied input_ms mapfile and sourcedb mapfile" + "are incorrect. Aborting") + self.logger.error(repr(input_map)) + self.logger.error(repr(sourcedb_map)) + return 1 + + # ********************************************************************* + # 2. Start the node side of the awimager recipe + # Compile the command to be executed on the remote machine + node_command = "python %s" % (self.__file__.replace("master", "nodes")) + jobs = [] + + output_map = copy.deepcopy(input_map) + align_data_maps(input_map, output_map, sourcedb_map) + + sourcedb_map.iterator = input_map.iterator = output_map.iterator = \ + DataMap.SkipIterator + + for measurement_item, source_item in zip(input_map, sourcedb_map): + if measurement_item.skip or source_item.skip: + jobs.append(None) + continue + # both the sourcedb and the measurement are in a map + # unpack both + host , measurement_path = measurement_item.host, measurement_item.file + host2 , sourcedb_path = source_item.host, source_item.file + + # construct and save the output name + arguments = [self.inputs['executable'], + self.environment, + self.inputs['parset'], + self.inputs['working_directory'], + self.inputs['output_image'], + measurement_path, + sourcedb_path, + self.inputs['mask_patch_size'], + self.inputs['autogenerate_parameters'], + self.inputs['specify_fov'], + self.inputs['fov'], + self.inputs['major_cycle'], + self.inputs['nr_cycles'], + self.inputs['perform_self_cal'] + ] + + jobs.append(ComputeJob(host, node_command, arguments)) + self._schedule_jobs(jobs) + + # ********************************************************************* + # 3. Check output of the node scripts + + for job, output_item in zip(jobs, output_map): + # job == None on skipped job + if not "image" in job.results: + output_item.file = "failed" + output_item.skip = True + + else: + output_item.file = job.results["image"] + output_item.skip = False + + # Check if there are finished runs + succesfull_runs = None + for item in output_map: + if item.skip == False: + succesfull_runs = True + break + + if not succesfull_runs: + self.logger.error( + "None of the started awimager run finished correct") + self.logger.error( + "No work left to be done: exiting with error status") + return 1 + + # If partial succes + if self.error.isSet(): + self.logger.error("Failed awimager node run detected. 
continue with" + "successful tasks.") + + self._store_data_map(self.inputs['mapfile'], output_map, + "mapfile containing produces awimages") + + self.outputs["mapfile"] = self.inputs['mapfile'] + return 0 + + +if __name__ == "__main__": + sys.exit(selfcal_awimager().main()) + diff --git a/CEP/Pipeline/recipes/sip/master/selfcal_bbs.py b/CEP/Pipeline/recipes/sip/master/selfcal_bbs.py new file mode 100644 index 0000000000000000000000000000000000000000..cae2a9d40d5086532dc9379fad53788c5250e690 --- /dev/null +++ b/CEP/Pipeline/recipes/sip/master/selfcal_bbs.py @@ -0,0 +1,172 @@ +# LOFAR IMAGING PIPELINE +# selfcal_bbs BBS (BlackBoard Selfcal) recipe +# Wouter Klijn +# klijn@astron.nl +# ------------------------------------------------------------------------------ +from __future__ import with_statement +import sys +import os + +from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn +from lofarpipe.support.baserecipe import BaseRecipe +from lofarpipe.support.data_map import DataMap, MultiDataMap, \ + validate_data_maps, align_data_maps +import lofarpipe.support.lofaringredient as ingredient +from lofarpipe.support.remotecommand import ComputeJob + +class selfcal_bbs(BaseRecipe, RemoteCommandRecipeMixIn): + """ + Imager_bbs master performs a bbs run based on the supplied parset it is a + shallow wrapper around bbs. Additional functionality compared to the default + bbs recipe is the capability to add an id that allows multiple + runs to have different output files + + 1. Load and validates that the input mapfiles are correct + 2. and then starts the node script, use indexed path names for the + communication + 3. Check if all nodes succeeded. If so return a mapfile with calibrated + ms + + **Command line Arguments** + + 1. Path to a mapfile with measurement sets to calibrate + + """ + inputs = { + 'parset': ingredient.FileField( + '-p', '--parset', + help="BBS configuration parset" + ), + 'bbs_executable': ingredient.StringField( + '--bbs-executable', + help="BBS standalone executable (bbs-reducer)" + ), + 'instrument_mapfile': ingredient.FileField( + '--instrument-mapfile', + help="Full path to the mapfile containing the names of the " + "instrument model files generated by the `parmdb` recipe" + ), + 'sourcedb_mapfile': ingredient.FileField( + '--sourcedb-mapfile', + help="Full path to the mapfile containing the names of the " + "sourcedbs generated by the `sourcedb` recipe" + ), + 'id': ingredient.IntField( + '--id', + default=0, + help="Optional integer id for distinguishing multiple runs" + ), + 'mapfile': ingredient.StringField( + '--mapfile', + help="Full path to the file containing the output data products" + ), + 'concat_ms_map_path': ingredient.FileField( + '--concat-ms-map-path', + help="Output of the concat MS file" + ), + 'major_cycle': ingredient.IntField( + '--major_cycle', + help="ID for the current major cycle" + ) + } + + outputs = { + 'mapfile': ingredient.FileField( + help="Full path to a mapfile describing the processed data" + ) + } + + def go(self): + """ + imager_bbs functionality. Called by framework performing all the work + """ + super(selfcal_bbs, self).go() + self.logger.info("Starting imager_bbs run") + + # ******************************************************************** + # 1. 
Load the and validate the data + ms_map = MultiDataMap.load(self.inputs['args'][0]) + parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile']) + sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile']) + concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path']) + + # ********************************************************************* + # 2. Start the node scripts + jobs = [] + node_command = " python %s" % (self.__file__.replace("master", "nodes")) + map_dir = os.path.join( + self.config.get("layout", "job_directory"), "mapfiles") + run_id = str(self.inputs.get("id")) + + # Update the skip fields of the four maps. If 'skip' is True in any of + # these maps, then 'skip' must be set to True in all maps. + align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map) + + ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \ + concat_ms_map.iterator = DataMap.SkipIterator + + + # ********************************************************************* + for (ms, parmdb, sourcedb, concat_ms) in zip(ms_map, parmdb_map, + sourcedb_map, concat_ms_map): + #host is same for each entry (validate_data_maps) + host, ms_list = ms.host, ms.file + + # Write data maps to MultaDataMaps + ms_list_path = os.path.join( + map_dir, host + "_ms_" + run_id + ".map") + MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path) + + parmdb_list_path = os.path.join( + map_dir, host + "_parmdb_" + run_id + ".map") + MultiDataMap( + [tuple([host, parmdb.file, False])]).save(parmdb_list_path) + + sourcedb_list_path = os.path.join( + map_dir, host + "_sky_" + run_id + ".map") + MultiDataMap( + [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path) + + # THe concat ms does not have to be written: It already is a + # singular item (it is the output of the reduce step) + # redmine issue #6021 + arguments = [self.inputs['bbs_executable'], + self.inputs['parset'], + ms_list_path, + parmdb_list_path, + sourcedb_list_path, + concat_ms.file, + self.inputs['major_cycle']] + jobs.append(ComputeJob(host, node_command, arguments)) + + # start and wait till all are finished + self._schedule_jobs(jobs) + + # ********************************************************************** + # 3. validate the node output and construct the output mapfile. 
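Before the validation below, note how the job loop above packs the per-host inputs: each node gets single-entry map files written under the job's mapfile directory. A minimal sketch of that layout with hypothetical host, files and run id (it assumes the lofarpipe framework is importable; the save call is commented out because the target directory is made up):

from lofarpipe.support.data_map import MultiDataMap

host, run_id = "locus001", "0"
time_slices = ["/data/scratch/slice_0.ms", "/data/scratch/slice_1.ms"]

# MultiDataMap entries are (host, list-of-files, skip), the same tuple layout
# used throughout these recipes for the per-host ms, parmdb and sky maps.
ms_map = MultiDataMap([(host, time_slices, False)])
sky_map = MultiDataMap([(host, ["/data/scratch/cycle0.sourcedb"], False)])
# ms_map.save("/data/jobs/mapfiles/{0}_ms_{1}.map".format(host, run_id))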
+        if self.error.isSet():   # if one of the nodes failed
+            self.logger.warn("Failed bbs node run detected, skipping the "
+                             "affected work items in further computations")
+
+        # find the failed jobs and set their skip fields
+        for (ms_item, concat_item, job) in zip(ms_map, concat_ms_map, jobs):
+            if job.results["returncode"] == 0:
+                continue
+            else:
+                ms_item.skip = True
+                concat_item.skip = True
+                self.logger.warn("bbs failed on item: {0}".format(ms_item.file))
+
+        # return the output: the measurement sets that are calibrated;
+        # calibrated data is placed in the ms sets
+        MultiDataMap(ms_map).save(self.inputs['mapfile'])
+        # also save the concat_ms map with possible skips
+        DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
+        self.logger.info("Wrote mapfile with calibrated data")
+
+        self.outputs['mapfile'] = self.inputs['mapfile']
+        return 0
+
+
+if __name__ == '__main__':
+    sys.exit(selfcal_bbs().main())
diff --git a/CEP/Pipeline/recipes/sip/master/selfcal_finalize.py b/CEP/Pipeline/recipes/sip/master/selfcal_finalize.py
new file mode 100644
index 0000000000000000000000000000000000000000..07ec8c839f431a1065298ae989b95a1f28e0684e
--- /dev/null
+++ b/CEP/Pipeline/recipes/sip/master/selfcal_finalize.py
@@ -0,0 +1,208 @@
+from __future__ import with_statement
+import sys
+
+import lofarpipe.support.lofaringredient as ingredient
+from lofarpipe.support.baserecipe import BaseRecipe
+from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn
+from lofarpipe.support.remotecommand import ComputeJob
+from lofarpipe.support.data_map import DataMap, validate_data_maps, \
+     align_data_maps
+
+class selfcal_finalize(BaseRecipe, RemoteCommandRecipeMixIn):
+    """
+    The selfcal_finalize recipe performs the steps needed for integrating the
+    selfcal_imager_pipeline into the LOFAR framework: it places the image on
+    the output location in the correct image type (hdf5).
+    It also adds some meta data collected from the individual measurement sets
+    and the found data.
+ + This recipe does not have positional commandline arguments + """ + inputs = { + 'awimager_output_map': ingredient.FileField( + '--awimager-output-mapfile', + help="""Mapfile containing (host, path) pairs of created sky + images """ + ), + 'ms_per_image_map': ingredient.FileField( + '--ms-per-image-map', + help='''Mapfile containing (host, path) pairs of mapfiles used + to create image on that node''' + ), + 'sourcelist_map': ingredient.FileField( + '--sourcelist-map', + help='''mapfile containing (host, path) pairs to a list of sources + found in the image''' + ), + 'sourcedb_map': ingredient.FileField( + '--sourcedb_map', + help='''mapfile containing (host, path) pairs to a db of sources + found in the image''' + ), + 'target_mapfile': ingredient.FileField( + '--target-mapfile', + help="Mapfile containing (host, path) pairs to the concatenated and" + "combined measurement set, the source for the actual sky image" + ), + 'minbaseline': ingredient.FloatField( + '--minbaseline', + help='''Minimum length of the baseline used for the images''' + ), + 'maxbaseline': ingredient.FloatField( + '--maxbaseline', + help='''Maximum length of the baseline used for the images''' + ), + 'output_image_mapfile': ingredient.FileField( + '--output-image-mapfile', + help='''mapfile containing (host, path) pairs with the final + output image (hdf5) location''' + ), + 'processed_ms_dir': ingredient.StringField( + '--processed-ms-dir', + help='''Path to directory for processed measurment sets''' + ), + 'fillrootimagegroup_exec': ingredient.ExecField( + '--fillrootimagegroup_exec', + help='''Full path to the fillRootImageGroup executable''' + ), + 'placed_image_mapfile': ingredient.FileField( + '--placed-image-mapfile', + help="location of mapfile with processed and correctly placed," + " hdf5 images" + ), + 'placed_correlated_mapfile': ingredient.FileField( + '--placed-correlated-mapfile', + help="location of mapfile with processedd and correctly placed," + " correlated ms" + ), + 'concat_ms_map_path': ingredient.FileField( + '--concat-ms-map-path', + help="Output of the concat MS file" + ), + 'output_correlated_mapfile': ingredient.FileField( + '--output-correlated-mapfile', + help="location of mapfile where output paths for mss are located" + ), + 'msselect_executable': ingredient.ExecField( + '--msselect-executable', + help="The full path to the msselect executable " + ), + } + + outputs = { + 'placed_image_mapfile': ingredient.StringField(), + 'placed_correlated_mapfile': ingredient.StringField(), + } + + def go(self): + """ + Steps: + + 1. Load and validate the input datamaps + 2. Run the node parts of the recipe + 3. Validate node output and format the recipe output + """ + super(selfcal_finalize, self).go() + # ********************************************************************* + # 1. 
Load the datamaps
+        awimager_output_map = DataMap.load(
+            self.inputs["awimager_output_map"])
+        ms_per_image_map = DataMap.load(
+            self.inputs["ms_per_image_map"])
+        sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
+        sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
+        target_mapfile = DataMap.load(self.inputs["target_mapfile"])
+        output_image_mapfile = DataMap.load(
+            self.inputs["output_image_mapfile"])
+        concat_ms_mapfile = DataMap.load(
+            self.inputs["concat_ms_map_path"])
+        output_correlated_map = DataMap.load(
+            self.inputs["output_correlated_mapfile"])
+        processed_ms_dir = self.inputs["processed_ms_dir"]
+        fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]
+
+        # Align the skip fields
+        align_data_maps(awimager_output_map, ms_per_image_map,
+                sourcelist_map, target_mapfile, output_image_mapfile,
+                sourcedb_map, concat_ms_mapfile, output_correlated_map)
+
+        # Set the correct iterator
+        sourcelist_map.iterator = awimager_output_map.iterator = \
+            ms_per_image_map.iterator = target_mapfile.iterator = \
+            output_image_mapfile.iterator = sourcedb_map.iterator = \
+            concat_ms_mapfile.iterator = output_correlated_map.iterator = \
+            DataMap.SkipIterator
+
+        # *********************************************************************
+        # 2. Run the node side of the recipe
+        command = " python %s" % (self.__file__.replace("master", "nodes"))
+        jobs = []
+        for (awimager_output_item, ms_per_image_item, sourcelist_item,
+             target_item, output_image_item, sourcedb_item,
+             concat_ms_item, correlated_item) in zip(
+                awimager_output_map, ms_per_image_map, sourcelist_map,
+                target_mapfile, output_image_mapfile, sourcedb_map,
+                concat_ms_mapfile, output_correlated_map):
+            # collect the files as argument
+            arguments = [awimager_output_item.file,
+                         ms_per_image_item.file,
+                         sourcelist_item.file,
+                         target_item.file,
+                         output_image_item.file,
+                         self.inputs["minbaseline"],
+                         self.inputs["maxbaseline"],
+                         processed_ms_dir,
+                         fillrootimagegroup_exec,
+                         self.environment,
+                         sourcedb_item.file,
+                         concat_ms_item.file,
+                         correlated_item.file,
+                         self.inputs["msselect_executable"]]
+
+            self.logger.info(
+                "Starting finalize with the following args: {0}".format(
+                    arguments))
+            jobs.append(ComputeJob(target_item.host, command, arguments))
+
+        self._schedule_jobs(jobs)
+
+        # *********************************************************************
+        # 3. Validate the performance of the node script and assign output
+        succesful_run = False
+        for (job, output_image_item, output_correlated_item) in zip(jobs,
+                output_image_mapfile, output_correlated_map):
+            if "hdf5" not in job.results:
+                # If the output failed set the skip to True on both maps
+                output_image_item.skip = True
+                output_correlated_item.skip = True
+            else:
+                succesful_run = True
+                # signal that we have at least a single run finished ok.
+ # No need to set skip in this case + + if not succesful_run: + self.logger.warn("Not a single finalizer succeeded") + return 1 + + # Save the location of the output images + output_image_mapfile.save(self.inputs['placed_image_mapfile']) + self.logger.debug( + "Wrote mapfile containing placed hdf5 images: {0}".format( + self.inputs['placed_image_mapfile'])) + + # save the location of measurements sets + output_correlated_map.save(self.inputs['placed_correlated_mapfile']) + self.logger.debug( + "Wrote mapfile containing placed mss: {0}".format( + self.inputs['placed_correlated_mapfile'])) + + self.outputs["placed_image_mapfile"] = self.inputs[ + 'placed_image_mapfile'] + self.outputs["placed_correlated_mapfile"] = self.inputs[ + 'placed_correlated_mapfile'] + + return 0 + + +if __name__ == '__main__': + sys.exit(selfcal_finalize().main()) diff --git a/CEP/Pipeline/recipes/sip/nodes/get_metadata.py b/CEP/Pipeline/recipes/sip/nodes/get_metadata.py index 3e258235aa9342c4cf3c4d5804f7dc311da053b1..bfd9f04ff53bd285b3d57854c44cc9732ead20d6 100644 --- a/CEP/Pipeline/recipes/sip/nodes/get_metadata.py +++ b/CEP/Pipeline/recipes/sip/nodes/get_metadata.py @@ -25,24 +25,14 @@ class get_metadata(LOFARnodeTCP): self.logger.error("Dataset %s does not exist" % (infile)) return 1 -# # Get the product metadata. If data product type was not specified, -# # derive it from the input filename's extension. -# if not product_type: -# ext = os.path.splitext(infile)[1] -# if ext == ".MS": product_type = "Correlated" -# elif ext == ".INST": product_type = "InstrumentModel" -# elif ext == ".IM": product_type = "SkyImage" -# if not product_type: -# self.logger.error("File %s has unknown product type" % infile) -# return 1 - self.logger.debug("Product type: %s" % product_type) if product_type == "Correlated": - self.outputs = metadata.Correlated(infile).data() + self.outputs = metadata.Correlated(self.logger, infile).data() elif product_type == "InstrumentModel": - self.outputs = metadata.InstrumentModel(infile).data() + self.outputs = metadata.InstrumentModel(self.logger, + infile).data() elif product_type == "SkyImage": - self.outputs = metadata.SkyImage(infile).data() + self.outputs = metadata.SkyImage(self.logger, infile).data() else: self.logger.error("Unknown product type: %s" % product_type) return 1 diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py b/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py index 416874c12e176b0c7765d7624b4933b43b321603..839cc05ae88f5d7f82ead457b5d40653fbdfbf6c 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_bbs.py @@ -62,6 +62,9 @@ class imager_bbs(LOFARnodeTCP): if bbs_process_group.wait_for_finish() != None: self.logger.error( "Failed bbs run detected Aborting") + return 1 # If bbs failed we need to abort: the concat + # is now corrupt + except OSError, exception: self.logger.error("Failed to execute bbs: {0}".format(str( exception))) diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py b/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py index b9b1c92b28b3b2de5e54382aecd0678f372f3e6f..e258f8c279fc47beeee0e06edf418d645c6a58d2 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py @@ -51,24 +51,27 @@ class imager_create_dbs(LOFARnodeTCP): There is a single sourcedb for a concatenated measurement set/ image 3. Each individual timeslice needs a place to collect parameters: This is done in the paramdb. - 4. Assign the outputs of the script + 4. 
Add the created databases as meta information to the measurment set + + 5. Assign the outputs of the script """ def run(self, concatenated_measurement_set, sourcedb_target_path, monet_db_hostname, monet_db_port, monet_db_name, monet_db_user, monet_db_password, assoc_theta, parmdb_executable, slice_paths, parmdb_suffix, environment, working_directory, makesourcedb_path, - source_list_path_extern): + source_list_path_extern, major_cycle): self.logger.info("Starting imager_create_dbs Node") self.environment.update(environment) #******************************************************************* # 1. get a sourcelist: from gsm or from file - source_list, append = self._create_source_list(source_list_path_extern, - sourcedb_target_path, concatenated_measurement_set, - monet_db_hostname, monet_db_port, monet_db_name, monet_db_user, - monet_db_password, assoc_theta) + source_list, append = self._create_source_list( + source_list_path_extern,sourcedb_target_path, + concatenated_measurement_set,monet_db_hostname, + monet_db_port, monet_db_name, monet_db_user, + monet_db_password, assoc_theta) #******************************************************************* # 2convert it to a sourcedb (casa table) @@ -86,8 +89,14 @@ class imager_create_dbs(LOFARnodeTCP): self.logger.error("failed creating paramdb for slices") return 1 + # ******************************************************************* + # Add the create databases to the measurments set, + self._add_dbs_to_ms(concatenated_measurement_set, sourcedb_target_path, + parmdbs, major_cycle) + + #******************************************************************* - # 4. Assign the outputs + # 5. Assign the outputs self.outputs["sourcedb"] = sourcedb_target_path self.outputs["parmdbs"] = parmdbs return 0 @@ -118,7 +127,8 @@ class imager_create_dbs(LOFARnodeTCP): append = False else: source_list = source_list_path_extern - append = True + append = False # Nicolas Should this be true or false? + # later steps should not contain the original bootstrapping input return source_list, append @@ -460,7 +470,44 @@ class imager_create_dbs(LOFARnodeTCP): return None - + def _add_dbs_to_ms(self, concatenated_measurement_set, sourcedb_target_path, + parmdbs_path, major_cycle): + """ + Add the in this recipe created sourcedb and instrument table(parmdb) + to the local measurementset. + """ + self.logger.info("Adding sourcemodel and instrument model to output ms.") + # Create the base meta information directory + meta_directory = concatenated_measurement_set + "_selfcal_information" + if not os.path.exists(meta_directory): + os.makedirs(meta_directory) + + # Cycle dir + cycle_directory = os.path.join(meta_directory, + "cycle_" + str(major_cycle)) + if not os.path.exists(cycle_directory): + os.makedirs(cycle_directory) + + #COpy the actual data. parmdbs_path is a list! + sourcedb_directory = os.path.join(cycle_directory, + os.path.basename(sourcedb_target_path)) + if os.path.exists(sourcedb_directory): + shutil.rmtree(sourcedb_directory) # delete dir to assure copy succeeds + shutil.copytree(sourcedb_target_path, sourcedb_directory) + + #parmdbs_path is a list! 
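+        # Each entry in parmdbs_path (one parmdb per time slice) is copied
+        # into the cycle directory below; a stale copy left by an earlier
+        # run is removed first so that copytree cannot fail on an existing
+        # target. A missing slice only produces a warning, it is not treated
+        # as a fatal error.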
+ for parmdb_entry in parmdbs_path: + try: + parmdb_directory = os.path.join(cycle_directory, + os.path.basename(parmdb_entry)) + # delete dir to assure copy succeeds + if os.path.exists(parmdb_directory): + shutil.rmtree(parmdb_directory) + shutil.copytree(parmdb_entry, parmdb_directory) + except: + self.logger.warn("Failed copying parmdb:") + self.logger.warn(parmdb_entry) + continue # slices might be missing, not an exit error if __name__ == "__main__": diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py b/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py index a06849c3dbdadb36aba4ca6d5c901b9c40eeb9cd..5d4be9c094ad92cf407c9a335a66c28e5bcd0e12 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py @@ -34,13 +34,13 @@ class imager_finalize(LOFARnodeTCP): 5. Export sourcelist to msss server, copy the sourcelist to hdf5 location 6. Return the outputs """ - def run(self, awimager_output, raw_ms_per_image, sourcelist, target, + def run(self, awimager_output, ms_per_image, sourcelist, target, output_image, minbaseline, maxbaseline, processed_ms_dir, fillrootimagegroup_exec, environment, sourcedb): self.environment.update(environment) """ :param awimager_output: Path to the casa image produced by awimager - :param raw_ms_per_image: The X (90) measurements set scheduled to + :param ms_per_image: The X (90) measurements set scheduled to create the image :param sourcelist: list of sources found in the image :param target: <unused> @@ -55,7 +55,7 @@ class imager_finalize(LOFARnodeTCP): :rtype: self.outputs['image'] path to the produced hdf5 image """ with log_time(self.logger): - raw_ms_per_image_map = DataMap.load(raw_ms_per_image) + ms_per_image_map = DataMap.load(ms_per_image) # ***************************************************************** # 1. add image info @@ -64,22 +64,22 @@ class imager_finalize(LOFARnodeTCP): # TODO: BUG!! 
the meta data might contain files that were copied # but failed in imager_bbs processed_ms_paths = [] - for item in raw_ms_per_image_map: + for item in ms_per_image_map: path = item.file - raw_ms_file_name = os.path.split(path)[1] - #if the raw ms is in the processed dir (additional check) - if (raw_ms_file_name in file_list): + ms_file_name = os.path.split(path)[1] + #if the ms is in the processed dir (additional check) + if (ms_file_name in file_list): # save the path processed_ms_paths.append(os.path.join(processed_ms_dir, - raw_ms_file_name)) + ms_file_name)) #add the information the image try: addimg.addImagingInfo(awimager_output, processed_ms_paths, sourcedb, minbaseline, maxbaseline) except Exception, error: - self.logger.error("addImagingInfo Threw Exception:") - self.logger.error(error) + self.logger.warn("addImagingInfo Threw Exception:") + self.logger.warn(error) # Catch raising of already done error: allows for rerunning # of the recipe if "addImagingInfo already done" in str(error): @@ -143,8 +143,8 @@ class imager_finalize(LOFARnodeTCP): (stdoutdata, stderrdata) = proc.communicate() exit_status = proc.returncode - self.logger.error(stdoutdata) - self.logger.error(stderrdata) + self.logger.info(stdoutdata) + self.logger.info(stderrdata) #if copy failed log the missing file if exit_status != 0: diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py b/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py index 80acc58def0958122d1e7c16f603903ecc0d4032..63328ef3f2a3e43e84483635109ce1d645d6f66d 100644 --- a/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py +++ b/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py @@ -10,7 +10,8 @@ import shutil import os import subprocess import copy -import pyrap.tables as pt # order of pyrap import influences the type conversion binding +import pyrap.tables as pt # order of pyrap import influences the type + # conversion binding from lofarpipe.support.pipelinelogging import CatchLog4CPlus from lofarpipe.support.pipelinelogging import log_time from lofarpipe.support.utilities import patch_parset @@ -19,7 +20,7 @@ from lofarpipe.support.lofarnode import LOFARnodeTCP from lofarpipe.support.utilities import create_directory from lofarpipe.support.data_map import DataMap from lofarpipe.support.subprocessgroup import SubProcessGroup - +from lofarpipe.recipes.helpers.data_quality import run_rficonsole, filter_bad_stations # Some constant settings for the recipe _time_slice_dir_name = "time_slices" @@ -29,31 +30,30 @@ class imager_prepare(LOFARnodeTCP): """ Steps perform on the node: - 0. Create directories and assure that they are empty. - 1. Collect the Measurement Sets (MSs): copy to the current node. - 2. Start dppp: Combines the data from subgroups into single timeslice. - 3. Flag rfi. - 4. Add addImagingColumns to the casa ms. - 5. Concatenate the time slice measurment sets, to a single virtual ms. + 1. Create directories and assure that they are empty. + 2. Collect the Measurement Sets (MSs): copy to the current node. + 3. Start dppp: Combines the data from subgroups into single timeslice. + 4. Flag rfi (toggle by parameter) + 5. Add addImagingColumns to the casa ms. 6. Filter bad stations. Find station with repeated bad measurement and remove these completely from the dataset. - - **Members:** + 7. Add measurmentset tables + 8. 
Perform the (virtual) concatenation of the timeslices """ def run(self, environment, parset, working_dir, processed_ms_dir, - ndppp_executable, output_measurement_set, - time_slices_per_image, subbands_per_group, raw_ms_mapfile, + ndppp_executable, output_measurement_set, + time_slices_per_image, subbands_per_group, input_ms_mapfile, asciistat_executable, statplot_executable, msselect_executable, - rficonsole_executable, add_beam_tables): + rficonsole_executable, do_rficonsole, add_beam_tables): """ Entry point for the node recipe """ self.environment.update(environment) with log_time(self.logger): - input_map = DataMap.load(raw_ms_mapfile) + input_map = DataMap.load(input_ms_mapfile) #****************************************************************** - # I. Create the directories used in this recipe + # 1. Create the directories used in this recipe create_directory(processed_ms_dir) # time slice dir_to_remove: assure empty directory: Stale data @@ -69,15 +69,17 @@ class imager_prepare(LOFARnodeTCP): self.logger.debug("and assured it is empty") #****************************************************************** - # 1. Copy the input files - copied_ms_map = self._copy_input_files( + # 2. Copy the input files + # processed_ms_map will be the map containing all the 'valid' + # input ms + processed_ms_map = self._copy_input_files( processed_ms_dir, input_map) #****************************************************************** - # 2. run dppp: collect frequencies into larger group + # 3. run dppp: collect frequencies into larger group time_slices_path_list = \ self._run_dppp(working_dir, time_slice_dir, - time_slices_per_image, copied_ms_map, subbands_per_group, + time_slices_per_image, processed_ms_map, subbands_per_group, processed_ms_dir, parset, ndppp_executable) # If no timeslices were created, bail out with exit status 1 @@ -88,13 +90,16 @@ class imager_prepare(LOFARnodeTCP): self.logger.debug( "Produced time slices: {0}".format(time_slices_path_list)) + #*********************************************************** - # 3. run rfi_concole: flag datapoints which are corrupted - self._run_rficonsole(rficonsole_executable, time_slice_dir, - time_slices_path_list) + # 4. run rfi_concole: flag datapoints which are corrupted + if (do_rficonsole): + run_rficonsole(rficonsole_executable, time_slice_dir, + time_slices_path_list, self.logger, + self.resourceMonitor ) #****************************************************************** - # 4. Add imaging columns to each timeslice + # 5. Add imaging columns to each timeslice # ndppp_executable fails if not present for time_slice_path in time_slices_path_list: pt.addImagingColumns(time_slice_path) @@ -103,29 +108,34 @@ class imager_prepare(LOFARnodeTCP): time_slice_path)) #***************************************************************** - # 5. Filter bad stations - time_slice_filtered_path_list = self._filter_bad_stations( + # 6. Filter bad stations + time_slice_filtered_path_list = filter_bad_stations( time_slices_path_list, asciistat_executable, - statplot_executable, msselect_executable) + statplot_executable, msselect_executable, + self.logger, self.resourceMonitor) #***************************************************************** - # Add measurmenttables + # 7. Add measurementtables if add_beam_tables: - self.add_beam_tables(time_slice_filtered_path_list) + self._add_beam_tables(time_slice_filtered_path_list) #****************************************************************** - # 6. Perform the (virtual) concatenation of the timeslices + # 8. 
Perform the (virtual) concatenation of the timeslices self._concat_timeslices(time_slice_filtered_path_list, output_measurement_set) - #****************************************************************** + # ***************************************************************** + # Write the actually used ms for the created dataset to the input + # mapfile + processed_ms_map.save(input_ms_mapfile) + # return self.outputs["time_slices"] = \ time_slices_path_list return 0 - def add_beam_tables(self, time_slices_path_list): + def _add_beam_tables(self, time_slices_path_list): beamtable_proc_group = SubProcessGroup(self.logger) for ms_path in time_slices_path_list: self.logger.debug("makebeamtables start") @@ -134,6 +144,7 @@ class imager_prepare(LOFARnodeTCP): beamtable_proc_group.run(cmd_string) if beamtable_proc_group.wait_for_finish() != None: + # TODO: Exception on error: make time_slices_path_list a mapfile raise Exception("an makebeamtables run failed!") self.logger.debug("makebeamtables finished") @@ -145,58 +156,60 @@ class imager_prepare(LOFARnodeTCP): This function collects all the file in the input map in the processed_ms_dir Return value is a set of missing files """ - copied_ms_map = copy.deepcopy(input_map) + processed_ms_map = copy.deepcopy(input_map) # loop all measurement sets - for input_item, copied_item in zip(input_map, copied_ms_map): + for input_item, processed_item in zip(input_map, processed_ms_map): # fill the copied item with the correct data - copied_item.host = self.host - copied_item.file = os.path.join( + processed_item.host = self.host + processed_item.file = os.path.join( processed_ms_dir, os.path.basename(input_item.file)) + stderrdata = None # If we have to skip this ms if input_item.skip == True: - exit_status = 1 # - - # skip the copy if machine is the same (execution on localhost) - # make sure data is in the correct directory. for now: working_dir/[jobname]/subbands - #if input_item.host == "localhost": - # continue - # construct copy command - command = ["rsync", "-r", "{0}:{1}".format( - input_item.host, input_item.file), - "{0}".format(processed_ms_dir)] - if input_item.host == "localhost": - command = ["cp", "-r", "{0}".format(input_item.file), - "{0}".format(processed_ms_dir)] - - self.logger.debug("executing: " + " ".join(command)) - - # Spawn a subprocess and connect the pipes - # The copy step is performed 720 at once in that case which might - # saturate the cluster. - copy_process = subprocess.Popen( - command, - stdin = subprocess.PIPE, - stdout = subprocess.PIPE, - stderr = subprocess.PIPE) - - # Wait for finish of copy inside the loop: enforce single tread - # copy - (stdoutdata, stderrdata) = copy_process.communicate() - - exit_status = copy_process.returncode + exit_status = 1 + stderrdata = "SKIPPED_FILE" + + else: + # use cp the copy if machine is the same ( localhost) + # make sure data is in the correct directory. for now: + # working_dir/[jobname]/subbands + # construct copy command + command = ["rsync", "-r", "{0}:{1}".format( + input_item.host, input_item.file), + "{0}".format(processed_ms_dir)] + if input_item.host == "localhost": + command = ["cp", "-r", "{0}".format(input_item.file), + "{0}".format(processed_ms_dir)] + + self.logger.debug("executing: " + " ".join(command)) + + # Spawn a subprocess and connect the pipes + # The copy step is performed 720 at once in that case which + # might saturate the cluster. 
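+                # The copy below is deliberately synchronous: communicate()
+                # is called inside the loop, so only one input measurement
+                # set is transferred at a time instead of all of them at
+                # once.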
+ copy_process = subprocess.Popen( + command, + stdin = subprocess.PIPE, + stdout = subprocess.PIPE, + stderr = subprocess.PIPE) + + # Wait for finish of copy inside the loop: enforce single tread + # copy + (stdoutdata, stderrdata) = copy_process.communicate() + + exit_status = copy_process.returncode # if copy failed log the missing file and update the skip fields if exit_status != 0: input_item.skip = True - copied_item.skip = True + processed_item.skip = True self.logger.warning( "Failed loading file: {0}".format(input_item.file)) self.logger.warning(stderrdata) self.logger.debug(stdoutdata) - return copied_ms_map + return processed_ms_map def _dppp_call(self, working_dir, ndppp, cmd, environment): @@ -204,15 +217,34 @@ class imager_prepare(LOFARnodeTCP): Muckable function running the dppp executable. Wraps dppp with catchLog4CPLus and catch_segfaults """ + # TODO: cpu limited is static at this location + environment['OMP_NUM_THREADS'] = str(8) + self.logger.debug("Using %s threads for ndppp" % 8) with CatchLog4CPlus(working_dir, self.logger.name + "." + os.path.basename("imager_prepare_ndppp"), os.path.basename(ndppp)) as logger: catch_segfaults(cmd, working_dir, environment, logger, cleanup = None, usageStats=self.resourceMonitor) + def _get_nchan_from_ms(self, file): + """ + Wrapper for pt call to retrieve the number of channels in a ms + + Uses Pyrap functionality throws 'random' exceptions. + """ + + # open the datasetassume same nchan for all sb + table = pt.table(file) # + + # get the data column, get description, get the + # shape, first index returns the number of channels + nchan = str(pt.tablecolumn(table, 'DATA').getdesc()["shape"][0]) + + return nchan + def _run_dppp(self, working_dir, time_slice_dir_path, slices_per_image, - copied_ms_map, subbands_per_image, collected_ms_dir_name, parset, - ndppp): + processed_ms_map, subbands_per_image, collected_ms_dir_name, + parset, ndppp): """ Run NDPPP: Create dir for grouped measurements, assure clean workspace @@ -223,27 +255,59 @@ class imager_prepare(LOFARnodeTCP): for idx_time_slice in range(slices_per_image): start_slice_range = idx_time_slice * subbands_per_image end_slice_range = (idx_time_slice + 1) * subbands_per_image - # Get the subset of ms that are part of the current timeslice, - # cast to datamap - input_map_subgroup = DataMap( - copied_ms_map[start_slice_range:end_slice_range]) - output_ms_name = "time_slice_{0}.dppp.ms".format(idx_time_slice) # construct time slice name time_slice_path = os.path.join(time_slice_dir_path, output_ms_name) - # convert the datamap to a file list: Do not remove skipped files: - # ndppp needs the incorrect files there to allow filling with zeros - ndppp_input_ms = [item.file for item in input_map_subgroup] - - # Join into a single list of paths. 
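A minimal stand-alone sketch of how the channel count is used: the number of
channels read from the first valid subband (the same DATA-column lookup that
_get_nchan_from_ms performs) becomes the avg1.freqstep override, so NDPPP
averages every input channel into one output channel. The helper name below is
hypothetical, the (nchan, ncorr) cell shape is the assumption the recipe itself
relies on, and the step name avg1 is simply taken over from the template parset:

    import pyrap.tables as pt

    def ndppp_overrides(ms_path, msin_list, msout_path):
        # Channels per subband, read from the DATA column shape exactly as
        # _get_nchan_from_ms does above.
        table = pt.table(ms_path, ack=False)
        try:
            nchan = pt.tablecolumn(table, 'DATA').getdesc()["shape"][0]
        finally:
            table.close()

        return {
            'msin': "['{0}']".format("', '".join(msin_list)),
            'msout': msout_path,
            'avg1.freqstep': str(nchan),  # average all channels into one
        }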
+ # convert the datamap to a file list: Add nonfalid entry for + # skipped files: ndppp needs the incorrect files there to allow + # filling with zeros + ndppp_input_ms = [] + nchan_known = False + + for item in processed_ms_map[start_slice_range:end_slice_range]: + if item.skip: + ndppp_input_ms.append("SKIPPEDSUBBAND") + else: + # From the first non skipped filed get the nchan + if not nchan_known: + try: + # We want toAutomatically average the number + # of channels in the output to 1, get the current + # nr of channels + nchan_input = self._get_nchan_from_ms(item.file) + nchan_known = True + + # corrupt input measurement set + except Exception, e: + self.logger.warn(str(e)) + item.skip = True + ndppp_input_ms.append("SKIPPEDSUBBAND") + continue + + ndppp_input_ms.append(item.file) + + # if none of the input files was valid, skip the creation of the + # timeslice all together, it will not show up in the timeslice + # mapfile + if not nchan_known: + continue + + # TODO/FIXME: dependency on the step name!!!! + ndppp_nchan_key = "avg1.freqstep" + + # Join into a single string list of paths. msin = "['{0}']".format("', '".join(ndppp_input_ms)) + # Update the parset with computed parameters patch_dictionary = {'uselogger': 'True', # enables log4cplus 'msin': msin, - 'msout': time_slice_path} + 'msout': time_slice_path, + ndppp_nchan_key:nchan_input} + + nddd_parset_path = time_slice_path + ".ndppp.par" try: temp_parset_filename = patch_parset(parset, patch_dictionary) @@ -278,11 +342,10 @@ class imager_prepare(LOFARnodeTCP): time_slice_path_list.append(time_slice_path) # On error the current timeslice should be skipped - except subprocess.CalledProcessError, exception: - self.logger.warning(str(exception)) - continue - + # and the input ms should have the skip set except Exception, exception: + for item in processed_ms_map[start_slice_range:end_slice_range]: + item.skip = True self.logger.warning(str(exception)) continue @@ -300,138 +363,6 @@ class imager_prepare(LOFARnodeTCP): "mentset: {1}".format( ", ".join(group_measurements_collected), output_file_path)) - def _run_rficonsole(self, rficonsole_executable, time_slice_dir, - time_slices): - """ - _run_rficonsole runs the rficonsole application on the supplied - timeslices in time_slices. - - """ - - # loop all measurement sets - rfi_temp_dir = os.path.join(time_slice_dir, "rfi_temp_dir") - create_directory(rfi_temp_dir) - - try: - rfi_console_proc_group = SubProcessGroup(self.logger, - self.resourceMonitor) - for time_slice in time_slices: - # Each rfi console needs own working space for temp files - temp_slice_path = os.path.join(rfi_temp_dir, - os.path.basename(time_slice)) - create_directory(temp_slice_path) - - # construct copy command - self.logger.info(time_slice) - command = [rficonsole_executable, "-indirect-read", - time_slice] - self.logger.info("executing rficonsole command: {0}".format( - " ".join(command))) - - # Add the command to the process group - rfi_console_proc_group.run(command, cwd = temp_slice_path) - - - # wait for all to finish - if rfi_console_proc_group.wait_for_finish() != None: - raise Exception("an rfi_console_proc_group run failed!") - - finally: - shutil.rmtree(rfi_temp_dir) - - def _filter_bad_stations(self, time_slice_path_list, - asciistat_executable, statplot_executable, msselect_executable): - """ - A Collection of scripts for finding and filtering of bad stations: - - 1. First a number of statistics with regards to the spread of the data - is collected using the asciistat_executable. - 2. 
Secondly these statistics are consumed by the statplot_executable - which produces a set of bad stations. - 3. In the final step the bad stations are removed from the dataset - using ms select - - REF: http://www.lofar.org/wiki/lib/exe/fetch.php?media=msss:pandeymartinez-week9-v1p2.pdf - """ - # run asciistat to collect statistics about the ms - self.logger.info("Filtering bad stations") - self.logger.debug("Collecting statistical properties of input data") - asciistat_output = [] - asciistat_proc_group = SubProcessGroup(self.logger) - for ms in time_slice_path_list: - output_dir = ms + ".filter_temp" - create_directory(output_dir) - asciistat_output.append((ms, output_dir)) - - cmd_string = "{0} -i {1} -r {2}".format(asciistat_executable, - ms, output_dir) - asciistat_proc_group.run(cmd_string) - - if asciistat_proc_group.wait_for_finish() != None: - raise Exception("an ASCIIStats run failed!") - - # Determine the station to remove - self.logger.debug("Select bad stations depending on collected stats") - asciiplot_output = [] - asciiplot_proc_group = SubProcessGroup(self.logger) - for (ms, output_dir) in asciistat_output: - ms_stats = os.path.join( - output_dir, os.path.split(ms)[1] + ".stats") - - cmd_string = "{0} -i {1} -o {2}".format(statplot_executable, - ms_stats, ms_stats) - asciiplot_output.append((ms, ms_stats)) - asciiplot_proc_group.run(cmd_string) - - if asciiplot_proc_group.wait_for_finish() != None: - raise Exception("an ASCIIplot run failed!") - - # remove the bad stations - self.logger.debug("Use ms select to remove bad stations") - msselect_output = {} - msselect_proc_group = SubProcessGroup(self.logger) - for ms, ms_stats in asciiplot_output: - # parse the .tab file containing the bad stations - station_to_filter = [] - file_pointer = open(ms_stats + ".tab") - - for line in file_pointer.readlines(): - # skip headed line - if line[0] == "#": - continue - - entries = line.split() - # if the current station is bad (the last entry on the line) - if entries[-1] == "True": - # add the name of station - station_to_filter.append(entries[1]) - - # if this measurement does not contain baselines to skip do not - # filter and provide the original ms as output - if len(station_to_filter) == 0: - msselect_output[ms] = ms - continue - - ms_output_path = ms + ".filtered" - msselect_output[ms] = ms_output_path - - # use msselect to remove the stations from the ms - msselect_baseline = "!{0}".format(",".join(station_to_filter)) - cmd_string = "{0} in={1} out={2} baseline={3} deep={4}".format( - msselect_executable, ms, ms_output_path, - msselect_baseline, "False") - msselect_proc_group.run(cmd_string) - - if msselect_proc_group.wait_for_finish() != None: - raise Exception("an MSselect run failed!") - - filtered_list_of_ms = [] - # The order of the inputs needs to be preserved when producing the - # filtered output! - for input_ms in time_slice_path_list: - filtered_list_of_ms.append(msselect_output[input_ms]) - - return filtered_list_of_ms if __name__ == "__main__": diff --git a/CEP/Pipeline/recipes/sip/nodes/selfcal_awimager.py b/CEP/Pipeline/recipes/sip/nodes/selfcal_awimager.py new file mode 100644 index 0000000000000000000000000000000000000000..db7267f7c3f03daac89d6ca2fe0ea83969834741 --- /dev/null +++ b/CEP/Pipeline/recipes/sip/nodes/selfcal_awimager.py @@ -0,0 +1,825 @@ +# LOFAR AUTOMATIC IMAGING PIPELINE +# awimager +# The awimager recipe creates based an image of the field of view. 
Based on +# nine concatenated and measurementsets each spanning 10 subbands +# The recipe contains two parts: The call to awimager +# and secondairy some functionality that calculates settings (for awimager) +# based on the information present in the measurement set +# The calculated parameters are: +# 1: The cellsize +# 2: The npixels in a each of the two dimension of the image +# 3. What columns use to determine the maximum baseline +# 4. The number of projection planes +# Wouter Klijn 2012 +# klijn@astron.nl +# Nicolas Vilchez, 2014 +# vilchez@astron.nl +# ----------------------------------------------------------------------------- +from __future__ import with_statement +import sys +import shutil +import os.path +import math +import pyfits + +from lofarpipe.support.lofarnode import LOFARnodeTCP +from lofarpipe.support.pipelinelogging import CatchLog4CPlus +from lofarpipe.support.pipelinelogging import log_time +from lofarpipe.support.utilities import patch_parset +from lofarpipe.support.utilities import get_parset +from lofarpipe.support.utilities import catch_segfaults +from lofarpipe.support.lofarexceptions import PipelineException +import pyrap.tables as pt # @UnresolvedImport +from subprocess import CalledProcessError +from lofarpipe.support.utilities import create_directory +import pyrap.images as pim # @UnresolvedImport +from lofarpipe.support.parset import Parset +import lofar.parmdb # @UnresolvedImport +import numpy as np + + +class selfcal_awimager(LOFARnodeTCP): + def run(self, executable, environment, parset, working_directory, + output_image, concatenated_measurement_set, sourcedb_path, + mask_patch_size, autogenerate_parameters, specify_fov, fov, + major_cycle, nr_cycles, perform_self_cal): + """ + :param executable: Path to awimager executable + :param environment: environment for catch_segfaults (executable runner) + :param parset: parameters for the awimager, + :param working_directory: directory the place temporary files + :param output_image: location and filesname to story the output images + the multiple images are appended with type extentions + :param concatenated_measurement_set: Input measurement set + :param sourcedb_path: Path the the sourcedb used to create the image + mask + :param mask_patch_size: Scaling of the patch around the source in the + mask + :param autogenerate_parameters: Turns on the autogeneration of: + cellsize, npix, wprojplanes, wmax, fov + :param fov: if autogenerate_parameters is false calculate + imageparameter (cellsize, npix, wprojplanes, wmax) relative to this + fov + :param major_cycle: number of the self calibration cycle to determine + the imaging parameters: cellsize, npix, wprojplanes, wmax, fov + :param nr_cycles: The requested number of self cal cycles + :param perform_self_cal: Bool used to control the selfcal functionality + or the old semi-automatic functionality + :rtype: self.outputs["image"] The path to the output image + """ + self.logger.info("Start selfcal_awimager node run:") + log4_cplus_name = "selfcal_awimager" + self.environment.update(environment) + + with log_time(self.logger): + # Read the parameters as specified in the parset + parset_object = get_parset(parset) + + # ************************************************************* + # 1. 
Calculate awimager parameters that depend on measurement set + # and the parset + if perform_self_cal: + # Calculate awimager parameters that depend on measurement set + # and the parset + self.logger.info( + "Calculating selfcalibration parameters ") + cell_size, npix, w_max, w_proj_planes, \ + UVmin, UVmax, robust, threshold =\ + self._get_selfcal_parameters( + concatenated_measurement_set, + parset, major_cycle, nr_cycles) + + self._save_selfcal_info(concatenated_measurement_set, + major_cycle, npix, UVmin, UVmax) + + else: + self.logger.info( + "Calculating parameters.. ( NOT selfcalibration)") + cell_size, npix, w_max, w_proj_planes = \ + self._get_imaging_parameters( + concatenated_measurement_set, + parset, + autogenerate_parameters, + specify_fov, + fov) + + self.logger.info("Using autogenerated parameters; ") + self.logger.info( + "Calculated parameters: cell_size: {0}, npix: {1}".format( + cell_size, npix)) + + self.logger.info("w_max: {0}, w_proj_planes: {1} ".format( + w_max, w_proj_planes)) + + # **************************************************************** + # 2. Get the target image location from the mapfile for the parset. + # Create target dir if it not exists + image_path_head = os.path.dirname(output_image) + create_directory(image_path_head) + self.logger.debug("Created directory to place awimager output" + " files: {0}".format(image_path_head)) + + # **************************************************************** + # 3. Create the mask + #mask_file_path = self._create_mask(npix, cell_size, output_image, + # concatenated_measurement_set, executable, + # working_directory, log4_cplus_name, sourcedb_path, + # mask_patch_size, image_path_head) + # ***************************************************************** + # 4. Update the parset with calculated parameters, and output image + patch_dictionary = {'uselogger': 'True', # enables log4cpluscd log + 'ms': str(concatenated_measurement_set), + 'cellsize': str(cell_size), + 'npix': str(npix), + 'wmax': str(w_max), + 'wprojplanes': str(w_proj_planes), + 'image': str(output_image), + 'maxsupport': str(npix) + # 'mask':str(mask_file_path), #TODO REINTRODUCE + # MASK, excluded to speed up in this debug stage + } + + # Add some aditional keys from the self calibration method + if perform_self_cal: + self_cal_patch_dict = { + 'weight': 'briggs', + 'padding': str(1.18), + 'niter' : str(1000000), + 'operation' : 'mfclark', + 'timewindow' : '300', + 'fits' : '', + 'threshold' : str(threshold), + 'robust' : str(robust), + 'UVmin' : str(UVmin), + 'UVmax' : str(UVmax), + 'maxbaseline' : str(10000000), + 'select' : str("sumsqr(UVW[:2])<1e12"), + } + patch_dictionary.update(self_cal_patch_dict) + + # save the parset at the target dir for the image + calculated_parset_path = os.path.join(image_path_head, + "parset.par") + + try: + temp_parset_filename = patch_parset(parset, patch_dictionary) + # Copy tmp file to the final location + shutil.copyfile(temp_parset_filename, calculated_parset_path) + self.logger.debug("Wrote parset for awimager run: {0}".format( + calculated_parset_path)) + finally: + # remove temp file + os.remove(temp_parset_filename) + + # ***************************************************************** + # 5. Run the awimager with the parameterset + + # TODO: FIXME: manually Limit number of threads used. 
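The parset handed to awimager below is produced by patch_parset from
lofarpipe.support.utilities, which this recipe uses as a black box. Purely as
an illustration of the mechanism, and assuming a plain key=value parset
format, a stand-in with a hypothetical name could look like the sketch below;
only the override keys (npix, cellsize, wmax, and so on) come from the recipe
itself:

    import os
    import tempfile

    def patch_parset_sketch(parset_path, overrides):
        # Read the existing key=value pairs, apply the computed overrides and
        # write the result to a temporary file; the caller copies that file
        # next to the output image and then removes the temporary copy.
        values = {}
        with open(parset_path) as parset_file:
            for line in parset_file:
                if "=" in line:
                    key, value = line.split("=", 1)
                    values[key.strip()] = value.strip()
        values.update(overrides)
        handle, patched_path = tempfile.mkstemp(suffix=".par")
        with os.fdopen(handle, "w") as patched_file:
            for key, value in sorted(values.items()):
                patched_file.write("{0}={1}\n".format(key, value))
        return patched_path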
+ omp_num_threads = 8 + self.environment['OMP_NUM_THREADS'] = str(omp_num_threads) + self.logger.debug("Using %s threads for swimager" % omp_num_threads) + + cmd = [executable, calculated_parset_path] + self.logger.debug("Parset used for awimager run:") + self.logger.debug(cmd) + try: + with CatchLog4CPlus(working_directory, + self.logger.name + "." + + os.path.basename(log4_cplus_name), + os.path.basename(executable) + ) as logger: + catch_segfaults(cmd, working_directory, self.environment, + logger, usageStats=self.resourceMonitor) + + # Thrown by catch_segfault + except CalledProcessError, exception: + self.logger.error(str(exception)) + return 1 + + except Exception, exception: + self.logger.error(str(exception)) + return 1 + + # ********************************************************************* + # 6. Return output + # Append static .restored: This might change but prob. not + # The actual output image has this extention always, default of + # awimager + self.outputs["image"] = output_image + ".restored" + return 0 + + def _get_imaging_parameters(self, measurement_set, parset, + autogenerate_parameters, specify_fov, fov): + """ + (1) calculate and format some parameters that are determined runtime. + Based on values in the measurementset and input parameter (set): + + a. <string> The cellsize + b. <int> The npixels in a each of the two dimension of the image + c. <string> The largest baseline in the ms smaller then the maxbaseline + d. <string> The number of projection planes + + The calculation of these parameters is done in three steps: + + 1. Calculate intermediate results based on the ms. + 2. The calculation of the actual target values using intermediate + result + """ + # ********************************************************************* + # 1. Get partial solutions from the parameter set + # Get the parset and a number of raw parameters from this parset + parset_object = get_parset(parset) + baseline_limit = parset_object.getInt('maxbaseline') + + # Get the longest baseline + max_baseline = pt.taql( + 'CALC sqrt(max([select sumsqr(UVW[:2]) from ' + \ + '{0} where sumsqr(UVW[:2]) <{1} giving as memory]))'.format(\ + measurement_set, baseline_limit * + baseline_limit))[0] # ask ger van diepen for details if ness. + # Calculate the wave_length + table_ms = pt.table(measurement_set) + table_spectral_window = pt.table( + table_ms.getkeyword("SPECTRAL_WINDOW")) + freq = table_spectral_window.getcell("REF_FREQUENCY", 0) + + table_spectral_window.close() + wave_length = pt.taql('CALC C()') / freq + wave_length = wave_length[0] + + # Calculate the cell_size from the ms + arc_sec_in_degree = 3600 + arc_sec_in_rad = (180.0 / math.pi) * arc_sec_in_degree + cell_size = (1.0 / 3) * (wave_length / float(max_baseline))\ + * arc_sec_in_rad + + # Calculate the number of pixels in x and y dim + # fov and diameter depending on the antenna name + fov_from_ms, station_diameter = self._get_fov_and_station_diameter( + measurement_set) + + # use fov for to calculate a semi 'user' specified npix and cellsize + # The npix thus depends on the ms cellsize and fov + # Do not use use supplied Fov if autogenerating + if not autogenerate_parameters and specify_fov: + if fov == 0.0: + raise PipelineException("fov set to 0.0: invalid value.") + + # else use full resolution (calculate the fov) + else: + self.logger.info("Using fov calculated on measurement data: " + + str(fov_from_ms)) + fov = fov_from_ms + + # ******************************************************************** + # 2. 
Calculate the ms based output variables + # 'optimal' npix based on measurement set calculations or user specified + npix = (arc_sec_in_degree * fov) / cell_size + + # Get the closest power of two larger then the calculated pixel size + npix = self._nearest_ceiled_power2(npix) + + # Get the max w with baseline < 10000 + w_max = pt.taql('CALC max([select UVW[2] from ' + \ + '{0} where sumsqr(UVW[:2]) <{1} giving as memory])'.format( + measurement_set, baseline_limit * baseline_limit))[0] + + # Calculate number of projection planes + w_proj_planes = min(257, math.floor((max_baseline * wave_length) / + (station_diameter ** 2))) + w_proj_planes = int(round(w_proj_planes)) + + # MAximum number of proj planes set to 1024: George Heald, Ger van + # Diepen if this exception occurs + maxsupport = max(1024, npix) + if w_proj_planes > maxsupport: + raise Exception("The number of projections planes for the current" + + "measurement set is to large.") + + # ********************************************************************* + # 3. if the npix from the parset is different to the ms calculations, + # calculate a sizeconverter value (to be applied to the cellsize) + if npix < 256: + self.logger.warn("Using a image size smaller then 256x256:" + " This leads to problematic imaging in some instances!!") + + # If we are not autocalculating based on ms or fov, use the npix + # and cell_size specified in the parset + # keep the wmax and w_proj_planes + if (not autogenerate_parameters and not specify_fov): + npix = parset_object.getString('npix') + cell_size_formatted = parset_object.getString('cellsize') + else: + cell_size_formatted = str( + int(round(cell_size))) + 'arcsec' + + self.logger.info("Using the following awimager parameters:" + " cell_size: {0}, npix: {1},".format( + cell_size_formatted, npix) + + " w_max: {0}, w_proj_planes: {1}".format(w_max, w_proj_planes)) + + return cell_size_formatted, str(npix), str(w_max), str(w_proj_planes) + + + # Awimager parameters for selfcal process (depends with major cycle) + # nicolas: THis function needs a lot more documentation: + # THis is the function that does the magic. + # For each step everything must be cristal clear what is happening. + # I will need to support this function + # this function is full with magic numbers + # Instead of: + # variable_a = 3.14 * 3600 * 5 + # use: + # pi = math.pi + # second_in_hour = 3600 + # factorx = 5 # Factor controlling x, value based on manual optimalization + def _get_selfcal_parameters(self, measurement_set, parset, major_cycle, + nr_cycles): + """ + 0. modify the nof cycle to have a final step at the same resolution + as the previous last cycle + 1. Determine target coordinates especially declinaison, because + for low dec (<35 deg) UVmin = 0.1 to excluse very short baseline + 2. Determine the frequency and the wavelenght + 3. Determine the longuest baseline and the best resolution avaible + 4. Estimate all imaging parameters + 5. Calculate number of projection planes + 6. Pixelsize must be a string number : number +arcsec + + # Nicolas Vilchez, 2014 + # vilchez@astron.nl + """ + + + # ******************************************************************** + #0. modify the nof cycle to have a final step at the same resolution + #as the previous last cycle + + if major_cycle < nr_cycles-1: + nr_cycles = nr_cycles-1 + + scaling_factor = float(major_cycle) / float(nr_cycles - 1) + + # ******************************************************************** + #1. 
Determine Target coordinates for UVmin + tabtarget = pt.table(measurement_set) + tabfield = pt.table(tabtarget.getkeyword('FIELD')) + coords = tabfield.getcell('REFERENCE_DIR',0) + target = coords[0] * 180.0 / math.pi # Why + + UVmin=0 + if target[1] <= 35: # WHy? + UVmin = 0.1 + + ra_target = target[0] + 360.0 # Why + dec_target = target[1] + + # ******************************************************************** + # 2. Determine the frequency and the wavelenght + tabfreq = pt.table(measurement_set) + table_spectral_window = pt.table(tabfreq.getkeyword("SPECTRAL_WINDOW")) + frequency = table_spectral_window.getcell('REF_FREQUENCY', 0) + + wavelenght = 3.0E8 / frequency # Why + + # ******************************************************************** + # 3. Determine the longuest baseline and the best resolution avaible + + tabbaseline = pt.table(measurement_set, readonly=False, ack=True) + posbaseline = tabbaseline.getcol('UVW') + maxBaseline = max(posbaseline[:, 0] ** 2 + + posbaseline[:, 1] ** 2) ** 0.5 + + bestBeamresol = round((wavelenght / maxBaseline) * + (180.0 / math.pi) * 3600.0, 0) + + # Beam resolution limitation to 10arcsec to avoid too large images + if bestBeamresol < 10.0: + bestBeamresol = 10.0 + + # ******************************************************************** + # 4. Estimate all imaging parameters + + # estimate fov + # fov = 5 degree, except for High HBA Observation => 1.5 degree + if frequency > 1.9E8: + fov = 1.5 + else: + fov = 5.0 + + # we need 4 pixel/beam to have enough sampling + pixPerBeam = 4.0 + + # best resolution pixel size (i.e final pixel size for selfcal) + bestPixelResol = round(bestBeamresol / pixPerBeam, 2) + + # factor to estimate the starting resolution (9 times in this case) + badResolFactor = 9 + + pixsize = round((badResolFactor * bestPixelResol) - + (badResolFactor * bestPixelResol - bestPixelResol) * + scaling_factor , 3) + + + # number of pixel must be a multiple of 2 !! + nbpixel = int(fov * 3600.0 / pixsize) + if nbpixel % 2 ==1: + nbpixel = nbpixel + 1 + + robust = 0 #round(1.0 - (3.0 * scaling_factor), 2) + + UVmax = round((wavelenght) / + (pixPerBeam * pixsize / 3600.0 * math.pi / 180.0 ) / + (1E3 * wavelenght), 3) + + wmax = round(UVmax * (wavelenght) * 1E3, 3) + + # ******************************************************************** + # 5. Calculate number of projection planes + # Need to compute station diameter (the fov is fixed to 5 degree) + # using wouter's function, to compute the w_proj_planes + # fov and diameter depending on the antenna name + fov_from_ms, station_diameter = self._get_fov_and_station_diameter( + measurement_set) + + w_proj_planes = min(257, math.floor((maxBaseline * wavelenght) / + (station_diameter ** 2))) + w_proj_planes = int(round(w_proj_planes)) + + # MAximum number of proj planes set to 1024: George Heald, Ger van + # Diepen if this exception occurs + maxsupport = max(1024, nbpixel) + if w_proj_planes > maxsupport: + raise Exception("The number of projections planes for the current" + + "measurement set is to large.") + + # Warnings on pixel size + if nbpixel < 256: + self.logger.warn("Using a image size smaller then 256x256: This " + + "leads to problematic imaging in some instances!!") + + + # ******************************************************************** + # 6. Pixelsize must be a string number : number +arcsec + # conversion at this step + pixsize = str(pixsize)+'arcsec' + + # ******************************************************************** + # 7. 
Threshold determination from the previous cycle + if major_cycle == 0: + threshold = '0.075Jy' + else: + fits_image_path_list = measurement_set.split('concat.ms') + fits_image_path = fits_image_path_list[0] +\ + 'awimage_cycle_%s/image.fits'%(major_cycle-1) + + + # open a FITS file + fitsImage = pyfits.open(fits_image_path) + scidata = fitsImage[0].data + + dataRange = range(fitsImage[0].shape[2]) + sortedData = range(fitsImage[0].shape[2] ** 2) + + # FIXME We have the sneaking suspicion that this takes very long + # due to bad coding style... (double for loop with compute in inner loop) + for i in dataRange: + for j in dataRange: + sortedData[i * fitsImage[0].shape[2] + j] = scidata[0,0,i,j] + + sortedData = sorted(sortedData) + + # Percent of faintest data to use to determine 5sigma value : use 5% + dataPercent = int(fitsImage[0].shape[2] * 0.05) + + fiveSigmaData = sum(sortedData[0:dataPercent]) / dataPercent + threshold = (abs(fiveSigmaData) / 5.0) * (2.335 / 2.0) * 15 + + return pixsize, str(nbpixel), str(wmax), str(w_proj_planes), \ + str(UVmin), str(UVmax), str(robust), str(threshold) + + + + def _get_fov_and_station_diameter(self, measurement_set): + """ + _field_of_view calculates the fov, which is dependend on the + station type, location and mode: + For details see: + (1) http://www.astron.nl/radio-observatory/astronomers/lofar-imaging-capabilities-sensitivity/lofar-imaging-capabilities/lofar + + """ + # Open the ms + table_ms = pt.table(measurement_set) + + # Get antenna name and observation mode + antenna = pt.table(table_ms.getkeyword("ANTENNA")) + antenna_name = antenna.getcell('NAME', 0) + antenna.close() + + observation = pt.table(table_ms.getkeyword("OBSERVATION")) + antenna_set = observation.getcell('LOFAR_ANTENNA_SET', 0) + observation.close() + + # static parameters for the station diameters ref (1) + hba_core_diameter = 30.8 + hba_remote_diameter = 41.1 + lba_inner = 32.3 + lba_outer = 81.3 + + # use measurement set information to assertain antenna diameter + station_diameter = None + if antenna_name.count('HBA'): + if antenna_name.count('CS'): + station_diameter = hba_core_diameter + elif antenna_name.count('RS'): + station_diameter = hba_remote_diameter + elif antenna_name.count('LBA'): + if antenna_set.count('INNER'): + station_diameter = lba_inner + elif antenna_set.count('OUTER'): + station_diameter = lba_outer + + # raise exception if the antenna is not of a supported type + if station_diameter == None: + self.logger.error( + 'Unknown antenna type for antenna: {0} , {1}'.format(\ + antenna_name, antenna_set)) + raise PipelineException( + "Unknown antenna type encountered in Measurement set") + + # Get the wavelength + spectral_window_table = pt.table(table_ms.getkeyword( + "SPECTRAL_WINDOW")) + freq = float(spectral_window_table.getcell("REF_FREQUENCY", 0)) + wave_length = pt.taql('CALC C()') / freq + spectral_window_table.close() + + # Now calculate the FOV see ref (1) + # alpha_one is a magic parameter: The value 1.3 is representative for a + # WSRT dish, where it depends on the dish illumination + alpha_one = 1.3 + + # alpha_one is in radians so transform to degrees for output + fwhm = alpha_one * (wave_length / station_diameter) * (180 / math.pi) + fov = fwhm / 2.0 + table_ms.close() + + return fov, station_diameter + + def _create_mask(self, npix, cell_size, output_image, + concatenated_measurement_set, executable, + working_directory, log4_cplus_name, sourcedb_path, + mask_patch_size, image_path_directory): + """ + (3) create a casa image containing an 
mask blocking out the + sources in the provided sourcedb. + + It expects: + + a. the ms for which the mask will be created, it is used to de + termine some image details: (eg. pointing) + b. parameters for running within the catchsegfault framework + c. and the size of the mask_pach. + To create a mask, first a empty measurement set is created using + awimager: ready to be filled with mask data + + This function is a wrapper around some functionality written by: + fdg@mpa-garching.mpg.de + + steps: + 1. Create a parset with image paramters used by: + 2. awimager run. Creating an empty casa image. + 3. Fill the casa image with mask data + + """ + # ******************************************************************** + # 1. Create the parset used to make a mask + mask_file_path = output_image + ".mask" + + mask_patch_dictionary = {"npix": str(npix), + "cellsize": str(cell_size), + "image": str(mask_file_path), + "ms": str(concatenated_measurement_set), + "operation": "empty", + "stokes": "'I'" + } + mask_parset = Parset.fromDict(mask_patch_dictionary) + mask_parset_path = os.path.join(image_path_directory, "mask.par") + mask_parset.writeFile(mask_parset_path) + self.logger.debug( + "Write parset for awimager mask creation: {0}".format( + mask_parset_path)) + + # ********************************************************************* + # 2. Create an empty mask using awimager + cmd = [executable, mask_parset_path] + self.logger.info(" ".join(cmd)) + try: + with CatchLog4CPlus(working_directory, + self.logger.name + "." + os.path.basename(log4_cplus_name), + os.path.basename(executable) + ) as logger: + catch_segfaults(cmd, working_directory, self.environment, + logger) + # Thrown by catch_segfault + except CalledProcessError, exception: + self.logger.error(str(exception)) + return 1 + except Exception, exception: + self.logger.error(str(exception)) + return 1 + + # ******************************************************************** + # 3. create the actual mask + self.logger.debug("Started mask creation using mask_patch_size:" + " {0}".format(mask_patch_size)) + + self._msss_mask(mask_file_path, sourcedb_path, mask_patch_size) + self.logger.debug("Fished mask creation") + return mask_file_path + + def _msss_mask(self, mask_file_path, sourcedb_path, mask_patch_size = 1.0): + """ + Fill casa image with a mask based on skymodel(sourcedb) + Bugs: fdg@mpa-garching.mpg.de + + pipeline implementation klijn@astron.nl + version 0.32 + + Edited by JDS, 2012-03-16: + - Properly convert maj/minor axes to half length + - Handle empty fields in sky model by setting them to 0 + - Fix off-by-one error at mask boundary + + FIXED BUG + - if a source is outside the mask, the script ignores it + - if a source is on the border, the script draws only the inner part + - can handle skymodels with different headers + + KNOWN BUG + - not works with single line skymodels, workaround: add a fake + source outside the field + - mask patched display large amounts of aliasing. A possible + sollution would + be normalizing to pixel centre. ( int(normalize_x * npix) / + npix + (0.5 /npix)) + ideally the patch would increment in pixel radiuses + + Version 0.3 (Wouter Klijn, klijn@astron.nl) + - Usage of sourcedb instead of txt document as 'source' of sources + This allows input from different source sources + Version 0.31 (Wouter Klijn, klijn@astron.nl) + - Adaptable patch size (patch size needs specification) + - Patch size and geometry is broken: needs some astronomer magic to + fix it, problem with afine transformation prol. 
+ Version 0.32 (Wouter Klijn, klijn@astron.nl) + - Renaming of variable names to python convention + """ + # increment in maj/minor axes [arcsec] + pad = 500. + + # open mask + mask = pim.image(mask_file_path, overwrite = True) + mask_data = mask.getdata() + xlen, ylen = mask.shape()[2:] + freq, stokes, null, null = mask.toworld([0, 0, 0, 0]) + + # Open the sourcedb: + table = pt.table(sourcedb_path + "::SOURCES") + pdb = lofar.parmdb.parmdb(sourcedb_path) + + # Get the data of interest + source_list = table.getcol("SOURCENAME") + source_type_list = table.getcol("SOURCETYPE") + # All date in the format valuetype:sourcename + all_values_dict = pdb.getDefValues() + + # Loop the sources + for source, source_type in zip(source_list, source_type_list): + if source_type == 1: + type_string = "Gaussian" + else: + type_string = "Point" + self.logger.info("processing: {0} ({1})".format(source, + type_string)) + + # Get de right_ascension and declination (already in radians) + right_ascension = all_values_dict["Ra:" + source][0, 0] + declination = all_values_dict["Dec:" + source][0, 0] + if source_type == 1: + # Get the raw values from the db + maj_raw = all_values_dict["MajorAxis:" + source][0, 0] + min_raw = all_values_dict["MinorAxis:" + source][0, 0] + pa_raw = all_values_dict["Orientation:" + source][0, 0] + # convert to radians (conversion is copy paste JDS) + # major radius (+pad) in rad + maj = (((maj_raw + pad)) / 3600.) * np.pi / 180. + # minor radius (+pad) in rad + minor = (((min_raw + pad)) / 3600.) * np.pi / 180. + pix_asc = pa_raw * np.pi / 180. + # wenss writes always 'GAUSSIAN' even for point sources + # -> set to wenss beam+pad + if maj == 0 or minor == 0: + maj = ((54. + pad) / 3600.) * np.pi / 180. + minor = ((54. + pad) / 3600.) * np.pi / 180. + # set to wenss beam+pad + elif source_type == 0: + maj = (((54. + pad) / 2.) / 3600.) * np.pi / 180. + minor = (((54. + pad) / 2.) / 3600.) * np.pi / 180. + pix_asc = 0. + else: + self.logger.info( + "WARNING: unknown source source_type ({0})," + "ignoring: ".format(source_type)) + continue + + # define a small square around the source to look for it + null, null, border_y1, border_x1 = mask.topixel( + [freq, stokes, declination - maj, + right_ascension - maj / np.cos(declination - maj)]) + null, null, border_y2, border_x2 = mask.topixel( + [freq, stokes, declination + maj, + right_ascension + maj / np.cos(declination + maj)]) + xmin = np.int(np.floor(np.min([border_x1, border_x2]))) + xmax = np.int(np.ceil(np.max([border_x1, border_x2]))) + ymin = np.int(np.floor(np.min([border_y1, border_y2]))) + ymax = np.int(np.ceil(np.max([border_y1, border_y2]))) + + if xmin > xlen or ymin > ylen or xmax < 0 or ymax < 0: + self.logger.info( + "WARNING: source {0} falls outside the mask," + " ignoring: ".format(source)) + continue + + if xmax > xlen or ymax > ylen or xmin < 0 or ymin < 0: + self.logger.info( + "WARNING: source {0} falls across map edge".format(source)) + + for pixel_x in xrange(xmin, xmax): + for pixel_y in xrange(ymin, ymax): + # skip pixels outside the mask field + if pixel_x >= xlen or pixel_y >= ylen or\ + pixel_x < 0 or pixel_y < 0: + continue + # get pixel right_ascension and declination in rad + null, null, pix_dec, pix_ra = mask.toworld( + [0, 0, pixel_y, pixel_x]) + # Translate and rotate coords. 
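+                    # The pixel's (ra, dec) offset from the source centre is
+                    # rotated by the orientation angle pix_asc so that the
+                    # x/y axes line up with the major/minor axes of the
+                    # source ellipse. A pixel is then inside the (scaled)
+                    # ellipse when x**2 / maj**2 + y**2 / minor**2 is below
+                    # mask_patch_size, i.e. mask_patch_size grows or shrinks
+                    # the masked patch drawn around each source.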
+ translated_pixel_x = (pix_ra - right_ascension) * np.sin( + pix_asc) + (pix_dec - declination) * np.cos(pix_asc) + # to align with ellipse + translate_pixel_y = -(pix_ra - right_ascension) * np.cos( + pix_asc) + (pix_dec - declination) * np.sin(pix_asc) + if (((translated_pixel_x ** 2) / (maj ** 2)) + + ((translate_pixel_y ** 2) / (minor ** 2))) < \ + mask_patch_size: + mask_data[0, 0, pixel_y, pixel_x] = 1 + null = null + mask.putdata(mask_data) + table.close() + + # some helper functions + def _nearest_ceiled_power2(self, value): + """ + Return the int value of the nearest ceiled power of 2 for the + supplied argument + + """ + return int(pow(2, math.ceil(math.log(value, 2)))) + + + def _save_selfcal_info(self, concatenated_measurement_set, + major_cycle, npix, UVmin, UVmax): + """ + The selfcal team requested meta information to be added to + the measurement set that allows the reproduction of intermediate + steps. + """ + self.logger.info("Saving selfcal parameters to file:") + meta_file = os.path.join( + concatenated_measurement_set + "_selfcal_information", + "uvcut_and_npix.txt") + self.logger.info(meta_file) + + # If the output file does not exist yet, write the header first + if not os.path.exists(meta_file): + meta_file_pt = open(meta_file, 'w') + meta_file_pt.write("#cycle_nr npix uvmin(klambda) uvmax(klambda)\n") + meta_file_pt.close() + + meta_file_pt = open(meta_file, 'a') + + # Create the actual string with info + meta_info_str = " ".join([str(major_cycle), + str(npix), + str(UVmin), + str(UVmax)]) + meta_file_pt.write(meta_info_str + "\n") + meta_file_pt.close() + + +if __name__ == "__main__": + _JOBID, _JOBHOST, _JOBPORT = sys.argv[1:4] + sys.exit(selfcal_awimager( + _JOBID, _JOBHOST, _JOBPORT).run_with_stored_arguments()) + diff --git a/CEP/Pipeline/recipes/sip/nodes/selfcal_bbs.py b/CEP/Pipeline/recipes/sip/nodes/selfcal_bbs.py new file mode 100644 index 0000000000000000000000000000000000000000..874d4f4d7c449ec11c00e0a0d2d0ab572ca7e365 --- /dev/null +++ b/CEP/Pipeline/recipes/sip/nodes/selfcal_bbs.py @@ -0,0 +1,118 @@ +# LOFAR AUTOMATIC IMAGING PIPELINE +# selfcal_bbs +# Wouter Klijn 2012 +# klijn@astron.nl +# Nicolas Vilchez, 2014 +# vilchez@astron.nl +# ----------------------------------------------------------------------------- +from __future__ import with_statement +import sys +import os + +import pyrap.tables as pt + +from lofarpipe.support.lofarnode import LOFARnodeTCP +from lofarpipe.support.group_data import load_data_map +from lofarpipe.support.subprocessgroup import SubProcessGroup +from lofarpipe.support.data_map import MultiDataMap + +class selfcal_bbs(LOFARnodeTCP): + """ + selfcal_bbs node performs a bbs run for each of the measurement sets supplied in + the mapfile at ms_list_path. Calibration is done on the sources in + the sourcedb in the mapfile sky_list_path. Solutions are stored in the + parmdb_list_path + + 1. Load the mapfiles + 2. For each measurement set to calibrate start a subprocess + 3. Check if the processes finished correctly + 4. (added by Nicolas Vilchez) concat in time the final MS + 5. (added by N.Vilchez) copy time slices directory to a new one + """ + + def run(self, bbs_executable, parset, ms_list_path, parmdb_list_path, + sky_list_path, concat_ms_path, major_cycle): + """ + selfcal_bbs functionality. Called by framework performing all the work + """ + self.logger.debug("Starting selfcal_bbs Node") + # ********************************************************************* + # 1.
Load mapfiles + # read in the mapfiles to data maps: The master recipe added the single + path to a mapfile, which allows usage of the default data methods + (load_data_map) + # TODO: Datamap + ms_map = MultiDataMap.load(ms_list_path) + parmdb_map = MultiDataMap.load(parmdb_list_path) + sky_list = MultiDataMap.load(sky_list_path) + source_db = sky_list[0].file[0] # the sourcedb is the first file entry + + try: + bbs_process_group = SubProcessGroup(self.logger, + self.resourceMonitor) + # ***************************************************************** + # 2. start the bbs executable with data + # The data is located in multimaps. We need the first entry + # TODO: This is not 'nice' usage of the multimap + for (measurement_set, parmdm) in zip(ms_map[0].file, + parmdb_map[0].file): + command = [ + bbs_executable, + "--sourcedb={0}".format(source_db), + "--parmdb={0}".format(parmdm), + measurement_set, + parset] + self.logger.info("Executing bbs command: {0}".format(" ".join( + command))) + bbs_process_group.run(command) + + # ***************************************************************** + # 3. check status of the processes + if bbs_process_group.wait_for_finish() != None: + self.logger.error( + "Failed bbs run detected. Aborting") + return 1 + + except OSError, exception: + self.logger.error("Failed to execute bbs: {0}".format(str( + exception))) + return 1 + + # ********************************************************************* + # 4. Concat in time the MSs after bbs calibration using + # msconcat (pyrap.tables module) (added by N.Vilchez) + # this step has to be performed at this location, because the bbs run + # might add additional columns not present in the original ms + # and therefore not produced in the concat done in the prepare phase + # redmine issue #6021 + pt.msconcat(ms_map[0].file, concat_ms_path, concatTime=True) + + # ********************************************************************* + # 5.
copy the time slices directory to a new one + # This is done for debugging purposes: the copy is not used for anything + # The actual selfcal steps are done in place + # (added by N.Vilchez) + # The save location is created relative to the concat.ms + # we could also use the self.scratch_directory from the toplevel recipe + # this would need an additional ingredient + # This is a 'debugging' step and should never ever cause a failure of + # the pipeline + try: + working_dir = os.path.dirname(concat_ms_path) + time_slice_dir = os.path.join(working_dir, 'time_slices') + time_slice_copy_dir = os.path.join(working_dir, + 'time_slices_cycle_{0}'.format(major_cycle)) + + cmd = "cp -r {0} {1}".format(time_slice_dir, time_slice_copy_dir) + os.system(cmd) + except: + self.logger.warn( + "Debug copy of temporary files failed: continuing operations") + pass # Do nothing + + return 0 + + +if __name__ == "__main__": + _JOBID, _JOBHOST, _JOBPORT = sys.argv[1:4] + sys.exit(selfcal_bbs(_JOBID, _JOBHOST, _JOBPORT).run_with_stored_arguments()) diff --git a/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py b/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py new file mode 100644 index 0000000000000000000000000000000000000000..a76465e9fabf12d79e7a6491d9a4edb2b0d171ed --- /dev/null +++ b/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py @@ -0,0 +1,196 @@ +# LOFAR IMAGING PIPELINE +# +# selfcal_finalize +# Wouter Klijn 2012 +# klijn@astron.nl +# ------------------------------------------------------------------------------ +from __future__ import with_statement +import sys +import subprocess +import os +import tempfile +import shutil + +from lofarpipe.support.lofarnode import LOFARnodeTCP +from lofarpipe.support.utilities import log_time, create_directory +import lofar.addImagingInfo as addimg +import pyrap.images as pim +from lofarpipe.support.utilities import catch_segfaults +from lofarpipe.support.data_map import DataMap +from lofarpipe.support.pipelinelogging import CatchLog4CPlus +from lofarpipe.support.subprocessgroup import SubProcessGroup + +import urllib2 +import lofarpipe.recipes.helpers.MultipartPostHandler as mph + +class selfcal_finalize(LOFARnodeTCP): + """ + This script performs the following functions: + + 1. Add the image info to the casa image: + addimg.addImagingInfo (imageName, msNames, sourcedbName, minbl, maxbl) + 2. Convert the image to hdf5 and fits format + 3. Filling of the HDF5 root group + 4. Move meta data of selfcal to correct dir in ms + 5.
Deepcopy ms to output location + """ + def run(self, awimager_output, ms_per_image, sourcelist, target, + output_image, minbaseline, maxbaseline, processed_ms_dir, + fillrootimagegroup_exec, environment, sourcedb, concat_ms, + correlated_output_location, msselect_executable): + self.environment.update(environment) + """ + :param awimager_output: Path to the casa image produced by awimager + :param ms_per_image: The X (90) measurements set scheduled to + create the image + :param sourcelist: list of sources found in the image + :param target: <unused> + :param minbaseline: Minimum baseline used for the image + :param maxbaseline: largest/maximum baseline used for the image + :param processed_ms_dir: The X (90) measurements set actually used to + create the image + :param fillrootimagegroup_exec: Executable used to add image data to + the hdf5 image + + :rtype: self.outputs['hdf5'] set to "succes" to signal node succes + :rtype: self.outputs['image'] path to the produced hdf5 image + """ + with log_time(self.logger): + ms_per_image_map = DataMap.load(ms_per_image) + + # ***************************************************************** + # 1. add image info + # Get all the files in the processed measurement dir + file_list = os.listdir(processed_ms_dir) + + processed_ms_paths = [] + ms_per_image_map.iterator = DataMap.SkipIterator + for item in ms_per_image_map: + ms_path = item.file + processed_ms_paths.append(ms_path) + + #add the information the image + try: + self.logger.debug("Start addImage Info") + addimg.addImagingInfo(awimager_output, processed_ms_paths, + sourcedb, minbaseline, maxbaseline) + + except Exception, error: + self.logger.warn("addImagingInfo Threw Exception:") + self.logger.warn(error) + # Catch raising of already done error: allows for rerunning + # of the recipe + if "addImagingInfo already done" in str(error): + self.logger.warn("addImagingInfo already done, continue") + pass + else: + raise Exception(error) + #The majority of the tables is updated correctly + + # *************************************************************** + # 2. convert to hdf5 image format + output_directory = None + pim_image = pim.image(awimager_output) + try: + self.logger.info("Saving image in HDF5 Format to: {0}" .format( + output_image)) + # Create the output directory + output_directory = os.path.dirname(output_image) + create_directory(output_directory) + # save the image + pim_image.saveas(output_image, hdf5=True) + + except Exception, error: + self.logger.error( + "Exception raised inside pyrap.images: {0}".format( + str(error))) + raise error + + # Convert to fits + # create target location + fits_output = output_image + ".fits" + # To allow reruns a possible earlier version needs to be removed! + # image2fits fails if not done!! + if os.path.exists(fits_output): + os.unlink(fits_output) + + try: + self.logger.debug("Start convert to fits") + temp_dir = tempfile.mkdtemp() + with CatchLog4CPlus(temp_dir, + self.logger.name + '.' + os.path.basename(awimager_output), + "image2fits") as logger: + catch_segfaults(["image2fits", '-in', awimager_output, + '-out', fits_output], + temp_dir, self.environment, logger) + except Exception, excp: + self.logger.error(str(excp)) + return 1 + finally: + shutil.rmtree(temp_dir) + + # **************************************************************** + # 3. 
Filling of the HDF5 root group + command = [fillrootimagegroup_exec, output_image] + self.logger.info(" ".join(command)) + #Spawn a subprocess and connect the pipes + proc = subprocess.Popen( + command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + (stdoutdata, stderrdata) = proc.communicate() + + exit_status = proc.returncode + + + #if copy failed log the missing file + if exit_status != 0: + self.logger.error("Error using the fillRootImageGroup command" + ". Exit status: {0}".format(exit_status)) + self.logger.error(stdoutdata) + self.logger.error(stderrdata) + + return 1 + + # ***************************************************************** + # 4. Move the meta information to the correct directory next to the + # concat.ms + self.logger.info("Save-ing selfcal parameters to file:") + meta_dir = concat_ms + "_selfcal_information" + meta_dir_target = os.path.join(concat_ms, "selfcal_information") + if os.path.exists(meta_dir) and os.path.exists(concat_ms): + self.logger.info("Copy meta information to output measurementset") + + # Clear possible old data, allows for rerun of the pipeline + # if needed. + if os.path.exists(meta_dir_target): + shutil.rmtree(meta_dir_target) + shutil.copytree(meta_dir, meta_dir_target) + + # ***************************************************************** + # 4 Copy the measurement set to the output directory + # use msselect to copy all the data in the measurement sets + + cmd_string = "{0} in={1} out={2} baseline=* deep=True".format( + msselect_executable, concat_ms, correlated_output_location) + msselect_proc_group = SubProcessGroup(self.logger) + msselect_proc_group.run(cmd_string) + if msselect_proc_group.wait_for_finish() != None: + self.logger.error("failed copy of measurmentset to output dir") + raise Exception("an MSselect run failed!") + + self.outputs["hdf5"] = "succes" + self.outputs["image"] = output_image + self.outputs["correlated"] = correlated_output_location + + + return 0 + + +if __name__ == "__main__": + + _JOBID, _JOBHOST, _JOBPORT = sys.argv[1:4] + sys.exit(selfcal_finalize(_JOBID, _JOBHOST, + _JOBPORT).run_with_stored_arguments()) diff --git a/CEP/Pipeline/recipes/sip/tasks.cfg.in b/CEP/Pipeline/recipes/sip/tasks.cfg.in index 9bcac447a895546a6e7f1f0eae8823d3765981a6..1282aa18d15713b2b9309f3c3117b5fc8cf28a64 100644 --- a/CEP/Pipeline/recipes/sip/tasks.cfg.in +++ b/CEP/Pipeline/recipes/sip/tasks.cfg.in @@ -105,3 +105,15 @@ instrument_mapfile = %(runtime_directory)s/%(job_name)s/mapfiles/instrument.mapf sky_mapfile = %(runtime_directory)s/%(job_name)s/mapfiles/sky.mapfile data_mapfile = %(runtime_directory)s/%(job_name)s/mapfiles/bbs.mapfile +[selfcal_awimager] +recipe = selfcal_awimager +executable = %(lofarroot)s/bin/awimager + +[selfcal_bbs] +recipe = selfcal_bbs +bbs_executable = %(lofarroot)s/bin/bbs-reducer + +[selfcal_finalize] +recipe = selfcal_finalize +fillrootimagegroup_exec = %(lofarroot)s/bin/fillRootImageGroup +msselect_executable = %(lofarroot)s/bin/msselect diff --git a/CEP/Pipeline/test/recipes/nodes/imager_prepare_test.py b/CEP/Pipeline/test/recipes/nodes/imager_prepare_test.py index 35e61a823acbc5d50635faae89d9228840efb14b..d0716889161a307c33d068ff5616a5df091992c7 100644 --- a/CEP/Pipeline/test/recipes/nodes/imager_prepare_test.py +++ b/CEP/Pipeline/test/recipes/nodes/imager_prepare_test.py @@ -12,6 +12,7 @@ from lofarpipe.support.utilities import create_directory from lofarpipe.recipes.nodes.imager_prepare import imager_prepare \ as imager_prepare_node from logger import logger +from 
lofarpipe.support.data_map import DataMap class ImagerPrepareTestWrapper(imager_prepare_node): """ @@ -29,6 +30,9 @@ class ImagerPrepareTestWrapper(imager_prepare_node): def _dppp_call(self, working_dir, ndppp, cmd, environment): self.dppp_call_vars = (working_dir, ndppp, cmd, environment) + def _get_nchan_from_ms(self, file): + return 4 + class ImagerPrepareTest(unittest.TestCase): """ @@ -104,6 +108,11 @@ class ImagerPrepareTest(unittest.TestCase): ("lce072", "test_file_path2"), ("lce072", "test_file_path3"), ("lce072", "test_file_path4")] + + input_datamap = DataMap() + for entry in input_map: + input_datamap.append(entry) + subbands_per_image = 2 collected_ms_dir_name = "" fp = open(os.path.join(self.test_path, "parset"), 'w') @@ -115,7 +124,7 @@ class ImagerPrepareTest(unittest.TestCase): sut = ImagerPrepareTestWrapper() output = sut._run_dppp(working_dir, time_slice_dir_path, slices_per_image, - input_map, subbands_per_image, collected_ms_dir_name, parset, + input_datamap, subbands_per_image, collected_ms_dir_name, parset, ndppp) # The output should contain two timeslices ms prepended with the time_slice_dir_path @@ -127,6 +136,7 @@ class ImagerPrepareTest(unittest.TestCase): # Two parset should be written in the time_slice_dir_path parset_1_content_expected = [('replace', 'uselogger', 'True'), + ('replace', 'avg1.freqstep', '4'), ('replace', 'msin', "['test_file_path1', 'test_file_path2']"), ('replace', 'msout', '{0}'.format( os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms")))] @@ -138,6 +148,7 @@ class ImagerPrepareTest(unittest.TestCase): # Two parset should be written in the time_slice_dir_path parset_2_content_expected = [('replace', 'uselogger', 'True'), + ('replace', 'avg1.freqstep', '4'), ('replace', 'msin', "['test_file_path3', 'test_file_path4']"), ('replace', 'msout', '{0}'.format( os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")))] diff --git a/CEP/Pipeline/test/regression_tests/long_baseline_pipeline_test.py b/CEP/Pipeline/test/regression_tests/long_baseline_pipeline_test.py index df873f32d297044ab5cb87e673b0cf48bee761d7..2a0de9e172a341ee8d58bd92b04bf3065ed79bcc 100644 --- a/CEP/Pipeline/test/regression_tests/long_baseline_pipeline_test.py +++ b/CEP/Pipeline/test/regression_tests/long_baseline_pipeline_test.py @@ -2,13 +2,19 @@ import pyrap.tables as pt import numpy import sys -def load_and_compare_data_sets(ms1, ms2): +def load_and_compare_data_sets(ms1, ms2, delta): # open the two datasets ms1 = pt.table(ms1) ms2 = pt.table(ms2) #get the amount of rows in the dataset n_row = len(ms1.getcol('CORRECTED_DATA')) + n_row_m2 = len(ms2.getcol('CORRECTED_DATA')) + + if (n_row != n_row_m2): + print "Length of the data columns is different, comparison fails" + return False + n_complex_vis = 4 # create a target array with the same length as the datacolumn @@ -21,13 +27,13 @@ def load_and_compare_data_sets(ms1, ms2): for idy in xrange(n_complex_vis): div_value = ms1_array[idx][0][idy] - ms2_array[idx][0][idy] - if numpy.abs(div_value) > numpy.abs(div_max): - div_max = div_value + if numpy.abs(div_value) > div_max: + div_max = numpy.abs(div_value) div_array[idx][0][idy] = div_value print "maximum different value between measurement sets: {0}".format(div_max) # Use a delta of about float precision - if div_max > 1e-6: + if div_max > float(delta): print "The measurement sets are contained a different value" print "failed delta test!"
return False @@ -36,12 +42,13 @@ def load_and_compare_data_sets(ms1, ms2): if __name__ == "__main__": - ms_1, mw_2 = None, None + ms_1, mw_2, delta = None, None, None # Parse parameters from command line error = False print sys.argv try: - ms_1, mw_2 = sys.argv[1:3] + ms_1, mw_2, delta = sys.argv[1:4] + except Exception, e: print e print "usage: python {0} ms1 "\ @@ -51,7 +58,7 @@ if __name__ == "__main__": if not error: print "regression test:" - data_equality = load_and_compare_data_sets(ms_1, mw_2) + data_equality = load_and_compare_data_sets(ms_1, mw_2, delta) if not data_equality: print "Regression test failed: exiting with exitstatus 1" diff --git a/CEP/Pipeline/test/regression_tests/selfcal_imager_pipeline_test.py b/CEP/Pipeline/test/regression_tests/selfcal_imager_pipeline_test.py new file mode 100644 index 0000000000000000000000000000000000000000..30821350808654563ecf678f0331c6083a838000 --- /dev/null +++ b/CEP/Pipeline/test/regression_tests/selfcal_imager_pipeline_test.py @@ -0,0 +1,374 @@ +import math +import sys + +def validate_image_equality(image_1_path, image_2_path, max_delta): + import pyrap.images as pim + + # get the difference between the two images + print "comparing images from paths:" + print image_1_path + print image_2_path + im = pim.image('"{0}" - "{1}"'.format(image_1_path, image_2_path)) + im.saveas("difference.IM2") + # get the stats of the image + stats_dict = im.statistics() + return_value = compare_image_statistics(stats_dict, max_delta) + + if not return_value: + print "\n\n\n" + print "*"*30 + print "Statistics of the produced image:" + im = pim.image("{0}".format(image_1_path)) + stats_dict_single_image = im.statistics() + print stats_dict_single_image + print "\n\n\n" + print "Statistics of the compare image:" + im = pim.image("{0}".format(image_2_path)) + stats_dict_single_image = im.statistics() + print stats_dict_single_image + print "\n\n\n" + print "difference between produced image and the baseline image:" + print "maximum delta: {0}".format(max_delta) + print stats_dict + print "*"*30 + + return return_value + + +def _test_against_maxdelta(value, max_delta, name): + if math.fabs(value) > max_delta: + print "Dif found: '{0}' difference >{2}<is larger then " \ + "the maximum accepted delta: {1}".format(name, max_delta, value) + return True + return False + +def compare_image_statistics(stats_dict, max_delta=0.0001): + + return_value = False + found_incorrect_datapoint = False + for name, value in stats_dict.items(): + + if name == "rms": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 300, name) + elif name == "medabsdevmed": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 200, name) + elif name == "minpos": + pass + # this min location might move 100 points while still being the same image + elif name == "min": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 2000, name) + elif name == "maxpos": + pass + # this max location might move 100 points while still being the same image + elif name == "max": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 1500, name) + elif name == "sum": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 200000, name) + elif name == "quartile": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 4000, name) + elif name == "sumsq": + # tested with sum already + pass + + elif name == "median": + 
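+            # The median is the most stable of these statistics, so it is
+            # held to the unscaled max_delta; the statistics above use
+            # multiplied tolerances (e.g. rms * 300, sum * 200000) to absorb
+            # their larger natural spread between runs.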
found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta, name) + elif name == "npts": + pass # cannot be tested.. + elif name == "sigma": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 300, name) + elif name == "mean": + found_incorrect_datapoint = _test_against_maxdelta( + float(value[0]), max_delta * 3, name) + + # if we found an incorrect datapoint in this run or with previous + # results: results in true value if any comparison failed + return_value = return_value or found_incorrect_datapoint + + return not return_value + + + +# from here sourcelist compare functions +def validate_source_list_files(source_list_1_path, source_list_2_path, max_delta): + # read the sourcelist files + fp = open(source_list_1_path) + sourcelist1 = fp.read() + fp.close() + + fp = open(source_list_2_path) + sourcelist2 = fp.read() + fp.close() + + # convert to dataarrays + sourcelist_data_1 = convert_sourcelist_as_string_to_data_array(sourcelist1) + sourcelist_data_2 = convert_sourcelist_as_string_to_data_array(sourcelist2) + + return compare_sourcelist_data_arrays(sourcelist_data_1, sourcelist_data_2, max_delta) + + +def convert_sourcelist_as_string_to_data_array(source_list_as_string): + #split in lines + source_list_lines = source_list_as_string.split("\n") + entries_array = [] + + #get the format line + format_line_entrie = source_list_lines[0] + + # get the format entries + entries_array.append([format_line_entrie.split(",")[0].split("=")[1].strip()]) + for entry in format_line_entrie.split(',')[1:]: + entries_array.append([entry.strip()]) + + # scan all the lines for the actual data + + for line in sorted(source_list_lines[2:]): # try sorting based on name (should work :P) + # if empty + if line == "": + continue + # add the data entries + for idx, entrie in enumerate(line.split(",")): + entries_array[idx].append(entrie.strip()) + + return entries_array + +def easyprint_data_arrays(data_array1, data_array2): + print "All data as red from the sourcelists:" + for (first_array, second_array) in zip(data_array1, data_array2): + print first_array + print second_array + +def compare_sourcelist_data_arrays(data_array1, data_array2, max_delta=0.0001): + """ + Ugly function to compare two sourcelists. 
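+    Both arguments are column-oriented arrays as produced by
+    convert_sourcelist_as_string_to_data_array: each entry is a list whose
+    first element is a column name from the format line and whose remaining
+    elements are the values of that column, one per source.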
+ It needs major refactoring, but for a proof of concept it works + """ + print "######################################################" + found_incorrect_datapoint = False + for (first_array, second_array) in zip(data_array1, data_array2): + + # first check if the format string is the same, we have a major fail if this happens + if first_array[0] != second_array[0]: + print "******************* problem:" + print "format strings not equal: {0} != {1}".format(first_array[0], second_array[0]) + found_incorrect_datapoint = True + + # Hard check on equality of the name of the found sources + if first_array[0] == "Name": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + if entrie1 != entrie2: + print "The sourcelist entrie names are not the same: \n{0} !=\n {1}".format(entrie1, entrie2) + found_incorrect_datapoint = True + + # Hard check on equality of the type of the found sources + elif first_array[0] == "Type": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + if entrie1 != entrie2: + print "The sourcelist entrie types are not the same: {0} != {1}".format(entrie1, entrie2) + found_incorrect_datapoint = True + + # soft check on the Ra: convert to float and compare the values + elif first_array[0] == "Ra": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_array = entrie1.split(":") + entrie1_as_float = float(entrie1_as_array[0]) * 3600 + float(entrie1_as_array[1]) * 60 + float(entrie1_as_array[2])# float("".join(entrie1.split(":"))) + entrie2_as_array = entrie2.split(":") + entrie2_as_float = float(entrie2_as_array[0]) * 3600 + float(entrie2_as_array[1]) * 60 + float(entrie2_as_array[2]) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 10000) : + print "we have a problem Ra's are not the same within max_delta: {0} != {1} max_delta_ra = {2}".format( + entrie1, entrie2, max_delta * 10000) + found_incorrect_datapoint = True + elif first_array[0] == "Dec": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_array = entrie1.strip("+").split(".") + entrie1_as_float = float(entrie1_as_array[0]) * 3600 + float(entrie1_as_array[1]) * 60 + \ + float("{0}.{1}".format(entrie1_as_array[2], entrie1_as_array[3])) + entrie2_as_array = entrie2.strip("+").split(".") + entrie2_as_float = float(entrie2_as_array[0]) * 3600 + float(entrie2_as_array[1]) * 60 + \ + float("{0}.{1}".format(entrie2_as_array[2], entrie2_as_array[3])) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 10000) : + print "Dec's are not the same within max_delta: {0} != {1} max_delta_ra = {2}".format( + entrie1, entrie2, max_delta * 10000) + found_incorrect_datapoint = True + + elif first_array[0] == "I": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 2000): + print "I's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 1000) + found_incorrect_datapoint = True + + + elif first_array[0] == "Q": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 1000): + print "Q's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 1000) + 
found_incorrect_datapoint = True + elif first_array[0] == "U": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 1000): + print "Q's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 1000) + found_incorrect_datapoint = True + + elif first_array[0] == "V": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 1000): + print "V's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 1000) + found_incorrect_datapoint = True + + elif first_array[0] == "MajorAxis": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 60000): + print "MajorAxis's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 50000) + found_incorrect_datapoint = True + + elif first_array[0] == "MinorAxis": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 30000): + print "MinorAxis's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 30000) + found_incorrect_datapoint = True + + elif first_array[0] == "Orientation": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 70000): + print "Orientation's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 10000) + found_incorrect_datapoint = True + + elif first_array[0].split("=")[0].strip() == "ReferenceFrequency": + for (entrie1, entrie2) in zip(first_array[1:], second_array[1:]): + entrie1_as_float = float(entrie1) + entrie2_as_float = float(entrie2) + if not math.fabs(entrie1_as_float - entrie2_as_float) < (max_delta * 10000000): + print "Orientation's are not the same within max_delta {0} != {1} max_delta_I = {2} ".format( + entrie1_as_float, entrie2_as_float, max_delta * 10000000) + found_incorrect_datapoint = True + elif first_array[0].split("=")[0].strip() == "SpectralIndex": + # Not known yet what will be in the spectral index: therefore do not test it + pass + else: + print "unknown format line entrie found: delta fails" + print first_array[0] + found_incorrect_datapoint = True + + if found_incorrect_datapoint: + print "######################################################" + print "compared the following data arrays:" + easyprint_data_arrays(data_array1, data_array2) + print "######################################################" + + + # return inverse of found_incorrect_datapoint to signal delta test success + return not found_incorrect_datapoint + + +# Test data: +source_list_as_string = """ +format = Name, Type, Ra, Dec, I, Q, U, V, MajorAxis, MinorAxis, Orientation, ReferenceFrequency='6.82495e+07', SpectralIndex='[]' + 
+/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i3_s3_g3, GAUSSIAN, 14:58:34.711, +71.42.19.636, 3.145e+01, 0.0, 0.0, 0.0, 1.79857e+02, 1.49783e+02, 1.24446e+02, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i2_s2_g2, GAUSSIAN, 15:09:52.818, +70.48.01.625, 2.321e+01, 0.0, 0.0, 0.0, 2.23966e+02, 1.09786e+02, 1.32842e+02, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i4_s4_g4, GAUSSIAN, 14:53:10.634, +69.29.31.920, 1.566e+01, 0.0, 0.0, 0.0, 1.25136e+02, 4.72783e+01, 6.49083e+01, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i0_s0_g0, POINT, 15:20:15.370, +72.27.35.077, 1.151e+01, 0.0, 0.0, 0.0, 0.00000e+00, 0.00000e+00, 0.00000e+00, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i1_s1_g1, POINT, 15:15:15.623, +66.54.31.670, 4.138e+00, 0.0, 0.0, 0.0, 0.00000e+00, 0.00000e+00, 0.00000e+00, 6.82495e+07, [0.000e+00] + +""" + +source_list_as_string2 = """ +format = Name, Type, Ra, Dec, I, Q, U, V, MajorAxis, MinorAxis, Orientation, ReferenceFrequency='6.82495e+07', SpectralIndex='[]' + +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i3_s3_g3, GAUSSIAN, 14:58:34.711, +71.42.19.636, 3.146e+01, 0.0, 0.0, 0.0, 1.79857e+02, 1.49783e+02, 1.24446e+02, 6.82496e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i2_s2_g2, GAUSSIAN, 15:09:52.818, +70.48.01.625, 2.321e+01, 0.0, 0.0, 0.0, 2.23966e+02, 1.09786e+02, 1.32842e+02, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i4_s4_g4, GAUSSIAN, 14:53:10.634, +69.29.31.920, 1.566e+01, 0.0, 0.0, 0.0, 1.25136e+02, 4.72783e+01, 6.49083e+01, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i0_s0_g0, POINT, 15:20:15.370, +72.27.35.077, 1.151e+01, 0.0, 0.0, 0.0, 0.00000e+00, 0.00000e+00, 0.00000e+00, 6.82495e+07, [0.000e+00] +/data/scratch/klijn/out/awimage_cycle_0/image.restored_w0_i1_s1_g1, POINT, 15:15:15.623, +66.54.31.670, 4.138e+00, 0.0, 0.0, 0.0, 0.00000e+00, 0.00000e+00, 0.00000e+00, 6.82495e+07, [0.000e+00] + +""" +#entries_array = convert_sourcelist_as_string_to_data_array(source_list_as_string) +#entries_array2 = convert_sourcelist_as_string_to_data_array(source_list_as_string2) + +#print compare_sourcelist_data_arrays(entries_array, entries_array2, 0.0001) + +image_data = {'rms': [ 0.], 'medabsdevmed':[ 0.], 'minpos': [0, 0, 0, 0] + , 'min':[ 0.], 'max': [ 0.], + 'quartile': [ 0.], 'sumsq': [ 0.], 'median': [ 0.], 'npts':[ 65536.], + 'maxpos': [0, 0, 0, 0], 'sigma': [ 0.], 'mean': [ 0.]} + + + #{'rms': array([ 0.52093363]), 'medabsdevmed': array([ 0.27387491]), 'minpos': array([156, 221, 0, 0], + #dtype=int32), 'min': array([-2.26162958]), 'max': array([ 24.01361465]), 'sum': array([ 1355.46549538]), + #'quartile': array([ 0.54873329]), 'sumsq': array([ 17784.62525496]), 'median': array([ 0.00240479]), + # 'npts': array([ 65536.]), 'maxpos': array([148, 199, 0, 0], dtype=int32), + # 'sigma': array([ 0.52052685]), 'mean': array([ 0.02068276])} + +image_data = {'rms': [ 0.52093363], 'medabsdevmed': [ 0.27387491], 'minpos': [[156, 221, 0, 0], "int32"], + 'min': [-2.26162958], 'max': [ 24.01361465], 'sum': [ 1355.46549538], + 'quartile' : [ 0.54873329], 'sumsq': [ 17784.62525496], 'median': [ 0.00240479], + 'npts': [ 65536.], 'maxpos':[ [148, 199, 0, 0], "int32"], + 'sigma': [ 0.52052685], 'mean': [ 0.02068276]} + +# print compare_image_statistics(image_data) + + + +if __name__ == "__main__": + 
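+    # Invocation sketch (file names below are illustrative only): the six
+    # positional arguments are unpacked as image_1, source_list_1, fits_1,
+    # image_2, source_list_2, fits_2; only the two images are compared,
+    # the sourcelist comparison is currently disabled.
+    #
+    #   python selfcal_imager_pipeline_test.py run.img run.skymodel run.fits \
+    #       baseline.img baseline.skymodel baseline.fits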
source_list_1, image_1, source_list_2, image_2, max_delta = None, None, None, None, None + # Parse parameters from command line + error = False + print sys.argv[1:5] + try: + image_1, source_list_1, fist_1, image_2, source_list_2, fits_2 = sys.argv[1:7] + except: + print "Sourcelist comparison has been disabled! Arguments must still be provided" + print "usage: python {0} source_list_1_path "\ + " image_1_path source_list_2_path image_2_path (max_delta type=float)".format(sys.argv[0]) + sys.exit(1) + + max_delta = None + try: + max_delta = float(sys.argv[5]) + except: + max_delta = 0.0001 + + print "using max delta: {0}".format(max_delta) + + if not error: + image_equality = validate_image_equality(image_1, image_2, max_delta) + # sourcelist comparison is still unstable default to true + sourcelist_equality = True #validate_source_list_files(source_list_1, source_list_2, max_delta) + if not (image_equality and sourcelist_equality): + print "Regression test failed: exiting with exitstatus 1" + print " image_equality: {0}".format(image_equality) + print " sourcelist_equality: {0}".format(sourcelist_equality) + sys.exit(1) + + print "Regression test Succeed!!" + sys.exit(0) + + diff --git a/CEP/Pipeline/test/support/data_map_test.py b/CEP/Pipeline/test/support/data_map_test.py index 5321e766e946f50cf085f57133a76e5b8c808cc0..eff40fded4794cf142ada7ae2f5a389a3304f4b0 100644 --- a/CEP/Pipeline/test/support/data_map_test.py +++ b/CEP/Pipeline/test/support/data_map_test.py @@ -127,6 +127,38 @@ class DataMapTest(unittest.TestCase): for data_map in error_maps: self.assertRaises(DataMapError, DataMap, data_map) + def test_append_item_non_skip(self): + data_map = DataMap(self.new_style_map) + data_map.append(("host","file", False)) + + data_map.iterator = DataMap.TupleIterator + tuples = [item for item in data_map] + self.assertEqual(len(tuples), 5) + self.assertTrue(all(isinstance(item, tuple) for item in tuples)) + self.assertTrue(all(len(item) == 2 for item in tuples)) + self.assertEqual(tuples[-1], ('host', 'file')) + + def test_append_item_skip(self): + data_map = DataMap(self.new_style_map) + data_map.append(("host","file", True)) + + data_map.iterator = DataMap.SkipIterator + dataProducts = [item for item in data_map] + # default contains 2 nonskipped items + self.assertEqual(len(dataProducts), 2) + self.assertTrue(all(isinstance(item, DataProduct) + for item in dataProducts)) + # The map already contains 2 skipped items, the final item is tested + # here + self.assertEqual(dataProducts[-1].host, 'locus004') + self.assertEqual(dataProducts[-1].file, 'L12345_SB104.MS') + + def test_append_item_invalid(self): + data_map = DataMap(self.new_style_map) + + self.assertRaises(DataMapError, data_map.append, + ("host","file", True, "bwaaa")) + class MultiDataMapTest(unittest.TestCase): """ @@ -256,6 +288,41 @@ class MultiDataMapTest(unittest.TestCase): self.assertNotEqual(data_map, multi_data_map) + def test_append_item_non_skip(self): + data_map = MultiDataMap(self.new_style_map) + data_map.append(("host", ["file"], False, [False] )) + data_map.append(("host", ["file"], False)) + + data_map.iterator = DataMap.TupleIterator + tuples = [item for item in data_map] + self.assertEqual(len(tuples), 6) + self.assertTrue(all(isinstance(item, tuple) for item in tuples)) + self.assertTrue(all(len(item) == 2 for item in tuples)) + self.assertEqual(tuples[-1], ('host', ['file'])) + + def test_append_item_skip(self): + data_map = MultiDataMap(self.new_style_map) + data_map.append(("host",["file"], True, [True])) + 
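+        # MultiDataMap.append accepts both the full four-element tuple
+        # (host, file_list, skip, file_skip_list) and the three-element form
+        # without explicit per-file skip flags, as exercised here.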
data_map.append(("host",["file"], True)) + + data_map.iterator = DataMap.SkipIterator + dataProducts = [item for item in data_map] + # default contains 2 nonskipped items + self.assertEqual(len(dataProducts), 2) + self.assertTrue(all(isinstance(item, MultiDataProduct) + for item in dataProducts)) + # The map already contains 2 skipped items, the final item is tested + # here + self.assertEqual(dataProducts[-1].host, 'locus004') + self.assertEqual(dataProducts[-1].file, ['L12345_SB104.MS']) + + def test_append_item_invalid(self): + data_map = MultiDataMap(self.new_style_map) + + self.assertRaises(DataMapError, data_map.append, + ("host",True, "file", [False], "bwaaa")) + + class HelperFunctionDataMapTest(unittest.TestCase): """ Test class for the helper functions in lofarpipe.support.data_map diff --git a/CEP/Pipeline/test/support/subprocessgroup_test.py b/CEP/Pipeline/test/support/subprocessgroup_test.py new file mode 100644 index 0000000000000000000000000000000000000000..290be2967ce8fc88696b6307ebc13402a33e589e --- /dev/null +++ b/CEP/Pipeline/test/support/subprocessgroup_test.py @@ -0,0 +1,63 @@ +import os +import shutil +import tempfile +import unittest +import time + +from lofarpipe.support.subprocessgroup import SubProcessGroup + +class SubProcessGroupTest(unittest.TestCase): + """ + Test class for the SubProcessGroupTest class in lofarpipe.support.SubProcessGroupTest + """ + def __init__(self, arg): + super(SubProcessGroupTest, self).__init__(arg) + + + def setUp(self): + """ + Create scratch directory and create required input files in there. + """ + + def tearDown(self): + """ + Cleanup all the files that were produced by this test + """ + + + def test_limit_number_of_proc(self): + process_group = SubProcessGroup(polling_interval=1) + + # wait for 2 seconds + cmd = "sleep 2" + start_time = time.time() + # Quickly start a large number of commands, assur + for idx in range(10): + process_group.run(cmd) + + # if there is no serialization the test would take about 5 seconds + # with serialization i will take at a minimum 10 second, use 8 seconds + # to have some safety from rounding errors + + process_group.wait_for_finish() + end_time = time.time() + self.assertTrue((end_time - start_time) > 3) + + + def test_start_without_jobs(self): + process_group = SubProcessGroup(polling_interval=1) + + # wait for 5 seconds + start_time = time.time() + + process_group.wait_for_finish() + end_time = time.time() + + # The wait should complete without a polling interfal + self.assertTrue((end_time - start_time) < 1) + +if __name__ == '__main__': + import xmlrunner + #unittest.main(testRunner=xmlrunner.XMLTestRunner(output='result.xml')) + unittest.main() + diff --git a/CEP/Pipeline/visual_studio/Pipeline.v12.suo b/CEP/Pipeline/visual_studio/Pipeline.v12.suo index 1d2c795786e59d4613d66e8a6b0d804edd2f3d22..937237ab3ef2493da697e7a4a5da5485643c05d3 100644 Binary files a/CEP/Pipeline/visual_studio/Pipeline.v12.suo and b/CEP/Pipeline/visual_studio/Pipeline.v12.suo differ
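The new test/support/subprocessgroup_test.py above pins down the behaviour the selfcal_bbs and selfcal_finalize nodes rely on: commands queued on a SubProcessGroup are not all started at once, and wait_for_finish() returns None only when every queued command exited cleanly. The sketch below is illustrative only and assumes nothing beyond the API visible in this patch (the optional logger and polling_interval constructor arguments, run() accepting a command string or argument list, and the wait_for_finish() return convention); the sleep commands stand in for real pipeline executables.

# Minimal usage sketch for SubProcessGroup, based only on the calls made in
# this patch; the commands themselves are placeholders.
from lofarpipe.support.subprocessgroup import SubProcessGroup

process_group = SubProcessGroup(polling_interval=1)

# Queue more commands than the group will run concurrently; the excess
# commands wait and are polled until a slot frees up.
for idx in range(10):
    process_group.run("sleep 2")

# Blocks until all queued commands have finished; returns None on success.
if process_group.wait_for_finish() != None:
    print "one or more subprocesses failed"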