diff --git a/doc/pipeline.yaml b/doc/pipeline.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962f91847363cd13c170c852348b77665c050b22 --- /dev/null +++ b/doc/pipeline.yaml @@ -0,0 +1,615 @@ +--- + +mk_results_dir: + Input : name of results directory + Default : results + Operation: "[legacy] creates directory" + Software : prefactor/plugin/Pipelinestep_makeDirectory.py + Output : results directory created in job directory + +mk_inspect_dir: + Input : name of inspection directory + Default : inspection + Operation: "[legacy] creates directory" + Software : prefactor/plugin/Pipelinestep_makeDirectory.py + Output : inspection directory created under results + +mk_cal_values_dir: + Input : name of directory to put calibration results + Default : cal_values + Operation: "[legacy] creates directory" + Software : prefactor/plugin/Pipelinestep_makeDirectory.py + Output : cal_values directory created under results + +createmap_target: + Input : + - "{{ target_input_path }}" # directory where the LTA data is + - "{{ target_input_pattern }}" # regex to select measurement sets in {{ target_input_path }} + - "{{ ddf_soldir }}" # option, but if set, location of ddf-pipeline solutions + - "{{ prefac_tgt_dir }}" # the Pre-Facet-Target directory + - "{{ cal_solutions }}" # the absolute path to the calibration solutions from Pre-Facet-Target + Operation: > + "[legacy] This script locates the pre-facet-target solutions, + the measurement sets themselves, and optionally the + ddf solutions. + It checks the solutions for the frequency range + covered, and creates a list of measurement sets which + fall into that frequency range (if all data is + downloaded but only some subbands processed, this + skips any subbands that weren’t part of the + pre-facet-target [or optionally, ddf-pipeline] + calibration run)." + Software : plugins/Pipelinestep_makeTargetmap.py + Output : > + a mapfile, which is a list of filenames and statuses + understood by the generic pipeline. + Basically, a list of measurement sets to process. + +createmap_target_list: + Input : the mapfile output of createmap_target + Operation: > + convert the output of createmap_target, + which is a list with length N subbands, + to a length 1 “list” of all subbands + Software : prefactor/plugins/createMapfile.py + Output : > + "[legacy] A list with length one that contains the + information of all subbands (this is because the + genericpipeline will run a step N times, once + for each entry in the mapfile it’s given. + So by creating a list with length 1 we can pass + the information on all of the subbands but only + run the step once)." + +cp_cal_solutions: + Input : + - "{{ cal_solutions }}" # same as above + - "{{ solutions }}" # destination name of solutions to copy to + Operation: > + [legacy] copies the calibration solutions to + the working directory so it has a + new copy to work on. + Software : cp + Output : > + a new copy of the calibration solutions + which can be modified in future steps. + +check_station_mismatch: + Input : + - "{{ solutions }}" # same as above + - "{{ filter_baselines }}" # a user defined filter for which baselines to use + Operation: > + checks the lists of stations in the solutions and + the measurement sets (output of createmap_target) + to see if there is a mismatch; some stations may + have been flagged in pre-facet-target, for example. + These stations need to be added back in to the + h5parm so you don’t get an NDPPP error when applying. + Also checks for presence of international stations + in the solutions and adds them if necessary. + Software : plugins/Pipelinestep_compareStationListVLBI + Output : A filter command that is passed later on to NDPPP in the ndppp_prep_target step. + +download_cats: + Input : > + for a complete list of variables, see the Software script, + which can be run with –h to show you all the options. + Parameter names match those in the genericpipeline step. + Also needs the output mapfile from createmap_target_list, + and some user-defined variables from the setup section of the parset. + Operation: > + Queries both the LBCS and LoTSS databases for + information on the sources within the input + radii from the centre of the field (which is + taken from the first element of + createmap_target_list). + Cross-matches these two catalogues, does some + filtering, and generates several output + catalogues (see below). + Requires external internet access to query the databases. + Software : plugins/Pipelinestep_DownloadCats.py + Output : + - "{{ lotss_skymodel }}" # created if it doesn’t already exist; list of LoTSS sources within lotss_radius of field centre + - "{{ lbcs_skymodel }}" # created if it doesn’t already exist; list of LBCS sources within lbcs_radius of field centre + - "{{ image_cat }}" # list of directions in which to image + - "{{ delay_cat }}" # list of all potential delay calibrators + - "{{ subtract_cat }}" # list of sources that may need to be subtracted + +ndppp_prep_target: + Input : + - createmap_target mapfile + - check_station_mismatch output + - "{{ solutions }}" + # plus user defined: + - "{{ flag_baselines }}" + - "{{ phasesol }}" + Operation: > + flags/filters requested antennas, flags + erroneously low amplitudes, applies all + pre-facet-calibrator and pre-facet-target solutions + Software : NDPPP + Output : > + New measurement sets in the working directory, + containing the corrected data in the DATA column. + Also generates a mapfile which points to + these measurement sets. + +ndppp_prep_target_list: + Input : mapfile/output from ndppp_prep_target + Operation: convert the input to a length 1 list + Software : prefactor/plugins/Pipelinestep_createMapfile.py + Output : > + A list with length one that contains the + information of all subbands + +create_ateam_model_map: + Input : "{{ prefactor_dir }}/skymodels/Ateam_LBA_CC.skymodel" # I.e. the file which contains the models of the a-team sources + Operation: get a mapfile of the skymodel used to predict the A-team visibilities (i.e. a pipeline-friendly path to this file) + Software : prefactor/plugins/Pipelinestep_addListMapfile.py + Output : a mapfile pointing to the necessary file + +make_sourcedb_ateam: + Input : the mapfile from create_ateam_model_map + Operation: > + convert a skymodel (text file) to a + sourcedb (source database; NDPPP-readable model) + Software : makesourcedb + Output : a sourcedb (and its mapfile) + +expand_sourcedb_ateam: + Input : the mapfiles from ndppp_prep_target (length = number of subbands) and make_sourcedb_ateam (length = 1) + Operation: > + expand the make_sourcedb_ateam mapfile so + its length matches that of ndppp_prep_target. + That is, copy the single element to a vector + of length = number of subbands, so each + subband operation in subsequent steps is + mapped to the same sourcedb. + Software : prefactor/plugins/Pipelinestep_expandMapfile.py + Output : a mapfile with the value of make_sourcedb_ateam but length = length of ndppp_prep_target mapfile + +predict_ateam: + Input : ndppp_prep_target mapfile and expand_sourcedb_ateam mapfile + Operation: > + use the sourcedb to predict (make a MODEL_DATA + column in the ndppp_prep_target measurement sets) + with the A-team source information + Software : NDPPP + Output : > + MODEL_DATA column with observation-specific + predicted A-team visibilities + +ateamcliptar: + Input : ndppp_prep_target mapfile/measurement sets + Operation: > + compare MODEL_DATA with threshold and clip + visibilities from DATA for the times or + frequencies where the MODEL_DATA exceeds + the threshold + Software : prefactor/scripts/Ateamclipper.py + Output : clipped data in the DATA column of ndppp_prep_target + +sort_concatmap: + Input : ndppp_prep_target_list (length=1 but has all measurement set info) + Operation: > + using all of the frequency information + from the measurement sets, sort everything + into bands of 10 based on a regular frequency grid + Software : prefactor/scripts/sort_times_into_freqGroups.py + Output : > + a collection of mapfiles, one of which + gives a list of lists, where each + element-list has the names of the + measurement sets of a single band. + +do_sortmap_maps: + Input : results of sort_concatmap + Operation: makes sensible files for future use out of the input + Software : prefactor/plugins/Pipelinestep_mapfilenamesFromMapfiles + Output : mapfiles for use in dpppconcat + +dpppconcat: + Input : > + two mapfiles from do_sortmap_maps: + one which gives a list of N elements, + where N is the number of output bands, + and each element contains all of the + input measurement set names; and on + mapfile which is a list of N elements, + where each element is the output band + measurement set name + Operation: concatenate subbands into bands + Software : NDPPP + Output : bands of subbands in a regular frequency grid + +dpppconcat_list: + Input : mapfile output of dpppconcat + Operation: > + convert mapfile of N bands to one + element list with all band names + Software : prefactor/plugins/Pipelinestep_createMapfile + Output : a mapfile with one element that is a list of all the bands + +aoflag: + Input : + - dpppconcat_list mapfile + - "{{ prefactor_dir }}/rfistrategies/{{ rfistrategy }}" # RFI flagging strategy file + Operation: perform RFI flagging + Software : NDPPP + Output : flagged measurement sets + +createmap_ddf: + Input : "{{ ddf_soldir }}" # directory where ddf-pipeline outputs are + Operation: > + create a mapfile with the ddf-pipeline + calibrated measurement sets + Software : prefactor/plugins/Pipelinestep_createMapfile.py + Output : mapfile with measurement set names of ddf-pipeline output + +ddf_solutions: + Input : createmap_ddf mapfile + Operation: add suffix to input mapfile to map to the right solutions + Software : prefactor/plugins/Pipelinestep_createMapfile.py + Output : a list of ddf-pipeline solutions to be applied + +ddf_h5parms: + Input : ddf_solutions mapfile + Operation: > + create output filenames for h5parms - + the ddf-pipeline solutions need to be + converted from .npz to .h5 + Software : prefactor/plugins/Pipelinestep_makeResultsMapfile.py + Output : mapfile with h5parm names (which do not exist yet) + +convert_to_h5: + Input : + - the solutions mapfile # existing file + - the ddf_h5parms mapfile # the h5parm files to be written + Operation: convert the .npz files to .h5 + Software : losoto/bin/killMS2H5parm.py + Output : h5parms, one for each band + +expand_concat_map: + Input : + - dpppconcat mapfile # length N, for N bands + - ddf_h5parms mapfile # h5parms written from the ddf-pipeline results + Operation: > + checks that the lengths of these + are the same? I have no idea what + this step actually does or why it’s needed. + Software : prefactor/plugins/Pipelinestep_expandMapfile.py + Output : a new mapfile with the measurement sets from dpppconcat + +addIS: + Input : + - the h5parms (ddf_h5parms) + - and measurement sets (expand_concat_map) mapfiles + Operation: > + writes a new solution set in the h5parms + contained in the measurement sets, but + including the international stations + (initialized to phase=0 and amp=1) + Software : bin/addIS_to_h5.py + Output : a new solution set in each h5parm + +h5imp_ddf_map: + Input : ddf_h5parms mapfile + Operation: make a list with one entry that has all h5parm names + Software : prefactor/plugins/Pipelinestep_compressMapfile.py + Output : > + a compressed list of one entry that has all + h5parm names, in LoSoTo input format. + +h5imp_ddf: + Input : the ddf_h5parms via h5imp_ddf_map mapfile + Operation: collect all the h5parms information into a single h5parm + Software : losoto/bin/H5parm_collector.py + Output : a single h5parm valid for all subbands/bands + +ndppp_applycal: + Input : + - dpppconcat bands + - h5imp_ddf h5parm + Operation: "apply solutions “in place” (don’t write a new file)" + Software : NDPPP + Output : a new DATA column in the dpppconcat bands + +prep_delay_dir: + Input : + - dpppconcat bands + - "{{ best_delay_cat }} catalogue with information on in-field calibrator" + Operation: > + get information from the catalogue + on the source name, RA, and DEC, + and return that information formatted + for use by NDPPP + Software : plugins/Pipelinestep_TargetListToMapfile.py + Output : > + variables that are the source name, + RA, and DEC, formatted for NDPPP to + use in the next step + +dppp_phaseup: + Input : + - dpppconcat measurement sets (bands) + - variables from prep_delay_dir + Operation: > + phaseshift, average data, add core + stations together, filter them out + (remove from dataset) + Software : NDPPP + Output : new measurement sets for the in-field calibrator + +dppp_phaseup_list: + Input : dppp_phaseup mapfile + Operation: > + get a list with length 1 containing info + on all bands for the in-field calibrator + Software : prefactor/plugins/createMapfile.py + Output : > + a mapfile with info on all bands + for in-field calibrator, in a single + element of a list + +sort_phaseupmap: + Input : dppp_phaseup_list (length=1 but has all measurement set info) + Operation: > + using all of the frequency information + from the measurement sets, sort everything + by frequency for combination into a single + all-bandwidth measurement set + Software : prefactor/scripts/sort_times_into_freqGroups.py + Output : > + a collection of mapfiles, one of which + gives a list of lists, where each element-list + has the names of the measurement sets of a single band + +do_phaseup_maps: + Input : results of sort_phaseupmap + Operation: makes sensible files for future use out of the input + Software : prefactor/plugins/Pipelinestep_mapfilenamesFromMapfiles + Output : mapfiles for use in phaseup_concat + +phaseup_concat: + Input : + - two mapfiles from do_phaseup_maps: one which gives a list of N elements, where N is the number of output bands, and each element contains all of the input measurement set names; + - and one mapfile which is a list of N elements, where each element is the output band measurement set name + Operation: > + concatenate bands into a single + all-bandwidth measurement set + Software : NDPPP + Output : a single measurement set for the in-field calibrator + +correct_beam: + Input : the phaseup_concat measurement set + Operation: correct for the beam array factor + Software : bin/correct_array_factor.py + Output : + - beam-corrected measurement set + - some plots in the inspection directory + +delay_cal_model: + Input : + - phaseup_concat measurement set, + - "catalogue {{ best_delay_cat }}" + Operation: > + create a skymodel for use in + the self-calibration. This is + created inside the measurement + set with a hard-coded name. + Software : bin/skynet.py + Output : > + both the skymodel ( MS/skymodel ) + and sourcedb ( MS/sky ) for the + in-field calibrator, assuming it + is a point source with the LoTSS flux density + +delay_cal_model_map: + Input : phaseup_concat mapfile + Operation: > + add suffix to file, thus pointing + to the sourcedb from delay_cal_model + Software : prefactor/plugins/Pipelinestep_createMapfile.py + Output : a mapfile, pointing to the sourcedb + +delay_cal_parmmap: + Input : phaseup_concat mapfile + Operation: > + add suffix to file, creating output + name for the delay calibration h5parm + Software : prefactor/plugins/Pipelinestep_createMapfile.py + Output : > + a mapfile which has the name + of the solutions to be created. + +delay_solve: + Input : + - phaseup_concat measurement set + - the sourcedb for the calibrator + - the output h5parm names for the delay calibration + Operation: solve for TEC + Software : NDPPP + Output : an h5parm with TEC solutions + +make_losoto: + Input : path to inspection directory + Operation: > + using the arguments in this step, + creates a text file in the working + directory that contains LoSoTo steps + to plot TEC + Software : prefactor/plugins/Pipelinestep_makeLosotoParset.py + Output : losoto.parset text file in the working directory + +process_losoto: + Input : + - the h5parm that results from delay_solve # i.e. the delay_cal_parmmap output mapfile + - (implicitly) losoto.parset + Operation: > + run the losoto parset created in + the previous step on the h5parm + from delay_solve + Software : "losoto - specifically, uses {{ losoto_directory }}/bin/losoto" + Output : plot file (*.png) in the inspection directory + +apply_delay: + Input : + - the h5parm created by delay_solve + - the phaseup_concat measurement set + Operation: apply the solutions + Software : NDPPP + Output : > + a new measurement set (which will have + suffix *.apply_delay instead of + *.phaseup_concat) where the corrected + data is in the DATA column + +# NOTE: the following steps will be changed – we need to convert the pipeline to use Reinout’s self-cal script instead + +#selfcal: +# - Input: - the apply_delay measurement set +# - the .csv file with the delay calibrator information {{ best_delay_cat }} +# - Operation: run self-calibration using difmap +# - Software: > +# bin/selfcal_difmap.py is a python wrapper +# which writes a difmap-readable script, +# and runs it using a modified installation +# of difmap, using the hacked CORPLT function +# that Neal created +# - Output: > +# a new directory where everything is run, +# which results in a lot of intermediate files. +# The important-to-keep file is the h5parm +# written out at the end, which is copied +# back to the main working directory, +# ending in *_sols.h5 +# +#createmap_selfcal: +# - Input: > +# the regex to find the solutions +# generated in the previous step +# - Operation: > +# create a mapfile for the solutions +# generated in the previous step +# - Software: prefactor/plugins/Pipelinestep_createMapfile.py +# - Output: > +# a mapfile path to the solutions +# generated in the previous step +# +#copyST_gains: +# - Input: - the solutions from selfcal +# - - the dpppconcat bands # these are the general bands which are created before the delay calibrator is split out. +# - Operation: > +# the selfcal is run on a measurement +# set with the combined core stations (ST001) +# but we want to apply the solutions to +# the dpppconcat measurement sets, which +# have the core stations individually, so +# this creates a new solset, creates the +# core stations which exist in dpppconcat +# files, and copies the ST001 solutions to +# those core stations, and directly copies +# the solutions for the rest of the antennas. +# - Software: bin/gains_toCS_h5parm.py +# - Output: sol001 in the selfcal h5parm +# +## same step, different inputs +#copyST_gains: +# - Input: - the delay_solve solutions +# - the dpppconcat bands # these are the general bands which are created before the delay calibrator is split out. +# - Operation: > +# the delay_solve is run on a measurement set +# with the combined core stations (ST001) but +# we want to apply the solutions to the +# dpppconcat measurement sets, which have the +# core stations individually, so this creates +# a new solset, creates the core stations which +# exist in dpppconcat files, and copies the +# ST001 solutions to those core stations, and +# directly copies the solutions for the rest +# of the antennas. +# - Software: bin/gains_toCS_h5parm.py +# - Output: sol001 in the delay_solve h5parm – phase and TEC solutions +# +#copyST_gains_selfcal: +# - Input: - the solutions from selfcal +# - the dpppconcat bands +# - Operation: > +# same as above, but in the selfcal h5parm +# instead (which has phase and amplitude +# solutions rather than phase and TEC) +# - Software: bin/gains_toCS_h5parm.py +# - Output: sol001 in the selfcal h5parm – amplitude and phase solutions +# +#copy_delay_sols: +# - Input: the delay_solve h5parm +# - Operation: copy the h5parm to the results directory +# - Software: /bin/cp +# - Output: > +# the h5parm renamed to delay_cal_sols.h5 +# in the {{ cal_values_ directory }} +# +#expand_h5_map: +# - Input: - the delay_solve h5parm +# - delay_cal_parmmap mapfile +# - and the dpppconcat mapfile +# - Operation: > +# expand the delay_cal_parmmap mapfile so +# its length matches the number of dpppconcat +# measurement sets +# - Software: prefactor/plugins/Pipelinestep_expandMapfile.py +# - Output: > +# a mapfile of the delay_solve h5parm that has +# that h5parm copied into every entry of a list +# that has the same length as the number of +# dpppconcat files. +# +#expand_h5_selfcal_map: +# - Input: - the selfcal h5parm # createmap_selfcal mapfile +# - the dpppconcat mapfile +# - Operation: > +# expand the createmap_selfcal mapfile +# so its length matches the number of +# dpppconcat measurement sets +# - Software: prefactor/plugins/Pipelinestep_expandMapfile.py +# - Output: > +# a mapfile of the selfcal h5parm that has +# that h5parm copied into every entry of a +# list that has the same length as the number +# of dpppconcat files +# +#ndppp_apply_delay: +# - Input: - dpppconcat measurement sets +# - delay_solve h5parm +# - selfcal h5parm +# - Operation: apply the solutions (both delay_solve and selfcal) +# - Software: NDPPP +# - Output: CORRECTED_DATA column in the dpppconcat measurement sets +# +#delete_prep_target: +# - Input: mapfile for ndppp_prep_target measurement sets +# - Operation: delete individual subbands in working directory +# - Software: /bin/rm +# - Output: deleted files +# +#delete_phaseup: +# - Input: mapfile for dppp_phaseup measurement sets +# - Operation: delete dppp_phaseup measurement sets for delay calibrator # i.e., the individual bands (before combination into a single all-bandwidth measurement set) +# - Software: /bin/rm +# - Output: deleted files +# +#make_results_compress: +# - Input: dpppconcat mapfile +# - Operation: > +# some sort of preparation for the +# make_summary step – honestly I’m +# not sure what is done here. +# - Software: prefactor/plugins/Pipelinestep_compressMapfile.py +# - Output: a mapfile +# +#make_summary: +# - Input: > the mapfile from the previous step, +# and several arguments defined in the parset +# - Operation: write a summary text file of how things went +# - Software: bin/make_summaryVLBI.py +# - Output: a text file which should give a summary of how things went