Skip to content
Snippets Groups Projects
Commit 7bc8965e authored by Matthijs van der Wild's avatar Matthijs van der Wild
Browse files

Add LoTSS subtraction workflow

parent 1fc5e7af
No related branches found
No related tags found
1 merge request!54Add LoTSS subtraction workflow
Showing
with 682 additions and 27 deletions
#!/usr/bin/env bash
# Find all the DIS2 solutions from a ddf-pipeline run,
# convert them to H5parm format and merge them into a
# single H5parm.
H5MERGER=${1}
SOLSDIR=${2}
MSNAME=${3}
C=0
for f in ${SOLSDIR}/*MHz_uv_pre-cal.ms/*DIS2*.sols.npz; do
killMS2H5parm.py --solset sol000 --verbose DIS2_$(printf "%02d" $C).h5 $f
((C++))
done
python3 ${H5MERGER} --h5_tables DIS2*.h5 --h5_out DIS2_full.h5 --propagate_flags --merge_diff_freq --add_ms_stations --ms ${MSNAME} --h5_time_freq True
#!/usr/bin/env bash
DDFDIR=$1
SOLSDIR=$2
for folder in ${SOLSDIR}/L*MHz_uv_pre-cal.ms; do
echo "Checking symlinks in $folder"
cd $folder
for solution in *.npz; do
if [[ -L $solution ]]; then
echo "Fixing symlink for ${solution}"
solname=$(basename $(readlink $solution))
unlink $solution
cp ${DDFDIR}/${solname} $solution
fi
done
done
#!/usr/bin/env bash
cp $1/DDS3*.npz .
cp $1/image_full_ampphase_di_m.NS.mask01.fits .
cp $1/image_full_ampphase_di_m.NS.DicoModel .
cp $1/image_dirin_SSD_m.npy.ClusterCat.npy .
#!/usr/bin/env python
import argparse
import math
import casacore.tables as ct
def main(ms: str, box_size: float = 2.5):
"""Generates a DS9 box region file centred at the phase centre of the input MeasurementSet.
Args:
ms (str): a MeasurementSet to obtain the phase centre from.
box_size (float): size in degrees of the box's sides. Defaults to 2.5 deg.
"""
if not ms:
raise ValueError("Empty string passed to input")
if type(ms) is not str:
raise ValueError("Expected a single MeasurementSet.")
if box_size <= 0:
raise ValueError("Box size must be positive.")
with ct.table(ms + "::FIELD") as field:
phasedir = field.getcol("PHASE_DIR").squeeze()
phasedir_deg = phasedir * 180.0 / math.pi
ra = phasedir_deg[0]
dec = phasedir_deg[1]
# RA, DEC, Size, Rotation
regionstr = f"fk5\nbox({ra},{dec},{box_size},{box_size},0.0) # color=green"
with open("boxfile.reg", "w") as f:
f.write(regionstr)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Make a DS9-compatible box region centred on the phase centre of a MeasurementSet."
)
parser.add_argument(
"ms",
type=str,
help="A MeasurementSet from which the phase centre will be taken.",
)
parser.add_argument(
"box_size",
type=float,
default=2.5,
help="Size of the sides of the box region in degrees.",
)
args = parser.parse_args()
main(args.ms, box_size=args.box_size)
#!/usr/bin/env bash
for f in *pre-cal.ms; do
echo Adding $f to the subtract list
echo $f >> big-mslist.txt
done
cwlVersion: v1.2
class: CommandLineTool
id: dp3_applycal
id: dp3_applycal_ddf
label: DP3 applycal
doc: |
Applies the solutions generated by the
......@@ -9,10 +9,11 @@ doc: |
baseCommand: DP3
arguments:
- msout=.
- steps=[applyddf]
- applyddf.type=applycal
- applyddf.correction=fulljones
- applyddf.solset=sol001
- applyddf.solset=sol000
- applyddf.soltab=[amplitude000,phase000]
inputs:
......@@ -24,13 +25,13 @@ inputs:
prefix: msin=
separate: false
- id: ddf-solset
type: File?
- id: ddf_solset
type: File
doc: |
The solution tables generated by
the DDF pipeline in an HDF5 format.
inputBinding:
position: 0
position: 1
prefix: applyddf.parmdb=
separate: false
......@@ -51,9 +52,17 @@ inputs:
shellQuote: false
doc: The name of the input data column.
- id: msout
type: string?
default: "."
inputBinding:
prefix: msout=
separate: false
doc: The MeasurementSet to write to.
- id: msout_datacolumn
type: string?
default: DATA
default: DATA_DI_CORRECTED
inputBinding:
prefix: msout.datacolumn=
separate: false
......@@ -80,10 +89,10 @@ inputs:
the full resolution flags.
outputs:
- id: msout
- id: output_data
type: Directory
outputBinding:
glob: $(msin.basename)
glob: $(inputs.msin.basename)
doc: |
The output data with corrected
data in MeasurementSet format.
......@@ -103,6 +112,9 @@ requirements:
writable: true
- class: InplaceUpdateRequirement
inplaceUpdate: true
- class: ResourceRequirement
coresMax: $(inputs.max_dp3_threads)
coresMin: $(inputs.max_dp3_threads)
hints:
- class: DockerRequirement
......@@ -110,3 +122,4 @@ hints:
stdout: dp3_applycal.log
stderr: dp3_applycal_err.log
class: CommandLineTool
cwlVersion: v1.2
id: fixsymlinks
doc: |-
Deals with broken symlinks in the SOLSDIR directory
by replacing them with the files they are supposed to
link to.
baseCommand:
- fix_symlinks_ddf.sh
inputs:
- id: ddf_rundir
type: Directory?
doc: |
Path to the directory of the DDF-pipeline run, where
the output files for applying DI solutions and subtracting
LoTSS can be found.
inputBinding:
position: 0
valueFrom: $(self.path)
- id: ddf_solsdir
type: Directory?
doc: |
Path to the SOLSDIR directory of the ddf-pipeline run.
inputBinding:
position: 1
valueFrom: $(self.basename)
outputs:
- id: solsdir
type: Directory
outputBinding:
glob: SOLSDIR
doc: |
SOLSDIR with symlinks replaced by their corresponding files.
- id: logfiles
type: File[]
outputBinding:
glob: fixsymlinks*.log
doc: |
The files containing the stdout
and stderr from the step.
requirements:
- class: InitialWorkDirRequirement
listing:
- entry: $(inputs.ddf_solsdir)
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
stdout: fixsymlinks.log
stderr: fixsymlinks_err.log
class: CommandLineTool
cwlVersion: v1.2
id: gatherdds3
doc: |-
Gathers the final direction dependent solutions from the DDF-pipeline
and other files required for the subtraction: the clean component model,
the facet layout and the clean mask.
baseCommand:
- gather_dds3.sh
inputs:
- id: ddf_rundir
type: Directory
inputBinding:
position: 0
doc: |-
Directory containing the output of the DDF-pipeline run
or at the very least the required files for the subtract.
outputs:
- id: dds3sols
type: File[]
doc: The final direction dependent solutions from DDF-pipeline.
outputBinding:
glob: DDS3*.npz
- id: fitsfiles
type: File[]
doc: FITS files required for the subtract. This is the clean mask.
outputBinding:
glob: image*.fits
- id: dicomodels
type: File[]
doc: Clean component model required for the subtract.
outputBinding:
glob: image*.DicoModel
- id: facet_layout
type: File
doc: Numpy data containing the facet layout used during imaging.
outputBinding:
glob: image*.npy
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
class: CommandLineTool
cwlVersion: v1.2
id: gatherdis2
doc: |-
Gathers the final direction dependent solutions from the DDF-pipeline
and other files required for the subtraction: the clean component model,
the facet layout and the clean mask.
baseCommand:
- convert_DIS2_to_h5parm.sh
inputs:
- id: msin
type: Directory
doc: |
MeasurementSet to extract a reference station list from
for adding dummy international stations to the final h5parm.
inputBinding:
position: 2
- id: ddf_solsdir
type: Directory
doc: |-
Path to the SOLSIDR directory of the DDF-pipeline containing the
folders with DIS2 solutions.
inputBinding:
position: 1
- id: h5merger
type: Directory
doc: Path to external LOFAR helper scripts for merging h5 files.
inputBinding:
position: 0
valueFrom: $(self.path)/h5_merger.py
outputs:
- id: dis2_h5parm
type: File
doc: The direction independent solutions from DDF-pipeline.
outputBinding:
glob: DIS2_full.h5
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
......@@ -48,3 +48,6 @@ outputs:
outputBinding:
glob: delay_dir.csv
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
class: CommandLineTool
cwlVersion: v1.2
id: make-mslist
doc: |-
Generates a text file containing the names of MeasurementSets, used by e.g. the DDF-pipeline.
This will be the input for the subtract. This requires DDF-pipeline to be installed.
baseCommand:
- make_mslist_subtract.sh
inputs:
- id: ms
type: Directory
doc: Input MeasurementSet.
inputBinding:
position: 1
outputs:
- id: mslist
type: File
doc: Text file containing the names of the MeasurementSet.
outputBinding:
glob: big-mslist.txt
requirements:
- class: InitialWorkDirRequirement
listing:
- entry: $(inputs.ms)
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
class: CommandLineTool
cwlVersion: v1.2
id: makebox
doc: Generate a square DS9 region file centred on the observation phase centre.
baseCommand:
- make_box.py
inputs:
- id: ms
type: Directory
doc: MeasurementSet to take the phase centre from.
inputBinding:
position: 0
- id: box_size
type: float?
doc: Size in degrees of the sides of the square box. Defaults to 2.5 deg.
default: 2.5
inputBinding:
position: 1
outputs:
- id: box
type: File
doc: DS9 region file.
outputBinding:
glob: boxfile.reg
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
class: CommandLineTool
cwlVersion: v1.2
id: subtract-LoTSS
label: Subtract a LoTSS model from the data.
doc: Subtract a LoTSS model from the data using the images and DD solutions derived by the ddf-pipeline. This requires DDFacet.
baseCommand:
- sub-sources-outside-region.py
arguments:
- --onlyuseweightspectrum
- --noconcat
- --keeplongbaselines
- --nophaseshift
- --nofixsym
inputs:
- id: ms
type: Directory
doc: Input MeasurementSet to subtract the LoTSS model from.
- id: solsdir
type: Directory
doc: Path to the SOLSDIR directory of the ddf-pipeline run.
- id: boxfile
type: File
doc: DS9 region file outside which to subtract.
inputBinding:
position: 0
prefix: --boxfile
- id: mslist
type: File
doc: Text file with a list of MeasurementSets to subtract.
inputBinding:
position: 1
prefix: --mslist
- id: prefix
type: string?
doc: Prefix for the output MeasurementSet after subtracting. Defaults to sub6asec.
default: sub6asec
inputBinding:
prefix: --prefix
- id: column
type: string?
doc: Column from which to subtract. Defaults to DATA_DI_CORRECTED.
default: "DATA_DI_CORRECTED"
inputBinding:
position: 2
prefix: --column
- id: freqavg
type: int?
doc: Frequency averaging factor to average with after the subtract. Defaults to 1.
default: 1
inputBinding:
position: 3
prefix: --freqavg
- id: timeavg
type: int?
doc: Time averaging factor to average with after the subtract. Defaults to 1.
default: 1
inputBinding:
position: 4
prefix: --timeavg
- id: ncpu
type: int?
doc: Number of cores to use during the subtract. Defaults to 24.
default: 24
inputBinding:
position: 5
prefix: --ncpu
- id: chunkhours
type: float?
doc: The range of time to predict the model for at once. Lowering this value reduces memory footprint, but can increase runtime.
default: 0.5
inputBinding:
position: 6
prefix: --chunkhours
- id: dds3sols
type: File[]
doc: DDS3 solution files from the ddf-pipeline run.
- id: fitsfiles
type: File[]
doc: The clean mask of the final image from the ddf-pipeline run.
- id: dicomodels
type: File[]
doc: The clean component model of the final image from the ddf-pipeline run.
- id: facet_layout
type: File
doc: The facet layout from the ddf-pipeline run.
outputs:
- id: subms
type: Directory
doc: MeasurementSet containing the subtracted data.
outputBinding:
glob: $(inputs.prefix)*.ms
requirements:
- class: InlineJavascriptRequirement
- class: InitialWorkDirRequirement
listing:
- entry: $(inputs.ms)
writable: true
- entry: $(inputs.solsdir)
- entry: $(inputs.dds3sols)
- entry: $(inputs.fitsfiles)
- entry: $(inputs.dicomodels)
- entry: $(inputs.facet_layout)
- class: ResourceRequirement
coresMax: $(inputs.ncpu)
coresMin: $(inputs.ncpu)
hints:
- class: DockerRequirement
dockerPull: vlbi-cwl
......@@ -49,6 +49,6 @@ def test_compare_stations():
filter = "*&"
solset = f"{data_dir}/results_target/cal_solutions.h5"
solset_name = "vlbi"
msin = glob.glob(f"{data_dir}/L*")
msin = glob.glob(f"{data_dir}/L667520*.MS")
result = main_compare_stations(msin, filter=filter, h5parmdb=solset, solset_name=solset_name)
assert result == reference
......@@ -15,11 +15,11 @@ inputs:
Input data in MeasurementSets. A-team data
has been removed in the setup workflow.
- id: ddf_solset
type: File?
- id: ddf_solsdir
type: Directory?
doc: |
The solution tables generated by the
DDF pipeline in an HDF5 format.
The SOLSDIR directory of the ddf-pipeline run
containing the DIS2 solutions.
- id: numbands
type: int?
......@@ -56,6 +56,10 @@ inputs:
will be used by AOFlagger (and should be
available before an AOFlagger job can start).
- id: h5merger
type: Directory
doc: External LOFAR helper scripts for merging h5 files.
steps:
- id: get_memory
in:
......@@ -91,19 +95,35 @@ steps:
source: firstSB
- id: linc_libraries
source: collect_linc_libraries/libraries
- id: stepname
default: '_pre-cal.ms'
out:
- id: filenames
- id: groupnames
- id: logfile
run: ../steps/sort_concatmap.cwl
label: sort_concatmap
- id: convert_ddf_dis2
in:
- id: msin
source: msin
valueFrom: $(self[0])
- id: ddf_solsdir
source: ddf_solsdir
valueFrom: $(self)
- id: h5merger
source: h5merger
out:
- id: dis2_h5parm
when: $(inputs.ddf_solsdir != null)
run: ../steps/gatherdis2.cwl
- id: concatenate-flag
in:
- id: msin
source:
- msin
- id: ddf_solset
source: ddf_solset
source: convert_ddf_dis2/dis2_h5parm
- id: group_id
source: sort_concatenate/groupnames
- id: groups_specification
......
......@@ -9,6 +9,8 @@ doc: |
* concatenates the data in groups of 10, performs
flagging on the international stations, (optionally)
applies DDF solutions to the data,
* optionally subtracts the LoTSS model outside a
user-specifiable region.
* creates a MeasurementSet with data phase-shifted
to a given delay calibrator, calibrated for direction-
independent effects.
......@@ -16,6 +18,8 @@ doc: |
requirements:
- class: SubworkflowFeatureRequirement
- class: MultipleInputFeatureRequirement
- class: StepInputExpressionRequirement
- class: InlineJavascriptRequirement
inputs:
- id: msin
......@@ -92,6 +96,33 @@ inputs:
default: 5
doc: The number of threads per DP3 process.
- id: ddf_solsdir
type: Directory?
doc: |
[Required if subtracting LoTSS] Path to the SOLSDIR directory
of the DDF-pipeline run, where most of the calibration solutions
are stored.
- id: ddf_rundir
type: Directory?
doc: |
[Required if subtracting LoTSS] Path to the directory of the
DDF-pipeline run where files required for the subtract can be found.
- id: box_size
type: float?
default: 2.5
doc: |
[Required if subtracting LoTSS] Box size, in degrees, outside of which to subtract
the LoTSS model from the data.
- id: subtract_chunk_hours
type: float?
default: 0.5
doc: |
The range of time to predict the LoTSS model for at once. Lowering this value reduces
memory footprint at the (possible) cost of increased runtime and vice versa.
steps:
- id: setup
label: setup
......@@ -127,10 +158,12 @@ steps:
source: reference_stationSB
- id: max_dp3_threads
source: max_dp3_threads
- id: ddf_solset
source: ddf_solset
- id: ddf_solsdir
source: ddf_solsdir
- id: linc
source: linc
- id: h5merger
source: h5merger
out:
- id: logdir
- id: concat_flags
......@@ -138,10 +171,37 @@ steps:
run: ./concatenate-flag.cwl
label: sort-concatenate-flag
- id: phaseup
- id: subtract_lotss
in:
- id: msin
source: sort-concatenate-flag/msout
- id: solsdir
source: ddf_solsdir
valueFrom: $(self)
- id: ddf_rundir
source: ddf_rundir
valueFrom: $(self)
- id: box_size
source: box_size
- id: ncpu
source: number_cores
- id: chunkhours
source: subtract_chunk_hours
out:
- id: regionbox
- id: mslist
- id: msout
run: ./lotss_subtract.cwl
when: $(inputs.ddf_rundir != null && inputs.solsdir != null)
- id: phaseup
in:
- id: msin
source:
- subtract_lotss/msout
- sort-concatenate-flag/msout
linkMerge: merge_nested
pickValue: first_non_null
- id: delay_calibrator
source: delay_calibrator
- id: configfile
......@@ -194,7 +254,10 @@ outputs:
format, phase-shifted to the delay calibrator.
- id: msouts
outputSource: sort-concatenate-flag/msout
outputSource:
- subtract_lotss/msout
- sort-concatenate-flag/msout
pickValue: first_non_null
type: Directory[]
doc: |
The concatenated data in MeasurementSet format after
......
class: Workflow
cwlVersion: v1.2
id: subtract_lotss
doc: |-
Subtract a LoTSS model from the data using results from the DDF-pipeline.
This prepares the data for widefield imaging by subtracting sources outside a given region,
defaulting to the approximate FWHM of the international stations.
inputs:
- id: msin
type: Directory[]
doc: Input data from which the LoTSS skymodel will be subtracted.
- id: solsdir
type: Directory
doc: Path to the SOLSDIR directory of the DDF-pipeline run.
- id: ddf_rundir
type: Directory
doc: Directory containing the output from DDF-pipeline.
- id: box_size
type: float?
doc: |-
Side length of a square box in degrees. The LoTSS skymodel is subtracted outside of this box.
Defaults to 2.5 degrees.
default: 2.5
- id: freqavg
type: int?
doc: Number of frequency channels to average after the subtract has been performed. Defaults to 1 (no averaging).
default: 1
- id: timeavg
type: int?
doc: Number of time slots to average after the subtract has been performed. Defaults to 1 (no averaging).
default: 1
- id: ncpu
type: int?
doc: Number of cores to use during the subtract. Defaults to 24.
default: 24
- id: chunkhours
type: float?
doc: The range of time to predict the model for at once. Lowering this value reduces memory footprint, but can increase runtime.
outputs:
- id: regionbox
type: File
outputSource:
- makebox/box
doc: DS9 region file outside of which the LoTSS skymodel has been subtracted.
- id: mslist
type: File[]
outputSource:
- makemslist/mslist
doc: Text file containing the name of the input MS from which the LoTSS skymodel hase been subtracted.
- id: msout
type: Directory[]
outputSource:
- subtract/subms
doc: MS from which the LoTSS skymodel has been subtracted.
steps:
- id: makebox
in:
- id: ms
source: msin
valueFrom: $(self[0])
- id: box_size
source: box_size
out:
- id: box
run: ../steps/makebox.cwl
doc: Make the box outside which the LoTSS skymodel will be subtracted.
- id: makemslist
in:
- id: ms
source: msin
out:
- id: mslist
run: ../steps/make_mslist.cwl
scatter: ms
doc: Make the list of MSes to subtract.
- id: gather_dds3
in:
- id: ddf_rundir
source: ddf_rundir
out:
- id: dds3sols
- id: fitsfiles
- id: dicomodels
- id: facet_layout
run: ../steps/gatherdds3.cwl
doc: Gather the solutions and images required to subtract the LoTSS model.
- id: fix_symlinks
in:
- id: ddf_rundir
source: ddf_rundir
- id: ddf_solsdir
source: solsdir
out:
- id: logfiles
- id: solsdir
run: ../steps/fix_symlinks_ddf.cwl
- id: subtract
in:
- id: ms
source: msin
- id: boxfile
source: makebox/box
- id: mslist
source: makemslist/mslist
- id: column
valueFrom: DATA_DI_CORRECTED
- id: solsdir
source: fix_symlinks/solsdir
- id: dds3sols
source: gather_dds3/dds3sols
- id: fitsfiles
source: gather_dds3/fitsfiles
- id: dicomodels
source: gather_dds3/dicomodels
- id: facet_layout
source: gather_dds3/facet_layout
- id: freqavg
source: freqavg
- id: timeavg
source: timeavg
- id: ncpu
source: ncpu
- id: chunkhours
source: chunkhours
out:
- id: subms
run: ../steps/subtract.cwl
scatter:
- ms
- mslist
scatterMethod: dotproduct
doc: Subtract the LoTSS model from the data.
requirements:
- class: ScatterFeatureRequirement
- class: StepInputExpressionRequirement
......@@ -53,7 +53,7 @@ steps:
source: msin
- id: msout_name
source: msin
valueFrom: $("out_"+self.basename)
valueFrom: $(self.basename)
- id: solset
source: solset
out:
......
......@@ -11,8 +11,8 @@ doc: |
which case DP3 memory constraints and LINC flagging
strategies must be given as inputs.
- After flagging the data from the Dutch stations can
be corrected for direction dependent effects. This
requires the user to specify DDF solutions.
be corrected with additional direction-independent corrections
from the ddf-pipeline. This requires the user to specify DDF solutions.
inputs:
- id: msin
......@@ -24,7 +24,7 @@ inputs:
- id: ddf_solset
type: File?
doc: |
The solution tables generated by the
The DIS2 solution tables generated by the
DDF pipeline in an HDF5 format. Must
be set if the data from the Dutch
stations should be corrected for
......@@ -106,7 +106,8 @@ steps:
when: $((inputs.linc_libraries != null) && (inputs.memory != null))
run: ../../steps/aoflagger.cwl
label: AOflagging
- id: dp3_applycal
- id: dp3_applycal_ddf
in:
- id: msin
source:
......@@ -114,12 +115,14 @@ steps:
pickValue: first_non_null
- id: ddf_solset
source: ddf_solset
valueFrom: $(self)
out:
- id: msout
- id: output_data
- id: logfile
when: $(inputs.ddf_solset != null)
run: ../../steps/dp3_applycal.cwl
label: dp3_applycal
run: ../../steps/dp3_applycal_ddf.cwl
label: dp3_applycal_ddf
- id: concat_logfiles_aoflagging
in:
- id: file_list
......@@ -153,7 +156,7 @@ steps:
outputs:
- id: msout
outputSource:
- dp3_applycal/msout
- dp3_applycal_ddf/output_data
- AOflagging/msout
- dp3_concat/msout
pickValue: first_non_null
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment