diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d4731a6ccdc8b3b80c226a0cba8229f3ef55ab77 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# Virtual env +.venv + +# Measurement Sets +*.MS \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20f3e857d530fa0dad8f9701cd55d855884581dc --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# Lofar imaging compression pipeline + +This is a CWL workflow used to compress Lofar imaging data. + +## Workflow steps + + + +- Identify known issues and apply the corresponding fixes if necessary. It uses in-place updates to avoid copying the measurement set + - in-place updates do not work well with CWL conditional steps, so a small bash script is used which either calls the fixing script or does nothing, depending on whether the specific issue for the step was found +- Compress the measurement set with Dysco +- Produce inspection plots and collect metrics to quantify the amount of missing/flagged data + + +## Requirements + +- CWL v1.2 compatible runner (e.g. cwltool/toil) +- Docker + +## Docker images + +- astronsdc/lofar-legacy +- astronsdc/lofar-ms-software + +## Running the workflow +In the repository there are two workflows. +One is capable of processing the data as described in the section [workflow steps](#workflow-steps) and can be executed as follows: +```bash +# Run the workflow +cwltool compress_pipeline.cwl [--flag_autocorrelation] --msin MEASUREMENT_SET +``` + +The other workflow, which is meant to be executed by the LDV infrastructure, takes as input a SURL link to the data instead of a measurement set. 
Such a workflow can be executed with the command +```bash +# Run the workflow +cwltool download_and_compress_pipeline.cwl [--flag_autocorrelation] --surls [list of surl to process] +``` + +## License + +See `LICENSE` diff --git a/compress_pipeline.cwl b/compress_pipeline.cwl index f83ba79ec6e8ae9946ae3b31a8fa3dab36817975..fcd6f49a0c1b30ef7d3b660e0db115bb8ef600f8 100644 --- a/compress_pipeline.cwl +++ b/compress_pipeline.cwl @@ -1,5 +1,5 @@ class: Workflow -cwlVersion: v1.1 +cwlVersion: v1.2 id: compress_pipeline_cwl label: compress_pipeline.cwl inputs: @@ -33,6 +33,57 @@ outputs: outputSource: - inspect_flagging_dataloss/flags_output steps: + - id: identify_issues + run: steps/identify_issues.cwl + in: + - id: msin + source: msin + out: + - issue_list + - msout + - id: fix_ai_2013 + run: steps/fix_antenna_information_2013.cwl + in: + - id: msin + source: identify_issues/msout + - id: apply + source: identify_issues/issue_list + valueFrom: $(self.includes("FIX_ANTENNA_TABLE")) + out: + - id: msout + - id: fix_weight_issue + run: steps/fix_weightissue_flagging.cwl + in: + - id: msin + source: + - fix_ai_2013/msout + - id: apply + source: identify_issues/issue_list + valueFrom: $(self.includes("FIX_WEIGHT_SPECTRUM")) + out: + - id: msout + - id: fix_ai_2015 + run: steps/fix_antenna_information_2015.cwl + in: + - id: msin + source: + - fix_weight_issue/msout + - id: apply + source: identify_issues/issue_list + valueFrom: $(self.includes("FIX_BROKEN_TILES")) + out: + - id: msout + - id: fix_baselines + run: steps/fix_long_baselines.cwl + in: + - id: msin + source: + - fix_ai_2015/msout + - id: apply + source: identify_issues/issue_list + valueFrom: $(self.includes("FIX_STATION_ADDER")) + out: + - id: msout - id: extract_sip_meta in: - id: msin @@ -47,9 +98,10 @@ steps: - id: parset source: define_parset/output - id: msin - source: msin + source: + - fix_baselines/msout - id: msout_name - source: msin + source: fix_baselines/msout valueFrom: '$("COMPRESSED_" + 
self.basename)' - id: writefullresflag default: true @@ -81,7 +133,8 @@ steps: - id: inspect_flagging_dataloss in: - id: input - source: msin + source: + - fix_baselines/msout out: - id: output - id: flags_output @@ -105,3 +158,4 @@ steps: requirements: - class: StepInputExpressionRequirement - class: InlineJavascriptRequirement + - class: MultipleInputFeatureRequirement diff --git a/docker/Dockerfile.identify_issues b/docker/Dockerfile.identify_issues new file mode 100644 index 0000000000000000000000000000000000000000..bfcecb91344193be2962d6ca6bc4205954dfa49e --- /dev/null +++ b/docker/Dockerfile.identify_issues @@ -0,0 +1,6 @@ +FROM python:3.9-slim + +WORKDIR /scripts +COPY requirements.txt /scripts/ +RUN pip install -r requirements.txt +COPY fix_common_ms_issues.py /scripts/ \ No newline at end of file diff --git a/pipeline.png b/pipeline.png new file mode 100644 index 0000000000000000000000000000000000000000..0001d7f2ad03f79e6f2987f4c69b7d60024f0a2a Binary files /dev/null and b/pipeline.png differ diff --git a/steps/fix_antenna_information_2013.cwl b/steps/fix_antenna_information_2013.cwl new file mode 100644 index 0000000000000000000000000000000000000000..aaf5e8b94e41461ce911f44f7b5bf7ec7f2d7c58 --- /dev/null +++ b/steps/fix_antenna_information_2013.cwl @@ -0,0 +1,40 @@ +id: fix_antenna_information_2013 +label: Fix Antenna Information 2013 +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: + - bash + - script.sh +inputs: + - id: msin + type: Directory + - id: apply + type: boolean +outputs: + - id: msout + type: Directory + outputBinding: + glob: $(inputs.msin.basename) + +requirements: + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.msin) + writable: true + entryname: $(inputs.msin.basename) + - entryname: script.sh + entry: | + #!/bin/bash + # Run the fix only when the 'apply' input is true; a failing fix fails the step. + execute=$(inputs.apply) + if [ $execute = 'true' ] ; then + echo "Appling fix antenna information 2013" + fixinfo $(inputs.msin.basename) /opt/fixinfo + else + echo "Skipping apply fix antenna 
information 2013" + fi + - class: InplaceUpdateRequirement + inplaceUpdate: true +hints: + - class: DockerRequirement + dockerPull: "astronsdc/lofar-legacy:latest" diff --git a/steps/fix_antenna_information_2015.cwl b/steps/fix_antenna_information_2015.cwl new file mode 100644 index 0000000000000000000000000000000000000000..27acaa1777f7451169a0e3c0442ec5747104f294 --- /dev/null +++ b/steps/fix_antenna_information_2015.cwl @@ -0,0 +1,43 @@ +id: fix_antenna_information_2015 +label: Fix Antenna Information 2015 +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: + - bash + - script.sh + +inputs: + - id: msin + type: Directory + - id: apply + type: boolean + +outputs: + - id: msout + type: Directory + outputBinding: + glob: $(inputs.msin.basename) + + +requirements: + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.msin) + writable: true + entryname: $(inputs.msin.basename) + - entryname: script.sh + entry: | + #!/bin/bash + # Run the fix only when the 'apply' input is true; a failing fix fails the step. + execute=$(inputs.apply) + if [ $execute = 'true' ] ; then + echo "Appling fix antenna information 2015" + fixbeaminfo $(inputs.msin.basename) /opt/fixbeaminfo + else + echo "Skipping apply fix antenna information 2015" + fi + - class: InplaceUpdateRequirement + inplaceUpdate: true +hints: + - class: DockerRequirement + dockerPull: "astronsdc/lofar-legacy:latest" diff --git a/steps/fix_long_baselines.cwl b/steps/fix_long_baselines.cwl new file mode 100644 index 0000000000000000000000000000000000000000..ce51c9de3f3251b378793c7cbee343577d5102dd --- /dev/null +++ b/steps/fix_long_baselines.cwl @@ -0,0 +1,45 @@ +id: fix_long_baseline +label: Fix weighted sum for long baseline pipeline +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: + - bash + - script.sh + +requirements: + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.msin) + writable: true + entryname: $(inputs.msin.basename) + - entryname: script.sh + entry: | + #!/bin/bash + # Run the fix only when the 'apply' input is true. + execute=$(inputs.apply) + if [ $execute = 'true' ] ; then + echo "Appling fix 
weighted sum UVW" + fix_weightedsum_uvw -f $(inputs.msin.basename) -n $(inputs.new_station) + else + echo "Skipping apply fix weighted sum UVW" + fi + - class: InplaceUpdateRequirement + inplaceUpdate: true +inputs: + - id: msin + type: Directory + - id: new_station + doc: new station name (default ST001) + type: string? + default: ST001 + - id: apply + type: boolean +outputs: + - id: msout + type: Directory + outputBinding: + glob: $(inputs.msin.basename) + +hints: + - class: DockerRequirement + dockerPull: "astronsdc/lofar-legacy:latest" diff --git a/steps/fix_weightissue_flagging.cwl b/steps/fix_weightissue_flagging.cwl new file mode 100644 index 0000000000000000000000000000000000000000..641e179768cf7539e5056a654886b6b50ebd65b2 --- /dev/null +++ b/steps/fix_weightissue_flagging.cwl @@ -0,0 +1,39 @@ +id: fix_weight_issue_flagging +label: Fix Weight Issue Flagging +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: + - bash + - script.sh +requirements: + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.msin) + writable: true + entryname: $(inputs.msin.basename) + - entryname: script.sh + entry: | + #!/bin/bash + # Run the fix only when the 'apply' input is true; a failing fix fails the step. + execute=$(inputs.apply) + if [ $execute = 'true' ] ; then + echo "Appling fix weight issue flagging" + fix_weightspectrum $(inputs.msin.basename) + else + echo "Skipping apply fix weight issue flagging" + fi + - class: InplaceUpdateRequirement + inplaceUpdate: true +inputs: + - id: msin + type: Directory + - id: apply + type: boolean +outputs: + - id: msout + type: Directory + outputBinding: + glob: $(inputs.msin.basename) +hints: + - class: DockerRequirement + dockerPull: "astronsdc/lofar-legacy:latest" diff --git a/steps/identify_issues.cwl b/steps/identify_issues.cwl new file mode 100644 index 0000000000000000000000000000000000000000..e0023870bd01935ea94528efd46d8c81b194fe00 --- /dev/null +++ b/steps/identify_issues.cwl @@ -0,0 +1,39 @@ +id: identify_issues +label: Identify Known issues +class: CommandLineTool +cwlVersion: v1.0 +hints: + 
DockerRequirement: + dockerPull: astronsdc/lofar-legacy:latest + +inputs: + - id: msin + type: Directory + inputBinding: + position: 1 + +outputs: + - id: issue_list + type: string[] + outputBinding: + glob: output.txt + loadContents: true + outputEval: $(JSON.parse(self[0].contents)) + + - id: msout + type: Directory + outputBinding: + glob: $(inputs.msin.basename) +stdout: output.txt + +baseCommand: + - python3 + - /usr/local/bin/fix_common_ms_issues + +requirements: + - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.msin) + writable: true + entryname: $(inputs.msin.basename) diff --git a/steps/plot_uvw_coverage.cwl b/steps/plot_uvw_coverage.cwl index f188063f70104a5060d8a6ef909c390ee6b7a59d..7dc064d77d1f71e656f15a7ad52b1fad579d590f 100644 --- a/steps/plot_uvw_coverage.cwl +++ b/steps/plot_uvw_coverage.cwl @@ -18,6 +18,9 @@ outputs: type: File outputBinding: glob: $(inputs.output_name) +hints: + - class: DockerRequirement + dockerPull: astronsdc/lofar-ms-software requirements: - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement