RAP-839 Add minimal set of parameters for the MVP

b7e82b74 · Mick Veldhuis · c97453c4 · b7e82b74 · b7e82b74 · b7e82b74
Commit b7e82b74 authored 8 months ago by Mick Veldhuis
--- a/.gitignore
+++ b/.gitignore
--- a/README.md
+++ b/README.md
@@ -3,3 +3,16 @@
 The Low-Frequency Array (LOFAR) Pre-Processing Pipeline is a workflow meant to be run on raw LOFAR data (post-correlation), and includes initial flagging, removal of bright off-axis sources (demixing), averaging in time and frequency, and Dysco compression.

 This is an implementation of the pre-processing pipeline in the Common Workflow Language (CWL), which is replacing the Generic Pipeline implementation.
+
+## Running the Pipeline
+
+The pipeline is not yet in a state where it should be run! However, if you would like to test the development version, please refer to `tests/README.md`.
+
+## Commissioning Notice
+
+For commissioning purposes, a number of parameters are exposed that will most likely be removed at a later stage. This concerns the following parameters:
+- `msout_storagemanager`: currently exposed for comparison
+- `dp3_checkparset`
+- `dysco_databitrate`
+- `dysco_weightbitrate`
+- `dysco_distribution`
--- a/steps/preprocess.cwl
+++ b/steps/preprocess.cwl
@@ -10,18 +10,48 @@ doc: |
  can further be processed by, e.g., the LOFAR Initial Calibration (LINC) Pipeline.

 arguments:
-  - steps=[aoflagger,demix]
-  - msin.autoweight=true
-  - msout.storagemanager=dysco
+  - steps=[edgeflagger,corrflagger,ampflagger,aoflagger,demix]
+  - edgeflagger.type=preflagger
+  - edgeflagger.chan=[0..nchan/32-1,31*nchan/32..nchan-1]
+  - corrflagger.type=preflagger
+  - ampflagger.type=preflagger
+  - demix.uselbfgssolver=true
+  - demix.lbfgs.solution.range=[-10000,10000]
+  - time_logging=true
+  - memory_logging=true

 inputs:
+  - id: sasid
+    type: string?
+    doc: SAS process identifier
  - id: msin
    type: Directory
    inputBinding:
      prefix: msin=
      separate: false
    doc: Input raw LOFAR MS
-  - id: rfistrategy
+  - id: msin_autoweight
+    type: boolean?
+    inputBinding:
+      prefix: msin.autoweight=true
+      separate: false
+    default: true
+    doc: Enable setting weights for raw LOFAR MSs
+  - id: preflag_corrtype
+    type: string?
+    inputBinding:
+      prefix: corrflagger.corrtype=
+      separate: false
+    default: "auto"
+    doc: Type of correlation to flag
+  - id: preflag_min_amplitude
+    type: double?
+    inputBinding:
+      prefix: ampflagger.amplmin=
+      separate: false
+    default: 1E-30
+    doc: Flag amplitudes below this value
+  - id: aoflagger_rfistrategy
    type:
      - string?
      - File?
@@ -74,12 +104,82 @@ inputs:
      prefix: demix.freqstep=
      separate: false
    doc: Number of channels to average
+  - id: demix_baselines
+    type: string?
+    inputBinding:
+      prefix: demix.baseline=
+      separate: false
+    default: "[CR]S*&"
+    doc: Baselines to demix
+  - id: demix_ignoretarget
+    type: boolean?
+    inputBinding:
+      prefix: demix.ignoretarget=true
+      separate: false
+    default: false
+    doc: Ignore target while demixing
+  - id: demix_lbfgs_historysize
+    type: int?
+    inputBinding:
+      prefix: demix.lbfgs.historysize=
+      separate: false
+    default: 10
+    doc: History size to approximate the inverse Hessian
+  - id: demix_lbfgs_robustdof
+    type: int?
+    inputBinding:
+      prefix: demix.lbfgs.robustdof=
+      separate: false
+    default: 200
+    doc: Noise model degrees-of-freedom
  - id: msout_name
    type: string
    inputBinding:
      prefix: msout=
      separate: false
    doc: Output MS
+  - id: msout_storagemanager
+    type: string?
+    inputBinding:
+      prefix: msout.storagemanager=
+      separate: false
+    default: dysco
+    doc: The storage manager used
+  - id: dysco_distribution
+    type: string?
+    inputBinding:
+      prefix: msout.storagemanager.distribution=
+      separate: false
+    default: TruncatedGaussian
+    doc: Compression distribution
+  - id: dysco_databitrate
+    type: int?
+    inputBinding:
+      prefix: msout.storagemanager.databitrate=
+      separate: false
+    default: 10
+    doc: Bits per float used to represent visibilities
+  - id: dysco_weightbitrate
+    type: int?
+    inputBinding:
+      prefix: msout.storagemanager.weightbitrate=
+      separate: false
+    default: 12
+    doc: Bits per float used for the weights
+  - id: dp3_checkparset
+    type: int?
+    inputBinding:
+      prefix: checkparset=
+      separate: false
+    default: 0
+    doc: Check parset for unused or invalid parameters
+  - id: dp3_numthreads
+    type: int?
+    inputBinding:
+      prefix: numthreads=
+      separate: false
+    default: 10
+    doc: Maximum number of threads used by DP3

 outputs:
  - id: msout

--- a/tests/README.md
+++ b/tests/README.md
@@ -25,3 +25,11 @@ For example, with `cwltool` you could run the top-level `pipeline.cwl` workflow
 cwltool --debug --preserve-entire-environment --outdir=pipeline-out $PREPROCESS_ROOT/workflows/pipeline.cwl $PREPROCESS_ROOT/tests/pipeline_input.json
 ```
 where `PREPROCESS_ROOT` refers to the location of the repository on your system. Please modify the command as desired, for an overview of possible arguments run `cwltool --help` or refer to their [Read the Docs pages](https://cwltool.readthedocs.io/).
+
+## Validate tests
+
+Since this pipeline is in its early development stages, it might be useful to validate the workflow and its input. This is achieved by running:
+```
+cwltool --validate $PREPROCESS_ROOT/workflows/pipeline.cwl $PREPROCESS_ROOT/tests/pipeline_input.json
+```
+This will not only validate the `pipeline.cwl` workflow, but also throw an error when required parameters are missing from `pipeline_input.json`.
--- a/tests/pipeline_input.json
+++ b/tests/pipeline_input.json
 {
+    "sasid": "123456",
    "msin": [
        {
            "class": "Directory",

--- a/workflows/pipeline.cwl
+++ b/workflows/pipeline.cwl
@@ -7,10 +7,25 @@ doc: |
  compression of raw LOFAR data.

 inputs:
+  - id: sasid
+    type: string
+    doc: SAS process identifier
  - id: msin
    type: Directory[]
    doc: List of raw LOFAR MeasurementSets (MSs)
-  - id: rfistrategy
+  - id: msin_autoweight
+    type: boolean?
+    default: true
+    doc: Enable setting weights for raw LOFAR MSs
+  - id: preflag_corrtype
+    type: string?
+    default: "auto"
+    doc: Type of correlation to flag
+  - id: preflag_min_amplitude
+    type: double?
+    default: 1E-30
+    doc: Flag amplitudes below this value
+  - id: aoflagger_rfistrategy
    type: File?
    default:
      class: File
@@ -38,6 +53,46 @@ inputs:
  - id: avg_freqstep
    type: int
    doc: Number of channels to average
+  - id: demix_baselines
+    type: string?
+    default: "[CR]S*&"
+    doc: Baselines to demix
+  - id: demix_ignoretarget
+    type: boolean?
+    default: false
+    doc: Ignore target while demixing
+  - id: demix_lbfgs_historysize
+    type: int?
+    default: 10
+    doc: History size to approximate the inverse Hessian
+  - id: demix_lbfgs_robustdof
+    type: int?
+    default: 200
+    doc: Noise model degrees-of-freedom
+  - id: msout_storagemanager
+    type: string?
+    default: dysco
+    doc: The storage manager used
+  - id: dysco_distribution
+    type: string?
+    default: TruncatedGaussian
+    doc: Compression distribution
+  - id: dysco_databitrate
+    type: int?
+    default: 10
+    doc: Bits per float used to represent visibilities
+  - id: dysco_weightbitrate
+    type: int?
+    default: 12
+    doc: Bits per float used for the weights
+  - id: dp3_checkparset
+    type: int?
+    default: 0
+    doc: Check parset for unused or invalid parameters
+  - id: dp3_numthreads
+    type: int?
+    default: 10
+    doc: Maximum number of threads used by DP3

 outputs:
  - id: msout
@@ -57,10 +112,18 @@ steps:
    scatterMethod: dotproduct
    run: ../steps/preprocess.cwl
    in:
+      - id: sasid
+        source: sasid
      - id: msin
        source: msin
-      - id: rfistrategy
-        source: rfistrategy
+      - id: msin_autoweight
+        source: msin_autoweight
+      - id: preflag_corrtype
+        source: preflag_corrtype
+      - id: preflag_min_amplitude
+        source: preflag_min_amplitude
+      - id: aoflagger_rfistrategy
+        source: aoflagger_rfistrategy
      - id: demix_skymodel
        source: demix_skymodel
      - id: demix_sources
@@ -73,11 +136,29 @@ steps:
        source: avg_timestep
      - id: avg_freqstep
        source: avg_freqstep
+      - id: demix_baselines
+        source: demix_baselines
+      - id: demix_ignoretarget
+        source: demix_ignoretarget
+      - id: demix_lbfgs_historysize
+        source: demix_lbfgs_historysize
+      - id: demix_lbfgs_robustdof
+        source: demix_lbfgs_robustdof
+      - id: msout_storagemanager
+        source: msout_storagemanager
+      - id: dysco_distribution
+        source: dysco_distribution
+      - id: dysco_databitrate
+        source: dysco_databitrate
+      - id: dysco_weightbitrate
+        source: dysco_weightbitrate
      - id: msout_name
        source: msin
-        # TODO: determine proper output scheme for preprocessed MSs. 
-        # Maybe append to file name or in new directory?
-        valueFrom: $(self.basename + ".prepout")
+        valueFrom: $("L" + inputs.sasid + self.basename.slice(self.basename.indexOf("_")))
+      - id: dp3_checkparset
+        source: dp3_checkparset
+      - id: dp3_numthreads
+        source: dp3_numthreads
    out: [msout]

 requirements: