From c32309a6ab880c9cda2703f2ec7003852ef1d33d Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Mon, 30 Sep 2019 14:41:03 +0200 Subject: [PATCH] Add python scripts and Docker image --- Docker/Dockerfile | 14 + Docker/entrypoint | 5 + Docker/scripts/Ateamclipper.py | 64 ++ Docker/scripts/BLsmooth.py | 168 ++++ .../InitSubtract_deep_sort_and_compute.py | 482 +++++++++++ .../scripts/InitSubtract_sort_and_compute.py | 479 +++++++++++ Docker/scripts/add_missing_stations.py | 144 ++++ Docker/scripts/check_Ateam_separation.py | 182 ++++ Docker/scripts/check_unflagged_fraction.py | 72 ++ Docker/scripts/concat_MS.py | 137 +++ Docker/scripts/convert_npys_to_h5parm.py | 134 +++ Docker/scripts/createRMh5parm.py | 226 +++++ Docker/scripts/download_skymodel_target.py | 160 ++++ Docker/scripts/find_skymodel_cal.py | 161 ++++ Docker/scripts/fits2sky.py | 195 +++++ Docker/scripts/getStructure_from_phases.py | 226 +++++ Docker/scripts/h5parm_pointingname.py | 41 + Docker/scripts/make_clean_mask.py | 793 ++++++++++++++++++ Docker/scripts/make_source_list.py | 136 +++ Docker/scripts/make_summary.py | 226 +++++ Docker/scripts/merge_skymodels.py | 54 ++ Docker/scripts/pad_image.py | 44 + Docker/scripts/plot_Ateamclipper.py | 49 ++ Docker/scripts/plot_image.py | 213 +++++ Docker/scripts/plot_subtract_images.py | 251 ++++++ Docker/scripts/plot_unflagged_fraction.py | 57 ++ Docker/scripts/plot_uvcov.py | 224 +++++ Docker/scripts/sort_times_into_freqGroups.py | 422 ++++++++++ Docker/scripts/transfer_solutions.py | 178 ++++ 29 files changed, 5537 insertions(+) create mode 100644 Docker/Dockerfile create mode 100644 Docker/entrypoint create mode 100755 Docker/scripts/Ateamclipper.py create mode 100755 Docker/scripts/BLsmooth.py create mode 100755 Docker/scripts/InitSubtract_deep_sort_and_compute.py create mode 100755 Docker/scripts/InitSubtract_sort_and_compute.py create mode 100755 Docker/scripts/add_missing_stations.py create mode 100755 Docker/scripts/check_Ateam_separation.py create mode 100755 Docker/scripts/check_unflagged_fraction.py create mode 100755 Docker/scripts/concat_MS.py create mode 100755 Docker/scripts/convert_npys_to_h5parm.py create mode 100755 Docker/scripts/createRMh5parm.py create mode 100755 Docker/scripts/download_skymodel_target.py create mode 100755 Docker/scripts/find_skymodel_cal.py create mode 100755 Docker/scripts/fits2sky.py create mode 100755 Docker/scripts/getStructure_from_phases.py create mode 100755 Docker/scripts/h5parm_pointingname.py create mode 100755 Docker/scripts/make_clean_mask.py create mode 100755 Docker/scripts/make_source_list.py create mode 100755 Docker/scripts/make_summary.py create mode 100755 Docker/scripts/merge_skymodels.py create mode 100755 Docker/scripts/pad_image.py create mode 100755 Docker/scripts/plot_Ateamclipper.py create mode 100755 Docker/scripts/plot_image.py create mode 100755 Docker/scripts/plot_subtract_images.py create mode 100755 Docker/scripts/plot_unflagged_fraction.py create mode 100755 Docker/scripts/plot_uvcov.py create mode 100755 Docker/scripts/sort_times_into_freqGroups.py create mode 100755 Docker/scripts/transfer_solutions.py diff --git a/Docker/Dockerfile b/Docker/Dockerfile new file mode 100644 index 00000000..cfe60578 --- /dev/null +++ b/Docker/Dockerfile @@ -0,0 +1,14 @@ +FROM lofaruser/imaging-pipeline +SHELL ["/bin/bash", "-c"] + +RUN groupadd -r lofaruser && useradd --no-log-init -r -g lofaruser lofaruser + +RUN pip install numpy scipy matplotlib pyrap +COPY scripts/* /usr/local/bin/ +RUN chmod +rx 
/usr/local/bin/* +ADD entrypoint /home/lofaruser/.entrypoint + +RUN chown lofaruser:lofaruser /home/lofaruser/.entrypoint +RUN chmod +x /home/lofaruser/.entrypoint +ENTRYPOINT ["/home/lofaruser/.entrypoint"] +USER lofaruser diff --git a/Docker/entrypoint b/Docker/entrypoint new file mode 100644 index 00000000..ef79ac44 --- /dev/null +++ b/Docker/entrypoint @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +source /opt/lofarsoft/lofarinit.sh + +exec "$@" diff --git a/Docker/scripts/Ateamclipper.py b/Docker/scripts/Ateamclipper.py new file mode 100755 index 00000000..da0f12ea --- /dev/null +++ b/Docker/scripts/Ateamclipper.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +## changelog +# W.Williams 2014/11/03 add - to give input/output statistics per channel +# W.Williams 2014/11/03 fix - statistics per correlation +# A.Drabent 2019/07/24 write fraction of flagged data into output file (for prefactor3) + +import numpy +import pyrap.tables as pt +import os, sys + +msname = str(sys.argv[1]) + +cliplevelhba = 5.0 +cliplevellba = 50.0 + +t = pt.table(msname, readonly=False) +data = t.getcol('MODEL_DATA') +flag = t.getcol('FLAG') +freq_tab= pt.table(msname + '::SPECTRAL_WINDOW') +freq = freq_tab.getcol('REF_FREQUENCY') + +if freq[0] > 100e6: + cliplevel = cliplevelhba +if freq[0] < 100e6: + cliplevel = cliplevellba + + +print('------------------------------') +print('SB Frequency [MHz]', freq[0]/1e6) +for chan in range(0,numpy.size(data[0,:,0])): + print('chan %i : %.5f%% input XX flagged' %( chan, 100.*numpy.sum(flag[:,chan,0] == True)/numpy.size(flag[:,chan,0]) )) + print('chan %i : %.5f%% input YY flagged' %( chan, 100.*numpy.sum(flag[:,chan,3] == True)/numpy.size(flag[:,chan,3]) )) +input_flags_xx = 100. * numpy.sum(flag[:,:,0] == True)/numpy.size(flag[:,:,0]) +input_flags_yy = 100. * numpy.sum(flag[:,:,3] == True)/numpy.size(flag[:,:,3]) +print('Total : %.5f%% input XX flagged' %( input_flags_xx )) +print('Total : %.5f%% input YY flagged' %( input_flags_yy )) +print('') +print('Cliplevel used [Jy]', cliplevel) +print('\n\n') + +for pol in range(0,numpy.size(data[0,0,:])): + for chan in range(0,numpy.size(data[0,:,0])): + print('Doing polarization,chan', pol, chan) + idx = numpy.where(abs(data[:,chan,pol]) > cliplevel) + flag[idx,chan,0] = True + flag[idx,chan,1] = True + flag[idx,chan,2] = True + flag[idx,chan,3] = True + +print('') +for chan in range(0,numpy.size(data[0,:,0])): + print('chan %i : %.5f%% output XX flagged' %( chan, 100.*numpy.sum(flag[:,chan,0] == True)/numpy.size(flag[:,chan,0]) )) + print('chan %i : %.5f%% output YY flagged' %( chan, 100.*numpy.sum(flag[:,chan,3] == True)/numpy.size(flag[:,chan,3]) )) +output_flags_xx = 100. * numpy.sum(flag[:,:,0] == True)/numpy.size(flag[:,:,0]) +output_flags_yy = 100. 
* numpy.sum(flag[:,:,3] == True)/numpy.size(flag[:,:,3])
+print('Total : %.5f%% output XX flagged' %( output_flags_xx ))
+print('Total : %.5f%% output YY flagged' %( output_flags_yy ))
+print('')
+t.putcol('FLAG', flag)
+t.close()
+freq_tab.close()
+
+os.system('echo ' + str(freq[0]) + ' ' + str(output_flags_xx - input_flags_xx) + ' ' + str(output_flags_yy - input_flags_yy) + ' >> Ateamclipper.txt')
\ No newline at end of file
diff --git a/Docker/scripts/BLsmooth.py b/Docker/scripts/BLsmooth.py
new file mode 100755
index 00000000..0bec38e6
--- /dev/null
+++ b/Docker/scripts/BLsmooth.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2019 - Francesco de Gasperin
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Usage: BLsmooth.py vis.MS
+# Load an MS, average visibilities according to the baseline length,
+# i.e. shorter BLs are averaged more, and write a new MS
+
+import os, sys, time
+import optparse, itertools
+import logging
+import numpy as np
+from scipy.ndimage.filters import gaussian_filter1d as gfilter
+import pyrap.tables as pt
+logging.basicConfig(level=logging.DEBUG)
+
+def addcol(ms, incol, outcol):
+    if outcol not in ms.colnames():
+        logging.info('Adding column: '+outcol)
+        coldmi = ms.getdminfo(incol)
+        coldmi['NAME'] = outcol
+        ms.addcols(pt.makecoldesc(outcol, ms.getcoldesc(incol)), coldmi)
+    if outcol != incol:
+        # copy columns val
+        logging.info('Set '+outcol+'='+incol)
+        pt.taql("update $ms set "+outcol+"="+incol)
+
+opt = optparse.OptionParser(usage="%prog [options] MS", version="%prog 0.1")
+opt.add_option('-f', '--ionfactor', help='Indication of how strong the ionosphere is [default: 0.2]', type='float', default=0.2)
+opt.add_option('-s', '--bscalefactor', help='Indication of how the smoothing varies with baseline length [default: 0.5]', type='float', default=0.5)
+opt.add_option('-i', '--incol', help='Column name to smooth [default: DATA]', type='string', default='DATA')
+opt.add_option('-o', '--outcol', help='Output column [default: SMOOTHED_DATA]', type="string", default='SMOOTHED_DATA')
+opt.add_option('-w', '--weight', help='Save the newly computed WEIGHT_SPECTRUM; this action permanently modifies the MS! [default: False]', action="store_true", default=False)
+opt.add_option('-r', '--restore', help='If WEIGHT_SPECTRUM_ORIG exists then restore it before smoothing [default: False]', action="store_true", default=False)
+opt.add_option('-b', '--nobackup', help='Do not backup the old WEIGHT_SPECTRUM in WEIGHT_SPECTRUM_ORIG [default: do backup if -w]', action="store_true", default=False)
+opt.add_option('-a', '--onlyamp', help='Smooth only amplitudes [default: smooth real/imag]', action="store_true", default=False)
+opt.add_option('-S', '--smooth', help='Performs smoothing (otherwise column will be only copied)', type="string", default=True)
+(options, msfile) = opt.parse_args()
+
+if msfile == []:
+    opt.print_help()
+    sys.exit(0)
+msfile = msfile[0]
+
+if not os.path.exists(msfile):
+    logging.error("Cannot find MS file.")
+    sys.exit(1)
+
+# open input/output MS
+ms = pt.table(msfile, readonly=False, ack=False)
+
+freqtab = pt.table(msfile + '::SPECTRAL_WINDOW', ack=False)
+freq = freqtab.getcol('REF_FREQUENCY')[0]
+freqtab.close()
+wav = 299792458. / freq
+timepersample = ms.getcell('INTERVAL',0)
+
+# check if ms is time-ordered
+times = ms.getcol('TIME_CENTROID')
+if not all(times[i] <= times[i+1] for i in range(len(times)-1)):
+    logging.critical('This code cannot handle MSs that are not time-sorted.')
+    sys.exit(1)
+
+# create column to smooth
+addcol(ms, options.incol, options.outcol)
+# if smoothing should not be performed
+if options.smooth == 'False':
+    sys.exit(0)
+
+# restore WEIGHT_SPECTRUM
+if 'WEIGHT_SPECTRUM_ORIG' in ms.colnames() and options.restore:
+    addcol(ms, 'WEIGHT_SPECTRUM_ORIG', 'WEIGHT_SPECTRUM')
+# backup WEIGHT_SPECTRUM
+elif options.weight and not options.nobackup:
+    addcol(ms, 'WEIGHT_SPECTRUM', 'WEIGHT_SPECTRUM_ORIG')
+
+# iteration on antenna1
+for ms_ant1 in ms.iter(["ANTENNA1"]):
+    ant1 = ms_ant1.getcol('ANTENNA1')[0]
+    a_ant2 = ms_ant1.getcol('ANTENNA2')
+    logging.debug('Working on antenna: %s' % ant1)
+
+    a_uvw = ms_ant1.getcol('UVW')
+    a_data = ms_ant1.getcol(options.outcol)
+    a_weights = ms_ant1.getcol('WEIGHT_SPECTRUM')
+    a_flags = ms_ant1.getcol('FLAG')
+
+    for ant2 in set(a_ant2):
+        if ant1 == ant2: continue # skip autocorr
+        idx = np.where(a_ant2 == ant2)
+
+        uvw = a_uvw[idx]
+        data = a_data[idx]
+        weights = a_weights[idx]
+        flags = a_flags[idx]
+
+        # compute the FWHM
+        uvw_dist = np.sqrt(uvw[:, 0]**2 + uvw[:, 1]**2 + uvw[:, 2]**2)
+        dist = np.mean(uvw_dist) / 1.e3
+        if np.isnan(dist): continue # fix for missing antennas
+
+        stddev = options.ionfactor * (25.e3 / dist)**options.bscalefactor * (freq / 60.e6) # in sec
+        stddev = stddev/timepersample # in samples
+        logging.debug("%s - %s: dist = %.1f km: sigma=%.2f samples." % (ant1, ant2, dist, stddev))
+
+        if stddev == 0: continue # fix for flagged antennas
+        if stddev < 0.5: continue # avoid very small smoothing
+
+        flags[ np.isnan(data) ] = True # flag NaNs
+        weights[flags] = 0 # set weight of flagged data to 0
+        del flags
+
+        # Multiply every element of the data by the weights, convolve both
+        # the scaled data and the weights, and then divide the convolved data
+        # by the convolved weights (translating flagged data into weight=0).
+        # That's basically the equivalent of a running weighted average with
+        # a Gaussian window function.
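+        # A minimal sketch of that identity (editor's illustration, not part
+        # of the original script): for 1-D data d with weights w,
+        #     smoothed = gfilter(d * w, stddev) / gfilter(w, stddev)
+        # reduces to a plain Gaussian running mean when all w are equal, and
+        # samples with w == 0 (flagged data) drop out of the average entirely.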
+ + # set bad data to 0 so nans do not propagate + data = np.nan_to_num(data*weights) + + # smear weighted data and weights + if options.onlyamp: + dataAMP = gfilter(np.abs(data), stddev, axis=0) + dataPH = np.angle(data) + else: + dataR = gfilter(np.real(data), stddev, axis=0)#, truncate=4.) + dataI = gfilter(np.imag(data), stddev, axis=0)#, truncate=4.) + + weights = gfilter(weights, stddev, axis=0)#, truncate=4.) + + # re-create data + if options.onlyamp: + data = dataAMP * ( np.cos(dataPH) + 1j*np.sin(dataPH) ) + else: + data = (dataR + 1j * dataI) + data[(weights != 0)] /= weights[(weights != 0)] # avoid divbyzero + + #print np.count_nonzero(data[~flags]), np.count_nonzero(data[flags]), 100*np.count_nonzero(data[flags])/np.count_nonzero(data) + #print "NANs in flagged data: ", np.count_nonzero(np.isnan(data[flags])) + #print "NANs in unflagged data: ", np.count_nonzero(np.isnan(data[~flags])) + #print "NANs in weights: ", np.count_nonzero(np.isnan(weights)) + + a_data[idx] = data + if options.weight: a_weights[idx] = weights + + #logging.info('Writing %s column.' % options.outcol) + ms_ant1.putcol(options.outcol, a_data) + + if options.weight: + #logging.warning('Writing WEIGHT_SPECTRUM column.') + ms_ant1.putcol('WEIGHT_SPECTRUM', a_weights) + +ms.close() +logging.info("Done.") diff --git a/Docker/scripts/InitSubtract_deep_sort_and_compute.py b/Docker/scripts/InitSubtract_deep_sort_and_compute.py new file mode 100755 index 00000000..0776b06a --- /dev/null +++ b/Docker/scripts/InitSubtract_deep_sort_and_compute.py @@ -0,0 +1,482 @@ +#! /usr/bin/env python +""" +Script to sort a list of MSs into frequency-bands, and compute additional values needed for initsubtract +""" +import casacore.tables as pt +import sys, os +import numpy as np +from lofarpipe.support.data_map import DataMap +from lofarpipe.support.data_map import DataProduct +import argparse +from argparse import RawTextHelpFormatter + +class Band(object): + """ + The Band object contains parameters needed for each band + """ + def __init__(self, MSfiles): + self.files = MSfiles + self.msnames = [ MS.split('/')[-1] for MS in self.files ] + self.numMS = len(self.files) + # Get the frequency info and set name + sw = pt.table(self.files[0]+'::SPECTRAL_WINDOW', ack=False) + self.freq = sw.col('REF_FREQUENCY')[0] + self.nchan = sw.col('NUM_CHAN')[0] + self.chan_freqs_hz = sw.col('CHAN_FREQ')[0] + self.chan_width_hz = sw.col('CHAN_WIDTH')[0][0] + sw.close() + self.name = str(int(self.freq/1e6)) + # Get the station diameter + ant = pt.table(self.files[0]+'::ANTENNA', ack=False) + self.diam = float(ant.col('DISH_DIAMETER')[0]) + ant.close() + + def get_image_sizes(self, cellsize_highres_deg=None, cellsize_lowres_deg=None, + fieldsize_highres=2.5, fieldsize_lowres=6.5): + """ + Sets sizes for initsubtract images + + The image sizes are scaled from the mean primary-beam FWHM. For + the high-res image, we use 2.5 * FWHM; for low-res, we use 6.5 * FHWM. + + Parameters + ---------- + cellsize_highres_deg : float, optional + cellsize for the high-res images in deg + cellsize_lowres_deg : float, optional + cellsize for the low-res images in deg + fieldsize_highres : float, optional + How many FWHM's shall the high-res images be. + fieldsize_lowres : float, optional + How many FWHM's shall the low-res images be. 
+ """ + if cellsize_highres_deg: + self.cellsize_highres_deg = cellsize_highres_deg + if cellsize_lowres_deg: + self.cellsize_lowres_deg = cellsize_lowres_deg + if not hasattr(self, 'mean_el_rad'): + for MS_id in range(self.numMS): + # calculate mean elevation + tab = pt.table(self.files[MS_id], ack=False) + el_values = pt.taql("SELECT mscal.azel1()[1] AS el from " + + self.files[MS_id] + " limit ::10000").getcol("el") + if MS_id == 0: + global_el_values = el_values + else: + global_el_values = np.hstack( (global_el_values, el_values ) ) + self.mean_el_rad = np.mean(global_el_values) + + # Calculate mean FOV + sec_el = 1.0 / np.sin(self.mean_el_rad) + self.fwhm_deg = 1.1 * ((3.0e8 / self.freq) / self.diam) * 180. / np.pi * sec_el + self.imsize_high_res = self.get_optimum_size(self.fwhm_deg + /self.cellsize_highres_deg * fieldsize_highres) + self.imsize_low_res = self.get_optimum_size(self.fwhm_deg + /self.cellsize_lowres_deg * fieldsize_lowres) + return (self.imsize_high_res, self.imsize_low_res) + + def get_optimum_size(self, size): + """ + Gets the nearest optimum image size + + Taken from the casa source code (cleanhelper.py) + + Parameters + ---------- + size : int + Target image size in pixels + + Returns + ------- + optimum_size : int + Optimum image size nearest to target size + + """ + import numpy + + def prime_factors(n, douniq=True): + """ Return the prime factors of the given number. """ + factors = [] + lastresult = n + sqlast=int(numpy.sqrt(n))+1 + if n == 1: + return [1] + c=2 + while 1: + if (lastresult == 1) or (c > sqlast): + break + sqlast=int(numpy.sqrt(lastresult))+1 + while 1: + if(c > sqlast): + c=lastresult + break + if lastresult % c == 0: + break + c += 1 + factors.append(c) + lastresult = lastresult // c + if (factors==[]): factors=[n] + return numpy.unique(factors).tolist() if douniq else factors + + n = int(size) + if (n%2 != 0): + n+=1 + fac=prime_factors(n, False) + for k in range(len(fac)): + if (fac[k] > 7): + val=fac[k] + while (numpy.max(prime_factors(val)) > 7): + val +=1 + fac[k]=val + newlarge=numpy.product(fac) + for k in range(n, newlarge, 2): + if ((numpy.max(prime_factors(k)) < 8)): + return k + return newlarge + + def get_averaging_steps(self): + """ + Sets the averaging step sizes + + Note: the frequency step must be an even divisor of the number of + channels + + """ + # Get time per sample and number of samples + t = pt.table(self.files[0], readonly=True, ack=False) + for t2 in t.iter(["ANTENNA1","ANTENNA2"]): + if (t2.getcell('ANTENNA1',0)) < (t2.getcell('ANTENNA2',0)): + self.timestep_sec = t2[1]['TIME']-t2[0]['TIME'] # sec + break + t.close() + # generate a (numpy-)array with the divisors of nchan + tmp_divisors = [] + for step in range(self.nchan,0,-1): + if (self.nchan % step) == 0: + tmp_divisors.append(step) + freq_divisors = np.array(tmp_divisors) + + # Average to 0.2 MHz per channel and 16 sec per time slot + initsubtract_freqstep = max(1, min(int(round(0.2 * 1e6 / self.chan_width_hz)), self.nchan)) + initsubtract_freqstep = freq_divisors[np.argmin(np.abs(freq_divisors-initsubtract_freqstep))] + initsubtract_timestep = max(1, int(round(16.0 / self.timestep_sec))) + + return (initsubtract_freqstep, initsubtract_timestep) + + def get_nwavelengths(self, cellsize_deg, timestep_sec,): + """ + Returns nwavelengths for WSClean BL-based averaging + The value depends on the integration time given the specified maximum + allowed smearing. 
We scale it from the imaging cell size assuming normal + sampling as: + max baseline in nwavelengths = 1 / theta_rad ~= 1 / (cellsize_deg * 3 * pi / 180) + nwavelengths = max baseline in nwavelengths * 2 * pi * + integration time in seconds / (24 * 60 * 60) / 4 + Parameters + ---------- + cellsize_deg : float + Pixel size of image in degrees + timestep_sec : float + Length of one timestep in seconds + """ + max_baseline = 1 / (3 * cellsize_deg * np.pi / 180) + wsclean_nwavelengths_time = int(max_baseline * 2*np.pi * timestep_sec / + (24 * 60 * 60) / 4) + return wsclean_nwavelengths_time + + +def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694, + fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1.): + """ + Check a list of MS files for missing frequencies + + Parameters + ---------- + ms_input : list or str + List of MS filenames, or string with list, or path to a mapfile + outmapname: str + Name of output mapfile + mapfile_dir : str + Directory for output mapfile + cellsize_highres_deg : float, optional + cellsize for the high-res images in deg + cellsize_lowres_deg : float, optional + cellsize for the low-res images in deg + fieldsize_highres : float, optional + How many FWHM's shall the high-res images be. + fieldsize_lowres : float, optional + How many FWHM's shall the low-res images be. + image_padding : float, optional + How much padding shall we add to the padded image sizes. + y_axis_stretch : float, optional + How much shall the y-axis be stretched or compressed. + + Returns + ------- + result : dict + Dict with the name of the generated mapfiles + + """ + + if not outmapname or not mapfile_dir: + raise ValueError('sort_times_into_freqGroups: outmapname and mapfile_dir are needed!') + if type(ms_input) is str: + if ms_input.startswith('[') and ms_input.endswith(']'): + ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')] + else: + map_in = DataMap.load(ms_input) + map_in.iterator = DataMap.SkipIterator + ms_list = [] + for fname in map_in: + if fname.startswith('[') and fname.endswith(']'): + for f in fname.strip('[]').split(','): + ms_list.append(f.strip(' \'\"')) + else: + ms_list.append(fname.strip(' \'\"')) + elif type(ms_input) is list: + ms_list = [str(f).strip(' \'\"') for f in ms_input] + else: + raise TypeError('sort_into_freqBands: type of "ms_input" unknown!') + + cellsize_highres_deg = float(cellsize_highres_deg) + cellsize_lowres_deg = float(cellsize_lowres_deg) + fieldsize_highres = float(fieldsize_highres) + fieldsize_lowres = float(fieldsize_lowres) + image_padding = float(image_padding) + y_axis_stretch = float(y_axis_stretch) + + msdict = {} + for ms in ms_list: + # group all MSs by frequency + sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False) + msfreq = int(sw.col('REF_FREQUENCY')[0]) + sw.close() + if msfreq in msdict: + msdict[msfreq].append(ms) + else: + msdict[msfreq] = [ms] + bands = [] + bandfreqs = [] + print("InitSubtract_deep_sort_and_compute.py: Putting files into bands.") + for MSkey in msdict.keys(): + bands.append( Band(msdict[MSkey]) ) + bandfreqs.append( Band(msdict[MSkey]).freq ) + + ## min freq gives largest image size for deep image + bandfreqs = np.array(bandfreqs) + minfreq = np.min(bandfreqs) + bandmin = np.argmin(bandfreqs) + ## need to map the output from wsclean channels to the right frequencies + ## just put the bands in the right freq order + wsclean_channum = np.argsort(bandfreqs) + bands = np.array(bands) + bands = 
bands[wsclean_channum] + + #minfreq = 1e9 + #for ib, band in enumerate(bands): + #if band.freq < minfreq: + #minfreq = band.freq + #bandmin = ib + + group_map = MultiDataMap() + file_single_map = DataMap([]) + high_size_map = DataMap([]) + low_size_map = DataMap([]) + high_paddedsize_map = DataMap([]) + low_paddedsize_map = DataMap([]) + numfiles = 0 + nbands = np.int(len(bands)/10) + if nbands > 8: + nchansout_clean1 = np.int(nbands/4) + elif nbands > 4: + nchansout_clean1 = np.int(nbands/2) + else: + nchansout_clean1 = np.int(nbands) + if nchansout_clean1 < 2: + nchansout_clean1 = 2 + + (freqstep, timestep) = bands[0].get_averaging_steps() + int_time_sec = bands[0].timestep_sec * timestep # timestep_sec gets added to band object in get_averaging_steps() + nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg, int_time_sec) + nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg, int_time_sec) + + print("InitSubtract_deep_sort_and_compute.py: analyzing data...") + for band in bands: + group_map.append(MultiDataProduct('localhost', band.files, False)) + numfiles += len(band.files) + for filename in band.files: + file_single_map.append(DataProduct('localhost', filename, False)) + (imsize_high_res, imsize_low_res) = band.get_image_sizes(cellsize_highres_deg, cellsize_lowres_deg, + fieldsize_highres, fieldsize_lowres) + imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res*y_axis_stretch)) + high_size_map.append(DataProduct('localhost', str(imsize_high_res)+" "+str(imsize_high_res_stretch), False)) + imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res*y_axis_stretch)) + low_size_map.append(DataProduct('localhost', str(imsize_low_res)+" "+str(imsize_low_res_stretch), False)) + imsize_high_pad = band.get_optimum_size(int(imsize_high_res*image_padding)) + imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res*image_padding*y_axis_stretch)) + high_paddedsize_map.append(DataProduct('localhost', str(imsize_high_pad)+" "+str(imsize_high_pad_stretch), False)) + imsize_low_pad = band.get_optimum_size(int(imsize_low_res*image_padding)) + imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res*image_padding*y_axis_stretch)) + low_paddedsize_map.append(DataProduct('localhost', str(imsize_low_pad)+" "+str(imsize_low_pad_stretch), False)) + + if band.freq == minfreq: + deep_imsize_high_res = imsize_high_res + deep_imsize_high_res_stretch = imsize_high_res_stretch + deep_imsize_high_pad = imsize_high_pad + deep_imsize_high_pad_stretch = imsize_high_pad_stretch + deep_imsize_low_res = imsize_low_res + deep_imsize_low_res_stretch = imsize_low_res_stretch + deep_imsize_low_pad = imsize_low_pad + deep_imsize_low_pad_stretch = imsize_low_pad_stretch + + deep_high_size_map = DataMap([DataProduct('localhost', str(deep_imsize_high_res)+" "+str(deep_imsize_high_res_stretch), False)]) + deep_high_paddedsize_map = DataMap([DataProduct('localhost', str(deep_imsize_high_pad)+" "+str(deep_imsize_high_pad_stretch), False)]) + deep_low_size_map = DataMap([DataProduct('localhost', str(deep_imsize_low_res)+" "+str(deep_imsize_low_res_stretch), False)]) + deep_low_paddedsize_map = DataMap([DataProduct('localhost', str(deep_imsize_low_pad)+" "+str(deep_imsize_low_pad_stretch), False)]) + nbands_map = DataMap([DataProduct('localhost', str(nbands), False)]) + nchansout_clean1_map = DataMap([DataProduct('localhost', str(nchansout_clean1), False)]) + + # get mapfiles for freqstep and timestep with the length of single_map + freqstep_map = DataMap([]) + 
timestep_map = DataMap([]) + nwavelengths_high_map = DataMap([]) + nwavelengths_low_map = DataMap([]) + + for index in range(numfiles): + # set time and frequency averaging values (in sec and Hz) + freqstep_map.append(DataProduct('localhost', str(freqstep*bands[0].chan_width_hz), False)) + timestep_map.append(DataProduct('localhost', str(timestep*bands[0].timestep_sec), False)) + nwavelengths_high_map.append(DataProduct('localhost', str(nwavelengths_high), False)) + nwavelengths_low_map.append(DataProduct('localhost', str(nwavelengths_low), False)) + + groupmapname = os.path.join(mapfile_dir, outmapname) + group_map.save(groupmapname) + file_single_mapname = os.path.join(mapfile_dir, outmapname+'_single') + file_single_map.save(file_single_mapname) + + high_sizename = os.path.join(mapfile_dir, outmapname+'_high_size') + high_size_map.save(high_sizename) + low_sizename = os.path.join(mapfile_dir, outmapname+'_low_size') + low_size_map.save(low_sizename) + high_padsize_name = os.path.join(mapfile_dir, outmapname+'_high_padded_size') + high_paddedsize_map.save(high_padsize_name) + low_padsize_name = os.path.join(mapfile_dir, outmapname+'_low_padded_size') + low_paddedsize_map.save(low_padsize_name) + + deep_high_sizename = os.path.join(mapfile_dir, outmapname+'_deep_high_size') + deep_high_size_map.save(deep_high_sizename) + deep_low_sizename = os.path.join(mapfile_dir, outmapname+'_deep_low_size') + deep_low_size_map.save(deep_low_sizename) + deep_high_padsize_name = os.path.join(mapfile_dir, outmapname+'_deep_high_padded_size') + deep_high_paddedsize_map.save(deep_high_padsize_name) + deep_low_padsize_name = os.path.join(mapfile_dir, outmapname+'_deep_low_padded_size') + deep_low_paddedsize_map.save(deep_low_padsize_name) + + nbands_mapname = os.path.join(mapfile_dir, outmapname+'_nbands') + nbands_map.save(nbands_mapname) + nchansout_clean1_mapname = os.path.join(mapfile_dir, outmapname+'_nchansout_clean1') + nchansout_clean1_map.save(nchansout_clean1_mapname) + + freqstepname = os.path.join(mapfile_dir, outmapname+'_freqstep') + freqstep_map.save(freqstepname) + timestepname = os.path.join(mapfile_dir, outmapname+'_timestep') + timestep_map.save(timestepname) + nwavelengths_high_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_high') + nwavelengths_high_map.save(nwavelengths_high_name) + nwavelengths_low_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_low') + nwavelengths_low_map.save(nwavelengths_low_name) + + result = {'groupmap': groupmapname, 'single_mapfile' : file_single_mapname, + 'high_size_mapfile' : high_sizename, 'low_size_mapfile' : low_sizename, + 'high_padsize_mapfile' : high_padsize_name, 'low_padsize_mapfile' : low_padsize_name, + 'deep_high_size_mapfile' : deep_high_sizename, 'deep_low_size_mapfile' : deep_low_sizename, + 'deep_high_padsize_mapfile' : deep_high_padsize_name, 'deep_low_padsize_mapfile' : deep_low_padsize_name, + 'nbands' : nbands_mapname, 'nchansout_clean1' : nchansout_clean1_mapname, + 'freqstep' : freqstepname, 'timestep' : timestepname, 'nwavelengths_high_mapfile': nwavelengths_high_name, + 'nwavelengths_low_mapfile': nwavelengths_low_name} + return result + + +class MultiDataProduct(DataProduct): + """ + Class representing multiple files in a DataProduct. 
+ """ + def __init__(self, host=None, file=None, skip=True): + super(MultiDataProduct, self).__init__(host, file, skip) + if not file: + self.file = list() + else: + self._set_file(file) + + def __repr__(self): + """Represent an instance as a Python dict""" + return ( + "{{'host': '{0}', 'file': {1}, 'skip': {2}}}".format(self.host, self.file, str(self.skip)) + ) + + def __str__(self): + """Print an instance as 'host:[filelist]'""" + return ':'.join((self.host, str(self.file))) + + def _set_file(self, data): + try: + # Try parsing as a list + if isinstance(data, list): + self.file = data + if isinstance(data, DataProduct): + self._from_dataproduct(data) + if isinstance(data, DataMap): + self._from_datamap(data) + + except TypeError: + raise DataProduct("No known method to set a filelist from %s" % str(file)) + + def _from_dataproduct(self, prod): + self.host = prod.host + self.file = prod.file + self.skip = prod.skip + + def _from_datamap(self, inmap): + filelist = {} + for item in inmap: + if not item.host in filelist: + filelist[item.host] = [] + filelist[item.host].append(item.file) + self.file = filelist['i am'] + + def append(self, item): + self.file.append(item) + + +class MultiDataMap(DataMap): + """ + Class representing a specialization of data-map, a collection of data + products located on the same node, skippable as a set and individually + """ + @DataMap.data.setter + def data(self, data): + if isinstance(data, DataMap): + mdpdict = {} + data.iterator = DataMap.SkipIterator + for item in data: + if not item.host in mdpdict: + mdpdict[item.host] = [] + mdpdict[item.host].append(item.file) + mdplist = [] + for k, v in mdpdict.iteritems(): + mdplist.append(MultiDataProduct(k, v, False)) + self._set_data(mdplist, dtype=MultiDataProduct) + elif isinstance(data, MultiDataProduct): + self._set_data(data, dtype=MultiDataProduct) + elif not data: + pass + else: + self._set_data(data, dtype=MultiDataProduct) + + def split_list(self, number): + mdplist = [] + for item in self.data: + for i in range(0, len(item.file), number): + chunk = item.file[i:i+number] + mdplist.append(MultiDataProduct(item.host, chunk, item.skip)) + self._set_data(mdplist) diff --git a/Docker/scripts/InitSubtract_sort_and_compute.py b/Docker/scripts/InitSubtract_sort_and_compute.py new file mode 100755 index 00000000..1bd14436 --- /dev/null +++ b/Docker/scripts/InitSubtract_sort_and_compute.py @@ -0,0 +1,479 @@ +#! 
/usr/bin/env python +""" +Script to sort a list of MSs into frequency-bands, and compute additional values needed for initsubtract +""" +import pyrap.tables as pt +import os +import numpy as np +from lofarpipe.support.data_map import DataMap +from lofarpipe.support.data_map import DataProduct + +class Band(object): + """ + The Band object contains parameters needed for each band + """ + def __init__(self, MSfiles): + self.files = MSfiles + self.msnames = [ MS.split('/')[-1] for MS in self.files ] + self.numMS = len(self.files) + # Get the frequency info and set name + sw = pt.table(self.files[0]+'::SPECTRAL_WINDOW', ack=False) + self.freq = sw.col('REF_FREQUENCY')[0] + self.nchan = sw.col('NUM_CHAN')[0] + self.chan_freqs_hz = sw.col('CHAN_FREQ')[0] + self.chan_width_hz = sw.col('CHAN_WIDTH')[0][0] + sw.close() + self.name = str(int(self.freq/1e6)) + # Get the station diameter + ant = pt.table(self.files[0]+'::ANTENNA', ack=False) + self.diam = float(ant.col('DISH_DIAMETER')[0]) + ant.close() + + + + def get_image_sizes(self, cellsize_highres_deg=None, cellsize_lowres_deg=None, + fieldsize_highres=2.5, fieldsize_lowres=6.5): + """ + Sets sizes for initsubtract images + + The image sizes are scaled from the mean primary-beam FWHM. For + the high-res image, we use 2.5 * FWHM; for low-res, we use 6.5 * FHWM. + + Parameters + ---------- + cellsize_highres_deg : float, optional + cellsize for the high-res images in deg + cellsize_lowres_deg : float, optional + cellsize for the low-res images in deg + fieldsize_highres : float, optional + How many FWHM's shall the high-res images be. + fieldsize_lowres : float, optional + How many FWHM's shall the low-res images be. + """ + if cellsize_highres_deg: + self.cellsize_highres_deg = cellsize_highres_deg + if cellsize_lowres_deg: + self.cellsize_lowres_deg = cellsize_lowres_deg + if not hasattr(self, 'mean_el_rad'): + for MS_id in xrange(self.numMS): + # calculate mean elevation + tab = pt.table(self.files[MS_id], ack=False) + el_values = pt.taql("SELECT mscal.azel1()[1] AS el from " + + self.files[MS_id] + " limit ::10000").getcol("el") + if MS_id == 0: + global_el_values = el_values + else: + global_el_values = np.hstack( (global_el_values, el_values ) ) + self.mean_el_rad = np.mean(global_el_values) + + # Calculate mean FOV + sec_el = 1.0 / np.sin(self.mean_el_rad) + self.fwhm_deg = 1.1 * ((3.0e8 / self.freq) / self.diam) * 180. / np.pi * sec_el + self.imsize_high_res = self.get_optimum_size(self.fwhm_deg + /self.cellsize_highres_deg * fieldsize_highres) + self.imsize_low_res = self.get_optimum_size(self.fwhm_deg + /self.cellsize_lowres_deg * fieldsize_lowres) + return (self.imsize_high_res, self.imsize_low_res) + + def get_optimum_size(self, size): + """ + Gets the nearest optimum image size + + Taken from the casa source code (cleanhelper.py) + + Parameters + ---------- + size : int + Target image size in pixels + + Returns + ------- + optimum_size : int + Optimum image size nearest to target size + + """ + import numpy + + def prime_factors(n, douniq=True): + """ Return the prime factors of the given number. 
""" + factors = [] + lastresult = n + sqlast=int(numpy.sqrt(n))+1 + if n == 1: + return [1] + c=2 + while 1: + if (lastresult == 1) or (c > sqlast): + break + sqlast=int(numpy.sqrt(lastresult))+1 + while 1: + if(c > sqlast): + c=lastresult + break + if lastresult % c == 0: + break + c += 1 + factors.append(c) + lastresult /= c + if (factors==[]): factors=[n] + return numpy.unique(factors).tolist() if douniq else factors + + n = int(size) + if (n%2 != 0): + n+=1 + fac=prime_factors(n, False) + for k in range(len(fac)): + if (fac[k] > 7): + val=fac[k] + while (numpy.max(prime_factors(val)) > 7): + val +=1 + fac[k]=val + newlarge=numpy.product(fac) + for k in range(n, newlarge, 2): + if ((numpy.max(prime_factors(k)) < 8)): + return k + return newlarge + + def get_averaging_steps(self): + """ + Sets the averaging step sizes + + Note: the frequency step must be an even divisor of the number of + channels + + """ + # Get time per sample and number of samples + t = pt.table(self.files[0], readonly=True, ack=False) + for t2 in t.iter(["ANTENNA1","ANTENNA2"]): + if (t2.getcell('ANTENNA1',0)) < (t2.getcell('ANTENNA2',0)): + self.timestep_sec = t2[1]['TIME']-t2[0]['TIME'] # sec + break + t.close() + # generate a (numpy-)array with the divisors of nchan + tmp_divisors = [] + for step in range(self.nchan,0,-1): + if (self.nchan % step) == 0: + tmp_divisors.append(step) + freq_divisors = np.array(tmp_divisors) + + # For initsubtract, average to 0.5 MHz per channel and 20 sec per time + # slot. Since each band is imaged separately and the smearing and image + # sizes both scale linearly with frequency, a single frequency and time + # step is valid for all bands + initsubtract_freqstep = max(1, min(int(round(0.5 * 1e6 / self.chan_width_hz)), self.nchan)) + initsubtract_freqstep = freq_divisors[np.argmin(np.abs(freq_divisors-initsubtract_freqstep))] + initsubtract_timestep = max(1, int(round(20.0 / self.timestep_sec))) + + return (initsubtract_freqstep, initsubtract_timestep) + + def nwavelengths(self, cellsize_highres_deg, cellsize_lowres_deg, initsubtract_timestep): + int_time_sec = self.timestep_sec * initsubtract_timestep + max_baseline_in_nwavelenghts_h = 1.0/(cellsize_highres_deg*3.0*np.pi/180.0) + max_baseline_in_nwavelenghts_l = 1.0/(cellsize_lowres_deg*3.0*np.pi/180.0) + self.nwavelengths_high = max_baseline_in_nwavelenghts_h*2.0*np.pi*int_time_sec/(24.0*60.0*60.0)/2 + self.nwavelengths_low = max_baseline_in_nwavelenghts_l*2.0*np.pi*int_time_sec/(24.0*60.0*60.0)/2 + return (self.nwavelengths_high, self.nwavelengths_low) + + +def input2bool(invar): + if invar is None: + return None + if isinstance(invar, bool): + return invar + elif isinstance(invar, str): + if invar.upper() == 'TRUE' or invar == '1': + return True + elif invar.upper() == 'FALSE' or invar == '0': + return False + else: + raise ValueError('input2bool: Cannot convert string "'+invar+'" to boolean!') + elif isinstance(invar, int) or isinstance(invar, float): + return bool(invar) + else: + raise TypeError('input2bool: Unsupported data type:'+str(type(invar))) + + +def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694, + fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1., + calc_y_axis_stretch=False, apply_y_axis_stretch_highres=True, apply_y_axis_stretch_lowres=True): + """ + Check a list of MS files for missing frequencies + + Parameters + ---------- + ms_input : list or str + List of MS filenames, or string with list, or path to a mapfile + 
outmapname: str + Name of output mapfile + mapfile_dir : str + Directory for output mapfile + cellsize_highres_deg : float, optional + cellsize for the high-res images in deg + cellsize_lowres_deg : float, optional + cellsize for the low-res images in deg + fieldsize_highres : float, optional + How many FWHM's shall the high-res images be. + fieldsize_lowres : float, optional + How many FWHM's shall the low-res images be. + image_padding : float, optional + How much padding shall we add to the padded image sizes. + y_axis_stretch : float, optional + How much shall the y-axis be stretched or compressed. + calc_y_axis_stretch : bool, optional + Adjust the image sizes returned by this script for the mean elevation. + If True, the value of y_axis_stretch above is ignored + apply_y_axis_stretch_highres : bool, optional + Apply the y-axis stretch to the high-res image sizes + apply_y_axis_stretch_lowres : bool, optional + Apply the y-axis stretch to the low-res image sizes + + Returns + ------- + result : dict + Dict with the name of the generated mapfiles + + """ + if not outmapname or not mapfile_dir: + raise ValueError('sort_times_into_freqGroups: outmapname and mapfile_dir are needed!') + if type(ms_input) is str: + if ms_input.startswith('[') and ms_input.endswith(']'): + ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')] + else: + map_in = DataMap.load(ms_input) + map_in.iterator = DataMap.SkipIterator + ms_list = [] + for fname in map_in: + if fname.startswith('[') and fname.endswith(']'): + for f in fname.strip('[]').split(','): + ms_list.append(f.strip(' \'\"')) + else: + ms_list.append(fname.strip(' \'\"')) + elif type(ms_input) is list: + ms_list = [str(f).strip(' \'\"') for f in ms_input] + else: + raise TypeError('sort_into_freqBands: type of "ms_input" unknown!') + + cellsize_highres_deg = float(cellsize_highres_deg) + cellsize_lowres_deg = float(cellsize_lowres_deg) + fieldsize_highres = float(fieldsize_highres) + fieldsize_lowres = float(fieldsize_lowres) + image_padding = float(image_padding) + y_axis_stretch = float(y_axis_stretch) + calc_y_axis_stretch = input2bool(calc_y_axis_stretch) + apply_y_axis_stretch_highres = input2bool(apply_y_axis_stretch_highres) + apply_y_axis_stretch_lowres = input2bool(apply_y_axis_stretch_lowres) + + msdict = {} + for ms in ms_list: + # group all MSs by frequency + sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False) + msfreq = int(sw.col('REF_FREQUENCY')[0]) + sw.close() + if msfreq in msdict: + msdict[msfreq].append(ms) + else: + msdict[msfreq] = [ms] + bands = [] + print "InitSubtract_sort_and_compute.py: Putting files into bands." 
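+    # Each Band object collects all MSs that share one REF_FREQUENCY, i.e.
+    # one Band corresponds to one frequency band of the observation.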
+ for MSkey in msdict.keys(): + bands.append( Band(msdict[MSkey]) ) + + group_map = MultiDataMap() + file_single_map = DataMap([]) + high_size_map = DataMap([]) + low_size_map = DataMap([]) + high_paddedsize_map = DataMap([]) + low_paddedsize_map = DataMap([]) + numfiles = 0 + for i, band in enumerate(bands): + print "InitSubtract_sort_and_compute.py: Working on Band:",band.name + group_map.append(MultiDataProduct('localhost', band.files, False)) + numfiles += len(band.files) + for filename in band.files: + file_single_map.append(DataProduct('localhost', filename, False)) + (imsize_high_res, imsize_low_res) = band.get_image_sizes(cellsize_highres_deg, cellsize_lowres_deg, + fieldsize_highres, fieldsize_lowres) + + # Calculate y_axis_stretch if desired + if calc_y_axis_stretch: + if i == 0: + y_axis_stretch = 1.0 / np.sin(band.mean_el_rad) + print "InitSubtract_sort_and_compute.py: Using y-axis stretch of:",y_axis_stretch + y_axis_stretch_lowres = y_axis_stretch + y_axis_stretch_highres = y_axis_stretch + else: + y_axis_stretch_lowres = 1.0 + y_axis_stretch_highres = 1.0 + + # Adjust sizes so that we get the correct ones below + if not apply_y_axis_stretch_highres: + y_axis_stretch_highres = 1.0 + if not apply_y_axis_stretch_lowres: + y_axis_stretch_lowres = 1.0 + imsize_low_res /= y_axis_stretch_lowres + imsize_high_res /= y_axis_stretch_highres + + imsize_high_res = band.get_optimum_size(int(imsize_high_res)) + imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res*y_axis_stretch_highres)) + high_size_map.append(DataProduct('localhost', str(imsize_high_res)+" "+str(imsize_high_res_stretch), False)) + + imsize_low_res = band.get_optimum_size(int(imsize_low_res)) + imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res*y_axis_stretch_lowres)) + low_size_map.append(DataProduct('localhost', str(imsize_low_res)+" "+str(imsize_low_res_stretch), False)) + + imsize_high_pad = band.get_optimum_size(int(imsize_high_res*image_padding)) + imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res*image_padding*y_axis_stretch_highres)) + high_paddedsize_map.append(DataProduct('localhost', str(imsize_high_pad)+" "+str(imsize_high_pad_stretch), False)) + + imsize_low_pad = band.get_optimum_size(int(imsize_low_res*image_padding)) + imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res*image_padding*y_axis_stretch_lowres)) + low_paddedsize_map.append(DataProduct('localhost', str(imsize_low_pad)+" "+str(imsize_low_pad_stretch), False)) + + print "InitSubtract_sort_and_compute.py: Computing averaging steps." 
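+    # Only the first band is queried here: as noted in get_averaging_steps(),
+    # smearing and image sizes scale linearly with frequency, so a single
+    # frequency and time step is valid for all bands.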
+ (freqstep, timestep) = bands[0].get_averaging_steps() + (nwavelengths_high, nwavelengths_low) = bands[0].nwavelengths(cellsize_highres_deg, + cellsize_lowres_deg, timestep) + + # get mapfiles for freqstep and timestep with the length of single_map + freqstep_map = DataMap([]) + timestep_map = DataMap([]) + nwavelengths_high_map = DataMap([]) + nwavelengths_low_map = DataMap([]) + for index in xrange(numfiles): + freqstep_map.append(DataProduct('localhost', str(freqstep), False)) + timestep_map.append(DataProduct('localhost', str(timestep), False)) + freqstep_map.append(DataProduct('localhost', str(freqstep), False)) + timestep_map.append(DataProduct('localhost', str(timestep), False)) + nwavelengths_high_map.append(DataProduct('localhost', str(nwavelengths_high), False)) + nwavelengths_low_map.append(DataProduct('localhost', str(nwavelengths_low), False)) + + groupmapname = os.path.join(mapfile_dir, outmapname) + group_map.save(groupmapname) + file_single_mapname = os.path.join(mapfile_dir, outmapname+'_single') + file_single_map.save(file_single_mapname) + high_sizename = os.path.join(mapfile_dir, outmapname+'_high_size') + high_size_map.save(high_sizename) + low_sizename = os.path.join(mapfile_dir, outmapname+'_low_size') + low_size_map.save(low_sizename) + high_padsize_name = os.path.join(mapfile_dir, outmapname+'_high_padded_size') + high_paddedsize_map.save(high_padsize_name) + low_padsize_name = os.path.join(mapfile_dir, outmapname+'_low_padded_size') + low_paddedsize_map.save(low_padsize_name) + freqstepname = os.path.join(mapfile_dir, outmapname+'_freqstep') + freqstep_map.save(freqstepname) + timestepname = os.path.join(mapfile_dir, outmapname+'_timestep') + timestep_map.save(timestepname) + nwavelengths_high_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_high') + nwavelengths_high_map.save(nwavelengths_high_name) + nwavelengths_low_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_low') + nwavelengths_low_map.save(nwavelengths_low_name) + + result = {'groupmap': groupmapname, 'single_mapfile' : file_single_mapname, + 'high_size_mapfile' : high_sizename, 'low_size_mapfile' : low_sizename, + 'high_padsize_mapfile' : high_padsize_name, 'low_padsize_mapfile' : low_padsize_name, + 'freqstep' : freqstepname, 'timestep' : timestepname, 'nwavelengths_high_mapfile': nwavelengths_high_name, + 'nwavelengths_low_mapfile': nwavelengths_low_name} + return result + + +class MultiDataProduct(DataProduct): + """ + Class representing multiple files in a DataProduct. 
+ """ + def __init__(self, host=None, file=None, skip=True): + super(MultiDataProduct, self).__init__(host, file, skip) + if not file: + self.file = list() + else: + self._set_file(file) + + def __repr__(self): + """Represent an instance as a Python dict""" + return ( + "{{'host': '{0}', 'file': '{1}', 'skip': {2}}}".format(self.host, + '[{}]'.format(','.join(self.file)), str(self.skip)) + ) + + def __str__(self): + """Print an instance as 'host:[filelist]'""" + return ':'.join((self.host, str(self.file))) + + def _set_file(self, data): + try: + # Try parsing as a list + if isinstance(data, list): + self.file = data + if isinstance(data, DataProduct): + self._from_dataproduct(data) + if isinstance(data, DataMap): + self._from_datamap(data) + + except TypeError: + raise DataProduct("No known method to set a filelist from %s" % str(file)) + + def _from_dataproduct(self, prod): + print 'setting filelist from DataProduct' + self.host = prod.host + self.file = prod.file + self.skip = prod.skip + + def _from_datamap(self, inmap): + print 'setting filelist from DataMap' + filelist = {} + for item in inmap: + if not item.host in filelist: + filelist[item.host] = [] + filelist[item.host].append(item.file) + self.file = filelist['i am'] + + def append(self, item): + self.file.append(item) + + +class MultiDataMap(DataMap): + """ + Class representing a specialization of data-map, a collection of data + products located on the same node, skippable as a set and individually + """ + def __init__(self, data=list(), iterator=iter): + super(MultiDataMap, self).__init__(data, iterator) + + @classmethod + def load(cls, filename): + """Load a data map from file `filename`. Return a DataMap instance.""" + with open(filename) as f: + datamap = eval(f.read()) + for i, d in enumerate(datamap): + file_entry = d['file'] + if file_entry.startswith('[') and file_entry.endswith(']'): + file_list = [e.strip(' \'\"') for e in file_entry.strip('[]').split(',')] + datamap[i] = {'host': d['host'], 'file': file_list, 'skip': d['skip']} + return cls(datamap) + + @DataMap.data.setter + def data(self, data): + if isinstance(data, DataMap): + mdpdict = {} + data.iterator = DataMap.SkipIterator + for item in data: + if not item.host in mdpdict: + mdpdict[item.host] = [] + mdpdict[item.host].append(item.file) + mdplist = [] + for k, v in mdpdict.iteritems(): + mdplist.append(MultiDataProduct(k, v, False)) + self._set_data(mdplist, dtype=MultiDataProduct) + elif isinstance(data, MultiDataProduct): + self._set_data(data, dtype=MultiDataProduct) + elif not data: + pass + else: + self._set_data(data, dtype=MultiDataProduct) + + def split_list(self, number): + mdplist = [] + for item in self.data: + for i in xrange(0, len(item.file), number): + chunk = item.file[i:i+number] + mdplist.append(MultiDataProduct(item.host, chunk, item.skip)) + self._set_data(mdplist) diff --git a/Docker/scripts/add_missing_stations.py b/Docker/scripts/add_missing_stations.py new file mode 100755 index 00000000..2801223f --- /dev/null +++ b/Docker/scripts/add_missing_stations.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +# -* coding: utf-8 -*- + +""" +Adds phases (=0) and amplitudes (=1) to any missing station if they appear in an h5parm, but not in a particular soltab. 
+ +Created on Tue Mar 14 2019 + +@author: Alexander Drabent +""" + +import argparse +import numpy as np +import os +from losoto.h5parm import h5parm +from losoto.lib_operations import * +import logging + +def main(h5parmfile, refh5 = None, solset='sol000', refsolset='sol000', soltab_in='phase000', soltab_out='GSMphase', filter='[CR]S*&', bad_antennas='[CR]S*&'): + + + if refh5 == None: + refh5 = h5parmfile + pass + + if not os.path.exists(h5parmfile) or not os.path.exists(refh5): + logging.error("H5parm file %s doesn't exist!" % h5parmfile) + return(1) + + if soltab_in == soltab_out: + logging.error("Output soltab has to be different from input soltab!") + return(1) + + ### Open up the h5parm, get an example value + data = h5parm(h5parmfile, readonly = False) + refdata = h5parm(refh5, readonly = False) + solset = data.getSolset(solset) + refsolset = refdata.getSolset(refsolset) + + ### Get antenna information of the solset + ref_station_names = sorted(refsolset.getAnt().keys()) + bad_antennas_list = bad_antennas.lstrip(filter).replace('!','').replace('*','').replace('&','').split(';') + new_station_names = [ ref_station_name for ref_station_name in ref_station_names if ref_station_name not in bad_antennas_list ] + + ### Load antenna list of input soltab + logging.info('Processing solution table %s'%(soltab_in)) + soltab = solset.getSoltab(soltab_in) + old_station_names = list(soltab.ant[:]) + soltab_type = soltab.getType() + out_axes_vals = [] + out_axes = soltab.getAxesNames() + out_axes.insert(1, out_axes.pop(out_axes.index('ant'))) ## put antenna table to the second place + + ### resort the val and weights array according to the new axes order + for vals, weights, coord, selection in soltab.getValuesIter(returnAxes=out_axes, weight=True): + vals = reorderAxes( vals, soltab.getAxesNames(), out_axes) + weights = reorderAxes( weights, soltab.getAxesNames(), out_axes ) + pass + + ### setting the proper soltab_axis + for axis in out_axes: + if axis == 'time': + out_axes_vals.append(soltab.time) + elif axis == 'freq': + out_axes_vals.append(soltab.freq) + elif axis == 'ant': + out_axes_vals.append(new_station_names) + elif axis == 'pol': + out_axes_vals.append(soltab.pol) + elif axis == 'dir': + out_axes_vals.append(soltab.dir) + else: + logging.error('Unknown axis in soltab: ' + str(axis)) + data.close() + refdata.close() + return 1 + + ### just copy if number of antennas is the same + if len(new_station_names) == len(old_station_names): + logging.warning('Station list in soltab ' + str(soltab_in) + ' matches the station list in the selected solset. 
Data will just be copied.') + new_soltab = solset.makeSoltab(soltype=soltab_type, soltabName=soltab_out, axesNames=out_axes, axesVals=out_axes_vals, vals=vals, weights=weights) + + ### add missing stations with zero phase/uniform amplitudes and uniform weights + elif len(new_station_names) > len(old_station_names): + + # define proper shape of array + logging.info('Adding missing antennas to the soltab: ' + str(soltab_out)) + dimension = list(np.shape(vals)) + dimension[1] = len(new_station_names) + + if soltab_type == 'amplitude': + new_vals = np.ones(tuple(dimension)) + else: + new_vals = np.zeros(tuple(dimension)) + new_weights = np.ones(tuple(dimension)) + + for i, new_station in enumerate(new_station_names): + if new_station in old_station_names: + ant_index = list(soltab.ant).index(new_station) + new_vals[:,i] = vals[:,ant_index] + new_weights[:,i] = weights[:,ant_index] + + new_soltab = solset.makeSoltab(soltype=soltab_type, soltabName=soltab_out, axesNames=out_axes, axesVals=out_axes_vals, vals=new_vals, weights=new_weights) + + else: + logging.error('There are less antennas in the solset than in the soltab ' + str(soltab_in)) + data.close() + refdata.close() + return(1) + + data.close() + refdata.close() + + return(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Adds phases and amplitudes to any missing station if they appear in an h5parm, but not in a particular soltab.') + + parser.add_argument('h5parm', type=str, + help='H5parm to which this action should be performed .') + parser.add_argument('--refh5', type=str, + help='External H5parm from which the full list of antennas is used from.') + parser.add_argument('--solset', type=str, default='sol000', + help='Input calibration solutions') + parser.add_argument('--refsolset', type=str, default='sol000', + help='Input calibration solutions of the reference h5parm file') + parser.add_argument('--soltab_in', type=str, default='phase000', + help='Input solution table') + parser.add_argument('--soltab_out', type=str, default='GSMphase', + help='Output solution table (has to be different from input solution table)') + + + args = parser.parse_args() + + format_stream = logging.Formatter("%(asctime)s\033[1m %(levelname)s:\033[0m %(message)s","%Y-%m-%d %H:%M:%S") + format_file = logging.Formatter("%(asctime)s %(levelname)s: %(message)s","%Y-%m-%d %H:%M:%S") + logging.root.setLevel(logging.INFO) + + log = logging.StreamHandler() + log.setFormatter(format_stream) + logging.root.addHandler(log) + + main(h5parmfile=args.h5parm, refh5=args.refh5, solset=args.solset, refsolset=args.refsolset, soltab_in=args.soltab_in, soltab_out=args.soltab_out) + diff --git a/Docker/scripts/check_Ateam_separation.py b/Docker/scripts/check_Ateam_separation.py new file mode 100755 index 00000000..51ca65a2 --- /dev/null +++ b/Docker/scripts/check_Ateam_separation.py @@ -0,0 +1,182 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# +# Written by Bas van der Tol (vdtol@strw.leidenuniv.nl), March 2011. +# Adapted for prefactor by Alexander Drabent (alex@tls-tautenburg.de), February 2019. + +#from pylab import * +import matplotlib +matplotlib.use('Agg') # Force matplotlib to not use any Xwindows backend. 
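+# (The backend must be selected before pylab/pyplot is imported, which is why
+# matplotlib.use('Agg') is called before the pylab import below.)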
+import pylab
+import pyrap.quanta as qa
+import pyrap.tables as pt
+import pyrap.measures as pm
+import sys
+import numpy
+import os
+
+targets = [ {'name' : 'CasA', 'ra' : 6.123487680622104, 'dec' : 1.0265153995604648},
+            {'name' : 'CygA', 'ra' : 5.233686575770755, 'dec' : 0.7109409582180791},
+            {'name' : 'TauA', 'ra' : 1.4596748493730913, 'dec' : 0.38422502335921294},
+            {'name' : 'HerA', 'ra' : 4.4119087330382163, 'dec' : 0.087135562905816893},
+            {'name' : 'VirA', 'ra' : 3.276086511413598, 'dec' : 0.21626589533567378},
+            {'name' : 'Sun'},
+            {'name' : 'Jupiter'},
+            {'name' : 'Moon'}]
+
+########################################################################
+def input2strlist_nomapfile(invar):
+    """
+    from bin/download_IONEX.py
+    give the list of MSs from the list provided as a string
+    """
+    str_list = None
+    if type(invar) is str:
+        if invar.startswith('[') and invar.endswith(']'):
+            str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')]
+        else:
+            str_list = [invar.strip(' \'\"')]
+    elif type(invar) is list:
+        str_list = [str(f).strip(' \'\"') for f in invar]
+    else:
+        raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!')
+    return str_list
+
+########################################################################
+def main(ms_input, min_separation = 30, outputimage = None):
+
+    """
+    Print the separation of the phase reference center of an input MS
+    from the A-Team sources
+
+    Parameters
+    ----------
+    ms_input : str
+        String from the list (map) of the calibrator MSs
+
+    Returns
+    -------
+    0 -- just for printing
+    """
+
+    msname = input2strlist_nomapfile(ms_input)[0]
+
+    # Create a measures object
+    me = pm.measures()
+
+    # Open the measurement set and the antenna and pointing table
+    ms = pt.table(msname)
+
+    # Get the position of the first antenna and set it as reference frame
+    ant_table = pt.table(msname + '::ANTENNA')
+    ant_no = 0
+    pos = ant_table.getcol('POSITION')
+    x = qa.quantity( pos[ant_no,0], 'm' )
+    y = qa.quantity( pos[ant_no,1], 'm' )
+    z = qa.quantity( pos[ant_no,2], 'm' )
+    position = me.position( 'wgs84', x, y, z )
+    me.doframe( position )
+    ant_table.close()
+
+    # Get the first pointing of the first antenna
+    field_table = pt.table(msname + '::FIELD')
+    field_no = 0
+    direction = field_table.getcol('PHASE_DIR')
+    ra = direction[ ant_no, field_no, 0 ]
+    dec = direction[ ant_no, field_no, 1 ]
+    targets.insert(0, {'name' : 'Pointing', 'ra' : ra, 'dec' : dec})
+    field_table.close()
+
+    # Get an ordered list of unique time stamps from the measurement set
+    time_table = pt.taql('select TIME from $1 orderby distinct TIME', tables = [ms])
+    time = time_table.getcol('TIME')
+    time1 = time/3600.0
+    time1 = time1 - pylab.floor(time1[0]/24)*24
+
+    ra_qa  = qa.quantity( targets[0]['ra'], 'rad' )
+    dec_qa = qa.quantity( targets[0]['dec'], 'rad' )
+    pointing = me.direction('j2000', ra_qa, dec_qa)
+
+    separations = []
+
+    print 'SEPARATION from A-Team sources'
+    print '------------------------------'
+    print 'The minimal accepted distance to an A-Team source is: ' + str(min_separation) + ' deg.'
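+    # For the pointing and each A-team source: compute the separation from
+    # the phase reference center and track the source's elevation over all
+    # time stamps for the summary plot.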
+    for target in targets:
+
+        t = qa.quantity(time[0], 's')
+        t1 = me.epoch('utc', t)
+        me.doframe(t1)
+
+        if 'ra' in target.keys():
+            ra_qa  = qa.quantity( target['ra'], 'rad' )
+            dec_qa = qa.quantity( target['dec'], 'rad' )
+            direction = me.direction('j2000', ra_qa, dec_qa)
+            pass
+        else :
+            direction = me.direction(target['name'])
+            pass
+
+        separations.append(me.separation(pointing, direction))
+
+        # Loop through all time stamps and calculate the elevation of the pointing
+        el = []
+        for t in time:
+            t_qa = qa.quantity(t, 's')
+            t1 = me.epoch('utc', t_qa)
+            me.doframe(t1)
+            a = me.measure(direction, 'azel')
+            elevation = a['m1']
+            el.append(elevation['value']/pylab.pi*180)
+            pass
+
+        el = numpy.array(el)
+        pylab.plot(time1, el)
+
+        if target['name'] != 'Pointing':
+            print target['name'] + ': ' + str(me.separation(pointing, direction))
+            if int(float(min_separation)) > int(float(str(me.separation(pointing, direction)).split(' deg')[0])):
+                print 'WARNING: The A-Team source ' + target['name'] + ' is closer than ' + str(min_separation) + ' deg to the phase reference center. Calibration might not perform as expected.'
+                pass
+            pass
+
+        pass
+    print '------------------------------'
+    pylab.title('Elevation')
+    pylab.ylabel('Elevation (deg)')
+    pylab.xlabel('Time (h)')
+    pylab.legend( [ target['name'] + ' (' + separation.to_string() + ')' for target, separation in zip(targets, separations) ])
+
+    if outputimage is not None:
+        inspection_directory = os.path.dirname(outputimage)
+        if not os.path.exists(inspection_directory):
+            os.makedirs(inspection_directory)
+            print "Directory", inspection_directory, "created."
+            pass
+        else:
+            print "Directory", inspection_directory, "already exists."
+            pass
+        print 'Plotting A-Team elevation to: ' + outputimage
+        pylab.savefig(outputimage)
+        pass
+    return 0
+    pass
+
+
+########################################################################
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Print the separation of the phase reference center of an input MS from the A-Team sources')
+
+    parser.add_argument('MSfile', type=str, nargs='+', help='One (or more MSs).')
+    parser.add_argument('--min_separation', type=int, default=30, help='Minimal accepted distance to an A-Team source on the sky in degrees (will raise a WARNING). Default: 30')
+    parser.add_argument('--outputimage', type=str, default=None, help='Location of the elevation plot of the A-Team sources.')
+
+    args = parser.parse_args()
+
+    main(args.MSfile, args.min_separation, args.outputimage)
+
+    pass
diff --git a/Docker/scripts/check_unflagged_fraction.py b/Docker/scripts/check_unflagged_fraction.py
new file mode 100755
index 00000000..3b1084a1
--- /dev/null
+++ b/Docker/scripts/check_unflagged_fraction.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+"""
+Flag MS if unflagged fraction is too low.
+"""
+import argparse
+import os
+import numpy as np
+
+def find_unflagged_fraction(ms_file):
+    """
+    Finds the fraction of data that is unflagged
+    Parameters
+    ----------
+    ms_file : str
+        Filename of input MS
+    Returns
+    -------
+    unflagged_fraction : float
+        Fraction of unflagged data
+    """
+    import subprocess
+
+    # Call taql.
+    # Note that we do not use pt.taql(), as pt.taql() can cause
+    # hanging/locking issues on some systems
+    p = subprocess.Popen("taql 'CALC sum([select nfalse(FLAG) from {0}]) / "
+                         "sum([select nelements(FLAG) from {0}])'".format(ms_file),
+                         shell=True, stdout=subprocess.PIPE)
+    r = p.communicate()
+    unflagged_fraction = float(r[0])
+
+    return unflagged_fraction
+
+def main(ms_file, min_fraction=0.01, print_fraction=False):
+    """
+    Flag MS if unflagged fraction is too low.
+
+    Parameters
+    ----------
+    ms_file : str
+        Name (path) of input MS
+    min_fraction : float, optional
+        Minimum fraction of unflagged data needed to keep this MS
+    print_fraction : bool, optional
+        Print the actual fraction of unflagged data
+
+    Returns
+    -------
+    result : dict
+        Dict with the name of the input MS or "None"
+
+    """
+    min_fraction = float(min_fraction)
+    unflagged_fraction = find_unflagged_fraction(ms_file)
+    if print_fraction:
+        print("File %s has %.2f%% unflagged data."%(os.path.basename(ms_file),unflagged_fraction*100.))
+    if unflagged_fraction < min_fraction:
+        print('check_unflagged_fraction.py: Unflagged fraction of {0} is: {1}, '
+              'removing file.'.format(os.path.basename(ms_file), str(unflagged_fraction)))
+        return {'flagged': 'None', 'unflagged_fraction': unflagged_fraction}
+    else:
+        return {'flagged': ms_file, 'unflagged_fraction': unflagged_fraction}
+
+if __name__ == '__main__':
+    descriptiontext = "Check a MS for a minimum fraction of unflagged data.\n"
+
+    parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('inputms', help='name of the input MS')
+    parser.add_argument('-f', '--min_fraction', help='Minimum fraction of unflagged data needed to keep this MS '
+                        '(default 0.01 = "keep if at least 1%% is not flagged")', type=float, default=0.01)
+    args = parser.parse_args()
+
+    erg = main(args.inputms, args.min_fraction, print_fraction=True)
diff --git a/Docker/scripts/concat_MS.py b/Docker/scripts/concat_MS.py
new file mode 100755
index 00000000..75d1e971
--- /dev/null
+++ b/Docker/scripts/concat_MS.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+import sys
+import glob
+import re
+import pyrap.tables as pt
+import numpy
+import os
+from lofarpipe.support.data_map import DataMap, DataProduct
+
+global_limit = 637871244
+########################################################################
+def input2strlist_nomapfile(invar):
+    """
+    from bin/download_IONEX.py
+    give the list of MSs from the list provided as a string
+    """
+    str_list = None
+    if type(invar) is str:
+        if invar.startswith('[') and invar.endswith(']'):
+            str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')]
+        else:
+            str_list = [invar.strip(' \'\"')]
+    elif type(invar) is list:
+        str_list = [str(f).strip(' \'\"') for f in invar]
+    else:
+        raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!')
+    return str_list
+
+########################################################################
+def getsystemmemory():
+
+    memory = int(os.popen('cat /proc/meminfo | grep MemAvailable').readlines()[0].split(':')[-1].split()[0])
+    return memory
+    pass
+
+########################################################################
+def getfilesize(MS):
+
+    size = int(os.popen('du -cks ' + MS).readlines()[0].split()[0])
+    return size
+
+    pass
+
+########################################################################
+def main(ms_input, ms_output, min_length, overhead = 0.8, filename=None, mapfile_dir=None):
+
+    """
+    Virtually concatenate subbands
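+    (using pyrap's msconcat; as many subbands are grouped per output MS as fit
+    into the available memory)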
+
+    Parameters
+    ----------
+    ms_input : str
+        String from the list (map) of the calibrator MSs
+    ms_output : str
+        String for the output concatenated MS
+    min_length : int
+        Minimum number of subbands to concatenate in frequency, necessary to perform the wide-band flagging in RAM. If the data is too big, aoflagger will use indirect-read.
+    overhead : float
+        Only use this fraction of the available memory for deriving the amount of data to be concatenated.
+    filename: str
+        Name of output mapfile
+    mapfile_dir : str
+        Directory for output mapfile
+
+    """
+    system_memory = getsystemmemory()
+    filelist      = input2strlist_nomapfile(ms_input)
+    file_size     = getfilesize(filelist[0])
+    overhead      = float(overhead)
+
+    print "Detected available system memory is: " + str(int(((system_memory / 1024. / 1024.) + 0.5))) + " GB"
+    #print "Detected file size is: " + str(int(((file_size / 1024. / 1024.) + 0.5))) + " GB"
+    if overhead * system_memory > global_limit:
+        system_memory = global_limit
+        overhead      = 1.0
+        print "Number of files to concat will be limited to the global limit of: " + str(int(((global_limit / 1024. / 1024.) + 0.5))) + " GB"
+        pass
+
+    #i = 1
+    max_space  = overhead * system_memory / file_size
+    max_length = len(filelist) / int((len(filelist) / max_space) + 1.)
+
+    #while max_length * file_size > overhead * system_memory:
+        #i += 1
+        #max_length = len(filelist) / ((len(filelist) / max_space) + i)
+        #pass
+
+    if max_length >= int(min_length):
+        memory = '-memory-read'
+        pass
+    else:
+        memory = '-indirect-read'
+        max_length = len(filelist)
+        if max_length * file_size > global_limit:
+            max_space  = global_limit / file_size
+            max_length = len(filelist) / int((len(filelist) / max_space) + 1.)
+            print "Number of files to concat was limited to the global limit of: " + str(int(((global_limit / 1024. / 1024.) + 0.5))) + " GB"
+            print "WARNING: The number of concatenated files will thus be lower than the min_length of: " + str(min_length)
+            pass
+        pass
+
+    print "Applying an overhead of: " + str(overhead)
+    print "The max_length value is: " + str(max_length)
+    set_ranges = list(numpy.arange(0, len(filelist) + 1, int(max_length)))
+    set_ranges[-1] = len(filelist)
+
+    map_out = DataMap([])
+    for i in numpy.arange(len(set_ranges) - 1):
+        f = ms_output + '_' + str(i)
+        pt.msconcat(filelist[set_ranges[i]:set_ranges[i + 1]], f)
+        map_out.data.append(DataProduct('localhost', f, False))
+
+    # Only write a mapfile when one was requested; the plain command-line call
+    # below does not pass filename/mapfile_dir.
+    if filename is not None and mapfile_dir is not None:
+        fileid = os.path.join(mapfile_dir, filename)
+        map_out.save(fileid)
+        result = {'concatmapfile': fileid, 'memory': memory}
+    else:
+        result = {'memory': memory}
+
+    return result
+
+########################################################################
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Virtually concatenate subbands')
+
+    parser.add_argument('MSfile', type=str, nargs='+',
+                        help='One (or more MSs) that we want to concatenate.')
+    parser.add_argument('MSout', type=str,
+                        help='Output MS file')
+    parser.add_argument('--min_length', type=str,
+                        help='Minimum number of subbands to concatenate in frequency.')
+    parser.add_argument('--overhead', type=float, default=0.8,
+                        help='Only use this fraction of the available memory for deriving the amount of data to be concatenated.')
+
+    args = parser.parse_args()
+
+    main(args.MSfile, args.MSout, args.min_length, args.overhead)
diff --git a/Docker/scripts/convert_npys_to_h5parm.py b/Docker/scripts/convert_npys_to_h5parm.py
new file mode 100755
index 00000000..499684b1
--- /dev/null
+++ b/Docker/scripts/convert_npys_to_h5parm.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+import numpy as np
+import pyrap.tables as pt
+from losoto.h5parm import h5parm
+from losoto.lib_operations import *
+import logging
+from os.path import join
+
+def makesolset(MS, data, solset_name):
+    solset = data.makeSolset(solset_name)
+
+    logging.info('Collecting information from the ANTENNA table.')
+    antennaTable = pt.table(MS + "::ANTENNA", ack=False)
+    antennaNames = antennaTable.getcol('NAME')
+    antennaPositions = antennaTable.getcol('POSITION')
+    antennaTable.close()
+    antennaTable = solset.obj._f_get_child('antenna')
+    antennaTable.append(zip(*(antennaNames,antennaPositions)))
+
+    logging.info('Collecting information from the FIELD table.')
+    fieldTable = pt.table(MS + "::FIELD", ack=False)
+    phaseDir = fieldTable.getcol('PHASE_DIR')
+    pointing = phaseDir[0, 0, :]
+    fieldTable.close()
+
+    sourceTable = solset.obj._f_get_child('source')
+    # add the field centre, that is also the direction for Gain and Common*
+    sourceTable.append([('pointing',pointing)])
+
+    return antennaNames
+    pass
+
+########################################################################
+def main(MSfiles, h5parmfile, store_basename='caldata_transfer', store_directory='.', solset_out = 'calibrator'):
+
+    # select on MS
+    mslist = MSfiles.lstrip('[').rstrip(']').replace(' ','').replace("'","").split(',')
+
+    if len(mslist) == 0:
+        logging.error("Did not find any existing directory in input MS list!")
+        return(1)
+        pass
+    else:
+        MSfile = mslist[0]
+        pass
+
+    # Open up the h5parm, get an example value
+    data = h5parm(h5parmfile, readonly = False)
+
+    # name (path) for parmdb to be written
+    reference_station_names = makesolset(MSfile, data, solset_out)
+    OutSolset = data.getSolset(solset_out)
+
+    # load the numpy arrays written by the previous scripts
+    # (filenames constructed in the same way as in those scripts)
+    freqs_ampl = np.load(join(store_directory, 'freqs_for_amplitude_array.npy'))
+    amps_array = np.load(join(store_directory, store_basename + '_amplitude_array.npy'))
+    clock_array = np.load(join(store_directory, 'fitted_data_dclock_' + store_basename + '_1st.npy'))
+    tec_array = np.load(join(store_directory, 'fitted_data_dTEC_' + store_basename + '_1st.npy'))
+    freqs_phase = np.load(join(store_directory, 'freqs_for_phase_array.npy'))
+    phases_array = np.load(join(store_directory, store_basename + '_phase_array.npy'))
+    station_names = np.load(join(store_directory, store_basename + '_station_names.npy'))
+
+    if len(reference_station_names) > len(station_names):
+        missing_stations = set(reference_station_names) - set(station_names)
+        logging.warning('CAUTION: For these stations no calibration solutions are available: ' + str(missing_stations))
+        pass
+
+    # create weights
+    dimensions_amp   = np.shape(amps_array)
+    dimensions_phase = np.shape(phases_array)
+    dimensions_clock = np.shape(clock_array)
+    dimensions_tec   = np.shape(tec_array)
+    weights_amp   = np.ndarray(shape = dimensions_amp)
+    weights_phase = np.ndarray(shape = dimensions_phase)
+    weights_clock = np.ndarray(shape = dimensions_clock)
+    weights_tec   = np.ndarray(shape = dimensions_tec)
+    weights_amp.fill(1)
+    weights_phase.fill(1)
+    weights_clock.fill(1)
+    weights_tec.fill(1)
+
+    OutSolset.makeSoltab(soltype='clock', soltabName='clock',
+                         axesNames=['ant'],
+                         axesVals=[station_names],
+                         vals=clock_array[-1,:], weights=weights_clock[-1,:])
+    OutSolset.makeSoltab(soltype='tec', soltabName='tec',
+                         axesNames=['ant'],
+                         axesVals=[station_names],
+                         vals=tec_array[-1,:], weights=weights_tec[-1,:])
+    OutSolset.makeSoltab(soltype='amplitude', soltabName='amplitude',
+                         axesNames=['ant', 'freq', 'pol'],
+                         axesVals=[station_names, freqs_ampl, ['XX','YY']],
+                         vals=amps_array[:,-1,:,:], weights=weights_amp[:,-1,:,:])
+    OutSolset.makeSoltab(soltype='phase', soltabName='phase',
+                         axesNames=['freq', 'ant'],
+                         axesVals=[freqs_phase, station_names],
+                         vals=phases_array, weights=weights_phase)
+
+    data.close()
+    return(0)
+
+
+########################################################################
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Create a parmDB with values from the calibrator.')
+
+    parser.add_argument('MSfiles', type=str,
+                        help='One or more MSs to use as a reference.')
+    parser.add_argument('h5parmfile', type=str,
+                        help='Output H5parm file.')
+    parser.add_argument('--basename', type=str, default='caldata_transfer',
+                        help='Base-name of the numpy files with the calibrator values. (default: \"caldata_transfer\")')
+    parser.add_argument('--storedir', type=str, default='.',
+                        help='Directory of the numpy files with the calibrator values. 
(default: \".\")') + parser.add_argument('--solset_out', type=str, default='calibrator', + help='Name of the solution set') + + format_stream = logging.Formatter("%(asctime)s\033[1m %(levelname)s:\033[0m %(message)s","%Y-%m-%d %H:%M:%S") + format_file = logging.Formatter("%(asctime)s %(levelname)s: %(message)s","%Y-%m-%d %H:%M:%S") + logging.root.setLevel(logging.INFO) + + log = logging.StreamHandler() + log.setFormatter(format_stream) + logging.root.addHandler(log) + + args = parser.parse_args() + + MSfiles = args.MSfiles + logging.error(MSfiles) + h5parmfile = args.h5parmfile + + main(MSfiles, h5parmfile, store_basename=args.basename, store_directory=args.storedir, solset_out=args.solset_out) diff --git a/Docker/scripts/createRMh5parm.py b/Docker/scripts/createRMh5parm.py new file mode 100755 index 00000000..6e1e5e8c --- /dev/null +++ b/Docker/scripts/createRMh5parm.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Extract RM values for a LOFAR observation. Fill h5parm + +Created on Tue Aug 7 11:46:57 2018 + +@author: mevius +""" +from losoto.h5parm import h5parm +from RMextract import getRM +from RMextract import PosTools +import RMextract.getIONEX as ionex +import pyrap.tables as pt +import os +import numpy as np +import sys +import logging + + +def makesolset(MS, data, solset_name): + solset = data.makeSolset(solset_name) + + logging.info('Collecting information from the ANTENNA table.') + antennaTable = pt.table(MS + "::ANTENNA", ack=False) + antennaNames = antennaTable.getcol('NAME') + antennaPositions = antennaTable.getcol('POSITION') + antennaTable.close() + antennaTable = solset.obj._f_get_child('antenna') + antennaTable.append(zip(*(antennaNames,antennaPositions))) + + logging.info('Collecting information from the FIELD table.') + fieldTable = pt.table(MS + "::FIELD", ack=False) + phaseDir = fieldTable.getcol('PHASE_DIR') + pointing = phaseDir[0, 0, :] + fieldTable.close() + + sourceTable = solset.obj._f_get_child('source') + # add the field centre, that is also the direction for Gain and Common* + sourceTable.append([('pointing',pointing)]) + + + + +def main(MSfiles, h5parmdb, solset_name = "sol000",timestepRM=300, + ionex_server="ftp://ftp.aiub.unibe.ch/CODE/", + ionex_prefix='CODG',ionexPath="./",earth_rot=0,proxyServer=None,proxyPort=None,proxyType=None,proxyUser=None,proxyPass=None): + '''Add rotation measure to existing h5parmdb + + Args: + MSfiles (string) : path + filename of Measurement Set + h5parmdb (string) : name of existing h5parm + solset_name (string) : optional name of solset in h5parmdb, + if not set, first one will be chosen + timestep (float) : timestep in seconds + ionex_server (string) : ftp server for IONEX files + ionex_prefix (string) : prefix of IONEX files + ionexPath (string) : location where IONEX files are stored + earth_rot (float) : parameter to determine how much earth rotation is taken \ + into account when interpolating IONEX data. 
(0 is none, 1 is full) + ''' + + try: + mslist = MSfiles.lstrip('[').rstrip(']').replace(' ','').replace("'","").split(',') + except AttributeError: + mslist = MSfiles + + if len(mslist) == 0: + raise ValueError("Did not find any existing directory in input MS list!") + pass + else: + MS = mslist[0] + pass + + if not os.path.exists(h5parmdb): + logging.error('Could not find h5parmdb: ' + h5parmdb) + return(1) + + data = h5parm(h5parmdb, readonly=False) + if not solset_name in data.getSolsetNames(): + makesolset(MS,data,solset_name) + solset = data.getSolset(solset_name) + soltabs = solset.getSoltabs() + station_names = sorted(solset.getAnt().keys()) + + if 'RMextract' in [s.name for s in soltabs]: + logging.warning('Soltab RMextract exists already. Skipping...') + return(0) + + #extract the timerange information + (timerange,timestep,pointing,stat_names,stat_pos) = PosTools.getMSinfo(MS) + start_time = timerange[0] + end_time = timerange[1] + timerange[0] = start_time - timestep + timerange[1] = end_time + timestep + times,timerange = PosTools.getIONEXtimerange(timerange, timestep) + if len(times[-1]) == 0 or times[-1][-1] < timerange[1]: + timestmp = list(times[-1]) + timestmp.append(timerange[1]) #add one extra step to make sure you have a value for all times in the MS in case timestep hase been changed + times[-1] = np.array(timestmp) + + for time_array in times[::-1]: #check in reverse order, since datamight not be available for latest days + starttime = time_array[0] + date_parms = PosTools.obtain_observation_year_month_day_fraction(starttime) + if not proxyServer: + if not "http" in ionex_server: #ftp server use ftplib + ionexf = ionex.getIONEXfile(time=date_parms, + server = ionex_server, + prefix = ionex_prefix, + outpath = ionexPath) + else: + ionexf = ionex.get_urllib_IONEXfile(time = date_parms, + server = ionex_server, + prefix = ionex_prefix, + outpath = ionexPath) + + else: + ionexf = ionex.get_urllib_IONEXfile(time = date_parms, + server = ionex_server, + prefix = ionex_prefix, + outpath = ionexPath, + proxy_server = proxyServer, + proxy_type = proxyType, + proxy_port = proxyPort, + proxy_user = proxyUser, + proxy_pass = proxyPass) + + if ionexf == -1: + if not "igsiono.uwm.edu.pl" in ionex_server: + logging.info("cannot get IONEX data, try fast product server instead") + if not proxyServer: + ionexf = ionex.get_urllib_IONEXfile(time = date_parms, + server = "https://igsiono.uwm.edu.pl", + prefix = "igrg", + outpath = ionexPath) + else: + ionexf = ionex.get_urllib_IONEXfile(time = date_parms, + server = "https://igsiono.uwm.edu.pl", + prefix = "igrg", + outpath = ionexPath, + proxy_server = proxyServer, + proxy_type = proxyType, + proxy_port = proxyPort, + proxy_user = proxyUser, + proxy_pass = proxyPass) + if ionexf == -1: + logging.error("IONEX data not available, even not from fast product server") + return -1 + + + if not proxyServer: + rmdict = getRM.getRM(MS, + server = ionex_server, + prefix = ionex_prefix, + ionexPath = ionexPath, + timestep = timestepRM, + earth_rot = earth_rot) + else: + rmdict = getRM.getRM(MS, + server = ionex_server, + prefix = ionex_prefix, + ionexPath = ionexPath, + timestep = timestepRM, + earth_rot = earth_rot, + proxy_server = proxyServer, + proxy_type = proxyType, + proxy_port = proxyPort, + proxy_user = proxyUser, + proxy_pass = proxyPass) + + + if not rmdict: + if not ionex_server: + raise ValueError("One or more IONEX files is not found on disk and download is disabled!\n" + "(You can run \"bin/download_IONEX.py\" outside the pipeline 
if needed.)") + else: + raise ValueError("Couldn't get RM information from RMextract! (But I don't know why.)") + + logging.info('Adding rotation measure values to: ' + solset_name + ' of ' + h5parmdb) + rm_vals=np.array([rmdict["RM"][stat].flatten() for stat in station_names]) + new_soltab = solset.makeSoltab(soltype='rotationmeasure', soltabName='RMextract', + axesNames=['ant', 'time'], axesVals=[station_names, rmdict['times']], + vals=rm_vals, + weights=np.ones_like(rm_vals, dtype=np.float16)) + + return(0) + + data.close() + + + ######################################################################## +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(description='Adds CommonRotationAngle to an H5parm from RMextract.') + + parser.add_argument('MSfiles', type=str, nargs='+', + help='MS for which the parmdb should be created.') + parser.add_argument('h5parm', type=str, + help='H5parm to which the results of the CommonRotationAngle is added.') + parser.add_argument('--server', type=str, default='ftp://ftp.aiub.unibe.ch/CODE/', + help='URL of the server to use. (default: ftp://ftp.aiub.unibe.ch/CODE/)') + parser.add_argument('--prefix', type=str, default='CODG', + help='Prefix of the IONEX files. (default: \"CODG\")') + parser.add_argument('--ionexpath', '--path', type=str, default='./', + help='Directory where to store the IONEX files. (default: \"./\")') + parser.add_argument('--solsetName', '--solset', type=str, default='sol000', + help='Name of the h5parm solution set (default: sol000)') + parser.add_argument('-t','--timestep', help= + 'timestep in seconds. for values <=0 (default) the timegrid of the MS is used ', + dest="timestep",type=float, default=300.) + parser.add_argument('-e','--smart_interpol', type=float, default=0, help= + 'float parameter describing how much of Earth rotation is taken in to account \ + in interpolation of the IONEX files. 1.0 means time interpolation assumes \ + ionosphere rotates in opposite direction of the Earth. 0.0 (default) means \ + no rotation applied',dest="earth_rot",) + + args = parser.parse_args() + + MS = args.MSfiles + h5parmdb = args.h5parm + logging.info("Working on: %s %s" % (MS, h5parmdb)) + main(MS, h5parmdb, ionex_server=args.server, ionex_prefix=args.prefix, + ionexPath=args.ionexpath, solset_name=args.solsetName, + timestepRM=args.timestep, earth_rot=args.earth_rot) + diff --git a/Docker/scripts/download_skymodel_target.py b/Docker/scripts/download_skymodel_target.py new file mode 100755 index 00000000..3475c19c --- /dev/null +++ b/Docker/scripts/download_skymodel_target.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +import os,sys +import glob +import pyrap.tables as pt +import numpy as np +import time +import lsmtool + +######################################################################## +def grab_coord_MS(MS): + """ + Read the coordinates of a field from one MS corresponding to the selection given in the parameters + + Parameters + ---------- + MS : str + Full name (with path) to one MS of the field + + Returns + ------- + RA, Dec : "tuple" + coordinates of the field (RA, Dec in deg , J2000) + """ + + # reading the coordinates ("position") from the MS + # NB: they are given in rad,rad (J2000) + [[[ra,dec]]] = pt.table(MS+'::FIELD', readonly=True, ack=False).getcol('PHASE_DIR') + + # RA is stocked in the MS in [-pi;pi] + # => shift for the negative angles before the conversion to deg (so that RA in [0;2pi]) + if ra<0: + ra=ra+2*np.pi + + # convert radians to degrees + ra_deg = ra/np.pi*180. 
+    dec_deg = dec/np.pi*180.
+
+    # and sending the coordinates in deg
+    return ra_deg,dec_deg
+
+
+########################################################################
+def input2strlist_nomapfile(invar):
+    """ from bin/download_IONEX.py
+        give the list of MSs from the list provided as a string
+    """
+
+    str_list = None
+    if type(invar) is str:
+        if invar.startswith('[') and invar.endswith(']'):
+            str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')]
+        else:
+            str_list = [invar.strip(' \'\"')]
+    elif type(invar) is list:
+        str_list = [str(f).strip(' \'\"') for f in invar]
+    else:
+        raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!')
+    return str_list
+
+
+########################################################################
+def main(ms_input, SkymodelPath, Radius="5.", DoDownload="True", Source="TGSS"):
+    """
+    Download the skymodel for the target field
+
+    Parameters
+    ----------
+    ms_input : str
+        String from the list (map) of the target MSs
+    SkymodelPath : str
+        Full name (with path) to the skymodel; if download is enabled, the skymodel will be stored here
+    Radius : string with float (default = "5.")
+        Radius for the TGSS/GSM cone search in degrees
+    DoDownload : str ("Force" or "True" or "False")
+        Whether to download the TGSS or GSM skymodel:
+        "Force": download the skymodel from TGSS or GSM, deleting an existing skymodel if needed.
+        "True" or "Yes": use the existing skymodel file if it exists, download the skymodel from
+                         TGSS or GSM if it does not.
+        "False" or "No": do not download the skymodel; raise an exception if the skymodel
+                         file does not exist.
+
+    """
+
+    FileExists = os.path.isfile(SkymodelPath)
+    if (not FileExists and os.path.exists(SkymodelPath)):
+        raise ValueError("download_tgss_skymodel_target: Path: \"%s\" exists but is not a file!"%(SkymodelPath))
+    download_flag = False
+    if not os.path.exists(os.path.dirname(SkymodelPath)):
+        os.makedirs(os.path.dirname(SkymodelPath))
+    if DoDownload.upper() == "FORCE":
+        if FileExists:
+            os.remove(SkymodelPath)
+        download_flag = True
+    elif DoDownload.upper() == "TRUE" or DoDownload.upper() == "YES":
+        if FileExists:
+            print "USING the existing skymodel in "+ SkymodelPath
+            return
+        else:
+            download_flag = True
+    elif DoDownload.upper() == "FALSE" or DoDownload.upper() == "NO":
+        if FileExists:
+            print "USING the existing skymodel in "+ SkymodelPath
+            return
+        else:
+            raise ValueError("download_tgss_skymodel_target: Path: \"%s\" does not exist and skymodel download is disabled!"%(SkymodelPath))
+
+    # If we got here, then we are supposed to download the skymodel.
+    assert download_flag is True # Jaja, belts and suspenders...
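+    # The download below is attempted up to five times; wget's exit code
+    # signals success (0) or failure (non-zero).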
+ print "DOWNLOADING skymodel for the target into "+ SkymodelPath + + # Reading a MS to find the coordinate (pyrap) + [RATar,DECTar]=grab_coord_MS(input2strlist_nomapfile(ms_input)[0]) + + # Downloading the skymodel, skip after five tries + errorcode = 1 + tries = 0 + while errorcode != 0 and tries < 5: + if Source == 'TGSS': + errorcode = os.system("wget -O "+SkymodelPath+ " \'http://tgssadr.strw.leidenuniv.nl/cgi-bin/gsmv3.cgi?coord="+str(RATar)+","+str(DECTar)+"&radius="+str(Radius)+"&unit=deg&deconv=y\' ") + pass + elif Source == 'GSM': + errorcode = os.system("wget -O "+SkymodelPath+ " \'https://lcs165.lofar.eu/cgi-bin/gsmv1.cgi?coord="+str(RATar)+","+str(DECTar)+"&radius="+str(Radius)+"&unit=deg&deconv=y\' ") + pass + time.sleep(5) + tries += 1 + pass + + if not os.path.isfile(SkymodelPath): + raise IOError("download_tgss_skymodel_target: Path: \"%s\" does not exist after trying to download the skymodel."%(SkymodelPath)) + + # Treat all sources as one group (direction) + skymodel = lsmtool.load(SkymodelPath) + skymodel.group('single') + skymodel.write(clobber=True) + + return + + +######################################################################## +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(description=' Download the TGSS or GSM skymodel for the target field') + + parser.add_argument('MSfile', type=str, nargs='+', + help='One (or more MSs) for which a TGSS/GSM skymodel will be download.') + parser.add_argument('SkyTar', type=str, + help='Full name (with path) to the skymodel; the TGSS/GSM skymodel will be downloaded here') + parser.add_argument('--Radius', type=float, default=5., + help='Radius for the TGSS/GSM cone search in degrees') + parser.add_argument('--Source', type=str, default='TGSS', + help='Choose source for skymodel: TGSS or GSM') + parser.add_argument('--DoDownload', type=str, default="True", + help='Download or not the TGSS skymodel or GSM ("Force" or "True" or "False").') + + args = parser.parse_args() + radius=5 + if args.Radius: + radius=args.Radius + + main(args.MSfile, args.SkyTar, str(radius), args.DoDownload, args.Source) diff --git a/Docker/scripts/find_skymodel_cal.py b/Docker/scripts/find_skymodel_cal.py new file mode 100755 index 00000000..9f3ae967 --- /dev/null +++ b/Docker/scripts/find_skymodel_cal.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +import os +import glob +import pyrap.tables as pt +import math +import lsmtool +import numpy + +def grab_pointing(MS): + """ + Read the name of the calibrator from one MS corresponding to the selection given in the parameters + + Parameters + ---------- + MS : str + Full name (with path) to one MS of the field + + + Returns + ------- + nameCal : str + Name of the calibrator as provided in the MS + NB: we suppose that all the calibrators' observations have this field filled in (MS/Observation, column LOFAR_TARGET) + """ + + [ra, dec] = pt.table(MS+'::FIELD', readonly=True, ack=False).getcol('PHASE_DIR')[0][0] * 180 / math.pi + return ra, dec + + + +######################################################################## + +def check_skymodel(skymodel, ra, dec, max_separation_arcmin = 1.0): + """ + Searches for an appropriate sky model + """ + s = lsmtool.load(skymodel) + dist_deg = s.getDistance(ra, dec) + if any(dist_deg * 60.0 < max_separation_arcmin): + patch_position = int(numpy.where(dist_deg * 60 < max_separation_arcmin)[0][0]) + patch_name = s.getPatchNames()[patch_position] + return (True, patch_name) + pass + else: + return (False, '') + pass + pass + 
+
+########################################################################
+def find_skymodel(ra, dec, PathSkyMod, extensionSky = ".skymodel", max_separation_arcmin = 1.0):
+    """
+    Find in the provided folder the corresponding skymodel for the given source
+
+    Parameters
+    ----------
+    ra, dec : float
+        Coordinates (in degrees) of the source for which we want to find the skymodel
+    PathSkyMod : str
+        Path to the skymodel file, or to the folder containing the skymodels in use
+    [extensionSky] : str
+        Default: ".skymodel"
+        Extension of the skymodel files
+
+    NB: we suppose that the name of the calibrator is included in the name of the skymodel
+
+    Returns
+    -------
+    skymodel : str
+        Full name (with path) to the matching skymodel
+    """
+
+    if os.path.isfile(PathSkyMod):
+        print("Checking the skymodel provided: " + PathSkyMod)
+        skymodels = [PathSkyMod]
+    else:
+        skymodels = glob.glob(PathSkyMod + "/*" + extensionSky)
+
+    for skymodel in skymodels:
+        check = check_skymodel(skymodel, ra, dec, max_separation_arcmin)
+        if check[0]:
+            print "The following skymodel will be used for the calibrator: " + skymodel.split("/")[-1] + " (in " + PathSkyMod + ")"
+            return (skymodel, check[-1])
+            pass
+        else:
+            pass
+        pass
+
+    raise TypeError('find_skymodel: SKYMODEL FOR THE CALIBRATOR NOT FOUND IN ' + PathSkyMod)
+    pass
+
+########################################################################
+def input2strlist_nomapfile(invar):
+    """
+    from bin/download_IONEX.py
+    give the list of MSs from the list provided as a string
+    """
+    str_list = None
+    if type(invar) is str:
+        if invar.startswith('[') and invar.endswith(']'):
+            str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')]
+        else:
+            str_list = [invar.strip(' \'\"')]
+    elif type(invar) is list:
+        str_list = [str(f).strip(' \'\"') for f in invar]
+    else:
+        raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!')
+    return str_list
+
+########################################################################
+def main(ms_input, DirSkymodelCal, extensionSky=".skymodel", max_separation_arcmin = 1.0):
+
+    """
+    Find automatically the skymodel to use for the calibrator
+
+    Parameters
+    ----------
+    ms_input : str
+        String from the list (map) of the calibrator MSs
+    DirSkymodelCal : str
+        Path to the skymodel file, or to the folder where the skymodels are stored
+    [extensionSky] : str
+        Default: ".skymodel"
+        Extension of the skymodel files
+
+    Returns
+    -------
+    {'SkymodelCal':skymodelCal} : "dict"
+        Path to the skymodel of the calibrator
+    """
+
+    if os.path.isfile(DirSkymodelCal) or os.path.isdir(DirSkymodelCal):
+        ra, dec = grab_pointing(input2strlist_nomapfile(ms_input)[0])
+        skymodelCal, skymodelName = find_skymodel(ra, dec, DirSkymodelCal, extensionSky, float(max_separation_arcmin))
+        return { 'SkymodelCal' : skymodelCal, 'SkymodelName': skymodelName}
+    else:
+        raise ValueError("find_skymodel_cal: The path \"%s\" is neither a file nor a directory!"%(DirSkymodelCal))
+
+
+########################################################################
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Automatically find the matching skymodel (for the calibrator)')
+
+    parser.add_argument('MSfile', type=str, nargs='+',
+                        help='One (or more MSs) for which we search the matching skymodel.')
+    parser.add_argument('DirSky', type=str,
+                        help='Path to the skymodel file, or to the folder where the skymodels are stored.')
+    parser.add_argument('--extsky', type=str,
+ help='extension of the skymodel files. (default: \".skymodel\")') + + args = parser.parse_args() + extensionSky='.skymodel' + if args.extsky: + extensionSky=args.extsky + + main(args.MSfile,args.DirSky, extensionSky) + + pass diff --git a/Docker/scripts/fits2sky.py b/Docker/scripts/fits2sky.py new file mode 100755 index 00000000..13594290 --- /dev/null +++ b/Docker/scripts/fits2sky.py @@ -0,0 +1,195 @@ +#! /usr/bin/env python +""" +Script to make a sky model from fits model images +""" +import argparse +from argparse import RawTextHelpFormatter +from astropy.io import fits +from astropy import wcs +import numpy as np +import scipy.interpolate +import casacore.tables as pt +import sys +import os +import glob + + +def ra2hhmmss(deg): + """Convert RA coordinate (in degrees) to HH MM SS""" + + from math import modf + if deg < 0: + deg += 360.0 + x, hh = modf(deg/15.) + x, mm = modf(x*60) + ss = x*60 + + return (int(hh), int(mm), ss) + + +def dec2ddmmss(deg): + """Convert DEC coordinate (in degrees) to DD MM SS""" + + from math import modf + sign = (-1 if deg < 0 else 1) + x, dd = modf(abs(deg)) + x, ma = modf(x*60) + sa = x*60 + + return (int(dd), int(ma), sa, sign) + + +def convert_radec_str(ra, dec): + """Takes ra, dec in degrees and returns makesourcedb strings""" + ra = ra2hhmmss(ra) + sra = str(ra[0]).zfill(2)+':'+str(ra[1]).zfill(2)+':'+str("%.3f" % (ra[2])).zfill(6) + dec = dec2ddmmss(dec) + decsign = ('-' if dec[3] < 0 else '+') + sdec = decsign+str(dec[0]).zfill(2)+'.'+str(dec[1]).zfill(2)+'.'+str("%.3f" % (dec[2])).zfill(6) + + return sra, sdec + + +def main(fits_models, ms_file, skymodel, fits_masks, min_flux_jy=0.005, interp='linear'): + """ + Make a makesourcedb sky model for input MS from WSClean fits model images + + Parameters + ---------- + fits_models : str + Filename of FITS model images. Can be a list of files (e.g., + '[mod1.fits,mod2.fits,...]') + ms_file : str + Filename of MS for which sky model is to be made. Can be a list of files + (e.g., '[ms1,ms2,...]', in which case they should all have the same + frequency + skymodel : str + Filename of the output makesourcedb sky model + fits_masks : str + Filename of FITS mask images. Can be a list of files (e.g., + '[msk1.fits,msk2.fits,...]') + min_flux_jy : float, optional + Minimum value of flux in Jy of a source to include in output model + interp : str, optional + Interpolation method. 
Can be any supported by scipy.interpolate.interp1d: + 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic' + + """ + min_flux_jy = float(min_flux_jy) + + # Get filenames of model images and masks + if '[' in fits_models and ']' in fits_models: + fits_models = fits_models.strip('[] ').split(',') + fits_models = [f.strip('\'\" ') for f in fits_models] + else: + fits_models = [fits_models.strip('\'\" ')] + if '[' in fits_masks and ']' in fits_masks: + fits_masks = fits_masks.strip('[]').split(',') + fits_masks = [f.strip('\'\" ') for f in fits_masks] + else: + fits_masks = [fits_masks.strip('\'\" ')] + + # Read (first) MS file and get the frequency info + if '[' in ms_file and ']' in ms_file: + files = ms_file.strip('[] ').split(',') + #files = [f.strip() for f in files] + ms_file = files[0].strip('\'\" ') + sw = pt.table(ms_file+'::SPECTRAL_WINDOW', ack=False) + ms_freq = sw.col('REF_FREQUENCY')[0] + ms_freq_low = sw.col('CHAN_FREQ')[0][0] + ms_freq_high = sw.col('CHAN_FREQ')[0][-1] + sw.close() + + # Get frequencies and data of model images and masks + freqs = [] + model_images = [] + mask_images = [] + for f in fits_models: + hdr = fits.getheader(f, 0) + freqs.append(hdr['CRVAL3']) # Hz + model_images.append(fits.getdata(f, 0)) + for f in fits_masks: + mask_images.append(fits.getdata(f, 0)) + + # Sort by freq + sorted_ind = np.argsort(freqs) + freqs = np.array(freqs)[sorted_ind] + fits_models = np.array(fits_models)[sorted_ind] + model_images = np.array(model_images)[sorted_ind] + mask_images = np.array(mask_images)[sorted_ind] + + # Check if there is a model at the ms frequency. If so, just use that one + ind = np.where( np.logical_and(freqs >= ms_freq_low, freqs <= ms_freq_high) ) + if len(ind[0]) == 1: + freqs = freqs[ind] + fits_models = fits_models[ind] + model_images = model_images[ind] + mask_images = mask_images[ind] + + # Set the WCS reference + hdr = fits.getheader(fits_models[0], 0) + w = wcs.WCS(hdr) + + # Find nonzero pixels in stacked image + stacked_model = np.zeros(model_images[0].shape) + stacked_mask = np.zeros(mask_images[0].shape) + for im, ma in zip(model_images, mask_images): + stacked_model += im + stacked_mask += ma + nonzero_ind = np.where((stacked_model != 0.0) & (stacked_mask > 0)) + + # Interpolate the fluxes to the frequency of the MS + nsources = len(nonzero_ind[0]) + fluxes = [] + names = [] + ras = [] + decs = [] + for i in range(nsources): + flux_array = [] + freq_array = [] + for ind, (im, ma) in enumerate(zip(model_images, mask_images)): + index = [nonzero_ind[j][i] for j in range(4)] + if ma[tuple(index)] > 0: + flux_array.append(im[tuple(index)]) + freq_array.append(freqs[ind]) + + if len(flux_array) > 0: + # Only create a source entry if MS frequency is within +/- 4 MHz of + # the sampled frequency range to prevent large extrapolations when + # the primary beam may cause rapid spectral changes + if ms_freq > freq_array[0]-4e6 and ms_freq < freq_array[-1]+4e6: + # If MS frequency lies outside range, just use nearest freq + if ms_freq < freq_array[0]: + flux = flux_array[0] + elif ms_freq > freq_array[-1]: + flux = flux_array[-1] + else: + # Otherwise interpolate + flux = scipy.interpolate.interp1d(freq_array, flux_array, kind=interp)(ms_freq) + if flux > min_flux_jy: + index.reverse() # change to WCS coords + ras.append(w.wcs_pix2world(np.array([index]), 0, ra_dec_order=True)[0][0]) + decs.append(w.wcs_pix2world(np.array([index]), 0, ra_dec_order=True)[0][1]) + names.append('cc{}'.format(i)) + fluxes.append(flux) + + # Write sky model + with 
open(skymodel, 'w') as outfile:
+        outfile.write('FORMAT = Name, Type, Ra, Dec, I, Q, U, V, ReferenceFrequency\n')
+        for name, ra, dec, flux in zip(names, ras, decs, fluxes):
+            ra_str, dec_str = convert_radec_str(ra, dec)
+            outfile.write('{0}, POINT, {1}, {2}, {3}, 0.0, 0.0, 0.0, {4}\n'
+                          .format(name, ra_str, dec_str, flux, ms_freq))
+
+
+if __name__ == '__main__':
+    descriptiontext = "Make a makesourcedb sky model from WSClean fits model images.\n"
+
+    parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=RawTextHelpFormatter)
+    parser.add_argument('fits_models', help='Model images')
+    parser.add_argument('msfile', help='MS file')
+    parser.add_argument('skymodel', help='Filename of output sky model')
+    parser.add_argument('fits_masks', help='Mask images')
+
+    args = parser.parse_args()
+    main(args.fits_models, args.msfile, args.skymodel, args.fits_masks)
diff --git a/Docker/scripts/getStructure_from_phases.py b/Docker/scripts/getStructure_from_phases.py
new file mode 100755
index 00000000..fcfe1b5b
--- /dev/null
+++ b/Docker/scripts/getStructure_from_phases.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+
+##########################################################################
+#
+# Written by Maaijke Mevius -- see Radio Science publication DOI: 10.1002/2016RS006028
+#
+##########################################################################
+# History
+# 20/07/2016 Outlier rejection and argparse added -- Tim Shimwell
+# 06/06/2019 Changed for the use with h5parms -- Alexander Drabent
+
+import matplotlib as mpl
+mpl.use("Agg")
+from losoto.h5parm import h5parm
+from losoto.lib_operations import *
+from pylab import *
+import scipy
+from scipy.optimize import leastsq
+import argparse
+import logging
+
+def mad(arr):
+    """ Median Absolute Deviation: a "robust" version of standard deviation.
+        Indicates variability of the sample.
+        https://en.wikipedia.org/wiki/Median_absolute_deviation
+    """
+    arr = np.ma.array(arr).compressed() # should be faster to not use masked arrays.
+    med = np.median(arr)
+    return np.median(np.abs(arr - med))
+
+def model(t, coeffs):
+    coeffs[0] = abs(coeffs[0])
+    return coeffs[0] + (t**coeffs[2])/coeffs[1]
+
+def residuals(coeffs, y, t):
+    coeffs[0] = abs(coeffs[0])
+    return y - model(t, coeffs)
+
+def main(h5parmfile,solset='sol000',soltab='phase000',nr_grid=1,doplot=True,outbasename='ionosphere',output_dir='./',dofilter=True):
+    outfile = open(str(output_dir) + '/' + outbasename +'_structure.txt','w')
+    data = h5parm(h5parmfile)
+    solset = data.getSolset(solset)
+    soltab = solset.getSoltab(soltab)
+    soltype = soltab.getType()
+    if soltype != 'phase':
+        logging.warning("Soltab is of type " + soltype + ", but should be phase. 
Skipping.") + data.close() + return(0) + vals = soltab.val + station_names = soltab.ant[:] + stations = solset.getAnt() + phx=[] + phy=[] + allposx=[] + allposy=[] + nrtimes = len(soltab.time) + nrfreqss = len(soltab.freq) + timestep=int(nrtimes/nr_grid) + for vals, coord, selection in soltab.getValuesIter(returnAxes=soltab.getAxesNames(), weight=False): + try: + vals = reorderAxes( vals, soltab.getAxesNames(), ['pol', 'ant', 'time', 'freq', 'dir']) + vals = vals[:,:,:,:,0] + except: + vals = reorderAxes( vals, soltab.getAxesNames(), ['pol', 'ant', 'time', 'freq']) + + valx = vals[0] + valy = vals[1] + for i,station_name in enumerate(station_names): + if not "CS" in station_name: + continue + phx.append(valx[i,:,0].flatten()) + allposx.append(stations[station_name]) + phy.append(valy[i,:,0].flatten()) + allposy.append(stations[station_name]) + + phx=np.array(phx) + phy=np.array(phy) + allposx=np.array(allposx) + allposy=np.array(allposy) + D=allposx[:,np.newaxis,:]-allposx[np.newaxis,:] + D2=np.sqrt(np.sum(D**2,axis=-1)) + Dy=allposy[:,np.newaxis,:]-allposy[np.newaxis,:] + D2y=np.sqrt(np.sum(Dy**2,axis=-1)) + S0s=[] + betas=[] + for itime in xrange(nr_grid+1): + tm=[0,1000000000] + if itime<nr_grid: + tm[0]=int(itime*timestep) + tm[1]=int(tm[0]+timestep) + dphx=phx[:,np.newaxis,tm[0]:tm[1]]-phx[np.newaxis,:,tm[0]:tm[1]] + dphy=phy[:,np.newaxis,tm[0]:tm[1]]-phy[np.newaxis,:,tm[0]:tm[1]] + dphx=np.unwrap(np.remainder(dphx-dphx[:,:,0][:,:,np.newaxis]+np.pi,2*np.pi)) + dvarx=np.var(dphx,axis=-1) + dphy=np.unwrap(np.remainder(dphy-dphy[:,:,0][:,:,np.newaxis]+np.pi,2*np.pi)) + dvary=np.var(dphy,axis=-1) + myselect=np.logical_and(D2>0,np.logical_and(np.any(np.logical_and(dvarx>1e-7,dvarx<.1),axis=0)[np.newaxis],np.any(np.logical_and(dvarx>1e-7,dvarx<.1),axis=1)[:,np.newaxis])) + if dofilter: + x=D2[myselect] + y=dvarx[myselect] + # Seems like real values dont occur above 1.0 + flagselect = np.where(y > 1.0) + xplot = np.delete(x,flagselect) + yplot = np.delete(y,flagselect) + + + bins = np.logspace(np.log10(np.min(xplot)),np.log10(np.max(xplot)),10) + binys = [] + binxs = [] + for i in range(0,len(bins)-1): + binmin = bins[i] + binmax = bins[i+1] + inbin = np.intersect1d(np.where(xplot > binmin),np.where(xplot < binmax)) + binxs.append((binmin+binmax)/2.0) + binys.append(np.median(yplot[inbin])+10*mad(yplot[inbin])) + x0 = [0.1,1.0,1.0] + xfit, flag = scipy.optimize.leastsq(residuals, x0, args=(binys,binxs)) + flagselect = np.where(y > model(x,xfit)) + print xfit,'fitting' + if doplot: + x=D2[myselect] + y=dvarx[myselect] + subplot(3,1,1) + scatter(x,y,color='b') + if dofilter: + y2 = y[flagselect] + x2 = x[flagselect] + scatter(x2,y2,color='r') + scatter(x,model(x,xfit),color='gray',linestyle='-')#,'gray',linestyle='-',linewidth=3) + x=D2[np.logical_and(D2>1e3,myselect)] + y=dvarx[np.logical_and(D2>1e3,myselect)] + if dofilter: + flagselect = np.where(y > model(x,xfit)) + x = np.delete(x,flagselect) + y = np.delete(y,flagselect) + A=np.ones((2,x.shape[0]),dtype=float) + A[1,:]=np.log10(x) + par=np.dot(np.linalg.inv(np.dot(A,A.T)),np.dot(A,np.log10(y))) + S0=10**(-1*np.array(par)[0]/np.array(par)[1]) + if doplot: + plot(x,10**(par[0]+par[1]*np.log10(x)),'r-') + xscale("log") + yscale("log") + xlim(30,4000) + xlabel('baseline length [m]') + ylabel('XX phase variance [rad$^2$]') + title('XX diffractive scale: %3.1f km'%(float(S0)/1000.)) + 
myselect=np.logical_and(D2y>0,np.logical_and(np.any(np.logical_and(dvary>1e-7,dvary<.1),axis=0)[np.newaxis],np.any(np.logical_and(dvary>1e-7,dvary<.1),axis=1)[:,np.newaxis])) + if dofilter: + x=D2y[myselect] + y=dvary[myselect] + # seems like real values dont occur above 1.0 + flagselect = np.where(y > 1.0) + xplot = np.delete(x,flagselect) + yplot = np.delete(y,flagselect) + bins = np.logspace(np.log10(np.min(xplot)),np.log10(np.max(xplot)),20) + binys = [] + binxs = [] + for i in range(0,len(bins)-1): + binmin = bins[i] + binmax = bins[i+1] + inbin = np.intersect1d(np.where(xplot > binmin),np.where(xplot < binmax)) + binxs.append((binmin+binmax)/2.0) + binys.append(np.median(yplot[inbin])+10*mad(yplot[inbin])) + x0 = [0.1,1.0,1.0] + xfit, flag = scipy.optimize.leastsq(residuals, x0, args=(binys,binxs)) + flagselect = np.where(y > model(x,xfit)) + print xfit,'fitting' + if doplot: + x=D2y[myselect] + y=dvary[myselect] + subplot(3,1,3) + scatter(x,y,color='b') + if dofilter: + y2 = y[flagselect] + x2 = x[flagselect] + scatter(x2,y2,color='r') + scatter(x,model(x,xfit),color='gray',linestyle='-')#,'gray',linestyle='-',linewidth=3) + x=D2y[np.logical_and(D2y>1e3,myselect)] + y=dvary[np.logical_and(D2y>1e3,myselect)] + if dofilter: + flagselect = np.where(y > model(x,xfit)) + x = np.delete(x,flagselect) + y = np.delete(y,flagselect) + A=np.ones((2,x.shape[0]),dtype=float) + A[1,:]=np.log10(x) + pary=np.dot(np.linalg.inv(np.dot(A,A.T)),np.dot(A,np.log10(y))) + S0y=10**(-1*np.array(pary)[0]/np.array(pary)[1]) + if doplot: + plot(x,10**(pary[0]+pary[1]*np.log10(x)),'r-') + xscale("log") + yscale("log") + xlim(30,4000) + xlabel('baseline length [m]') + ylabel('YY phase variance [rad$^2$]') + title('YY diffractive scale: %3.1f km'%(float(S0y)/1000.)) + savefig(output_dir + '/' + outbasename + '_structure.png') + close() + cla() + S0s.append([S0,S0y]) + betas.append([par[1],pary[1]]) + outfile.write('S0s ****%s**** %s beta %s %s\n'%(S0,S0y,par[1],pary[1])) + data.close() + return(0) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('h5parmfile',help='Name of input h5parm file') + parser.add_argument('--solset',help='Name of solution set to use') + parser.add_argument('--soltab',help='Name of solution table to use') + parser.add_argument('--outbasename',help='Outfile base name') + parser.add_argument('--output_dir', help='Output directory', default='./') + parser.add_argument('-p',dest='doplot',default=True,help='Do plotting') + parser.add_argument('-f',dest='dofilter',default=True,help='Attempt to filter out odd solutions') + parser.add_argument('-nr_grid',dest='nr_grid',default=1,help='Time step') + + args = parser.parse_args() + h5parmfile = args.h5parmfile + solset = args.solset + soltab = args.soltab + dofilter = args.dofilter + doplot = args.doplot + outbasename = args.outbasename + output_dir = args.output_dir + nr_grid = args.nr_grid + + main(h5parmfile,solset,soltab,nr_grid,doplot,outbasename,output_dir,dofilter) diff --git a/Docker/scripts/h5parm_pointingname.py b/Docker/scripts/h5parm_pointingname.py new file mode 100755 index 00000000..60d7a879 --- /dev/null +++ b/Docker/scripts/h5parm_pointingname.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +from losoto.h5parm import h5parm +from losoto.lib_operations import * + +import numpy +######################################################################## +def main(h5parmdb, solsetName='sol000', pointing='POINTING'): + + + data = h5parm(h5parmdb, readonly = False) + solset = data.getSolset(solsetName) 
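+    # Each row of the source table is a (name, direction) tuple; the loop below
+    # renames every row to the given pointing, keeping the first direction.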
+
+    sources = solset.obj._f_get_child('source')
+    direction = list(sources[0][-1])
+
+    for i in numpy.arange(len(sources)):
+        sources[i] = (pointing, direction)
+        pass
+
+    data.close()
+    return 0
+
+
+########################################################################
+if __name__ == '__main__':
+    import argparse
+    import logging
+    parser = argparse.ArgumentParser(description='Define the pointing direction of an h5parm.')
+
+    parser.add_argument('h5parmdb', type=str,
+                        help='H5parm of which the pointing will be set.')
+    parser.add_argument('--solsetName', '--solset', type=str, default='sol000',
+                        help='Name of the h5parm solution set (default: sol000)')
+    parser.add_argument('--pointing', type=str, default='POINTING',
+                        help='Name of the pointing (default: POINTING)')
+
+    args = parser.parse_args()
+
+    h5parmdb = args.h5parmdb
+    logging.info("Working on: %s" % (h5parmdb))
+    main(h5parmdb, solsetName=args.solsetName, pointing=args.pointing)
diff --git a/Docker/scripts/make_clean_mask.py b/Docker/scripts/make_clean_mask.py
new file mode 100755
index 00000000..b504be4a
--- /dev/null
+++ b/Docker/scripts/make_clean_mask.py
@@ -0,0 +1,793 @@
+#! /usr/bin/env python
+"""
+Script to make a clean mask
+"""
+import argparse
+from argparse import RawTextHelpFormatter
+import casacore.images as pim
+from astropy.io import fits as pyfits
+from astropy.coordinates import Angle
+import pickle
+import numpy as np
+import sys
+import os
+
+# I copy&pasted that into this file
+#from factor.lib.polygon import Polygon
+
+# workaround for a bug / ugly behavior in matplotlib / pybdsf
+# (some installations of matplotlib.pyplot fail if there is no X server available,
+# which is not caught in pybdsf.)
+try:
+    import matplotlib
+    matplotlib.use('Agg')
+except (RuntimeError, ImportError):
+    pass
+try:
+    import bdsf
+except ImportError:
+    from lofar import bdsm as bdsf
+
+class Polygon:
+    """
+    Generic polygon class
+
+    Polygons are used to define the facet boundaries.
+
+    Parameters
+    ----------
+    x : array
+        A sequence of nodal x-coords.
+    y : array
+        A sequence of nodal y-coords.
+
+    """
+    def __init__(self, x, y):
+        if len(x) != len(y):
+            raise IndexError('x and y must be equally sized.')
+        self.x = np.asfarray(x)
+        self.y = np.asfarray(y)
+
+        # Closes the polygon if it was open
+        x1, y1 = x[0], y[0]
+        xn, yn = x[-1], y[-1]
+        if x1 != xn or y1 != yn:
+            self.x = np.concatenate((self.x, [x1]))
+            self.y = np.concatenate((self.y, [y1]))
+
+        # Anti-clockwise coordinates
+        if _det(self.x, self.y) < 0:
+            self.x = self.x[::-1]
+            self.y = self.y[::-1]
+
+
+    def is_inside(self, xpoint, ypoint, smalld=1e-12):
+        """
+        Check if point is inside a general polygon.
+
+        An improved version of the algorithm of Nordbeck and Rydstedt.
+
+        REF: SLOAN, S.W. (1985): A point-in-polygon program. Adv. Eng.
+        Software, Vol 7, No. 1, pp 45-47.
+
+        Parameters
+        ----------
+        xpoint : array or float
+            The x-coord of the point to be tested.
+        ypoint : array or float
+            The y-coords of the point to be tested.
+        smalld : float
+            Tolerance within which point is considered to be on a side.
+
+        Returns
+        -------
+        mindst : array or float
+            The distance from the point to the nearest point of the polygon:
+
+            If mindst < 0 then point is outside the polygon.
+            If mindst = 0 then point is on a side of the polygon.
+            If mindst > 0 then point is inside the polygon.
+ + """ + xpoint = np.asfarray(xpoint) + ypoint = np.asfarray(ypoint) + + # Scalar to array + if xpoint.shape is tuple(): + xpoint = np.array([xpoint], dtype=float) + ypoint = np.array([ypoint], dtype=float) + scalar = True + else: + scalar = False + # Check consistency + if xpoint.shape != ypoint.shape: + raise IndexError('x and y must be equally sized.') + + # If snear = True: Dist to nearest side < nearest vertex + # If snear = False: Dist to nearest vertex < nearest side + snear = np.ma.masked_all(xpoint.shape, dtype=bool) + + # Initialize arrays + mindst = np.ones_like(xpoint, dtype=float) * np.inf + j = np.ma.masked_all(xpoint.shape, dtype=int) + x = self.x + y = self.y + n = len(x) - 1 # Number of sides/vertices defining the polygon + + # Loop over each side defining polygon + for i in range(n): + d = np.ones_like(xpoint, dtype=float) * np.inf + + # Start of side has coords (x1, y1) + # End of side has coords (x2, y2) + # Point has coords (xpoint, ypoint) + x1 = x[i] + y1 = y[i] + x21 = x[i + 1] - x1 + y21 = y[i + 1] - y1 + x1p = x1 - xpoint + y1p = y1 - ypoint + + # Points on infinite line defined by + # x = x1 + t * (x1 - x2) + # y = y1 + t * (y1 - y2) + # where + # t = 0 at (x1, y1) + # t = 1 at (x2, y2) + # Find where normal passing through (xpoint, ypoint) intersects + # infinite line + t = -(x1p * x21 + y1p * y21) / (x21 ** 2 + y21 ** 2) + tlt0 = t < 0 + tle1 = (0 <= t) & (t <= 1) + + # Normal intersects side + d[tle1] = ((x1p[tle1] + t[tle1] * x21) ** 2 + + (y1p[tle1] + t[tle1] * y21) ** 2) + + # Normal does not intersects side + # Point is closest to vertex (x1, y1) + # Compute square of distance to this vertex + d[tlt0] = x1p[tlt0] ** 2 + y1p[tlt0] ** 2 + + # Store distances + mask = d < mindst + mindst[mask] = d[mask] + j[mask] = i + + # Point is closer to (x1, y1) than any other vertex or side + snear[mask & tlt0] = False + + # Point is closer to this side than to any other side or vertex + snear[mask & tle1] = True + + if np.ma.count(snear) != snear.size: + raise IndexError('Error computing distances') + mindst **= 0.5 + + # Point is closer to its nearest vertex than its nearest side, check if + # nearest vertex is concave. + # If the nearest vertex is concave then point is inside the polygon, + # else the point is outside the polygon. + jo = j.copy() + jo[j == 0] -= 1 + area = _det([x[j + 1], x[j], x[jo - 1]], [y[j + 1], y[j], y[jo - 1]]) + mindst[~snear] = np.copysign(mindst, area)[~snear] + + # Point is closer to its nearest side than to its nearest vertex, check + # if point is to left or right of this side. + # If point is to left of side it is inside polygon, else point is + # outside polygon. 
+ area = _det([x[j], x[j + 1], xpoint], [y[j], y[j + 1], ypoint]) + mindst[snear] = np.copysign(mindst, area)[snear] + + # Point is on side of polygon + mindst[np.fabs(mindst) < smalld] = 0 + + # If input values were scalar then the output should be too + if scalar: + mindst = float(mindst) + return mindst + + + +def read_vertices(filename): + """ + Returns facet vertices stored in input file + """ + with open(filename, 'r') as f: + direction_dict = pickle.load(f) + return direction_dict['vertices'] + + +def read_casa_polys(filename, image): + """ + Reads casa region file and returns polys + + Note: only regions of type "poly" are supported + """ + with open(filename, 'r') as f: + lines = f.readlines() + + polys = [] + for line in lines: + if line.startswith('poly'): + poly_str_temp = line.split('[[')[1] + poly_str = poly_str_temp.split(']]')[0] + poly_str_list = poly_str.split('], [') + ra = [] + dec = [] + for pos in poly_str_list: + RAstr, Decstr = pos.split(',') + ra.append(Angle(RAstr, unit='hourangle').to('deg').value) + dec.append(Angle(Decstr.replace('.', ':', 2), unit='deg').to('deg').value) + poly_vertices = [np.array(ra), np.array(dec)] + + # Convert to image-plane polygon + xvert = [] + yvert = [] + for RAvert, Decvert in zip(np.array(ra), np.array(dec)): + try: + pixels = image.topixel([0, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + except: + pixels = image.topixel([1, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + xvert.append(pixels[2]) # x -> Dec + yvert.append(pixels[3]) # y -> RA + polys.append(Polygon(xvert, yvert)) + + elif line.startswith('ellipse'): + ell_str_temp = line.split('[[')[1] + if '], 0.0' not in ell_str_temp and '], 90.0' not in ell_str_temp: + print('Only position angles of 0.0 and 90.0 are support for CASA ' + 'regions of type "ellipse"') + sys.exit(1) + if '], 0.0' in ell_str_temp: + ell_str = ell_str_temp.split('], 0.0')[0] + pa = 0 + else: + ell_str = ell_str_temp.split('], 90.0')[0] + pa = 90 + ell_str_list = ell_str.split('], [') + + # Ellipse center + RAstr, Decstr = ell_str_list[0].split(',') + ra_center = Angle(RAstr, unit='hourangle').to('deg').value + dec_center = Angle(Decstr.replace('.', ':', 2), unit='deg').to('deg').value + pixels = image.topixel([0, 1, dec_center*np.pi/180.0, + ra_center*np.pi/180.0]) + x_center = pixels[2] # x -> Dec + y_center = pixels[3] # y -> RA + + # Ellipse semimajor and semiminor axes + a_str, b_str = ell_str_list[1].split(',') + a_deg = float(a_str.split('arcsec')[0])/3600.0 + b_deg = float(b_str.split('arcsec')[0])/3600.0 + pixels1 = image.topixel([0, 1, (dec_center-a_deg/2.0)*np.pi/180.0, + ra_center*np.pi/180.0]) + a_pix1 = pixels1[2] + pixels2 = image.topixel([0, 1, (dec_center+a_deg/2.0)*np.pi/180.0, + ra_center*np.pi/180.0]) + a_pix2 = pixels2[2] + a_pix = abs(a_pix2 - a_pix1) + ex = [] + ey = [] + for th in range(0, 360, 1): + if pa == 0: + # semimajor axis is along x-axis + ex.append(a_pix * np.cos(th * np.pi / 180.0) + + x_center) # x -> Dec + ey.append(a_pix * b_deg / a_deg * np.sin(th * np.pi / 180.0) + y_center) # y -> RA + elif pa == 90: + # semimajor axis is along y-axis + ex.append(a_pix * b_deg / a_deg * np.cos(th * np.pi / 180.0) + + x_center) # x -> Dec + ey.append(a_pix * np.sin(th * np.pi / 180.0) + y_center) # y -> RA + polys.append(Polygon(ex, ey)) + + elif line.startswith('box'): + poly_str_temp = line.split('[[')[1] + poly_str = poly_str_temp.split(']]')[0] + poly_str_list = poly_str.split('], [') + ra = [] + dec = [] + for pos in poly_str_list: + RAstr, Decstr = pos.split(',') + 
ra.append(Angle(RAstr, unit='hourangle').to('deg').value) + dec.append(Angle(Decstr.replace('.', ':', 2), unit='deg').to('deg').value) + ra.insert(1, ra[0]) + dec.insert(1, dec[1]) + ra.append(ra[2]) + dec.append(dec[0]) + poly_vertices = [np.array(ra), np.array(dec)] + + # Convert to image-plane polygon + xvert = [] + yvert = [] + for RAvert, Decvert in zip(np.array(ra), np.array(dec)): + try: + pixels = image.topixel([0, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + except: + pixels = image.topixel([1, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + xvert.append(pixels[2]) # x -> Dec + yvert.append(pixels[3]) # y -> RA + polys.append(Polygon(xvert, yvert)) + + elif line.startswith('#'): + pass + + else: + print('Only CASA regions of type "poly", "box", or "ellipse" are supported') + sys.exit(1) + + return polys + + +def make_template_image(image_name, reference_ra_deg, reference_dec_deg, + imsize=512, cellsize_deg=0.000417): + """ + Make a blank image and save it to disk + + Parameters + ---------- + image_name : str + Filename of output image + reference_ra_deg : float, optional + RA for center of output mask image + reference_dec_deg : float, optional + Dec for center of output mask image + imsize : int, optional + Size of output image + cellsize_deg : float, optional + Size of a pixel in degrees + + """ + shape_out = [1, 1, imsize, imsize] + hdu = pyfits.PrimaryHDU(np.zeros(shape_out, dtype=np.float32)) + hdulist = pyfits.HDUList([hdu]) + header = hdulist[0].header + + # Add WCS info + header['CRVAL1'] = reference_ra_deg + header['CDELT1'] = -cellsize_deg + header['CRPIX1'] = imsize/2.0 + header['CUNIT1'] = 'deg' + header['CTYPE1'] = 'RA---SIN' + header['CRVAL2'] = reference_dec_deg + header['CDELT2'] = cellsize_deg + header['CRPIX2'] = imsize/2.0 + header['CUNIT2'] = 'deg' + header['CTYPE2'] = 'DEC--SIN' + + # Add STOKES info + header['CRVAL3'] = 1.0 + header['CDELT3'] = 1.0 + header['CRPIX3'] = 1.0 + header['CUNIT3'] = '' + header['CTYPE3'] = 'STOKES' + + # Add frequency info + header['RESTFRQ'] = 15036 + header['CRVAL4'] = 150e6 + header['CDELT4'] = 3e8 + header['CRPIX4'] = 1.0 + header['CUNIT4'] = 'HZ' + header['CTYPE4'] = 'FREQ' + header['SPECSYS'] = 'TOPOCENT' + + # Add equinox + header['EQUINOX'] = 2000.0 + + # Add telescope + header['TELESCOP'] = 'LOFAR' + + hdulist[0].header = header + + hdulist.writeto(image_name, clobber=True) + hdulist.close() + + + +def main(image_name, mask_name, atrous_do=False, threshisl=0.0, threshpix=0.0, rmsbox=None, + rmsbox_bright=(35, 7), iterate_threshold=False, adaptive_rmsbox=False, img_format='fits', + threshold_format='float', trim_by=0.0, vertices_file=None, atrous_jmax=6, + pad_to_size=None, skip_source_detection=False, region_file=None, nsig=1.0, + reference_ra_deg=None, reference_dec_deg=None, cellsize_deg=0.000417, + use_adaptive_threshold=False, adaptive_thresh=150.0): + """ + Make a clean mask and return clean threshold + + Parameters + ---------- + image_name : str + Filename of input image from which mask will be made. If the image does + not exist, a template image with center at (reference_ra_deg, + reference_dec_deg) will be made internally + mask_name : str + Filename of output mask image + atrous_do : bool, optional + Use wavelet module of PyBDSF? 
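+ (If True, threshisl is reset to 4.0 internally to ensure proper source fitting.)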
+ threshisl : float, optional + Value of thresh_isl PyBDSF parameter + threshpix : float, optional + Value of thresh_pix PyBDSF parameter + rmsbox : tuple of floats, optional + Value of rms_box PyBDSF parameter + rmsbox_bright : tuple of floats, optional + Value of rms_box_bright PyBDSF parameter + iterate_threshold : bool, optional + If True, threshold will be lower in 20% steps until + at least one island is found + adaptive_rmsbox : tuple of floats, optional + Value of adaptive_rms_box PyBDSF parameter + img_format : str, optional + Format of output mask image (one of 'fits' or 'casa') + threshold_format : str, optional + Format of output threshold (one of 'float' or 'str_with_units') + trim_by : float, optional + Fraction by which the perimeter of the output mask will be + trimmed (zeroed) + vertices_file : str, optional + Filename of file with vertices (must be a pickle file containing + a dictionary with the vertices in the 'vertices' entry) + atrous_jmax : int, optional + Value of atrous_jmax PyBDSF parameter + pad_to_size : int, optional + Pad output mask image to a size of pad_to_size x pad_to_size + skip_source_detection : bool, optional + If True, source detection is not run on the input image + region_file : str, optional + Filename of region file in CASA format. If given, no mask image + is made (the region file is used as the clean mask) + nsig : float, optional + Number of sigma of returned threshold value + reference_ra_deg : float, optional + RA for center of output mask image + reference_dec_deg : float, optional + Dec for center of output mask image + cellsize_deg : float, optional + Size of a pixel in degrees + use_adaptive_threshold : bool, optional + If True, use an adaptive threshold estimated from the negative values in + the image + adaptive_thresh : float, optional + If adaptive_rmsbox is True, this value sets the threshold above + which a source will use the small rms box + + Returns + ------- + result : dict + Dict with nsig-sigma rms threshold + + """ + if rmsbox is not None and type(rmsbox) is str: + rmsbox = eval(rmsbox) + + if type(rmsbox_bright) is str: + rmsbox_bright = eval(rmsbox_bright) + + if pad_to_size is not None and type(pad_to_size) is str: + pad_to_size = int(pad_to_size) + + if type(atrous_do) is str: + if atrous_do.lower() == 'true': + atrous_do = True + threshisl = 4.0 # override user setting to ensure proper source fitting + else: + atrous_do = False + + if type(iterate_threshold) is str: + if iterate_threshold.lower() == 'true': + iterate_threshold = True + else: + iterate_threshold = False + + if type(adaptive_rmsbox) is str: + if adaptive_rmsbox.lower() == 'true': + adaptive_rmsbox = True + else: + adaptive_rmsbox = False + + if type(skip_source_detection) is str: + if skip_source_detection.lower() == 'true': + skip_source_detection = True + else: + skip_source_detection = False + + if type(use_adaptive_threshold) is str: + if use_adaptive_threshold.lower() == 'true': + use_adaptive_threshold = True + else: + use_adaptive_threshold = False + + if reference_ra_deg is not None and reference_dec_deg is not None: + reference_ra_deg = float(reference_ra_deg) + reference_dec_deg = float(reference_dec_deg) + + if not os.path.exists(image_name): + print('Input image not found. 
Making empty image...') + if not skip_source_detection: + print('ERROR: Source detection cannot be done on an empty image') + sys.exit(1) + if reference_ra_deg is not None and reference_dec_deg is not None: + image_name = mask_name + '.tmp' + make_template_image(image_name, reference_ra_deg, reference_dec_deg, + cellsize_deg=float(cellsize_deg)) + else: + print('ERROR: if image not found, a refernce position must be given') + sys.exit(1) + + trim_by = float(trim_by) + atrous_jmax = int(atrous_jmax) + threshpix = float(threshpix) + threshisl = float(threshisl) + nsig = float(nsig) + adaptive_thresh = float(adaptive_thresh) + threshold = 0.0 + + if not skip_source_detection: + if vertices_file is not None: + # Modify the input image to blank the regions outside of the polygon + temp_img = pim.image(image_name) + image_name += '.blanked' + temp_img.saveas(image_name, overwrite=True) + input_img = pim.image(image_name) + data = input_img.getdata() + + vertices = read_vertices(vertices_file) + RAverts = vertices[0] + Decverts = vertices[1] + xvert = [] + yvert = [] + for RAvert, Decvert in zip(RAverts, Decverts): + pixels = input_img.topixel([1, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + xvert.append(pixels[2]) # x -> Dec + yvert.append(pixels[3]) # y -> RA + poly = Polygon(xvert, yvert) + + # Find masked regions + masked_ind = np.where(data[0, 0]) + + # Find distance to nearest poly edge and set to NaN those that + # are outside the facet (dist < 0) + dist = poly.is_inside(masked_ind[0], masked_ind[1]) + outside_ind = np.where(dist < 0.0) + if len(outside_ind[0]) > 0: + data[0, 0, masked_ind[0][outside_ind], masked_ind[1][outside_ind]] = np.nan + + # Save changes + input_img.putdata(data) + + if use_adaptive_threshold: + # Get an estimate of the rms + img = bdsf.process_image(image_name, mean_map='zero', rms_box=rmsbox, + thresh_pix=threshpix, thresh_isl=threshisl, + atrous_do=atrous_do, ini_method='curvature', thresh='hard', + adaptive_rms_box=adaptive_rmsbox, adaptive_thresh=adaptive_thresh, + rms_box_bright=rmsbox_bright, rms_map=True, quiet=True, + atrous_jmax=atrous_jmax, stop_at='isl') + + # Find min and max pixels + max_neg_val = abs(np.min(img.ch0_arr)) + max_neg_pos = np.where(img.ch0_arr == np.min(img.ch0_arr)) + max_pos_val = abs(np.max(img.ch0_arr)) + max_pos_pos = np.where(img.ch0_arr == np.max(img.ch0_arr)) + + # Estimate new thresh_isl from min pixel value's sigma, but don't let + # it get higher than 1/2 of the peak's sigma + threshisl_neg = 2.0 * max_neg_val / img.rms_arr[max_neg_pos][0] + max_sigma = max_pos_val / img.rms_arr[max_pos_pos][0] + if threshisl_neg > max_sigma / 2.0: + threshisl_neg = max_sigma / 2.0 + + # Use the new threshold only if it is larger than the user-specified one + if threshisl_neg > threshisl: + threshisl = threshisl_neg + + if iterate_threshold: + # Start with given threshold and lower it until we get at least one island + nisl = 0 + while nisl == 0: + img = bdsf.process_image(image_name, mean_map='zero', rms_box=rmsbox, + thresh_pix=threshpix, thresh_isl=threshisl, + atrous_do=atrous_do, ini_method='curvature', thresh='hard', + adaptive_rms_box=adaptive_rmsbox, adaptive_thresh=adaptive_thresh, + rms_box_bright=rmsbox_bright, rms_map=True, quiet=True, + atrous_jmax=atrous_jmax) + nisl = img.nisl + threshpix /= 1.2 + threshisl /= 1.2 + if threshpix < 5.0: + break + else: + img = bdsf.process_image(image_name, mean_map='zero', rms_box=rmsbox, + thresh_pix=threshpix, thresh_isl=threshisl, + atrous_do=atrous_do, ini_method='curvature', 
thresh='hard', + adaptive_rms_box=adaptive_rmsbox, adaptive_thresh=adaptive_thresh, + rms_box_bright=rmsbox_bright, rms_map=True, quiet=True, + atrous_jmax=atrous_jmax) + + if img.nisl == 0: + if region_file is None or region_file == '[]': + print('No islands found. Clean mask cannot be made.') + sys.exit(1) + else: + # Continue on and use user-supplied region file + skip_source_detection = True + threshold = nsig * img.clipped_rms + + # Check if there are large islands preset (indicating that multi-scale + # clean is needed) + has_large_isl = False + for isl in img.islands: + if isl.size_active > 100: + # Assuming normal sampling, a size of 100 pixels would imply + # a source of ~ 10 beams + has_large_isl = True + + if (region_file is not None and region_file != '[]' and skip_source_detection): + # Copy region file and return if source detection was not done + os.system('cp {0} {1}'.format(region_file.strip('[]"'), mask_name)) + if threshold_format == 'float': + return {'threshold_5sig': threshold} + elif threshold_format == 'str_with_units': + # This is done to get around the need for quotes around strings in casapy scripts + # 'casastr/' is removed by the generic pipeline + return {'threshold_5sig': 'casastr/{0}Jy'.format(threshold)} + elif not skip_source_detection: + img.export_image(img_type='island_mask', mask_dilation=0, outfile=mask_name, + img_format=img_format, clobber=True) + + if (vertices_file is not None or trim_by > 0 or pad_to_size is not None + or (region_file is not None and region_file != '[]') + or skip_source_detection): + # Alter the mask in various ways + if skip_source_detection: + # Read the image + mask_im = pim.image(image_name) + else: + # Read the PyBDSF mask + mask_im = pim.image(mask_name) + data = mask_im.getdata() + coordsys = mask_im.coordinates() + if reference_ra_deg is not None and reference_dec_deg is not None: + values = coordsys.get_referencevalue() + values[2][0] = reference_dec_deg/180.0*np.pi + values[2][1] = reference_ra_deg/180.0*np.pi + coordsys.set_referencevalue(values) + imshape = mask_im.shape() + del(mask_im) + + if pad_to_size is not None: + imsize = pad_to_size + coordsys['direction'].set_referencepixel([imsize/2, imsize/2]) + pixmin = (imsize - imshape[2]) / 2 + if pixmin < 0: + print("The padded size must be larger than the original size.") + sys.exit(1) + pixmax = pixmin + imshape[2] + data_pad = np.zeros((1, 1, imsize, imsize), dtype=np.float32) + data_pad[0, 0, pixmin:pixmax, pixmin:pixmax] = data[0, 0] + new_mask = pim.image('', shape=(1, 1, imsize, imsize), coordsys=coordsys) + new_mask.putdata(data_pad) + else: + new_mask = pim.image('', shape=imshape, coordsys=coordsys) + new_mask.putdata(data) + + data = new_mask.getdata() + + if skip_source_detection: + # Mask all pixels + data[:] = 1 + + if vertices_file is not None: + # Modify the clean mask to exclude regions outside of the polygon + vertices = read_vertices(vertices_file) + RAverts = vertices[0] + Decverts = vertices[1] + xvert = [] + yvert = [] + for RAvert, Decvert in zip(RAverts, Decverts): + try: + pixels = new_mask.topixel([0, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + except: + pixels = new_mask.topixel([1, 1, Decvert*np.pi/180.0, + RAvert*np.pi/180.0]) + xvert.append(pixels[2]) # x -> Dec + yvert.append(pixels[3]) # y -> RA + poly = Polygon(xvert, yvert) + + # Find masked regions + masked_ind = np.where(data[0, 0]) + + # Find distance to nearest poly edge and unmask those that + # are outside the facet (dist < 0) + dist = poly.is_inside(masked_ind[0], 
masked_ind[1]) + outside_ind = np.where(dist < 0.0) + if len(outside_ind[0]) > 0: + data[0, 0, masked_ind[0][outside_ind], masked_ind[1][outside_ind]] = 0 + + if trim_by > 0.0: + sh = np.shape(data) + margin = int(sh[2] * trim_by / 2.0 ) + data[0, 0, 0:sh[2], 0:margin] = 0 + data[0, 0, 0:margin, 0:sh[3]] = 0 + data[0, 0, 0:sh[2], sh[3]-margin:sh[3]] = 0 + data[0, 0, sh[2]-margin:sh[2], 0:sh[3]] = 0 + + if region_file is not None and region_file != '[]': + # Merge the CASA regions with the mask + casa_polys = read_casa_polys(region_file.strip('[]"'), new_mask) + for poly in casa_polys: + # Find unmasked regions + unmasked_ind = np.where(data[0, 0] == 0) + + # Find distance to nearest poly edge and mask those that + # are inside the casa region (dist > 0) + dist = poly.is_inside(unmasked_ind[0], unmasked_ind[1]) + inside_ind = np.where(dist > 0.0) + if len(inside_ind[0]) > 0: + data[0, 0, unmasked_ind[0][inside_ind], unmasked_ind[1][inside_ind]] = 1 + + # Save changes + new_mask.putdata(data) + if img_format == 'fits': + new_mask.tofits(mask_name, overwrite=True) + elif img_format == 'casa': + new_mask.saveas(mask_name, overwrite=True) + else: + print('Output image format "{}" not understood.'.format(img_format)) + sys.exit(1) + + if not skip_source_detection: + if threshold_format == 'float': + return {'threshold_5sig': nsig * img.clipped_rms, 'multiscale': has_large_isl} + elif threshold_format == 'str_with_units': + # This is done to get around the need for quotes around strings in casapy scripts + # 'casastr/' is removed by the generic pipeline + return {'threshold_5sig': 'casastr/{0}Jy'.format(nsig * img.clipped_rms), + 'multiscale': has_large_isl} + else: + return {'threshold_5sig': '0.0'} + + +if __name__ == '__main__': + descriptiontext = "Make a clean mask.\n" + + parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=RawTextHelpFormatter) + parser.add_argument('image_name', help='Image name') + parser.add_argument('mask_name', help='Mask name') + parser.add_argument('-a', '--atrous_do', help='use wavelet fitting', type=bool, default=False) + parser.add_argument('-i', '--threshisl', help='', type=float, default=3.0) + parser.add_argument('-p', '--threshpix', help='', type=float, default=5.0) + parser.add_argument('-r', '--rmsbox', help='rms box width and step (e.g., "(60, 20)")', + type=str, default='(60, 20)') + parser.add_argument('--rmsbox_bright', help='rms box for bright sources(?) 
+ width and step (e.g., "(60, 20)")',
+ type=str, default='(60, 20)')
+ parser.add_argument('-t', '--iterate_threshold', help='iteratively decrease threshold until at least '
+ 'one island is found', type=bool, default=False)
+ parser.add_argument('-o', '--adaptive_rmsbox', help='use an adaptive rms box', type=bool, default=False)
+ parser.add_argument('-f', '--img_format', help='format of output mask', type=str, default='casa')
+ parser.add_argument('-d', '--threshold_format', help='format of return value', type=str, default='float')
+ parser.add_argument('-b', '--trim_by', help='Trim masked region by this number of pixels', type=float, default=0.0)
+ parser.add_argument('-v', '--vertices_file', help='file containing facet polygon vertices', type=str, default=None)
+ parser.add_argument('--region_file', help='File containing casa regions to be merged with the detected mask', type=str, default=None)
+ parser.add_argument('-j', '--atrous_jmax', help='Max wavelet scale', type=int, default=3)
+ parser.add_argument('-z', '--pad_to_size', help='pad mask to this size', type=int, default=None)
+ parser.add_argument('-s', '--skip_source_detection', help='skip source detection', type=bool, default=False)
+
+ args = parser.parse_args()
+ erg = main(args.image_name, args.mask_name, atrous_do=args.atrous_do,
+ threshisl=args.threshisl, threshpix=args.threshpix, rmsbox=args.rmsbox,
+ rmsbox_bright=args.rmsbox_bright,
+ iterate_threshold=args.iterate_threshold,
+ adaptive_rmsbox=args.adaptive_rmsbox, img_format=args.img_format,
+ threshold_format=args.threshold_format, trim_by=args.trim_by,
+ vertices_file=args.vertices_file, atrous_jmax=args.atrous_jmax,
+ pad_to_size=args.pad_to_size, skip_source_detection=args.skip_source_detection,
+ region_file=args.region_file)
+ print(erg)
diff --git a/Docker/scripts/make_source_list.py b/Docker/scripts/make_source_list.py
new file mode 100755
index 00000000..3ce98750
--- /dev/null
+++ b/Docker/scripts/make_source_list.py
@@ -0,0 +1,136 @@
+#! /usr/bin/env python
+"""
+Script to make a source catalog for an image
+"""
+import argparse
+from argparse import RawTextHelpFormatter
+import sys
+import os
+try:
+ import matplotlib
+ matplotlib.use('Agg')
+except (RuntimeError, ImportError):
+ pass
+import bdsf
+import lsmtool
+
+
+def main(image_name, catalog_name, atrous_do=False, threshisl=3.0, threshpix=5.0, rmsbox=(60, 20),
+ rmsbox_bright=(35, 7), adaptive_rmsbox=False, atrous_jmax=6, adaptive_thresh=150.0,
+ compare_dir=None, format='png'):
+ """
+ Make a source catalog for an image
+
+ Parameters
+ ----------
+ image_name : str
+ Filename of input image from which catalog will be made
+ catalog_name : str
+ Filename of output source catalog in FITS format
+ atrous_do : bool, optional
+ Use wavelet module of PyBDSF? 
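+ (If True, threshisl is reset to 4.0 internally to ensure proper source fitting.)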
+ threshisl : float, optional + Value of thresh_isl PyBDSF parameter + threshpix : float, optional + Value of thresh_pix PyBDSF parameter + rmsbox : tuple of floats, optional + Value of rms_box PyBDSF parameter + rmsbox_bright : tuple of floats, optional + Value of rms_box_bright PyBDSF parameter + adaptive_rmsbox : tuple of floats, optional + Value of adaptive_rms_box PyBDSF parameter + atrous_jmax : int, optional + Value of atrous_jmax PyBDSF parameter + adaptive_thresh : float, optional + If adaptive_rmsbox is True, this value sets the threshold above + which a source will use the small rms box + compare_dir : str, optional + Directory for output plots/info of comparison of source properties to surveys + format : str, optional + Format of output plots: 'png' or 'pdf' + """ + if type(rmsbox) is str: + rmsbox = eval(rmsbox) + + if type(rmsbox_bright) is str: + rmsbox_bright = eval(rmsbox_bright) + + if type(atrous_do) is str: + if atrous_do.lower() == 'true': + atrous_do = True + threshisl = 4.0 # override user setting to ensure proper source fitting + else: + atrous_do = False + + if type(adaptive_rmsbox) is str: + if adaptive_rmsbox.lower() == 'true': + adaptive_rmsbox = True + else: + adaptive_rmsbox = False + + if not os.path.exists(image_name): + print('ERROR: input image not found') + sys.exit(1) + + atrous_jmax = int(atrous_jmax) + threshpix = float(threshpix) + threshisl = float(threshisl) + adaptive_thresh = float(adaptive_thresh) + + img = bdsf.process_image(image_name, mean_map='zero', rms_box=rmsbox, + thresh_pix=threshpix, thresh_isl=threshisl, + atrous_do=atrous_do, ini_method='curvature', thresh='hard', + adaptive_rms_box=adaptive_rmsbox, adaptive_thresh=adaptive_thresh, + rms_box_bright=rmsbox_bright, rms_map=True, quiet=True, + atrous_jmax=atrous_jmax) + srcroot = 'prefactor' + img.write_catalog(outfile=catalog_name, bbs_patches='source', srcroot=srcroot, clobber=True) + + # Use LSMTool to make some basic comparisons to surveys + if compare_dir is not None: + s = lsmtool.load(catalog_name) + s.setPatchPositions(method='wmean') + _, _, refRA, refDec = s._getXY() + def_dict = s.getDefaultValues() + if 'ReferenceFrequency' in def_dict: + ref_freq_hz = def_dict['ReferenceFrequency'] + else: + ref_freq_hz = 0.0 + if ref_freq_hz > 140e6 and ref_freq_hz < 160e6: + # use TGSS + vocat = 'TGSS' + ignspec = None + else: + # too far in freq from TGSS -> use GSM, ignoring sources that lack spectral + # information (spec_indx = -0.7) + vocat = 'GSM' + ignspec = -0.7 + s_vo = lsmtool.load(vocat, VOPosition=[refRA, refDec], VORadius='5 deg') + s_vo.group('threshold', FWHM='30 arcsec', threshold=0.001) + s.compare(s_vo, radius='30 arcsec', excludeMultiple=False, outDir=compare_dir, + name1='LOFAR', name2=vocat, format=format, ignoreSpec=ignspec) + + +if __name__ == '__main__': + descriptiontext = "Make a source catalog from an image.\n" + + parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=RawTextHelpFormatter) + parser.add_argument('image_name', help='Image name') + parser.add_argument('catalog_name', help='Output catalog name') + parser.add_argument('-a', '--atrous_do', help='use wavelet fitting', type=bool, default=False) + parser.add_argument('-i', '--threshisl', help='', type=float, default=3.0) + parser.add_argument('-p', '--threshpix', help='', type=float, default=5.0) + parser.add_argument('-r', '--rmsbox', help='rms box width and step (e.g., "(60, 20)")', + type=str, default='(60, 20)') + parser.add_argument('--rmsbox_bright', help='rms box for bright 
sources(?) width and step (e.g., "(60, 20)")', + type=str, default='(60, 20)') + parser.add_argument('-o', '--adaptive_rmsbox', help='use an adaptive rms box', type=bool, default=False) + parser.add_argument('-j', '--atrous_jmax', help='Max wavelet scale', type=int, default=3) + parser.add_argument('-c', '--compare_dir', help='', type=str, default=None) + parser.add_argument('-f', '--format', help='', type=str, default='pdf') + + args = parser.parse_args() + main(args.image_name, args.catalog_name, atrous_do=args.atrous_do, + threshisl=args.threshisl, threshpix=args.threshpix, rmsbox=args.rmsbox, + rmsbox_bright=args.rmsbox_bright, adaptive_rmsbox=args.adaptive_rmsbox, + atrous_jmax=args.atrous_jmax, compare_dir=args.compare_dir, format=args.format) diff --git a/Docker/scripts/make_summary.py b/Docker/scripts/make_summary.py new file mode 100755 index 00000000..70c230b2 --- /dev/null +++ b/Docker/scripts/make_summary.py @@ -0,0 +1,226 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +import os,sys +import argparse +import subprocess, platform +import numpy, re +import multiprocessing + +######################################################################## +def input2strlist_nomapfile(invar): + """ + from bin/download_IONEX.py + give the list of MSs from the list provided as a string + """ + str_list = None + if type(invar) is str: + if invar.startswith('[') and invar.endswith(']'): + str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')] + else: + str_list = [invar.strip(' \'\"')] + elif type(invar) is list: + str_list = [str(f).strip(' \'\"') for f in invar] + else: + raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!') + return str_list + +############################################################################### +def find_flagged_fraction(ms_file): + + outputs = os.popen('DPPP msin=' + ms_file + ' msout=. steps=[count] count.type=counter count.warnperc=0.0000001 | grep NOTE').readlines() + fraction_flagged = { output.split('(')[-1].rstrip(')\n'):output.split('%')[0][-5:].strip() for output in outputs if 'station' in output } + return fraction_flagged + +############################################################################### +def main(observation_directory = '/data/share/pipeline/Observation', logfile = '/data/share/pipeline/Observation/logs/pipeline.log', h5parmdb = 'solutions.h5', MSfile = '[]'): + """ + Creates summary of a given prefactor3 run + + Parameters + ---------- + observation_directory: directory where all the processed data and solutions are stored + logfile: directory where all the log files are stored + h5parmdb: name of the solutions h5parm database + MSfile: pattern of the final MS files + + """ + ID = os.path.basename(observation_directory) + summary = os.path.normpath(observation_directory + '/../' + ID + '.log') + f_summary = open(summary, 'w') + + # location of logfile + print('Summary logfile is written to ' + summary) + + if not os.path.exists(logfile): + f_summary.write(logfile + ' does not exist. 
Skipping!')
+ f_summary.close()
+ return(1)
+ pass
+
+ ## check for the h5parm
+ if os.path.exists(observation_directory + '/results/cal_values/' + h5parmdb):
+ h5parmdb = observation_directory + '/results/cal_values/' + h5parmdb
+ pass
+ elif os.path.exists(observation_directory + '/results/cal_values/cal_' + h5parmdb):
+ h5parmdb = observation_directory + '/results/cal_values/cal_' + h5parmdb
+ else:
+ f_summary.write('No h5parm solutions file found in ' + observation_directory + '/results/cal_values/' + h5parmdb)
+ f_summary.close()
+ return(1)
+ pass
+
+ ## convert stringlist to list
+ mslist = input2strlist_nomapfile(MSfile)
+
+ ## get proper solset
+ from losoto.h5parm import h5parm
+ data = h5parm(h5parmdb, readonly = True)
+ if 'target' in data.getSolsetNames():
+ solset = 'target'
+ pass
+ elif 'calibrator' in data.getSolsetNames():
+ solset = 'calibrator'
+ pass
+ else:
+ f_summary.write('Neither calibrator nor target solset has been found in ' + h5parmdb)
+ f_summary.close()
+ return(1)
+ pass
+
+ ## get antenna information
+ solset = data.getSolset(solset)
+ soltabs = list([soltab.name for soltab in solset.getSoltabs()])
+ source = solset.obj._f_get_child('source')[0][0]
+ antenna_len = '{:<' + str(max([ len(antenna) for antenna in solset.getAnt()])) + '}'
+ soltab_len = '{:^' + str(max([ len(soltab_name) for soltab_name in soltabs ])) + '}'
+
+ ## get software versions
+ try:
+ DPPP_version = subprocess.Popen(['DPPP', '-v'], stdout=subprocess.PIPE).stdout.read()
+ losoto_version = 'losoto ' + subprocess.Popen(['losoto', '--version'], stderr=subprocess.PIPE).stderr.read()
+ lsmtool_version = subprocess.Popen(['lsmtool', '--version'], stdout=subprocess.PIPE).stdout.read()
+ aoflagger_version = subprocess.Popen(['aoflagger', '--version'], stdout=subprocess.PIPE).stdout.read()
+ wsclean_version = subprocess.Popen(['wsclean', '--version'], stdout=subprocess.PIPE).stdout.read().split('\n')[1] + '\n'
+ python_version = subprocess.Popen(['python', '--version'], stderr=subprocess.PIPE).stderr.read()
+
+ import matplotlib, scipy, astropy
+ modules_version = 'matplotlib ' + str(matplotlib.__version__) + ', scipy ' + str(scipy.__version__) + ', astropy ' + str(astropy.__version__) + '\n'
+ os_version = ' '.join(platform.linux_distribution()) + ' ' + platform.release() + ' ' + platform.version() + '\n'
+ f_summary.write('Software versions currently used:\n' + os_version \
+ + DPPP_version \
+ + aoflagger_version \
+ + losoto_version \
+ + lsmtool_version \
+ + wsclean_version \
+ + python_version \
+ + modules_version )
+ except OSError:
+ f_summary.write('Could not find all LOFAR or related software packages. 
Could not determine the used software versions.\n') + pass + + ## get the list of removed stations: + baseline_list = list(set([ str(line.split(";")[1:]) for line in open(logfile) if 'baseline:' in line and ';' in line ])) + bad_antennas = list(set(re.sub("[\[\]\"\ '!*]", "", str(baseline_list)).replace('\\n','').replace('\\','').split(','))) + if len(bad_antennas) == 1 and bad_antennas[0] == '': + bad_antennas = 'NONE' + pass + + ## check for A-Team warning: + Ateam_list = list(set([ str(line.split('source ')[-1]).split(' is')[0] for line in open(logfile) if 'WARNING: The A-Team source' in line ])) + if len(Ateam_list) == 0: + Ateam_list = 'NONE' + pass + + f_summary.write('\n') + f_summary.write('Antennas removed from the data: ' + re.sub("[\[\]']", "", str(bad_antennas)) + '\n') + f_summary.write('A-Team sources close to the phase reference center: ' + re.sub("[\[\]']", "", str(Ateam_list)) + '\n') + f_summary.write('\n') + + ## diffractive scale + structure_file = observation_directory + '/results/inspection/' + source + '_structure.txt' + if os.path.exists(structure_file): + with open(structure_file, 'r') as infile: + for line in infile: + diffractive_scale_XX = float(line.split()[2]) + diffractive_scale_YY = float(line.split()[2]) + f_summary.write('XX diffractive scale: %3.1f km'%(diffractive_scale_XX/1000.) + '\n') + f_summary.write('YY diffractive scale: %3.1f km'%(diffractive_scale_YY/1000.) + '\n') + + ## check whether reference has been used + if 'bandpass' in soltabs: + soltab = solset.getSoltab('bandpass') + history = soltab.getHistory() + if not history.strip('\n') == '': + f_summary.write(history + '\n') + pass + + ## get table of flagged solutions + f_summary.write('\n') + flagged_solutions = {} + import losoto.lib_operations as losoto + for soltab_name in soltabs: + soltab = solset.getSoltab(soltab_name) + antennas = soltab.ant + axes = soltab.getAxesNames() + axes.insert(0, axes.pop(axes.index('ant'))) ## put antenna table to the second place + flagged_solutions[soltab_name] = {} + for vals, weights, coord, selection in soltab.getValuesIter(returnAxes=axes, weight=True): + vals = losoto.reorderAxes(vals, soltab.getAxesNames(), axes) + weights = losoto.reorderAxes(weights, soltab.getAxesNames(), axes) + for i, antenna in enumerate(antennas): + flagged_fraction = 1. 
- float(numpy.mean(weights[i]))
+ flagged_solutions[soltab_name][antenna] = soltab_len.format('{:.2f}'.format(100 * flagged_fraction) + '%')
+ soltab_names = ' '.join([ soltab_len.format(soltab_name) for soltab_name in soltabs ])
+ f_summary.write('Fraction of flagged solutions per station and solution table:\n')
+ f_summary.write(antenna_len.format('Station') + ' ' + soltab_names + '\n')
+ for antenna in antennas:
+ values_to_print = []
+ for soltab_name in soltabs:
+ try:
+ values_to_print.append(flagged_solutions[soltab_name][antenna])
+ except KeyError:
+ values_to_print.append(soltab_len.format(' '))
+ values_to_print = ' '.join(values_to_print)
+ f_summary.write(antenna_len.format(antenna) + ' ' + values_to_print + '\n')
+
+
+ ## derive the fraction of flagged data of the entire observation
+ pool = multiprocessing.Pool(processes = multiprocessing.cpu_count())
+ flagged_fraction_list = pool.map(find_flagged_fraction, mslist)
+
+ flagged_fraction_data = {}
+ for entry in flagged_fraction_list:
+ antennas = entry.keys()
+ for antenna in antennas:
+ try:
+ flagged_fraction_data[antenna].append(float(entry[antenna]))
+ except KeyError:
+ flagged_fraction_data[antenna] = [float(entry[antenna])]
+
+ f_summary.write('\n')
+ f_summary.write('Overall fraction of flagged data in the final MS files:\n')
+ f_summary.write(antenna_len.format('Station') + '\n')
+ for antenna in sorted(flagged_fraction_data.keys()):
+ flagged_fraction_data[antenna] = sum(flagged_fraction_data[antenna]) / len(flagged_fraction_data[antenna])
+ f_summary.write(antenna_len.format(antenna) + ' ' + '{:>10}'.format('{:.2f}'.format(flagged_fraction_data[antenna]) + '%') + '\n')
+
+ print('Summary has been created.')
+ return 0
+
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(description='Creates summary of a given prefactor3 run.')
+ parser.add_argument('--obsdir', type=str, default='/data/scratch/pipeline/Observation', help='Directory where to find the processed data and solutions.')
+ parser.add_argument('--logfile', type=str, default='/data/share/pipeline/Observation/logs/pipeline.log', help='Location of the pipeline logfile.')
+ parser.add_argument('--h5parm', type=str, default='solutions.h5', help='Name of the h5parm solutions file (default: solutions.h5)')
+ parser.add_argument('--MSfile', type=str, default='[]', help='List of MS to be analysed (default: [])')
+
+ args = parser.parse_args()
+
+ # start running script
+ main(args.obsdir, args.logfile, args.h5parm, args.MSfile)
+
+ sys.exit(0)
+ pass
diff --git a/Docker/scripts/merge_skymodels.py b/Docker/scripts/merge_skymodels.py
new file mode 100755
index 00000000..20ce30f8
--- /dev/null
+++ b/Docker/scripts/merge_skymodels.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+"""
+Script to merge two sky models
+"""
+import argparse
+from argparse import RawTextHelpFormatter
+import os
+import lsmtool
+import sys
+
+
+def main(inmodel1, inmodel2, outmodel, match_by='name', radius=0.0, keep='all'):
+ """
+ Merges two sky models into one
+
+ Parameters
+ ----------
+ inmodel1 : str
+ Filename of input model 1
+ inmodel2 : str
+ Filename of input model 2
+ outmodel : str
+ Filename of output model
+ match_by : str, optional
+ The match_by parameter of LSMTool
+ radius : float, optional
+ Radius in degrees for cross match
+ keep : str, optional
+ The keep parameter of LSMTool
+
+ """
+ s1 = lsmtool.load(inmodel1)
+ s2 = lsmtool.load(inmodel2)
+
+ s1.concatenate(s2, matchBy=match_by, radius=float(radius), keep=keep,
+ inheritPatches=True)
+ s1.group('every')
+ 
s1.write(fileName=outmodel, clobber=True)
+
+
+if __name__ == '__main__':
+ descriptiontext = "Merge two sky models.\n"
+
+ parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=RawTextHelpFormatter)
+ parser.add_argument('inmodel1', help='input model 1 filename')
+ parser.add_argument('inmodel2', help='input model 2 filename')
+ parser.add_argument('outmodel', help='output model filename')
+ parser.add_argument('-m', '--match_by', help='match type', type=str, default='name')
+ parser.add_argument('-r', '--radius', help='radius in degrees for matching', type=float, default=0.0)
+ parser.add_argument('-k', '--keep', help='', type=str, default='all')
+
+ args = parser.parse_args()
+ main(args.inmodel1, args.inmodel2, args.outmodel, match_by=args.match_by,
+ radius=args.radius, keep=args.keep)
diff --git a/Docker/scripts/pad_image.py b/Docker/scripts/pad_image.py
new file mode 100755
index 00000000..15f81dce
--- /dev/null
+++ b/Docker/scripts/pad_image.py
@@ -0,0 +1,44 @@
+#! /usr/bin/env python
+"""
+Script to pad a FITS image
+"""
+import argparse
+from argparse import RawTextHelpFormatter
+from astropy.io import fits as pyfits
+import numpy as np
+import sys
+import os
+
+
+def main(infile, xypadsize):
+ """Pad a fits image with zeros to padsize"""
+ pad_xsize, pad_ysize = [int(s) for s in xypadsize.split(' ')]
+
+ hdu = pyfits.open(infile)
+ imdata = hdu[0].data[0, 0]
+ (ysize, xsize) = imdata.shape
+ if xsize > pad_xsize or ysize > pad_ysize:
+ raise ValueError('pad_image: padded size is smaller than current size!')
+ if xsize == pad_xsize and ysize == pad_ysize:
+ return
+
+ # floor division keeps the offsets integer, so slicing also works under Python 3
+ xoffset = (pad_xsize - xsize) // 2
+ yoffset = (pad_ysize - ysize) // 2
+ newdata = np.zeros((1, 1, pad_ysize, pad_xsize))
+
+ newdata[0, 0, yoffset:yoffset+ysize, xoffset:xoffset+xsize] = imdata
+ hdu[0].data = newdata
+ hdu[0].header['CRPIX1'] += xoffset
+ hdu[0].header['CRPIX2'] += yoffset
+ hdu.writeto(infile, clobber=True)
+
+
+if __name__ == '__main__':
+ descriptiontext = "Pad FITS image.\n"
+
+ parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=RawTextHelpFormatter)
+ parser.add_argument('infile', help='filename of input image')
+ parser.add_argument('xypadsize', help='padded size as "xsize ysize"')
+ args = parser.parse_args()
+
+ main(args.infile, args.xypadsize)
diff --git a/Docker/scripts/plot_Ateamclipper.py b/Docker/scripts/plot_Ateamclipper.py
new file mode 100755
index 00000000..56e5d169
--- /dev/null
+++ b/Docker/scripts/plot_Ateamclipper.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Plots the fraction of data flagged by the A-team clipper as a function of frequency.
+
+Created on Tue Jul 24 2019
+
+@author: Alexander Drabent
+"""
+
+import argparse
+import matplotlib as mpl
+mpl.use('Agg')
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy
+
+def main(txtfile = 'Ateamclipper.txt', outfile = 'Ateamclipper.png'):
+
+ frac_list_xx = []
+ frac_list_yy = []
+ freq_list = []
+ with open(txtfile, 'r') as infile:
+ for line in infile:
+ freq_list.append(float(line.split()[0]))
+ frac_list_xx.append(float(line.split()[1]))
+ frac_list_yy.append(float(line.split()[2]))
+
+ # Plot the amount of clipped data vs. 
frequency potentially contaminated by the A-team
+ # plot both polarizations that were read from the clipper output
+ plt.scatter(numpy.array(freq_list) / 1e6, numpy.array(frac_list_xx), marker = '.', s = 10, label = 'XX')
+ plt.scatter(numpy.array(freq_list) / 1e6, numpy.array(frac_list_yy), marker = '.', s = 10, label = 'YY')
+ plt.legend()
+ plt.xlabel('frequency [MHz]')
+ plt.ylabel('A-team clipping fraction [%]')
+ plt.savefig(outfile)
+ return(0)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Plots the fraction of data flagged by the A-team clipper as a function of frequency.')
+
+ parser.add_argument('txtfile', type=str,
+ help='Input text file containing frequency and flag fraction of the XX and YY polarization.')
+ parser.add_argument('outfile', type=str,
+ help='Output plot file showing the flagged fraction versus frequency.')
+
+
+ args = parser.parse_args()
+
+ main(txtfile = args.txtfile, outfile = args.outfile)
+
diff --git a/Docker/scripts/plot_image.py b/Docker/scripts/plot_image.py
new file mode 100755
index 00000000..46086596
--- /dev/null
+++ b/Docker/scripts/plot_image.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+"""
+plot image
+Original code from Reinout
+"""
+import os
+import matplotlib as mpl
+mpl.use('Agg')
+import aplpy
+import numpy
+import astropy.io.fits
+
+
+def meanclip(indata, clipsig=4.0, maxiter=10, converge_num=0.001, verbose=0):
+ """
+ Computes an iteratively sigma-clipped mean on a
+ data set. Clipping is done about median, but mean
+ is returned.
+
+ .. note:: MYMEANCLIP routine from ACS library.
+
+ :History:
+ * 21/10/1998 Written by RSH, RITSS
+ * 20/01/1999 Added SUBS, fixed misplaced paren on float call, improved doc. RSH
+ * 24/11/2009 Converted to Python. PLL.
+
+ Examples
+ --------
+ >>> mean, sigma = meanclip(indata)
+
+ Parameters
+ ----------
+ indata: array_like
+ Input data.
+
+ clipsig: float
+ Number of sigma at which to clip.
+
+ maxiter: int
+ Ceiling on number of clipping iterations.
+
+ converge_num: float
+ If the proportion of rejected pixels is less than
+ this fraction, the iterations stop.
+
+ verbose: {0, 1}
+ Print messages to screen?
+
+ Returns
+ -------
+ mean: float
+ N-sigma clipped mean.
+
+ sigma: float
+ Standard deviation of remaining pixels.
+
+ """
+ # Flatten array
+ skpix = indata.reshape(indata.size,)
+
+ ct = indata.size
+ iter = 0
+ c1 = 1.0
+ c2 = 0.0
+
+ while (c1 >= c2) and (iter < maxiter):
+ lastct = ct
+ medval = numpy.median(skpix)
+ sig = numpy.std(skpix)
+ wsm = numpy.where(abs(skpix-medval) < clipsig*sig)
+ ct = len(wsm[0])
+ if ct > 0:
+ skpix = skpix[wsm]
+
+ c1 = abs(ct - lastct)
+ c2 = converge_num * lastct
+ iter += 1
+
+ mean = numpy.mean(skpix)
+ sigma = robust_sigma(skpix)
+
+ return mean, sigma
+
+
+def find_imagenoise(data):
+ mean, rms = meanclip(data)
+ return rms
+
+
+def robust_sigma(in_y, zero=0):
+ """
+ Calculate a resistant estimate of the dispersion of
+ a distribution. For an uncontaminated distribution,
+ this is identical to the standard deviation.
+
+ Use the median absolute deviation as the initial
+ estimate, then weight points using Tukey Biweight.
+ See, for example, Understanding Robust and
+ Exploratory Data Analysis, by Hoaglin, Mosteller
+ and Tukey, John Wiley and Sons, 1983.
+
+ .. note:: ROBUST_SIGMA routine from IDL ASTROLIB.
+
+ Examples
+ --------
+ >>> result = robust_sigma(in_y, zero=1)
+
+ Parameters
+ ----------
+ in_y : array_like
+ Vector of quantity for which the dispersion is
+ to be calculated
+
+ zero : int
+ If set, the dispersion is calculated w.r.t. 0.0
+ rather than the central value of the vector. If
+ Y is a vector of residuals, this should be set. 
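+ (The constant c1 = 0.6745 below converts the median absolute
+ deviation into an equivalent Gaussian standard deviation.)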
+ + Returns + ------- + out_val : float + Dispersion value. If failed, returns -1. + + """ + # Flatten array + y = in_y.ravel() + + eps = 1.0E-20 + c1 = 0.6745 + c2 = 0.80 + c3 = 6.0 + c4 = 5.0 + c_err = -1.0 + min_points = 3 + + if zero: + y0 = 0.0 + else: + y0 = numpy.median(y) + + dy = y - y0 + del_y = abs(dy) + + # First, the median absolute deviation MAD about the median: + mad = numpy.median(del_y) / c1 + + # If the MAD=0, try the MEAN absolute deviation: + if mad < eps: + mad = del_y.mean() / c2 + if mad < eps: + return 0.0 + + # Now the biweighted value: + u = dy / (c3 * mad) + uu = u * u + q = numpy.where(uu <= 1.0) + count = len(q[0]) + if count < min_points: + print('ROBUST_SIGMA: This distribution is TOO WEIRD! ' + 'Returning {}'.format(c_err)) + return c_err + + numerator = numpy.sum((y[q] - y0)**2.0 * (1.0 - uu[q])**4.0) + n = y.size + den1 = numpy.sum((1.0 - uu[q]) * (1.0 - c4 * uu[q])) + siggma = n * numerator / (den1 * (den1 - 1.0)) + + if siggma > 0: + out_val = numpy.sqrt(siggma) + else: + out_val = 0.0 + + return out_val + + +def main(fitsimage, outfilename, outdir=None): + + # find image noise + hdulist = astropy.io.fits.open(fitsimage, mode='update') + data = hdulist[0].data + imagenoise = find_imagenoise(data) + dynamicrange = numpy.max(data) / imagenoise + + # Store the rms value in the image header, for later reference by metadata.py (see + # CEP/Pipeline/recipes/sip/helpers/metadata.py in the LOFAR software repository) + header = hdulist[0].header + header['MEANRMS'] = imagenoise + beammaj = header['BMAJ'] + beammin = header['BMIN'] + beampa = header['BPA'] + hdulist.close() + + # plot + f = aplpy.FITSFigure(fitsimage, slices=[0, 0]) + f.show_colorscale(vmax=16*imagenoise, vmin=-4*imagenoise, cmap='bone') + f.add_grid() + f.grid.set_color('white') + f.grid.set_alpha(0.5) + f.grid.set_linewidth(0.2) + f.add_colorbar() + f.colorbar.set_axis_label_text('Flux Density (Jy/beam)') + f.add_beam() + f.beam.set_frame(True) + f.set_title('Mean rms = {0:1.2e} Jy/beam; Dynamic range = {1:1.2e}; \n ' + 'Restoring beam = ({2:3.1f} x {3:3.1f}) arcsec, PA = {4:3.1f} deg'.format( + imagenoise, dynamicrange, beammaj*3600.0, beammin*3600.0, beampa)) + + if outdir is not None: + if not os.path.exists(outdir): + os.makedirs(outdir) + outfilename = os.path.join(outdir, os.path.basename(outfilename)) + f.save('{}.png'.format(outfilename), dpi=400) + diff --git a/Docker/scripts/plot_subtract_images.py b/Docker/scripts/plot_subtract_images.py new file mode 100755 index 00000000..3ffd2bad --- /dev/null +++ b/Docker/scripts/plot_subtract_images.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python +""" +plot image overlaid with mask contours +Original code from Reinout +""" +import argparse +from argparse import RawTextHelpFormatter +#import pyrap.images as pim +#import pyrap.tables as pt +#import numpy as np +import matplotlib as mpl +mpl.use('Agg') +import aplpy +import numpy +import astropy.io.fits + +def meanclip(indata, clipsig=4.0, maxiter=10, converge_num=0.001, verbose=0): + """ + Computes an iteratively sigma-clipped mean on a + data set. Clipping is done about median, but mean + is returned. + + .. note:: MYMEANCLIP routine from ACS library. + + :History: + * 21/10/1998 Written by RSH, RITSS + * 20/01/1999 Added SUBS, fixed misplaced paren on float call, improved doc. RSH + * 24/11/2009 Converted to Python. PLL. + + Examples + -------- + >>> mean, sigma = meanclip(indata) + + Parameters + ---------- + indata: array_like + Input data. + + clipsig: float + Number of sigma at which to clip. 
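+ (Samples deviating from the running median by more than clipsig
+ times the current sigma are rejected at each iteration.)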
+ + maxiter: int + Ceiling on number of clipping iterations. + + converge_num: float + If the proportion of rejected pixels is less than + this fraction, the iterations stop. + + verbose: {0, 1} + Print messages to screen? + + Returns + ------- + mean: float + N-sigma clipped mean. + + sigma: float + Standard deviation of remaining pixels. + + """ + # Flatten array + skpix = indata.reshape( indata.size, ) + + ct = indata.size + iter = 0; c1 = 1.0 ; c2 = 0.0 + + while (c1 >= c2) and (iter < maxiter): + lastct = ct + medval = numpy.median(skpix) + sig = numpy.std(skpix) + wsm = numpy.where( abs(skpix-medval) < clipsig*sig ) + ct = len(wsm[0]) + if ct > 0: + skpix = skpix[wsm] + + c1 = abs(ct - lastct) + c2 = converge_num * lastct + iter += 1 + # End of while loop + + mean = numpy.mean( skpix ) + sigma = robust_sigma( skpix ) + + if verbose: + prf = 'MEANCLIP:' + print '%s %.1f-sigma clipped mean' % (prf, clipsig) + print '%s Mean computed in %i iterations' % (prf, iter) + print '%s Mean = %.6f, sigma = %.6f' % (prf, mean, sigma) + + return mean, sigma + + +def find_imagenoise(data): + mean, rms = meanclip(data) + return rms + +def robust_sigma(in_y, zero=0): + """ + Calculate a resistant estimate of the dispersion of + a distribution. For an uncontaminated distribution, + this is identical to the standard deviation. + + Use the median absolute deviation as the initial + estimate, then weight points using Tukey Biweight. + See, for example, Understanding Robust and + Exploratory Data Analysis, by Hoaglin, Mosteller + and Tukey, John Wiley and Sons, 1983. + + .. note:: ROBUST_SIGMA routine from IDL ASTROLIB. + + Examples + -------- + >>> result = robust_sigma(in_y, zero=1) + + Parameters + ---------- + in_y : array_like + Vector of quantity for which the dispersion is + to be calculated + + zero : int + If set, the dispersion is calculated w.r.t. 0.0 + rather than the central value of the vector. If + Y is a vector of residuals, this should be set. + + Returns + ------- + out_val : float + Dispersion value. If failed, returns -1. + + """ + # Flatten array + y = in_y.ravel() + + eps = 1.0E-20 + c1 = 0.6745 + c2 = 0.80 + c3 = 6.0 + c4 = 5.0 + c_err = -1.0 + min_points = 3 + + if zero: + y0 = 0.0 + else: + y0 = numpy.median(y) + + dy = y - y0 + del_y = abs( dy ) + + # First, the median absolute deviation MAD about the median: + + mad = numpy.median( del_y ) / c1 + + # If the MAD=0, try the MEAN absolute deviation: + if mad < eps: + mad = del_y.mean() / c2 + if mad < eps: + return 0.0 + + # Now the biweighted value: + u = dy / (c3 * mad) + uu = u * u + q = numpy.where(uu <= 1.0) + count = len(q[0]) + if count < min_points: + module_logger.warn('ROBUST_SIGMA: This distribution is TOO WEIRD! 
' + 'Returning {}'.format(c_err)) + return c_err + + numerator = numpy.sum( (y[q] - y0)**2.0 * (1.0 - uu[q])**4.0 ) + n = y.size + den1 = numpy.sum( (1.0 - uu[q]) * (1.0 - c4 * uu[q]) ) + siggma = n * numerator / ( den1 * (den1 - 1.0) ) + + if siggma > 0: + out_val = numpy.sqrt( siggma ) + else: + out_val = 0.0 + + return out_val + + + + +def main(fitsimage, maskfits, outfilename): + + #imagenoise = 5E-3 + + #f = aplpy.FITSFigure(fitsimage,slices=[0,0]) + #f.show_colorscale(vmax=10*imagenoise,vmin=-3*imagenoise) + ##outplotname = fitsimage.replace('.fits','.png') + #f.add_colorbar() + #f.save('%s.png'%(outfilename),dpi=750) + #f.show_contour(maskfits, levels=[0.5], colors='m') + #f.save('%s_mask.png'%(outfilename),dpi=750) + + ##f = aplpy.FITSFigure(maskfits,slices=[0,0]) + ##f.show_colorscale(vmax=0,vmin=1) + ###f.show_contour(maskfits, levels=[0.5], colors='w') + ###outplotname = fitsimage.replace('.fits','.png') + ##f.add_colorbar() + ##f.save('%s-mask.png'%(outfilename),dpi=500) + + + + #outplotname = fitsimage.replace('.fits','.png') + + # find image noise + hdulist = astropy.io.fits.open(fitsimage) + data = hdulist[0].data + imagenoise = find_imagenoise(data) + hdulist.close() + print 'Image noise is: ', imagenoise*1e3, ' mJy' + + + f = aplpy.FITSFigure(fitsimage,slices=[0,0]) + f.show_colorscale(vmax=16*imagenoise,vmin=-4*imagenoise, cmap='bone') + + + #f.add_beam() + #f.beam.set_color('white') + #f.beam.set_hatch('.') + + + f.add_grid() + f.grid.set_color('white') + f.grid.set_alpha(0.5) + f.grid.set_linewidth(0.2) + + f.add_colorbar() + + + f.show_contour(maskfits, colors='red', levels=[0.0],filled=False, smooth=1,alpha=0.6, linewidths=0.25) + f.colorbar.set_axis_label_text('Flux (Jy beam$^{-1}$)') + + f.save('%s.png'%(outfilename),dpi=400) + + + + +if __name__ == '__main__': + descriptiontext = "Convert a fits image to a CASA image.\n" + + parser = argparse.ArgumentParser(description=descriptiontext, formatter_class=RawTextHelpFormatter) + parser.add_argument('fitsimage', help='name of the image') + parser.add_argument('maskfits', help='name of the mask') + parser.add_argument('outname', help='name for the output') + args = parser.parse_args() + + main(args.fitsimage, args.maskfits, args.outname) diff --git a/Docker/scripts/plot_unflagged_fraction.py b/Docker/scripts/plot_unflagged_fraction.py new file mode 100755 index 00000000..49485b26 --- /dev/null +++ b/Docker/scripts/plot_unflagged_fraction.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +import argparse +import glob +import signal +import os +import sys +import matplotlib as mpl +mpl.use('Agg') +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import casacore.tables as pt +from matplotlib import cm + + +def input2strlist_nomapfile(invar): + """ + from bin/download_IONEX.py + give the list of MSs from the list provided as a string + """ + str_list = None + if type(invar) is str: + if invar.startswith('[') and invar.endswith(']'): + str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')] + else: + str_list = [invar.strip(' \'\"')] + elif type(invar) is list: + str_list = [str(f).strip(' \'\"') for f in invar] + else: + raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!') + return str_list + + +def main(ms_list, frac_list, outfile='unflagged_fraction.png'): + + ms_list = input2strlist_nomapfile(ms_list) + frac_list = input2strlist_nomapfile(frac_list) + frac_list = np.array([float(f) for f in frac_list]) + outdir = os.path.dirname(outfile) + if not os.path.exists(outdir): + 
os.makedirs(outdir) + + # Get frequencies + freq_list = [] + for ms in ms_list: + # open the main table and print some info about the MS + t = pt.table(ms, readonly=True, ack=False) + tfreq = pt.table(t.getkeyword('SPECTRAL_WINDOW'),readonly=True,ack=False) + ref_freq = tfreq.getcol('REF_FREQUENCY',nrow=1)[0] + freq_list.append(ref_freq) + freq_list = np.array(freq_list) / 1e6 # MHz + + # Plot the unflagged fraction vs. frequency + plt.scatter(freq_list, frac_list) + plt.xlabel('frequency [MHz]') + plt.ylabel('unflagged fraction') + plt.savefig(outfile) diff --git a/Docker/scripts/plot_uvcov.py b/Docker/scripts/plot_uvcov.py new file mode 100755 index 00000000..f752725b --- /dev/null +++ b/Docker/scripts/plot_uvcov.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python +import argparse +import glob +import signal +import os +import sys +import matplotlib as mpl +mpl.use('Agg') +import matplotlib +import matplotlib.pyplot as plt +import numpy +import casacore.tables as pt +from matplotlib import cm + + +def input2strlist_nomapfile(invar): + """ + from bin/download_IONEX.py + give the list of MSs from the list provided as a string + """ + str_list = None + if type(invar) is str: + if invar.startswith('[') and invar.endswith(']'): + str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')] + else: + str_list = [invar.strip(' \'\"')] + elif type(invar) is list: + str_list = [str(f).strip(' \'\"') for f in invar] + else: + raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!') + return str_list + + +def string2bool(instring): + if isinstance(instring, bool): + return instring + if instring.upper() == 'TRUE' or instring == '1': + return True + elif instring.upper() == 'FALSE' or instring == '0': + return False + else: + raise ValueError('string2bool: Cannot convert string "'+instring+'" to boolean!') + + +def main(input, output, title='uv coverage', limits=',,,', timeslots='0,10,0', antennas='-1', + wideband=True): + + MSlist = input2strlist_nomapfile(input) + if len(MSlist) == 0: + print('Error: You must specify at least one MS name.') + sys.exit(1) + plottitle = title + fileformat = output.split('.')[-1] + if fileformat not in ['png','pdf','eps','ps']: + print('Error: Unknown file extension') + sys.exit(1) + axlimits = limits.strip().split(',') + if len(axlimits) == 4: + xmin,xmax,ymin,ymax = axlimits + else: + print('Error: You must specify four axis limits') + sys.exit(1) + timeslots = timeslots.split(',') + if len(timeslots) != 3: + print('Error: Timeslots format is start,skip,end') + sys.exit(1) + for i in range(len(timeslots)): + timeslots[i] = int(timeslots[i]) + if timeslots[i] < 0: + print('Error: timeslots values must not be negative') + sys.exit(1) + antToPlotSpl = antennas.split(',') + antToPlot = [] + for i in range(len(antToPlotSpl)): + tmpspl = antToPlotSpl[i].split('..') + if len(tmpspl) == 1: + antToPlot.append(int(antToPlotSpl[i])) + elif len(tmpspl) == 2: + for j in range(int(tmpspl[0]),int(tmpspl[1])+1): + antToPlot.append(j) + else: + print('Error: Could not understand antenna list.') + sys.exit(1) + wideband = string2bool(wideband) + + outdir = os.path.dirname(output) + if not os.path.exists(outdir): + os.makedirs(outdir) + + badval = 0.0 + xaxisvals = [] + yaxisvals = [] + flagvals = [] + savex = numpy.array([]) + savey = numpy.array([]) + prev_firstTime = None + prev_lastTime = None + prev_intTime = None + for inputMS in MSlist: + # open the main table and print some info about the MS + t = pt.table(inputMS, readonly=True, ack=False) + tfreq = 
pt.table(t.getkeyword('SPECTRAL_WINDOW'),readonly=True,ack=False) + ref_freq = tfreq.getcol('REF_FREQUENCY',nrow=1)[0] + ch_freq = tfreq.getcol('CHAN_FREQ',nrow=1)[0] + if wideband: + ref_wavelength = 2.99792458e8/ch_freq + else: + ref_wavelength = [2.99792458e8/ref_freq] + + firstTime = t.getcell("TIME", 0) + lastTime = t.getcell("TIME", t.nrows()-1) + intTime = t.getcell("INTERVAL", 0) + nTimeslots = (lastTime - firstTime) / intTime + if timeslots[1] == 0: + timeskip = 1 + else: + timeskip = int(timeslots[1]) + if timeslots[2] == 0: + timeslots[2] = nTimeslots + # open the antenna subtable + tant = pt.table(t.getkeyword('ANTENNA'), readonly=True, ack=False) + + # Station names + antList = tant.getcol('NAME') + if len(antToPlot)==1 and antToPlot[0]==-1: + antToPlot = list(range(len(antList))) + + # select by time from the beginning, and only use specified antennas + tsel = t.query('TIME >= %f AND TIME <= %f AND ANTENNA1 IN %s AND ANTENNA2 IN %s' % (firstTime+timeslots[0]*intTime,firstTime+timeslots[2]*intTime,str(antToPlot),str(antToPlot)), columns='ANTENNA1,ANTENNA2,UVW,FLAG_ROW') + + # Now we loop through the baselines + i = 0 + nb = (len(antToPlot)*(len(antToPlot)-1))/2 + for tpart in tsel.iter(["ANTENNA1","ANTENNA2"]): + ant1 = tpart.getcell("ANTENNA1", 0) + ant2 = tpart.getcell("ANTENNA2", 0) + if ant1 not in antToPlot or ant2 not in antToPlot: continue + if ant1 == ant2: continue + i += 1 + # Get the values to plot (only need to read again if this ms has new times) + if prev_firstTime != firstTime or prev_lastTime != lastTime or prev_intTime != intTime: + uvw = tpart.getcol('UVW', rowincr=timeskip) + savex = numpy.append(savex,[uvw[:,0],-uvw[:,0]]) + savey = numpy.append(savey,[uvw[:,1],-uvw[:,1]]) + + # Get the flags + flags = tpart.getcol('FLAG_ROW', rowincr=timeskip) + for w in ref_wavelength: + flagvals.extend(flags.tolist()) + flagvals.extend(flags.tolist()) + for w in ref_wavelength: + xaxisvals.extend((savex/w/1000.).tolist()) + yaxisvals.extend((savey/w/1000.).tolist()) + prev_firstTime = firstTime + prev_lastTime = lastTime + prev_intTime = intTime + + # Plot the uv coverage + xaxisvals = numpy.array(xaxisvals) + yaxisvals = numpy.array(yaxisvals) + zvals = numpy.zeros(xaxisvals.shape) + flagvals = numpy.array(flagvals) + flagged_ind = numpy.where(flagvals) + zvals[flagged_ind] = 1.0 + uvmax = max(xaxisvals.max(),yaxisvals.max()) + uvmin = min(xaxisvals.min(),yaxisvals.min()) + uvuplim = 0.02*(uvmax-uvmin)+uvmax + uvlolim = uvmin-0.02*(uvmax-uvmin) + if xmin == '': + minx = uvlolim + else: + minx = float(xmin) + if xmax == '': + maxx = uvuplim + else: + maxx = float(xmax) + if ymin == '': + miny = uvlolim + else: + miny = float(ymin) + if ymax == '': + maxy = uvuplim + else: + maxy = float(ymax) + if minx == maxx: + minx = -1.0 + maxx = 1.0 + if miny == maxy: + miny = -1.0 + maxy = 1.0 + plt.xlabel(r'u [k$\lambda$]') + plt.ylabel(r'v [k$\lambda$]') + plt.xlim([minx,maxx]) + plt.ylim([miny,maxy]) + gridsize = 300 + plt.hexbin(xaxisvals, yaxisvals, C=zvals, gridsize=gridsize, cmap=cm.jet, bins=None) + plt.title(plottitle) + plt.axes().set_aspect('equal') + plt.grid(True) + cb = plt.colorbar() + cb.set_label('flagged fraction') + plt.savefig(output) + + # Plot histogram of the flagged fraction vs. 
+
+    # Plot a histogram of the unflagged fraction vs. uv distance
+    uvdist = numpy.sqrt(xaxisvals**2 + yaxisvals**2)
+    sorted_ind = numpy.argsort(uvdist)
+    uvdist = uvdist[sorted_ind]
+    zvals = 1.0-zvals[sorted_ind]
+    hist, bins = numpy.histogram(uvdist, 50)
+    bins_mean = [0.5 * (bins[i] + bins[i+1]) for i in range(len(bins)-1)]
+    inds = []
+    for b in bins[:-1]:
+        inds.append(numpy.where(uvdist > b)[0][0])
+    inds.append(len(uvdist))
+    flagged_mean = [numpy.mean(zvals[inds[i]:inds[i+1]]) for i in range(len(inds)-1)]
+    plt.clf()
+    plt.bar(bins_mean, flagged_mean, (bins[1]-bins[0])/2.0)
+    plt.xlabel(r'uv distance [k$\lambda$]')
+    plt.ylabel('unflagged fraction')
+    plt.savefig('{0}_uvdist{1}'.format(*os.path.splitext(output)))
+
+
+
diff --git a/Docker/scripts/sort_times_into_freqGroups.py b/Docker/scripts/sort_times_into_freqGroups.py
new file mode 100755
index 00000000..b97e2b0c
--- /dev/null
+++ b/Docker/scripts/sort_times_into_freqGroups.py
@@ -0,0 +1,422 @@
+#! /usr/bin/env python
+"""
+Script to sort a list of MSs into frequency groups by time-stamp
+"""
+import pyrap.tables as pt
+import sys, os
+import numpy as np
+from lofarpipe.support.data_map import DataMap
+from lofarpipe.support.data_map import DataProduct
+
+def _calc_edge_chans(inmap, numch, edgeFactor=32):
+    """
+    Generates a map with strings that can be used as input for NDPPP to flag the edges
+    of the input MSs during (or after) concatenation.
+
+    inmap      - MultiDataMap (not mapfilename!) with the files to be concatenated.
+    numch      - Number of channels per input file (all files are assumed to have the
+                 same number of channels).
+    edgeFactor - Divisor to compute how many channels are to be flagged at the beginning
+                 and end of each file (numch=64 and edgeFactor=32 means "flag two
+                 channels at the beginning and two at the end").
+    """
+    outmap = DataMap([])
+    for group in inmap:
+        flaglist = []
+        for i in range(len(group.file)):
+            flaglist.extend(range(i*numch, i*numch+numch//edgeFactor))
+            flaglist.extend(range((i+1)*numch-numch//edgeFactor, (i+1)*numch))
+        outmap.append(DataProduct(group.host, str(flaglist).replace(' ', ''), group.skip))
+        print('_calc_edge_chans: flaglist:', str(flaglist).replace(' ', ''))
+    return outmap
+
+def input2bool(invar):
+    if invar is None:
+        return None
+    if isinstance(invar, bool):
+        return invar
+    elif isinstance(invar, str):
+        if invar.upper() == 'TRUE' or invar == '1':
+            return True
+        elif invar.upper() == 'FALSE' or invar == '0':
+            return False
+        else:
+            raise ValueError('input2bool: Cannot convert string "'+invar+'" to boolean!')
+    elif isinstance(invar, int) or isinstance(invar, float):
+        return bool(invar)
+    else:
+        raise TypeError('input2bool: Unsupported data type:'+str(type(invar)))
+
+def input2int(invar):
+    if invar is None:
+        return None
+    if isinstance(invar, int):
+        return invar
+    elif isinstance(invar, float):
+        return int(invar)
+    elif isinstance(invar, str):
+        if invar.strip().upper() == 'NONE' or invar.strip().upper() == 'FALSE':
+            return None
+        else:
+            return int(invar)
+    else:
+        raise TypeError('input2int: Unsupported data type:'+str(type(invar)))
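+
+# Worked example for _calc_edge_chans (editor's illustration, assumed inputs):
+# for a group of two files with numch=64 and edgeFactor=32, two channels are
+# flagged at each file edge, so the resulting flaglist string is
+# "[0,1,62,63,64,65,126,127]".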
+
+
+def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None,
+         mergeLastGroup=False, truncateLastSBs=True, firstSB=None):
+    """
+    Check a list of MS files for missing frequencies and sort them into
+    frequency groups per time-stamp.
+
+    Parameters
+    ----------
+    ms_input : list or str
+        List of MS filenames, or string with list, or path to a mapfile
+    filename: str
+        Name of output mapfile
+    mapfile_dir : str
+        Directory for output mapfile
+    numSB : int, optional
+        How many files should go into one frequency group. Values <= 0 mean put
+        all files of the same time-step into one group.
+        default = -1
+    hosts : list or str
+        List of hostnames or string with list of hostnames
+    NDPPPfill : bool, optional
+        Add dummy file-names for missing frequencies, so that NDPPP can
+        fill the data with flagged dummy data.
+        default = True
+    target_path : str, optional
+        Change the path of the "groups" files to this. (I.e. write output files
+        into this directory with the subsequent NDPPP call.)
+        default = keep path of input files
+    stepname : str, optional
+        Add this step-name into the file-names of the output files.
+    mergeLastGroup, truncateLastSBs : bool, optional
+        mergeLastGroup = True, truncateLastSBs = True:
+            not allowed
+        mergeLastGroup = True, truncateLastSBs = False:
+            put the files from the last group that doesn't have SBperGroup subbands
+            into the second-to-last group (which will then have more than SBperGroup entries).
+        mergeLastGroup = False, truncateLastSBs = True:
+            ignore the last files that don't make up a full group (not all files are used).
+        mergeLastGroup = False, truncateLastSBs = False:
+            keep the incomplete last group, or - with NDPPPfill=True - fill the
+            last group with dummies.
+    firstSB : int, optional
+        If set, then reference the grouping of files to this station-subband, as if a file
+        with this station-subband were included in the input files.
+        (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz)
+
+    Returns
+    -------
+    result : dict
+        Dict with the names of the generated mapfiles
+
+    """
+
+    NDPPPfill = input2bool(NDPPPfill)
+    mergeLastGroup = input2bool(mergeLastGroup)
+    truncateLastSBs = input2bool(truncateLastSBs)
+    firstSB = input2int(firstSB)
+    numSB = int(numSB)
+
+    if not filename or not mapfile_dir:
+        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
+    if mergeLastGroup and truncateLastSBs:
+        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
+#    if mergeLastGroup:
+#        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
+    if type(ms_input) is str:
+        if ms_input.startswith('[') and ms_input.endswith(']'):
+            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
+        else:
+            map_in = DataMap.load(ms_input)
+            map_in.iterator = DataMap.SkipIterator
+            ms_list = []
+            for item in map_in:
+                # a mapfile entry may itself contain a list of file-names
+                fname = item.file
+                if fname.startswith('[') and fname.endswith(']'):
+                    for f in fname.strip('[]').split(','):
+                        ms_list.append(f.strip(' \'\"'))
+                else:
+                    ms_list.append(fname.strip(' \'\"'))
+    elif type(ms_input) is list:
+        ms_list = [str(f).strip(' \'\"') for f in ms_input]
+    else:
+        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')
+
+    if type(hosts) is str:
+        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
+    if not hosts:
+        hosts = ['localhost']
+    numhosts = len(hosts)
+    print('sort_times_into_freqGroups: Working on', len(ms_list), 'files (including flagged files).')
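+
+    # Illustrative example of the grouping options (hypothetical numbers): with
+    # 25 input files per time-stamp and numSB=10, truncateLastSBs=True keeps
+    # two groups of 10 files (5 files are dropped); mergeLastGroup=True makes
+    # the second group hold 15 files; with both False and NDPPPfill=True a
+    # third group is padded with 5 'dummy.ms' entries.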
+
+    time_groups = {}
+    # sort by time
+    for i, ms in enumerate(ms_list):
+        # work only on files selected by a previous step
+        if ms.lower() != 'none':
+            # use the slower but more reliable way:
+            obstable = pt.table(ms, ack=False)
+            timestamp = int(round(np.min(obstable.getcol('TIME'))))
+            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
+            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
+            obstable.close()
+            if timestamp in time_groups:
+                time_groups[timestamp]['files'].append(ms)
+            else:
+                time_groups[timestamp] = {'files': [ms], 'basename': os.path.splitext(ms)[0]}
+    print('sort_times_into_freqGroups: found', len(time_groups), 'time-groups')
+
+    # sort time-groups by frequency
+    timestamps = sorted(time_groups.keys())  # sorted order is relied upon below
+    first = True
+    nchans = 0
+    for time in timestamps:
+        freqs = []
+        for ms in time_groups[time]['files']:
+            # Get the frequency info
+            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
+            freq = sw.col('REF_FREQUENCY')[0]
+            if first:
+                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
+                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
+                chwidth = sw.col('CHAN_WIDTH')[0][0]
+                freqset = set([freq])
+                first = False
+            else:
+                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
+                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
+                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
+                freqset.add(freq)
+            freqs.append(freq)
+            sw.close()
+        time_groups[time]['freq_names'] = list(zip(freqs, time_groups[time]['files']))
+        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
+        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
+        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
+    print('sort_times_into_freqGroups: Collected the frequencies for the time-groups')
+
+    freqliste = np.array(list(freqset))
+    freqliste.sort()
+    if len(freqliste) == 1:
+        # only one distinct frequency present: fall back to the file bandwidth
+        freq_width = file_bandwidth
+    else:
+        freq_width = np.min(freqliste[1:]-freqliste[:-1])
+    if file_bandwidth > freq_width:
+        raise ValueError("Bandwidth of files is larger than minimum frequency step between two files!")
+    if file_bandwidth < (freq_width*0.51):
+        raise ValueError("Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)")
+    # the new output map
+    filemap = MultiDataMap()
+    groupmap = DataMap()
+    # add about half an SB bandwidth in case maxfreq is "exactly" on a group border
+    maxfreq = np.max(freqliste)+freq_width*0.51
+    if firstSB is not None:
+        if freqliste[0] < 100e6:
+            # LBA data
+            minfreq = (float(firstSB)/512.*100e6)-freq_width/2.
+        elif freqliste[0] > 100e6 and freqliste[0] < 200e6:
+            # HBA-low
+            minfreq = (float(firstSB)/512.*100e6)+100e6-freq_width/2.
+        elif freqliste[0] > 200e6 and freqliste[0] < 300e6:
+            # HBA-high
+            minfreq = (float(firstSB)/512.*100e6)+200e6-freq_width/2.
+        else:
+            raise ValueError('sort_times_into_freqGroups: Frequency of lowest input data is higher than 300MHz!')
+        if np.min(freqliste) < minfreq:
+            raise ValueError('sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!')
+    else:
+        minfreq = np.min(freqliste)-freq_width/2.
+    groupBW = freq_width*numSB
+    if groupBW < 1e6 and groupBW > 0:
+        print('sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!')
+    if groupBW < 0:
+        # this is the case when concatenating all subbands
+        groupBW = maxfreq-minfreq
+        truncateLastSBs = input2bool(False)
+        NDPPPfill = input2bool(True)
+    freqborders = np.arange(minfreq, maxfreq, groupBW)
+    if mergeLastGroup:
+        freqborders[-1] = maxfreq
+    elif truncateLastSBs:
+        pass  # nothing to do; left in to make the logic clear
+    elif not truncateLastSBs and NDPPPfill:
+        freqborders = np.append(freqborders, (freqborders[-1]+groupBW))
+    elif not truncateLastSBs and not NDPPPfill:
+        freqborders = np.append(freqborders, maxfreq)
+
+    freqborders = freqborders[freqborders > (np.min(freqliste)-groupBW)]
+    ngroups = len(freqborders)-1
+    if ngroups == 0:
+        raise ValueError('sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!')
+
+    print('sort_times_into_freqGroups: Will create', ngroups, 'group(s) with', numSB, 'file(s) each.')
+
+    hostID = 0
+    for time in timestamps:
+        (freq, fname) = time_groups[time]['freq_names'].pop(0)
+        for groupIdx in range(ngroups):
+            files = []
+            skip_this = True
+            filefreqs_low = np.arange(freqborders[groupIdx], freqborders[groupIdx+1], freq_width)
+            for lower_freq in filefreqs_low:
+                if freq > lower_freq and freq < lower_freq+freq_width:
+                    assert freq != 1e12
+                    files.append(fname)
+                    if len(time_groups[time]['freq_names']) > 0:
+                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
+                    else:
+                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
+                    skip_this = False
+                elif NDPPPfill:
+                    files.append('dummy.ms')
+            if not skip_this:
+                filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
+                freqID = int((freqborders[groupIdx]+freqborders[groupIdx+1])/2e6)
+                groupname = time_groups[time]['basename']+'_%Xt_%dMHz.ms' % (time, freqID)
+                if type(stepname) is str:
+                    groupname += stepname
+                if type(target_path) is str:
+                    groupname = os.path.join(target_path, os.path.basename(groupname))
+                groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
+        orphan_files = len(time_groups[time]['freq_names'])
+        if freq < 1e12:
+            orphan_files += 1
+        if orphan_files > 0:
+            print('sort_times_into_freqGroups: Had %d unassigned files in time-group %xt.' % (orphan_files, time))
+    filemapname = os.path.join(mapfile_dir, filename)
+    filemap.save(filemapname)
+    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
+    groupmap.save(groupmapname)
+    # generate map with edge-channels to flag
+    flagmap = _calc_edge_chans(filemap, nchans)
+    flagmapname = os.path.join(mapfile_dir, filename+'_flags')
+    flagmap.save(flagmapname)
+    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
+    return result
+ """ + def __init__(self, host=None, file=None, skip=True): + super(MultiDataProduct, self).__init__(host, file, skip) + if not file: + self.file = list() + else: + self._set_file(file) + + def __repr__(self): + """Represent an instance as a Python dict""" + return ( + "{{'host': '{0}', 'file': {1}, 'skip': {2}}}".format(self.host, self.file, str(self.skip)) + ) + + def __str__(self): + """Print an instance as 'host:[filelist]'""" + return ':'.join((self.host, str(self.file))) + + def _set_file(self, data): + try: + # Try parsing as a list + if isinstance(data, list): + self.file = data + if isinstance(data, DataProduct): + self._from_dataproduct(data) + if isinstance(data, DataMap): + self._from_datamap(data) + + except TypeError: + raise DataProduct("No known method to set a filelist from %s" % str(file)) + + def _from_dataproduct(self, prod): + print 'setting filelist from DataProduct' + self.host = prod.host + self.file = prod.file + self.skip = prod.skip + + def _from_datamap(self, inmap): + print 'setting filelist from DataMap' + filelist = {} + for item in inmap: + if not item.host in filelist: + filelist[item.host] = [] + filelist[item.host].append(item.file) + self.file = filelist['i am'] + + def append(self, item): + self.file.append(item) + + +class MultiDataMap(DataMap): + """ + Class representing a specialization of data-map, a collection of data + products located on the same node, skippable as a set and individually + """ + @DataMap.data.setter + def data(self, data): + if isinstance(data, DataMap): + mdpdict = {} + data.iterator = DataMap.SkipIterator + for item in data: + if not item.host in mdpdict: + mdpdict[item.host] = [] + mdpdict[item.host].append(item.file) + mdplist = [] + for k, v in mdpdict.iteritems(): + mdplist.append(MultiDataProduct(k, v, False)) + self._set_data(mdplist, dtype=MultiDataProduct) + elif isinstance(data, MultiDataProduct): + self._set_data(data, dtype=MultiDataProduct) + elif not data: + pass + else: + self._set_data(data, dtype=MultiDataProduct) + + def split_list(self, number): + mdplist = [] + for item in self.data: + for i in xrange(0, len(item.file), number): + chunk = item.file[i:i+number] + mdplist.append(MultiDataProduct(item.host, chunk, item.skip)) + self._set_data(mdplist) + + + +if __name__ == '__main__': + import optparse + import glob + import random + + opt = optparse.OptionParser(usage='%prog [options] <MSPattern> \n') + opt.add_option('-v', '--verbose', help='Go Vebose! (default=False)', action='store_true', default=False) + opt.add_option('-r', '--randomize', help='Randomize order of the input files. (default=False)', action='store_true', default=False) + opt.add_option('-d', '--decimate', help='Remove every 10th file (after randomization if that is done). (default=False)', action='store_true', default=False) + opt.add_option('-n', '--numbands', help='Number of how many files should be grouped together in frequency. (default=all files in one group)', type='int', default=-1) + opt.add_option('-f', '--filename', help='Name for the mapfiles to write. 
+
+
+if __name__ == '__main__':
+    import optparse
+    import glob
+    import random
+
+    opt = optparse.OptionParser(usage='%prog [options] <MSPattern> \n')
+    opt.add_option('-v', '--verbose', help='Go verbose! (default=False)', action='store_true', default=False)
+    opt.add_option('-r', '--randomize', help='Randomize order of the input files. (default=False)', action='store_true', default=False)
+    opt.add_option('-d', '--decimate', help='Remove every 10th file (after randomization if that is done). (default=False)', action='store_true', default=False)
+    opt.add_option('-n', '--numbands', help='Number of files to be grouped together in frequency. (default=all files in one group)', type='int', default=-1)
+    opt.add_option('-f', '--filename', help='Name for the mapfiles to write. (default=\"test.mapfile\")', type='string', default='test.mapfile')
+
+    (options, args) = opt.parse_args()
+
+    # Check options
+    if len(args) != 1:
+        opt.print_help()
+        sys.exit()
+
+    # first argument: pattern for measurement-sets
+    inMSs = glob.glob(args[0])
+    if options.randomize:
+        random.shuffle(inMSs)
+    if options.decimate:
+        for i in range((len(inMSs)-1), -1, -10):
+            inMSs.pop(i)
+
+    ergdict = main(inMSs, options.filename, '.', numSB=options.numbands, hosts=None, NDPPPfill=True)
+
+    groupmap = DataMap.load(ergdict['groupmapfile'])
+    filemap = MultiDataMap.load(ergdict['mapfile'])
+    print('len(groupmap) : %d , len(filemap) : %d ' % (len(groupmap), len(filemap)))
+    if len(groupmap) != len(filemap):
+        print('groupmap and filemap have different lengths!')
+        sys.exit(1)
+    for i in range(len(groupmap)):
+        print('Group "%s" has %d entries.' % (groupmap[i].file, len(filemap[i].file)))
diff --git a/Docker/scripts/transfer_solutions.py b/Docker/scripts/transfer_solutions.py
new file mode 100755
index 00000000..6b59e3e7
--- /dev/null
+++ b/Docker/scripts/transfer_solutions.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# Written for prefactor by Alexander Drabent (alex@tls-tautenburg.de), 19th July 2019.
+
+from losoto.h5parm import h5parm
+from losoto.lib_operations import *
+
+import logging
+import os
+import re
+import numpy
+import subprocess
+########################################################################
+def main(h5parmdb, refh5parm, insolset='sol000', outsolset='sol000', insoltab='amplitude000', outsoltab='amplitude000', antenna='[FUSPID].*', trusted_sources=['3C48', '3C147'], parset=None):
+
+    ### Open up the h5parms and get the solution tables
+    data = h5parm(h5parmdb, readonly=False)
+    refdata = h5parm(refh5parm, readonly=True)
+    insolset = refdata.getSolset(insolset)
+    outsolset = data.getSolset(outsolset)
+    insoltab = insolset.getSoltab(insoltab)
+    outsoltab = outsolset.getSoltab(outsoltab, useCache=True)
+
+    ### Get the calibrator information from the target solset and check whether it is trusted or not.
+    calibrators = []
+    sources = outsolset.obj._f_get_child('source')
+    for i in numpy.arange(len(sources)):
+        calibrators.append(sources[i][0])
+    calibrator = numpy.unique(calibrators)
+    if len(calibrator) > 1:
+        logging.error('There is more than one calibrator used in the target solution set: ' + str(calibrator) + '. No solutions will be transferred.')
+        data.close()
+        refdata.close()
+        return 1
+    if calibrator[0].upper() in trusted_sources:
+        logging.info(calibrator[0] + ' is a trusted calibrator source. No solutions from a reference solution set will be transferred!')
+        data.close()
+        refdata.close()
+        return 0
+    else:
+        logging.info(calibrator[0] + ' is not a trusted calibrator source. Solutions from a reference solution set will be transferred!')
+
+    ### check for matching antennas
+    station_names = outsoltab.ant
+    stations_to_transfer = [station_name for station_name in station_names if re.match(antenna, station_name)]
+    if len(stations_to_transfer) == 0:
+        logging.warning('No stations found matching the regular expression: ' + antenna)
+        data.close()
+        refdata.close()
+        return 0
+
+    ### Properly define axes order
+    out_axes = outsoltab.getAxesNames()
+    out_axes.insert(0, out_axes.pop(out_axes.index('time')))  ## put time axis first
+    out_axes.insert(1, out_axes.pop(out_axes.index('freq')))  ## put frequency axis second
+    out_axes.insert(2, out_axes.pop(out_axes.index('ant')))   ## put antenna axis third
+    in_axes = insoltab.getAxesNames()
+    in_axes.insert(0, in_axes.pop(in_axes.index('freq')))     ## put frequency axis first
+    in_axes.insert(1, in_axes.pop(in_axes.index('ant')))      ## put antenna axis second
+
+    if 'time' in in_axes:
+        logging.warning('Reference solution table provides a time axis. The bandpass is assumed to be constant in time. Thus, only the first time step will be transferred.')
+        in_axes.insert(2, in_axes.pop(in_axes.index('time')))  ## put time axis third
+
+    ### re-sort the val and weight arrays according to the new axes order
+    ### (the loop runs exactly once, since all axes are returned)
+    for outvals, outweights, coord, selection in outsoltab.getValuesIter(returnAxes=out_axes, weight=True):
+        outvals = reorderAxes(outvals, outsoltab.getAxesNames(), out_axes)
+        outweights = reorderAxes(outweights, outsoltab.getAxesNames(), out_axes)
+
+    ### re-sort the val and weight arrays according to the new axes order
+    for invals, inweights, coord, selection in insoltab.getValuesIter(returnAxes=in_axes, weight=True):
+        invals = reorderAxes(invals, insoltab.getAxesNames(), in_axes)
+        inweights = reorderAxes(inweights, insoltab.getAxesNames(), in_axes)
+
+    ### check for equidistance in frequency
+    freq_resolution = numpy.unique(numpy.diff(outsoltab.freq))
+    if len(freq_resolution) > 1:
+        logging.error('Frequency axis is not equidistant!')
+        data.close()
+        refdata.close()
+        return 1
+
+    ### look for nearest neighbours in frequency and write them into a dictionary
+    nearest_freq = {}
+    freq_idx = []
+    for i, frequency in enumerate(numpy.array(outsoltab.freq)):
+        nearest_freq[i] = numpy.abs(frequency - numpy.array(insoltab.freq)).argmin()
+        freq_idx.append(nearest_freq[i])
+
+    ### do the job!
+    for station_to_transfer in stations_to_transfer:
+        logging.info('Transferring solutions for antenna: ' + str(station_to_transfer))
+        unique_elements, counts_elements = numpy.unique(freq_idx, return_counts=True)
+        ant_index = list(outsoltab.ant).index(station_to_transfer)
+        try:
+            ref_ant_index = list(insoltab.ant).index(station_to_transfer)
+        except ValueError:
+            logging.warning('Station ' + station_to_transfer + ' does not exist in reference soltab. No solutions will be transferred!')
+            continue
+        for j, frequency in enumerate(numpy.array(outsoltab.freq)):
+            if 'time' in in_axes:
+                weight = inweights[nearest_freq[j], ref_ant_index, 0]
+            else:
+                weight = inweights[nearest_freq[j], ref_ant_index]
+            if counts_elements[nearest_freq[j]] > 1:
+                if frequency < insoltab.freq[0] or frequency > insoltab.freq[-1]:
+                    counts_elements[nearest_freq[j]] -= 1
+                    logging.warning('No entry for frequency ' + str(frequency) + '. Solution will be flagged.')
+                    weight = 0
+            for i, time in enumerate(outsoltab.time):
+                outweights[i, j, ant_index] = weight
+                if 'time' in in_axes:
+                    outvals[i, j, ant_index] = invals[nearest_freq[j], ref_ant_index, 0]
+                else:
+                    outvals[i, j, ant_index] = invals[nearest_freq[j], ref_ant_index]
+
+    ### write to the soltab
+    outsoltab.setValues(outvals, selection)
+    outsoltab.setValues(outweights, selection, weight=True)
+    outsoltab.addHistory('Transferred solutions from ' + os.path.basename(refh5parm) + ' for the stations ' + str(stations_to_transfer).lstrip('[').rstrip(']'))
+    outsoltab.flush()
+
+    ### plot the new tables if a parset is provided
+    if parset:
+        if os.path.exists(parset):
+            plot = subprocess.Popen(['losoto', '-v', h5parmdb, parset], stdout=subprocess.PIPE)
+            errorcode = plot.wait()
+            if errorcode != 0:
+                logging.error('An error has occurred while plotting.')
+                data.close()
+                refdata.close()
+                return 1
+        else:
+            logging.error('Parset file ' + parset + ' has not been found.')
+            data.close()
+            refdata.close()
+            return 1
+
+    data.close()
+    refdata.close()
+    return 0
+
+########################################################################
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Transfer solutions from a solution table of a reference h5parm to a solution table of a target h5parm.')
+
+    parser.add_argument('h5parm', type=str,
+                        help='H5parm to which the solutions should be transferred.')
+    parser.add_argument('--refh5parm', type=str,
+                        help='Name of the h5parm from which the solutions should be transferred.')
+    parser.add_argument('--insolset', type=str, default='sol000',
+                        help='Name of the input h5parm solution set (default: sol000)')
+    parser.add_argument('--outsolset', type=str, default='sol000',
+                        help='Name of the output h5parm solution set (default: sol000)')
+    parser.add_argument('--insoltab', type=str, default='amplitude000',
+                        help='Name of the input h5parm solution table (default: amplitude000)')
+    parser.add_argument('--outsoltab', type=str, default='amplitude000',
+                        help='Name of the output h5parm solution table (default: amplitude000)')
+    parser.add_argument('--antenna', type=str, default='[FUSPID].*',
+                        help='Regular expression of antenna solutions to be transferred (default: [FUSPID].*)')
+
+    args = parser.parse_args()
+
+    format_stream = logging.Formatter("%(asctime)s\033[1m %(levelname)s:\033[0m %(message)s", "%Y-%m-%d %H:%M:%S")
+    format_file = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")
+    logging.root.setLevel(logging.INFO)
+
+    log = logging.StreamHandler()
+    log.setFormatter(format_stream)
+    logging.root.addHandler(log)
+
+    logging.info('Transferring solutions from ' + args.refh5parm + ' to ' + args.h5parm + '.')
+    logging.info('Solutions will be transferred from soltab ' + str(args.insoltab) + ' to ' + str(args.outsoltab) + '.')
+    main(args.h5parm, refh5parm=args.refh5parm, insolset=args.insolset, outsolset=args.outsolset, insoltab=args.insoltab, outsoltab=args.outsoltab, antenna=args.antenna, parset=None)
--
GitLab